# Benchmarking Quali - Aprendizado Profundo (Ext. Artibutos RP)

Estruturação de pipeline baseado em aprendizado raso utilizando atributos extraídos via Deep Learning dos RPs.


# Configurações

In [1]:
import os
import sys
import gc
from pprint import pprint
from collections import Counter
import copy
import warnings
warnings.filterwarnings(action="ignore")

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from IPython.display import display

from tqdm import *

from pretty_confusion_matrix import *

# TODO: implementar rotina na classe PyNILM.utils
def sizeof_fmt(num, suffix='B'):
    ''' by Fred Cirera,  https://stackoverflow.com/a/1094933/1870254, modified'''
    for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']:
        if abs(num) < 1024.0:
            return "%3.1f %s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f %s%s" % (num, 'Yi', suffix)

def listar_variaveis_memoria(ambiente):
    print("* Variáveis instanciadas em memória:")
    print("---")
    total = 0
    for name, size in sorted(((name, sys.getsizeof(value)) for name, value in ambiente.items()),
                             key= lambda x: -x[1])[:10]:
        total += size
        print("{:>30}: {:>8}".format(name, sizeof_fmt(size)))
    print("---")
    print("Total:", sizeof_fmt(total))
    
# TODO: implementar na classe utils
def highlight_col(x):
    r = 'background-color: #D9D9D9'
    df1 = pd.DataFrame('', index=x.index, columns=x.columns)
    df1.iloc[:, -2] = r
    return df1   

In [2]:
# CONSTANTES FUNDAMENTAIS DE ORGANIZACAO DE PASTAS/ARQUIVOS
RESIDENCIA = 3

# Path do arquivo H5 (base REDD ja preparada p/ NILMTK) e outros insumos fundamentais
caminho_dados = "D:/Projetos/phd-thesis/datasets/"

# Definir diretorios onde iremos salvar os insumos gerados do notebook (dados, imagens, etc.)
caminho_dados_notebook = os.path.join(caminho_dados, "23") # Num. notebook
if not os.path.isdir(caminho_dados_notebook):
    os.makedirs(caminho_dados_notebook)
caminho_imagens_notebook = os.path.join(caminho_dados_notebook, "imagens") # Num. notebook
if not os.path.isdir(caminho_imagens_notebook):
    os.makedirs(caminho_imagens_notebook)

# Path do arquivo H5 (base REDD ja preparada p/ NILMTK)
caminho_redd = os.path.join(caminho_dados, "REDD/low_freq")

# Path completo do arquivo REDD
arquivo_dataset = os.path.join(caminho_redd, "redd.h5")

# VARIAVEL AUXILIAR
# Path dos arquivos relacionados as janelas
caminho_janelas = os.path.join(caminho_redd, "../../phd")
if not os.path.isdir(caminho_janelas):
    os.makedirs(caminho_janelas)

In [3]:
from matplotlib import rcParams
import matplotlib.pyplot as plt
from six import iteritems

from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from nilmtk.legacy.disaggregate import CombinatorialOptimisation, FHMM
import nilmtk.utils

%matplotlib inline

# Dados

## Base REDD

In [4]:
# Gerar arquivo H5 (Nilmtk) do dataset REDD, caso n exista
if not os.path.isfile(arquivo_dataset):
    from nilmtk.dataset_converters import convert_redd
    
    print("Gerando arquivo H5 (NILMTK) da base REDD, aguarde...")
    print("-----")
    convert_redd(caminho_redd, arquivo_dataset)

# Carregando dataset REDD no objeto NILMTK
# Exemplo de carregamento da base REDD no NILMTK
import h5py # * Evitar erro de incompatibilidade entre h5py e nilmtk
from nilmtk import DataSet
from nilmtk.utils import print_dict
redd = DataSet(arquivo_dataset)
print("NILMTK -> Detalhes sobre o dataset REDD:")
print_dict(redd.metadata)
print()

# Parametros dos dados
PARAMETROS_DATASET = {
    "base":redd,
    "id_residencia": RESIDENCIA,
    "inicio_intervalo":'2011-04-16 05:11:30',
    "fim_intervalo":'2011-04-23 08:43:26',
    "debug": False    
}
print("PARÂMETROS DO ESTUDO:")
pprint(PARAMETROS_DATASET)

NILMTK -> Detalhes sobre o dataset REDD:



PARÂMETROS DO ESTUDO:
{'base': <nilmtk.dataset.DataSet object at 0x00000259CD79C790>,
 'debug': False,
 'fim_intervalo': '2011-04-23 08:43:26',
 'id_residencia': 3,
 'inicio_intervalo': '2011-04-16 05:11:30'}


In [5]:
def carregar_dados_aparelho(janelas, instancia, aparelho, taxa, tamanho_janela, split_teste=None, eliminar_janelas_vazias=False, debug=False):
    # Extrair series divididas em janelas para cada medidor
    dados_cargas = janelas.preparar(
        taxa_amostral=taxa, 
        intervalo_medicao=tamanho_janela
    )
    print()

    # Pprearando dados (Serie / Estado)
    # X
    dados_medidores = janelas.filtrar_cargas(
        dados_cargas,
        filtros=[
            (1, 'site_meter'),
            (2, 'site_meter'),    
        ]
    )
    
    dados_aparelho = janelas.filtrar_cargas(dados_cargas, filtros=[(instancia, aparelho)])[0]
    
    # Validar tamanho dos dados de medidores (podem ter mais registros que os aparelhos)
    janela_media_medidores = int(np.sum([len(d["janelas"])for d in dados_medidores])/len(dados_medidores))
    janela_media_aparelho = len(dados_aparelho["janelas"])#int(np.sum([len(d["janelas"])for d in dados_aparelho])/len(dados_aparelho))

    # Ajustando para medidores terem o mesmo shape de janelas dos aparelhos 
    if janela_media_medidores > janela_media_aparelho:
        diferenca = janela_media_medidores-janela_media_aparelho
        #if debug: print("  -> Diferenca encontrada entre medidores/aparelhos:", diferenca, ", ajustando..")
        for i in range(len(dados_medidores)):
            removidos = 0
            while removidos < diferenca:
                # Remover ultima janela
                dados_medidores[i]["janelas"] = dados_medidores[i]["janelas"][:-1,:]
                removidos += 1
    
    # Estruturando dados modelagem (X e y)
    X = dados_medidores[0]["janelas"] + dados_medidores[1]["janelas"]

    # Selecionando apenas janelas VALIDAS (ocorrencia de ao menos 1 carga)
    # TODO: Implementar na biblioteca esta rotina de validacao
    if eliminar_janelas_vazias:
        idx_janelas_validas = np.where(np.sum(X, axis=1)>0)[0]
        X = X[idx_janelas_validas]
        #for i in range(len(dados_aparelhos)):
        dados_aparelho["janelas"] = dados_aparelho["janelas"][idx_janelas_validas]
        rotulos = copy.deepcopy(dados_aparelho["rotulos"])
        dados_aparelho["rotulos"]["estado"] = rotulos["estado"][idx_janelas_validas]
        dados_aparelho["rotulos"]["media"]  = rotulos["media"][idx_janelas_validas]
        dados_aparelho["rotulos"]["total"]  = rotulos["total"][idx_janelas_validas]
        if debug:
            print("   - `{}-{}`: {} => {}".format(
                dados_aparelho["carga"].upper(), 
                dados_aparelho["instancia"],
                Counter(rotulos["estado"]),
                Counter(dados_aparelho["rotulos"]["estado"])
            ))

    # y
    y = dados_aparelho["rotulos"]["estado"]

    # <<< Limpando memoria >>>
    dados_cargas = None
    del dados_cargas
    dados_medidores = None
    del dados_medidores
    dados_aparelho = None
    del dados_aparelho
    gc.collect()
    # <<< Limpando memoria >>>

    # Fazendo split dos dados (treino/teste)
    if split_teste is None:
        return X, y
    else:
        X_treino, X_teste, y_treino, y_teste = train_test_split(
            X, y, 
            test_size=split_teste,
            stratify=y,
            random_state=SEED
        )
        print()

        return X_treino, X_teste, y_treino, y_teste        
        

## Melhores Combinações de Taxas e Janelas para cada Aparelho (estudo 19)

In [6]:
df_melhores_taxas_janelas = pd.read_csv(os.path.join(caminho_dados, "19", "melhores_taxa_janela_aparelhos.csv"), index_col=0)
df_melhores_taxas_janelas

Unnamed: 0,carga,taxa_amostragem,janela,loss,acuracia,precisao,recall,f1,f1_macro
0,dish_washer - 9,2,720,0.05,95.33,20.0,25.0,22.22,59.91
1,fridge - 7,2,1080,0.0,100.0,100.0,100.0,100.0,100.0
2,microwave - 16,2,900,0.04,95.83,66.67,33.33,44.44,71.14
3,washer_dryer - 13,2,60,0.0,99.89,100.0,95.74,97.83,98.88
4,washer_dryer - 14,3,360,0.02,97.99,100.0,55.56,71.43,85.19


In [7]:
# TODO: 
# - Desenvolver módulo da metodologia na lib PyNILM

## Parâmetros de RP dos Aparelhos (estudo 18)

In [8]:
# Carregando arquivos de parametros, caso n estejam (kernel reiniciado)
if not 'parametros_rp_aparelho' in locals():
    with open(os.path.join(caminho_dados, "18", "parametros_rp_aparelho.json"),'r') as arquivo:
        parametros_rp_aparelho = json.load(arquivo)

## Ambiente e Funções Auxiliares

In [9]:
# from PyNILM.dados.janelas import Janelas
from PyNILM.dados.janelas import Janelas
from PyNILM.avaliacao.metricas import *
from PyNILM.avaliacao.graficos import *
from PyNILM.avaliacao.analises import *

from pyts.image import RecurrencePlot, GramianAngularField

# Garantindo reprodutibilidade
import random as rn

# Constantes dos experimentos
SEED = 33
FRACAO_TESTE = 0.25
EPOCAS = 100
TAMANHO_LOTE = 32
VERBOSIDADE = 2

# Parametros RP (verificado empiricamente)
PARAMETROS_RP = {
    "dimension": 1,
    "time_delay": 1,
    "threshold": None,
    "percentage": 10
}
TAMANHO_IMAGEM = (32,32)

# Travar Seed's
np.random.seed(SEED)
rn.seed(SEED)
os.environ['PYTHONHASHSEED']=str(SEED)

# Teste da classe
janelas_treino = Janelas(
    base=redd,
    id_residencia=3,
    inicio_intervalo='2011-04-16',
    fim_intervalo='2011-05-16',
    debug = False
)

janelas_teste = Janelas(
    base=redd,
    id_residencia=3,
    inicio_intervalo='2011-05-17',
    fim_intervalo='2011-05-30',
    debug = False
)
# Habilitando/limitando utilização de GPUs

In [10]:
def instancia_aparelho_residencia(aparelho, residencia, base = redd):
    """Função para coletar o id/instancia do aparelho na residencia,
    permitindo executar os testes independente da residencia"""
    instancia = []
    #for e in base.buildings[residencia].elec.all_meters():
    for e_i in range(1, len(janelas.base.buildings[residencia].elec.all_meters())):

        # Selecionando canal/aparelho
        e = janelas.base.buildings[residencia].elec[e_i]
        
        if not hasattr(e,'meters'):
            if e.label().lower().replace(" ","_") == aparelho:
                instancia.append( e.instance() )
        else:
            for e_ in e.meters:
                if e_.label().lower().replace(" ","_") == aparelho:
                    instancia.append( e_.instance() )
    return instancia

## Extração RP

In [11]:
# Construindo o pipeline de dados
# ----------

import cv2

# Constante fundamentais
TAMANHO_IMAGEM = (224,224,3) # Apenas 1 canal
TIPO_DADOS = np.float32
def serie_para_imagem(serie, params_rp = PARAMETROS_RP, tam_imagem=TAMANHO_IMAGEM, 
                      normalizar=False, padronizar=False):
    """
    Funcao responsavel por gerar e tratar a imagem RP (baseado estudo #17).
    """
    # Gerando imagem RP/redimensiona_prndo
    imagem = RecurrencePlot(**params_rp).fit_transform([serie])[0]
    imagem = cv2.resize(
            imagem, 
            dsize=tam_imagem[:2], 
            interpolation=cv2.INTER_CUBIC
        ).astype(TIPO_DADOS)
    
    if np.sum(imagem) > 0:
        # Normalizar
        if normalizar:
                imagem = (imagem - imagem.min()) / (imagem.max() - imagem.min()) # MinMax (0,1)
            #imagem = (imagem - imagem.mean()) / np.max([imagem.std(), 1e-4])

    #     # centralizar
    #     if centralizar:
    #         imagem -= imagem.mean()

        # Padronizar
        elif padronizar:
            imagem = (imagem - imagem.mean())/imagem.std()#tf.image.per_image_standardization(imagem).numpy()

    # N canais
    imagem = np.stack([imagem for i in range(tam_imagem[-1])],axis=-1).astype(TIPO_DADOS)     
    
    return imagem

def preparar_amostras(X, y, params_rp=PARAMETROS_RP, tam_imagem=TAMANHO_IMAGEM, normalizar=False, padronizar=False):
    X_imagem = np.empty((len(X), *TAMANHO_IMAGEM))
    for i, x in tqdm_notebook(enumerate(X), total=len(X)):
        X_imagem[i,] = serie_para_imagem(
            x, 
            params_rp=PARAMETROS_RP, 
            tam_imagem=TAMANHO_IMAGEM,
            normalizar=normalizar,
            padronizar=padronizar,
        )
    return X_imagem, y

In [14]:
rqa = []

janelas = janelas_treino

print("# EXTRAÇÃO RP (BASE TREINO)...\n")

for rotulo_aparelho in df_melhores_taxas_janelas.loc[
    df_melhores_taxas_janelas["carga"].isin(
        ['dish_washer - 9','fridge - 7','microwave - 16','washer_dryer - 13',
         'washer_dryer - 14']),
    : ]["carga"].values:
    
    print(f"* Aparelho {rotulo_aparelho.upper()}:")
    print()
    
    # Informacoes da carga selecionada
    CARGA = rotulo_aparelho.split(" - ")[0]
    #INSTANCIA = int(rotulo_aparelho.split(" - ")[1])

    config_aparelho = df_melhores_taxas_janelas[
        df_melhores_taxas_janelas["carga"]==rotulo_aparelho
    ].to_dict("records")[0]
    TAXA = config_aparelho["taxa_amostragem"]
    TAMANHO_JANELA = config_aparelho["janela"]
    CONFIG_RP_APARELHO = PARAMETROS_RP
    
    # Percorrer instancias do aparelho na residencia
    for INSTANCIA in instancia_aparelho_residencia(CARGA, RESIDENCIA, base = redd):
        
        # Extrair series divididas em janelas para cada medidor
        print("   - Carregando dados (taxa={:.0f}, janela={:.0f})...".format(
            TAXA, TAMANHO_JANELA
        ))
        X, y = carregar_dados_aparelho(
            janelas=janelas,
            instancia=INSTANCIA,
            aparelho=CARGA,
            tamanho_janela=TAMANHO_JANELA,
            taxa=TAXA,
            eliminar_janelas_vazias=True
        )
        print()
        
        print("   - Detalhes da amostragem (lotes):")
        print("   ---")
        for item in Counter(y).items():
            print(f"      - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y)*100,1)}%)" )
        print()
        
        print("* Convertendo séries para RPs...")
        #rqa.extend( preparar_amostras(X, y, rotulo_aparelho) )
        X, y = preparar_amostras(
            X, y, 
            params_rp=PARAMETROS_RP,
            tam_imagem=TAMANHO_IMAGEM,
            normalizar=False # config. estudo 17 = False
        )
        #print(X.shape)
        
        print("* Persistindo dados...")
        np.save(os.path.join(
            caminho_dados_notebook, "treino",
            rotulo_aparelho.lower().replace(" ", "_"),
        )+"_X.npy", X) 
        np.save(os.path.join(
            caminho_dados_notebook, "treino",
            rotulo_aparelho.lower().replace(" ", "_"),
        )+"_y.npy", y) 
        print()
        #rotulo_aparelho


# EXTRAÇÃO RP (BASE TREINO)...

* Aparelho DISH_WASHER - 9:

Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.
   - Carregando dados (taxa=2, janela=720)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 395 amostras (96.8%)
      - Classe `1`: 13 amostras (3.2%)

* Convertendo séries para RPs...


  0%|          | 0/408 [00:00<?, ?it/s]

* Persistindo dados...

* Aparelho FRIDGE - 7:

Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.
   - Carregando dados (taxa=2, janela=1080)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `1`: 269 amostras (98.2%)
      - Classe `0`: 5 amostras (1.8%)

* Convertendo séries para RPs...


  0%|          | 0/274 [00:00<?, ?it/s]

* Persistindo dados...

* Aparelho MICROWAVE - 16:

Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.
   - Carregando dados (taxa=2, janela=900)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 306 amostras (93.9%)
      - Classe `1`: 20 amostras (6.1%)

* Convertendo séries para RPs...


  0%|          | 0/326 [00:00<?, ?it/s]

* Persistindo dados...

* Aparelho WASHER_DRYER - 13:

Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.
   - Carregando dados (taxa=2, janela=60)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 4643 amostras (96.0%)
      - Classe `1`: 192 amostras (4.0%)

* Convertendo séries para RPs...


  0%|          | 0/4835 [00:00<?, ?it/s]

* Persistindo dados...

   - Carregando dados (taxa=2, janela=60)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 4582 amostras (94.8%)
      - Classe `1`: 253 amostras (5.2%)

* Convertendo séries para RPs...


  0%|          | 0/4835 [00:00<?, ?it/s]

* Persistindo dados...

* Aparelho WASHER_DRYER - 14:

Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.
   - Carregando dados (taxa=3, janela=360)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 514 amostras (94.8%)
      - Classe `1`: 28 amostras (5.2%)

* Convertendo séries para RPs...


  0%|          | 0/542 [00:00<?, ?it/s]

* Persistindo dados...

   - Carregando dados (taxa=3, janela=360)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 508 amostras (93.7%)
      - Classe `1`: 34 amostras (6.3%)

* Convertendo séries para RPs...


  0%|          | 0/542 [00:00<?, ?it/s]

* Persistindo dados...



In [15]:
rqa = []

janelas = janelas_teste

print("# EXTRAÇÃO RP (BASE TESTE)...\n")

for rotulo_aparelho in df_melhores_taxas_janelas.loc[
    df_melhores_taxas_janelas["carga"].isin(
        ['dish_washer - 9','fridge - 7','microwave - 16','washer_dryer - 13',
         'washer_dryer - 14']),
    : ]["carga"].values:
    
    print(f"* Aparelho {rotulo_aparelho.upper()}:")
    print()
    
    # Informacoes da carga selecionada
    CARGA = rotulo_aparelho.split(" - ")[0]
    #INSTANCIA = int(rotulo_aparelho.split(" - ")[1])

    config_aparelho = df_melhores_taxas_janelas[
        df_melhores_taxas_janelas["carga"]==rotulo_aparelho
    ].to_dict("records")[0]
    TAXA = config_aparelho["taxa_amostragem"]
    TAMANHO_JANELA = config_aparelho["janela"]
    CONFIG_RP_APARELHO = PARAMETROS_RP
    
    # Percorrer instancias do aparelho na residencia
    for INSTANCIA in instancia_aparelho_residencia(CARGA, RESIDENCIA, base = redd):
        
        # Extrair series divididas em janelas para cada medidor
        print("   - Carregando dados (taxa={:.0f}, janela={:.0f})...".format(
            TAXA, TAMANHO_JANELA
        ))
        X, y = carregar_dados_aparelho(
            janelas=janelas,
            instancia=INSTANCIA,
            aparelho=CARGA,
            tamanho_janela=TAMANHO_JANELA,
            taxa=TAXA,
            eliminar_janelas_vazias=True
        )
        print()
        
        print("   - Detalhes da amostragem (lotes):")
        print("   ---")
        for item in Counter(y).items():
            print(f"      - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y)*100,1)}%)" )
        print()
        
        print("* Convertendo séries para RPs...")
        #rqa.extend( preparar_amostras(X, y, rotulo_aparelho) )
        X, y = preparar_amostras(
            X, y, 
            params_rp=PARAMETROS_RP,
            tam_imagem=TAMANHO_IMAGEM,
            normalizar=False # config. estudo 17 = False
        )
        #print(X.shape)
        
        print("* Persistindo dados...")
        np.save(os.path.join(
            caminho_dados_notebook, "teste",
            rotulo_aparelho.lower().replace(" ", "_"),
        )+"_X.npy", X) 
        np.save(os.path.join(
            caminho_dados_notebook, "teste",
            rotulo_aparelho.lower().replace(" ", "_"),
        )+"_y.npy", y) 
        print()
        #rotulo_aparelho


# EXTRAÇÃO RP (BASE TESTE)...

* Aparelho DISH_WASHER - 9:

Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.
   - Carregando dados (taxa=2, janela=720)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 395 amostras (96.8%)
      - Classe `1`: 13 amostras (3.2%)

* Convertendo séries para RPs...


  0%|          | 0/408 [00:00<?, ?it/s]

* Persistindo dados...

* Aparelho FRIDGE - 7:

Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.
   - Carregando dados (taxa=2, janela=1080)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `1`: 269 amostras (98.2%)
      - Classe `0`: 5 amostras (1.8%)

* Convertendo séries para RPs...


  0%|          | 0/274 [00:00<?, ?it/s]

* Persistindo dados...

* Aparelho MICROWAVE - 16:

Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.
   - Carregando dados (taxa=2, janela=900)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 306 amostras (93.9%)
      - Classe `1`: 20 amostras (6.1%)

* Convertendo séries para RPs...


  0%|          | 0/326 [00:00<?, ?it/s]

* Persistindo dados...

* Aparelho WASHER_DRYER - 13:

Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.
   - Carregando dados (taxa=2, janela=60)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 4643 amostras (96.0%)
      - Classe `1`: 192 amostras (4.0%)

* Convertendo séries para RPs...


  0%|          | 0/4835 [00:00<?, ?it/s]

* Persistindo dados...

   - Carregando dados (taxa=2, janela=60)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 4582 amostras (94.8%)
      - Classe `1`: 253 amostras (5.2%)

* Convertendo séries para RPs...


  0%|          | 0/4835 [00:00<?, ?it/s]

* Persistindo dados...

* Aparelho WASHER_DRYER - 14:

Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.
   - Carregando dados (taxa=3, janela=360)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 514 amostras (94.8%)
      - Classe `1`: 28 amostras (5.2%)

* Convertendo séries para RPs...


  0%|          | 0/542 [00:00<?, ?it/s]

* Persistindo dados...

   - Carregando dados (taxa=3, janela=360)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 508 amostras (93.7%)
      - Classe `1`: 34 amostras (6.3%)

* Convertendo séries para RPs...


  0%|          | 0/542 [00:00<?, ?it/s]

* Persistindo dados...



# Extração de Atributos (Deep Learning)

In [12]:
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from xgboost.sklearn import XGBClassifier

from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=SEED)

resultados = []

In [13]:
from tensorflow.keras import applications as transfer_learning
from tensorflow.keras.models import Model

def extrair_atributos_dl(X, modelo, preprocessamento):
    X = preprocessamento(X)
    atributos = modelo.predict(X)
    return atributos

# Exemplo:
modelo_extrator = transfer_learning.vgg16.VGG16(
            weights='imagenet', 
            include_top=False,
            pooling='avg'
        )
preprocess_extrator = transfer_learning.vgg16.preprocess_input
# extrair_atributos_dl(
#     X, 
#     modelo=modelo_extrator,
#     preprocessamento=preprocess_extrator
# ).shape

## SVM

In [14]:
resultados_modelo = {
    "appliance": [], "fold": [],
    "acc": [], "f1": [], "auc": [], "base": []
}

for a in df_melhores_taxas_janelas.loc[
    df_melhores_taxas_janelas["carga"].isin(
        ['dish_washer - 9','fridge - 7','microwave - 16','washer_dryer - 13',
         'washer_dryer - 14']),
    : ]["carga"].values:
    
    
    print(f"* Appliance `{a}`...\n")
    
    # Carregando dados (treino)
    X = np.load(
        os.path.join(
            caminho_dados_notebook, "treino", 
            a.lower().replace(" ", "_"),
        )+"_X.npy"
    ).astype(TIPO_DADOS)
    y = np.load(
        os.path.join(
            caminho_dados_notebook, "treino",
            a.lower().replace(" ", "_"),
        )+"_y.npy"
    ).astype(TIPO_DADOS)
    
    # Extrair atributos usando Deep/Tranfer Learning
    X = extrair_atributos_dl(
        X, 
        modelo=modelo_extrator,
        preprocessamento=preprocess_extrator
    )
    
    y_true, y_pred  = [], []
    
    
    print("   - Evaluation model (CV - 10 folds)...\n")
    for it, (idx_treino, idx_teste) in enumerate(skf.split(X, y)):

        # Preparando lotes
        X_treino, X_teste = X[idx_treino], X[idx_teste]
        y_treino, y_teste = y[idx_treino], y[idx_teste]
        
        # Treinando modelo
        modelo = SVC(kernel='rbf', random_state=SEED)
        modelo.fit(X_treino, y_treino)
        
        # Prevendo conjunto de teste
        y_hat = modelo.predict(X_teste)

        # Incrementando resultados
        resultados_modelo["appliance"].append(a)
        resultados_modelo["fold"].append(it+1)
        resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
        resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
        resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
        resultados_modelo["base"].append("treino")
        
        # Extendendo rotulos (analise global)
        y_true.extend(y_teste)
        y_pred.extend(y_hat)
     
    
    # Carregando dados (teste)
    X_teste = np.load(
        os.path.join(
            caminho_dados_notebook, "teste", 
            a.lower().replace(" ", "_"),
        )+"_X.npy"
    ).astype(TIPO_DADOS)
    y_teste = np.load(
        os.path.join(
            caminho_dados_notebook, "teste",
            a.lower().replace(" ", "_"),
        )+"_y.npy"
    ).astype(TIPO_DADOS)
    
    # Extrair atributos usando Deep/Tranfer Learning
    X_teste = extrair_atributos_dl(
        X_teste, 
        modelo=modelo_extrator,
        preprocessamento=preprocess_extrator
    )
    
    # Treinando modelo
    modelo = SVC(kernel='rbf', random_state=SEED)
    modelo.fit(X, y)

    # Prevendo conjunto de teste
    y_hat = modelo.predict(X_teste)

    # Incrementando resultados
    resultados_modelo["appliance"].append(a)
    resultados_modelo["fold"].append(0)
    resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
    resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
    resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
    resultados_modelo["base"].append("teste")
    
    print()
    print("   - Final Results:")
    print("   ---")
    print()

    print("      -> Classification Report:")
    print()
    print(classification_report(y_true, y_pred))
    print("      -> Confusion Matrix:")
    print()
    print(confusion_matrix(y_true, y_pred))
    
    print()
    
# Consolidating DataFrame
df_resultados_svm = pd.DataFrame(resultados_modelo)
df_resultados_svm.to_excel(os.path.join(caminho_dados_notebook, "df_resultados_svm.xlsx"))
    
print("############################## FINAL MODEL RESULTS ##############################")
display(df_resultados_svm.groupby(["appliance","base"]).agg({
    "acc": ["mean", "std", "max", "min"],
    "f1": ["mean", "std", "max", "min"],
    "auc": ["mean", "std", "max", "min"]
}))

* Appliance `dish_washer - 9`...

   - Evaluation model (CV - 10 folds)...


   - Final Results:
   ---

      -> Classification Report:

              precision    recall  f1-score   support

         0.0       0.97      1.00      0.98       395
         1.0       0.00      0.00      0.00        13

    accuracy                           0.97       408
   macro avg       0.48      0.50      0.49       408
weighted avg       0.94      0.97      0.95       408

      -> Confusion Matrix:

[[395   0]
 [ 13   0]]

* Appliance `fridge - 7`...

   - Evaluation model (CV - 10 folds)...


   - Final Results:
   ---

      -> Classification Report:

              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00         5
         1.0       0.98      1.00      0.99       269

    accuracy                           0.98       274
   macro avg       0.49      0.50      0.50       274
weighted avg       0.96      0.98      0.97       274

      -> Confusion Matr

Unnamed: 0_level_0,Unnamed: 1_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,base,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
dish_washer - 9,teste,0.968137,,0.968137,0.968137,0.491905,,0.491905,0.491905,0.5,,0.5,0.5
dish_washer - 9,treino,0.968171,0.0117,0.97561,0.95122,0.491898,0.003035,0.493827,0.4875,0.5,0.0,0.5,0.5
fridge - 7,teste,0.981752,,0.981752,0.981752,0.495396,,0.495396,0.495396,0.5,,0.5,0.5
fridge - 7,treino,0.982011,0.018967,1.0,0.962963,0.74542,0.268351,1.0,0.490566,0.5,0.0,0.5,0.5
microwave - 16,teste,0.993865,,0.993865,0.993865,0.972056,,0.972056,0.972056,0.95,,0.95,0.95
microwave - 16,treino,0.963258,0.019375,1.0,0.9375,0.747087,0.189375,1.0,0.483871,0.723333,0.181761,1.0,0.5
washer_dryer - 13,teste,0.985729,,0.985729,0.985729,0.917316,,0.917316,0.917316,0.863636,,0.863636,0.863636
washer_dryer - 13,treino,0.984902,0.005164,0.991718,0.975155,0.911358,0.036825,0.956155,0.83564,0.86341,0.052259,0.938908,0.76
washer_dryer - 14,teste,0.987085,,0.987085,0.987085,0.939202,,0.939202,0.939202,0.897059,,0.897059,0.897059
washer_dryer - 14,treino,0.985219,0.014599,1.0,0.962963,0.913418,0.10073,1.0,0.740385,0.88652,0.135286,1.0,0.666667


## XGBOOST

In [16]:
resultados_modelo = {
    "appliance": [], "fold": [],
    "acc": [], "f1": [], "auc": [], "base": []
}

for a in df_melhores_taxas_janelas.loc[
    df_melhores_taxas_janelas["carga"].isin(
        ['dish_washer - 9','fridge - 7','microwave - 16','washer_dryer - 13',
         'washer_dryer - 14']),
    : ]["carga"].values:
    
    
    print(f"* Appliance `{a}`...\n")
    
    # Carregando dados (treino)
    X = np.load(
        os.path.join(
            caminho_dados_notebook, "treino", 
            a.lower().replace(" ", "_"),
        )+"_X.npy"
    ).astype(TIPO_DADOS)
    y = np.load(
        os.path.join(
            caminho_dados_notebook, "treino",
            a.lower().replace(" ", "_"),
        )+"_y.npy"
    ).astype(TIPO_DADOS)
    
    # Extrair atributos usando Deep/Tranfer Learning
    X = extrair_atributos_dl(
        X, 
        modelo=modelo_extrator,
        preprocessamento=preprocess_extrator
    )
    
    y_true, y_pred  = [], []
    
    
    print("   - Evaluation model (CV - 10 folds)...\n")
    for it, (idx_treino, idx_teste) in enumerate(skf.split(X, y)):

        # Preparando lotes
        X_treino, X_teste = X[idx_treino], X[idx_teste]
        y_treino, y_teste = y[idx_treino], y[idx_teste]
        
        # Treinando modelo
        modelo = XGBClassifier(random_state=SEED, n_jobs=4)
        modelo.fit(X_treino, y_treino)
        
        # Prevendo conjunto de teste
        y_hat = modelo.predict(X_teste)

        # Incrementando resultados
        resultados_modelo["appliance"].append(a)
        resultados_modelo["fold"].append(it+1)
        resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
        resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
        resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
        resultados_modelo["base"].append("treino")
        
        # Extendendo rotulos (analise global)
        y_true.extend(y_teste)
        y_pred.extend(y_hat)
     
    
    # Carregando dados (teste)
    X_teste = np.load(
        os.path.join(
            caminho_dados_notebook, "teste", 
            a.lower().replace(" ", "_"),
        )+"_X.npy"
    ).astype(TIPO_DADOS)
    y_teste = np.load(
        os.path.join(
            caminho_dados_notebook, "teste",
            a.lower().replace(" ", "_"),
        )+"_y.npy"
    ).astype(TIPO_DADOS)
    
    # Extrair atributos usando Deep/Tranfer Learning
    X_teste = extrair_atributos_dl(
        X_teste, 
        modelo=modelo_extrator,
        preprocessamento=preprocess_extrator
    )
    
    # Treinando modelo
    modelo = XGBClassifier(random_state=SEED, n_jobs=4)
    modelo.fit(X, y)

    # Prevendo conjunto de teste
    y_hat = modelo.predict(X_teste)

    # Incrementando resultados
    resultados_modelo["appliance"].append(a)
    resultados_modelo["fold"].append(0)
    resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
    resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
    resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
    resultados_modelo["base"].append("teste")
    
    print()
    print("   - Final Results:")
    print("   ---")
    print()

    print("      -> Classification Report:")
    print()
    print(classification_report(y_true, y_pred))
    print("      -> Confusion Matrix:")
    print()
    print(confusion_matrix(y_true, y_pred))
    
    print()
    
# Consolidating DataFrame
df_resultados_xgboost = pd.DataFrame(resultados_modelo)
df_resultados_xgboost.to_excel(os.path.join(caminho_dados_notebook, "df_resultados_xgboost.xlsx"))
    
print("############################## FINAL MODEL RESULTS ##############################")
display(df_resultados_xgboost.groupby(["appliance","base"]).agg({
    "acc": ["mean", "std", "max", "min"],
    "f1": ["mean", "std", "max", "min"],
    "auc": ["mean", "std", "max", "min"]
}))

* Appliance `dish_washer - 9`...

   - Evaluation model (CV - 10 folds)...


   - Final Results:
   ---

      -> Classification Report:

              precision    recall  f1-score   support

         0.0       0.97      1.00      0.98       395
         1.0       0.00      0.00      0.00        13

    accuracy                           0.97       408
   macro avg       0.48      0.50      0.49       408
weighted avg       0.94      0.97      0.95       408

      -> Confusion Matrix:

[[395   0]
 [ 13   0]]

* Appliance `fridge - 7`...

   - Evaluation model (CV - 10 folds)...


   - Final Results:
   ---

      -> Classification Report:

              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00         5
         1.0       0.98      0.99      0.99       269

    accuracy                           0.97       274
   macro avg       0.49      0.50      0.49       274
weighted avg       0.96      0.97      0.97       274

      -> Confusion Matr


   - Final Results:
   ---

      -> Classification Report:

              precision    recall  f1-score   support

         0.0       0.97      0.98      0.98       306
         1.0       0.63      0.60      0.62        20

    accuracy                           0.95       326
   macro avg       0.80      0.79      0.80       326
weighted avg       0.95      0.95      0.95       326

      -> Confusion Matrix:

[[299   7]
 [  8  12]]

* Appliance `washer_dryer - 13`...

   - Evaluation model (CV - 10 folds)...


   - Final Results:
   ---

      -> Classification Report:

              precision    recall  f1-score   support

         0.0       0.98      1.00      0.99      4582
         1.0       0.99      0.72      0.84       253

    accuracy                           0.99      4835
   macro avg       0.99      0.86      0.91      4835
weighted avg       0.99      0.99      0.98      4835

      -> Confusion Matrix:

[[4580    2]
 [  70  183]]

* Appliance `washer_dryer - 14`...




   - Final Results:
   ---

      -> Classification Report:

              precision    recall  f1-score   support

         0.0       0.98      1.00      0.99       508
         1.0       1.00      0.68      0.81        34

    accuracy                           0.98       542
   macro avg       0.99      0.84      0.90       542
weighted avg       0.98      0.98      0.98       542

      -> Confusion Matrix:

[[508   0]
 [ 11  23]]

############################## FINAL MODEL RESULTS ##############################


Unnamed: 0_level_0,Unnamed: 1_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,base,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
dish_washer - 9,teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
dish_washer - 9,treino,0.968171,0.0117,0.97561,0.95122,0.491898,0.003035,0.493827,0.4875,0.5,0.0,0.5,0.5
fridge - 7,teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
fridge - 7,treino,0.974603,0.034957,1.0,0.888889,0.743422,0.270524,1.0,0.470588,0.496154,0.012163,0.5,0.461538
microwave - 16,teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
microwave - 16,treino,0.953977,0.021591,0.969697,0.909091,0.761079,0.158922,0.891803,0.483871,0.788495,0.189077,0.983871,0.5
washer_dryer - 13,teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
washer_dryer - 13,treino,0.985109,0.005142,0.991718,0.975155,0.912183,0.036527,0.954348,0.83564,0.861628,0.049488,0.92,0.76
washer_dryer - 14,teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
washer_dryer - 14,treino,0.97963,0.018415,1.0,0.962963,0.862554,0.130776,1.0,0.740385,0.820833,0.166725,1.0,0.666667


Unnamed: 0_level_0,Unnamed: 1_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,base,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
dish_washer - 9,teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
dish_washer - 9,treino,0.968171,0.0117,0.97561,0.95122,0.491898,0.003035,0.493827,0.4875,0.5,0.0,0.5,0.5
fridge - 7,teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
fridge - 7,treino,0.974603,0.034957,1.0,0.888889,0.743422,0.270524,1.0,0.470588,0.496154,0.012163,0.5,0.461538
microwave - 16,teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
microwave - 16,treino,0.953977,0.021591,0.969697,0.909091,0.761079,0.158922,0.891803,0.483871,0.788495,0.189077,0.983871,0.5
washer_dryer - 13,teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
washer_dryer - 13,treino,0.985109,0.005142,0.991718,0.975155,0.912183,0.036527,0.954348,0.83564,0.861628,0.049488,0.92,0.76
washer_dryer - 14,teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
washer_dryer - 14,treino,0.97963,0.018415,1.0,0.962963,0.862554,0.130776,1.0,0.740385,0.820833,0.166725,1.0,0.666667


## MLP

In [17]:
# resultados_modelo = {
#     "appliance": [], "fold": [],
#     "acc": [], "f1": [], "auc": []
# }

# for a in df_melhores_taxas_janelas.loc[
#     df_melhores_taxas_janelas["carga"].isin(
#         ['dish_washer - 9','fridge - 7','microwave - 16','washer_dryer - 13',
#          'washer_dryer - 14']),
#     : ]["carga"].values:
    
    
#     print(f"* Appliance `{a}`...\n")
    
#     # Carregando dados
#     X = np.load(
#         os.path.join(
#             caminho_dados_notebook, 
#             a.lower().replace(" ", "_"),
#         )+"_X.npy"
#     ).astype(TIPO_DADOS)
#     y = np.load(
#         os.path.join(
#             caminho_dados_notebook, 
#             a.lower().replace(" ", "_"),
#         )+"_y.npy"
#     ).astype(TIPO_DADOS)
    
#     # Extrair atributos usando Deep/Tranfer Learning
#     X = extrair_atributos_dl(
#         X, 
#         modelo=modelo_extrator,
#         preprocessamento=preprocess_extrator
#     )
    
#     y_true, y_pred  = [], []
    
    
#     print("   - Evaluation model (CV - 10 folds)...\n")
#     for it, (idx_treino, idx_teste) in enumerate(skf.split(X, y)):

#         # Preparando lotes
#         X_treino, X_teste = X[idx_treino], X[idx_teste]
#         y_treino, y_teste = y[idx_treino], y[idx_teste]
        
#         # Treinando modelo
#         modelo = MLPClassifier(alpha=1e-3, hidden_layer_sizes=(10,), random_state=SEED)
#         modelo.fit(X_treino, y_treino)
        
#         # Prevendo conjunto de teste
#         y_hat = modelo.predict(X_teste)

#         # Incrementando resultados
#         resultados_modelo["appliance"].append(a)
#         resultados_modelo["fold"].append(it+1)
#         resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
#         resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
#         resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
        
#         # Extendendo rotulos (analise global)
#         y_true.extend(y_teste)
#         y_pred.extend(y_hat)
        
#     print()
#     print("   - Final Results:")
#     print("   ---")
#     print()

#     print("      -> Classification Report:")
#     print()
#     print(classification_report(y_true, y_pred))
#     print("      -> Confusion Matrix:")
#     print()
#     print(confusion_matrix(y_true, y_pred))
    
#     print()
    
# # Consolidating DataFrame
# df_resultados_mlp = pd.DataFrame(resultados_modelo)
# df_resultados_mlp.to_excel(os.path.join(caminho_dados_notebook, "df_resultados_mlp.xlsx"))

# print("############################## FINAL MODEL RESULTS ##############################")
# display(df_resultados_mlp.groupby("appliance").agg({
#     "acc": ["mean", "std", "max", "min"],
#     "f1": ["mean", "std", "max", "min"],
#     "auc": ["mean", "std", "max", "min"]
# }))

In [18]:
resultados_modelo = {
    "appliance": [], "fold": [],
    "acc": [], "f1": [], "auc": [], "base": []
}

for a in df_melhores_taxas_janelas.loc[
    df_melhores_taxas_janelas["carga"].isin(
        ['dish_washer - 9','fridge - 7','microwave - 16','washer_dryer - 13',
         'washer_dryer - 14']),
    : ]["carga"].values:
    
    
    print(f"* Appliance `{a}`...\n")
    
    # Carregando dados (treino)
    X = np.load(
        os.path.join(
            caminho_dados_notebook, "treino", 
            a.lower().replace(" ", "_"),
        )+"_X.npy"
    ).astype(TIPO_DADOS)
    y = np.load(
        os.path.join(
            caminho_dados_notebook, "treino",
            a.lower().replace(" ", "_"),
        )+"_y.npy"
    ).astype(TIPO_DADOS)
    
    # Extrair atributos usando Deep/Tranfer Learning
    X = extrair_atributos_dl(
        X, 
        modelo=modelo_extrator,
        preprocessamento=preprocess_extrator
    )
    
    y_true, y_pred  = [], []
    
    
    print("   - Evaluation model (CV - 10 folds)...\n")
    for it, (idx_treino, idx_teste) in enumerate(skf.split(X, y)):

        # Preparando lotes
        X_treino, X_teste = X[idx_treino], X[idx_teste]
        y_treino, y_teste = y[idx_treino], y[idx_teste]
        
        # Treinando modelo
        modelo =  MLPClassifier(alpha=1e-3, hidden_layer_sizes=(10,), random_state=SEED)
        modelo.fit(X_treino, y_treino)
        
        # Prevendo conjunto de teste
        y_hat = modelo.predict(X_teste)

        # Incrementando resultados
        resultados_modelo["appliance"].append(a)
        resultados_modelo["fold"].append(it+1)
        resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
        resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
        resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
        resultados_modelo["base"].append("treino")
        
        # Extendendo rotulos (analise global)
        y_true.extend(y_teste)
        y_pred.extend(y_hat)
     
    
    # Carregando dados (teste)
    X_teste = np.load(
        os.path.join(
            caminho_dados_notebook, "teste", 
            a.lower().replace(" ", "_"),
        )+"_X.npy"
    ).astype(TIPO_DADOS)
    y_teste = np.load(
        os.path.join(
            caminho_dados_notebook, "teste",
            a.lower().replace(" ", "_"),
        )+"_y.npy"
    ).astype(TIPO_DADOS)
    
    # Extrair atributos usando Deep/Tranfer Learning
    X_teste = extrair_atributos_dl(
        X_teste, 
        modelo=modelo_extrator,
        preprocessamento=preprocess_extrator
    )
    
    # Treinando modelo
    modelo =  MLPClassifier(alpha=1e-3, hidden_layer_sizes=(10,), random_state=SEED)
    modelo.fit(X, y)

    # Prevendo conjunto de teste
    y_hat = modelo.predict(X_teste)

    # Incrementando resultados
    resultados_modelo["appliance"].append(a)
    resultados_modelo["fold"].append(0)
    resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
    resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
    resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
    resultados_modelo["base"].append("teste")
    
    print()
    print("   - Final Results:")
    print("   ---")
    print()

    print("      -> Classification Report:")
    print()
    print(classification_report(y_true, y_pred))
    print("      -> Confusion Matrix:")
    print()
    print(confusion_matrix(y_true, y_pred))
    
    print()
    
# Consolidating DataFrame
df_resultados_mlp = pd.DataFrame(resultados_modelo)
df_resultados_mlp.to_excel(os.path.join(caminho_dados_notebook, "df_resultados_mlp.xlsx"))
    
print("############################## FINAL MODEL RESULTS ##############################")
display(df_resultados_mlp.groupby(["appliance","base"]).agg({
    "acc": ["mean", "std", "max", "min"],
    "f1": ["mean", "std", "max", "min"],
    "auc": ["mean", "std", "max", "min"]
}))

* Appliance `dish_washer - 9`...

   - Evaluation model (CV - 10 folds)...


   - Final Results:
   ---

      -> Classification Report:

              precision    recall  f1-score   support

         0.0       0.97      0.99      0.98       395
         1.0       0.20      0.08      0.11        13

    accuracy                           0.96       408
   macro avg       0.59      0.53      0.55       408
weighted avg       0.95      0.96      0.95       408

      -> Confusion Matrix:

[[391   4]
 [ 12   1]]

* Appliance `fridge - 7`...

   - Evaluation model (CV - 10 folds)...


   - Final Results:
   ---

      -> Classification Report:

              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00         5
         1.0       0.98      0.99      0.99       269

    accuracy                           0.97       274
   macro avg       0.49      0.50      0.49       274
weighted avg       0.96      0.97      0.97       274

      -> Confusion Matr

Unnamed: 0_level_0,Unnamed: 1_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,base,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
dish_washer - 9,teste,0.997549,,0.997549,0.997549,0.979368,,0.979368,0.979368,0.961538,,0.961538,0.961538
dish_washer - 9,treino,0.960854,0.034818,1.0,0.878049,0.53987,0.161901,1.0,0.467532,0.544872,0.160385,1.0,0.461538
fridge - 7,teste,0.978102,,0.978102,0.978102,0.494465,,0.494465,0.494465,0.498141,,0.498141,0.498141
fridge - 7,treino,0.974603,0.034957,1.0,0.888889,0.743422,0.270524,1.0,0.470588,0.496154,0.012163,0.5,0.461538
microwave - 16,teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
microwave - 16,treino,0.944413,0.035634,1.0,0.875,0.701923,0.178774,1.0,0.47541,0.690054,0.158364,1.0,0.483333
washer_dryer - 13,teste,0.989245,,0.989245,0.989245,0.93991,,0.93991,0.93991,0.897233,,0.897233,0.897233
washer_dryer - 13,treino,0.979525,0.006124,0.989648,0.968944,0.883928,0.040598,0.94173,0.808965,0.849459,0.058568,0.934553,0.756725
washer_dryer - 14,teste,0.998155,,0.998155,0.998155,0.992046,,0.992046,0.992046,0.985294,,0.985294,0.985294
washer_dryer - 14,treino,0.968586,0.024753,1.0,0.925926,0.834783,0.152204,1.0,0.480769,0.823284,0.159154,1.0,0.490196


## ELM - Extreme Learning Machine 

In [95]:
from elm import ELM
from sklearn.preprocessing import normalize

In [96]:
resultados_modelo = {
    "appliance": [], "fold": [],
    "acc": [], "f1": [], "auc": []
}

for a in df_melhores_taxas_janelas.loc[
    df_melhores_taxas_janelas["carga"].isin(
        ['dish_washer - 9','fridge - 7','microwave - 16','washer_dryer - 13',
         'washer_dryer - 14']),
    : ]["carga"].values:
    
    
    print(f"* Appliance `{a}`...\n")
    
    # Carregando dados
    X = np.load(
        os.path.join(
            caminho_dados_notebook, 
            a.lower().replace(" ", "_"),
        )+"_X.npy"
    ).astype(TIPO_DADOS)
    y = np.load(
        os.path.join(
            caminho_dados_notebook, 
            a.lower().replace(" ", "_"),
        )+"_y.npy"
    ).astype(TIPO_DADOS)
    
    # Extrair atributos usando Deep/Tranfer Learning
    X = extrair_atributos_dl(
        X, 
        modelo=modelo_extrator,
        preprocessamento=preprocess_extrator
    )
        
    y_true, y_pred  = [], []
    
    
    print("   - Evaluation model (CV - 10 folds)...\n")
    for it, (idx_treino, idx_teste) in enumerate(skf.split(X, y)):

        # Preparando lotes
        X_treino, X_teste = X[idx_treino], X[idx_teste]
        y_treino, y_teste = y[idx_treino], y[idx_teste]
        
        # Treinando modelo
        modelo = ELM(hid_num=10)
        modelo.fit(normalize(X_treino), y_treino)
        
        # Prevendo conjunto de teste
        y_hat = modelo.predict(normalize(X_teste))
        y_hat = (y_hat > 0.5).astype(int)

        # Incrementando resultados
        resultados_modelo["appliance"].append(a)
        resultados_modelo["fold"].append(it+1)
        resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
        resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
        resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
        
        # Extendendo rotulos (analise global)
        y_true.extend(y_teste)
        y_pred.extend(y_hat)
        
    print()
    print("   - Final Results:")
    print("   ---")
    print()

    print("      -> Classification Report:")
    print()
    print(classification_report(y_true, y_pred))
    print("      -> Confusion Matrix:")
    print()
    print(confusion_matrix(y_true, y_pred))
    
    print()
    
# Consolidating DataFrame
df_resultados_elm = pd.DataFrame(resultados_modelo)
df_resultados_elm.to_excel(os.path.join(caminho_dados_notebook, "df_resultados_elm.xlsx"))

print("############################## FINAL MODEL RESULTS ##############################")
display(df_resultados_elm.groupby("appliance").agg({
    "acc": ["mean", "std", "max", "min"],
    "f1": ["mean", "std", "max", "min"],
    "auc": ["mean", "std", "max", "min"]
}))

* Appliance `fridge - 7`...

   - Evaluation model (CV - 10 folds)...


   - Final Results:
   ---

      -> Classification Report:

              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00        11
         1.0       0.85      1.00      0.92        60

    accuracy                           0.85        71
   macro avg       0.42      0.50      0.46        71
weighted avg       0.71      0.85      0.77        71

      -> Confusion Matrix:

[[ 0 11]
 [ 0 60]]

* Appliance `dish_washer - 9`...

   - Evaluation model (CV - 10 folds)...


   - Final Results:
   ---

      -> Classification Report:

              precision    recall  f1-score   support

         0.0       1.00      0.30      0.46       336
         1.0       0.03      1.00      0.06         7

    accuracy                           0.31       343
   macro avg       0.51      0.65      0.26       343
weighted avg       0.98      0.31      0.45       343

      -> Confusion Matrix:


Unnamed: 0_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
dish_washer - 9,0.311513,0.147756,0.5,0.147059,0.250487,0.102857,0.379162,0.128205,0.616622,0.101367,0.742424,0.5
fridge - 7,0.846429,0.033882,0.857143,0.75,0.458242,0.010425,0.461538,0.428571,0.5,0.0,0.5,0.5
microwave - 16,0.348117,0.108929,0.511628,0.214286,0.300441,0.074358,0.407869,0.20299,0.657165,0.057642,0.743902,0.5875
washer_dryer - 13,0.337945,0.06631,0.434783,0.217391,0.287393,0.044969,0.356989,0.203846,0.606061,0.144927,0.704545,0.204545
washer_dryer - 14,0.391107,0.090202,0.545455,0.26087,0.311818,0.056856,0.427083,0.233333,0.538528,0.22779,0.761905,0.159091


# Análise dos Resultados (1)

In [22]:
df_resultados_svm = pd.read_excel(os.path.join(caminho_dados_notebook, "df_resultados_svm.xlsx"), engine="openpyxl")
df_resultados_svm["model"] = "SVM"

df_resultados_xgboost = pd.read_excel(os.path.join(caminho_dados_notebook, "df_resultados_xgboost.xlsx"), engine="openpyxl")
df_resultados_xgboost["model"] = "XGBOOST"

df_resultados_mlp = pd.read_excel(os.path.join(caminho_dados_notebook, "df_resultados_mlp.xlsx"), engine="openpyxl")
df_resultados_mlp["model"] = "MLP"

# df_resultados_elm = pd.read_excel(os.path.join(caminho_dados_notebook, "df_resultados_elm.xlsx"), engine="openpyxl)
# df_resultados_elm["model"] = "ELM"

df_analise = pd.concat([
    df_resultados_svm,
    df_resultados_xgboost,
    df_resultados_mlp, 
#     df_resultados_elm,  
])

print("* Análise por modelo:")
df_analise_modelo = df_analise.groupby(["model","base"]).agg({
    "acc": ["mean","std","max","min"],
    "f1": ["mean","std","max","min"],
    "auc": ["mean","std","max","min"]
}).reset_index().sort_values(('f1','mean'), ascending=False).set_index("model")
display(df_analise_modelo)
df_analise_modelo.to_excel(os.path.join(caminho_dados_notebook, "df_analise_modelo.xlsx"))

print()
print("* Análise por aparelho/modelo:")
df_analise_aparelho = df_analise.groupby(["appliance","model","base"]).agg({
    "acc": ["mean","std","max","min"],
    "f1": ["mean","std","max","min"],
    "auc": ["mean","std","max","min"]
})#.reset_index().sort_values(('f1','mean'), ascending=False).set_index(["aparelho","metodologia"])
display(df_analise_aparelho)
df_analise_aparelho.to_excel(os.path.join(caminho_dados_notebook, "df_analise_aparelho.xlsx"))

* Análise por modelo:


Unnamed: 0_level_0,base,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
XGBOOST,teste,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0
MLP,teste,0.99261,0.009104,1.0,0.978102,0.881158,0.217398,1.0,0.494465,0.868441,0.210701,1.0,0.498141
SVM,teste,0.983314,0.009541,0.993865,0.968137,0.763175,0.246814,0.972056,0.491905,0.742139,0.223176,0.95,0.5
SVM,treino,0.976712,0.017036,1.0,0.9375,0.761836,0.214826,1.0,0.483871,0.694653,0.197203,1.0,0.5
XGBOOST,treino,0.972298,0.022801,1.0,0.888889,0.754227,0.207444,1.0,0.470588,0.693422,0.196615,1.0,0.461538
MLP,treino,0.965596,0.030885,1.0,0.875,0.740785,0.207886,1.0,0.467532,0.680764,0.187977,1.0,0.461538



* Análise por aparelho/modelo:


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,model,base,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
dish_washer - 9,MLP,teste,0.997549,,0.997549,0.997549,0.979368,,0.979368,0.979368,0.961538,,0.961538,0.961538
dish_washer - 9,MLP,treino,0.960854,0.034818,1.0,0.878049,0.53987,0.161901,1.0,0.467532,0.544872,0.160385,1.0,0.461538
dish_washer - 9,SVM,teste,0.968137,,0.968137,0.968137,0.491905,,0.491905,0.491905,0.5,,0.5,0.5
dish_washer - 9,SVM,treino,0.968171,0.0117,0.97561,0.95122,0.491898,0.003035,0.493827,0.4875,0.5,0.0,0.5,0.5
dish_washer - 9,XGBOOST,teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
dish_washer - 9,XGBOOST,treino,0.968171,0.0117,0.97561,0.95122,0.491898,0.003035,0.493827,0.4875,0.5,0.0,0.5,0.5
fridge - 7,MLP,teste,0.978102,,0.978102,0.978102,0.494465,,0.494465,0.494465,0.498141,,0.498141,0.498141
fridge - 7,MLP,treino,0.974603,0.034957,1.0,0.888889,0.743422,0.270524,1.0,0.470588,0.496154,0.012163,0.5,0.461538
fridge - 7,SVM,teste,0.981752,,0.981752,0.981752,0.495396,,0.495396,0.495396,0.5,,0.5,0.5
fridge - 7,SVM,treino,0.982011,0.018967,1.0,0.962963,0.74542,0.268351,1.0,0.490566,0.5,0.0,0.5,0.5


# Conclusões

...

# Fim.

In [1]:
%load_ext watermark

In [2]:
%watermark -a "Diego Luiz Cavalca" -u -n -t -z -v -m -g

Diego Luiz Cavalca 
last updated: Sat Sep 05 2020 17:08:48 Hora oficial do Brasil 

CPython 3.7.8
IPython 7.17.0

compiler   : MSC v.1916 64 bit (AMD64)
system     : Windows
release    : 10
machine    : AMD64
processor  : Intel64 Family 6 Model 158 Stepping 9, GenuineIntel
CPU cores  : 8
interpreter: 64bit
Git hash   : 5725caa95e9d7f4b19a57eff5b998f1738bc40b4
