# Benchmarking Quali - Aprendizado Raso (RQA)

Estruturação de pipeline baseado em aprendizado raso utilizando atributos determinísticos calculados sobre os gráficos de recorrência (RQA).


# Configurações

In [1]:
import os
import sys
import gc
from pprint import pprint
from collections import Counter
import copy
import warnings
warnings.filterwarnings(action="ignore")

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from IPython.display import display

from tqdm import *

from pretty_confusion_matrix import *

# TODO: implementar rotina na classe PyNILM.utils
def sizeof_fmt(num, suffix='B'):
    ''' by Fred Cirera,  https://stackoverflow.com/a/1094933/1870254, modified'''
    for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']:
        if abs(num) < 1024.0:
            return "%3.1f %s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f %s%s" % (num, 'Yi', suffix)

def listar_variaveis_memoria(ambiente):
    print("* Variáveis instanciadas em memória:")
    print("---")
    total = 0
    for name, size in sorted(((name, sys.getsizeof(value)) for name, value in ambiente.items()),
                             key= lambda x: -x[1])[:10]:
        total += size
        print("{:>30}: {:>8}".format(name, sizeof_fmt(size)))
    print("---")
    print("Total:", sizeof_fmt(total))
    
# TODO: implementar na classe utils
def highlight_col(x):
    r = 'background-color: #D9D9D9'
    df1 = pd.DataFrame('', index=x.index, columns=x.columns)
    df1.iloc[:, -2] = r
    return df1   

In [2]:
# CONSTANTES FUNDAMENTAIS DE ORGANIZACAO DE PASTAS/ARQUIVOS
RESIDENCIA = 3

# Path do arquivo H5 (base REDD ja preparada p/ NILMTK) e outros insumos fundamentais
caminho_dados = "D:/Projetos/phd-thesis/datasets/"

# Definir diretorios onde iremos salvar os insumos gerados do notebook (dados, imagens, etc.)
caminho_dados_notebook = os.path.join(caminho_dados, "22") # Num. notebook
if not os.path.isdir(caminho_dados_notebook):
    os.makedirs(caminho_dados_notebook)
caminho_imagens_notebook = os.path.join(caminho_dados_notebook, "imagens") # Num. notebook
if not os.path.isdir(caminho_imagens_notebook):
    os.makedirs(caminho_imagens_notebook)

# Path do arquivo H5 (base REDD ja preparada p/ NILMTK)
caminho_redd = os.path.join(caminho_dados, "REDD/low_freq")

# Path completo do arquivo REDD
arquivo_dataset = os.path.join(caminho_redd, "redd.h5")

# VARIAVEL AUXILIAR
# Path dos arquivos relacionados as janelas
caminho_janelas = os.path.join(caminho_redd, "../../phd")
if not os.path.isdir(caminho_janelas):
    os.makedirs(caminho_janelas)

In [3]:
from matplotlib import rcParams
import matplotlib.pyplot as plt
from six import iteritems

from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from nilmtk.legacy.disaggregate import CombinatorialOptimisation, FHMM
import nilmtk.utils

%matplotlib inline

# Dados

## Base REDD

In [4]:
# Gerar arquivo H5 (Nilmtk) do dataset REDD, caso n exista
if not os.path.isfile(arquivo_dataset):
    from nilmtk.dataset_converters import convert_redd
    
    print("Gerando arquivo H5 (NILMTK) da base REDD, aguarde...")
    print("-----")
    convert_redd(caminho_redd, arquivo_dataset)

# Carregando dataset REDD no objeto NILMTK
# Exemplo de carregamento da base REDD no NILMTK
import h5py # * Evitar erro de incompatibilidade entre h5py e nilmtk
from nilmtk import DataSet
from nilmtk.utils import print_dict
redd = DataSet(arquivo_dataset)
print("NILMTK -> Detalhes sobre o dataset REDD:")
print_dict(redd.metadata)
print()

# Parametros dos dados (treino)
PARAMETROS_DATASET = {
    "base":redd,
    "id_residencia": RESIDENCIA,
    "inicio_intervalo":'2011-04-16 05:11:30',
    "fim_intervalo":'2011-04-23 08:43:26',
    "debug": False    
}
# print("PARÂMETROS DO ESTUDO:")
# pprint(PARAMETROS_DATASET)



NILMTK -> Detalhes sobre o dataset REDD:





## Melhores Combinações de Taxas e Janelas para cada Aparelho (estudo 19)

In [5]:
df_melhores_taxas_janelas = pd.read_csv(os.path.join(caminho_dados, "19", "melhores_taxa_janela_aparelhos.csv"), index_col=0)
df_melhores_taxas_janelas

Unnamed: 0,carga,taxa_amostragem,janela,loss,acuracia,precisao,recall,f1,f1_macro
0,dish_washer - 9,2,720,0.05,95.33,20.0,25.0,22.22,59.91
1,fridge - 7,2,1080,0.0,100.0,100.0,100.0,100.0,100.0
2,microwave - 16,2,900,0.04,95.83,66.67,33.33,44.44,71.14
3,washer_dryer - 13,2,60,0.0,99.89,100.0,95.74,97.83,98.88
4,washer_dryer - 14,3,360,0.02,97.99,100.0,55.56,71.43,85.19


In [6]:
# TODO: 
# - Desenvolver módulo da metodologia na lib PyNILM

## Parâmetros de RP dos Aparelhos (estudo 18)

In [7]:
# Carregando arquivos de parametros, caso n estejam (kernel reiniciado)
if not 'parametros_rp_aparelho' in locals():
    with open(os.path.join(caminho_dados, "18", "parametros_rp_aparelho.json"),'r') as arquivo:
        parametros_rp_aparelho = json.load(arquivo)

# Ambiente e Funções Auxiliares

In [8]:
# from PyNILM.dados.janelas import Janelas
from PyNILM.dados.janelas import Janelas
from PyNILM.dados.utils import *

from PyNILM.avaliacao.metricas import *
from PyNILM.avaliacao.graficos import *
from PyNILM.avaliacao.analises import *

from PyNILM.modelos.utils import *
from PyNILM.modelos.dlafe import DLAFE
from PyNILM.modelos.rqa import RQA

# Inicializar uso GPU
start_tf_session(memory_limit=int(1024*4))

Virtual devices cannot be modified after being initialized


## Configurações do Experimento

In [9]:
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from xgboost.sklearn import XGBClassifier

from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=SEED)

# Teste da classe
janelas_treino = Janelas(
    base=DataSet(arquivo_dataset),
    id_residencia=3,
    inicio_intervalo='2011-04-16',
    fim_intervalo='2011-05-16',
    debug = False
)

janelas_teste = Janelas(
    base=DataSet(arquivo_dataset),
    id_residencia=3,
    inicio_intervalo='2011-05-17',
    fim_intervalo='2011-05-30',
    debug = False
)

aparelhos = [
    'dish_washer - 9',
    'fridge - 7',
    'microwave - 16',
    'washer_dryer - 13', 
    'washer_dryer - 14'
]

TAXA = 2 # Fixa

# Metodologia RQA

## Treinamento/Avaliação dos Classificadores

### SVM

In [11]:
modelo = SVC(kernel='rbf', random_state=SEED)

resultados_modelo = {
    "appliance": [], "fold": [],
    "acc": [], "f1": [], "auc": [], 
    "base": []
}

for rotulo_aparelho in aparelhos:
    
    print(f"* Aparelho `{rotulo_aparelho}`...\n")
    
    # Informacoes da carga selecionada
    CARGA = rotulo_aparelho.split(" - ")[0]
    INSTANCIA = int(rotulo_aparelho.split(" - ")[1])
    config_aparelho = df_melhores_taxas_janelas[
        df_melhores_taxas_janelas["carga"]==rotulo_aparelho
    ].to_dict("records")[0]
    TAMANHO_JANELA = config_aparelho["janela"]

    #######################################################################
    #                AVALIACAO 1 - Base de treino / CV                    #
    #######################################################################
    # Extrair series divididas em janelas para cada medidor
    print("   - Base de TREINO\n")
    print("     -> Carregando dados (taxa={:.0f}, janela={:.0f})...".format(
        TAXA, TAMANHO_JANELA
    ))
    X, y = carregar_dados_aparelho(
        janelas=janelas_treino,
        instancia=INSTANCIA,
        aparelho=CARGA,
        tamanho_janela=TAMANHO_JANELA,
        taxa=TAXA,
        eliminar_janelas_vazias=True
    )
    print("     -> Detalhes da amostragem (lotes):")
    print("     ---")
    for item in Counter(y).items():
        print(f"        - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y)*100,1)}%)" )
    print()
    
    y_true, y_pred  = [], []

    print(f"     -> Avaliando modelo (CV - {skf.n_splits} folds)...\n")
    for it, (idx_treino, idx_teste) in tqdm_notebook(enumerate(skf.split(X, y)), total=skf.n_splits):

        # Preparando lotes
        X_treino, X_teste = X[idx_treino], X[idx_teste]
        y_treino, y_teste = y[idx_treino], y[idx_teste]

        # Treinando modelo
        rqa = RQA(
            classifier=clone(modelo),
            appliance_label=rotulo_aparelho,
            params=PARAMETROS_RP,
            columns_model=[
                "Recurrence rate (RR)",
                "Determinism (DET)"
            ]
        )
        rqa.fit(X_treino, y_treino)

        # Prevendo conjunto de dados
        y_hat = rqa.predict(X_teste)

        # Incrementando resultados
        resultados_modelo["appliance"].append(rotulo_aparelho)
        resultados_modelo["fold"].append(it+1)
        resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
        resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
        resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
        resultados_modelo["base"].append("treino")

        # Extendendo rotulos (analise global)
        y_true.extend(y_teste)
        y_pred.extend(y_hat)
        
    #######################################################################
    #                 AVALIACAO 2 - Base de teste / CV                    #
    #######################################################################
    print("   - Base de TESTE\n")
    print("     -> Carregando dados (taxa={:.0f}, janela={:.0f})...".format(
        TAXA, TAMANHO_JANELA
    ))

    # Avaliar na base de teste
    X_teste, y_teste = carregar_dados_aparelho(
        janelas=janelas_teste,
        instancia=INSTANCIA,
        aparelho=CARGA,
        tamanho_janela=TAMANHO_JANELA,
        taxa=TAXA,
        eliminar_janelas_vazias=True
    )
    
    print("     -> Detalhes da amostragem (lotes):")
    print("     ---")
    for item in Counter(y_teste).items():
        print(f"       - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y)*100,1)}%)" )
    print()

    # Treinando modelo
    rqa = RQA(
        classifier = clone(modelo),
        appliance_label=rotulo_aparelho,
        params=PARAMETROS_RP,
        columns_model=[
            "Recurrence rate (RR)",
            "Determinism (DET)"
        ]
    )
    rqa.fit(X, y)

    # Prevendo conjunto de dados
    y_hat = rqa.predict(X_teste)

    # Incrementando resultados
    resultados_modelo["appliance"].append(rotulo_aparelho)
    resultados_modelo["fold"].append(it+1)
    resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
    resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
    resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
    resultados_modelo["base"].append("teste")
    
    print()
    print("   - Final Results:")
    print("   ---")
    print()
    
    print("***** TRAIN *****")
    print("      -> Classification Report:")
    print()
    print(classification_report(y_true, y_pred))
    print("      -> Confusion Matrix:")
    print()
    print(confusion_matrix(y_true, y_pred))
    print()
    
    print("***** TEST *****")
    print("      -> Classification Report:")
    print()
    print(classification_report(y_teste, y_hat))
    print("      -> Confusion Matrix:")
    print()
    print(confusion_matrix(y_teste, y_hat))
    print()

* Aparelho `dish_washer - 9`...

   - Base de TREINO

     -> Carregando dados (taxa=2, janela=720)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.

     -> Detalhes da amostragem (lotes):
     ---
        - Classe `0`: 623 amostras (97.5%)
        - Classe `1`: 16 amostras (2.5%)

     -> Avaliando modelo (CV - 10 folds)...



  0%|          | 0/10 [00:00<?, ?it/s]

   - Base de TESTE

     -> Carregando dados (taxa=2, janela=720)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.

     -> Detalhes da amostragem (lotes):
     ---
       - Classe `0`: 395 amostras (61.8%)
       - Classe `1`: 13 amostras (2.0%)


   - Final Results:
   ---

***** TRAIN *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.97      1.00      0.99       623
           1       0.00      0.00      0.00        16

    accuracy                           0.97       639
   macro avg       0.49      0.50      0.49       639
weighted avg       0.95      0.97      0.96       639

      -> Confusion Matrix:

[[623   0]
 [ 16   0]]

***** TEST *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.97      1.00      0.98       395
           1       0.00      0.00      0.00 

  0%|          | 0/10 [00:00<?, ?it/s]

   - Base de TESTE

     -> Carregando dados (taxa=2, janela=1080)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.

     -> Detalhes da amostragem (lotes):
     ---
       - Classe `1`: 269 amostras (63.0%)
       - Classe `0`: 5 amostras (1.2%)


   - Final Results:
   ---

***** TRAIN *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       1.00      1.00      1.00       425

    accuracy                           0.99       427
   macro avg       0.50      0.50      0.50       427
weighted avg       0.99      0.99      0.99       427

      -> Confusion Matrix:

[[  0   2]
 [  1 424]]

***** TEST *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.50      0.40      0.44         5
           1       0.99      0.99      0.99 

  0%|          | 0/10 [00:00<?, ?it/s]

   - Base de TESTE

     -> Carregando dados (taxa=2, janela=900)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.

     -> Detalhes da amostragem (lotes):
     ---
       - Classe `0`: 306 amostras (59.9%)
       - Classe `1`: 20 amostras (3.9%)


   - Final Results:
   ---

***** TRAIN *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.95      1.00      0.97       483
           1       0.00      0.00      0.00        28

    accuracy                           0.95       511
   macro avg       0.47      0.50      0.49       511
weighted avg       0.89      0.95      0.92       511

      -> Confusion Matrix:

[[483   0]
 [ 28   0]]

***** TEST *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.94      1.00      0.97       306
           1       0.00      0.00      0.00 

  0%|          | 0/10 [00:00<?, ?it/s]

   - Base de TESTE

     -> Carregando dados (taxa=2, janela=60)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.

     -> Detalhes da amostragem (lotes):
     ---
       - Classe `0`: 4643 amostras (61.6%)
       - Classe `1`: 192 amostras (2.5%)


   - Final Results:
   ---

***** TRAIN *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.98      1.00      0.99      7353
           1       0.00      0.00      0.00       187

    accuracy                           0.98      7540
   macro avg       0.49      0.50      0.49      7540
weighted avg       0.95      0.98      0.96      7540

      -> Confusion Matrix:

[[7353    0]
 [ 187    0]]

***** TEST *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.96      1.00      0.98      4643
           1       0.00      0.00      

  0%|          | 0/10 [00:00<?, ?it/s]

   - Base de TESTE

     -> Carregando dados (taxa=2, janela=360)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.

     -> Detalhes da amostragem (lotes):
     ---
       - Classe `0`: 762 amostras (60.0%)
       - Classe `1`: 48 amostras (3.8%)


   - Final Results:
   ---

***** TRAIN *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.96      1.00      0.98      1216
           1       0.00      0.00      0.00        54

    accuracy                           0.96      1270
   macro avg       0.48      0.50      0.49      1270
weighted avg       0.92      0.96      0.94      1270

      -> Confusion Matrix:

[[1216    0]
 [  54    0]]

***** TEST *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.94      1.00      0.97       762
           1       0.00      0.00      0

In [12]:
# Consolidating DataFrame
df_resultados = pd.DataFrame(resultados_modelo)
df_resultados.to_excel(os.path.join(caminho_dados_notebook, "df_resultados_svm.xlsx"))
    
print("############################## FINAL MODEL RESULTS ##############################")
display(df_resultados.groupby(["appliance","base"]).agg({
    "acc": ["mean", "std", "max", "min"],
    "f1": ["mean", "std", "max", "min"],
    "auc": ["mean", "std", "max", "min"]
}))  

############################## FINAL MODEL RESULTS ##############################


Unnamed: 0_level_0,Unnamed: 1_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,base,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
dish_washer - 9,teste,0.968137,,0.968137,0.968137,0.491905,,0.491905,0.491905,0.5,,0.5,0.5
dish_washer - 9,treino,0.974975,0.008037,0.984375,0.96875,0.493657,0.002057,0.496063,0.492063,0.5,0.0,0.5,0.5
fridge - 7,teste,0.981752,,0.981752,0.981752,0.717584,,0.717584,0.717584,0.696283,,0.696283,0.696283
fridge - 7,treino,0.993023,0.011234,1.0,0.976744,0.848235,0.244364,1.0,0.494118,0.5,0.0,0.5,0.5
microwave - 16,teste,0.93865,,0.93865,0.93865,0.484177,,0.484177,0.484177,0.5,,0.5,0.5
microwave - 16,treino,0.945211,0.008215,0.960784,0.941176,0.485909,0.002158,0.49,0.484848,0.5,0.0,0.5,0.5
washer_dryer - 13,teste,0.96029,,0.96029,0.96029,0.489871,,0.489871,0.489871,0.5,,0.5,0.5
washer_dryer - 13,treino,0.975199,0.000641,0.976127,0.974801,0.493722,0.000164,0.49396,0.49362,0.5,0.0,0.5,0.5
washer_dryer - 14,teste,0.940741,,0.940741,0.940741,0.484733,,0.484733,0.484733,0.5,,0.5,0.5
washer_dryer - 14,treino,0.95748,0.004066,0.96063,0.952756,0.489137,0.001062,0.48996,0.487903,0.5,0.0,0.5,0.5


## XGBOOST

In [13]:
modelo = XGBClassifier(random_state=SEED, n_jobs=4)

resultados_modelo = {
    "appliance": [], "fold": [],
    "acc": [], "f1": [], "auc": [], 
    "base": []
}

for rotulo_aparelho in aparelhos:
    
    print(f"* Aparelho `{rotulo_aparelho}`...\n")
    
    # Informacoes da carga selecionada
    CARGA = rotulo_aparelho.split(" - ")[0]
    INSTANCIA = int(rotulo_aparelho.split(" - ")[1])
    config_aparelho = df_melhores_taxas_janelas[
        df_melhores_taxas_janelas["carga"]==rotulo_aparelho
    ].to_dict("records")[0]
    TAMANHO_JANELA = config_aparelho["janela"]

    #######################################################################
    #                AVALIACAO 1 - Base de treino / CV                    #
    #######################################################################
    # Extrair series divididas em janelas para cada medidor
    print("   - Base de TREINO\n")
    print("     -> Carregando dados (taxa={:.0f}, janela={:.0f})...".format(
        TAXA, TAMANHO_JANELA
    ))
    X, y = carregar_dados_aparelho(
        janelas=janelas_treino,
        instancia=INSTANCIA,
        aparelho=CARGA,
        tamanho_janela=TAMANHO_JANELA,
        taxa=TAXA,
        eliminar_janelas_vazias=True
    )
    print("     -> Detalhes da amostragem (lotes):")
    print("     ---")
    for item in Counter(y).items():
        print(f"        - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y)*100,1)}%)" )
    print()
    
    y_true, y_pred  = [], []

    print(f"     -> Avaliando modelo (CV - {skf.n_splits} folds)...\n")
    for it, (idx_treino, idx_teste) in tqdm_notebook(enumerate(skf.split(X, y)), total=skf.n_splits):

        # Preparando lotes
        X_treino, X_teste = X[idx_treino], X[idx_teste]
        y_treino, y_teste = y[idx_treino], y[idx_teste]

        # Treinando modelo
        rqa = RQA(
            classifier=clone(modelo),
            appliance_label=rotulo_aparelho,
            params=PARAMETROS_RP,
            columns_model=[
                "Recurrence rate (RR)",
                "Determinism (DET)"
            ]
        )
        rqa.fit(X_treino, y_treino)

        # Prevendo conjunto de dados
        y_hat = rqa.predict(X_teste)

        # Incrementando resultados
        resultados_modelo["appliance"].append(rotulo_aparelho)
        resultados_modelo["fold"].append(it+1)
        resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
        resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
        resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
        resultados_modelo["base"].append("treino")

        # Extendendo rotulos (analise global)
        y_true.extend(y_teste)
        y_pred.extend(y_hat)
        
    #######################################################################
    #                 AVALIACAO 2 - Base de teste / CV                    #
    #######################################################################
    print("   - Base de TESTE\n")
    print("     -> Carregando dados (taxa={:.0f}, janela={:.0f})...".format(
        TAXA, TAMANHO_JANELA
    ))

    # Avaliar na base de teste
    X_teste, y_teste = carregar_dados_aparelho(
        janelas=janelas_teste,
        instancia=INSTANCIA,
        aparelho=CARGA,
        tamanho_janela=TAMANHO_JANELA,
        taxa=TAXA,
        eliminar_janelas_vazias=True
    )
    
    print("     -> Detalhes da amostragem (lotes):")
    print("     ---")
    for item in Counter(y_teste).items():
        print(f"       - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y)*100,1)}%)" )
    print()

    # Treinando modelo
    rqa = RQA(
        classifier = clone(modelo),
        appliance_label=rotulo_aparelho,
        params=PARAMETROS_RP,
        columns_model=[
            "Recurrence rate (RR)",
            "Determinism (DET)"
        ]
    )
    rqa.fit(X, y)

    # Prevendo conjunto de dados
    y_hat = rqa.predict(X_teste)

    # Incrementando resultados
    resultados_modelo["appliance"].append(rotulo_aparelho)
    resultados_modelo["fold"].append(it+1)
    resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
    resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
    resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
    resultados_modelo["base"].append("teste")
    
    print()
    print("   - Final Results:")
    print("   ---")
    print()
    
    print("***** TRAIN *****")
    print("      -> Classification Report:")
    print()
    print(classification_report(y_true, y_pred))
    print("      -> Confusion Matrix:")
    print()
    print(confusion_matrix(y_true, y_pred))
    print()
    
    print("***** TEST *****")
    print("      -> Classification Report:")
    print()
    print(classification_report(y_teste, y_hat))
    print("      -> Confusion Matrix:")
    print()
    print(confusion_matrix(y_teste, y_hat))
    print()

* Aparelho `dish_washer - 9`...

   - Base de TREINO

     -> Carregando dados (taxa=2, janela=720)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.

     -> Detalhes da amostragem (lotes):
     ---
        - Classe `0`: 623 amostras (97.5%)
        - Classe `1`: 16 amostras (2.5%)

     -> Avaliando modelo (CV - 10 folds)...



  0%|          | 0/10 [00:00<?, ?it/s]

   - Base de TESTE

     -> Carregando dados (taxa=2, janela=720)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.

     -> Detalhes da amostragem (lotes):
     ---
       - Classe `0`: 395 amostras (61.8%)
       - Classe `1`: 13 amostras (2.0%)


   - Final Results:
   ---

***** TRAIN *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.98      1.00      0.99       623
           1       0.33      0.06      0.11        16

    accuracy                           0.97       639
   macro avg       0.65      0.53      0.55       639
weighted avg       0.96      0.97      0.96       639

      -> Confusion Matrix:

[[621   2]
 [ 15   1]]

***** TEST *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.97      1.00      0.98       395
           1       0.00      0.00      0.00 

  0%|          | 0/10 [00:00<?, ?it/s]

   - Base de TESTE

     -> Carregando dados (taxa=2, janela=1080)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.

     -> Detalhes da amostragem (lotes):
     ---
       - Classe `1`: 269 amostras (63.0%)
       - Classe `0`: 5 amostras (1.2%)


   - Final Results:
   ---

***** TRAIN *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       1.00      1.00      1.00       425

    accuracy                           1.00       427
   macro avg       0.50      0.50      0.50       427
weighted avg       0.99      1.00      0.99       427

      -> Confusion Matrix:

[[  0   2]
 [  0 425]]

***** TEST *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         5
           1       0.98      1.00      0.99 

  0%|          | 0/10 [00:00<?, ?it/s]

   - Base de TESTE

     -> Carregando dados (taxa=2, janela=900)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.

     -> Detalhes da amostragem (lotes):
     ---
       - Classe `0`: 306 amostras (59.9%)
       - Classe `1`: 20 amostras (3.9%)


   - Final Results:
   ---

***** TRAIN *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.95      0.98      0.96       483
           1       0.17      0.07      0.10        28

    accuracy                           0.93       511
   macro avg       0.56      0.53      0.53       511
weighted avg       0.91      0.93      0.92       511

      -> Confusion Matrix:

[[473  10]
 [ 26   2]]

***** TEST *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.94      0.97      0.96       306
           1       0.18      0.10      0.13 

  0%|          | 0/10 [00:00<?, ?it/s]

   - Base de TESTE

     -> Carregando dados (taxa=2, janela=60)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.

     -> Detalhes da amostragem (lotes):
     ---
       - Classe `0`: 4643 amostras (61.6%)
       - Classe `1`: 192 amostras (2.5%)


   - Final Results:
   ---

***** TRAIN *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.98      1.00      0.99      7353
           1       0.17      0.01      0.01       187

    accuracy                           0.97      7540
   macro avg       0.57      0.50      0.50      7540
weighted avg       0.96      0.97      0.96      7540

      -> Confusion Matrix:

[[7348    5]
 [ 186    1]]

***** TEST *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.96      1.00      0.98      4643
           1       0.38      0.02      

  0%|          | 0/10 [00:00<?, ?it/s]

   - Base de TESTE

     -> Carregando dados (taxa=2, janela=360)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.

     -> Detalhes da amostragem (lotes):
     ---
       - Classe `0`: 762 amostras (60.0%)
       - Classe `1`: 48 amostras (3.8%)


   - Final Results:
   ---

***** TRAIN *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.96      0.98      0.97      1216
           1       0.32      0.19      0.24        54

    accuracy                           0.95      1270
   macro avg       0.64      0.58      0.60      1270
weighted avg       0.94      0.95      0.94      1270

      -> Confusion Matrix:

[[1195   21]
 [  44   10]]

***** TEST *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.95      0.98      0.96       762
           1       0.32      0.15      0

In [14]:
# Consolidating DataFrame
df_resultados = pd.DataFrame(resultados_modelo)
df_resultados.to_excel(os.path.join(caminho_dados_notebook, "df_resultados_xgboost.xlsx"))
    
print("############################## FINAL MODEL RESULTS ##############################")
display(df_resultados.groupby(["appliance","base"]).agg({
    "acc": ["mean", "std", "max", "min"],
    "f1": ["mean", "std", "max", "min"],
    "auc": ["mean", "std", "max", "min"]
}))  

############################## FINAL MODEL RESULTS ##############################


Unnamed: 0_level_0,Unnamed: 1_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,base,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
dish_washer - 9,teste,0.965686,,0.965686,0.965686,0.491272,,0.491272,0.491272,0.498734,,0.498734,0.498734
dish_washer - 9,treino,0.973413,0.007508,0.984375,0.96875,0.518244,0.078619,0.741935,0.492063,0.5234,0.076826,0.741935,0.492063
fridge - 7,teste,0.981752,,0.981752,0.981752,0.495396,,0.495396,0.495396,0.5,,0.5,0.5
fridge - 7,treino,0.995349,0.009806,1.0,0.976744,0.898824,0.213299,1.0,0.494118,0.5,0.0,0.5,0.5
microwave - 16,teste,0.917178,,0.917178,0.917178,0.542777,,0.542777,0.542777,0.535294,,0.535294,0.535294
microwave - 16,treino,0.929563,0.016462,0.960784,0.901961,0.526662,0.098678,0.739796,0.474227,0.523002,0.073246,0.666667,0.479167
washer_dryer - 13,teste,0.959876,,0.959876,0.959876,0.504757,,0.504757,0.504757,0.507274,,0.507274,0.507274
washer_dryer - 13,treino,0.974668,0.000979,0.976127,0.973475,0.498347,0.01507,0.54123,0.49328,0.502292,0.008209,0.525636,0.49932
washer_dryer - 14,teste,0.930864,,0.930864,0.930864,0.581935,,0.581935,0.581935,0.563074,,0.563074,0.563074
washer_dryer - 14,treino,0.948819,0.017111,0.968504,0.92126,0.597225,0.113031,0.791803,0.479508,0.581366,0.094117,0.745868,0.479508


## MLP

In [15]:
modelo = MLPClassifier(alpha=1e-3, hidden_layer_sizes=(10,), random_state=SEED)

resultados_modelo = {
    "appliance": [], "fold": [],
    "acc": [], "f1": [], "auc": [], 
    "base": []
}

for rotulo_aparelho in aparelhos:
    
    print(f"* Aparelho `{rotulo_aparelho}`...\n")
    
    # Informacoes da carga selecionada
    CARGA = rotulo_aparelho.split(" - ")[0]
    INSTANCIA = int(rotulo_aparelho.split(" - ")[1])
    config_aparelho = df_melhores_taxas_janelas[
        df_melhores_taxas_janelas["carga"]==rotulo_aparelho
    ].to_dict("records")[0]
    TAMANHO_JANELA = config_aparelho["janela"]

    #######################################################################
    #                AVALIACAO 1 - Base de treino / CV                    #
    #######################################################################
    # Extrair series divididas em janelas para cada medidor
    print("   - Base de TREINO\n")
    print("     -> Carregando dados (taxa={:.0f}, janela={:.0f})...".format(
        TAXA, TAMANHO_JANELA
    ))
    X, y = carregar_dados_aparelho(
        janelas=janelas_treino,
        instancia=INSTANCIA,
        aparelho=CARGA,
        tamanho_janela=TAMANHO_JANELA,
        taxa=TAXA,
        eliminar_janelas_vazias=True
    )
    print("     -> Detalhes da amostragem (lotes):")
    print("     ---")
    for item in Counter(y).items():
        print(f"        - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y)*100,1)}%)" )
    print()
    
    y_true, y_pred  = [], []

    print(f"     -> Avaliando modelo (CV - {skf.n_splits} folds)...\n")
    for it, (idx_treino, idx_teste) in tqdm_notebook(enumerate(skf.split(X, y)), total=skf.n_splits):

        # Preparando lotes
        X_treino, X_teste = X[idx_treino], X[idx_teste]
        y_treino, y_teste = y[idx_treino], y[idx_teste]

        # Treinando modelo
        rqa = RQA(
            classifier=clone(modelo),
            appliance_label=rotulo_aparelho,
            params=PARAMETROS_RP,
            columns_model=[
                "Recurrence rate (RR)",
                "Determinism (DET)"
            ]
        )
        rqa.fit(X_treino, y_treino)

        # Prevendo conjunto de dados
        y_hat = rqa.predict(X_teste)

        # Incrementando resultados
        resultados_modelo["appliance"].append(rotulo_aparelho)
        resultados_modelo["fold"].append(it+1)
        resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
        resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
        resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
        resultados_modelo["base"].append("treino")

        # Extendendo rotulos (analise global)
        y_true.extend(y_teste)
        y_pred.extend(y_hat)
        
    #######################################################################
    #                 AVALIACAO 2 - Base de teste / CV                    #
    #######################################################################
    print("   - Base de TESTE\n")
    print("     -> Carregando dados (taxa={:.0f}, janela={:.0f})...".format(
        TAXA, TAMANHO_JANELA
    ))

    # Avaliar na base de teste
    X_teste, y_teste = carregar_dados_aparelho(
        janelas=janelas_teste,
        instancia=INSTANCIA,
        aparelho=CARGA,
        tamanho_janela=TAMANHO_JANELA,
        taxa=TAXA,
        eliminar_janelas_vazias=True
    )
    
    print("     -> Detalhes da amostragem (lotes):")
    print("     ---")
    for item in Counter(y_teste).items():
        print(f"       - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y)*100,1)}%)" )
    print()

    # Treinando modelo
    rqa = RQA(
        classifier = clone(modelo),
        appliance_label=rotulo_aparelho,
        params=PARAMETROS_RP,
        columns_model=[
            "Recurrence rate (RR)",
            "Determinism (DET)"
        ]
    )
    rqa.fit(X, y)

    # Prevendo conjunto de dados
    y_hat = rqa.predict(X_teste)

    # Incrementando resultados
    resultados_modelo["appliance"].append(rotulo_aparelho)
    resultados_modelo["fold"].append(it+1)
    resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
    resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
    resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
    resultados_modelo["base"].append("teste")
    
    print()
    print("   - Final Results:")
    print("   ---")
    print()
    
    print("***** TRAIN *****")
    print("      -> Classification Report:")
    print()
    print(classification_report(y_true, y_pred))
    print("      -> Confusion Matrix:")
    print()
    print(confusion_matrix(y_true, y_pred))
    print()
    
    print("***** TEST *****")
    print("      -> Classification Report:")
    print()
    print(classification_report(y_teste, y_hat))
    print("      -> Confusion Matrix:")
    print()
    print(confusion_matrix(y_teste, y_hat))
    print()

* Aparelho `dish_washer - 9`...

   - Base de TREINO

     -> Carregando dados (taxa=2, janela=720)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.

     -> Detalhes da amostragem (lotes):
     ---
        - Classe `0`: 623 amostras (97.5%)
        - Classe `1`: 16 amostras (2.5%)

     -> Avaliando modelo (CV - 10 folds)...



  0%|          | 0/10 [00:00<?, ?it/s]

   - Base de TESTE

     -> Carregando dados (taxa=2, janela=720)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.

     -> Detalhes da amostragem (lotes):
     ---
       - Classe `0`: 395 amostras (61.8%)
       - Classe `1`: 13 amostras (2.0%)


   - Final Results:
   ---

***** TRAIN *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.97      1.00      0.99       623
           1       0.00      0.00      0.00        16

    accuracy                           0.97       639
   macro avg       0.49      0.50      0.49       639
weighted avg       0.95      0.97      0.96       639

      -> Confusion Matrix:

[[623   0]
 [ 16   0]]

***** TEST *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.97      1.00      0.98       395
           1       0.00      0.00      0.00 

  0%|          | 0/10 [00:00<?, ?it/s]

   - Base de TESTE

     -> Carregando dados (taxa=2, janela=1080)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.

     -> Detalhes da amostragem (lotes):
     ---
       - Classe `1`: 269 amostras (63.0%)
       - Classe `0`: 5 amostras (1.2%)


   - Final Results:
   ---

***** TRAIN *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         2
           1       1.00      1.00      1.00       425

    accuracy                           1.00       427
   macro avg       0.50      0.50      0.50       427
weighted avg       0.99      1.00      0.99       427

      -> Confusion Matrix:

[[  0   2]
 [  0 425]]

***** TEST *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         5
           1       0.98      1.00      0.99 

  0%|          | 0/10 [00:00<?, ?it/s]

   - Base de TESTE

     -> Carregando dados (taxa=2, janela=900)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.

     -> Detalhes da amostragem (lotes):
     ---
       - Classe `0`: 306 amostras (59.9%)
       - Classe `1`: 20 amostras (3.9%)


   - Final Results:
   ---

***** TRAIN *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.95      1.00      0.97       483
           1       0.00      0.00      0.00        28

    accuracy                           0.95       511
   macro avg       0.47      0.50      0.49       511
weighted avg       0.89      0.95      0.92       511

      -> Confusion Matrix:

[[483   0]
 [ 28   0]]

***** TEST *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.94      1.00      0.97       306
           1       0.00      0.00      0.00 

  0%|          | 0/10 [00:00<?, ?it/s]

   - Base de TESTE

     -> Carregando dados (taxa=2, janela=60)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.

     -> Detalhes da amostragem (lotes):
     ---
       - Classe `0`: 4643 amostras (61.6%)
       - Classe `1`: 192 amostras (2.5%)


   - Final Results:
   ---

***** TRAIN *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.98      1.00      0.99      7353
           1       0.00      0.00      0.00       187

    accuracy                           0.98      7540
   macro avg       0.49      0.50      0.49      7540
weighted avg       0.95      0.98      0.96      7540

      -> Confusion Matrix:

[[7353    0]
 [ 187    0]]

***** TEST *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.96      1.00      0.98      4643
           1       0.00      0.00      

  0%|          | 0/10 [00:00<?, ?it/s]

   - Base de TESTE

     -> Carregando dados (taxa=2, janela=360)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.

     -> Detalhes da amostragem (lotes):
     ---
       - Classe `0`: 762 amostras (60.0%)
       - Classe `1`: 48 amostras (3.8%)


   - Final Results:
   ---

***** TRAIN *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.96      1.00      0.98      1216
           1       0.00      0.00      0.00        54

    accuracy                           0.96      1270
   macro avg       0.48      0.50      0.49      1270
weighted avg       0.92      0.96      0.94      1270

      -> Confusion Matrix:

[[1216    0]
 [  54    0]]

***** TEST *****
      -> Classification Report:

              precision    recall  f1-score   support

           0       0.94      1.00      0.97       762
           1       0.00      0.00      0

In [16]:
# Consolidating DataFrame
df_resultados = pd.DataFrame(resultados_modelo)
df_resultados.to_excel(os.path.join(caminho_dados_notebook, "df_resultados_mlp.xlsx"))
    
print("############################## FINAL MODEL RESULTS ##############################")
display(df_resultados.groupby(["appliance","base"]).agg({
    "acc": ["mean", "std", "max", "min"],
    "f1": ["mean", "std", "max", "min"],
    "auc": ["mean", "std", "max", "min"]
}))  

############################## FINAL MODEL RESULTS ##############################


Unnamed: 0_level_0,Unnamed: 1_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,base,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
dish_washer - 9,teste,0.968137,,0.968137,0.968137,0.491905,,0.491905,0.491905,0.5,,0.5,0.5
dish_washer - 9,treino,0.974975,0.008037,0.984375,0.96875,0.493657,0.002057,0.496063,0.492063,0.5,0.0,0.5,0.5
fridge - 7,teste,0.981752,,0.981752,0.981752,0.495396,,0.495396,0.495396,0.5,,0.5,0.5
fridge - 7,treino,0.995349,0.009806,1.0,0.976744,0.898824,0.213299,1.0,0.494118,0.5,0.0,0.5,0.5
microwave - 16,teste,0.93865,,0.93865,0.93865,0.484177,,0.484177,0.484177,0.5,,0.5,0.5
microwave - 16,treino,0.945211,0.008215,0.960784,0.941176,0.485909,0.002158,0.49,0.484848,0.5,0.0,0.5,0.5
washer_dryer - 13,teste,0.96029,,0.96029,0.96029,0.489871,,0.489871,0.489871,0.5,,0.5,0.5
washer_dryer - 13,treino,0.975199,0.000641,0.976127,0.974801,0.493722,0.000164,0.49396,0.49362,0.5,0.0,0.5,0.5
washer_dryer - 14,teste,0.940741,,0.940741,0.940741,0.484733,,0.484733,0.484733,0.5,,0.5,0.5
washer_dryer - 14,treino,0.95748,0.004066,0.96063,0.952756,0.489137,0.001062,0.48996,0.487903,0.5,0.0,0.5,0.5


# Análise dos Resultados (1)

In [17]:
df_resultados_svm = pd.read_excel(os.path.join(caminho_dados_notebook, "df_resultados_svm.xlsx"), engine='openpyxl')
df_resultados_svm["model"] = "SVM"

df_resultados_xgboost = pd.read_excel(os.path.join(caminho_dados_notebook, "df_resultados_xgboost.xlsx"), engine='openpyxl')
df_resultados_xgboost["model"] = "XGBOOST"

df_resultados_mlp = pd.read_excel(os.path.join(caminho_dados_notebook, "df_resultados_mlp.xlsx"), engine='openpyxl')
df_resultados_mlp["model"] = "MLP"

# df_resultados_elm = pd.read_excel(os.path.join(caminho_dados_notebook, "df_resultados_elm.xlsx"))
# df_resultados_elm["model"] = "ELM"

df_analise = pd.concat([
    df_resultados_svm,
    df_resultados_xgboost,
    df_resultados_mlp, 
#     df_resultados_elm,  
])

print("* Análise por modelo:")
df_analise_modelo = df_analise.groupby(["model","base"]).agg({
    "acc": ["mean","std","max","min"],
    "f1": ["mean","std","max","min"],
    "auc": ["mean","std","max","min"]
}).reset_index().sort_values(('f1','mean'), ascending=False).set_index("model")
display(df_analise_modelo)
df_analise_modelo.to_excel(os.path.join(caminho_dados_notebook, "df_analise_modelo.xlsx"))

print()
print("* Análise por aparelho/modelo:")
df_analise_aparelho = df_analise.groupby(["appliance","model","base"]).agg({
    "acc": ["mean","std","max","min"],
    "f1": ["mean","std","max","min"],
    "auc": ["mean","std","max","min"]
})#.reset_index().sort_values(('f1','mean'), ascending=False).set_index(["aparelho","metodologia"])
display(df_analise_aparelho)
df_analise_aparelho.to_excel(os.path.join(caminho_dados_notebook, "df_analise_aparelho.xls"))

* Análise por modelo:


Unnamed: 0_level_0,base,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
XGBOOST,treino,0.964362,0.02574,1.0,0.901961,0.60786,0.190791,1.0,0.474227,0.526012,0.067752,0.745868,0.479167
MLP,treino,0.969643,0.018547,1.0,0.941176,0.57225,0.18861,1.0,0.484848,0.5,0.0,0.5,0.5
SVM,treino,0.969178,0.018055,1.0,0.941176,0.562132,0.178494,1.0,0.484848,0.5,0.0,0.5,0.5
SVM,teste,0.957914,0.018333,0.981752,0.93865,0.533654,0.102873,0.717584,0.484177,0.539257,0.08778,0.696283,0.5
XGBOOST,teste,0.951071,0.026408,0.981752,0.917178,0.523227,0.038619,0.581935,0.491272,0.520875,0.027837,0.563074,0.498734
MLP,teste,0.957914,0.018333,0.981752,0.93865,0.489217,0.004779,0.495396,0.484177,0.5,0.0,0.5,0.5



* Análise por aparelho/modelo:


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,model,base,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
dish_washer - 9,MLP,teste,0.968137,,0.968137,0.968137,0.491905,,0.491905,0.491905,0.5,,0.5,0.5
dish_washer - 9,MLP,treino,0.974975,0.008037,0.984375,0.96875,0.493657,0.002057,0.496063,0.492063,0.5,0.0,0.5,0.5
dish_washer - 9,SVM,teste,0.968137,,0.968137,0.968137,0.491905,,0.491905,0.491905,0.5,,0.5,0.5
dish_washer - 9,SVM,treino,0.974975,0.008037,0.984375,0.96875,0.493657,0.002057,0.496063,0.492063,0.5,0.0,0.5,0.5
dish_washer - 9,XGBOOST,teste,0.965686,,0.965686,0.965686,0.491272,,0.491272,0.491272,0.498734,,0.498734,0.498734
dish_washer - 9,XGBOOST,treino,0.973413,0.007508,0.984375,0.96875,0.518244,0.078619,0.741935,0.492063,0.5234,0.076826,0.741935,0.492063
fridge - 7,MLP,teste,0.981752,,0.981752,0.981752,0.495396,,0.495396,0.495396,0.5,,0.5,0.5
fridge - 7,MLP,treino,0.995349,0.009806,1.0,0.976744,0.898824,0.213299,1.0,0.494118,0.5,0.0,0.5,0.5
fridge - 7,SVM,teste,0.981752,,0.981752,0.981752,0.717584,,0.717584,0.717584,0.696283,,0.696283,0.696283
fridge - 7,SVM,treino,0.993023,0.011234,1.0,0.976744,0.848235,0.244364,1.0,0.494118,0.5,0.0,0.5,0.5


# Conclusões

...

# Fim.

In [18]:
%load_ext watermark

In [19]:
%watermark -a "Diego Luiz Cavalca" -u -n -t -z -v -m -g

Author: Diego Luiz Cavalca

Last updated: Mon May 03 2021 06:10:32Hora oficial do Brasil

Python implementation: CPython
Python version       : 3.8.8
IPython version      : 7.21.0

Compiler    : MSC v.1928 64 bit (AMD64)
OS          : Windows
Release     : 10
Machine     : AMD64
Processor   : Intel64 Family 6 Model 158 Stepping 9, GenuineIntel
CPU cores   : 8
Architecture: 64bit

Git hash: a29eb3e98689f89f3597358428a2cab6bb3ab9b0

