# Benchmarking Defesa - Dataset LIAA - Aprendizado Raso em Atributos no domínio do Tempo/Frequência

Estruturação de pipeline baseado em aprendizado raso utilizando atributos de alta frequência.


# Configurações

In [57]:
import os
import sys
import gc
from pprint import pprint
from collections import Counter
import copy
import warnings
warnings.filterwarnings(action="ignore")

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from IPython.display import display

from tqdm import *

from pretty_confusion_matrix import *

# TODO: implementar rotina na classe PyNILM.utils
def sizeof_fmt(num, suffix='B'):
    ''' by Fred Cirera,  https://stackoverflow.com/a/1094933/1870254, modified'''
    for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']:
        if abs(num) < 1024.0:
            return "%3.1f %s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f %s%s" % (num, 'Yi', suffix)

def listar_variaveis_memoria(ambiente):
    print("* Variáveis instanciadas em memória:")
    print("---")
    total = 0
    for name, size in sorted(((name, sys.getsizeof(value)) for name, value in ambiente.items()),
                             key= lambda x: -x[1])[:10]:
        total += size
        print("{:>30}: {:>8}".format(name, sizeof_fmt(size)))
    print("---")
    print("Total:", sizeof_fmt(total))
    
# TODO: implementar na classe utils
def highlight_col(x):
    r = 'background-color: #D9D9D9'
    df1 = pd.DataFrame('', index=x.index, columns=x.columns)
    df1.iloc[:, -2] = r
    return df1   

In [58]:
# CONSTANTES FUNDAMENTAIS DE ORGANIZACAO DE PASTAS/ARQUIVOS
RESIDENCIA = 3

# Path do arquivo H5 (base REDD ja preparada p/ NILMTK) e outros insumos fundamentais
caminho_dados = "D:/Projetos/phd-thesis/datasets/"

# Definir diretorios onde iremos salvar os insumos gerados do notebook (dados, imagens, etc.)
caminho_dados_notebook = os.path.join(caminho_dados, "26") # Num. notebook
if not os.path.isdir(caminho_dados_notebook):
    os.makedirs(caminho_dados_notebook)
caminho_imagens_notebook = os.path.join(caminho_dados_notebook, "imagens") # Num. notebook
if not os.path.isdir(caminho_imagens_notebook):
    os.makedirs(caminho_imagens_notebook)

# Path do arquivo H5 (base REDD ja preparada p/ NILMTK)
caminho_redd = os.path.join(caminho_dados, "REDD/low_freq")

# Path completo do arquivo REDD
arquivo_dataset = os.path.join(caminho_redd, "redd.h5")

# VARIAVEL AUXILIAR
# Path dos arquivos relacionados as janelas
caminho_janelas = os.path.join(caminho_redd, "../../phd")
if not os.path.isdir(caminho_janelas):
    os.makedirs(caminho_janelas)

In [59]:
from matplotlib import rcParams
import matplotlib.pyplot as plt
from six import iteritems

from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from nilmtk.legacy.disaggregate import CombinatorialOptimisation, FHMM
import nilmtk.utils

%matplotlib inline

# Carregando Dados

In [60]:
# Carregando datasets
df_treino = pd.read_csv(os.path.join(caminho_dados_notebook, '512_UmCiclo_Treinamento.csv'))
df_validacao = pd.read_csv(os.path.join(caminho_dados_notebook, '512_UmCiclo_Validacao.csv'))

# Selecionando feature dominio do tempo e frequencia / outputs (status dos aparelhos - dummy)
colunas_tempo = ['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9', 'C10']
colunas_frequencia = ['Fund', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th',
       '11th', '12th', '13th', '14th', '15th']

colunas_output = ['LC', 'LI', 'MO', 'MT', 'PC', 'LF']

# Preparando dados de treino e validacao por dominio
X_treino_tempo = df_treino[colunas_tempo]
X_validacao_tempo = df_validacao[colunas_tempo]

X_treino_frequencia = df_treino[colunas_frequencia]
X_validacao_frequencia = df_validacao[colunas_frequencia]

y_treino = df_treino[colunas_output].replace(-1, 0)
y_validacao = df_validacao[colunas_output].replace(-1, 0)

# Treinamento e Avaliação dos Modelos

In [66]:
# from PyNILM.dados.janelas import Janelas
from PyNILM.dados.janelas import Janelas
from PyNILM.dados.utils import *

from PyNILM.avaliacao.metricas import *
from PyNILM.avaliacao.graficos import *
from PyNILM.avaliacao.analises import *

from PyNILM.modelos.utils import *
from PyNILM.modelos.dlafe import DLAFE
from PyNILM.modelos.rqa import RQA

# Inicializar uso GPU
start_tf_session(memory_limit=int(1024*4))

Virtual devices cannot be modified after being initialized


In [67]:
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from xgboost.sklearn import XGBClassifier

from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)

## Atributos Domínio do Tempo

In [68]:
dominio = 'tempo'

X_treino = X_treino_tempo
X_validacao = X_validacao_tempo

# Dados agregados (validacao cruzada)
X_cv = pd.concat([X_treino, X_validacao]).reset_index(drop=True) 
y_cv = pd.concat([y_treino, y_validacao]).reset_index(drop=True) 

### SVM

In [46]:
modelo = SVC(kernel='rbf', random_state=SEED)

resultados_modelo = {
    "appliance": [], "fold": [],
    "acc": [], "f1": [], "auc": [], 
    "base": []
}

for rotulo_aparelho in colunas_output:
    
    print("****************************************************************\n")
    print(f"* Aparelho `{rotulo_aparelho}`...\n")
    
    #######################################################################
    #                AVALIACAO 1 - Base de treino/validacao               #
    #######################################################################

    # # Filtrando output/status por aparelho
    y_treino_aparelho = y_treino[rotulo_aparelho]
    y_validacao_aparelho = y_validacao[rotulo_aparelho]

    print(f"  - Avaliando modelo através da base treino/validacao...")
    
    print("     -> Detalhes da amostragem (lote validacao):")
    print("     ---")
    for item in Counter(y_validacao_aparelho).items():
        print(f"       - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y_validacao_aparelho)*100,1)}%)" )
    print()

    # Treinando modelo
    print(f"     -> Treinando modelo...\n")
    clf = clone(modelo)
    
    clf.fit(X_treino, y_treino_aparelho)

    # Prevendo conjunto de dados
    y_hat = clf.predict(X_validacao)

    # Incrementando resultados
    resultados_modelo["appliance"].append(rotulo_aparelho)
    resultados_modelo["fold"].append("-")
    resultados_modelo["acc"].append( accuracy_score(y_validacao_aparelho, y_hat) )
    resultados_modelo["f1"].append( f1_score(y_validacao_aparelho, y_hat, average="macro") )
    resultados_modelo["auc"].append(roc_auc_score(y_validacao_aparelho, y_hat) if np.unique(y_validacao_aparelho).shape[0]>1 else 0.5)
    resultados_modelo["base"].append("treino-teste")

    print("      > Resultado:")
    print("        = Classification Report:")
    print()
    print(classification_report(y_validacao_aparelho, y_hat))
    print("        = Confusion Matrix:")
    print()
    print(confusion_matrix(y_validacao_aparelho, y_hat))
    print()
    
    
    #######################################################################
    #                  AVALIACAO 2 - Validacao Cruzada                    #
    #######################################################################
    
    y_true_cv, y_pred_cv  = [], []

    print(f"  - Avaliando através de validação cruzada ({skf.n_splits}-folds)...")

    # Filtrando output/status por aparelho
    y_aparelho = y_cv[rotulo_aparelho]
    
    print("     -> Detalhes da amostragem:")
    print("     ---")
    for item in Counter(y_aparelho).items():
        print(f"        - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y_aparelho)*100,1)}%)" )
    print()
    

    print(f"     -> Avaliando modelo (CV - {skf.n_splits} folds)...\n")
    for it, (idx_treino, idx_teste) in tqdm_notebook(enumerate(skf.split(X_cv, y_aparelho)), total=skf.n_splits):
        
        # Preparando lotes
        X_treino_cv, X_teste_cv = X_cv.iloc[idx_treino], X_cv.iloc[idx_teste]
        y_treino_cv, y_teste_cv = y_aparelho.iloc[idx_treino], y_aparelho.iloc[idx_teste]

        # Treinando modelo
        clf = clone(modelo)
        
        clf.fit(X_treino_cv, y_treino_cv)

        # Prevendo conjunto de dados
        y_hat = clf.predict(X_teste_cv)

        # Incrementando resultados
        resultados_modelo["appliance"].append(rotulo_aparelho)
        resultados_modelo["fold"].append(it+1)
        resultados_modelo["acc"].append( accuracy_score(y_teste_cv, y_hat) )
        resultados_modelo["f1"].append( f1_score(y_teste_cv, y_hat, average="macro") )
        resultados_modelo["auc"].append(roc_auc_score(y_teste_cv, y_hat) if np.unique(y_teste_cv).shape[0]>1 else 0.5)
        resultados_modelo["base"].append("cv")

        # Extendendo rotulos (analise global)
        y_true_cv.extend(y_teste_cv)
        y_pred_cv.extend(y_hat)

    print("      > Resultado:")
    print("        = Classification Report:")
    print()
    print(classification_report(y_true_cv, y_pred_cv))
    print("        = Confusion Matrix:")
    print()
    print(confusion_matrix(y_true_cv, y_pred_cv))
    print()
    print("**********************************************")
    print()
    
# Consolidando DataFrame
df_resultados = pd.DataFrame(resultados_modelo)

arquivo_resultados = os.path.join(caminho_dados_notebook, f"resultados_{dominio}_svm.xlsx")
if os.path.isfile(arquivo_resultados): os.remove(arquivo_resultados)
df_resultados.to_excel(arquivo_resultados, index=False)
    
print("############################## RESULTADO FINAL DO DOMINIO/MODELO ##############################")
display(df_resultados.groupby(["appliance","base"]).agg({
    "acc": ["mean", "std", "max", "min"],
    "f1": ["mean", "std", "max", "min"],
    "auc": ["mean", "std", "max", "min"]
}))  

****************************************************************

* Aparelho `LC`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `1`: 12800 amostras (50.8%)
       - Classe `0`: 12400 amostras (49.2%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.57      0.65      0.60     12400
           1       0.60      0.53      0.56     12800

    accuracy                           0.58     25200
   macro avg       0.59      0.59      0.58     25200
weighted avg       0.59      0.58      0.58     25200

        = Confusion Matrix:

[[8000 4400]
 [6061 6739]]

  - Avaliando através de validação cruzada (5-folds)...
     -> Detalhes da amostragem:
     ---
        - Classe `1`: 32000 amostras (50.8%)
        - Classe `0`: 31000 amostras (49.2%)

     -> Avaliando modelo (CV - 5 folds)...



  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.57      0.65      0.61     31000
           1       0.61      0.53      0.57     32000

    accuracy                           0.59     63000
   macro avg       0.59      0.59      0.59     63000
weighted avg       0.59      0.59      0.59     63000

        = Confusion Matrix:

[[20018 10982]
 [14996 17004]]

**********************************************

****************************************************************

* Aparelho `LI`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.95      0.90      0.93     12400
           1       0.91      0

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.97      0.90      0.93     31000
           1       0.91      0.97      0.94     32000

    accuracy                           0.94     63000
   macro avg       0.94      0.94      0.94     63000
weighted avg       0.94      0.94      0.94     63000

        = Confusion Matrix:

[[27997  3003]
 [ 1000 31000]]

**********************************************

****************************************************************

* Aparelho `MO`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.83      0.61      0.71     12400
           1       0.70      0

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.90      0.61      0.73     31000
           1       0.71      0.93      0.81     32000

    accuracy                           0.77     63000
   macro avg       0.80      0.77      0.77     63000
weighted avg       0.80      0.77      0.77     63000

        = Confusion Matrix:

[[18997 12003]
 [ 2214 29786]]

**********************************************

****************************************************************

* Aparelho `MT`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.91      0.87      0.89     12400
           1       0.88      0

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.89      0.90      0.89     31000
           1       0.90      0.89      0.90     32000

    accuracy                           0.90     63000
   macro avg       0.90      0.90      0.90     63000
weighted avg       0.90      0.90      0.90     63000

        = Confusion Matrix:

[[27849  3151]
 [ 3389 28611]]

**********************************************

****************************************************************

* Aparelho `PC`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.64      0.87      0.74     12400
           1       0.81      0

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.64      0.91      0.75     31000
           1       0.85      0.51      0.64     32000

    accuracy                           0.70     63000
   macro avg       0.75      0.71      0.69     63000
weighted avg       0.75      0.70      0.69     63000

        = Confusion Matrix:

[[28161  2839]
 [15789 16211]]

**********************************************

****************************************************************

* Aparelho `LF`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.76      0.37      0.50     12400
           1       0.59      0

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.73      0.36      0.48     31000
           1       0.58      0.87      0.70     32000

    accuracy                           0.62     63000
   macro avg       0.66      0.62      0.59     63000
weighted avg       0.66      0.62      0.59     63000

        = Confusion Matrix:

[[11211 19789]
 [ 4159 27841]]

**********************************************

############################## RESULTADO FINAL DO DOMINIO/MODELO ##############################


Unnamed: 0_level_0,Unnamed: 1_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,base,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
LC,cv,0.587651,0.005133,0.591032,0.57881,0.586566,0.005277,0.590842,0.577654,0.588558,0.005083,0.591588,0.579786
LC,treino-teste,0.584881,,0.584881,0.584881,0.583839,,0.583839,0.583839,0.585823,,0.585823,0.585823
LF,cv,0.619873,0.009559,0.635952,0.612619,0.591407,0.008368,0.60557,0.584772,0.615838,0.009428,0.631706,0.608662
LF,treino-teste,0.632262,,0.632262,0.632262,0.603938,,0.603938,0.603938,0.628175,,0.628175,0.628175
LI,cv,0.93646,0.002049,0.938968,0.933413,0.936315,0.002063,0.938846,0.933253,0.93594,0.002073,0.938495,0.93287
LI,treino-teste,0.929246,,0.929246,0.929246,0.929131,,0.929131,0.929131,0.928839,,0.928839,0.928839
MO,cv,0.774333,0.011955,0.789206,0.761508,0.767521,0.011214,0.781202,0.755426,0.771809,0.011745,0.786368,0.759196
MO,treino-teste,0.749802,,0.749802,0.749802,0.744304,,0.744304,0.744304,0.747663,,0.747663,0.747663
MT,cv,0.89619,0.004926,0.902063,0.890794,0.896163,0.00494,0.902057,0.890793,0.896224,0.004938,0.902165,0.891053
MT,treino-teste,0.893333,,0.893333,0.893333,0.893194,,0.893194,0.893194,0.892985,,0.892985,0.892985


### XGBOOST

In [47]:
modelo = XGBClassifier(eval_metric='error', random_state=SEED, n_jobs=4)

resultados_modelo = {
    "appliance": [], "fold": [],
    "acc": [], "f1": [], "auc": [], 
    "base": []
}

for rotulo_aparelho in colunas_output:
    
    print("****************************************************************\n")
    print(f"* Aparelho `{rotulo_aparelho}`...\n")
    
    #######################################################################
    #                AVALIACAO 1 - Base de treino/validacao               #
    #######################################################################

    # # Filtrando output/status por aparelho
    y_treino_aparelho = y_treino[rotulo_aparelho]
    y_validacao_aparelho = y_validacao[rotulo_aparelho]

    print(f"  - Avaliando modelo através da base treino/validacao...")
    
    print("     -> Detalhes da amostragem (lote validacao):")
    print("     ---")
    for item in Counter(y_validacao_aparelho).items():
        print(f"       - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y_validacao_aparelho)*100,1)}%)" )
    print()

    # Treinando modelo
    print(f"     -> Treinando modelo...\n")
    clf = clone(modelo)
    
    clf.fit(X_treino, y_treino_aparelho)

    # Prevendo conjunto de dados
    y_hat = clf.predict(X_validacao)

    # Incrementando resultados
    resultados_modelo["appliance"].append(rotulo_aparelho)
    resultados_modelo["fold"].append("-")
    resultados_modelo["acc"].append( accuracy_score(y_validacao_aparelho, y_hat) )
    resultados_modelo["f1"].append( f1_score(y_validacao_aparelho, y_hat, average="macro") )
    resultados_modelo["auc"].append(roc_auc_score(y_validacao_aparelho, y_hat) if np.unique(y_validacao_aparelho).shape[0]>1 else 0.5)
    resultados_modelo["base"].append("treino-teste")

    print("      > Resultado:")
    print("        = Classification Report:")
    print()
    print(classification_report(y_validacao_aparelho, y_hat))
    print("        = Confusion Matrix:")
    print()
    print(confusion_matrix(y_validacao_aparelho, y_hat))
    print()
    
    
    #######################################################################
    #                  AVALIACAO 2 - Validacao Cruzada                    #
    #######################################################################
    
    y_true_cv, y_pred_cv  = [], []

    print(f"  - Avaliando através de validação cruzada ({skf.n_splits}-folds)...")

    # Filtrando output/status por aparelho
    y_aparelho = y_cv[rotulo_aparelho]
    
    print("     -> Detalhes da amostragem:")
    print("     ---")
    for item in Counter(y_aparelho).items():
        print(f"        - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y_aparelho)*100,1)}%)" )
    print()
    

    print(f"     -> Avaliando modelo (CV - {skf.n_splits} folds)...\n")
    for it, (idx_treino, idx_teste) in tqdm_notebook(enumerate(skf.split(X_cv, y_aparelho)), total=skf.n_splits):
        
        # Preparando lotes
        X_treino_cv, X_teste_cv = X_cv.iloc[idx_treino], X_cv.iloc[idx_teste]
        y_treino_cv, y_teste_cv = y_aparelho.iloc[idx_treino], y_aparelho.iloc[idx_teste]

        # Treinando modelo
        clf = clone(modelo)
        
        clf.fit(X_treino_cv, y_treino_cv)

        # Prevendo conjunto de dados
        y_hat = clf.predict(X_teste_cv)

        # Incrementando resultados
        resultados_modelo["appliance"].append(rotulo_aparelho)
        resultados_modelo["fold"].append(it+1)
        resultados_modelo["acc"].append( accuracy_score(y_teste_cv, y_hat) )
        resultados_modelo["f1"].append( f1_score(y_teste_cv, y_hat, average="macro") )
        resultados_modelo["auc"].append(roc_auc_score(y_teste_cv, y_hat) if np.unique(y_teste_cv).shape[0]>1 else 0.5)
        resultados_modelo["base"].append("cv")

        # Extendendo rotulos (analise global)
        y_true_cv.extend(y_teste_cv)
        y_pred_cv.extend(y_hat)

    print("      > Resultado:")
    print("        = Classification Report:")
    print()
    print(classification_report(y_true_cv, y_pred_cv))
    print("        = Confusion Matrix:")
    print()
    print(confusion_matrix(y_true_cv, y_pred_cv))
    print()
    print("**********************************************")
    print()

# Consolidando DataFrame
df_resultados = pd.DataFrame(resultados_modelo)

arquivo_resultados = os.path.join(caminho_dados_notebook, f"resultados_{dominio}_xgboost.xlsx")
if os.path.isfile(arquivo_resultados): os.remove(arquivo_resultados)
df_resultados.to_excel(arquivo_resultados, index=False)
    
print("############################## RESULTADO FINAL DO DOMINIO/MODELO ##############################")
display(df_resultados.groupby(["appliance","base"]).agg({
    "acc": ["mean", "std", "max", "min"],
    "f1": ["mean", "std", "max", "min"],
    "auc": ["mean", "std", "max", "min"]
}))  

****************************************************************

* Aparelho `LC`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `1`: 12800 amostras (50.8%)
       - Classe `0`: 12400 amostras (49.2%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.95      0.90      0.92     12400
           1       0.91      0.95      0.93     12800

    accuracy                           0.93     25200
   macro avg       0.93      0.93      0.93     25200
weighted avg       0.93      0.93      0.93     25200

        = Confusion Matrix:

[[11172  1228]
 [  624 12176]]

  - Avaliando através de validação cruzada (5-folds)...
     -> Detalhes da amostragem:
     ---
        - Classe `1`: 32000 amostras (50.8%)
        - Classe `0`: 31000 amostras (49.2%)

     -> Avaliando modelo (CV - 5 folds)...



  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31000
           1       1.00      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [    1 31999]]

**********************************************

****************************************************************

* Aparelho `LI`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12400
           1       1.00      1

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31000
           1       1.00      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [    0 32000]]

**********************************************

****************************************************************

* Aparelho `MO`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12400
           1       1.00      1

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31000
           1       1.00      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [    0 32000]]

**********************************************

****************************************************************

* Aparelho `MT`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12400
           1       1.00      1

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31000
           1       1.00      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [    0 32000]]

**********************************************

****************************************************************

* Aparelho `PC`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12400
           1       1.00      1

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31000
           1       1.00      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [    0 32000]]

**********************************************

****************************************************************

* Aparelho `LF`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12400
           1       1.00      1

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31000
           1       1.00      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [    3 31997]]

**********************************************

############################## RESULTADO FINAL DO DOMINIO/MODELO ##############################


Unnamed: 0_level_0,Unnamed: 1_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,base,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
LC,cv,0.999984,3.5e-05,1.0,0.999921,0.999984,3.6e-05,1.0,0.999921,0.999984,3.5e-05,1.0,0.999922
LC,treino-teste,0.926508,,0.926508,0.926508,0.926391,,0.926391,0.926391,0.926109,,0.926109,0.926109
LF,cv,0.999952,7.1e-05,1.0,0.999841,0.999952,7.1e-05,1.0,0.999841,0.999953,7e-05,1.0,0.999844
LF,treino-teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
LI,cv,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0
LI,treino-teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
MO,cv,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0
MO,treino-teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
MT,cv,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0
MT,treino-teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0


### MLP

In [48]:
modelo = MLPClassifier(alpha=1e-3, hidden_layer_sizes=(10,), random_state=SEED)

resultados_modelo = {
    "appliance": [], "fold": [],
    "acc": [], "f1": [], "auc": [], 
    "base": []
}

for rotulo_aparelho in colunas_output:
    
    print("****************************************************************\n")
    print(f"* Aparelho `{rotulo_aparelho}`...\n")
    
    #######################################################################
    #                AVALIACAO 1 - Base de treino/validacao               #
    #######################################################################

    # # Filtrando output/status por aparelho
    y_treino_aparelho = y_treino[rotulo_aparelho]
    y_validacao_aparelho = y_validacao[rotulo_aparelho]

    print(f"  - Avaliando modelo através da base treino/validacao...")
    
    print("     -> Detalhes da amostragem (lote validacao):")
    print("     ---")
    for item in Counter(y_validacao_aparelho).items():
        print(f"       - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y_validacao_aparelho)*100,1)}%)" )
    print()

    # Treinando modelo
    print(f"     -> Treinando modelo...\n")
    clf = clone(modelo)
    
    clf.fit(X_treino, y_treino_aparelho)

    # Prevendo conjunto de dados
    y_hat = clf.predict(X_validacao)

    # Incrementando resultados
    resultados_modelo["appliance"].append(rotulo_aparelho)
    resultados_modelo["fold"].append("-")
    resultados_modelo["acc"].append( accuracy_score(y_validacao_aparelho, y_hat) )
    resultados_modelo["f1"].append( f1_score(y_validacao_aparelho, y_hat, average="macro") )
    resultados_modelo["auc"].append(roc_auc_score(y_validacao_aparelho, y_hat) if np.unique(y_validacao_aparelho).shape[0]>1 else 0.5)
    resultados_modelo["base"].append("treino-teste")

    print("      > Resultado:")
    print("        = Classification Report:")
    print()
    print(classification_report(y_validacao_aparelho, y_hat))
    print("        = Confusion Matrix:")
    print()
    print(confusion_matrix(y_validacao_aparelho, y_hat))
    print()
    
    
    #######################################################################
    #                  AVALIACAO 2 - Validacao Cruzada                    #
    #######################################################################
    
    y_true_cv, y_pred_cv  = [], []

    print(f"  - Avaliando através de validação cruzada ({skf.n_splits}-folds)...")

    # Filtrando output/status por aparelho
    y_aparelho = y_cv[rotulo_aparelho]
    
    print("     -> Detalhes da amostragem:")
    print("     ---")
    for item in Counter(y_aparelho).items():
        print(f"        - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y_aparelho)*100,1)}%)" )
    print()
    

    print(f"     -> Avaliando modelo (CV - {skf.n_splits} folds)...\n")
    for it, (idx_treino, idx_teste) in tqdm_notebook(enumerate(skf.split(X_cv, y_aparelho)), total=skf.n_splits):
        
        # Preparando lotes
        X_treino_cv, X_teste_cv = X_cv.iloc[idx_treino], X_cv.iloc[idx_teste]
        y_treino_cv, y_teste_cv = y_aparelho.iloc[idx_treino], y_aparelho.iloc[idx_teste]

        # Treinando modelo
        clf = clone(modelo)
        
        clf.fit(X_treino_cv, y_treino_cv)

        # Prevendo conjunto de dados
        y_hat = clf.predict(X_teste_cv)

        # Incrementando resultados
        resultados_modelo["appliance"].append(rotulo_aparelho)
        resultados_modelo["fold"].append(it+1)
        resultados_modelo["acc"].append( accuracy_score(y_teste_cv, y_hat) )
        resultados_modelo["f1"].append( f1_score(y_teste_cv, y_hat, average="macro") )
        resultados_modelo["auc"].append(roc_auc_score(y_teste_cv, y_hat) if np.unique(y_teste_cv).shape[0]>1 else 0.5)
        resultados_modelo["base"].append("cv")

        # Extendendo rotulos (analise global)
        y_true_cv.extend(y_teste_cv)
        y_pred_cv.extend(y_hat)

    print("      > Resultado:")
    print("        = Classification Report:")
    print()
    print(classification_report(y_true_cv, y_pred_cv))
    print("        = Confusion Matrix:")
    print()
    print(confusion_matrix(y_true_cv, y_pred_cv))
    print()
    print("**********************************************")
    print()

# Consolidando DataFrame
df_resultados = pd.DataFrame(resultados_modelo)

arquivo_resultados = os.path.join(caminho_dados_notebook, f"resultados_{dominio}_mlp.xlsx")
if os.path.isfile(arquivo_resultados): os.remove(arquivo_resultados)
df_resultados.to_excel(arquivo_resultados, index=False)
    
print("############################## RESULTADO FINAL DO DOMINIO/MODELO ##############################")
display(df_resultados.groupby(["appliance","base"]).agg({
    "acc": ["mean", "std", "max", "min"],
    "f1": ["mean", "std", "max", "min"],
    "auc": ["mean", "std", "max", "min"]
}))  

****************************************************************

* Aparelho `LC`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `1`: 12800 amostras (50.8%)
       - Classe `0`: 12400 amostras (49.2%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.68      0.29      0.41     12400
           1       0.56      0.87      0.68     12800

    accuracy                           0.58     25200
   macro avg       0.62      0.58      0.54     25200
weighted avg       0.62      0.58      0.55     25200

        = Confusion Matrix:

[[ 3595  8805]
 [ 1686 11114]]

  - Avaliando através de validação cruzada (5-folds)...
     -> Detalhes da amostragem:
     ---
        - Classe `1`: 32000 amostras (50.8%)
        - Classe `0`: 31000 amostras (49.2%)

     -> Avaliando modelo (CV - 5 folds)...



  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.58      0.39      0.47     31000
           1       0.55      0.72      0.63     32000

    accuracy                           0.56     63000
   macro avg       0.57      0.56      0.55     63000
weighted avg       0.57      0.56      0.55     63000

        = Confusion Matrix:

[[12201 18799]
 [ 8870 23130]]

**********************************************

****************************************************************

* Aparelho `LI`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.98      1.00      0.99     12400
           1       1.00      0

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      0.99      1.00     31000
           1       0.99      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[30756   244]
 [   24 31976]]

**********************************************

****************************************************************

* Aparelho `MO`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.83      0.73      0.78     12400
           1       0.77      0

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.84      0.87      0.85     31000
           1       0.87      0.84      0.85     32000

    accuracy                           0.85     63000
   macro avg       0.85      0.85      0.85     63000
weighted avg       0.85      0.85      0.85     63000

        = Confusion Matrix:

[[26835  4165]
 [ 5059 26941]]

**********************************************

****************************************************************

* Aparelho `MT`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      0.98      0.99     12400
           1       0.98      1

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.91      0.93      0.92     31000
           1       0.93      0.92      0.93     32000

    accuracy                           0.92     63000
   macro avg       0.92      0.92      0.92     63000
weighted avg       0.92      0.92      0.92     63000

        = Confusion Matrix:

[[28940  2060]
 [ 2691 29309]]

**********************************************

****************************************************************

* Aparelho `PC`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.58      0.60      0.59     12400
           1       0.60      0

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.65      0.73      0.69     31000
           1       0.70      0.63      0.66     32000

    accuracy                           0.68     63000
   macro avg       0.68      0.68      0.68     63000
weighted avg       0.68      0.68      0.68     63000

        = Confusion Matrix:

[[22520  8480]
 [11876 20124]]

**********************************************

****************************************************************

* Aparelho `LF`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.78      0.34      0.48     12400
           1       0.59      0

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.55      0.76      0.64     31000
           1       0.63      0.40      0.49     32000

    accuracy                           0.58     63000
   macro avg       0.59      0.58      0.57     63000
weighted avg       0.59      0.58      0.56     63000

        = Confusion Matrix:

[[23488  7512]
 [19108 12892]]

**********************************************

############################## RESULTADO FINAL DO DOMINIO/MODELO ##############################


Unnamed: 0_level_0,Unnamed: 1_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,base,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
LC,cv,0.56081,0.033714,0.598095,0.523968,0.510568,0.070343,0.577894,0.404872,0.558197,0.032549,0.593639,0.520696
LC,treino-teste,0.58369,,0.58369,0.58369,0.543006,,0.543006,0.543006,0.5791,,0.5791,0.5791
LF,cv,0.57746,0.044927,0.627063,0.522937,0.546938,0.069653,0.620484,0.456666,0.580276,0.042847,0.629312,0.526709
LF,treino-teste,0.628452,,0.628452,0.628452,0.594265,,0.594265,0.594265,0.624002,,0.624002,0.624002
LI,cv,0.995746,0.009159,1.0,0.979365,0.995741,0.00917,1.0,0.979342,0.99569,0.009288,1.0,0.979078
LI,treino-teste,0.991706,,0.991706,0.991706,0.991706,,0.991706,0.991706,0.991836,,0.991836,0.991836
MO,cv,0.853587,0.149201,0.999841,0.67746,0.85007,0.153898,0.999841,0.663345,0.853776,0.148457,0.999839,0.680882
MO,treino-teste,0.793373,,0.793373,0.793373,0.792205,,0.792205,0.792205,0.792383,,0.792383,0.792383
MT,cv,0.924587,0.025218,0.940952,0.880873,0.924357,0.025505,0.940884,0.880152,0.924727,0.025732,0.94065,0.879864
MT,treino-teste,0.990198,,0.990198,0.990198,0.990192,,0.990192,0.990192,0.99004,,0.99004,0.99004


### Análise dos Resultados (1)

In [49]:
df_resultados_svm = pd.read_excel(os.path.join(caminho_dados_notebook, f"resultados_{dominio}_svm.xlsx"), engine='openpyxl')
df_resultados_svm["model"] = "SVM"

df_resultados_xgboost = pd.read_excel(os.path.join(caminho_dados_notebook, f"resultados_{dominio}_xgboost.xlsx"), engine='openpyxl')
df_resultados_xgboost["model"] = "XGBOOST"

df_resultados_mlp = pd.read_excel(os.path.join(caminho_dados_notebook, f"resultados_{dominio}_mlp.xlsx"), engine='openpyxl')
df_resultados_mlp["model"] = "MLP"

# df_resultados_elm = pd.read_excel(os.path.join(caminho_dados_notebook, "df_resultados_elm.xlsx"))
# df_resultados_elm["model"] = "ELM"

df_analise = pd.concat([
    df_resultados_svm,
    df_resultados_xgboost,
    df_resultados_mlp, 
#     df_resultados_elm,  
])

print("* Análise por modelo:")
df_analise_modelo = df_analise.groupby(["model","base"]).agg({
    "acc": ["mean","std","max","min"],
    "f1": ["mean","std","max","min"],
    "auc": ["mean","std","max","min"]
}).reset_index().sort_values(('f1','mean'), ascending=False).set_index("model")
display(df_analise_modelo)
df_analise_modelo.to_excel(os.path.join(caminho_dados_notebook, f"analise_{dominio}_modelos.xlsx"))

print()
print("* Análise por aparelho/modelo:")
df_analise_aparelho = df_analise.groupby(["appliance","model","base"]).agg({
    "acc": ["mean","std","max","min"],
    "f1": ["mean","std","max","min"],
    "auc": ["mean","std","max","min"]
})#.reset_index().sort_values(('f1','mean'), ascending=False).set_index(["aparelho","metodologia"])
display(df_analise_aparelho)
df_analise_aparelho.to_excel(os.path.join(caminho_dados_notebook, f"analise_{dominio}_aparelhos.xls"))

* Análise por modelo:


Unnamed: 0_level_0,base,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
XGBOOST,cv,0.999989,3.4e-05,1.0,0.999841,0.999989,3.4e-05,1.0,0.999841,0.99999,3.4e-05,1.0,0.999844
XGBOOST,treino-teste,0.987751,0.030003,1.0,0.926508,0.987732,0.030051,1.0,0.926391,0.987685,0.030166,1.0,0.926109
MLP,treino-teste,0.763366,0.191874,0.991706,0.58369,0.750692,0.204798,0.991706,0.543006,0.761718,0.1933,0.991836,0.5791
MLP,cv,0.764847,0.183426,1.0,0.522937,0.749905,0.201564,1.0,0.404872,0.765055,0.183242,1.0,0.520696
SVM,cv,0.753138,0.132858,0.938968,0.57881,0.745209,0.138723,0.938846,0.577654,0.752646,0.13299,0.938495,0.579786
SVM,treino-teste,0.746852,0.139394,0.929246,0.584881,0.739532,0.145209,0.929131,0.583839,0.746315,0.139455,0.928839,0.585823



* Análise por aparelho/modelo:


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,model,base,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
LC,MLP,cv,0.56081,0.033714,0.598095,0.523968,0.510568,0.070343,0.577894,0.404872,0.558197,0.032549,0.593639,0.520696
LC,MLP,treino-teste,0.58369,,0.58369,0.58369,0.543006,,0.543006,0.543006,0.5791,,0.5791,0.5791
LC,SVM,cv,0.587651,0.005133,0.591032,0.57881,0.586566,0.005277,0.590842,0.577654,0.588558,0.005083,0.591588,0.579786
LC,SVM,treino-teste,0.584881,,0.584881,0.584881,0.583839,,0.583839,0.583839,0.585823,,0.585823,0.585823
LC,XGBOOST,cv,0.999984,3.5e-05,1.0,0.999921,0.999984,3.6e-05,1.0,0.999921,0.999984,3.5e-05,1.0,0.999922
LC,XGBOOST,treino-teste,0.926508,,0.926508,0.926508,0.926391,,0.926391,0.926391,0.926109,,0.926109,0.926109
LF,MLP,cv,0.57746,0.044927,0.627063,0.522937,0.546938,0.069653,0.620484,0.456666,0.580276,0.042847,0.629312,0.526709
LF,MLP,treino-teste,0.628452,,0.628452,0.628452,0.594265,,0.594265,0.594265,0.624002,,0.624002,0.624002
LF,SVM,cv,0.619873,0.009559,0.635952,0.612619,0.591407,0.008368,0.60557,0.584772,0.615838,0.009428,0.631706,0.608662
LF,SVM,treino-teste,0.632262,,0.632262,0.632262,0.603938,,0.603938,0.603938,0.628175,,0.628175,0.628175


## Atributos Domínio da Frequência

In [50]:
dominio = 'frequencia'

X_treino = X_treino_frequencia
X_validacao = X_validacao_frequencia

# Dados agregados (validacao cruzada)
X_cv = pd.concat([X_treino, X_validacao]).reset_index(drop=True) 
y_cv = pd.concat([y_treino, y_validacao]).reset_index(drop=True) 

### SVM

In [51]:
modelo = SVC(kernel='rbf', random_state=SEED)

resultados_modelo = {
    "appliance": [], "fold": [],
    "acc": [], "f1": [], "auc": [], 
    "base": []
}

for rotulo_aparelho in colunas_output:
    
    print("****************************************************************\n")
    print(f"* Aparelho `{rotulo_aparelho}`...\n")
    
    #######################################################################
    #                AVALIACAO 1 - Base de treino/validacao               #
    #######################################################################

    # # Filtrando output/status por aparelho
    y_treino_aparelho = y_treino[rotulo_aparelho]
    y_validacao_aparelho = y_validacao[rotulo_aparelho]

    print(f"  - Avaliando modelo através da base treino/validacao...")
    
    print("     -> Detalhes da amostragem (lote validacao):")
    print("     ---")
    for item in Counter(y_validacao_aparelho).items():
        print(f"       - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y_validacao_aparelho)*100,1)}%)" )
    print()

    # Treinando modelo
    print(f"     -> Treinando modelo...\n")
    clf = clone(modelo)
    
    clf.fit(X_treino, y_treino_aparelho)

    # Prevendo conjunto de dados
    y_hat = clf.predict(X_validacao)

    # Incrementando resultados
    resultados_modelo["appliance"].append(rotulo_aparelho)
    resultados_modelo["fold"].append("-")
    resultados_modelo["acc"].append( accuracy_score(y_validacao_aparelho, y_hat) )
    resultados_modelo["f1"].append( f1_score(y_validacao_aparelho, y_hat, average="macro") )
    resultados_modelo["auc"].append(roc_auc_score(y_validacao_aparelho, y_hat) if np.unique(y_validacao_aparelho).shape[0]>1 else 0.5)
    resultados_modelo["base"].append("treino-teste")

    print("      > Resultado:")
    print("        = Classification Report:")
    print()
    print(classification_report(y_validacao_aparelho, y_hat))
    print("        = Confusion Matrix:")
    print()
    print(confusion_matrix(y_validacao_aparelho, y_hat))
    print()
    
    
    #######################################################################
    #                  AVALIACAO 2 - Validacao Cruzada                    #
    #######################################################################
    
    y_true_cv, y_pred_cv  = [], []

    print(f"  - Avaliando através de validação cruzada ({skf.n_splits}-folds)...")

    # Filtrando output/status por aparelho
    y_aparelho = y_cv[rotulo_aparelho]
    
    print("     -> Detalhes da amostragem:")
    print("     ---")
    for item in Counter(y_aparelho).items():
        print(f"        - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y_aparelho)*100,1)}%)" )
    print()
    

    print(f"     -> Avaliando modelo (CV - {skf.n_splits} folds)...\n")
    for it, (idx_treino, idx_teste) in tqdm_notebook(enumerate(skf.split(X_cv, y_aparelho)), total=skf.n_splits):
        
        # Preparando lotes
        X_treino_cv, X_teste_cv = X_cv.iloc[idx_treino], X_cv.iloc[idx_teste]
        y_treino_cv, y_teste_cv = y_aparelho.iloc[idx_treino], y_aparelho.iloc[idx_teste]

        # Treinando modelo
        clf = clone(modelo)
        
        clf.fit(X_treino_cv, y_treino_cv)

        # Prevendo conjunto de dados
        y_hat = clf.predict(X_teste_cv)

        # Incrementando resultados
        resultados_modelo["appliance"].append(rotulo_aparelho)
        resultados_modelo["fold"].append(it+1)
        resultados_modelo["acc"].append( accuracy_score(y_teste_cv, y_hat) )
        resultados_modelo["f1"].append( f1_score(y_teste_cv, y_hat, average="macro") )
        resultados_modelo["auc"].append(roc_auc_score(y_teste_cv, y_hat) if np.unique(y_teste_cv).shape[0]>1 else 0.5)
        resultados_modelo["base"].append("cv")

        # Extendendo rotulos (analise global)
        y_true_cv.extend(y_teste_cv)
        y_pred_cv.extend(y_hat)

    print("      > Resultado:")
    print("        = Classification Report:")
    print()
    print(classification_report(y_true_cv, y_pred_cv))
    print("        = Confusion Matrix:")
    print()
    print(confusion_matrix(y_true_cv, y_pred_cv))
    print()
    print("**********************************************")
    print()
    
# Consolidando DataFrame
df_resultados = pd.DataFrame(resultados_modelo)

arquivo_resultados = os.path.join(caminho_dados_notebook, f"resultados_{dominio}_svm.xlsx")
if os.path.isfile(arquivo_resultados): os.remove(arquivo_resultados)
df_resultados.to_excel(arquivo_resultados, index=False)
    
print("############################## RESULTADO FINAL DO DOMINIO/MODELO ##############################")
display(df_resultados.groupby(["appliance","base"]).agg({
    "acc": ["mean", "std", "max", "min"],
    "f1": ["mean", "std", "max", "min"],
    "auc": ["mean", "std", "max", "min"]
}))  

****************************************************************

* Aparelho `LC`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `1`: 12800 amostras (50.8%)
       - Classe `0`: 12400 amostras (49.2%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12400
           1       1.00      1.00      1.00     12800

    accuracy                           1.00     25200
   macro avg       1.00      1.00      1.00     25200
weighted avg       1.00      1.00      1.00     25200

        = Confusion Matrix:

[[12400     0]
 [    0 12800]]

  - Avaliando através de validação cruzada (5-folds)...
     -> Detalhes da amostragem:
     ---
        - Classe `1`: 32000 amostras (50.8%)
        - Classe `0`: 31000 amostras (49.2%)

     -> Avaliando modelo (CV - 5 folds)...



  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31000
           1       1.00      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [    0 32000]]

**********************************************

****************************************************************

* Aparelho `LI`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12400
           1       1.00      1

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31000
           1       1.00      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [    0 32000]]

**********************************************

****************************************************************

* Aparelho `MO`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12400
           1       1.00      1

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31000
           1       1.00      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [    0 32000]]

**********************************************

****************************************************************

* Aparelho `MT`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12400
           1       1.00      1

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31000
           1       1.00      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [    0 32000]]

**********************************************

****************************************************************

* Aparelho `PC`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.83      0.94      0.88     12400
           1       0.93      0

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.89      0.98      0.93     31000
           1       0.98      0.88      0.93     32000

    accuracy                           0.93     63000
   macro avg       0.93      0.93      0.93     63000
weighted avg       0.93      0.93      0.93     63000

        = Confusion Matrix:

[[30291   709]
 [ 3786 28214]]

**********************************************

****************************************************************

* Aparelho `LF`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.74      0.74      0.74     12400
           1       0.75      0

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.74      0.72      0.73     31000
           1       0.74      0.76      0.75     32000

    accuracy                           0.74     63000
   macro avg       0.74      0.74      0.74     63000
weighted avg       0.74      0.74      0.74     63000

        = Confusion Matrix:

[[22299  8701]
 [ 7634 24366]]

**********************************************

############################## RESULTADO FINAL DO DOMINIO/MODELO ##############################


Unnamed: 0_level_0,Unnamed: 1_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,base,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
LC,cv,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0
LC,treino-teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
LF,cv,0.740714,0.00331,0.744127,0.73619,0.740423,0.003435,0.743966,0.73573,0.74038,0.003423,0.743916,0.735713
LF,treino-teste,0.746032,,0.746032,0.746032,0.745968,,0.745968,0.745968,0.745968,,0.745968,0.745968
LI,cv,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0
LI,treino-teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
MO,cv,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0
MO,treino-teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
MT,cv,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0
MT,treino-teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0


### XGBOOST

In [52]:
modelo = XGBClassifier(eval_metric='error', random_state=SEED, n_jobs=4)

resultados_modelo = {
    "appliance": [], "fold": [],
    "acc": [], "f1": [], "auc": [], 
    "base": []
}

for rotulo_aparelho in colunas_output:
    
    print("****************************************************************\n")
    print(f"* Aparelho `{rotulo_aparelho}`...\n")
    
    #######################################################################
    #                AVALIACAO 1 - Base de treino/validacao               #
    #######################################################################

    # # Filtrando output/status por aparelho
    y_treino_aparelho = y_treino[rotulo_aparelho]
    y_validacao_aparelho = y_validacao[rotulo_aparelho]

    print(f"  - Avaliando modelo através da base treino/validacao...")
    
    print("     -> Detalhes da amostragem (lote validacao):")
    print("     ---")
    for item in Counter(y_validacao_aparelho).items():
        print(f"       - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y_validacao_aparelho)*100,1)}%)" )
    print()

    # Treinando modelo
    print(f"     -> Treinando modelo...\n")
    clf = clone(modelo)
    
    clf.fit(X_treino, y_treino_aparelho)

    # Prevendo conjunto de dados
    y_hat = clf.predict(X_validacao)

    # Incrementando resultados
    resultados_modelo["appliance"].append(rotulo_aparelho)
    resultados_modelo["fold"].append("-")
    resultados_modelo["acc"].append( accuracy_score(y_validacao_aparelho, y_hat) )
    resultados_modelo["f1"].append( f1_score(y_validacao_aparelho, y_hat, average="macro") )
    resultados_modelo["auc"].append(roc_auc_score(y_validacao_aparelho, y_hat) if np.unique(y_validacao_aparelho).shape[0]>1 else 0.5)
    resultados_modelo["base"].append("treino-teste")

    print("      > Resultado:")
    print("        = Classification Report:")
    print()
    print(classification_report(y_validacao_aparelho, y_hat))
    print("        = Confusion Matrix:")
    print()
    print(confusion_matrix(y_validacao_aparelho, y_hat))
    print()
    
    
    #######################################################################
    #                  AVALIACAO 2 - Validacao Cruzada                    #
    #######################################################################
    
    y_true_cv, y_pred_cv  = [], []

    print(f"  - Avaliando através de validação cruzada ({skf.n_splits}-folds)...")

    # Filtrando output/status por aparelho
    y_aparelho = y_cv[rotulo_aparelho]
    
    print("     -> Detalhes da amostragem:")
    print("     ---")
    for item in Counter(y_aparelho).items():
        print(f"        - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y_aparelho)*100,1)}%)" )
    print()
    

    print(f"     -> Avaliando modelo (CV - {skf.n_splits} folds)...\n")
    for it, (idx_treino, idx_teste) in tqdm_notebook(enumerate(skf.split(X_cv, y_aparelho)), total=skf.n_splits):
        
        # Preparando lotes
        X_treino_cv, X_teste_cv = X_cv.iloc[idx_treino], X_cv.iloc[idx_teste]
        y_treino_cv, y_teste_cv = y_aparelho.iloc[idx_treino], y_aparelho.iloc[idx_teste]

        # Treinando modelo
        clf = clone(modelo)
        
        clf.fit(X_treino_cv, y_treino_cv)

        # Prevendo conjunto de dados
        y_hat = clf.predict(X_teste_cv)

        # Incrementando resultados
        resultados_modelo["appliance"].append(rotulo_aparelho)
        resultados_modelo["fold"].append(it+1)
        resultados_modelo["acc"].append( accuracy_score(y_teste_cv, y_hat) )
        resultados_modelo["f1"].append( f1_score(y_teste_cv, y_hat, average="macro") )
        resultados_modelo["auc"].append(roc_auc_score(y_teste_cv, y_hat) if np.unique(y_teste_cv).shape[0]>1 else 0.5)
        resultados_modelo["base"].append("cv")

        # Extendendo rotulos (analise global)
        y_true_cv.extend(y_teste_cv)
        y_pred_cv.extend(y_hat)

    print("      > Resultado:")
    print("        = Classification Report:")
    print()
    print(classification_report(y_true_cv, y_pred_cv))
    print("        = Confusion Matrix:")
    print()
    print(confusion_matrix(y_true_cv, y_pred_cv))
    print()
    print("**********************************************")
    print()

# Consolidando DataFrame
df_resultados = pd.DataFrame(resultados_modelo)

arquivo_resultados = os.path.join(caminho_dados_notebook, f"resultados_{dominio}_xgboost.xlsx")
if os.path.isfile(arquivo_resultados): os.remove(arquivo_resultados)
df_resultados.to_excel(arquivo_resultados, index=False)
    
print("############################## RESULTADO FINAL DO DOMINIO/MODELO ##############################")
display(df_resultados.groupby(["appliance","base"]).agg({
    "acc": ["mean", "std", "max", "min"],
    "f1": ["mean", "std", "max", "min"],
    "auc": ["mean", "std", "max", "min"]
}))  

****************************************************************

* Aparelho `LC`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `1`: 12800 amostras (50.8%)
       - Classe `0`: 12400 amostras (49.2%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.98      1.00      0.99     12400
           1       1.00      0.98      0.99     12800

    accuracy                           0.99     25200
   macro avg       0.99      0.99      0.99     25200
weighted avg       0.99      0.99      0.99     25200

        = Confusion Matrix:

[[12381    19]
 [  239 12561]]

  - Avaliando através de validação cruzada (5-folds)...
     -> Detalhes da amostragem:
     ---
        - Classe `1`: 32000 amostras (50.8%)
        - Classe `0`: 31000 amostras (49.2%)

     -> Avaliando modelo (CV - 5 folds)...



  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31000
           1       1.00      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[30999     1]
 [    1 31999]]

**********************************************

****************************************************************

* Aparelho `LI`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12400
           1       1.00      1

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31000
           1       1.00      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [    0 32000]]

**********************************************

****************************************************************

* Aparelho `MO`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      0.98      0.99     12400
           1       0.98      1

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31000
           1       1.00      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [    0 32000]]

**********************************************

****************************************************************

* Aparelho `MT`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12400
           1       1.00      1

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31000
           1       1.00      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [    0 32000]]

**********************************************

****************************************************************

* Aparelho `PC`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12400
           1       1.00      1

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31000
           1       1.00      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [    0 32000]]

**********************************************

****************************************************************

* Aparelho `LF`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.95      0.97      0.96     12400
           1       0.97      0

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31000
           1       1.00      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [    2 31998]]

**********************************************

############################## RESULTADO FINAL DO DOMINIO/MODELO ##############################


Unnamed: 0_level_0,Unnamed: 1_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,base,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
LC,cv,0.999968,4.3e-05,1.0,0.999921,0.999968,4.3e-05,1.0,0.999921,0.999968,4.3e-05,1.0,0.999919
LC,treino-teste,0.989762,,0.989762,0.989762,0.989761,,0.989761,0.989761,0.989898,,0.989898,0.989898
LF,cv,0.999968,7.1e-05,1.0,0.999841,0.999968,7.1e-05,1.0,0.999841,0.999969,7e-05,1.0,0.999844
LF,treino-teste,0.960238,,0.960238,0.960238,0.960235,,0.960235,0.960235,0.96033,,0.96033,0.96033
LI,cv,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0
LI,treino-teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
MO,cv,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0
MO,treino-teste,0.987302,,0.987302,0.987302,0.987292,,0.987292,0.987292,0.987113,,0.987113,0.987113
MT,cv,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0
MT,treino-teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0


### MLP

In [53]:
modelo = MLPClassifier(alpha=1e-3, hidden_layer_sizes=(10,), random_state=SEED)

resultados_modelo = {
    "appliance": [], "fold": [],
    "acc": [], "f1": [], "auc": [], 
    "base": []
}

for rotulo_aparelho in colunas_output:
    
    print("****************************************************************\n")
    print(f"* Aparelho `{rotulo_aparelho}`...\n")
    
    #######################################################################
    #                AVALIACAO 1 - Base de treino/validacao               #
    #######################################################################

    # # Filtrando output/status por aparelho
    y_treino_aparelho = y_treino[rotulo_aparelho]
    y_validacao_aparelho = y_validacao[rotulo_aparelho]

    print(f"  - Avaliando modelo através da base treino/validacao...")
    
    print("     -> Detalhes da amostragem (lote validacao):")
    print("     ---")
    for item in Counter(y_validacao_aparelho).items():
        print(f"       - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y_validacao_aparelho)*100,1)}%)" )
    print()

    # Treinando modelo
    print(f"     -> Treinando modelo...\n")
    clf = clone(modelo)
    
    clf.fit(X_treino, y_treino_aparelho)

    # Prevendo conjunto de dados
    y_hat = clf.predict(X_validacao)

    # Incrementando resultados
    resultados_modelo["appliance"].append(rotulo_aparelho)
    resultados_modelo["fold"].append("-")
    resultados_modelo["acc"].append( accuracy_score(y_validacao_aparelho, y_hat) )
    resultados_modelo["f1"].append( f1_score(y_validacao_aparelho, y_hat, average="macro") )
    resultados_modelo["auc"].append(roc_auc_score(y_validacao_aparelho, y_hat) if np.unique(y_validacao_aparelho).shape[0]>1 else 0.5)
    resultados_modelo["base"].append("treino-teste")

    print("      > Resultado:")
    print("        = Classification Report:")
    print()
    print(classification_report(y_validacao_aparelho, y_hat))
    print("        = Confusion Matrix:")
    print()
    print(confusion_matrix(y_validacao_aparelho, y_hat))
    print()
    
    
    #######################################################################
    #                  AVALIACAO 2 - Validacao Cruzada                    #
    #######################################################################
    
    y_true_cv, y_pred_cv  = [], []

    print(f"  - Avaliando através de validação cruzada ({skf.n_splits}-folds)...")

    # Filtrando output/status por aparelho
    y_aparelho = y_cv[rotulo_aparelho]
    
    print("     -> Detalhes da amostragem:")
    print("     ---")
    for item in Counter(y_aparelho).items():
        print(f"        - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y_aparelho)*100,1)}%)" )
    print()
    

    print(f"     -> Avaliando modelo (CV - {skf.n_splits} folds)...\n")
    for it, (idx_treino, idx_teste) in tqdm_notebook(enumerate(skf.split(X_cv, y_aparelho)), total=skf.n_splits):
        
        # Preparando lotes
        X_treino_cv, X_teste_cv = X_cv.iloc[idx_treino], X_cv.iloc[idx_teste]
        y_treino_cv, y_teste_cv = y_aparelho.iloc[idx_treino], y_aparelho.iloc[idx_teste]

        # Treinando modelo
        clf = clone(modelo)
        
        clf.fit(X_treino_cv, y_treino_cv)

        # Prevendo conjunto de dados
        y_hat = clf.predict(X_teste_cv)

        # Incrementando resultados
        resultados_modelo["appliance"].append(rotulo_aparelho)
        resultados_modelo["fold"].append(it+1)
        resultados_modelo["acc"].append( accuracy_score(y_teste_cv, y_hat) )
        resultados_modelo["f1"].append( f1_score(y_teste_cv, y_hat, average="macro") )
        resultados_modelo["auc"].append(roc_auc_score(y_teste_cv, y_hat) if np.unique(y_teste_cv).shape[0]>1 else 0.5)
        resultados_modelo["base"].append("cv")

        # Extendendo rotulos (analise global)
        y_true_cv.extend(y_teste_cv)
        y_pred_cv.extend(y_hat)

    print("      > Resultado:")
    print("        = Classification Report:")
    print()
    print(classification_report(y_true_cv, y_pred_cv))
    print("        = Confusion Matrix:")
    print()
    print(confusion_matrix(y_true_cv, y_pred_cv))
    print()
    print("**********************************************")
    print()

# Consolidando DataFrame
df_resultados = pd.DataFrame(resultados_modelo)

arquivo_resultados = os.path.join(caminho_dados_notebook, f"resultados_{dominio}_mlp.xlsx")
if os.path.isfile(arquivo_resultados): os.remove(arquivo_resultados)
df_resultados.to_excel(arquivo_resultados, index=False)
    
print("############################## RESULTADO FINAL DO DOMINIO/MODELO ##############################")
display(df_resultados.groupby(["appliance","base"]).agg({
    "acc": ["mean", "std", "max", "min"],
    "f1": ["mean", "std", "max", "min"],
    "auc": ["mean", "std", "max", "min"]
}))  

****************************************************************

* Aparelho `LC`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `1`: 12800 amostras (50.8%)
       - Classe `0`: 12400 amostras (49.2%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12400
           1       1.00      1.00      1.00     12800

    accuracy                           1.00     25200
   macro avg       1.00      1.00      1.00     25200
weighted avg       1.00      1.00      1.00     25200

        = Confusion Matrix:

[[12400     0]
 [    0 12800]]

  - Avaliando através de validação cruzada (5-folds)...
     -> Detalhes da amostragem:
     ---
        - Classe `1`: 32000 amostras (50.8%)
        - Classe `0`: 31000 amostras (49.2%)

     -> Avaliando modelo (CV - 5 folds)...



  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31000
           1       1.00      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [    0 32000]]

**********************************************

****************************************************************

* Aparelho `LI`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12400
           1       1.00      1

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31000
           1       1.00      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [    0 32000]]

**********************************************

****************************************************************

* Aparelho `MO`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12400
           1       1.00      1

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      0.48      0.65     31000
           1       0.67      1.00      0.80     32000

    accuracy                           0.75     63000
   macro avg       0.83      0.74      0.73     63000
weighted avg       0.83      0.75      0.73     63000

        = Confusion Matrix:

[[15000 16000]
 [   45 31955]]

**********************************************

****************************************************************

* Aparelho `MT`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12400
           1       1.00      1

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31000
           1       1.00      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [    0 32000]]

**********************************************

****************************************************************

* Aparelho `PC`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.88      0.81      0.84     12400
           1       0.83      0

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.93      0.81      0.87     31000
           1       0.84      0.94      0.89     32000

    accuracy                           0.88     63000
   macro avg       0.88      0.88      0.88     63000
weighted avg       0.88      0.88      0.88     63000

        = Confusion Matrix:

[[25109  5891]
 [ 1845 30155]]

**********************************************

****************************************************************

* Aparelho `LF`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.87      0.87      0.87     12400
           1       0.87      0

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.90      0.90      0.90     31000
           1       0.91      0.90      0.90     32000

    accuracy                           0.90     63000
   macro avg       0.90      0.90      0.90     63000
weighted avg       0.90      0.90      0.90     63000

        = Confusion Matrix:

[[27998  3002]
 [ 3266 28734]]

**********************************************

############################## RESULTADO FINAL DO DOMINIO/MODELO ##############################


Unnamed: 0_level_0,Unnamed: 1_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,base,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
LC,cv,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0
LC,treino-teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
LF,cv,0.900508,0.003033,0.904206,0.896032,0.900493,0.003038,0.904188,0.896004,0.900549,0.003056,0.904214,0.895998
LF,treino-teste,0.872024,,0.872024,0.872024,0.87199,,0.87199,0.87199,0.871987,,0.871987,0.871987
LI,cv,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0
LI,treino-teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
MO,cv,0.745317,0.001864,0.748254,0.743492,0.725425,0.002426,0.729423,0.723475,0.741232,0.001905,0.744254,0.739408
MO,treino-teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
MT,cv,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0
MT,treino-teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0


### Análise dos Resultados (1)

In [54]:
df_resultados_svm = pd.read_excel(os.path.join(caminho_dados_notebook, f"resultados_{dominio}_svm.xlsx"), engine='openpyxl')
df_resultados_svm["model"] = "SVM"

df_resultados_xgboost = pd.read_excel(os.path.join(caminho_dados_notebook, f"resultados_{dominio}_xgboost.xlsx"), engine='openpyxl')
df_resultados_xgboost["model"] = "XGBOOST"

df_resultados_mlp = pd.read_excel(os.path.join(caminho_dados_notebook, f"resultados_{dominio}_mlp.xlsx"), engine='openpyxl')
df_resultados_mlp["model"] = "MLP"

# df_resultados_elm = pd.read_excel(os.path.join(caminho_dados_notebook, "df_resultados_elm.xlsx"))
# df_resultados_elm["model"] = "ELM"

df_analise = pd.concat([
    df_resultados_svm,
    df_resultados_xgboost,
    df_resultados_mlp, 
#     df_resultados_elm,  
])

print("* Análise por modelo:")
df_analise_modelo = df_analise.groupby(["model","base"]).agg({
    "acc": ["mean","std","max","min"],
    "f1": ["mean","std","max","min"],
    "auc": ["mean","std","max","min"]
}).reset_index().sort_values(('f1','mean'), ascending=False).set_index("model")
display(df_analise_modelo)
df_analise_modelo.to_excel(os.path.join(caminho_dados_notebook, f"analise_{dominio}_modelos.xlsx"))

print()
print("* Análise por aparelho/modelo:")
df_analise_aparelho = df_analise.groupby(["appliance","model","base"]).agg({
    "acc": ["mean","std","max","min"],
    "f1": ["mean","std","max","min"],
    "auc": ["mean","std","max","min"]
})#.reset_index().sort_values(('f1','mean'), ascending=False).set_index(["aparelho","metodologia"])
display(df_analise_aparelho)
df_analise_aparelho.to_excel(os.path.join(caminho_dados_notebook, f"analise_{dominio}_aparelhos.xls"))

* Análise por modelo:


Unnamed: 0_level_0,base,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
XGBOOST,cv,0.999989,3.4e-05,1.0,0.999841,0.999989,3.4e-05,1.0,0.999841,0.999989,3.4e-05,1.0,0.999844
XGBOOST,treino-teste,0.98955,0.01544,1.0,0.960238,0.989548,0.015441,1.0,0.960235,0.989557,0.015411,1.0,0.96033
MLP,treino-teste,0.954206,0.071192,1.0,0.853214,0.954116,0.071345,1.0,0.852704,0.95408,0.071405,1.0,0.852495
SVM,cv,0.944894,0.096612,1.0,0.73619,0.944832,0.096722,1.0,0.73573,0.944965,0.096713,1.0,0.735713
SVM,treino-teste,0.936224,0.106449,1.0,0.746032,0.936161,0.10651,1.0,0.745968,0.93638,0.10635,1.0,0.745968
MLP,cv,0.920505,0.094627,1.0,0.743492,0.917054,0.101116,1.0,0.723475,0.919656,0.096015,1.0,0.739408



* Análise por aparelho/modelo:


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,model,base,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
LC,MLP,cv,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0
LC,MLP,treino-teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
LC,SVM,cv,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0
LC,SVM,treino-teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
LC,XGBOOST,cv,0.999968,4.3e-05,1.0,0.999921,0.999968,4.3e-05,1.0,0.999921,0.999968,4.3e-05,1.0,0.999919
LC,XGBOOST,treino-teste,0.989762,,0.989762,0.989762,0.989761,,0.989761,0.989761,0.989898,,0.989898,0.989898
LF,MLP,cv,0.900508,0.003033,0.904206,0.896032,0.900493,0.003038,0.904188,0.896004,0.900549,0.003056,0.904214,0.895998
LF,MLP,treino-teste,0.872024,,0.872024,0.872024,0.87199,,0.87199,0.87199,0.871987,,0.871987,0.871987
LF,SVM,cv,0.740714,0.00331,0.744127,0.73619,0.740423,0.003435,0.743966,0.73573,0.74038,0.003423,0.743916,0.735713
LF,SVM,treino-teste,0.746032,,0.746032,0.746032,0.745968,,0.745968,0.745968,0.745968,,0.745968,0.745968


# Conclusões

...

# Fim.

In [55]:
%load_ext watermark

In [56]:
%watermark -a "Diego Luiz Cavalca" -u -n -t -z -v -m -g

Author: Diego Luiz Cavalca

Last updated: Mon Jan 24 2022 08:52:41Hora oficial do Brasil

Python implementation: CPython
Python version       : 3.8.8
IPython version      : 7.21.0

Compiler    : MSC v.1928 64 bit (AMD64)
OS          : Windows
Release     : 10
Machine     : AMD64
Processor   : Intel64 Family 6 Model 158 Stepping 9, GenuineIntel
CPU cores   : 8
Architecture: 64bit

Git hash: 5e5bccaaf9e541e11be67706c7eb7d7b39a8be65

