# Benchmarking Defesa - Dataset LIAA - Aprendizado Profundo em Imagens RP (CNN)

Estruturação de pipeline baseado em aprendizado raso utilizando atributos de alta frequência.


# Configurações

In [5]:
import os
import sys
import gc
from pprint import pprint
from collections import Counter
import copy
import warnings
warnings.filterwarnings(action="ignore")

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from IPython.display import display

from tqdm import *

from pretty_confusion_matrix import *

# TODO: implementar rotina na classe PyNILM.utils
def sizeof_fmt(num, suffix='B'):
    ''' by Fred Cirera,  https://stackoverflow.com/a/1094933/1870254, modified'''
    for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']:
        if abs(num) < 1024.0:
            return "%3.1f %s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f %s%s" % (num, 'Yi', suffix)

def listar_variaveis_memoria(ambiente):
    print("* Variáveis instanciadas em memória:")
    print("---")
    total = 0
    for name, size in sorted(((name, sys.getsizeof(value)) for name, value in ambiente.items()),
                             key= lambda x: -x[1])[:10]:
        total += size
        print("{:>30}: {:>8}".format(name, sizeof_fmt(size)))
    print("---")
    print("Total:", sizeof_fmt(total))
    
# TODO: implementar na classe utils
def highlight_col(x):
    r = 'background-color: #D9D9D9'
    df1 = pd.DataFrame('', index=x.index, columns=x.columns)
    df1.iloc[:, -2] = r
    return df1   

In [6]:
# CONSTANTES FUNDAMENTAIS DE ORGANIZACAO DE PASTAS/ARQUIVOS
RESIDENCIA = 3

# Path do arquivo H5 (base REDD ja preparada p/ NILMTK) e outros insumos fundamentais
caminho_dados = "D:/Projetos/phd-thesis/datasets/"

# Definir diretorios onde iremos salvar os insumos gerados do notebook (dados, imagens, etc.)
caminho_dados_notebook = os.path.join(caminho_dados, "26") # Num. notebook
if not os.path.isdir(caminho_dados_notebook):
    os.makedirs(caminho_dados_notebook)
caminho_imagens_notebook = os.path.join(caminho_dados_notebook, "imagens") # Num. notebook
if not os.path.isdir(caminho_imagens_notebook):
    os.makedirs(caminho_imagens_notebook)

# Path do arquivo H5 (base REDD ja preparada p/ NILMTK)
caminho_redd = os.path.join(caminho_dados, "REDD/low_freq")

# Path completo do arquivo REDD
arquivo_dataset = os.path.join(caminho_redd, "redd.h5")

# VARIAVEL AUXILIAR
# Path dos arquivos relacionados as janelas
caminho_janelas = os.path.join(caminho_redd, "../../phd")
if not os.path.isdir(caminho_janelas):
    os.makedirs(caminho_janelas)

In [7]:
from matplotlib import rcParams
import matplotlib.pyplot as plt
from six import iteritems

from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from nilmtk.legacy.disaggregate import CombinatorialOptimisation, FHMM
import nilmtk.utils

%matplotlib inline

# Carregando Dados

In [8]:
# Carregando datasets
df_treino = pd.read_csv(os.path.join(caminho_dados_notebook, 'training_windows.txt'))
df_validacao = pd.read_csv(os.path.join(caminho_dados_notebook, 'validation_windows.txt'))

# Selecionando feature dominio do tempo e frequencia / outputs (status dos aparelhos - dummy)
colunas_janela = df_treino.columns[:512]

colunas_output = ['LC', 'LI', 'MO', 'MT', 'PC', 'LF']

# Preparando dados de treino e validacao
X_treino = df_treino[colunas_janela]
X_validacao = df_validacao[colunas_janela]

y_treino = df_treino[colunas_output].replace(-1, 0)
y_validacao = df_validacao[colunas_output].replace(-1, 0)

del df_treino
del df_validacao
gc.collect()

4

# Treinamento e Avaliação dos Modelos

In [9]:
# from PyNILM.dados.janelas import Janelas
from PyNILM.dados.janelas import Janelas
from PyNILM.dados.utils import *

from PyNILM.avaliacao.metricas import *
from PyNILM.avaliacao.graficos import *
from PyNILM.avaliacao.analises import *

from PyNILM.modelos.utils import *
from PyNILM.modelos.dlafe import DLAFE
# from PyNILM.modelos.rqa import RQA

# Inicializar uso GPU
start_tf_session(memory_limit=int(1024*4))

Virtual devices cannot be modified after being initialized


In [10]:
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from xgboost.sklearn import XGBClassifier

from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=SEED)

## Imagens RP

In [11]:
from pyts.image import RecurrencePlot

# Parametros execucao do experimento
atributos = "rp" 

# Parametros DTLFE (antigo DLAFE)
TAMANHO_IMAGEM_DLAFE = (32, 32, 3)
# modelo_extrator = transfer_learning.vgg16.VGG16(
#             weights='imagenet', 
#             include_top=False,
#             pooling='avg'
#         )
# preprocessamento_extrator = transfer_learning.vgg16.preprocess_input

def converter_janelas_para_rp(
    X,
    input_shape=TAMANHO_IMAGEM_DLAFE,
    data_type=np.float32,
    normalize=False, 
    standardize=False, 
    rescale=False,
    # preprocessamento_extrator=None,
    # modelo_extrator=None,
    arquivo=None):
    
    X_ = np.empty((len(X), * input_shape))
        
    for i, x in tqdm(enumerate(X), total=X.shape[0]):
        
        img = RecurrencePlot(**PARAMETROS_RP).fit_transform([x])[0]
        img = cv2.resize(
                img, 
                dsize=input_shape[:2], 
                interpolation=cv2.INTER_CUBIC
            ).astype(data_type)

        if np.sum(img) > 0:
            # TODO: improve fit/predict statistics
            # Normalizar
            if normalize:
                img = (img - img.min()) / (img.max() - img.min()) # MinMax (0,1)
                #img = (img - img.mean()) / np.max([img.std(), 1e-4])

        #     # centralizar
        #     if centralizar:
        #         img -= img.mean()

            # Padronizar
            elif standardize:
                img = (img - img.mean())/img.std()#tf.image.per_image_standardization(img).numpy()
                
            elif rescale:
                img = (img - img.min()) / (img.max() - img.min())

        # N canais
        img = np.stack([img for i in range(input_shape[-1])],axis=-1).astype(data_type)     
        
        X_[i,] = img
    
    # X_ = np.array(X_).astype(data_type)

    # # Extranindo atributos via DL
    # if preprocessamento_extrator:
    #     X_ = preprocessamento_extrator(X_).astype(data_type)
    # if modelo_extrator:
    #     output = modelo_extrator.predict(X_)
    # else:
    #     output = X_

    if arquivo:
        if os.path.isfile(arquivo): os.remove(arquivo)
        np.save(arquivo, X_)

    # return df    
    return X_


# Carregando dados RQA (treino)
arquivo_treino = os.path.join(caminho_dados_notebook, f"{atributos}_treino.npy")
if os.path.isfile(arquivo_treino):
    X_treino = np.load(arquivo_treino)
else:
    X_treino = converter_janelas_para_rp(
        X_treino.values,
        input_shape=TAMANHO_IMAGEM_DLAFE,
        # preprocessamento_extrator=preprocessamento_extrator,
        # modelo_extrator=modelo_extrator,
        arquivo=arquivo_treino)

# Carregando dados RQA (validacao)
arquivo_validacao = os.path.join(caminho_dados_notebook, f"{atributos}_validacao.npy")
if os.path.isfile(arquivo_validacao):
    X_validacao = np.load(arquivo_validacao)
else:
    X_validacao = converter_janelas_para_rp(
        X_validacao.values,
        input_shape=TAMANHO_IMAGEM_DLAFE,
        # preprocessamento_extrator=preprocessamento_extrator,
        # modelo_extrator=modelo_extrator,
        arquivo=arquivo_validacao)

# # Convertendo Numpy para Dataframe (evitar refatoracao codigo)
# X_treino = pd.DataFrame(X_treino)
# X_validacao = pd.DataFrame(X_validacao)

# Dados agregados (validacao cruzada)
X_cv = np.concatenate([X_treino, X_validacao])#.reset_index(drop=True) 
y_cv = pd.concat([y_treino, y_validacao]).reset_index(drop=True) 

100%|██████████| 37800/37800 [01:57<00:00, 322.86it/s]
100%|██████████| 25200/25200 [01:07<00:00, 371.28it/s]


### CNN

In [17]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D

def convnet(
    input_shape_ = TAMANHO_IMAGEM_DLAFE, 
    output_dim=1, 
    optimizer='adam',
    loss_function = 'binary_crossentropy', 
    metrics=['accuracy'],
    output_activation = 'sigmoid',
    bias_output = None,
):
    
    if bias_output is not None:
        bias_output = tf.keras.initializers.Constant(bias_output)
        
    model = Sequential()

    model.add(Conv2D(filters=32, kernel_size=(3, 3), activation="relu", input_shape=input_shape_))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    
    model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))    
    
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.25))

    model.add(Dense(output_dim, bias_initializer=bias_output, activation=output_activation))

    model.compile(optimizer=optimizer, loss=[loss_function], metrics=metrics)
    
    return model

In [25]:
nome_modelo = "cnn"

resultados_modelo = {
    "appliance": [], "fold": [],
    "acc": [], "f1": [], "auc": [], 
    "base": []
}

for rotulo_aparelho in colunas_output:
    
    print("****************************************************************\n")
    print(f"* Aparelho `{rotulo_aparelho}`...\n")
    
    #######################################################################
    #                AVALIACAO 1 - Base de treino/validacao               #
    #######################################################################

    # # Filtrando output/status por aparelho
    y_treino_aparelho = y_treino[rotulo_aparelho]
    y_validacao_aparelho = y_validacao[rotulo_aparelho]

    print(f"  - Avaliando modelo através da base treino/validacao...")
    
    print("     -> Detalhes da amostragem (lote validacao):")
    print("     ---")
    for item in Counter(y_validacao_aparelho).items():
        print(f"       - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y_validacao_aparelho)*100,1)}%)" )
    print()

    # Treinando modelo
    print(f"     -> Treinando modelo...\n")
    clf = convnet(
        input_shape_= TAMANHO_IMAGEM_DLAFE,
        output_dim = 1,
        loss_function='binary_crossentropy',
        metrics=['accuracy'],
        output_activation='sigmoid'
    )

        
    clf.fit(X_treino, y_treino_aparelho)

    # Prevendo conjunto de dados
    y_hat = clf.predict(X_validacao).round().astype(np.int16)

    # Incrementando resultados
    resultados_modelo["appliance"].append(rotulo_aparelho)
    resultados_modelo["fold"].append("-")
    resultados_modelo["acc"].append( accuracy_score(y_validacao_aparelho, y_hat) )
    resultados_modelo["f1"].append( f1_score(y_validacao_aparelho, y_hat, average="macro") )
    resultados_modelo["auc"].append(roc_auc_score(y_validacao_aparelho, y_hat) if np.unique(y_validacao_aparelho).shape[0]>1 else 0.5)
    resultados_modelo["base"].append("treino-teste")

    print("      > Resultado:")
    print("        = Classification Report:")
    print()
    print(classification_report(y_validacao_aparelho, y_hat))
    print("        = Confusion Matrix:")
    print()
    print(confusion_matrix(y_validacao_aparelho, y_hat))
    print()
    
    
    #######################################################################
    #                  AVALIACAO 2 - Validacao Cruzada                    #
    #######################################################################
    
    y_true_cv, y_pred_cv  = [], []

    print(f"  - Avaliando através de validação cruzada ({skf.n_splits}-folds)...")

    # Filtrando output/status por aparelho
    y_aparelho = y_cv[rotulo_aparelho]
    
    print("     -> Detalhes da amostragem:")
    print("     ---")
    for item in Counter(y_aparelho).items():
        print(f"        - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y_aparelho)*100,1)}%)" )
    print()
    

    print(f"     -> Avaliando modelo (CV - {skf.n_splits} folds)...\n")
    for it, (idx_treino, idx_teste) in tqdm_notebook(enumerate(skf.split(X_cv, y_aparelho)), total=skf.n_splits):
        
        # Preparando lotes
        X_treino_cv, X_teste_cv = X_cv[idx_treino], X_cv[idx_teste]
        y_treino_cv, y_teste_cv = y_aparelho.iloc[idx_treino], y_aparelho.iloc[idx_teste]

        # Treinando modelo
        clf = convnet(
            input_shape_= TAMANHO_IMAGEM_DLAFE,
            output_dim = 1,
            loss_function='binary_crossentropy',
            metrics=['accuracy'],
            output_activation='sigmoid'
        )
        
        clf.fit(X_treino_cv, y_treino_cv)

        # Prevendo conjunto de dados
        y_hat = clf.predict(X_teste_cv).round().astype(np.int16)

        # Incrementando resultados
        resultados_modelo["appliance"].append(rotulo_aparelho)
        resultados_modelo["fold"].append(it+1)
        resultados_modelo["acc"].append( accuracy_score(y_teste_cv, y_hat) )
        resultados_modelo["f1"].append( f1_score(y_teste_cv, y_hat, average="macro") )
        resultados_modelo["auc"].append(roc_auc_score(y_teste_cv, y_hat) if np.unique(y_teste_cv).shape[0]>1 else 0.5)
        resultados_modelo["base"].append("cv")

        # Extendendo rotulos (analise global)
        y_true_cv.extend(y_teste_cv)
        y_pred_cv.extend(y_hat)

    print("      > Resultado:")
    print("        = Classification Report:")
    print()
    print(classification_report(y_true_cv, y_pred_cv))
    print("        = Confusion Matrix:")
    print()
    print(confusion_matrix(y_true_cv, y_pred_cv))
    print()
    print("**********************************************")
    print()
    
# Consolidando DataFrame
df_resultados = pd.DataFrame(resultados_modelo)

arquivo_resultados = os.path.join(caminho_dados_notebook, "resultados", f"{atributos}_resultados_{nome_modelo}.xlsx")
if os.path.isfile(arquivo_resultados): os.remove(arquivo_resultados)
df_resultados.to_excel(arquivo_resultados, index=False)
    
print("############################## RESULTADO FINAL DO DOMINIO/MODELO ##############################")
display(df_resultados.groupby(["appliance","base"]).agg({
    "acc": ["mean", "std", "max", "min"],
    "f1": ["mean", "std", "max", "min"],
    "auc": ["mean", "std", "max", "min"]
}))  

****************************************************************

* Aparelho `LC`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `1`: 12800 amostras (50.8%)
       - Classe `0`: 12400 amostras (49.2%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12400
           1       1.00      1.00      1.00     12800

    accuracy                           1.00     25200
   macro avg       1.00      1.00      1.00     25200
weighted avg       1.00      1.00      1.00     25200

        = Confusion Matrix:

[[12400     0]
 [    0 12800]]

  - Avaliando através de validação cruzada (5-folds)...
     -> Detalhes da amostragem:
     ---
        - Classe `1`: 32000 amostras (50.8%)
        - Classe `0`: 31000 amostras (49.2%)

     -> Avaliando modelo (CV - 5 folds)...



  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31000
           1       1.00      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [    0 32000]]

**********************************************

****************************************************************

* Aparelho `LI`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.95      1.00      0.98     12400
           1       1.00      0

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31000
           1       1.00      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [    0 32000]]

**********************************************

****************************************************************

* Aparelho `MO`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.89      1.00      0.94     12400
           1       1.00      0

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.99      1.00      1.00     31000
           1       1.00      0.99      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [  224 31776]]

**********************************************

****************************************************************

* Aparelho `MT`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     12400
           1       1.00      1

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.99      1.00      1.00     31000
           1       1.00      0.99      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [  215 31785]]

**********************************************

****************************************************************

* Aparelho `PC`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      0.99      0.99     12400
           1       0.99      1

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     31000
           1       1.00      1.00      1.00     32000

    accuracy                           1.00     63000
   macro avg       1.00      1.00      1.00     63000
weighted avg       1.00      1.00      1.00     63000

        = Confusion Matrix:

[[31000     0]
 [    0 32000]]

**********************************************

****************************************************************

* Aparelho `LF`...

  - Avaliando modelo através da base treino/validacao...
     -> Detalhes da amostragem (lote validacao):
     ---
       - Classe `0`: 12400 amostras (49.2%)
       - Classe `1`: 12800 amostras (50.8%)

     -> Treinando modelo...

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.73      0.84      0.78     12400
           1       0.82      0

  0%|          | 0/5 [00:00<?, ?it/s]

      > Resultado:
        = Classification Report:

              precision    recall  f1-score   support

           0       0.82      0.89      0.85     31000
           1       0.89      0.81      0.85     32000

    accuracy                           0.85     63000
   macro avg       0.85      0.85      0.85     63000
weighted avg       0.85      0.85      0.85     63000

        = Confusion Matrix:

[[27706  3294]
 [ 6172 25828]]

**********************************************

############################## RESULTADO FINAL DO DOMINIO/MODELO ##############################


Unnamed: 0_level_0,Unnamed: 1_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,base,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
LC,cv,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0
LC,treino-teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
LF,cv,0.849746,0.030841,0.896111,0.824206,0.849,0.031342,0.895655,0.821143,0.850433,0.030854,0.897387,0.825612
LF,treino-teste,0.768651,,0.768651,0.768651,0.767903,,0.767903,0.767903,0.769745,,0.769745,0.769745
LI,cv,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0
LI,treino-teste,0.976111,,0.976111,0.976111,0.97611,,0.97611,0.97611,0.976484,,0.976484,0.976484
MO,cv,0.996444,0.007687,1.0,0.982698,0.996444,0.007687,1.0,0.982698,0.9965,0.007567,1.0,0.982969
MO,treino-teste,0.936508,,0.936508,0.936508,0.936364,,0.936364,0.936364,0.9375,,0.9375,0.9375
MT,cv,0.996587,0.007631,1.0,0.982937,0.996587,0.007631,1.0,0.982936,0.996641,0.007512,1.0,0.983203
MT,treino-teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0


### Análise de Resultados

In [26]:
# df_resultados_svm = pd.read_excel(os.path.join(caminho_dados_notebook, "resultados", f"{atributos}_resultados_svm.xlsx"), engine='openpyxl')
# df_resultados_svm["model"] = "SVM"

# df_resultados_xgboost = pd.read_excel(os.path.join(caminho_dados_notebook, "resultados", f"{atributos}_resultados_xgboost.xlsx"), engine='openpyxl')
# df_resultados_xgboost["model"] = "XGBOOST"

# df_resultados_mlp = pd.read_excel(os.path.join(caminho_dados_notebook, "resultados", f"{atributos}_resultados_mlp.xlsx"), engine='openpyxl')
# df_resultados_mlp["model"] = "MLP"

# # df_resultados_elm = pd.read_excel(os.path.join(caminho_dados_notebook, "df_resultados_elm.xlsx"))
# # df_resultados_elm["model"] = "ELM"

# df_analise = pd.concat([
#     df_resultados_svm,
#     df_resultados_xgboost,
#     df_resultados_mlp, 
# #     df_resultados_elm,  
# ])

# print("* Análise por modelo:")
# df_analise_modelo = df_analise.groupby(["model","base"]).agg({
#     "acc": ["mean","std","max","min"],
#     "f1": ["mean","std","max","min"],
#     "auc": ["mean","std","max","min"]
# }).reset_index().sort_values(('f1','mean'), ascending=False).set_index("model")
# display(df_analise_modelo)
# df_analise_modelo.to_excel(os.path.join(caminho_dados_notebook, "resultados", f"{atributos}_analise_modelos.xlsx"))

print()
print("* Análise por aparelho/modelo:")
df_analise_aparelho = df_resultados.groupby(["appliance","base"]).agg({
    "acc": ["mean","std","max","min"],
    "f1": ["mean","std","max","min"],
    "auc": ["mean","std","max","min"]
})#.reset_index().sort_values(('f1','mean'), ascending=False).set_index(["aparelho","metodologia"])
display(df_analise_aparelho)
df_analise_aparelho.to_excel(os.path.join(caminho_dados_notebook, "resultados", f"{atributos}_analise_aparelhos.xls"))


* Análise por aparelho/modelo:


Unnamed: 0_level_0,Unnamed: 1_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,base,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
LC,cv,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0
LC,treino-teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0
LF,cv,0.849746,0.030841,0.896111,0.824206,0.849,0.031342,0.895655,0.821143,0.850433,0.030854,0.897387,0.825612
LF,treino-teste,0.768651,,0.768651,0.768651,0.767903,,0.767903,0.767903,0.769745,,0.769745,0.769745
LI,cv,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0
LI,treino-teste,0.976111,,0.976111,0.976111,0.97611,,0.97611,0.97611,0.976484,,0.976484,0.976484
MO,cv,0.996444,0.007687,1.0,0.982698,0.996444,0.007687,1.0,0.982698,0.9965,0.007567,1.0,0.982969
MO,treino-teste,0.936508,,0.936508,0.936508,0.936364,,0.936364,0.936364,0.9375,,0.9375,0.9375
MT,cv,0.996587,0.007631,1.0,0.982937,0.996587,0.007631,1.0,0.982936,0.996641,0.007512,1.0,0.983203
MT,treino-teste,1.0,,1.0,1.0,1.0,,1.0,1.0,1.0,,1.0,1.0


# Conclusões

...

# Fim.

In [55]:
%load_ext watermark

In [56]:
%watermark -a "Diego Luiz Cavalca" -u -n -t -z -v -m -g

Author: Diego Luiz Cavalca

Last updated: Mon Jan 24 2022 08:52:41Hora oficial do Brasil

Python implementation: CPython
Python version       : 3.8.8
IPython version      : 7.21.0

Compiler    : MSC v.1928 64 bit (AMD64)
OS          : Windows
Release     : 10
Machine     : AMD64
Processor   : Intel64 Family 6 Model 158 Stepping 9, GenuineIntel
CPU cores   : 8
Architecture: 64bit

Git hash: 5e5bccaaf9e541e11be67706c7eb7d7b39a8be65

