# Benchmarking Quali - Aprendizado Raso (RQA)

Estruturação de pipeline baseado em aprendizado raso utilizando atributos determinísticos calculados sobre os gráficos de recorrência (RQA).


# Configurações

In [1]:
import os
import sys
import gc
from pprint import pprint
from collections import Counter
import copy
import warnings
warnings.filterwarnings(action="ignore")

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from IPython.display import display

from tqdm import *

from pretty_confusion_matrix import *

# TODO: implementar rotina na classe PyNILM.utils
def sizeof_fmt(num, suffix='B'):
    ''' by Fred Cirera,  https://stackoverflow.com/a/1094933/1870254, modified'''
    for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']:
        if abs(num) < 1024.0:
            return "%3.1f %s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f %s%s" % (num, 'Yi', suffix)

def listar_variaveis_memoria(ambiente):
    print("* Variáveis instanciadas em memória:")
    print("---")
    total = 0
    for name, size in sorted(((name, sys.getsizeof(value)) for name, value in ambiente.items()),
                             key= lambda x: -x[1])[:10]:
        total += size
        print("{:>30}: {:>8}".format(name, sizeof_fmt(size)))
    print("---")
    print("Total:", sizeof_fmt(total))
    
# TODO: implementar na classe utils
def highlight_col(x):
    r = 'background-color: #D9D9D9'
    df1 = pd.DataFrame('', index=x.index, columns=x.columns)
    df1.iloc[:, -2] = r
    return df1   

In [2]:
# CONSTANTES FUNDAMENTAIS DE ORGANIZACAO DE PASTAS/ARQUIVOS
RESIDENCIA = 3

# Path do arquivo H5 (base REDD ja preparada p/ NILMTK) e outros insumos fundamentais
caminho_dados = "D:/Projetos/phd-thesis/datasets/"

# Definir diretorios onde iremos salvar os insumos gerados do notebook (dados, imagens, etc.)
caminho_dados_notebook = os.path.join(caminho_dados, "22") # Num. notebook
if not os.path.isdir(caminho_dados_notebook):
    os.makedirs(caminho_dados_notebook)
caminho_imagens_notebook = os.path.join(caminho_dados_notebook, "imagens") # Num. notebook
if not os.path.isdir(caminho_imagens_notebook):
    os.makedirs(caminho_imagens_notebook)

# Path do arquivo H5 (base REDD ja preparada p/ NILMTK)
caminho_redd = os.path.join(caminho_dados, "REDD/low_freq")

# Path completo do arquivo REDD
arquivo_dataset = os.path.join(caminho_redd, "redd.h5")

# VARIAVEL AUXILIAR
# Path dos arquivos relacionados as janelas
caminho_janelas = os.path.join(caminho_redd, "../../phd")
if not os.path.isdir(caminho_janelas):
    os.makedirs(caminho_janelas)

In [3]:
from matplotlib import rcParams
import matplotlib.pyplot as plt
from six import iteritems

from nilmtk import DataSet, TimeFrame, MeterGroup, HDFDataStore
from nilmtk.legacy.disaggregate import CombinatorialOptimisation, FHMM
import nilmtk.utils

%matplotlib inline

# Dados

## Base REDD

In [4]:
# Gerar arquivo H5 (Nilmtk) do dataset REDD, caso n exista
if not os.path.isfile(arquivo_dataset):
    from nilmtk.dataset_converters import convert_redd
    
    print("Gerando arquivo H5 (NILMTK) da base REDD, aguarde...")
    print("-----")
    convert_redd(caminho_redd, arquivo_dataset)

# Carregando dataset REDD no objeto NILMTK
# Exemplo de carregamento da base REDD no NILMTK
import h5py # * Evitar erro de incompatibilidade entre h5py e nilmtk
from nilmtk import DataSet
from nilmtk.utils import print_dict
redd = DataSet(arquivo_dataset)
print("NILMTK -> Detalhes sobre o dataset REDD:")
print_dict(redd.metadata)
print()

# Parametros dos dados (treino)
PARAMETROS_DATASET = {
    "base":redd,
    "id_residencia": RESIDENCIA,
    "inicio_intervalo":'2011-04-16 05:11:30',
    "fim_intervalo":'2011-04-23 08:43:26',
    "debug": False    
}
# print("PARÂMETROS DO ESTUDO:")
# pprint(PARAMETROS_DATASET)



NILMTK -> Detalhes sobre o dataset REDD:





In [5]:
def carregar_dados_aparelho(janelas, instancia, aparelho, taxa, tamanho_janela, split_teste=None, eliminar_janelas_vazias=False, debug=False):
    # Extrair series divididas em janelas para cada medidor
    dados_cargas = janelas.preparar(
        taxa_amostral=taxa, 
        intervalo_medicao=tamanho_janela
    )
    print()

    # Pprearando dados (Serie / Estado)
    # X
    dados_medidores = janelas.filtrar_cargas(
        dados_cargas,
        filtros=[
            (1, 'site_meter'),
            (2, 'site_meter'),    
        ]
    )
    
    dados_aparelho = janelas.filtrar_cargas(dados_cargas, filtros=[(instancia, aparelho)])[0]
    
    # Validar tamanho dos dados de medidores (podem ter mais registros que os aparelhos)
    janela_media_medidores = int(np.sum([len(d["janelas"])for d in dados_medidores])/len(dados_medidores))
    janela_media_aparelho = len(dados_aparelho["janelas"])#int(np.sum([len(d["janelas"])for d in dados_aparelho])/len(dados_aparelho))

    # Ajustando para medidores terem o mesmo shape de janelas dos aparelhos 
    if janela_media_medidores > janela_media_aparelho:
        diferenca = janela_media_medidores-janela_media_aparelho
        #if debug: print("  -> Diferenca encontrada entre medidores/aparelhos:", diferenca, ", ajustando..")
        for i in range(len(dados_medidores)):
            removidos = 0
            while removidos < diferenca:
                # Remover ultima janela
                dados_medidores[i]["janelas"] = dados_medidores[i]["janelas"][:-1,:]
                removidos += 1
    
    # Estruturando dados modelagem (X e y)
    X = dados_medidores[0]["janelas"] + dados_medidores[1]["janelas"]

    # Selecionando apenas janelas VALIDAS (ocorrencia de ao menos 1 carga)
    # TODO: Implementar na biblioteca esta rotina de validacao
    if eliminar_janelas_vazias:
        idx_janelas_validas = np.where(np.sum(X, axis=1)>0)[0]
        X = X[idx_janelas_validas]
        #for i in range(len(dados_aparelhos)):
        dados_aparelho["janelas"] = dados_aparelho["janelas"][idx_janelas_validas]
        rotulos = copy.deepcopy(dados_aparelho["rotulos"])
        dados_aparelho["rotulos"]["estado"] = rotulos["estado"][idx_janelas_validas]
        dados_aparelho["rotulos"]["media"]  = rotulos["media"][idx_janelas_validas]
        dados_aparelho["rotulos"]["total"]  = rotulos["total"][idx_janelas_validas]
        if debug:
            print("   - `{}-{}`: {} => {}".format(
                dados_aparelho["carga"].upper(), 
                dados_aparelho["instancia"],
                Counter(rotulos["estado"]),
                Counter(dados_aparelho["rotulos"]["estado"])
            ))

    # y
    y = dados_aparelho["rotulos"]["estado"]

    # <<< Limpando memoria >>>
    dados_cargas = None
    del dados_cargas
    dados_medidores = None
    del dados_medidores
    dados_aparelho = None
    del dados_aparelho
    gc.collect()
    # <<< Limpando memoria >>>

    # Fazendo split dos dados (treino/teste)
    if split_teste is None:
        return X, y
    else:
        X_treino, X_teste, y_treino, y_teste = train_test_split(
            X, y, 
            test_size=split_teste,
            stratify=y,
            random_state=SEED
        )
        print()

        return X_treino, X_teste, y_treino, y_teste        
        

## Melhores Combinações de Taxas e Janelas para cada Aparelho (estudo 19)

In [6]:
df_melhores_taxas_janelas = pd.read_csv(os.path.join(caminho_dados, "19", "melhores_taxa_janela_aparelhos.csv"), index_col=0)
df_melhores_taxas_janelas

Unnamed: 0,carga,taxa_amostragem,janela,loss,acuracia,precisao,recall,f1,f1_macro
0,dish_washer - 9,2,720,0.05,95.33,20.0,25.0,22.22,59.91
1,fridge - 7,2,1080,0.0,100.0,100.0,100.0,100.0,100.0
2,microwave - 16,2,900,0.04,95.83,66.67,33.33,44.44,71.14
3,washer_dryer - 13,2,60,0.0,99.89,100.0,95.74,97.83,98.88
4,washer_dryer - 14,3,360,0.02,97.99,100.0,55.56,71.43,85.19


In [29]:
# TODO: 
# - Desenvolver módulo da metodologia na lib PyNILM

## Parâmetros de RP dos Aparelhos (estudo 18)

In [7]:
# Carregando arquivos de parametros, caso n estejam (kernel reiniciado)
if not 'parametros_rp_aparelho' in locals():
    with open(os.path.join(caminho_dados, "18", "parametros_rp_aparelho.json"),'r') as arquivo:
        parametros_rp_aparelho = json.load(arquivo)

## Ambiente e Funções Auxiliares

In [8]:
# from PyNILM.dados.janelas import Janelas
from PyNILM.dados.janelas import Janelas
from PyNILM.avaliacao.metricas import *
from PyNILM.avaliacao.graficos import *
from PyNILM.avaliacao.analises import *

from pyts.image import RecurrencePlot, GramianAngularField

# Garantindo reprodutibilidade
import random as rn

# Constantes dos experimentos
SEED = 33
FRACAO_TESTE = 0.25
EPOCAS = 100
TAMANHO_LOTE = 32
VERBOSIDADE = 2

# Parametros RP (verificado empiricamente)
PARAMETROS_RP = {
    "dimension": 1,
    "time_delay": 1,
    "threshold": None,
    "percentage": 10
}
TAMANHO_IMAGEM = (32,32)

# Travar Seed's
np.random.seed(SEED)
rn.seed(SEED)
os.environ['PYTHONHASHSEED']=str(SEED)

# Habilitando/limitando utilização de GPUs

In [9]:
def instancia_aparelho_residencia(aparelho, residencia, base = redd):
    """Função para coletar o id/instancia do aparelho na residencia,
    permitindo executar os testes independente da residencia"""
    instancia = []
    #for e in base.buildings[residencia].elec.all_meters():
    for e_i in range(1, len(janelas.base.buildings[residencia].elec.all_meters())):

        # Selecionando canal/aparelho
        e = janelas.base.buildings[residencia].elec[e_i]
        
        if not hasattr(e,'meters'):
            if e.label().lower().replace(" ","_") == aparelho:
                instancia.append( e.instance() )
        else:
            for e_ in e.meters:
                if e_.label().lower().replace(" ","_") == aparelho:
                    instancia.append( e_.instance() )
    return instancia

## Cálculo RQA

In [10]:
# from pyrqa.time_series import TimeSeries
# from pyrqa.settings import Settings
# from pyrqa.analysis_type import Classic
# from pyrqa.neighbourhood import FixedRadius
# from pyrqa.metric import EuclideanMetric
# from pyrqa.computation import RQAComputation

# def calcular_rqa_amostras(X, rotulo_aparelho, params = PARAMETROS_RP):
    
#     rqa_data = []

#     for x in tqdm_notebook(X):

#         # Calculating RQA
#         time_series = TimeSeries(x,
#                      embedding_dimension=params["dimension"],
#                      time_delay=params["time_delay"])
#         settings = Settings(time_series,
#                             analysis_type=Classic,
#                             neighbourhood=FixedRadius(params["threshold"]),
#                             similarity_measure=EuclideanMetric)
#         computation = RQAComputation.create(settings, verbose=False)
#         rqa_result = computation.run()

#         rqa_data.append( [aparelho] + list(rqa_result.to_array()) )
        
#     return rqa_data
from pyrqa.time_series import TimeSeries
from pyrqa.settings import Settings
from pyrqa.analysis_type import Classic
from pyrqa.neighbourhood import FixedRadius
from pyrqa.metric import EuclideanMetric
from pyrqa.computation import RQAComputation

def calcular_rqa_amostras(X, Y, rotulo_aparelho, params = PARAMETROS_RP):
    
    rqa_data = []

    for x, y in tqdm_notebook(zip(X, Y), total=Y.shape[0]):

        # Calculating RQA
        time_series = TimeSeries(x,
                     embedding_dimension=params["dimension"],
                     time_delay=params["time_delay"])
        settings = Settings(time_series,
                            analysis_type=Classic,
                            neighbourhood=FixedRadius(params["percentage"]/100), 
                            # PS.: Utilizando percentage ao inves de threshold 
                            # devido a semanticas distintas entre libs (pyts e pyrqa)
                            # bem como distincao entre RPs (cnn) e RQAs (supervisionado).
                            similarity_measure=EuclideanMetric)
        computation = RQAComputation.create(settings, verbose=False)
        rqa_result = computation.run()

        rqa_data.append( [rotulo_aparelho, y]  + list(rqa_result.to_array()) )
        
    return rqa_data


In [54]:
# Teste da classe
janelas_treino = Janelas(
    base=redd,
    id_residencia=3,
    inicio_intervalo='2011-04-16',
    fim_intervalo='2011-05-16',
    debug = False
)

janelas_teste = Janelas(
    base=redd,
    id_residencia=3,
    inicio_intervalo='2011-05-17',
    fim_intervalo='2011-05-30',
    debug = False
)

In [40]:
rqa = []

janelas = janelas_treino#Janelas(**PARAMETROS_DATASET)

print("# CALCULANDO RQA (BASE TREINO)...\n")

for rotulo_aparelho in df_melhores_taxas_janelas.loc[
    df_melhores_taxas_janelas["carga"].isin(
        ['dish_washer - 9','fridge - 7','microwave - 16','washer_dryer - 13',
         'washer_dryer - 14']),
    : ]["carga"].values:
    
    print(f"* Aparelho {rotulo_aparelho.upper()}:")
    print()
    
    # Informacoes da carga selecionada
    CARGA = rotulo_aparelho.split(" - ")[0]
    #INSTANCIA = int(rotulo_aparelho.split(" - ")[1])

    config_aparelho = df_melhores_taxas_janelas[
        df_melhores_taxas_janelas["carga"]==rotulo_aparelho
    ].to_dict("records")[0]
    TAXA = config_aparelho["taxa_amostragem"]
    TAMANHO_JANELA = config_aparelho["janela"]
    CONFIG_RP_APARELHO = PARAMETROS_RP
    
    # Percorrer instancias do aparelho na residencia
    for INSTANCIA in instancia_aparelho_residencia(CARGA, RESIDENCIA, base = redd):
        
        # Extrair series divididas em janelas para cada medidor
        print("   - Carregando dados (taxa={:.0f}, janela={:.0f})...".format(
            TAXA, TAMANHO_JANELA
        ))
        X, y = carregar_dados_aparelho(
            janelas=janelas,
            instancia=INSTANCIA,
            aparelho=CARGA,
            tamanho_janela=TAMANHO_JANELA,
            taxa=TAXA,
            eliminar_janelas_vazias=True
        )
        print()
        
        print("   - Detalhes da amostragem (lotes):")
        print("   ---")
        for item in Counter(y).items():
            print(f"      - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y)*100,1)}%)" )
        print()
        
        print("* Calculando RQA...")
        rqa.extend( calcular_rqa_amostras(X, y, rotulo_aparelho) )
        print()

        # Consolidando resultados RQA dataframe...
df_rqa = pd.DataFrame(
    data = np.nan_to_num(rqa),
    columns = [
        "Appliance", "State",
        "Minimum diagonal line length (L_min)",
        "Minimum vertical line length (V_min)",
        "Minimum white vertical line length (W_min)",
        "Recurrence rate (RR)",
        "Determinism (DET)",
        "Average diagonal line length (L)",
        "Longest diagonal line length (L_max)",
        "Divergence (DIV)",
        "Entropy diagonal lines (L_entr)",
        "Laminarity (LAM)",
        "Trapping time (TT)",
        "Longest vertical line length (V_max)",
        "Entropy vertical lines (V_entr)",
        "Average white vertical line length (W)",
        "Longest white vertical line length (W_max)",
        "Longest white vertical line length inverse (W_div)",
        "Entropy white vertical lines (W_entr)",
        "Ratio determinism / recurrence rate (DET/RR)",
        "Ratio laminarity / determinism (LAM/DET)"
    ]
)
# Incluindo rótulo
#df_rqa["Y"] = y


# a = input("Deseja persistir os dados? [S]im ou [N]ão: ")
# if str(a.upper()) == "S":
df_rqa.to_excel( os.path.join(caminho_dados_notebook, "df_rqa_treino.xlsx"), index=False )

# CALCULANDO RQA (BASE TREINO)...

* Aparelho DISH_WASHER - 9:

Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.
   - Carregando dados (taxa=2, janela=720)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 583 amostras (97.5%)
      - Classe `1`: 15 amostras (2.5%)

* Calculando RQA...


  0%|          | 0/598 [00:00<?, ?it/s]


* Aparelho FRIDGE - 7:

Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.
   - Carregando dados (taxa=2, janela=1080)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `1`: 398 amostras (99.7%)
      - Classe `0`: 1 amostras (0.3%)

* Calculando RQA...


  0%|          | 0/399 [00:00<?, ?it/s]


* Aparelho MICROWAVE - 16:

Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.
   - Carregando dados (taxa=2, janela=900)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 452 amostras (94.8%)
      - Classe `1`: 25 amostras (5.2%)

* Calculando RQA...


  0%|          | 0/477 [00:00<?, ?it/s]


* Aparelho WASHER_DRYER - 13:

Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.
   - Carregando dados (taxa=2, janela=60)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 6912 amostras (97.4%)
      - Classe `1`: 187 amostras (2.6%)

* Calculando RQA...


  0%|          | 0/7099 [00:00<?, ?it/s]


   - Carregando dados (taxa=2, janela=60)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 6817 amostras (96.0%)
      - Classe `1`: 282 amostras (4.0%)

* Calculando RQA...


  0%|          | 0/7099 [00:00<?, ?it/s]


* Aparelho WASHER_DRYER - 14:

Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.
   - Carregando dados (taxa=3, janela=360)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 766 amostras (96.2%)
      - Classe `1`: 30 amostras (3.8%)

* Calculando RQA...


  0%|          | 0/796 [00:00<?, ?it/s]


   - Carregando dados (taxa=3, janela=360)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 758 amostras (95.2%)
      - Classe `1`: 38 amostras (4.8%)

* Calculando RQA...


  0%|          | 0/796 [00:00<?, ?it/s]




In [57]:
rqa = []

janelas = janelas_teste

print("# CALCULANDO RQA (BASE TESTE)...\n")

for rotulo_aparelho in df_melhores_taxas_janelas.loc[
    df_melhores_taxas_janelas["carga"].isin(
        ['dish_washer - 9','fridge - 7','microwave - 16','washer_dryer - 13',
         'washer_dryer - 14']),
    : ]["carga"].values:
    
    print(f"* Aparelho {rotulo_aparelho.upper()}:")
    print()
    
    # Informacoes da carga selecionada
    CARGA = rotulo_aparelho.split(" - ")[0]
    #INSTANCIA = int(rotulo_aparelho.split(" - ")[1])

    config_aparelho = df_melhores_taxas_janelas[
        df_melhores_taxas_janelas["carga"]==rotulo_aparelho
    ].to_dict("records")[0]
    TAXA = config_aparelho["taxa_amostragem"]
    TAMANHO_JANELA = config_aparelho["janela"]
    CONFIG_RP_APARELHO = PARAMETROS_RP
    
    # Percorrer instancias do aparelho na residencia
    for INSTANCIA in instancia_aparelho_residencia(CARGA, RESIDENCIA, base = redd):
        
        # Extrair series divididas em janelas para cada medidor
        print("   - Carregando dados (taxa={:.0f}, janela={:.0f})...".format(
            TAXA, TAMANHO_JANELA
        ))
        X, y = carregar_dados_aparelho(
            janelas=janelas,
            instancia=INSTANCIA,
            aparelho=CARGA,
            tamanho_janela=TAMANHO_JANELA,
            taxa=TAXA,
            eliminar_janelas_vazias=True
        )
        print()
        
        print("   - Detalhes da amostragem (lotes):")
        print("   ---")
        for item in Counter(y).items():
            print(f"      - Classe `{item[0]}`: {item[1]} amostras ({round(item[1]/len(y)*100,1)}%)" )
        print()
        
        print("* Calculando RQA...")
        rqa.extend( calcular_rqa_amostras(X, y, rotulo_aparelho) )
        print()

        # Consolidando resultados RQA dataframe...
df_rqa = pd.DataFrame(
    data = np.nan_to_num(rqa),
    columns = [
        "Appliance", "State",
        "Minimum diagonal line length (L_min)",
        "Minimum vertical line length (V_min)",
        "Minimum white vertical line length (W_min)",
        "Recurrence rate (RR)",
        "Determinism (DET)",
        "Average diagonal line length (L)",
        "Longest diagonal line length (L_max)",
        "Divergence (DIV)",
        "Entropy diagonal lines (L_entr)",
        "Laminarity (LAM)",
        "Trapping time (TT)",
        "Longest vertical line length (V_max)",
        "Entropy vertical lines (V_entr)",
        "Average white vertical line length (W)",
        "Longest white vertical line length (W_max)",
        "Longest white vertical line length inverse (W_div)",
        "Entropy white vertical lines (W_entr)",
        "Ratio determinism / recurrence rate (DET/RR)",
        "Ratio laminarity / determinism (LAM/DET)"
    ]
)
# Incluindo rótulo
#df_rqa["Y"] = y


# a = input("Deseja persistir os dados? [S]im ou [N]ão: ")
# if str(a.upper()) == "S":
df_rqa.to_excel( os.path.join(caminho_dados_notebook, "df_rqa_teste.xlsx"), index=False )

# CALCULANDO RQA (BASE TESTE)...

* Aparelho DISH_WASHER - 9:

Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.
   - Carregando dados (taxa=2, janela=720)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 395 amostras (96.8%)
      - Classe `1`: 13 amostras (3.2%)

* Calculando RQA...


  0%|          | 0/408 [00:00<?, ?it/s]


* Aparelho FRIDGE - 7:

Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.
   - Carregando dados (taxa=2, janela=1080)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `1`: 269 amostras (98.2%)
      - Classe `0`: 5 amostras (1.8%)

* Calculando RQA...


  0%|          | 0/274 [00:00<?, ?it/s]


* Aparelho MICROWAVE - 16:

Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.
   - Carregando dados (taxa=2, janela=900)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 306 amostras (93.9%)
      - Classe `1`: 20 amostras (6.1%)

* Calculando RQA...


  0%|          | 0/326 [00:00<?, ?it/s]


* Aparelho WASHER_DRYER - 13:

Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.
   - Carregando dados (taxa=2, janela=60)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 4643 amostras (96.0%)
      - Classe `1`: 192 amostras (4.0%)

* Calculando RQA...


  0%|          | 0/4835 [00:00<?, ?it/s]


   - Carregando dados (taxa=2, janela=60)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 4582 amostras (94.8%)
      - Classe `1`: 253 amostras (5.2%)

* Calculando RQA...


  0%|          | 0/4835 [00:00<?, ?it/s]


* Aparelho WASHER_DRYER - 14:

Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.
   - Carregando dados (taxa=3, janela=360)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 514 amostras (94.8%)
      - Classe `1`: 28 amostras (5.2%)

* Calculando RQA...


  0%|          | 0/542 [00:00<?, ?it/s]


   - Carregando dados (taxa=3, janela=360)...
Meter 13 is in a nested meter group. Retrieving just the ElecMeter.
Meter 14 is in a nested meter group. Retrieving just the ElecMeter.


   - Detalhes da amostragem (lotes):
   ---
      - Classe `0`: 508 amostras (93.7%)
      - Classe `1`: 34 amostras (6.3%)

* Calculando RQA...


  0%|          | 0/542 [00:00<?, ?it/s]




# Classificadores

In [58]:
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from xgboost.sklearn import XGBClassifier

from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=SEED)

resultados = []

In [59]:
df_rqa_treino = pd.read_excel( os.path.join(caminho_dados_notebook, "df_rqa_treino.xlsx"), index=False, engine='openpyxl')
df_rqa_teste = pd.read_excel( os.path.join(caminho_dados_notebook, "df_rqa_teste.xlsx"), index=False, engine='openpyxl' )

In [60]:
df_rqa_treino.shape

(17264, 21)

In [61]:
df_rqa_teste.shape

(11762, 21)

## SVM

In [66]:
resultados_modelo = {
    "appliance": [], "fold": [],
    "acc": [], "f1": [], "auc": [], "base": []
}

for a in df_rqa_treino["Appliance"].unique():
    
    print(f"* Appliance `{a}`...\n")
    
    # Base de treino
    df_treino = df_rqa_treino[df_rqa_treino["Appliance"]==a]
    X = np.nan_to_num(df_treino[["Recurrence rate (RR)","Determinism (DET)"]].values)
    y = df_treino["State"].values
    
    y_true, y_pred  = [], []
    
    
    print("   - Evaluation model (CV - 10 folds)...\n")
    for it, (idx_treino, idx_teste) in enumerate(skf.split(X, y)):

        # Preparando lotes
        X_treino, X_teste = X[idx_treino], X[idx_teste]
        y_treino, y_teste = y[idx_treino], y[idx_teste]
        
        if len(set(y_treino))>1:
        
            # Treinando modelo
            modelo = SVC(kernel='rbf', random_state=SEED)
            modelo.fit(X_treino, y_treino)

            # Prevendo conjunto de teste
            y_hat = modelo.predict(X_teste)

            # Incrementando resultados
            resultados_modelo["appliance"].append(a)
            resultados_modelo["fold"].append(it+1)
            resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
            resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
            resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
            resultados_modelo["base"].append("treino")

            # Extendendo rotulos (analise global)
            y_true.extend(y_teste)
            y_pred.extend(y_hat)
        
    # Base de teste
    df_teste = df_rqa_teste[df_rqa_teste["Appliance"]==a]
    X_teste = np.nan_to_num(df_teste[["Recurrence rate (RR)","Determinism (DET)"]].values)
    y_teste = df_teste["State"].values
    
    # Treinando/avaliando modelo
    modelo = SVC(kernel='rbf', random_state=SEED)
    modelo.fit(X, y)
    y_hat = modelo.predict(X_teste)
    
    # Incrementando resultados
    resultados_modelo["appliance"].append(a)
    resultados_modelo["fold"].append(it+1)
    resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
    resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
    resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
    resultados_modelo["base"].append("teste")

    print()
    print("   - Final Results:")
    print("   ---")
    print()

    print("      -> Classification Report:")
    print()
    print(classification_report(y_true, y_pred))
    print("      -> Confusion Matrix:")
    print()
    print(confusion_matrix(y_true, y_pred))
    
    print()
    
# Consolidating DataFrame
df_resultados_svm = pd.DataFrame(resultados_modelo)
df_resultados_svm.to_excel(os.path.join(caminho_dados_notebook, "df_resultados_svm.xlsx"))
    
print("############################## FINAL MODEL RESULTS ##############################")
display(df_resultados_svm.groupby(["appliance","base"]).agg({
    "acc": ["mean", "std", "max", "min"],
    "f1": ["mean", "std", "max", "min"],
    "auc": ["mean", "std", "max", "min"]
}))

* Appliance `dish_washer - 9`...

   - Evaluation model (CV - 10 folds)...


   - Final Results:
   ---

      -> Classification Report:

              precision    recall  f1-score   support

           0       0.97      1.00      0.99       583
           1       0.00      0.00      0.00        15

    accuracy                           0.97       598
   macro avg       0.49      0.50      0.49       598
weighted avg       0.95      0.97      0.96       598

      -> Confusion Matrix:

[[583   0]
 [ 15   0]]

* Appliance `fridge - 7`...

   - Evaluation model (CV - 10 folds)...


   - Final Results:
   ---

      -> Classification Report:

              precision    recall  f1-score   support

           1       1.00      1.00      1.00       359

    accuracy                           1.00       359
   macro avg       1.00      1.00      1.00       359
weighted avg       1.00      1.00      1.00       359

      -> Confusion Matrix:

[[359]]

* Appliance `microwave - 16`...

   - Ev

Unnamed: 0_level_0,Unnamed: 1_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,base,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
dish_washer - 9,teste,0.968137,,0.968137,0.968137,0.491905,,0.491905,0.491905,0.5,,0.5,0.5
dish_washer - 9,treino,0.974944,0.008725,0.983333,0.966667,0.493648,0.002237,0.495798,0.491525,0.5,0.0,0.5,0.5
fridge - 7,teste,0.981752,,0.981752,0.981752,0.495396,,0.495396,0.495396,0.5,,0.5,0.5
fridge - 7,treino,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.5,0.0,0.5,0.5
microwave - 16,teste,0.93865,,0.93865,0.93865,0.484177,,0.484177,0.484177,0.5,,0.5,0.5
microwave - 16,treino,0.947651,0.010705,0.958333,0.9375,0.486547,0.002822,0.489362,0.483871,0.5,0.0,0.5,0.5
washer_dryer - 13,teste,0.953981,,0.953981,0.953981,0.488224,,0.488224,0.488224,0.5,,0.5,0.5
washer_dryer - 13,treino,0.966967,0.000216,0.967583,0.966878,0.491603,5.6e-05,0.491762,0.49158,0.5,0.0,0.5,0.5
washer_dryer - 14,teste,0.942804,,0.942804,0.942804,0.48528,,0.48528,0.48528,0.5,,0.5,0.5
washer_dryer - 14,treino,0.957288,0.002625,0.962264,0.955975,0.489088,0.000684,0.490385,0.488746,0.5,0.0,0.5,0.5


## XGBOOST

In [67]:
# resultados_modelo = {
#     "appliance": [], "fold": [],
#     "acc": [], "f1": [], "auc": []
# }

# for a in df_rqa["Appliance"].unique():
    
#     print(f"* Appliance `{a}`...\n")
    
#     df_ = df_rqa[df_rqa["Appliance"]==a]
#     X = np.nan_to_num(df_[["Recurrence rate (RR)","Determinism (DET)"]].values)
#     y = df_["State"].values
    
#     y_true, y_pred  = [], []
    
    
#     print("   - Evaluation model (CV - 10 folds)...\n")
#     for it, (idx_treino, idx_teste) in enumerate(skf.split(X, y)):

#         # Preparando lotes
#         X_treino, X_teste = X[idx_treino], X[idx_teste]
#         y_treino, y_teste = y[idx_treino], y[idx_teste]
        
#         # Treinando modelo
#         modelo = XGBClassifier(random_state=SEED, n_jobs=4)
#         modelo.fit(X_treino, y_treino)
        
#         # Prevendo conjunto de teste
#         y_hat = modelo.predict(X_teste)

#         # Incrementando resultados
#         resultados_modelo["appliance"].append(a)
#         resultados_modelo["fold"].append(it+1)
#         resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
#         resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
#         resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
        
#         # Extendendo rotulos (analise global)
#         y_true.extend(y_teste)
#         y_pred.extend(y_hat)
        
#     print()
#     print("   - Final Results:")
#     print("   ---")
#     print()

#     print("      -> Classification Report:")
#     print()
#     print(classification_report(y_true, y_pred))
#     print("      -> Confusion Matrix:")
#     print()
#     print(confusion_matrix(y_true, y_pred))
    
#     print()
    
# # Consolidating DataFrame
# df_resultados_xgboost = pd.DataFrame(resultados_modelo)
# df_resultados_xgboost.to_excel(os.path.join(caminho_dados_notebook, "df_resultados_xgboost.xlsx"))

# print("############################## FINAL MODEL RESULTS ##############################")
# display(df_resultados_xgboost.groupby("appliance").agg({
#     "acc": ["mean", "std", "max", "min"],
#     "f1": ["mean", "std", "max", "min"],
#     "auc": ["mean", "std", "max", "min"]
# }))

In [69]:
resultados_modelo = {
    "appliance": [], "fold": [],
    "acc": [], "f1": [], "auc": [], "base": []
}

for a in df_rqa_treino["Appliance"].unique():
    
    print(f"* Appliance `{a}`...\n")
    
    # Base de treino
    df_treino = df_rqa_treino[df_rqa_treino["Appliance"]==a]
    X = np.nan_to_num(df_treino[["Recurrence rate (RR)","Determinism (DET)"]].values)
    y = df_treino["State"].values
    
    y_true, y_pred  = [], []
    
    
    print("   - Evaluation model (CV - 10 folds)...\n")
    for it, (idx_treino, idx_teste) in enumerate(skf.split(X, y)):

        # Preparando lotes
        X_treino, X_teste = X[idx_treino], X[idx_teste]
        y_treino, y_teste = y[idx_treino], y[idx_teste]
        
        if len(set(y_treino))>1:
        
            # Treinando modelo
            modelo = XGBClassifier(random_state=SEED, n_jobs=4)
            modelo.fit(X_treino, y_treino)

            # Prevendo conjunto de teste
            y_hat = modelo.predict(X_teste)

            # Incrementando resultados
            resultados_modelo["appliance"].append(a)
            resultados_modelo["fold"].append(it+1)
            resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
            resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
            resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
            resultados_modelo["base"].append("treino")

            # Extendendo rotulos (analise global)
            y_true.extend(y_teste)
            y_pred.extend(y_hat)
        
    # Base de teste
    df_teste = df_rqa_teste[df_rqa_teste["Appliance"]==a]
    X_teste = np.nan_to_num(df_teste[["Recurrence rate (RR)","Determinism (DET)"]].values)
    y_teste = df_teste["State"].values
    
    # Treinando/avaliando modelo
    modelo = XGBClassifier(random_state=SEED, n_jobs=4)
    modelo.fit(X, y)
    y_hat = modelo.predict(X_teste)
    
    # Incrementando resultados
    resultados_modelo["appliance"].append(a)
    resultados_modelo["fold"].append(it+1)
    resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
    resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
    resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
    resultados_modelo["base"].append("teste")

    print()
    print("   - Final Results:")
    print("   ---")
    print()

    print("      -> Classification Report:")
    print()
    print(classification_report(y_true, y_pred))
    print("      -> Confusion Matrix:")
    print()
    print(confusion_matrix(y_true, y_pred))
    
    print()
    
# Consolidating DataFrame
df_resultados_svm = pd.DataFrame(resultados_modelo)
df_resultados_svm.to_excel(os.path.join(caminho_dados_notebook, "df_resultados_xgboost.xlsx"))
    
print("############################## FINAL MODEL RESULTS ##############################")
display(df_resultados_svm.groupby(["appliance","base"]).agg({
    "acc": ["mean", "std", "max", "min"],
    "f1": ["mean", "std", "max", "min"],
    "auc": ["mean", "std", "max", "min"]
}))

* Appliance `dish_washer - 9`...

   - Evaluation model (CV - 10 folds)...


   - Final Results:
   ---

      -> Classification Report:

              precision    recall  f1-score   support

           0       0.97      1.00      0.99       583
           1       0.00      0.00      0.00        15

    accuracy                           0.97       598
   macro avg       0.49      0.50      0.49       598
weighted avg       0.95      0.97      0.96       598

      -> Confusion Matrix:

[[581   2]
 [ 15   0]]

* Appliance `fridge - 7`...

   - Evaluation model (CV - 10 folds)...


   - Final Results:
   ---

      -> Classification Report:

              precision    recall  f1-score   support

           1       1.00      1.00      1.00       359

    accuracy                           1.00       359
   macro avg       1.00      1.00      1.00       359
weighted avg       1.00      1.00      1.00       359

      -> Confusion Matrix:

[[359]]

* Appliance `microwave - 16`...

   - Ev


   - Final Results:
   ---

      -> Classification Report:

              precision    recall  f1-score   support

           0       0.95      0.98      0.97       452
           1       0.23      0.12      0.16        25

    accuracy                           0.93       477
   macro avg       0.59      0.55      0.56       477
weighted avg       0.91      0.93      0.92       477

      -> Confusion Matrix:

[[442  10]
 [ 22   3]]

* Appliance `washer_dryer - 13`...

   - Evaluation model (CV - 10 folds)...


   - Final Results:
   ---

      -> Classification Report:

              precision    recall  f1-score   support

           0       0.97      1.00      0.98     13729
           1       0.56      0.08      0.14       469

    accuracy                           0.97     14198
   macro avg       0.77      0.54      0.56     14198
weighted avg       0.96      0.97      0.96     14198

      -> Confusion Matrix:

[[13701    28]
 [  433    36]]

* Appliance `washer_dryer - 14`.


   - Final Results:
   ---

      -> Classification Report:

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      1524
           1       0.85      0.76      0.81        68

    accuracy                           0.98      1592
   macro avg       0.92      0.88      0.90      1592
weighted avg       0.98      0.98      0.98      1592

      -> Confusion Matrix:

[[1515    9]
 [  16   52]]

############################## FINAL MODEL RESULTS ##############################


Unnamed: 0_level_0,Unnamed: 1_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,base,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
dish_washer - 9,teste,0.965686,,0.965686,0.965686,0.55375,,0.55375,0.55375,0.53593,,0.53593,0.53593
dish_washer - 9,treino,0.971582,0.011246,0.983333,0.95,0.492778,0.002899,0.495798,0.487179,0.498276,0.003635,0.5,0.491379
fridge - 7,teste,0.981752,,0.981752,0.981752,0.495396,,0.495396,0.495396,0.5,,0.5,0.5
fridge - 7,treino,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.5,0.0,0.5,0.5
microwave - 16,teste,0.932515,,0.932515,0.932515,0.615648,,0.615648,0.615648,0.590196,,0.590196,0.590196
microwave - 16,treino,0.932934,0.02767,0.978723,0.893617,0.555076,0.129081,0.827839,0.472527,0.55558,0.106782,0.75,0.477778
washer_dryer - 13,teste,0.955222,,0.955222,0.955222,0.572949,,0.572949,0.572949,0.547704,,0.547704,0.547704
washer_dryer - 13,treino,0.967531,0.000969,0.969718,0.966173,0.55822,0.02688,0.615082,0.512596,0.537371,0.016528,0.573376,0.510638
washer_dryer - 14,teste,0.950185,,0.950185,0.950185,0.687007,,0.687007,0.687007,0.640269,,0.640269,0.640269
washer_dryer - 14,treino,0.984292,0.008999,1.0,0.968553,0.899,0.052945,1.0,0.825083,0.879189,0.061724,1.0,0.782425


## MLP

In [70]:
# resultados_modelo = {
#     "appliance": [], "fold": [],
#     "acc": [], "f1": [], "auc": []
# }

# for a in df_rqa["Appliance"].unique():
    
#     print(f"* Appliance `{a}`...\n")
    
#     df_ = df_rqa[df_rqa["Appliance"]==a]
#     X = np.nan_to_num(df_[["Recurrence rate (RR)","Determinism (DET)"]].values)
#     y = df_["State"].values
    
#     y_true, y_pred  = [], []
    
    
#     print("   - Evaluation model (CV - 10 folds)...\n")
#     for it, (idx_treino, idx_teste) in enumerate(skf.split(X, y)):

#         # Preparando lotes
#         X_treino, X_teste = X[idx_treino], X[idx_teste]
#         y_treino, y_teste = y[idx_treino], y[idx_teste]
        
#         # Treinando modelo
#         modelo = MLPClassifier(alpha=1e-3, hidden_layer_sizes=(10,), random_state=SEED)
#         modelo.fit(X_treino, y_treino)
        
#         # Prevendo conjunto de teste
#         y_hat = modelo.predict(X_teste)

#         # Incrementando resultados
#         resultados_modelo["appliance"].append(a)
#         resultados_modelo["fold"].append(it+1)
#         resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
#         resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
#         resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
        
#         # Extendendo rotulos (analise global)
#         y_true.extend(y_teste)
#         y_pred.extend(y_hat)
        
#     print()
#     print("   - Final Results:")
#     print("   ---")
#     print()

#     print("      -> Classification Report:")
#     print()
#     print(classification_report(y_true, y_pred))
#     print("      -> Confusion Matrix:")
#     print()
#     print(confusion_matrix(y_true, y_pred))
    
#     print()
    
# # Consolidating DataFrame
# df_resultados_mlp = pd.DataFrame(resultados_modelo)
# df_resultados_mlp.to_excel(os.path.join(caminho_dados_notebook, "df_resultados_mlp.xlsx"))

# print("############################## FINAL MODEL RESULTS ##############################")
# display(df_resultados_mlp.groupby("appliance").agg({
#     "acc": ["mean", "std", "max", "min"],
#     "f1": ["mean", "std", "max", "min"],
#     "auc": ["mean", "std", "max", "min"]
# }))

In [71]:
resultados_modelo = {
    "appliance": [], "fold": [],
    "acc": [], "f1": [], "auc": [], "base": []
}

for a in df_rqa_treino["Appliance"].unique():
    
    print(f"* Appliance `{a}`...\n")
    
    # Base de treino
    df_treino = df_rqa_treino[df_rqa_treino["Appliance"]==a]
    X = np.nan_to_num(df_treino[["Recurrence rate (RR)","Determinism (DET)"]].values)
    y = df_treino["State"].values
    
    y_true, y_pred  = [], []
    
    
    print("   - Evaluation model (CV - 10 folds)...\n")
    for it, (idx_treino, idx_teste) in enumerate(skf.split(X, y)):

        # Preparando lotes
        X_treino, X_teste = X[idx_treino], X[idx_teste]
        y_treino, y_teste = y[idx_treino], y[idx_teste]
        
        if len(set(y_treino))>1:
        
            # Treinando modelo
            modelo = MLPClassifier(alpha=1e-3, hidden_layer_sizes=(10,), random_state=SEED)
            modelo.fit(X_treino, y_treino)

            # Prevendo conjunto de teste
            y_hat = modelo.predict(X_teste)

            # Incrementando resultados
            resultados_modelo["appliance"].append(a)
            resultados_modelo["fold"].append(it+1)
            resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
            resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
            resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
            resultados_modelo["base"].append("treino")

            # Extendendo rotulos (analise global)
            y_true.extend(y_teste)
            y_pred.extend(y_hat)
        
    # Base de teste
    df_teste = df_rqa_teste[df_rqa_teste["Appliance"]==a]
    X_teste = np.nan_to_num(df_teste[["Recurrence rate (RR)","Determinism (DET)"]].values)
    y_teste = df_teste["State"].values
    
    # Treinando/avaliando modelo
    modelo = MLPClassifier(alpha=1e-3, hidden_layer_sizes=(10,), random_state=SEED)
    modelo.fit(X, y)
    y_hat = modelo.predict(X_teste)
    
    # Incrementando resultados
    resultados_modelo["appliance"].append(a)
    resultados_modelo["fold"].append(it+1)
    resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
    resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
    resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
    resultados_modelo["base"].append("teste")

    print()
    print("   - Final Results:")
    print("   ---")
    print()

    print("      -> Classification Report:")
    print()
    print(classification_report(y_true, y_pred))
    print("      -> Confusion Matrix:")
    print()
    print(confusion_matrix(y_true, y_pred))
    
    print()
    
# Consolidating DataFrame
df_resultados_svm = pd.DataFrame(resultados_modelo)
df_resultados_svm.to_excel(os.path.join(caminho_dados_notebook, "df_resultados_mlp.xlsx"))
    
print("############################## FINAL MODEL RESULTS ##############################")
display(df_resultados_svm.groupby(["appliance","base"]).agg({
    "acc": ["mean", "std", "max", "min"],
    "f1": ["mean", "std", "max", "min"],
    "auc": ["mean", "std", "max", "min"]
}))

* Appliance `dish_washer - 9`...

   - Evaluation model (CV - 10 folds)...


   - Final Results:
   ---

      -> Classification Report:

              precision    recall  f1-score   support

           0       0.97      1.00      0.99       583
           1       0.00      0.00      0.00        15

    accuracy                           0.97       598
   macro avg       0.49      0.50      0.49       598
weighted avg       0.95      0.97      0.96       598

      -> Confusion Matrix:

[[583   0]
 [ 15   0]]

* Appliance `fridge - 7`...

   - Evaluation model (CV - 10 folds)...


   - Final Results:
   ---

      -> Classification Report:

              precision    recall  f1-score   support

           1       1.00      1.00      1.00       359

    accuracy                           1.00       359
   macro avg       1.00      1.00      1.00       359
weighted avg       1.00      1.00      1.00       359

      -> Confusion Matrix:

[[359]]

* Appliance `microwave - 16`...

   - Ev

Unnamed: 0_level_0,Unnamed: 1_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,base,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
dish_washer - 9,teste,0.968137,,0.968137,0.968137,0.491905,,0.491905,0.491905,0.5,,0.5,0.5
dish_washer - 9,treino,0.974944,0.008725,0.983333,0.966667,0.493648,0.002237,0.495798,0.491525,0.5,0.0,0.5,0.5
fridge - 7,teste,0.981752,,0.981752,0.981752,0.495396,,0.495396,0.495396,0.5,,0.5,0.5
fridge - 7,treino,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.5,0.0,0.5,0.5
microwave - 16,teste,0.93865,,0.93865,0.93865,0.484177,,0.484177,0.484177,0.5,,0.5,0.5
microwave - 16,treino,0.947651,0.010705,0.958333,0.9375,0.486547,0.002822,0.489362,0.483871,0.5,0.0,0.5,0.5
washer_dryer - 13,teste,0.953981,,0.953981,0.953981,0.488224,,0.488224,0.488224,0.5,,0.5,0.5
washer_dryer - 13,treino,0.966967,0.000216,0.967583,0.966878,0.491603,5.6e-05,0.491762,0.49158,0.5,0.0,0.5,0.5
washer_dryer - 14,teste,0.942804,,0.942804,0.942804,0.48528,,0.48528,0.48528,0.5,,0.5,0.5
washer_dryer - 14,treino,0.957288,0.002625,0.962264,0.955975,0.489088,0.000684,0.490385,0.488746,0.5,0.0,0.5,0.5


## ELM - Extreme Learning Machine 

In [74]:
from elm import ELM
from sklearn.preprocessing import normalize

ModuleNotFoundError: No module named 'elm'

In [23]:
resultados_modelo = {
    "appliance": [], "fold": [],
    "acc": [], "f1": [], "auc": []
}

for a in df_rqa["Appliance"].unique():
    
    print(f"* Appliance `{a}`...\n")
    
    df_ = df_rqa[df_rqa["Appliance"]==a]
    X = df_[["Recurrence rate (RR)","Determinism (DET)"]].values
    y = df_["State"].values
    
    y_true, y_pred  = [], []
    
    
    print("   - Evaluation model (CV - 10 folds)...\n")
    for it, (idx_treino, idx_teste) in enumerate(skf.split(X, y)):

        # Preparando lotes
        X_treino, X_teste = X[idx_treino], X[idx_teste]
        y_treino, y_teste = y[idx_treino], y[idx_teste]
        
        # Treinando modelo
        modelo = ELM(hid_num=10)
        modelo.fit(normalize(X_treino), y_treino)
        
        # Prevendo conjunto de teste
        y_hat = modelo.predict(normalize(X_teste))
        y_hat = (y_hat > 0.5).astype(int)

        # Incrementando resultados
        resultados_modelo["appliance"].append(a)
        resultados_modelo["fold"].append(it+1)
        resultados_modelo["acc"].append( accuracy_score(y_teste, y_hat) )
        resultados_modelo["f1"].append( f1_score(y_teste, y_hat, average="macro") )
        resultados_modelo["auc"].append(roc_auc_score(y_teste, y_hat) if np.unique(y_teste).shape[0]>1 else 0.5)
        
        # Extendendo rotulos (analise global)
        y_true.extend(y_teste)
        y_pred.extend(y_hat)
        
    print()
    print("   - Final Results:")
    print("   ---")
    print()

    print("      -> Classification Report:")
    print()
    print(classification_report(y_true, y_pred))
    print("      -> Confusion Matrix:")
    print()
    print(confusion_matrix(y_true, y_pred))
    
    print()
    
# Consolidating DataFrame
df_resultados_elm = pd.DataFrame(resultados_modelo)
df_resultados_elm.to_excel(os.path.join(caminho_dados_notebook, "df_resultados_elm.xlsx"))

print("############################## FINAL MODEL RESULTS ##############################")
display(df_resultados_elm.groupby("appliance").agg({
    "acc": ["mean", "std", "max", "min"],
    "f1": ["mean", "std", "max", "min"],
    "auc": ["mean", "std", "max", "min"]
}))

* Appliance `dish_washer - 9`...

   - Evaluation model (CV - 10 folds)...



NameError: name 'ELM' is not defined

# Análise dos Resultados (1)

In [77]:
df_resultados_svm = pd.read_excel(os.path.join(caminho_dados_notebook, "df_resultados_svm.xlsx"), engine='openpyxl')
df_resultados_svm["model"] = "SVM"

df_resultados_xgboost = pd.read_excel(os.path.join(caminho_dados_notebook, "df_resultados_xgboost.xlsx"), engine='openpyxl')
df_resultados_xgboost["model"] = "XGBOOST"

df_resultados_mlp = pd.read_excel(os.path.join(caminho_dados_notebook, "df_resultados_mlp.xlsx"), engine='openpyxl')
df_resultados_mlp["model"] = "MLP"

# df_resultados_elm = pd.read_excel(os.path.join(caminho_dados_notebook, "df_resultados_elm.xlsx"))
# df_resultados_elm["model"] = "ELM"

df_analise = pd.concat([
    df_resultados_svm,
    df_resultados_xgboost,
    df_resultados_mlp, 
#     df_resultados_elm,  
])

print("* Análise por modelo:")
df_analise_modelo = df_analise.groupby(["model","base"]).agg({
    "acc": ["mean","std","max","min"],
    "f1": ["mean","std","max","min"],
    "auc": ["mean","std","max","min"]
}).reset_index().sort_values(('f1','mean'), ascending=False).set_index("model")
display(df_analise_modelo)
df_analise_modelo.to_excel(os.path.join(caminho_dados_notebook, "df_analise_modelo.xlsx"))

print()
print("* Análise por aparelho/modelo:")
df_analise_aparelho = df_analise.groupby(["appliance","model","base"]).agg({
    "acc": ["mean","std","max","min"],
    "f1": ["mean","std","max","min"],
    "auc": ["mean","std","max","min"]
})#.reset_index().sort_values(('f1','mean'), ascending=False).set_index(["aparelho","metodologia"])
display(df_analise_aparelho)
df_analise_aparelho.to_excel(os.path.join(caminho_dados_notebook, "df_analise_aparelho.xls"))

* Análise por modelo:


Unnamed: 0_level_0,base,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
XGBOOST,treino,0.970681,0.026105,1.0,0.893617,0.694913,0.215395,1.0,0.472527,0.596003,0.156165,1.0,0.477778
XGBOOST,teste,0.957072,0.018284,0.981752,0.932515,0.58495,0.071581,0.687007,0.495396,0.56282,0.053958,0.640269,0.5
MLP,treino,0.968745,0.01868,1.0,0.9375,0.583854,0.199462,1.0,0.483871,0.5,0.0,0.5,0.5
SVM,treino,0.968745,0.01868,1.0,0.9375,0.583854,0.199462,1.0,0.483871,0.5,0.0,0.5,0.5
MLP,teste,0.957065,0.017917,0.981752,0.93865,0.488997,0.004665,0.495396,0.484177,0.5,0.0,0.5,0.5
SVM,teste,0.957065,0.017917,0.981752,0.93865,0.488997,0.004665,0.495396,0.484177,0.5,0.0,0.5,0.5



* Análise por aparelho/modelo:


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,acc,acc,acc,acc,f1,f1,f1,f1,auc,auc,auc,auc
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,max,min,mean,std,max,min,mean,std,max,min
appliance,model,base,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
dish_washer - 9,MLP,teste,0.968137,,0.968137,0.968137,0.491905,,0.491905,0.491905,0.5,,0.5,0.5
dish_washer - 9,MLP,treino,0.974944,0.008725,0.983333,0.966667,0.493648,0.002237,0.495798,0.491525,0.5,0.0,0.5,0.5
dish_washer - 9,SVM,teste,0.968137,,0.968137,0.968137,0.491905,,0.491905,0.491905,0.5,,0.5,0.5
dish_washer - 9,SVM,treino,0.974944,0.008725,0.983333,0.966667,0.493648,0.002237,0.495798,0.491525,0.5,0.0,0.5,0.5
dish_washer - 9,XGBOOST,teste,0.965686,,0.965686,0.965686,0.55375,,0.55375,0.55375,0.53593,,0.53593,0.53593
dish_washer - 9,XGBOOST,treino,0.971582,0.011246,0.983333,0.95,0.492778,0.002899,0.495798,0.487179,0.498276,0.003635,0.5,0.491379
fridge - 7,MLP,teste,0.981752,,0.981752,0.981752,0.495396,,0.495396,0.495396,0.5,,0.5,0.5
fridge - 7,MLP,treino,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.5,0.0,0.5,0.5
fridge - 7,SVM,teste,0.981752,,0.981752,0.981752,0.495396,,0.495396,0.495396,0.5,,0.5,0.5
fridge - 7,SVM,treino,1.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.5,0.0,0.5,0.5


# Conclusões

...

# Fim.

In [763]:
%load_ext watermark

In [764]:
%watermark -a "Diego Luiz Cavalca" -u -n -t -z -v -m -g

Diego Luiz Cavalca 
last updated: Fri Jul 03 2020 17:24:36 Hora oficial do Brasil 

CPython 3.6.7
IPython 7.6.1

compiler   : MSC v.1900 64 bit (AMD64)
system     : Windows
release    : 10
machine    : AMD64
processor  : Intel64 Family 6 Model 158 Stepping 9, GenuineIntel
CPU cores  : 8
interpreter: 64bit
Git hash   : fe2077f45387300d020bc7d9c113451844efc83a
