In [None]:
%load_ext autoreload
%autoreload 5

In [None]:
import json
import numpy as np
import pandas as pd
import random

from sklearn.metrics import (
    classification_report,
    confusion_matrix,
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    plot_confusion_matrix,
)

import matplotlib
import matplotlib.pyplot as plt
import numpy as np

from weight_lifting import WeightLifting
from sfs_features import (
    get_lr_features,
    get_svm_features,
    get_mpl_features
)

from sklearn.ensemble import IsolationForest

In [None]:
# Seed both global random number generators (Python's `random` and NumPy's
# legacy global RNG) with the same value so runs are repeatable.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

### BASE PADRÃO

In [None]:
# Baseline experiment: load the data, apply the engine's transform, split it
# into train/test sets and fit/predict using every available column.
engine = WeightLifting()
df = engine.transform(engine.load_df())

(X_train, X_test, y_train, y_test) = engine.create_train_test(df)

base_padrao_predict = engine.fit_and_predict(
    X_train,
    X_test,
    y_train,
    y_test,
)
# Optional: uncomment to plot the baseline results.
#engine.plot_results(base_padrao_predict, X_test, y_test)

### FEATURE SELECTION WITH SequentialFeatureSelector 

In [None]:
# Re-run the experiment once per model, each time restricted to the feature
# subset selected for that model, and keep only that model's result.
# fit_and_predict is indexed below as [LR, SVM, MLP], following the original
# inline notes — TODO confirm the order against WeightLifting.fit_and_predict.
sfs_predict = []

# LOGISTIC REGRESSION
# Fix: use the LR-specific feature subset. This call previously passed
# get_mpl_features(), so LR was evaluated on the MLP's features.
lr_X_train, lr_X_test, lr_y_train, lr_y_test = engine.create_train_test(df, features=get_lr_features())
lr_predict = engine.fit_and_predict(lr_X_train, lr_X_test, lr_y_train, lr_y_test)
lr_predict = lr_predict[0]  # only the LR result matters here
sfs_predict.append(lr_predict)

# SUPPORT VECTOR MACHINES
svm_X_train, svm_X_test, svm_y_train, svm_y_test = engine.create_train_test(df, features=get_svm_features())
svm_predict = engine.fit_and_predict(svm_X_train, svm_X_test, svm_y_train, svm_y_test)
svm_predict = svm_predict[1]  # only the SVM result matters here
sfs_predict.append(svm_predict)

# MULTILAYER PERCEPTRON
mlp_X_train, mlp_X_test, mlp_y_train, mlp_y_test = engine.create_train_test(df, features=get_mpl_features())
mpl_predict = engine.fit_and_predict(mlp_X_train, mlp_X_test, mlp_y_train, mlp_y_test)
mpl_predict = mpl_predict[2]  # only the MLP result matters here
sfs_predict.append(mpl_predict)

# Optional: uncomment to plot each model against its own test split.
#engine.plot_results([sfs_predict[0]], lr_X_test, lr_y_test)
#engine.plot_results([sfs_predict[1]], svm_X_test, svm_y_test)
#engine.plot_results([sfs_predict[2]], mlp_X_test, mlp_y_test)

### ISOLATION FOREST

In [None]:
# Outlier removal: flag outliers with an Isolation Forest, then repeat the
# baseline experiment on the remaining rows only.
#
# contamination=0.05 sets the expected proportion of outliers to 5%.
# random_state pins the forest's own RNG so the outlier mask no longer depends
# on how much of the global NumPy RNG earlier cells have consumed, i.e. the
# mask is identical regardless of cell execution order.
iso = IsolationForest(contamination=0.05, random_state=42)

# Every column except the last is passed to the detector
# (assumes the last column is the target — TODO confirm).
# NOTE(review): the detector is fitted on the whole frame, before the
# train/test split — confirm that filtering test rows this way is intended.
predict = iso.fit_predict(df.iloc[:, 0:-1])

# fit_predict labels outliers as -1 and inliers as 1; keep only the inliers.
mask = predict != -1

iso_X_train, iso_X_test, iso_y_train, iso_y_test = engine.create_train_test(df.iloc[mask])

isolation_predict = engine.fit_and_predict(iso_X_train, iso_X_test, iso_y_train, iso_y_test)

# Optional: uncomment to plot the results on the outlier-free split.
#engine.plot_results(isolation_predict, iso_X_test, iso_y_test)
#plot_confusion_matrix(isolation_predict[2]['model'], iso_X_test, iso_y_test, values_format = '.5g')

### ISOLATION FOREST + SFS

In [None]:
# Combine both treatments: train on the outlier-free rows (df.iloc[mask]) and
# restrict each model to the feature subset selected for it.
# fit_and_predict is indexed below as [LR, SVM, MLP], following the original
# inline notes — TODO confirm the order against WeightLifting.fit_and_predict.
iso_sfs_predict = []

# LOGISTIC REGRESSION
# Fix: use the LR-specific feature subset. This call previously passed
# get_mpl_features(), so LR was evaluated on the MLP's features.
iso_lr_X_train, iso_lr_X_test, iso_lr_y_train, iso_lr_y_test = engine.create_train_test(df.iloc[mask], features=get_lr_features())
iso_lr_predict = engine.fit_and_predict(iso_lr_X_train, iso_lr_X_test, iso_lr_y_train, iso_lr_y_test)
iso_lr_predict = iso_lr_predict[0]  # only the LR result matters here
iso_sfs_predict.append(iso_lr_predict)

# SUPPORT VECTOR MACHINES
iso_svm_X_train, iso_svm_X_test, iso_svm_y_train, iso_svm_y_test = engine.create_train_test(df.iloc[mask], features=get_svm_features())
iso_svm_predict = engine.fit_and_predict(iso_svm_X_train, iso_svm_X_test, iso_svm_y_train, iso_svm_y_test)
iso_svm_predict = iso_svm_predict[1]  # only the SVM result matters here
iso_sfs_predict.append(iso_svm_predict)

# MULTILAYER PERCEPTRON
iso_mlp_X_train, iso_mlp_X_test, iso_mlp_y_train, iso_mlp_y_test = engine.create_train_test(df.iloc[mask], features=get_mpl_features())
iso_mpl_predict = engine.fit_and_predict(iso_mlp_X_train, iso_mlp_X_test, iso_mlp_y_train, iso_mlp_y_test)
iso_mpl_predict = iso_mpl_predict[2]  # only the MLP result matters here
iso_sfs_predict.append(iso_mpl_predict)

# Optional: uncomment to plot each model against its own test split.
#plot_confusion_matrix(iso_sfs_predict[0]['model'], iso_lr_X_test, iso_lr_y_test, values_format = '.5g')
#engine.plot_results([iso_sfs_predict[0]], iso_lr_X_test, iso_lr_y_test)
#engine.plot_results([iso_sfs_predict[1]], iso_svm_X_test, iso_svm_y_test)
#engine.plot_results([iso_sfs_predict[2]], iso_mlp_X_test, iso_mlp_y_test)

In [None]:
# Confusion matrix for the third ISO + SFS result (the MLP entry), evaluated
# on the test split it was trained alongside.
# NOTE(review): plot_confusion_matrix was deprecated in scikit-learn 1.0 and
# removed in 1.2; ConfusionMatrixDisplay.from_estimator is its replacement.
plot_confusion_matrix(
    iso_sfs_predict[2]['model'],
    iso_mlp_X_test,
    iso_mlp_y_test,
    values_format='.5g',
)

In [None]:
# Plot the engine's result view for the third ISO + SFS entry (the MLP) only.
# It is wrapped in a one-element list, matching how the other plot_results
# calls in this notebook pass results.
engine.plot_results(
    [iso_sfs_predict[2]],
    iso_mlp_X_test,
    iso_mlp_y_test,
)

### COMPARAÇÃO DE RESULTADOS

In [None]:
# Merge the results of every experiment into a single DataFrame.
# ESTADO tags the experiment each row came from; NAME_ESTADO combines the
# model name with that tag to give a per-row label.

# BASELINE
resultados_inicial = pd.DataFrame(base_padrao_predict)
resultados_inicial['ESTADO'] = 'INICIAL'

# SFS
resultados_sfs = pd.DataFrame(sfs_predict)
resultados_sfs['ESTADO'] = 'SFS'

# ISO
resultados_iso = pd.DataFrame(isolation_predict)
resultados_iso['ESTADO'] = 'ISO'

# ISO + SFS (this experiment was computed above but was missing from the
# comparison table)
resultados_iso_sfs = pd.DataFrame(iso_sfs_predict)
resultados_iso_sfs['ESTADO'] = 'ISO_SFS'

# ignore_index=True gives every row a unique label instead of repeating
# 0..n-1 once per experiment.
resultados = pd.concat(
    [resultados_inicial, resultados_sfs, resultados_iso, resultados_iso_sfs],
    ignore_index=True,
)
resultados['NAME_ESTADO'] = resultados['name'] + '_' + resultados['ESTADO']

In [None]:
# Display the combined results table (one row per model and experiment).
resultados

In [None]:
# Computing metrics.
#
# Every experiment was evaluated on its own test split, so each prediction has
# to be scored against the labels of the split it was made on. Scoring all
# rows against the baseline y_test (as was done before) compares predictions
# with the wrong labels for the outlier-filtered splits.

# Ground-truth labels for each experiment, listed in the order its results
# were appended. The 'ISO_SFS' entry is only used when rows with that ESTADO
# are present in `resultados`.
y_true_by_estado = {
    'INICIAL': [y_test] * len(base_padrao_predict),
    'SFS': [lr_y_test, svm_y_test, mlp_y_test],
    'ISO': [iso_y_test] * len(isolation_predict),
    'ISO_SFS': [iso_lr_y_test, iso_svm_y_test, iso_mlp_y_test],
}

# Pair every row with its labels by its position inside its ESTADO group.
# This is independent of the frame's index labels.
seen_per_estado = {}
y_true_per_row = []
for estado in resultados['ESTADO']:
    position = seen_per_estado.get(estado, 0)
    y_true_per_row.append(y_true_by_estado[estado][position])
    seen_per_estado[estado] = position + 1

y_pred_per_row = list(resultados['predict'])


def _score_rows(metric, **metric_kwargs):
    """Return `metric` rounded to 4 decimals for every row of `resultados`."""
    return [
        round(metric(y_true, y_pred, **metric_kwargs), 4)
        for y_true, y_pred in zip(y_true_per_row, y_pred_per_row)
    ]


resultados['accuracy_score'] = _score_rows(accuracy_score)
resultados['f1_score'] = _score_rows(f1_score, average='macro')
resultados['precision_score'] = _score_rows(precision_score, average='macro')
resultados['recall_score'] = _score_rows(recall_score, average='macro')

resultados.sort_values(by='name')