# Testes e Otimização de Classificadores

In [2]:
import os
from os.path import join, dirname
from dotenv import load_dotenv

dotenv_path = join(dirname('__file__'), '.env')

load_dotenv(dotenv_path)

ROOT_PATH = os.environ.get("ROOT_PATH")


In [3]:
import pandas as pd
import numpy as np

df = pd.read_parquet(f"{ROOT_PATH}/features/features.parquet")


## Definindo Classificadores

In [82]:
from sklearn.svm import SVC
from sklearn.model_selection import (cross_val_predict, KFold, cross_val_score)
from sklearn.metrics import (
    confusion_matrix, auc, roc_curve, plot_confusion_matrix, classification_report, accuracy_score)
import seaborn as sns
from sklearn.model_selection import GridSearchCV


def get_estimators(model, params, x, y):
    clf = GridSearchCV(estimator=model, param_grid=params, n_jobs=-1, cv=5)
    clf.fit(x, y)
    return clf


def plot_confusion_matrix(m):
    g1 = sns.heatmap(m, annot=True, cmap="YlGnBu")
    g1.set_xlabel('Predicted labels')
    g1.set_ylabel('True labels')
    g1.set_title('Confusion Matrix')


def norm_confusion_matrix(y, y_pred):
    m = confusion_matrix(y, y_pred)
    return m.astype('float')/m.sum(axis=1)[:, np.newaxis]


def svm(params: dict, x: np.ndarray, y: np.ndarray):
    model = SVC()
    clf = GridSearchCV(estimator=model, param_grid=params, n_jobs=-1)
    clf.fit(x, y)
    return clf

    # cv = KFold(n_splits=5, random_state=1, shuffle=True)
    # model = SVC(kernel='rbf', probability=True, C=100, gamma=0.00001)
    # scores = cross_val_score(model, x, y, scoring=[
    #  'accuracy'], cv=cv, n_jobs=-1)
    # print('Accuracy: %.3f (%.3f)' % (np.mean(scores), np.std(scores)))
    # y_pred = cross_val_predict(model, x, y, cv=10)
    # print(accuracy_score(y_true=y,y_pred=y_pred))
    # print(classification_report(y_true=y,y_pred=y_pred))


## Classificações Gerais

In [65]:
from sklearn.model_selection import (train_test_split, KFold)

x = df.loc[:, ['var', 'skew', 'kur']]
y = df.loc[:, 'label']

X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0)


In [105]:
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier as DTC
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis as QDA

param_svm = [{
    'C': [1, 10, 100, 1000],
    'gamma': [0.01, 0.001, 0.0001, 0.00001],
    'kernel': ['rbf']
}]


param_knn = [{
    'n_neighbors': [3, 5, 7, 9, 11, 15],
    'algorithm':['ball_tree', 'kd_tree', 'brute']
}]


param_tree = [{
    'criterion': ['gini', 'entropy'],
    'splitter': ['best', 'random'],
    'max_depth':np.arange(1, 7)
}]

result_svm = get_estimators(model=SVC(), params=param_svm, x=X_train, y=y_train)
result_knn = get_estimators(model=KNN(), params=param_knn, x=X_train, y=y_train)
result_tree = get_estimators(model=DTC(), params=param_tree, x=X_train, y=y_train)
result_qda = get_estimators(model=QDA(), params={}, x=X_train, y=y_train)


In [106]:
from pprint import pprint

best_params = {
    'SVM': result_svm.cv_results_['params'][0],
    'KNN': result_knn.cv_results_['params'][0],
    'DTC': result_tree.cv_results_['params'][0],
    'QDA': result_qda.cv_results_['params'][0]
}

pprint(best_params)


{'DTC': {'criterion': 'gini', 'max_depth': 1, 'splitter': 'best'},
 'KNN': {'algorithm': 'ball_tree', 'n_neighbors': 3},
 'QDA': {},
 'SVM': {'C': 1, 'gamma': 0.01, 'kernel': 'rbf'}}


In [108]:
from pprint import pprint

print(classification_report(y_test, result_knn.best_estimator_.predict(X_test)))

print(classification_report(y_test, result_svm.best_estimator_.predict(X_test)))

print(classification_report(y_test, result_tree.best_estimator_.predict(X_test)))

print(classification_report(y_test, result_qda.best_estimator_.predict(X_test)))



              precision    recall  f1-score   support

       ictal       1.00      1.00      1.00       196
      normal       1.00      1.00      1.00       183
         pos       1.00      1.00      1.00        67
         pre       1.00      1.00      1.00        50
         rep       1.00      1.00      1.00       214

    accuracy                           1.00       710
   macro avg       1.00      1.00      1.00       710
weighted avg       1.00      1.00      1.00       710

              precision    recall  f1-score   support

       ictal       1.00      1.00      1.00       196
      normal       1.00      1.00      1.00       183
         pos       1.00      1.00      1.00        67
         pre       1.00      1.00      1.00        50
         rep       1.00      1.00      1.00       214

    accuracy                           1.00       710
   macro avg       1.00      1.00      1.00       710
weighted avg       1.00      1.00      1.00       710

              precisio