# TrainClassifier

+ Faz o treinamento e a avaliação da acurácia dos classificadores utilizando um arquivo `.csv` gerado pelo notebook `Main.ipynb`

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC, SVR
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier


In [2]:
# Load the dataset and split it into the feature matrix X and the label vector y
data = pd.read_csv("./data_matlab_artigo.csv")
X = (
    data.iloc[:, :-1]                      # every column except the last one
        .drop(columns=['Mean', 'Median'])  # discard the mean and median columns
)
y = data.iloc[:, -1]  # the last column, passed as `y` to the classifiers

## Train Models
---

### Bagged Trees Ensemble

In [3]:
# Build the base classifier (a single decision tree)
base_classifier = DecisionTreeClassifier()

# Build the Bagged Trees Ensemble classifier.
# The base estimator is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, while the first positional argument works on every version.
bagged_classifier = BaggingClassifier(base_classifier, n_estimators=10, random_state=42)

# Run a stratified 5-fold cross-validation.
# The splitter is seeded so the shuffled folds (and therefore the reported
# accuracies) are the same on every "Restart & Run All".
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
bgtree_scores = cross_val_score(bagged_classifier, X, y, cv=skf)

# Print the accuracy of each fold and the mean accuracy
print("Acurácia em cada fold :", bgtree_scores)
print("Acurácia média        :", bgtree_scores.mean())



Acurácia em cada fold : [0.98243148 0.97470134 0.9845397  0.97540408 0.97329585]
Acurácia média        : 0.9780744905130007


### Quadratic SVM

In [4]:
# Build the quadratic SVM classifier (degree=2 selects the quadratic polynomial kernel)
svm_classifier = SVC(kernel='poly', degree=2, C=2)

# Run a stratified 5-fold cross-validation.
# The splitter is seeded so the shuffled folds (and therefore the reported
# accuracies) are the same on every "Restart & Run All".
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
svm_cv_scores = cross_val_score(svm_classifier, X, y, cv=skf)

# Print the accuracy of each fold and the mean accuracy
print("Acurácia em cada fold :", svm_cv_scores)
print("Acurácia média        :", svm_cv_scores.mean())

Acurácia em cada fold : [0.86718201 0.86577653 0.86296557 0.86929023 0.87069571]
Acurácia média        : 0.8671820098383696


### Fine Decision Tree

In [5]:
# Build the Fine Decision Tree classifier.
# The tree is seeded because scikit-learn permutes the features at each split,
# so ties between equally good splits could otherwise be broken differently
# from one run to the next.
fine_tree_classifier = DecisionTreeClassifier(random_state=42)

# Run a stratified 5-fold cross-validation.
# The splitter is seeded so the shuffled folds (and therefore the reported
# accuracies) are the same on every "Restart & Run All".
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
fdtree_cv_scores = cross_val_score(fine_tree_classifier, X, y, cv=skf)

# Print the accuracy of each fold and the mean accuracy
print("Acurácia em cada fold :", fdtree_cv_scores)
print("Acurácia média        :", fdtree_cv_scores.mean())

Acurácia em cada fold : [0.96486297 0.97680956 0.96837667 0.97470134 0.96275474]
Acurácia média        : 0.9695010541110332


### Naïve Bayes

In [6]:
# Build the Gaussian Naïve Bayes classifier
naive_bayes_classifier = GaussianNB()

# Run a stratified 5-fold cross-validation.
# The splitter is seeded so the shuffled folds (and therefore the reported
# accuracies) are the same on every "Restart & Run All".
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
nb_cv_scores = cross_val_score(naive_bayes_classifier, X, y, cv=skf)

# Print the accuracy of each fold and the mean accuracy
print("Acurácia em cada fold :", nb_cv_scores)
print("Acurácia média        :", nb_cv_scores.mean())

Acurácia em cada fold : [0.90513001 0.8945889  0.89107519 0.90653549 0.89248067]
Acurácia média        : 0.897962052002811


### KNN

In [7]:
# Build the KNN classifier
knn_classifier = KNeighborsClassifier(n_neighbors=5)  # number of neighbours = 5 (can be tuned)

# Run a stratified 5-fold cross-validation.
# The splitter is seeded so the shuffled folds (and therefore the reported
# accuracies) are the same on every "Restart & Run All".
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
knn_cv_scores = cross_val_score(knn_classifier, X, y, cv=skf)

# Print the accuracy of each fold and the mean accuracy
print("Acurácia em cada fold :", knn_cv_scores)
print("Acurácia média        :", knn_cv_scores.mean())

Acurácia em cada fold : [0.91075193 0.92761771 0.89669712 0.91496838 0.88826423]
Acurácia média        : 0.907659873506676


In [8]:
# Summary: mean cross-validation accuracy of every classifier, one per line
print(
    "\n".join(
        [
            f"Bagged Trees Ensemble: {bgtree_scores.mean()}",
            f"Quadratic SVM:         {svm_cv_scores.mean()}",
            f"Fine Decision Tree :   {fdtree_cv_scores.mean()}",
            f"Naïve Bayes :          {nb_cv_scores.mean()}",
            f"KNeighbors (KNN) :     {knn_cv_scores.mean()}",
        ]
    )
)

Bagged Trees Ensemble: 0.9780744905130007
Quadratic SVM:         0.8671820098383696
Fine Decision Tree :   0.9695010541110332
Naïve Bayes :          0.897962052002811
KNeighbors (KNN) :     0.907659873506676


## Export Classifiers
---

In [9]:
import emlearn
from os import path

In [27]:
# Convert model using emlearn
# NOTE(review): this cell contains no conversion code, yet its stored output
# lists file paths — the code that produced them appears to have been removed.
# TODO: restore the emlearn conversion call (or clear the stale output) so the
# notebook survives "Restart & Run All".

/home/dev/Workspace/TG/source/test-emlearn/test.h
/home/dev/Workspace/TG/source
