# TrainClassifier

+ Faz o treinamento e testa acuracia dos classificadores utilizando um arquivo csv (SDCard.txt) que contem as features extraidas dos audios.
+ Classificadores utilizados:
    + `Bagged Trees Ensemble`
    + `Quadratic SVM`
    + `Fine Decision Tree`
    + `Naïve Bayes`
    + `KNeighbors (KNN)`

In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier


In [8]:
# Carregar os dados
## {'RMS','Mean','Peak1','Peak2','Peak3','PeakLocs1','PeakLocs2','PeakLocs3','FalutID'};
data = pd.read_csv("./SDcard.txt", delimiter=';' ,names=['FalutID', 'RMS','Mean','Peak1','Peak2','Peak3','PeakLocs1','PeakLocs2','PeakLocs3', 'Nada'])
X = data.iloc[:, 1:-1]                # All column except the last one
#X = X.drop(columns='Nada') # Remove the mean and median column
y = data.iloc[:, 0]                  # First Column

## Bagged Trees Ensemble

In [15]:
# Criar o classificador base (uma única árvore de decisão)
base_classifier = DecisionTreeClassifier()

# Criar o classificador Bagged Trees Ensemble
bagged_classifier = BaggingClassifier(base_estimator=base_classifier, n_estimators=10, random_state=42)

# Realizar a validação cruzada com k = 5
skf = StratifiedKFold(n_splits=5, shuffle=True)
bgtree_scores = cross_val_score(bagged_classifier, X, y, cv=skf)

print("Acurácia em cada fold :", bgtree_scores)
print("Acurácia média        :", bgtree_scores.mean())

Acurácia em cada fold : [0.96703297 0.95054945 0.97802198 0.96153846 0.94475138]
Acurácia média        : 0.9603788476716654




## Quadratic SVM

In [16]:
# Criar o classificador SVM Quadrático
svm_classifier = SVC(kernel='poly', degree=2)  # degree=2 para o kernel quadrático

# Realizar a validação cruzada com k = 5
skf = StratifiedKFold(n_splits=5, shuffle=True)
svm_cv_scores = cross_val_score(svm_classifier, X, y, cv=skf)

# Imprimir as acurácias de cada fold e a acurácia média
print("Acurácia em cada fold :", svm_cv_scores)
print("Acurácia média        :", svm_cv_scores.mean())

Acurácia em cada fold : [0.54385965 0.55947137 0.54185022 0.55506608]
Acurácia média        : 0.5500618285802612


## Fine Decision Tree

In [11]:
# Criar o classificador Fine Decision Tree
fine_tree_classifier = DecisionTreeClassifier()

# Realizar a validação cruzada com k = 5
skf = StratifiedKFold(n_splits=5, shuffle=True)
fdtree_cv_scores = cross_val_score(fine_tree_classifier, X, y, cv=skf)

# Imprimir as acurácias de cada fold e a acurácia média
print("Acurácia em cada fold :", fdtree_cv_scores)
print("Acurácia média        :", fdtree_cv_scores.mean())

Acurácia em cada fold : [0.94505495 0.98351648 0.94505495 0.96703297 0.95027624]
Acurácia média        : 0.9581871167506526


## Naïve Bayes

In [12]:
# Criar o classificador Naïve Bayes
naive_bayes_classifier = GaussianNB()

# Realizar a validação cruzada com k = 5
skf = StratifiedKFold(n_splits=5, shuffle=True)
nb_cv_scores = cross_val_score(naive_bayes_classifier, X, y, cv=skf)

# Imprimir as acurácias de cada fold e a acurácia média
print("Acurácia em cada fold :", nb_cv_scores)
print("Acurácia média        :", nb_cv_scores.mean())

Acurácia em cada fold : [0.95604396 0.96703297 0.92307692 0.95054945 0.93370166]
Acurácia média        : 0.946080990832372


## KNN

In [13]:
# Criar o classificador KNN
knn_classifier = KNeighborsClassifier(n_neighbors=5)  # Número de vizinhos = 3 (pode ser ajustado)

# # Realizar a validação cruzada com k = 5
skf = StratifiedKFold(n_splits=5, shuffle=True)
knn_cv_scores = cross_val_score(knn_classifier, X, y, cv=skf)

# Imprimir as acurácias de cada fold e a acurácia média
print("Acurácia em cada fold :", knn_cv_scores)
print("Acurácia média        :", knn_cv_scores.mean())

Acurácia em cada fold : [0.91758242 0.94505495 0.93406593 0.91208791 0.93370166]
Acurácia média        : 0.9284985732499544


In [14]:
# Resumo
print(f"Bagged Trees Ensemble: {bgtree_scores.mean()}")
print(f"Quadratic SVM:         {svm_cv_scores.mean()}")
print(f"Fine Decision Tree:   {fdtree_cv_scores.mean()}")
print(f"Naïve Bayes :          {nb_cv_scores.mean()}")
print(f"KNeighbors (KNN) :     {knn_cv_scores.mean()}")

Bagged Trees Ensemble: 0.9592981604031326
Quadratic SVM:         0.5599477870196102
Fine Decision Tree :   0.9581871167506526
Naïve Bayes :          0.946080990832372
KNeighbors (KNN) :     0.9284985732499544
