<a href="https://colab.research.google.com/github/andssuu/moncattle/blob/master/supervised/notebooks/top5_classifiers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Descrição

Notebook com as implementações dos classificadores mais utilizados em trabalhos de classificação do comportamento do gado.


# Clonando repositório

In [3]:
!git clone https://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@github.com/andssuu/moncattle.git

Cloning into 'moncattle'...
remote: Enumerating objects: 568, done.[K
remote: Counting objects: 100% (568/568), done.[K
remote: Compressing objects: 100% (489/489), done.[K
remote: Total 568 (delta 344), reused 135 (delta 56), pack-reused 0[K
Receiving objects: 100% (568/568), 6.84 MiB | 8.82 MiB/s, done.
Resolving deltas: 100% (344/344), done.


# Importação das bibliotecas

In [33]:
import numpy as np
import pandas as pd
# RF
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
# NB
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
# SVM 
from sklearn import svm
# DT
from sklearn import tree
# LDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Single seed shared by the whole notebook: it is passed as `random_state` to the
# cross-validation splitter and to the tree-based classifiers below, and it also
# seeds NumPy's legacy global random generator.
seed = 0
np.random.seed(seed)

# Leitura da base de dados

In [21]:
# Load the dataset shipped with the cloned repository.
df = pd.read_csv("moncattle/data/dataset.csv", float_precision='high')
# Not used by any cell visible in this notebook; presumably identifiers of the
# recording sources (collars/animals) -- TODO confirm before removing.
bases = ["A2", "A3", "B2", "B3", "C3", "C4", "D1", "D2", "D3", "D4"]
# Keep only the nine sensor channels (accelerometer, magnetometer and gyroscope
# column prefixes, three axes each) plus the target column.
df_data = df.loc[:, ['acc_x', 'acc_y', 'acc_z', 'mag_x', 'mag_y', 'mag_z', 'gir_x', 'gir_y', 'gir_z', 'label']]
# Derive features and target from the same curated frame, selecting by name.
# Taking the target from the last column of the raw `df` would silently pick the
# wrong column if the CSV's last column is not `label`.
data = df_data.drop(columns='label')
labels = df_data['label']

# Definição de funções e hiperparâmetros

In [22]:
def evaluate_cross(clf, n_splits=10):
    """Cross-validate `clf` on the notebook's dataset and print the results.

    The estimator is evaluated on the module-level `data` (features) and
    `labels` (target) with a shuffled, stratified K-fold split seeded by the
    module-level `seed`. No explicit `scoring` is passed, so the estimator's
    default scorer is used; the printed labels report it as accuracy.

    Parameters
    ----------
    clf : estimator
        Unfitted scikit-learn compatible classifier.
    n_splits : int, default=10
        Number of stratified folds. Previously this was read from an undefined
        global, which raised NameError on a fresh kernel; the default matches
        the 10 folds shown in the recorded outputs.

    Returns
    -------
    None
        Per-fold scores, their mean and their standard deviation are printed.
    """
    cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
    scores = cross_val_score(clf, data, labels, cv=cv)
    # Plain loop: the printing is a side effect, so no throwaway list is built.
    for k, score in enumerate(scores, 1):
        print("{}-Fold: {:.2f}".format(k, score))
    mean = np.mean(scores)
    std = np.std(scores)
    print("\tAcurácia média: {:.4f}".format(mean))
    print("\tDesvio Padrão: {:}".format(std))

# Random Forest

## Random Forest (Gini)

In [38]:
# Random forest of 100 trees using the Gini criterion for splits; seeded so the
# run is repeatable.
clf = RandomForestClassifier(
    criterion='gini',
    n_estimators=100,
    random_state=seed,
)
evaluate_cross(clf)

1-Fold: 0.90
2-Fold: 0.90
3-Fold: 0.91
4-Fold: 0.91
5-Fold: 0.90
6-Fold: 0.90
7-Fold: 0.91
8-Fold: 0.89
9-Fold: 0.91
10-Fold: 0.89
	Acurácia média: 0.9015
	Desvio Padrão: 0.0076781576614601095


## Random Forest (Entropy)

In [24]:
# Same forest configuration as the Gini run, but splitting on the entropy
# criterion, so the two results are directly comparable.
clf = RandomForestClassifier(
    criterion='entropy',
    n_estimators=100,
    random_state=seed,
)
evaluate_cross(clf)

1-Fold: 0.89
2-Fold: 0.90
3-Fold: 0.90
4-Fold: 0.90
5-Fold: 0.90
6-Fold: 0.89
7-Fold: 0.91
8-Fold: 0.89
9-Fold: 0.91
10-Fold: 0.89
	Acurácia média: 0.8987
	Desvio Padrão: 0.008298607079238114


# Naive Bayes

In [43]:
# Gaussian Naive Bayes with the library's default hyperparameters (no arguments
# are passed), evaluated with the shared cross-validation helper.
clf = GaussianNB()
evaluate_cross(clf)

1-Fold: 0.78
2-Fold: 0.77
3-Fold: 0.76
4-Fold: 0.77
5-Fold: 0.77
6-Fold: 0.76
7-Fold: 0.77
8-Fold: 0.76
9-Fold: 0.79
10-Fold: 0.78
	Acurácia média: 0.7712
	Desvio Padrão: 0.0077546776892008975


# SVM

In [44]:
# Support vector classifier with the library's default hyperparameters (no
# arguments are passed), evaluated with the shared cross-validation helper.
# NOTE(review): the features are fed in unscaled; SVMs are usually sensitive to
# feature scale, so a scaling step may change this result -- confirm intent.
clf = svm.SVC()
evaluate_cross(clf)

1-Fold: 0.82
2-Fold: 0.81
3-Fold: 0.82
4-Fold: 0.82
5-Fold: 0.82
6-Fold: 0.82
7-Fold: 0.82
8-Fold: 0.80
9-Fold: 0.82
10-Fold: 0.82
	Acurácia média: 0.8167
	Desvio Padrão: 0.005674273419347853


# Decision Tree

In [53]:
# Single decision tree with default hyperparameters apart from the shared seed,
# evaluated with the shared cross-validation helper.
clf = tree.DecisionTreeClassifier(random_state=seed)
evaluate_cross(clf)

1-Fold: 0.82
2-Fold: 0.85
3-Fold: 0.85
4-Fold: 0.85
5-Fold: 0.84
6-Fold: 0.83
7-Fold: 0.85
8-Fold: 0.84
9-Fold: 0.85
10-Fold: 0.83
	Acurácia média: 0.8398
	Desvio Padrão: 0.010708766635106786


# LDA

In [64]:
# Linear Discriminant Analysis with the library's default hyperparameters (no
# arguments are passed), evaluated with the shared cross-validation helper.
clf = LinearDiscriminantAnalysis()
evaluate_cross(clf)

1-Fold: 0.73
2-Fold: 0.72
3-Fold: 0.74
4-Fold: 0.74
5-Fold: 0.74
6-Fold: 0.72
7-Fold: 0.74
8-Fold: 0.72
9-Fold: 0.75
10-Fold: 0.72
	Acurácia média: 0.7321
	Desvio Padrão: 0.009514963694928444
