In [46]:
# Load the Wine dataset bundled with scikit-learn.
# The resulting `wine` object is read in this notebook through its
# `data`, `target` and `target_names` attributes.
from sklearn.datasets import load_wine
wine = load_wine()

In [47]:
# Example of pulling the arrays out of the dataset object:
#   x -> features of every sample (all rows, all columns)
#   y -> class of every sample
x, y = wine.data[:, :], wine.target

In [48]:
# Split the data into a training part and a test part.
# test_size=0.33 is the fraction reserved for testing; random_state=42 seeds
# the split.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.33,
    random_state=42,
)

In [49]:
# Fit a Random Forest on the training split and predict the test split.
#
# The forest is seeded with the same value as the train/test split. Building
# the trees is randomized, so without `random_state` the metrics, the
# cross-validation scores and the grid search that reuse `rfc` would change
# from one run to the next.
from sklearn.ensemble import RandomForestClassifier

rfc = RandomForestClassifier(random_state=42)
rfc.fit(x_train, y_train)

# `y_pred` is read by the Random Forest metrics cell and is reassigned by the
# KNN cell, so the metrics cell has to run before the KNN one.
y_pred = rfc.predict(x_test)

In [50]:
# Random Forest metrics on the test split, each rounded to 6 decimals.
# Recall and precision are aggregated over the classes with
# average='weighted'.
# `y_pred` must still hold the Random Forest predictions when this runs.
from sklearn.metrics import accuracy_score, recall_score, precision_score

rfc_acc, rfc_recall, rfc_precision = (
    round(score, 6)
    for score in (
        accuracy_score(y_test, y_pred),
        recall_score(y_test, y_pred, average='weighted'),
        precision_score(y_test, y_pred, average='weighted'),
    )
)

In [51]:
# Fit a K-Nearest-Neighbours classifier (k = 3) on the training split and
# predict the test split.
# `y_pred` is reassigned here, replacing the Random Forest predictions.
# NOTE(review): the features reach KNN exactly as loaded (no scaling step in
# this notebook) - confirm that is intended for a distance-based model.
from sklearn.neighbors import KNeighborsClassifier

# fit() returns the estimator itself, so `knn` is the fitted classifier.
knn = KNeighborsClassifier(n_neighbors=3).fit(x_train, y_train)
y_pred = knn.predict(x_test)

In [52]:
# KNN metrics on the test split, each rounded to 6 decimals.
# Recall and precision are aggregated over the classes with
# average='weighted'.
# `y_pred` must hold the KNN predictions when this runs.
knn_acc, knn_recall, knn_precision = (
    round(score, 6)
    for score in (
        accuracy_score(y_test, y_pred),
        recall_score(y_test, y_pred, average='weighted'),
        precision_score(y_test, y_pred, average='weighted'),
    )
)

In [53]:
# Side-by-side comparison of the two models: KNN value first, Random Forest
# value second on every metric line.
print('KNN vs Random Forest\n')
print('Classes: {0}'.format(wine.target_names))
for template, knn_value, rfc_value in (
    ('Acurácia: {0} vs {1}', knn_acc, rfc_acc),
    ('Recall: {0} vs {1}', knn_recall, rfc_recall),
    ('Precisão: {0} vs {1}', knn_precision, rfc_precision),
):
    print(template.format(knn_value, rfc_value))

KNN vs Random Forest

Classes: ['class_0' 'class_1' 'class_2']
Acurácia: 0.694915 vs 1.0
Recall: 0.694915 vs 1.0
Precisão: 0.698231 vs 1.0


In [54]:
# Cross-validate both models on the complete dataset (x, y) using
# cross_val_score's default settings.
from sklearn.model_selection import cross_val_score

cv_rfc = cross_val_score(rfc, x, y)
cv_knn = cross_val_score(knn, x, y)

# Per-fold scores first (KNN vs Random Forest), then the mean of the folds
# for each model.
print('Validação Cruzada: {0} vs {1}'.format(cv_knn, cv_rfc))
for template, fold_scores in (
    ('Resultado KNN: {0}', cv_knn),
    ('Resultado Random Forest: {0}', cv_rfc),
):
    print(template.format(fold_scores.sum() / len(fold_scores)))

Validação Cruzada: [0.63888889 0.69444444 0.66666667 0.65714286 0.85714286] vs [0.94444444 0.94444444 0.94444444 1.         1.        ]
Resultado KNN: 0.7028571428571428
Resultado Random Forest: 0.9666666666666666


In [55]:
# Hyperparameter search: GridSearchCV is used by the tuning cells of this
# notebook (Random Forest and KNN).
from sklearn.model_selection import GridSearchCV

In [56]:
# Grid search for the best `min_samples_split` of the Random Forest.
#
# GridSearchCV reads the value attached to each parameter name as the
# sequence of candidates to try, so a tuple such as (2, 6) compares only the
# two values 2 and 6. The explicit range below evaluates every integer from
# 2 to 6 instead.
parameters = {'min_samples_split': list(range(2, 7))}
rfc_hps = GridSearchCV(rfc, parameters)
# The search runs on the complete dataset (x, y).
rfc_hps.fit(x, y)
print('Melhor valor para min_samples_split: {0}'.format(rfc_hps.best_params_['min_samples_split']))

Melhor valor para min_samples_split: 2


In [59]:
# Grid search for the best `n_neighbors` of the KNN classifier.
#
# GridSearchCV reads the value attached to each parameter name as the
# sequence of candidates to try, so a tuple such as (1, 20) compares only
# k = 1 and k = 20. The explicit range below evaluates every k from 1 to 20.
parameters = {'n_neighbors': list(range(1, 21))}
knn_hps = GridSearchCV(knn, parameters)
# The search runs on the complete dataset (x, y).
knn_hps.fit(x, y)
print('Melhor valor para n_neighbors: {0}'.format(knn_hps.best_params_['n_neighbors']))

Melhor valor para n_neighbors: 1
