In [5]:
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

In [6]:
iris = datasets.load_iris()
x = iris.data[:, :2]
y = iris.target

In [7]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.33, random_state=42)

In [8]:
rfc = RandomForestClassifier()
rfc.fit(X_train,y_train)
y_pred = rfc.predict(X_test)

In [9]:
rfc_acc = round(accuracy_score(y_test, y_pred), 6)
rfc_recall = round(recall_score(y_test, y_pred, average='weighted'), 6)
rfc_precision = round(precision_score(y_test, y_pred, average='weighted'), 6)

In [10]:
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

In [11]:
knn_acc = round(accuracy_score(y_test, y_pred), 6)
knn_recall = round(recall_score(y_test, y_pred, average='weighted'), 6)
knn_precision = round(precision_score(y_test, y_pred, average='weighted'), 6)

In [12]:
clf = DecisionTreeClassifier(random_state=0)
clf.fit(X_train,y_train)
y_pred = clf.predict(X_test)

In [13]:
clf_acc = round(accuracy_score(y_test, y_pred), 6)
clf_recall = round(recall_score(y_test, y_pred, average='weighted'), 6)
clf_precision = round(precision_score(y_test, y_pred, average='weighted'), 6)

In [14]:
svc = SVC()
svc.fit(x, y)
y_pred = rfc.predict(X_test)

In [15]:
svc_acc = round(accuracy_score(y_test, y_pred), 6)
svc_recall = round(recall_score(y_test, y_pred, average='weighted'), 6)
svc_precision = round(precision_score(y_test, y_pred, average='weighted'), 6)

In [16]:
print("Random Forests\n vs KNeighborsClassifier\n vs DecisionTreeClassifier\n vs SVC")
print("Classes: {0}\n".format(iris.target_names))

Random Forests
 vs KNeighborsClassifier
 vs DecisionTreeClassifier
 vs SVC
Classes: ['setosa' 'versicolor' 'virginica']



In [37]:
print("Acurácia: {0} vs {1} vs {2} vs {3}".format(rfc_acc, knn_acc, clf_acc, svc_acc))
print("Recall:  {0} vs {1} vs {2} vs {3}".format(rfc_recall, knn_recall,  clf_recall, svc_recall))
print("Precisão: {0} vs {1} vs {2} vs {3}".format(rfc_precision, knn_precision,  clf_precision, svc_precision))

Acurácia: 0.74 vs 0.76 vs 0.7 vs 0.74
Recall:  0.74 vs 0.76 vs 0.7 vs 0.74
Precisão: 0.739664 vs 0.760171 vs 0.71 vs 0.739664


In [38]:
from sklearn.model_selection import cross_val_score
cv_rfc = cross_val_score(rfc, x, y, cv=5)
cv_knn = cross_val_score(knn, x, y, cv=5)
cv_svc = cross_val_score(svc, x, y, cv=5)
cv_clf = cross_val_score(clf, x, y, cv=5)
 
print("\nValidação cruzada:\n {0} vs\n {1} vs\n {2} vs\n {3}".format(cv_knn, cv_rfc, cv_svc, cv_clf))


Validação cruzada:
 [0.66666667 0.8        0.63333333 0.86666667 0.66666667] vs
 [0.66666667 0.7        0.76666667 0.76666667 0.7       ] vs
 [0.73333333 0.86666667 0.76666667 0.86666667 0.86666667] vs
 [0.7        0.7        0.73333333 0.8        0.66666667]


In [19]:
parameters = {'min_samples_split':(2,6)}
rfc_hps = GridSearchCV(rfc, parameters)
rfc_hps.fit(x, y)
print("Melhor valor para o parâmetro min_samples_split: {0}".format(rfc_hps.best_params_['min_samples_split']))

Melhor valor para o parâmetro min_samples_split: 2


In [20]:
#KNN
parameters = {'n_neighbors':(1,20)}
knn_hps = GridSearchCV(knn, parameters)
knn_hps.fit(x, y)
knn_hps.best_params_['n_neighbors']
print("Melhor valor para o parâmetro n_neighbors: {0}".format(knn_hps.best_params_['n_neighbors']))

Melhor valor para o parâmetro n_neighbors: 20


In [24]:
parameters = {'min_samples_split':(2,6)}
clf_hps = GridSearchCV(clf, parameters)
clf_hps.fit(x, y)
print("Melhor valor para o parâmetro min_samples_split: {0}".format(clf_hps.best_params_['min_samples_split']))

Melhor valor para o parâmetro min_samples_split: 6


In [25]:
parameters = {'C':(1, 10)}
svc_hps = GridSearchCV(svc, parameters)
svc_hps.fit(x, y)
print("Melhor valor para o parâmetro min_samples_split: {0}".format(svc_hps.best_params_['C']))

Melhor valor para o parâmetro min_samples_split: 1


In [85]:
# parameters = {'n_neighbors':(1,20)}
# knn_hps = GridSearchCV(knn, parameters)
# knn_hps.fit(x, y)
# knn_hps.best_params_['n_neighbors']
# print("Melhor valor para o parâmetro n_neighbors: {0}".format(knn_hps.best_params_['n_neighbors']))

Melhor valor para o parâmetro n_neighbors: 20
