In [1]:
import pandas as pd
import numpy as np

from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split as tts

from sklearn.model_selection import GridSearchCV

from sklearn.metrics import classification_report, roc_auc_score

In [None]:
# Seed passed as `random_state` further down so a fresh "Restart & Run All"
# reproduces the same split.
rand = 0xC0FFEE

# pd.read_csv() requires a path or buffer; called with no argument it raises
# TypeError before anything else in the notebook can run.
# TODO(review): placeholder path -- point this at the real dataset file.
DATA_PATH = 'data.csv'
LABEL_COLUMN = 'labels'

data = pd.read_csv(DATA_PATH)

# Select features by excluding the target column by name. Slicing off "the last
# column" only matches `y` when the label happens to be stored last; otherwise
# the label would leak into X.
X = data.drop(columns=[LABEL_COLUMN])
y = data[LABEL_COLUMN]

In [None]:
# train_test_split returns the pieces grouped per input array, i.e.
# (X_train, X_test, y_train, y_test). Unpacking them in any other order silently
# swaps features and labels between the partitions.
# `stratify=y` keeps the class proportions of `y` in both partitions.
X_train, X_test, y_train, y_test = tts(X, y, random_state=rand, stratify=y)

In [None]:
# Baseline: Gaussian naive Bayes with default settings.
bayes = GaussianNB()
bayes.fit(X_train, y_train)

# predict_proba returns one column per class, but roc_auc_score wants a 1-D
# score for a binary target: keep only the positive-class column (index 1 in
# `bayes.classes_` order).
# NOTE(review): assumes `y` is binary -- confirm; a multiclass target would need
# the full matrix plus multi_class='ovr' (or 'ovo') instead.
pred_p = bayes.predict_proba(X_test)[:, 1]
pred = bayes.predict(X_test)

print('naive bayes')
print(classification_report(y_pred=pred, y_true=y_test))
print(roc_auc_score(y_test, pred_p))

In [None]:
# k-nearest neighbours with library defaults.
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)

# roc_auc_score needs a 1-D score for a binary target, so pass only the
# positive-class probability (column 1 in `knn.classes_` order) rather than the
# whole (n_samples, n_classes) matrix.
# NOTE(review): assumes `y` is binary -- confirm.
pred_p = knn.predict_proba(X_test)[:, 1]
pred = knn.predict(X_test)

print('KNN')
print(classification_report(y_pred=pred, y_true=y_test))
print(roc_auc_score(y_test, pred_p))

In [None]:
# 5-fold grid search over KNN hyper-parameters, selecting by ROC AUC.
# NOTE(review): per the scikit-learn docs `leaf_size` only tunes the speed and
# memory of the BallTree/KDTree lookup, not the predictions, so searching over
# it mostly multiplies the fit count -- consider replacing it with `weights`
# or `p`. The grid is kept as-is here.
knn_param_grid = {
    'n_neighbors': [1, 3, 5, 11],
    'leaf_size': [3, 5, 10, 15],
}
knn_gs = GridSearchCV(
    KNeighborsClassifier(),
    knn_param_grid,
    cv=5,
    scoring='roc_auc',
).fit(X_train, y_train)

# GridSearchCV delegates predict/predict_proba to the refitted best estimator.
# roc_auc_score needs a 1-D score for a binary target, so keep only the
# positive-class column instead of the full probability matrix.
# NOTE(review): assumes `y` is binary -- confirm.
pred_p = knn_gs.predict_proba(X_test)[:, 1]
pred = knn_gs.predict(X_test)

print('KNN gridsearch')
print(knn_gs.best_params_)
print(classification_report(y_pred=pred, y_true=y_test))
print(roc_auc_score(y_test, pred_p))

In [None]:
# Single decision tree with default depth settings.
# Seeded with the notebook-wide `rand` so tie-breaking between equally good
# splits does not change the scores from run to run.
tree = DecisionTreeClassifier(random_state=rand)
tree.fit(X_train, y_train)

# roc_auc_score needs a 1-D score for a binary target, so pass only the
# positive-class probability (column 1 in `tree.classes_` order).
# NOTE(review): assumes `y` is binary -- confirm.
pred_p = tree.predict_proba(X_test)[:, 1]
pred = tree.predict(X_test)

print('decision tree')
print(classification_report(y_pred=pred, y_true=y_test))
print(roc_auc_score(y_test, pred_p))

In [None]:
# 5-fold grid search over the tree depth, selecting by ROC AUC.
# The estimator is seeded with `rand` so the chosen depth is reproducible.
tree_param_grid = {
    'max_depth': [5, 10, 15, 50],
}
tree_gs = GridSearchCV(
    DecisionTreeClassifier(random_state=rand),
    tree_param_grid,
    cv=5,
    scoring='roc_auc',
).fit(X_train, y_train)

# GridSearchCV delegates predict/predict_proba to the refitted best estimator.
# roc_auc_score needs a 1-D score for a binary target, so keep only the
# positive-class column instead of the full probability matrix.
# NOTE(review): assumes `y` is binary -- confirm.
pred_p = tree_gs.predict_proba(X_test)[:, 1]
pred = tree_gs.predict(X_test)

print('tree gridsearch')
print(tree_gs.best_params_)
print(classification_report(y_pred=pred, y_true=y_test))
print(roc_auc_score(y_test, pred_p))

In [None]:
# Random forest with default settings.
# Bootstrapping and feature sub-sampling are random, so seed the model with the
# notebook-wide `rand` to make the reported scores reproducible.
forest = RandomForestClassifier(random_state=rand)
forest.fit(X_train, y_train)

# roc_auc_score needs a 1-D score for a binary target, so pass only the
# positive-class probability (column 1 in `forest.classes_` order).
# NOTE(review): assumes `y` is binary -- confirm.
pred_p = forest.predict_proba(X_test)[:, 1]
pred = forest.predict(X_test)

print('Random forest')
print(classification_report(y_pred=pred, y_true=y_test))
print(roc_auc_score(y_test, pred_p))

In [None]:
# 5-fold grid search over forest depth and size, selecting by ROC AUC.
# The number-of-trees parameter of RandomForestClassifier is `n_estimators`;
# an unknown key such as 'num_estimators' makes GridSearchCV fail with
# "Invalid parameter" as soon as it tries to configure the estimator.
forest_param_grid = {
    'max_depth': [3, 5, 10, 20],
    'n_estimators': [10, 50, 100],
}
forest_gs = GridSearchCV(
    RandomForestClassifier(random_state=rand),  # seeded for reproducible CV scores
    forest_param_grid,
    cv=5,
    scoring='roc_auc',
).fit(X_train, y_train)

# GridSearchCV delegates predict/predict_proba to the refitted best estimator.
# roc_auc_score needs a 1-D score for a binary target, so keep only the
# positive-class column instead of the full probability matrix.
# NOTE(review): assumes `y` is binary -- confirm.
pred_p = forest_gs.predict_proba(X_test)[:, 1]
pred = forest_gs.predict(X_test)

print('random forest gridsearch')
# Report the forest search's own winner (not the decision-tree search's).
print(forest_gs.best_params_)
print(classification_report(y_pred=pred, y_true=y_test))
print(roc_auc_score(y_test, pred_p))