### Classification Metrics

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB

In [None]:
# Load the breast cancer dataset that ships with scikit-learn.
cancer = load_breast_cancer()

In [None]:
# Pull the feature matrix and the label vector out of the loaded dataset.
X, y = cancer.data, cancer.target

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out a test split. A fixed random_state pins the shuffle so the split,
# and every score computed from it, is reproducible after a kernel restart.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
# Three baseline classifiers to compare.
# max_iter is raised well above the library default: this model is fit on the
# unscaled features below, where the solver is likely to hit the default
# iteration limit and stop with a ConvergenceWarning before converging.
lgr = LogisticRegression(max_iter=10000)
knn = KNeighborsClassifier()
nbayes = MultinomialNB()

In [None]:
# Fit logistic regression on the raw training features, then show its score
# on the held-out split (fit() returns the estimator, so the calls chain).
lgr.fit(X_train, y_train).score(X_test, y_test)

In [None]:
# Fit k-nearest neighbours on the raw training features, then show its score
# on the held-out split.
knn.fit(X_train, y_train).score(X_test, y_test)

In [None]:
# Fit multinomial naive Bayes on the raw training features, then show its
# score on the held-out split.
nbayes.fit(X_train, y_train).score(X_test, y_test)

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV

In [None]:
# Learn the scaling statistics from the training split only, then apply that
# same fitted transform to both splits.
sscaler = StandardScaler().fit(X_train)
X_train_scaled = sscaler.transform(X_train)
X_test_scaled = sscaler.transform(X_test)

In [None]:
# Refit k-nearest neighbours on the standardized features and show its score
# on the standardized held-out split.
knn.fit(X_train_scaled, y_train).score(X_test_scaled, y_test)

In [None]:
# Candidate neighbourhood sizes 1 through 19 for the grid search over knn.
params = {'n_neighbors': list(range(1, 20))}
grid = GridSearchCV(estimator=knn, param_grid=params)

In [None]:
# Run the grid search over n_neighbors on the standardized training split.
grid.fit(X_train_scaled, y_train)

In [None]:
# Parameter setting selected by the grid search.
grid.best_params_

In [None]:
# Score of the searched model on the standardized held-out split.
grid.score(X_test_scaled, y_test)

In [None]:
# Hard class predictions from the searched model on the held-out split;
# reused by the metric cells further down.
preds = grid.predict(X_test_scaled)

In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
# Confusion matrix of the searched model on the *training* split.
# NOTE(review): this is not a held-out estimate -- confirm that evaluating on
# the training data is intended here.
confusion_matrix(y_train, grid.predict(X_train_scaled))

In [None]:
# Per-class probabilities on the held-out split. Taken from the tuned
# grid-search model (not the default-parameter knn) so that the
# custom-threshold predictions built from them are compared like-for-like
# with grid.predict() in the confusion matrices that follow.
probs = grid.predict_proba(X_test_scaled)

In [None]:
# Custom decision threshold: label a sample 1 when its column-1 probability
# exceeds 0.2, otherwise 0.
predict_II = (probs[:, 1] > 0.2).astype(int)

In [None]:
# Inspect the thresholded predictions.
predict_II

In [None]:
# Confusion matrix for the 0.2-threshold predictions on the held-out split.
confusion_matrix(y_test, predict_II)

In [None]:
# Confusion matrix for the searched model's own predictions on the held-out
# split, for comparison with the custom-threshold matrix.
confusion_matrix(y_test, grid.predict(X_test_scaled))

In [None]:
from sklearn.model_selection import cross_val_score

In [None]:
# 5-fold cross-validated scores for knn on the training split.
# NOTE(review): this uses the unscaled X_train, whereas the following
# cross-validation cell uses X_train_scaled -- confirm the contrast is intended.
cross_val_score(knn, X_train, y_train, cv = 5)

In [None]:
# 5-fold cross-validated recall for knn on the standardized training split.
# NOTE(review): X_train_scaled was standardized using all of X_train before
# the folds are formed, so each validation fold influenced the scaling
# statistics -- consider scaling inside a Pipeline if that matters.
cross_val_score(knn, X_train_scaled, y_train, cv = 5, scoring = 'recall')

In [None]:
from sklearn.metrics import recall_score, precision_recall_curve, roc_curve

In [None]:
# roc_curve sweeps a threshold over continuous scores, so it must be given
# the positive-class probability rather than hard 0/1 predictions (hard labels
# collapse the curve to a single operating point).
roc_curve(y_test, grid.predict_proba(X_test_scaled)[:, 1])

In [None]:
# precision_recall_curve sweeps a threshold over continuous scores, so it must
# be given the positive-class probability rather than hard 0/1 predictions
# (hard labels leave only a single operating point on the curve).
precision_recall_curve(y_test, grid.predict_proba(X_test_scaled)[:, 1])

In [None]:
# Recall of the searched model's hard predictions on the held-out split.
recall_score(y_test, preds)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Text classification report for the held-out predictions; print() is used so
# the multi-line string renders with its line breaks.
print(classification_report(y_test, preds))

In [None]:
import scikitplot as skplot

In [None]:
# Plot the confusion matrix for the held-out predictions.
skplot.metrics.plot_confusion_matrix(y_test, preds)

In [None]:
# Plot ROC curves from the searched model's predicted probabilities.
# plot_roc is the supported replacement for the deprecated plot_roc_curve.
skplot.metrics.plot_roc(y_test, grid.predict_proba(X_test_scaled))

In [None]:
# Plot precision-recall curves from the searched model's predicted
# probabilities on the held-out split.
skplot.metrics.plot_precision_recall(y_test, grid.predict_proba(X_test_scaled))

In [None]:
# Inspect the (unscaled) feature matrix.
X

In [None]:
# Use the first row of the feature matrix as the reference point.
test_point = X[0]

In [None]:
# Inspect the reference point.
test_point

In [None]:
# Offset of every row of X from the reference point (the row is broadcast
# across all rows of the matrix).
diff = np.subtract(X, test_point)

In [None]:
# Euclidean distance of every row from the reference point, paired with that
# row's label. The norm is taken along axis 1 in a single vectorized call
# instead of invoking np.linalg.norm once per row from Python.
dist_df = pd.DataFrame({'dists': np.linalg.norm(diff, axis=1),
                        'labels': y})

In [None]:
# Preview the first rows of the distance table.
dist_df.head()

In [None]:
# The five rows closest to the reference point (smallest 'dists' values).
dist_df.nsmallest(n=5, columns='dists')