# Метод ближайших соседей (классификация)

Используем реализацию классификатора на основе [ближайших соседей](https://scikit-learn.org/stable/modules/neighbors.html#nearest-neighbors-classification) в `scikit-learn`: [KNeighborsClassifier](https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html#sklearn.neighbors.KNeighborsClassifier) и [RadiusNeighborsClassifier](https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.RadiusNeighborsClassifier.html#sklearn.neighbors.RadiusNeighborsClassifier).

In [None]:
import numpy as np
import pandas as pd

from sklearn.neighbors import KNeighborsClassifier, RadiusNeighborsClassifier
from sklearn.model_selection import train_test_split
# базовые метрики классификации
from sklearn.metrics import accuracy_score, f1_score, auc, class_likelihood_ratios
# ROC-curve, AUC
from sklearn.metrics import roc_curve, roc_auc_score, RocCurveDisplay
# Precision-recall
from sklearn.metrics import precision_recall_curve, average_precision_score, PrecisionRecallDisplay

import matplotlib.pyplot as plt

# Не показывать FutureWarnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

## Импорт и подготовка данных

In [None]:
# Read the CSV file into a DataFrame (presumably loan-application records,
# judging by the file name — confirm against the dataset description).
df = pd.read_csv('./datasets/loanapp.csv')
# Show the (rows, columns) size of the loaded table as the cell output.
df.shape

In [None]:
# Drop every row that has at least one missing value; the estimators used
# below are fitted on this cleaned table.
df = df.dropna()
# Show the size again to see how many rows were removed.
df.shape

In [None]:
# Separate the feature matrix from the 'approve' target column.
X = df.drop(columns='approve')
y = df['approve']

# Hold out 20% of the rows as a test set (80:20 split); the fixed seed makes
# the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

## Инициализация и обучение модели 

In [None]:
# Create a k-nearest-neighbours classifier (k = 5, all neighbours vote with
# equal weight) and fit it on the training split in one step.
# NOTE(review): the features are passed in unscaled and k-NN is distance
# based — consider whether standardisation is needed for this dataset.
clf = KNeighborsClassifier(n_neighbors=5, weights='uniform').fit(X_train, y_train)
# Echo the fitted estimator as the cell output.
clf

## Прогнозирование (на тестовой выборке)

In [None]:
# Predict class labels for the test set.
# The fitted estimator in this notebook is bound to `clf`; the previously used
# name `clf_knn` is not defined in any visible cell and raised a NameError.
clf.predict(X_test)

In [None]:
# Predict class-membership probabilities for the test set
# (one column per class, in the order of clf.classes_).
clf.predict_proba(X_test)

In [None]:
# clf_rad = RadiusNeighborsClassifier(radius=10000.0, weights='uniform')
# clf_rad.fit(X_train, y_train)

In [None]:
# clf_rad.predict_proba(X_test)

## Метрики качества прогнозов на тестовой выборке

In [None]:
# Share of correct predictions (accuracy) on the test set.
clf.score(X_test, y_test)

In [None]:
# Alternative: compute the same accuracy with the accuracy_score function.
# `y_pred` stays a notebook-level variable because the F1 cell reuses it.
y_pred = clf.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)

Метрика 

$$
	F1=\frac{2TP}{2TP+FP+FN}
$$

In [None]:
# F1 score of the test-set label predictions (`y_pred` comes from the
# accuracy_score cell).
f1_score(y_test, y_pred)

Метрика AUC-ROC

In [None]:
# Use column 1 of predict_proba as the score: the probability of the second
# class in clf.classes_ (presumably approve == 1 — confirm).
y_pred_prob = clf.predict_proba(X_test)[:, 1]

# Points of the ROC curve; the thresholds are not needed here.
fpr, tpr, _ = roc_curve(y_true=y_test, y_score=y_pred_prob)

# Area under the ROC curve, shown as the cell output.
roc_auc_score(y_true=y_test, y_score=y_pred_prob)

ROC-кривая и мера AUC

In [None]:
# Plot the ROC curve of `clf` on the test set directly from the fitted
# estimator, then render the figure.
RocCurveDisplay.from_estimator(clf, X_test, y_test)
plt.show()

In [None]:
# AUC-PR: area under the precision-recall curve.
y_pred_prob = clf.predict_proba(X_test)[:, 1]
precision, recall, thresholds = precision_recall_curve(y_test, y_pred_prob)

# Recall goes on the x axis and precision on the y axis.
auc(x=recall, y=precision)

In [None]:
# Average precision: another summary of the precision-recall curve.
# Per the scikit-learn documentation it is not interpolated, so it can differ
# from the trapezoidal auc(recall, precision) value computed in the AUC-PR cell.
y_pred_prob = clf.predict_proba(X_test)[:,1]
average_precision_score(y_test, y_pred_prob)

Кривая Precision-Recall

In [None]:
# Plot the precision-recall curve of `clf` on the test set directly from the
# fitted estimator, then render the figure.
PrecisionRecallDisplay.from_estimator(clf, X_test, y_test)
plt.show()

## Сравнение нескольких классификаторов

In [None]:
# A second classifier for comparison: same settings but k = 3 neighbours.
clf2 = KNeighborsClassifier(n_neighbors=3, weights='uniform').fit(X_train, y_train)
# Echo the fitted estimator as the cell output.
clf2

In [None]:
# Overlay the ROC curves of both classifiers on one set of axes.
ax = plt.subplot()
for model, label in ((clf, 'k=5'), (clf2, 'k=3')):
    RocCurveDisplay.from_estimator(model, X_test, y_test, ax=ax, name=label)

plt.show()

In [None]:
# Overlay the precision-recall curves of both classifiers on one set of axes.
ax = plt.subplot()
for model, label in ((clf, 'k=5'), (clf2, 'k=3')):
    PrecisionRecallDisplay.from_estimator(model, X_test, y_test, ax=ax, name=label)

plt.show()

In [None]:
# Draw the ROC curves by hand and put each model's AUC-ROC into the legend.
for model, prefix in ((clf, "kNN1 AUC= "), (clf2, "kNN2 AUC= ")):
    # Column 1 of predict_proba is used as the ranking score.
    y_pred_prob = model.predict_proba(X_test)[:, 1]
    fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
    auc_roc = roc_auc_score(y_test, y_pred_prob)
    plt.plot(fpr, tpr, label=prefix + str(round(auc_roc, 3)))

plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc=4)  # location code 4 = lower right
plt.show()

In [None]:
# Draw the precision-recall curves by hand and put each model's AUC-PR
# (trapezoidal area under the curve) into the legend.
for model, prefix in ((clf, "kNN1 AUC= "), (clf2, "kNN2 AUC= ")):
    # Column 1 of predict_proba is used as the ranking score.
    y_pred_prob = model.predict_proba(X_test)[:, 1]
    precision, recall, _ = precision_recall_curve(y_test, y_pred_prob)
    auc_pr = auc(recall, precision)
    plt.plot(recall, precision, label=prefix + str(round(auc_pr, 3)))

# Axis label fixed: it was misspelled as 'Prcision' on the rendered figure.
plt.ylabel('Precision')
plt.xlabel('Recall')
plt.legend(loc=3)  # location code 3 = lower left
plt.show()