# Метрики классификации

In [3]:
import numpy as np
import pandas as pd

from sklearn.ensemble  import GradientBoostingClassifier, ExtraTreesClassifier, RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_auc_score, roc_curve, average_precision_score, precision_recall_curve, PrecisionRecallDisplay, auc

import matplotlib.pyplot as plt

# Silence FutureWarning messages so they do not clutter the cell outputs.
import warnings
warnings.simplefilter('ignore', FutureWarning)

In [4]:
# Read the loan-application table and discard every row that has a missing value.
df = pd.read_csv('./datasets/loanapp.csv').dropna()

In [5]:
# Features: everything except the target and the 'reject' / 'action' columns
# (presumably these encode the same decision as the target -- verify against the data dictionary).
X = df.drop(['approve', 'reject', 'action'], axis='columns')
# Target: the 'approve' column.
y = df['approve']

# Split into training and test sets 80:20 with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Fit three tree ensembles on the training split.
# All three estimators are randomized; without a seed every run of the notebook
# reports different metrics. random_state=0 matches the seed already used for
# train_test_split, so the results below are reproducible.
clf_boost = GradientBoostingClassifier(random_state=0)
clf_boost.fit(X_train, y_train)

clf_forest = RandomForestClassifier(random_state=0)
clf_forest.fit(X_train, y_train)

clf_extratree = ExtraTreesClassifier(random_state=0)
clf_extratree.fit(X_train, y_train)

## Accuracy

In [None]:
# Test-set score of each fitted model via the estimator's own score() method,
# printed in the order: boosting, random forest, extra trees.
for fitted_model in (clf_boost, clf_forest, clf_extratree):
    print(fitted_model.score(X_test, y_test))

In [None]:
# Alternative: compute accuracy from explicit predictions with accuracy_score.
for fitted_model in (clf_boost, clf_forest, clf_extratree):
    test_predictions = fitted_model.predict(X_test)
    print(accuracy_score(y_test, test_predictions))

## Average precision score

In [None]:
# Average precision of each model, scored on column 1 of predict_proba
# (assumes a binary target whose positive class is the second entry of classes_).
for fitted_model in (clf_boost, clf_forest, clf_extratree):
    positive_scores = fitted_model.predict_proba(X_test)[:, 1]
    print(average_precision_score(y_test, positive_scores))

## F-метрика

In [None]:
# F1 score of each model's hard predictions on the test set.
for fitted_model in (clf_boost, clf_forest, clf_extratree):
    test_predictions = fitted_model.predict(X_test)
    print(f1_score(y_test, test_predictions))

## ROC-кривая

In [None]:
# Draw one ROC curve per model on a shared figure; each legend entry carries
# the model's ROC AUC computed from the same scores used for the curve.
for legend_name, fitted_model in (
    ("Boosting", clf_boost),
    ("Forest", clf_forest),
    ("Extra Tree", clf_extratree),
):
    # Column 1 of predict_proba is used as the score
    # (assumes a binary target with the positive class in that column).
    y_pred_prob = fitted_model.predict_proba(X_test)[:, 1]
    fpr, tpr, _ = roc_curve(y_test, y_pred_prob)
    auc_roc = roc_auc_score(y_test, y_pred_prob)
    plt.plot(fpr, tpr, label=legend_name + " AUC= " + str(auc_roc))

plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
# 'lower right' is the named form of location code 4.
plt.legend(loc='lower right')
plt.show()

## Precision-Recall curve

In [None]:
# Draw one precision-recall curve per model on a shared figure; each legend
# entry carries the area under that curve (trapezoidal rule via auc()).
for legend_name, fitted_model in (
    ("Boosting", clf_boost),
    ("Forest", clf_forest),
    ("Extra Tree", clf_extratree),
):
    # Column 1 of predict_proba is used as the score
    # (assumes a binary target with the positive class in that column).
    y_pred_prob = fitted_model.predict_proba(X_test)[:, 1]
    precision, recall, _ = precision_recall_curve(y_test, y_pred_prob)
    # Recall goes on the x axis, so it is passed first to auc().
    auc_pr = auc(recall, precision)
    plt.plot(recall, precision, label=legend_name + " AUC= " + str(auc_pr))

# Axis label fixed: the original read 'Prcision'.
plt.ylabel('Precision')
plt.xlabel('Recall')
# 'lower left' is the named form of location code 3.
plt.legend(loc='lower left')
plt.show()