# Bagging

In [None]:
import numpy as np
import pandas as pd

from sklearn.ensemble  import BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
# базовые метрики классификации
from sklearn.metrics import accuracy_score, f1_score, auc, class_likelihood_ratios
# ROC-curve, AUC
from sklearn.metrics import roc_curve, roc_auc_score, RocCurveDisplay
# Precision-recall
from sklearn.metrics import precision_recall_curve, average_precision_score, PrecisionRecallDisplay

import matplotlib.pyplot as plt

# Не показывать Warning
import warnings
warnings.simplefilter(action='ignore', category=Warning)

In [None]:
# Load the loan-application table and discard every row that has a missing value.
df = pd.read_csv('./datasets/loanapp.csv').dropna()

# Features: every column except the target 'approve' and the columns
# 'reject' and 'action', which are dropped alongside it.
X = df.drop(columns=['approve', 'reject', 'action'])
y = df['approve']

# Split the sample into training and test parts, 80:20, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

In [None]:
# Baseline (non-ensemble) classifiers, each fitted once on the training split.
# They are the reference points the bagged ensembles are compared against.

# Logistic regression with the penalty switched off (penalty=None).
clf_logistic = LogisticRegression(penalty=None, random_state=0)
clf_logistic.fit(X_train, y_train)

# Linear SVM. Seeded like the other seedable models in this notebook so that a
# rerun reproduces the same fit (the seed is used by the dual solver's shuffling).
clf_linear_svc = LinearSVC(random_state=0)
clf_linear_svc.fit(X_train, y_train)

# k-nearest neighbours: 5 neighbours, uniform (equal-weight) voting.
clf_knn = KNeighborsClassifier(n_neighbors=5, weights='uniform')
clf_knn.fit(X_train, y_train)

# Decision tree with default hyper-parameters and a fixed seed.
clf_tree = DecisionTreeClassifier(random_state=0)
clf_tree.fit(X_train, y_train)

In [None]:
# Bagged logistic regression: an ensemble of 10 penalty-free logistic models.
# random_state=0 fixes the ensemble's random resampling so that the reported
# accuracy and curves are reproducible, in line with the seeded split and
# baseline models.
clf_logistic_bagging = BaggingClassifier(
    estimator=LogisticRegression(penalty=None),
    n_estimators=10,
    random_state=0,
)
clf_logistic_bagging.fit(X_train, y_train)

In [None]:
# Bagged linear SVM: an ensemble of 10 LinearSVC models.
# random_state=0 fixes the ensemble's random resampling so that the reported
# accuracy and curves are reproducible, in line with the seeded split and
# baseline models.
clf_linear_svc_bagging = BaggingClassifier(
    estimator=LinearSVC(),
    n_estimators=10,
    random_state=0,
)
clf_linear_svc_bagging.fit(X_train, y_train)

In [None]:
# Bagged kNN: an ensemble of 10 five-neighbour, uniform-weight classifiers.
# kNN itself has no seed, so the ensemble's resampling is the only source of
# randomness; random_state=0 fixes it to make the results reproducible.
clf_knn_bagging = BaggingClassifier(
    estimator=KNeighborsClassifier(n_neighbors=5, weights='uniform'),
    n_estimators=10,
    random_state=0,
)
clf_knn_bagging.fit(X_train, y_train)

In [None]:
# Bagged decision trees: an ensemble of 10 trees.
# Seeding only the inner tree does not fix the ensemble's random resampling,
# so random_state=0 is also set on the BaggingClassifier itself to make the
# reported accuracy and curves reproducible.
clf_tree_bagging = BaggingClassifier(
    estimator=DecisionTreeClassifier(random_state=0),
    n_estimators=10,
    random_state=0,
)
clf_tree_bagging.fit(X_train, y_train)

## Accuracy

In [None]:
# Test-set score of every fitted model, one value per line.
# Order: each base model is immediately followed by its bagged counterpart
# (logistic, linear SVC, kNN, decision tree).
fitted_models = (
    clf_logistic,
    clf_logistic_bagging,
    clf_linear_svc,
    clf_linear_svc_bagging,
    clf_knn,
    clf_knn_bagging,
    clf_tree,
    clf_tree_bagging,
)
for fitted_model in fitted_models:
    print(fitted_model.score(X_test, y_test))

## ROC-кривые

In [None]:
# Overlay the test-set ROC curves of the single logistic model and its bagged
# ensemble on one shared axes.
ax = plt.subplot()
roc_pairs = (
    (clf_logistic, 'Logistic'),
    (clf_logistic_bagging, 'Logistic Bagging'),
)
for estimator, label in roc_pairs:
    RocCurveDisplay.from_estimator(estimator, X_test, y_test, ax=ax, name=label)

plt.show()

In [None]:
# Overlay the test-set ROC curves of the single linear SVC and its bagged
# ensemble on one shared axes.
ax = plt.subplot()
roc_pairs = (
    (clf_linear_svc, 'SVC'),
    (clf_linear_svc_bagging, 'SVC Bagging'),
)
for estimator, label in roc_pairs:
    RocCurveDisplay.from_estimator(estimator, X_test, y_test, ax=ax, name=label)

plt.show()

In [None]:
# Overlay the test-set ROC curves of the single kNN model and its bagged
# ensemble on one shared axes.
ax = plt.subplot()
roc_pairs = (
    (clf_knn, 'kNN'),
    (clf_knn_bagging, 'kNN Bagging'),
)
for estimator, label in roc_pairs:
    RocCurveDisplay.from_estimator(estimator, X_test, y_test, ax=ax, name=label)

plt.show()

In [None]:
# Overlay the test-set ROC curves of the single decision tree and its bagged
# ensemble on one shared axes.
ax = plt.subplot()
roc_pairs = (
    (clf_tree, 'Tree'),
    (clf_tree_bagging, 'Tree Bagging'),
)
for estimator, label in roc_pairs:
    RocCurveDisplay.from_estimator(estimator, X_test, y_test, ax=ax, name=label)

plt.show()

## Precision-Recall

In [None]:
# Overlay the test-set precision-recall curves of the single logistic model and
# its bagged ensemble on one shared axes.
ax = plt.subplot()
pr_pairs = (
    (clf_logistic, 'Logistic'),
    (clf_logistic_bagging, 'Logistic Bagging'),
)
for estimator, label in pr_pairs:
    PrecisionRecallDisplay.from_estimator(estimator, X_test, y_test, ax=ax, name=label)

plt.show()

In [None]:
# Overlay the test-set precision-recall curves of the single linear SVC and its
# bagged ensemble on one shared axes.
ax = plt.subplot()
pr_pairs = (
    (clf_linear_svc, 'SVC'),
    (clf_linear_svc_bagging, 'SVC Bagging'),
)
for estimator, label in pr_pairs:
    PrecisionRecallDisplay.from_estimator(estimator, X_test, y_test, ax=ax, name=label)

plt.show()

In [None]:
# Overlay the test-set precision-recall curves of the single kNN model and its
# bagged ensemble on one shared axes.
ax = plt.subplot()
pr_pairs = (
    (clf_knn, 'kNN'),
    (clf_knn_bagging, 'kNN Bagging'),
)
for estimator, label in pr_pairs:
    PrecisionRecallDisplay.from_estimator(estimator, X_test, y_test, ax=ax, name=label)

plt.show()

In [None]:
# Overlay the test-set precision-recall curves of the single decision tree and
# its bagged ensemble on one shared axes.
ax = plt.subplot()
pr_pairs = (
    (clf_tree, 'Tree'),
    (clf_tree_bagging, 'Tree Bagging'),
)
for estimator, label in pr_pairs:
    PrecisionRecallDisplay.from_estimator(estimator, X_test, y_test, ax=ax, name=label)

plt.show()