In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, f1_score, log_loss, accuracy_score
from sklearn.preprocessing import  OneHotEncoder, LabelEncoder
from sklearn.compose import make_column_transformer, make_column_selector
from tqdm import tqdm

In [7]:
# Read the sonar data set.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where the file exists at exactly this location.
sonar = pd.read_csv("C:/Python/Cases/Sonar/Sonar.csv")

# Features: every column except the target column 'Class'.
X = sonar.drop(columns='Class')

# Target: the 'Class' labels, encoded as integers by the label encoder.
le = LabelEncoder()
y = le.fit_transform(sonar['Class'])

# Hold out 30% of the rows for testing, stratified on the target so both
# splits keep the same class balance; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=25
)

In [9]:
# Display the size of the training split as (rows, columns).
X_train.shape

(145, 60)

In [8]:
# Base learners that the bagging experiments in this notebook wrap.
nb = GaussianNB()
knn = KNeighborsClassifier()
dtc = DecisionTreeClassifier(random_state=25)

# Bag ten Gaussian naive Bayes models (seeded for repeatability), fit them on
# the training split and report per-class metrics on the held-out test split.
bagg = BaggingClassifier(estimator=nb, n_estimators=10, random_state=25).fit(X_train, y_train)
y_pred = bagg.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.62      0.74      0.68        34
           1       0.61      0.48      0.54        29

    accuracy                           0.62        63
   macro avg       0.62      0.61      0.61        63
weighted avg       0.62      0.62      0.61        63



In [4]:
# Bag ten k-nearest-neighbours classifiers and report test-set metrics.
# random_state is fixed (same seed as the other seeded bagging cells in this
# notebook) so the resampling, and therefore the printed report, is
# reproducible from one run to the next.
bagg = BaggingClassifier(estimator=knn, n_estimators=10, random_state=25)
bagg.fit(X_train, y_train)
y_pred = bagg.predict(X_test)
print( classification_report(y_test, y_pred) )

              precision    recall  f1-score   support

           0       0.71      0.88      0.79        34
           1       0.81      0.59      0.68        29

    accuracy                           0.75        63
   macro avg       0.76      0.73      0.73        63
weighted avg       0.76      0.75      0.74        63



Evaluating the individual estimators

In [5]:
# Bag ten decision trees and report test-set metrics.
# random_state is fixed on the ensemble itself (same seed as the other seeded
# bagging cells in this notebook); without it the resampling differs on every
# run and the printed report cannot be reproduced.
bagg = BaggingClassifier(estimator=dtc, n_estimators=10, random_state=25)
bagg.fit(X_train, y_train)
y_pred = bagg.predict(X_test)
print( classification_report(y_test, y_pred) )

              precision    recall  f1-score   support

           0       0.67      0.94      0.78        34
           1       0.87      0.45      0.59        29

    accuracy                           0.71        63
   macro avg       0.77      0.69      0.69        63
weighted avg       0.76      0.71      0.69        63



In [13]:
# Logistic regression with default settings; added as a fourth base learner
# for the bagging comparison that follows.
lr = LogisticRegression()

In [14]:
# Grid to compare: each base learner crossed with each ensemble size.
est_list = [nb, dtc, knn, lr]
n_est = [10, 15, 25, 50]

# One [estimator, ensemble size, test log loss] row per combination.
scores = []
for e in tqdm(est_list):
    for n in n_est:
        # Same seed for every configuration so the runs are repeatable.
        bagg = BaggingClassifier(estimator=e, n_estimators=n, random_state=25).fit(X_train, y_train)
        y_pred_prob = bagg.predict_proba(X_test)
        test_loss = log_loss(y_test, y_pred_prob)
        scores.append([e, n, test_loss])

# Tabulate the results and show them ordered by ascending log loss
# (lower is better).
df_scores = pd.DataFrame(scores, columns=['Estimator', 'B-Samples', 'score'])
df_scores.sort_values(by='score')

100%|██████████| 4/4 [00:02<00:00,  1.83it/s]


Unnamed: 0,Estimator,B-Samples,score
8,KNeighborsClassifier(),10,0.422282
9,KNeighborsClassifier(),15,0.43149
10,KNeighborsClassifier(),25,0.432707
11,KNeighborsClassifier(),50,0.447037
6,DecisionTreeClassifier(random_state=25),25,0.47575
4,DecisionTreeClassifier(random_state=25),10,0.477146
7,DecisionTreeClassifier(random_state=25),50,0.490503
5,DecisionTreeClassifier(random_state=25),15,0.50212
13,LogisticRegression(),15,0.534413
12,LogisticRegression(),10,0.537041
