In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report, confusion_matrix

In [2]:
## view first five rows
df = pd.read_csv('bank-additional-full.csv')
df.head(5)

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,56,housemaid,married,basic.4y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
1,57,services,married,high.school,unknown,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
2,37,services,married,high.school,no,yes,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
3,40,admin.,married,basic.6y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
4,56,services,married,high.school,no,no,yes,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no


In [3]:
# Convert target labels to numeric
df['y'] = df['y'].map({'yes': 1, 'no': 0}).astype(int)

# One-hot encode categorical variables
df = pd.get_dummies(df, drop_first=True)

# Train-test split
X = df.drop(columns=['y'])
y = df['y']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=7, stratify=y)

In [4]:
# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [5]:
from sklearn.ensemble import AdaBoostClassifier

# Train baseline model
baseline_model = AdaBoostClassifier(random_state=42)
baseline_model.fit(X_train, y_train)



In [6]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import GridSearchCV

from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 1.0],
    'algorithm': ['SAMME']
}

grid_search = GridSearchCV(estimator=baseline_model, param_grid=param_grid, cv=3, scoring='f1')
grid_search.fit(X_train, y_train)

print(f"Best Parameters: {grid_search.best_params_}")

Best Parameters: {'algorithm': 'SAMME', 'learning_rate': 1.0, 'n_estimators': 50}


In [7]:
# Fit final model with best parameters
final_model = grid_search.best_estimator_
final_model.fit(X_train, y_train)

In [8]:
def evaluate_model(model, X_test, y_test, label):
    y_pred = model.predict(X_test)
    print(f"\nPerformance for {label}:")
    print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
    print(f"Precision: {precision_score(y_test, y_pred):.4f}")
    print(f"Recall: {recall_score(y_test, y_pred):.4f}")
    print(f"F1 Score: {f1_score(y_test, y_pred):.4f}")
    print(f"ROC AUC: {roc_auc_score(y_test, y_pred):.4f}")
    print("\nClassification Report:\n", classification_report(y_test, y_pred))
    print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Evaluate baseline and final models
evaluate_model(baseline_model, X_test, y_test, "Baseline Model")


Performance for Baseline Model:
Accuracy: 0.9101
Precision: 0.6761
Recall: 0.3869
F1 Score: 0.4921
ROC AUC: 0.6817

Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.98      0.95      7310
           1       0.68      0.39      0.49       928

    accuracy                           0.91      8238
   macro avg       0.80      0.68      0.72      8238
weighted avg       0.90      0.91      0.90      8238


Confusion Matrix:
 [[7138  172]
 [ 569  359]]


In [9]:
evaluate_model(final_model, X_test, y_test, "Final Model (After Tuning)")


Performance for Final Model (After Tuning):
Accuracy: 0.9054
Precision: 0.6319
Recall: 0.3847
F1 Score: 0.4782
ROC AUC: 0.6781

Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.97      0.95      7310
           1       0.63      0.38      0.48       928

    accuracy                           0.91      8238
   macro avg       0.78      0.68      0.71      8238
weighted avg       0.89      0.91      0.90      8238


Confusion Matrix:
 [[7102  208]
 [ 571  357]]
