In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Read the preprocessed training and test tables from the working directory.
df, df_test = (
    pd.read_csv(csv_name)
    for csv_name in ('preprocessed_train.csv', 'preprocessed_test.csv')
)

In [3]:
# Preview the first rows of the training frame as a quick check on the load.
df.head()

Unnamed: 0,Month,Age,Profession,Income_Annual,Base_Salary_PerMonth,Total_Bank_Accounts,Total_Credit_Cards,Rate_Of_Interest,Total_Current_Loans,Delay_from_due_date,...,Current_Debt_Outstanding,Ratio_Credit_Utilization,Credit_History_Age,Payment_of_Min_Amount,Per_Month_EMI,Monthly_Investment,Payment_Behaviour,Monthly_Balance,Credit_Score,Loan_Count
0,7,51,11,101583.48,8648.29,5,7,10,4,8,...,50.93,34.462154,289,0,190.811017,630.015789,3,314.002193,1,4
1,1,23,14,101926.95,8635.9125,4,4,9,1,13,...,1058.0,39.693812,245,0,70.587681,662.803927,4,410.199642,1,1
2,2,49,14,158871.12,12962.26,0,4,8,1,8,...,576.48,39.367225,228,0,86.90586,746.805985,4,742.514154,1,1
3,6,40,3,60379.28,4804.606667,5,6,18,3,15,...,725.39,29.061701,205,0,90.906385,166.418658,1,473.135623,1,3
4,5,17,0,50050.83,4085.9025,9,10,20,5,28,...,3419.1,30.386321,54,1,190.44506,56.789441,0,401.355749,0,5


In [4]:
# Target column for supervised training.
y = df['Credit_Score']
# Training features: every column except the target.
X = df.drop(columns='Credit_Score')
# Test features: the ID column is an identifier, so it is removed before prediction.
X_test = df_test.drop(columns='ID')

In [8]:
import warnings
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from skopt import BayesSearchCV
from sklearn.metrics import accuracy_score

# Suppress warnings for clarity.
# NOTE: this filter is global, so it also hides deprecation notices from
# scikit-learn / scikit-optimize for the rest of the session.
warnings.filterwarnings("ignore")

# Hold out 20% of the labelled data for validation. The split is stratified
# on the target so the hold-out keeps the same class proportions as the
# training portion (the 5-fold CV below is also stratified for classifiers).
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Weak learner that AdaBoost boosts; its tree parameters are tuned below
# through the `estimator__` prefix.
base_estimator = DecisionTreeClassifier()

# Search space for BayesSearchCV. Tuples are (low, high) bounds.
# `algorithm` is intentionally NOT searched: 'SAMME.R' was deprecated in
# scikit-learn 1.4 and is rejected by later releases, which would abort the
# search as soon as that value is sampled. The library default is used instead.
param_space = {
    'n_estimators': (50, 300),               # Number of boosting stages
    'learning_rate': (0.01, 1.5),            # Learning rate
    'estimator__max_depth': (1, 10),         # Maximum depth of the base estimator
    'estimator__min_samples_split': (2, 20), # Minimum samples required to split an internal node
    'estimator__min_samples_leaf': (1, 20)   # Minimum samples required to be a leaf node
}

# Unfitted AdaBoost template handed to the search. It gets its own name so
# that `ada_model` only ever refers to the fitted, tuned model; if the search
# is interrupted, later cells cannot silently pick up an unfitted estimator.
ada_unfitted = AdaBoostClassifier(estimator=base_estimator, random_state=42)

# Bayesian hyper-parameter search with 5-fold cross-validation
bayes_search = BayesSearchCV(
    estimator=ada_unfitted,
    search_spaces=param_space,
    n_iter=50,                 # Number of parameter settings that are sampled
    cv=5,                      # 5-fold cross-validation
    scoring='accuracy',        # Metric to evaluate during cross-validation
    n_jobs=-1,                 # Use all available cores
    verbose=1,
    random_state=42            # For reproducibility
)

# Fit BayesSearchCV on the training data
bayes_search.fit(X_train, y_train)

# Best parameters and cross-validated score found by the search
ada_model = bayes_search.best_estimator_
print("Best parameters for AdaBoost:", bayes_search.best_params_)
print(f"Best cross-validated accuracy: {bayes_search.best_score_:.4f}")

# Evaluate the tuned model on the held-out validation split
y_pred = ada_model.predict(X_val)
accuracy = accuracy_score(y_pred=y_pred, y_true=y_val)
print(f"Accuracy of the best AdaBoost model: {accuracy:.2f}")

Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits
Fitting 5 folds for each of 1 candidates, totalling 5 fits


KeyboardInterrupt: 

In [None]:
# Predict class labels for the test features with the model selected by the search.
# NOTE: despite its name, `y_test` holds model predictions, not ground-truth labels.
y_test = ada_model.predict(X_test)

In [None]:
# Translate the integer class codes predicted by the model into label strings.
label_names = {0: 'Poor', 1: 'Standard', 2: 'Good'}
y_pred_test = pd.Series(y_test).map(label_names)

# Pair every test ID with its predicted label and preview the result.
final = pd.DataFrame({'ID': df_test['ID'], 'Credit_Score': y_pred_test})
final.head()

In [None]:
# Write the submission table to disk; index=False keeps the DataFrame index out of the file.
final.to_csv('ada_submission.csv', index=False)