# Boost Techniques

Developed by: David

---

Boosting methods improve predictive performance by combining many weak learners into a single, stronger model.

---

# Importing the libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Data Preprocessing

## Importing the dataset

In [None]:
# Read the tab-delimited data file. skiprows=1 discards the first line of the
# file, so the following line supplies the column names.
dataset = pd.read_csv(
    'data/default_credit_card_clients.txt',
    sep='\t',
    skiprows=1,
)
# Feature matrix: every column except the first and the last.
X = dataset.iloc[:, 1:-1].to_numpy()
# Target vector: the last column only.
y = dataset.iloc[:, -1].to_numpy()

In [None]:
# Display the feature matrix as a quick sanity check of the load step.
print(X)

In [None]:
# Display the target vector as a quick sanity check of the load step.
print(y)

## Encoding the categorical variables

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the three categorical columns and pass every other column
# through unchanged. The indices are positions within X, i.e. counted after the
# first dataset column was dropped when X was built.
# NOTE(review): the encoder is fitted on the full X, before the train/test
# split further down, so it learns the category sets from all rows.
ct = ColumnTransformer(
    transformers=[
        ('gender_encoder', OneHotEncoder(), [1]),  # Encode the Gender variable
        ('education_encoder', OneHotEncoder(), [2]),  # Encode the Education variable
        ('marital_encoder', OneHotEncoder(), [3]),  # Encode the Marital Status variable
    ],
    remainder='passthrough',  # Keep the rest of the columns as they are
    # Always return a dense ndarray. With the default threshold (0.3) the
    # stacked result may come back as a SciPy sparse matrix, which np.array()
    # would wrap in a 0-d object array instead of converting.
    sparse_threshold=0,
)

# Apply the ColumnTransformer to X; the encoded columns come first, followed
# by the passthrough columns.
X = np.asarray(ct.fit_transform(X))

## Splitting the dataset into the Training set and Test set

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set; the fixed random_state makes the
# shuffle (and therefore the split) repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

# XGBoost

## Training XGBoost on the Training set

In [None]:
from xgboost import XGBClassifier

# XGBoost classifier with the library's default hyperparameters, fitted on the
# training portion only.
classifier = XGBClassifier()
classifier.fit(X=X_train, y=y_train)

## Making the Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Score the fitted XGBoost model on the held-out test set.
y_pred = classifier.predict(X_test)

# Fraction of test rows predicted correctly; kept for the final comparison table.
cm_xgboost_score = accuracy_score(y_true=y_test, y_pred=y_pred)

# Rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

## Applying k-Fold Cross Validation

In [None]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation of the XGBoost estimator on the training set;
# `accuracies` holds one score per fold.
accuracies = cross_val_score(classifier, X_train, y_train, cv=10)

# Mean and standard deviation across folds, stored as percentage strings for
# the final comparison table.
kf_xgboost_accuracy = f"{accuracies.mean() * 100:.2f} %"
kf_xgboost_sd = f"{accuracies.std() * 100:.2f} %"

# CatBoost

## Training CatBoost on the Training set

In [None]:
from catboost import CatBoostClassifier

# CatBoost classifier with the library's default hyperparameters. This rebinds
# `classifier`, replacing the XGBoost model used in the cells above.
classifier = CatBoostClassifier()
classifier.fit(X=X_train, y=y_train)

## Making the Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score

# Score the fitted CatBoost model on the held-out test set.
y_pred = classifier.predict(X_test)

# Fraction of test rows predicted correctly; kept for the final comparison table.
cm_catboost_score = accuracy_score(y_true=y_test, y_pred=y_pred)

# Rows are true classes, columns are predicted classes.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

## Applying k-Fold Cross Validation

In [None]:
from sklearn.model_selection import cross_val_score

# 10-fold cross-validation of the CatBoost estimator on the training set;
# `accuracies` holds one score per fold.
accuracies = cross_val_score(classifier, X_train, y_train, cv=10)

# Mean and standard deviation across folds, stored as percentage strings for
# the final comparison table.
kf_catboost_accuracy = f"{accuracies.mean() * 100:.2f} %"
kf_catboost_sd = f"{accuracies.std() * 100:.2f} %"

# Determining the best model

In [None]:
# Assemble the comparison table one column at a time. 'CM Score' holds the raw
# test-set accuracy (a fraction), while the two k-fold columns hold the
# percentage strings produced in the cross-validation cells.
data = {}
data['Model'] = ['XGBoost', 'CatBoost']
data['CM Score'] = [cm_xgboost_score, cm_catboost_score]
data['k-Fold Accuracy'] = [kf_xgboost_accuracy, kf_catboost_accuracy]
data['k-Fold SD'] = [kf_xgboost_sd, kf_catboost_sd]

# One row per model, columns in the insertion order above.
df = pd.DataFrame(data)

print(df)