In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier, AdaBoostClassifier
import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report

from pathlib import Path

# Location of the dataset, relative to the notebook's working directory.
DATA_PATH = Path("credit_card_default.csv")

# Only ask for an upload when the file is not already on disk. Prompting
# unconditionally blocks "Restart & Run All" on a widget, and a repeated
# upload of the same file name is not guaranteed to overwrite the copy that
# read_csv opens below.
uploaded = {}
if not DATA_PATH.exists():
    # Imported lazily so the notebook also runs outside Google Colab
    # whenever the CSV is already present.
    from google.colab import files
    uploaded = files.upload()

# Load the dataset
data = pd.read_csv(DATA_PATH)

Saving credit_card_default.csv to credit_card_default.csv


In [1]:
# Pre-processing
RANDOM_STATE = 42  # one seed for the split and every stochastic model

## Separate features and target variable (the target is the last column)
target_column = data.columns[-1]
# A missing label cannot be imputed: the mean of a 0/1 column is not a class,
# and LabelEncoder would turn it into an extra category. Drop those rows.
data_labeled = data.dropna(subset=[target_column])
X = data_labeled.drop(columns=[target_column]).to_numpy(dtype=float)  # Features

## Use label encoding for binary target
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(data_labeled[target_column])  # Target

## Split the dataset into training and testing sets
# Split BEFORE imputing/scaling so that no test-set statistics leak into the
# transformers that the models are trained with.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

## Handle missing values and normalize numerical features
# Both transformers learn their statistics from the training rows only and
# are then applied unchanged to the test rows.
imputer = SimpleImputer(strategy='mean')
scaler = StandardScaler()
X_train = scaler.fit_transform(imputer.fit_transform(X_train_raw))
X_test = scaler.transform(imputer.transform(X_test_raw))

# Full feature matrix transformed with the training-set statistics
# (kept under its original name for any later cell that uses it).
X_scaled = scaler.transform(imputer.transform(X))


# Algorithm Implementation
def fit_and_report(name, model, X_fit, y_fit, X_eval, y_eval):
    """Fit a classifier, print its evaluation, and return its predictions.

    Parameters
    ----------
    name : str
        Label used as the prefix of the printed accuracy line.
    model : estimator
        Unfitted classifier exposing ``fit`` and ``predict``; fitted in place.
    X_fit, y_fit : array-like
        Features and labels the model is trained on.
    X_eval, y_eval : array-like
        Features and labels the model is scored on.

    Returns
    -------
    Predicted labels for ``X_eval``.
    """
    model.fit(X_fit, y_fit)
    predictions = model.predict(X_eval)
    print(f"{name} Accuracy:", accuracy_score(y_eval, predictions))
    print(classification_report(y_eval, predictions))
    return predictions


## 1. k-Nearest Neighbors
knn = KNeighborsClassifier()
knn_pred = fit_and_report("kNN", knn, X_train, y_train, X_test, y_test)

## 2. Support Vector Machine
svm = SVC()
svm_pred = fit_and_report("SVM", svm, X_train, y_train, X_test, y_test)

## 3. Gradient Boosting
gb = GradientBoostingClassifier(random_state=RANDOM_STATE)
gb_pred = fit_and_report("Gradient Boosting", gb, X_train, y_train, X_test, y_test)

## 4. AdaBoost
ada = AdaBoostClassifier(random_state=RANDOM_STATE)
ada_pred = fit_and_report("AdaBoost", ada, X_train, y_train, X_test, y_test)

## 5. XGBoost
xgb_clf = xgb.XGBClassifier(random_state=RANDOM_STATE)
xgb_pred = fit_and_report("XGBoost", xgb_clf, X_train, y_train, X_test, y_test)

NameError: name 'SimpleImputer' is not defined

In [4]:
# Hyperparameter Tuning (example for Gradient Boosting)
## Define parameter grid for hyperparameter tuning
param_grid = {
    'n_estimators': [50, 100, 200],       # Number of boosting stages
    'learning_rate': [0.01, 0.1, 0.5]      # Learning rate, shrinks the contribution of each tree
}

## Perform GridSearchCV for Gradient Boosting
# The estimator is seeded (same seed as the train/test split) so that the
# selected configuration and its score are reproducible across runs.
grid_search = GridSearchCV(
    estimator=GradientBoostingClassifier(random_state=42),
    param_grid=param_grid,
    cv=3,        # 3-fold cross-validation on the training split only
    n_jobs=-1,   # use every available core
)
grid_search.fit(X_train, y_train)

## Get the best estimator from GridSearchCV
# With the default refit=True, GridSearchCV has already retrained the winning
# configuration on the whole training split, so no further fit is needed.
best_gb_model = grid_search.best_estimator_
print("Best parameters:", grid_search.best_params_)

## Predict using the best Gradient Boosting model
best_gb_pred = best_gb_model.predict(X_test)

## Print accuracy and classification report for the tuned Gradient Boosting model
print("Tuned Gradient Boosting Accuracy:", accuracy_score(y_test, best_gb_pred))
print(classification_report(y_test, best_gb_pred))

Tuned Gradient Boosting Accuracy: 0.82
              precision    recall  f1-score   support

           0       0.84      0.95      0.89      4687
           1       0.67      0.35      0.46      1313

    accuracy                           0.82      6000
   macro avg       0.75      0.65      0.68      6000
weighted avg       0.80      0.82      0.80      6000

