In [4]:
import os
from pathlib import Path

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB


# Label column that both models are trained to predict.
target_col = "default.payment.next.month"

# Directory holding the processed train/validation CSVs. The default is the
# original local path; set the CREDIT_RISK_DATA_DIR environment variable to run
# this notebook on another machine without editing the cell.
DATA_DIR = Path(
    os.environ.get(
        "CREDIT_RISK_DATA_DIR",
        "/home/danial/Data Science/Credit Risk Analysis/data/processed/Final",
    )
)

train = pd.read_csv(DATA_DIR / "train.csv")
val   = pd.read_csv(DATA_DIR / "val.csv")

# Separate each split into its feature matrix (X) and label vector (y).
X_train = train.drop(columns=[target_col])
y_train = train[target_col]
X_val   = val.drop(columns=[target_col])
y_val   = val[target_col]

# Fail early with a readable message if the two splits do not expose the same
# features in the same order; otherwise validation predictions would be
# computed on misaligned columns.
if list(X_train.columns) != list(X_val.columns):
    raise ValueError(
        "train.csv and val.csv must contain identical feature columns in the same order"
    )


In [5]:
# Candidate settings explored for logistic regression: four values of the
# inverse regularisation strength C, crossed with two solvers.
param_grid_logreg = dict(
    C=[0.01, 0.1, 1, 10],
    solver=['liblinear', 'lbfgs'],
)


In [7]:
# Base classifier handed to the search; the seed is fixed for repeatability and
# the iteration cap is raised to 10000.
logreg = LogisticRegression(random_state=7, max_iter=10000)

# Exhaustive search over param_grid_logreg, scored by accuracy with 3-fold
# cross-validation and n_jobs=-1 for parallel execution.
grid_logreg = GridSearchCV(
    logreg,
    param_grid_logreg,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
)
grid_logreg.fit(X_train, y_train)

# Report the winning configuration and its mean cross-validated accuracy.
print("Best Params (LogReg):", grid_logreg.best_params_)
print("Best CV Accuracy (LogReg):", grid_logreg.best_score_)


Best Params (LogReg): {'C': 0.01, 'solver': 'lbfgs'}
Best CV Accuracy (LogReg): 0.8070416666666667


In [8]:
# grid_logreg was built with GridSearchCV's default refit=True, so
# best_estimator_ has already been retrained on the full training set using the
# winning hyper-parameters. Calling fit() on it again would only repeat that
# work, so the tuned model is used as-is.
best_logreg = grid_logreg.best_estimator_

# Score the tuned model on the held-out validation split.
y_val_pred_logreg = best_logreg.predict(X_val)
val_acc_logreg = accuracy_score(y_val, y_val_pred_logreg)

print("Validation Accuracy (LogReg):", val_acc_logreg)


Validation Accuracy (LogReg): 0.816


In [9]:
# Candidate var_smoothing values explored for Gaussian Naive Bayes.
param_grid_gnb = dict(
    var_smoothing=[1e-09, 1e-08, 1e-07, 1e-06],
)


In [10]:
# Base Gaussian Naive Bayes classifier with default settings; only the values
# listed in param_grid_gnb are varied by the search.
gnb = GaussianNB()

# Exhaustive search over param_grid_gnb, scored by accuracy with 3-fold
# cross-validation and n_jobs=-1 for parallel execution.
grid_gnb = GridSearchCV(
    gnb,
    param_grid_gnb,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
)
grid_gnb.fit(X_train, y_train)

# Report the winning configuration and its mean cross-validated accuracy.
print("Best Params (GNB):", grid_gnb.best_params_)
print("Best CV Accuracy (GNB):", grid_gnb.best_score_)


Best Params (GNB): {'var_smoothing': 1e-07}
Best CV Accuracy (GNB): 0.8019583333333333


In [11]:
# grid_gnb was built with GridSearchCV's default refit=True, so
# best_estimator_ has already been retrained on the full training set using the
# winning var_smoothing. Calling fit() on it again would only repeat that work,
# so the tuned model is used as-is.
best_gnb = grid_gnb.best_estimator_

# Score the tuned model on the held-out validation split.
y_val_pred_gnb = best_gnb.predict(X_val)
val_acc_gnb = accuracy_score(y_val, y_val_pred_gnb)

print("Validation Accuracy (GNB):", val_acc_gnb)


Validation Accuracy (GNB): 0.8106666666666666


In [12]:
# Side-by-side summary of the validation accuracy of both tuned models,
# rounded to four decimal places.
summary_lines = [
    f"Logistic Regression (Validation Accuracy): {val_acc_logreg:.4f}",
    f"GaussianNB        (Validation Accuracy): {val_acc_gnb:.4f}",
]
print("\n".join(summary_lines))


Logistic Regression (Validation Accuracy): 0.8160
GaussianNB        (Validation Accuracy): 0.8107
