In [3]:
import numpy as np
# Load the preprocessed train/test arrays that an earlier step saved under data/.

# Feature matrices read from the *_scaled files.
X_train_scaled = np.load("data/X_train_scaled.npy")
X_test_scaled = np.load("data/X_test_scaled.npy")

# Feature matrices read from the *_unscaled files.
X_train = np.load("data/X_train_unscaled.npy")
X_test = np.load("data/X_test_unscaled.npy")

# Target vectors.
y_train = np.load("data/y_train.npy")
y_test = np.load("data/y_test.npy")

# Fail fast if the files are out of sync: the six arrays come from independent
# files, so nothing else guarantees that features and labels line up row by row.
assert X_train_scaled.shape[0] == X_train.shape[0] == y_train.shape[0], (
    "train arrays disagree on row count: "
    f"{X_train_scaled.shape[0]} scaled, {X_train.shape[0]} unscaled, {y_train.shape[0]} labels"
)
# NOTE(review): presumes the test files follow the same one-row-per-label layout
# as the train files — confirm against the preprocessing step.
assert X_test_scaled.shape[0] == X_test.shape[0] == y_test.shape[0], (
    "test arrays disagree on row count: "
    f"{X_test_scaled.shape[0]} scaled, {X_test.shape[0]} unscaled, {y_test.shape[0]} labels"
)

In [5]:
from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.linear_model import LogisticRegression

# Baseline model: class-weighted logistic regression, scored with 5-fold
# stratified cross-validated ROC-AUC on the pre-scaled training features.
#
# n_jobs is deliberately not set on the estimator: LogisticRegression only uses
# it to parallelise over classes in a one-vs-rest fit, so it does nothing for a
# two-class target, and the folds are already run in parallel by
# cross_val_score below.
lr = LogisticRegression(
    max_iter=1000,
    class_weight='balanced',
    solver='lbfgs'
)

# Stratified K-Fold: shuffled with a fixed seed so every model evaluated with
# `skf` sees the same fold assignment.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Cross-validation ROC-AUC (one score per fold).
# NOTE(review): X_train_scaled was scaled before the folds are drawn here, so the
# scaler presumably saw each validation fold's rows — TODO confirm; fitting the
# scaler inside a Pipeline passed to cross_val_score would rule that out.
cv_scores_lr = cross_val_score(
    lr,
    X_train_scaled,
    y_train,
    cv=skf,
    scoring='roc_auc',
    n_jobs=-1
)

print("Logistic Regression CV ROC-AUC:")
print(f"Mean: {cv_scores_lr.mean():.4f}")
print(f"Std:  {cv_scores_lr.std():.4f}")

Logistic Regression CV ROC-AUC:
Mean: 0.9941
Std:  0.0001


In [7]:
from sklearn.ensemble import RandomForestClassifier

# Random forest: 200 class-weighted trees with a fixed seed, evaluated on
# X_train (the array loaded from the *_unscaled file) using the shared `skf` folds.
# NOTE(review): the estimator and cross_val_score both request n_jobs=-1; nested
# parallelism like this may oversubscribe the CPU — consider parallelising at
# one level only.
rf = RandomForestClassifier(
    n_estimators=200,
    class_weight="balanced",
    n_jobs=-1,
    random_state=42,
)

# One ROC-AUC value per fold.
cv_scores_rf = cross_val_score(
    estimator=rf,
    X=X_train,
    y=y_train,
    scoring="roc_auc",
    cv=skf,
    n_jobs=-1,
)

# NOTE(review): the recorded run reports a mean of 0.999999 with zero spread.
# A score this close to perfect is worth checking for target leakage or
# duplicated rows across folds before it is trusted.
print(
    "Random Forest CV ROC-AUC:",
    f"Mean: {cv_scores_rf.mean():.6f}",
    f"Std:  {cv_scores_rf.std():.6f}",
    sep="\n",
)

Random Forest CV ROC-AUC:
Mean: 0.999999
Std:  0.000000


In [9]:
from xgboost import XGBClassifier

# Imbalance weight for XGBoost: count of label 0 divided by count of label 1.
# The two-way unpack only succeeds when np.bincount(y_train) yields exactly two
# counts, i.e. the labels are non-negative integers with maximum value 1.
neg, pos = np.bincount(y_train)
scale_pos_weight = neg / pos

# Gradient-boosted trees with row and column subsampling and a fixed seed.
xgb = XGBClassifier(
    objective="binary:logistic",
    eval_metric="logloss",
    n_estimators=300,
    learning_rate=0.05,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    scale_pos_weight=scale_pos_weight,
    n_jobs=-1,
    random_state=42,
)

# One ROC-AUC value per fold, using X_train and the shared `skf` folds.
cv_scores_xgb = cross_val_score(
    estimator=xgb,
    X=X_train,
    y=y_train,
    scoring="roc_auc",
    cv=skf,
    n_jobs=-1,
)

# NOTE(review): the recorded run prints exactly 1.00000000 with zero spread.
# Perfect separation on every fold usually points to a feature that encodes the
# target — verify the preprocessing before reporting this number.
print(
    "XGBoost CV ROC-AUC:",
    f"Mean: {cv_scores_xgb.mean():.8f}",
    f"Std:  {cv_scores_xgb.std():.8f}",
    sep="\n",
)

XGBoost CV ROC-AUC:
Mean: 1.00000000
Std:  0.00000000
