In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score
import numpy as np

In [2]:
# Loading dataframe partitions
df = pd.read_csv('../../data/spotify_churn_dataset_transformed.csv')
X = df.drop(columns=['is_churned'])
y = df['is_churned']

# Splitting into train and test partitions
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42, stratify=y)

In [3]:
# Initializing the Logistic Regressor
lr_model = LogisticRegression(penalty='l2', class_weight='balanced', random_state=42, verbose=1, n_jobs=-1, max_iter=5000)
lr_model.fit(X_train, y_train)

preds = lr_model.predict(X_test)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 20 concurrent workers.


In [4]:
acc_score = accuracy_score(y_test, preds)
auc_score = roc_auc_score(y_test, preds)

print(f"Accuracy: {acc_score:0.3f}\nAUC Score: {auc_score:0.3f}")

Accuracy: 0.522
AUC Score: 0.517


In [6]:
# # Defining stratified k-fold for imbalanced dataset
strat_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

param_grid = {
    'C': np.logspace(-3, 3, 10),
    'penalty': ['l1', 'l2', 'elasticnet'],
    'solver': ['saga'],  # saga supports all penalty type
    'l1_ratio': [0.0, 0.5, 1.0]
}


grid_search = RandomizedSearchCV(
    estimator= LogisticRegression(class_weight='balanced', max_iter=5000),
    param_distributions=param_grid,
    n_iter=25,
    cv=strat_kfold,
    scoring='roc_auc',
    n_jobs=-1,
    verbose=3,
    random_state=42    
)
grid_search.fit(X_train, y_train)

# Best parameters from grid search
print(f"Best Parameters: {grid_search.best_params_}")
print(f"Best Score (ROC-AUC): {grid_search.best_score_:.4f}")

Fitting 5 folds for each of 25 candidates, totalling 125 fits




[CV 1/5] END C=0.021544346900318832, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.531 total time=   9.3s
[CV 1/5] END C=10.0, l1_ratio=0.0, penalty=l2, solver=saga;, score=0.531 total time=   9.2s
[CV 3/5] END C=0.46415888336127775, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.508 total time=   9.3s
[CV 1/5] END C=0.001, l1_ratio=0.0, penalty=l1, solver=saga;, score=0.545 total time=   0.1s




[CV 1/5] END C=46.41588833612773, l1_ratio=1.0, penalty=l2, solver=saga;, score=0.531 total time=   9.2s
[CV 5/5] END C=0.021544346900318832, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.523 total time=   9.4s
[CV 2/5] END C=0.001, l1_ratio=0.0, penalty=l1, solver=saga;, score=0.503 total time=   0.1s
[CV 3/5] END C=0.001, l1_ratio=0.0, penalty=l1, solver=saga;, score=0.500 total time=   0.1s
[CV 4/5] END C=0.001, l1_ratio=0.0, penalty=l1, solver=saga;, score=0.477 total time=   0.1s
[CV 3/5] END C=10.0, l1_ratio=0.0, penalty=l2, solver=saga;, score=0.508 total time=   9.3s
[CV 5/5] END C=0.001, l1_ratio=0.0, penalty=l1, solver=saga;, score=0.526 total time=   0.1s
[CV 1/5] END C=0.021544346900318832, l1_ratio=1.0, penalty=elasticnet, solver=saga;, score=0.545 total time=   0.1s




[CV 2/5] END C=0.021544346900318832, l1_ratio=1.0, penalty=elasticnet, solver=saga;, score=0.503 total time=   0.1s
[CV 5/5] END C=0.021544346900318832, l1_ratio=1.0, penalty=elasticnet, solver=saga;, score=0.526 total time=   0.1s




[CV 4/5] END C=46.41588833612773, l1_ratio=1.0, penalty=l2, solver=saga;, score=0.481 total time=  10.5s
[CV 3/5] END C=0.021544346900318832, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.508 total time=  10.7s
[CV 2/5] END C=46.41588833612773, l1_ratio=1.0, penalty=l2, solver=saga;, score=0.504 total time=  10.6s
[CV 1/5] END C=0.46415888336127775, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.531 total time=  11.0s
[CV 4/5] END C=0.46415888336127775, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.481 total time=  11.1s
[CV 5/5] END C=0.46415888336127775, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.523 total time=  11.1s








[CV 2/5] END C=0.46415888336127775, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.504 total time=  13.1s




[CV 5/5] END C=46.41588833612773, l1_ratio=1.0, penalty=l2, solver=saga;, score=0.523 total time=  13.3s




[CV 2/5] END C=0.021544346900318832, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.504 total time=  13.9s
[CV 5/5] END C=10.0, l1_ratio=0.0, penalty=l2, solver=saga;, score=0.523 total time=  13.7s
[CV 2/5] END C=10.0, l1_ratio=0.0, penalty=l2, solver=saga;, score=0.504 total time=  13.6s
[CV 4/5] END C=0.021544346900318832, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.481 total time=  13.7s




[CV 4/5] END C=10.0, l1_ratio=0.0, penalty=l2, solver=saga;, score=0.481 total time=  14.0s
[CV 3/5] END C=46.41588833612773, l1_ratio=1.0, penalty=l2, solver=saga;, score=0.508 total time=  14.0s




[CV 3/5] END C=0.021544346900318832, l1_ratio=1.0, penalty=elasticnet, solver=saga;, score=0.500 total time=  10.8s
[CV 4/5] END C=0.021544346900318832, l1_ratio=1.0, penalty=elasticnet, solver=saga;, score=0.477 total time=  10.9s




[CV 2/5] END C=46.41588833612773, l1_ratio=0.0, penalty=elasticnet, solver=saga;, score=0.504 total time=   9.8s[CV 4/5] END C=46.41588833612773, l1_ratio=0.0, penalty=elasticnet, solver=saga;, score=0.481 total time=   9.6s





[CV 5/5] END C=46.41588833612773, l1_ratio=0.0, penalty=elasticnet, solver=saga;, score=0.523 total time=   9.9s
[CV 3/5] END C=46.41588833612773, l1_ratio=0.0, penalty=elasticnet, solver=saga;, score=0.508 total time=  10.0s
[CV 1/5] END C=46.41588833612773, l1_ratio=0.0, penalty=elasticnet, solver=saga;, score=0.531 total time=  10.2s




[CV 3/5] END C=0.46415888336127775, l1_ratio=0.5, penalty=l1, solver=saga;, score=0.506 total time=  11.7s
[CV 4/5] END C=0.46415888336127775, l1_ratio=0.5, penalty=l1, solver=saga;, score=0.481 total time=  11.7s
[CV 1/5] END C=0.46415888336127775, l1_ratio=0.5, penalty=l1, solver=saga;, score=0.532 total time=  11.9s
[CV 2/5] END C=0.46415888336127775, l1_ratio=0.5, penalty=l1, solver=saga;, score=0.503 total time=  11.8s




[CV 5/5] END C=0.46415888336127775, l1_ratio=0.5, penalty=l1, solver=saga;, score=0.524 total time=  13.2s




[CV 1/5] END C=0.004641588833612777, l1_ratio=0.0, penalty=l2, solver=saga;, score=0.532 total time=  12.0s
[CV 2/5] END C=0.004641588833612777, l1_ratio=0.0, penalty=l2, solver=saga;, score=0.504 total time=  12.0s




[CV 5/5] END C=0.004641588833612777, l1_ratio=0.0, penalty=l2, solver=saga;, score=0.524 total time=  11.9s




[CV 3/5] END C=0.004641588833612777, l1_ratio=0.0, penalty=l2, solver=saga;, score=0.508 total time=  12.6s
[CV 4/5] END C=0.004641588833612777, l1_ratio=0.0, penalty=l2, solver=saga;, score=0.481 total time=  12.5s




[CV 3/5] END C=0.46415888336127775, l1_ratio=1.0, penalty=elasticnet, solver=saga;, score=0.506 total time=  15.1s




[CV 1/5] END C=0.46415888336127775, l1_ratio=1.0, penalty=elasticnet, solver=saga;, score=0.532 total time=  15.6s
[CV 2/5] END C=0.46415888336127775, l1_ratio=1.0, penalty=elasticnet, solver=saga;, score=0.503 total time=  15.6s




[CV 5/5] END C=0.46415888336127775, l1_ratio=1.0, penalty=elasticnet, solver=saga;, score=0.524 total time=  11.4s
[CV 4/5] END C=0.46415888336127775, l1_ratio=1.0, penalty=elasticnet, solver=saga;, score=0.481 total time=  11.5s




[CV 1/5] END C=0.1, l1_ratio=1.0, penalty=elasticnet, solver=saga;, score=0.533 total time=  11.1s
[CV 2/5] END C=0.1, l1_ratio=1.0, penalty=elasticnet, solver=saga;, score=0.502 total time=  11.1s
[CV 3/5] END C=0.1, l1_ratio=1.0, penalty=elasticnet, solver=saga;, score=0.503 total time=  11.0s
[CV 2/5] END C=0.004641588833612777, l1_ratio=0.5, penalty=l1, solver=saga;, score=0.503 total time=   0.1s
[CV 1/5] END C=0.004641588833612777, l1_ratio=0.5, penalty=l1, solver=saga;, score=0.545 total time=   0.1s[CV 3/5] END C=0.004641588833612777, l1_ratio=0.5, penalty=l1, solver=saga;, score=0.500 total time=   0.1s

[CV 4/5] END C=0.004641588833612777, l1_ratio=0.5, penalty=l1, solver=saga;, score=0.477 total time=   0.1s
[CV 5/5] END C=0.004641588833612777, l1_ratio=0.5, penalty=l1, solver=saga;, score=0.526 total time=   0.1s




[CV 4/5] END C=0.1, l1_ratio=1.0, penalty=elasticnet, solver=saga;, score=0.479 total time=  11.3s




[CV 2/5] END C=1000.0, l1_ratio=0.0, penalty=l1, solver=saga;, score=0.504 total time=  12.5s
[CV 1/5] END C=1000.0, l1_ratio=0.0, penalty=l1, solver=saga;, score=0.531 total time=  12.6s
[CV 1/5] END C=0.021544346900318832, l1_ratio=0.0, penalty=l1, solver=saga;, score=0.545 total time=   0.1s
[CV 2/5] END C=0.021544346900318832, l1_ratio=0.0, penalty=l1, solver=saga;, score=0.503 total time=   0.1s
[CV 4/5] END C=1000.0, l1_ratio=0.0, penalty=l1, solver=saga;, score=0.481 total time=  12.5s




[CV 3/5] END C=1000.0, l1_ratio=0.0, penalty=l1, solver=saga;, score=0.508 total time=  12.7s
[CV 5/5] END C=0.021544346900318832, l1_ratio=0.0, penalty=l1, solver=saga;, score=0.526 total time=   0.1s
[CV 5/5] END C=1000.0, l1_ratio=0.0, penalty=l1, solver=saga;, score=0.523 total time=  12.8s




[CV 5/5] END C=0.1, l1_ratio=1.0, penalty=elasticnet, solver=saga;, score=0.524 total time=  12.3s




[CV 1/5] END C=10.0, l1_ratio=0.0, penalty=elasticnet, solver=saga;, score=0.531 total time=  11.4s




[CV 2/5] END C=10.0, l1_ratio=0.0, penalty=elasticnet, solver=saga;, score=0.504 total time=  11.6s




[CV 3/5] END C=10.0, l1_ratio=0.0, penalty=elasticnet, solver=saga;, score=0.508 total time=  11.6s




[CV 4/5] END C=10.0, l1_ratio=0.0, penalty=elasticnet, solver=saga;, score=0.481 total time=  11.3s




[CV 5/5] END C=10.0, l1_ratio=0.0, penalty=elasticnet, solver=saga;, score=0.523 total time=  12.0s




[CV 2/5] END C=0.001, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.504 total time=   9.0s
[CV 1/5] END C=0.001, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.532 total time=   9.1s
[CV 3/5] END C=0.001, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.508 total time=   9.1s




[CV 4/5] END C=0.001, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.481 total time=   9.2s




[CV 5/5] END C=0.001, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.524 total time=   9.8s




[CV 1/5] END C=0.1, l1_ratio=0.0, penalty=l2, solver=saga;, score=0.531 total time=   9.8s
[CV 2/5] END C=0.1, l1_ratio=0.0, penalty=l2, solver=saga;, score=0.504 total time=   9.8s




[CV 1/5] END C=1000.0, l1_ratio=0.5, penalty=elasticnet, solver=saga;, score=0.531 total time=  14.8s
[CV 5/5] END C=1000.0, l1_ratio=0.5, penalty=elasticnet, solver=saga;, score=0.523 total time=  12.4s
[CV 4/5] END C=1000.0, l1_ratio=0.5, penalty=elasticnet, solver=saga;, score=0.481 total time=  12.5s
[CV 3/5] END C=1000.0, l1_ratio=0.5, penalty=elasticnet, solver=saga;, score=0.508 total time=  14.7s
[CV 2/5] END C=1000.0, l1_ratio=0.5, penalty=elasticnet, solver=saga;, score=0.504 total time=  14.8s




[CV 3/5] END C=0.021544346900318832, l1_ratio=0.0, penalty=l1, solver=saga;, score=0.500 total time=  12.2s
[CV 4/5] END C=0.021544346900318832, l1_ratio=0.0, penalty=l1, solver=saga;, score=0.477 total time=  12.2s




[CV 3/5] END C=0.1, l1_ratio=0.0, penalty=l2, solver=saga;, score=0.508 total time=   9.8s




[CV 4/5] END C=0.1, l1_ratio=0.0, penalty=l2, solver=saga;, score=0.481 total time=  11.8s




[CV 5/5] END C=0.1, l1_ratio=0.0, penalty=l2, solver=saga;, score=0.523 total time=  12.4s
[CV 1/5] END C=2.154434690031882, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.531 total time=  12.2s




[CV 2/5] END C=2.154434690031882, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.504 total time=  12.7s
[CV 3/5] END C=2.154434690031882, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.508 total time=  12.1s




[CV 4/5] END C=2.154434690031882, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.481 total time=   9.4s
[CV 5/5] END C=2.154434690031882, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.523 total time=   9.4s




[CV 3/5] END C=215.44346900318823, l1_ratio=0.0, penalty=l2, solver=saga;, score=0.508 total time=   9.1s
[CV 2/5] END C=215.44346900318823, l1_ratio=0.0, penalty=l2, solver=saga;, score=0.504 total time=   9.3s




[CV 1/5] END C=10.0, l1_ratio=1.0, penalty=elasticnet, solver=saga;, score=0.532 total time=  12.5s
[CV 2/5] END C=10.0, l1_ratio=1.0, penalty=elasticnet, solver=saga;, score=0.504 total time=  12.5s




[CV 1/5] END C=215.44346900318823, l1_ratio=0.0, penalty=l2, solver=saga;, score=0.531 total time=  11.7s




[CV 3/5] END C=10.0, l1_ratio=1.0, penalty=elasticnet, solver=saga;, score=0.508 total time=  13.5s
[CV 4/5] END C=215.44346900318823, l1_ratio=0.0, penalty=l2, solver=saga;, score=0.481 total time=  12.4s




[CV 5/5] END C=215.44346900318823, l1_ratio=0.0, penalty=l2, solver=saga;, score=0.523 total time=  12.6s
[CV 5/5] END C=10.0, l1_ratio=1.0, penalty=elasticnet, solver=saga;, score=0.523 total time=  13.4s




[CV 4/5] END C=10.0, l1_ratio=1.0, penalty=elasticnet, solver=saga;, score=0.481 total time=  13.6s




[CV 1/5] END C=46.41588833612773, l1_ratio=1.0, penalty=l1, solver=saga;, score=0.531 total time=  13.4s
[CV 2/5] END C=46.41588833612773, l1_ratio=1.0, penalty=l1, solver=saga;, score=0.504 total time=  13.3s




[CV 3/5] END C=46.41588833612773, l1_ratio=1.0, penalty=l1, solver=saga;, score=0.508 total time=  13.3s




[CV 4/5] END C=215.44346900318823, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.481 total time=   9.4s




[CV 5/5] END C=215.44346900318823, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.523 total time=   9.5s




[CV 1/5] END C=215.44346900318823, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.531 total time=  11.9s




[CV 2/5] END C=215.44346900318823, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.504 total time=  11.0s
[CV 3/5] END C=215.44346900318823, l1_ratio=0.5, penalty=l2, solver=saga;, score=0.508 total time=  10.9s




[CV 4/5] END C=46.41588833612773, l1_ratio=1.0, penalty=l1, solver=saga;, score=0.481 total time=  14.2s




[CV 5/5] END C=46.41588833612773, l1_ratio=1.0, penalty=l1, solver=saga;, score=0.523 total time=  13.5s




[CV 2/5] END C=0.1, l1_ratio=0.5, penalty=l1, solver=saga;, score=0.502 total time=  10.7s




[CV 1/5] END C=0.1, l1_ratio=0.5, penalty=l1, solver=saga;, score=0.533 total time=  11.0s
[CV 3/5] END C=0.1, l1_ratio=0.5, penalty=l1, solver=saga;, score=0.503 total time=  10.4s




[CV 4/5] END C=0.1, l1_ratio=0.5, penalty=l1, solver=saga;, score=0.479 total time=  10.4s




[CV 1/5] END C=0.1, l1_ratio=1.0, penalty=l1, solver=saga;, score=0.533 total time=   9.4s




[CV 3/5] END C=0.1, l1_ratio=1.0, penalty=l1, solver=saga;, score=0.503 total time=   9.3s
[CV 5/5] END C=0.1, l1_ratio=0.5, penalty=l1, solver=saga;, score=0.524 total time=  10.4s
[CV 2/5] END C=0.1, l1_ratio=1.0, penalty=l1, solver=saga;, score=0.502 total time=   9.7s
[CV 5/5] END C=0.1, l1_ratio=1.0, penalty=l1, solver=saga;, score=0.524 total time=   9.2s
[CV 4/5] END C=0.1, l1_ratio=1.0, penalty=l1, solver=saga;, score=0.479 total time=   9.3s
Best Parameters: {'solver': 'saga', 'penalty': 'l1', 'l1_ratio': 0.0, 'C': np.float64(0.001)}
Best Score (ROC-AUC): 0.5101


