In [1]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_auc_score

In [2]:
# Loading dataframe partitions
df = pd.read_csv('../../data/spotify_churn_dataset_transformed.csv')
X = df.drop(columns=['is_churned'])
y = df['is_churned']

# Splitting into train and test partitions
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42, stratify=y)

In [3]:
# Initializing the SVC model
SVC_model = SVC(kernel='rbf')
SVC_model.fit(X_train, y_train)

# Getting predictions
preds = SVC_model.predict(X_test)

In [4]:
acc_score = accuracy_score(y_test, preds)
auc_score = roc_auc_score(y_test, preds)

print(f"Accuracy: {acc_score:0.3f}\nAUC Score: {auc_score:0.3f}")

Accuracy: 0.741
AUC Score: 0.500


In [5]:
# Defining stratified k-fold for imbalanced dataset
strat_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

param_grid = {
    'kernel' : ['rbf', 'linear'],
    'C' : [0.1, 1, 10],
    'gamma' : ['scale', 0.01, 0.1, 1]
}

grid_search = GridSearchCV(SVC(class_weight='balanced'), param_grid, cv=strat_kfold, scoring='roc_auc', n_jobs=-1, verbose=3)
grid_search.fit(X_train, y_train)

# Best parameters from grid search
print(f"Best Parameters: {grid_search.best_params_}")
print(f"Best Score: {grid_search.best_score_}")

Fitting 5 folds for each of 24 candidates, totalling 120 fits
[CV 4/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.460 total time=   2.9s
[CV 2/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.476 total time=   3.4s
[CV 5/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.523 total time=   3.4s
[CV 3/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.461 total time=   4.2s
[CV 1/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.526 total time=   4.9s
[CV 2/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.510 total time=   5.7s
[CV 3/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.522 total time=   5.6s
[CV 1/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.500 total time=   3.3s
[CV 2/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.482 total time=   3.3s
[CV 3/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.475 total time=   3.8s
[CV 4/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.463 total time=   3.4s
[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf