In [1]:
import pandas as pd 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, RepeatedStratifiedKFold
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.preprocessing import MinMaxScaler
import glob
import os
import time 

In [2]:
train_path = max(glob.glob('/Users/ivankoh/Library/CloudStorage/OneDrive-NationalUniversityofSingapore/1D/NUS Y3S1/BT4222/Data/train/*.csv'), key=os.path.getctime) 
print(train_path)
df = pd.read_csv(train_path)

/Users/ivankoh/Library/CloudStorage/OneDrive-NationalUniversityofSingapore/1D/NUS Y3S1/BT4222/Data/train/kickstarter_train_final_20221022-142838.csv


## Get to the correct data format

In [3]:
X_train, y_train = df.drop('state', axis=1).to_numpy(), df['state'].to_numpy()

## Randomized Search with K-fold CV applied to SVC

**Hyperparameter Tuning**

We will use RandomizedSearchCV for hyperparameter tuning.

### 1. Training

In [4]:
# Hyper-parameters
gamma = [1e-7,1e-4,1e-3,1e-1,1e0,1e1]
C = [1e-3,1e-2,1e0,1e1,1e3,1e5]
kernels = ['rbf', 'linear']  #poly takes forever to train 'sigmoid'

params = {'C': C, 'gamma': gamma, 'kernel': kernels}
cv_method = RepeatedStratifiedKFold(n_splits=5, 
                                    n_repeats=3, 
                                    random_state=2022)
svc_randomcv = RandomizedSearchCV(
    estimator=SVC(max_iter=100),
    param_distributions=params,
    n_iter=10, 
    cv=cv_method,
    verbose=2,
    random_state=2022,
    # n_jobs=-1, # use all processors,
    scoring='roc_auc'
)

svc_randomcv.fit(X_train, y_train)

Fitting 15 folds for each of 10 candidates, totalling 150 fits




[CV] END ..................C=100000.0, gamma=1.0, kernel=rbf; total time=   1.8s




[CV] END ..................C=100000.0, gamma=1.0, kernel=rbf; total time=   1.7s




[CV] END ..................C=100000.0, gamma=1.0, kernel=rbf; total time=   1.7s




[CV] END ..................C=100000.0, gamma=1.0, kernel=rbf; total time=   1.7s




[CV] END ..................C=100000.0, gamma=1.0, kernel=rbf; total time=   1.7s




[CV] END ..................C=100000.0, gamma=1.0, kernel=rbf; total time=   1.7s




[CV] END ..................C=100000.0, gamma=1.0, kernel=rbf; total time=   1.7s




[CV] END ..................C=100000.0, gamma=1.0, kernel=rbf; total time=   1.7s




[CV] END ..................C=100000.0, gamma=1.0, kernel=rbf; total time=   1.7s




[CV] END ..................C=100000.0, gamma=1.0, kernel=rbf; total time=   1.7s




[CV] END ..................C=100000.0, gamma=1.0, kernel=rbf; total time=   1.7s




[CV] END ..................C=100000.0, gamma=1.0, kernel=rbf; total time=   1.7s




[CV] END ..................C=100000.0, gamma=1.0, kernel=rbf; total time=   1.7s




[CV] END ..................C=100000.0, gamma=1.0, kernel=rbf; total time=   1.7s




[CV] END ..................C=100000.0, gamma=1.0, kernel=rbf; total time=   1.7s




[CV] END ..................C=0.001, gamma=0.1, kernel=linear; total time=   1.6s




[CV] END ..................C=0.001, gamma=0.1, kernel=linear; total time=   1.6s




[CV] END ..................C=0.001, gamma=0.1, kernel=linear; total time=   1.5s




[CV] END ..................C=0.001, gamma=0.1, kernel=linear; total time=   1.6s




[CV] END ..................C=0.001, gamma=0.1, kernel=linear; total time=   1.5s




[CV] END ..................C=0.001, gamma=0.1, kernel=linear; total time=   1.6s




[CV] END ..................C=0.001, gamma=0.1, kernel=linear; total time=   1.5s




[CV] END ..................C=0.001, gamma=0.1, kernel=linear; total time=   1.6s




[CV] END ..................C=0.001, gamma=0.1, kernel=linear; total time=   1.6s




[CV] END ..................C=0.001, gamma=0.1, kernel=linear; total time=   1.5s




[CV] END ..................C=0.001, gamma=0.1, kernel=linear; total time=   1.6s




[CV] END ..................C=0.001, gamma=0.1, kernel=linear; total time=   1.5s




[CV] END ..................C=0.001, gamma=0.1, kernel=linear; total time=   1.5s




[CV] END ..................C=0.001, gamma=0.1, kernel=linear; total time=   1.5s




[CV] END ..................C=0.001, gamma=0.1, kernel=linear; total time=   1.5s




[CV] END ...............C=100000.0, gamma=0.0001, kernel=rbf; total time=   1.7s




[CV] END ...............C=100000.0, gamma=0.0001, kernel=rbf; total time=   1.7s




[CV] END ...............C=100000.0, gamma=0.0001, kernel=rbf; total time=   1.7s




[CV] END ...............C=100000.0, gamma=0.0001, kernel=rbf; total time=   1.7s




[CV] END ...............C=100000.0, gamma=0.0001, kernel=rbf; total time=   1.7s




[CV] END ...............C=100000.0, gamma=0.0001, kernel=rbf; total time=   1.7s




[CV] END ...............C=100000.0, gamma=0.0001, kernel=rbf; total time=   1.7s




[CV] END ...............C=100000.0, gamma=0.0001, kernel=rbf; total time=   1.7s




[CV] END ...............C=100000.0, gamma=0.0001, kernel=rbf; total time=   1.7s




[CV] END ...............C=100000.0, gamma=0.0001, kernel=rbf; total time=   1.7s




[CV] END ...............C=100000.0, gamma=0.0001, kernel=rbf; total time=   1.7s




[CV] END ...............C=100000.0, gamma=0.0001, kernel=rbf; total time=   1.7s




[CV] END ...............C=100000.0, gamma=0.0001, kernel=rbf; total time=   1.7s




[CV] END ...............C=100000.0, gamma=0.0001, kernel=rbf; total time=   1.7s




[CV] END ...............C=100000.0, gamma=0.0001, kernel=rbf; total time=   1.7s




[CV] END ....................C=10.0, gamma=1e-07, kernel=rbf; total time=   1.7s




[CV] END ....................C=10.0, gamma=1e-07, kernel=rbf; total time=   1.7s




[CV] END ....................C=10.0, gamma=1e-07, kernel=rbf; total time=   1.7s




[CV] END ....................C=10.0, gamma=1e-07, kernel=rbf; total time=   1.7s




[CV] END ....................C=10.0, gamma=1e-07, kernel=rbf; total time=   1.7s




[CV] END ....................C=10.0, gamma=1e-07, kernel=rbf; total time=   1.7s




[CV] END ....................C=10.0, gamma=1e-07, kernel=rbf; total time=   1.7s




[CV] END ....................C=10.0, gamma=1e-07, kernel=rbf; total time=   1.7s




[CV] END ....................C=10.0, gamma=1e-07, kernel=rbf; total time=   1.7s




[CV] END ....................C=10.0, gamma=1e-07, kernel=rbf; total time=   1.7s




[CV] END ....................C=10.0, gamma=1e-07, kernel=rbf; total time=   1.7s




[CV] END ....................C=10.0, gamma=1e-07, kernel=rbf; total time=   1.7s




[CV] END ....................C=10.0, gamma=1e-07, kernel=rbf; total time=   1.7s




[CV] END ....................C=10.0, gamma=1e-07, kernel=rbf; total time=   1.7s




[CV] END ....................C=10.0, gamma=1e-07, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.001, gamma=0.1, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.001, gamma=0.1, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.001, gamma=0.1, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.001, gamma=0.1, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.001, gamma=0.1, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.001, gamma=0.1, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.001, gamma=0.1, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.001, gamma=0.1, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.001, gamma=0.1, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.001, gamma=0.1, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.001, gamma=0.1, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.001, gamma=0.1, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.001, gamma=0.1, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.001, gamma=0.1, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.001, gamma=0.1, kernel=rbf; total time=   1.7s




[CV] END ................C=0.001, gamma=1e-07, kernel=linear; total time=   1.5s




[CV] END ................C=0.001, gamma=1e-07, kernel=linear; total time=   1.6s




[CV] END ................C=0.001, gamma=1e-07, kernel=linear; total time=   1.5s




[CV] END ................C=0.001, gamma=1e-07, kernel=linear; total time=   1.5s




[CV] END ................C=0.001, gamma=1e-07, kernel=linear; total time=   1.5s




[CV] END ................C=0.001, gamma=1e-07, kernel=linear; total time=   1.5s




[CV] END ................C=0.001, gamma=1e-07, kernel=linear; total time=   1.5s




[CV] END ................C=0.001, gamma=1e-07, kernel=linear; total time=   1.5s




[CV] END ................C=0.001, gamma=1e-07, kernel=linear; total time=   1.5s




[CV] END ................C=0.001, gamma=1e-07, kernel=linear; total time=   1.5s




[CV] END ................C=0.001, gamma=1e-07, kernel=linear; total time=   1.5s




[CV] END ................C=0.001, gamma=1e-07, kernel=linear; total time=   1.5s




[CV] END ................C=0.001, gamma=1e-07, kernel=linear; total time=   1.5s




[CV] END ................C=0.001, gamma=1e-07, kernel=linear; total time=   1.5s




[CV] END ................C=0.001, gamma=1e-07, kernel=linear; total time=   1.5s




[CV] END ...................C=1000.0, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END ...................C=1000.0, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END ...................C=1000.0, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END ...................C=1000.0, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END ...................C=1000.0, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END ...................C=1000.0, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END ...................C=1000.0, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END ...................C=1000.0, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END ...................C=1000.0, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END ...................C=1000.0, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END ...................C=1000.0, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END ...................C=1000.0, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END ...................C=1000.0, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END ...................C=1000.0, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END ...................C=1000.0, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END ...............C=100000.0, gamma=0.1, kernel=linear; total time=   1.5s




[CV] END ...............C=100000.0, gamma=0.1, kernel=linear; total time=   1.5s




[CV] END ...............C=100000.0, gamma=0.1, kernel=linear; total time=   1.5s




[CV] END ...............C=100000.0, gamma=0.1, kernel=linear; total time=   1.5s




[CV] END ...............C=100000.0, gamma=0.1, kernel=linear; total time=   1.6s




[CV] END ...............C=100000.0, gamma=0.1, kernel=linear; total time=   1.5s




[CV] END ...............C=100000.0, gamma=0.1, kernel=linear; total time=   1.6s




[CV] END ...............C=100000.0, gamma=0.1, kernel=linear; total time=   1.6s




[CV] END ...............C=100000.0, gamma=0.1, kernel=linear; total time=   1.5s




[CV] END ...............C=100000.0, gamma=0.1, kernel=linear; total time=   1.5s




[CV] END ...............C=100000.0, gamma=0.1, kernel=linear; total time=   1.5s




[CV] END ...............C=100000.0, gamma=0.1, kernel=linear; total time=   1.5s




[CV] END ...............C=100000.0, gamma=0.1, kernel=linear; total time=   1.5s




[CV] END ...............C=100000.0, gamma=0.1, kernel=linear; total time=   1.6s




[CV] END ...............C=100000.0, gamma=0.1, kernel=linear; total time=   1.5s




[CV] END .....................C=0.01, gamma=10.0, kernel=rbf; total time=   1.8s




[CV] END .....................C=0.01, gamma=10.0, kernel=rbf; total time=   1.8s




[CV] END .....................C=0.01, gamma=10.0, kernel=rbf; total time=   1.8s




[CV] END .....................C=0.01, gamma=10.0, kernel=rbf; total time=   1.8s




[CV] END .....................C=0.01, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.01, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.01, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.01, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.01, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.01, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.01, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.01, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.01, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.01, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END .....................C=0.01, gamma=10.0, kernel=rbf; total time=   1.7s




[CV] END ...............C=0.001, gamma=0.0001, kernel=linear; total time=   1.5s




[CV] END ...............C=0.001, gamma=0.0001, kernel=linear; total time=   1.5s




[CV] END ...............C=0.001, gamma=0.0001, kernel=linear; total time=   1.5s




[CV] END ...............C=0.001, gamma=0.0001, kernel=linear; total time=   1.5s




[CV] END ...............C=0.001, gamma=0.0001, kernel=linear; total time=   1.5s




[CV] END ...............C=0.001, gamma=0.0001, kernel=linear; total time=   1.5s




[CV] END ...............C=0.001, gamma=0.0001, kernel=linear; total time=   1.6s




[CV] END ...............C=0.001, gamma=0.0001, kernel=linear; total time=   1.5s




[CV] END ...............C=0.001, gamma=0.0001, kernel=linear; total time=   1.5s




[CV] END ...............C=0.001, gamma=0.0001, kernel=linear; total time=   1.5s




[CV] END ...............C=0.001, gamma=0.0001, kernel=linear; total time=   1.5s




[CV] END ...............C=0.001, gamma=0.0001, kernel=linear; total time=   1.5s




[CV] END ...............C=0.001, gamma=0.0001, kernel=linear; total time=   1.5s




[CV] END ...............C=0.001, gamma=0.0001, kernel=linear; total time=   1.5s




[CV] END ...............C=0.001, gamma=0.0001, kernel=linear; total time=   1.5s




### 2. Best Hyper-parameters

In [9]:
print(svc_randomcv.best_params_)
print(svc_randomcv.best_score_)

{'kernel': 'linear', 'gamma': 0.1, 'C': 0.001}
0.8368519758539402


### 3. Testing

In [6]:
test_path = max(glob.glob('/Users/ivankoh/Library/CloudStorage/OneDrive-NationalUniversityofSingapore/1D/NUS Y3S1/BT4222/Data/test/*.csv'), key=os.path.getctime) 
print(test_path)
test = pd.read_csv(test_path)

X_test, y_test = test.drop('state', axis=1).to_numpy(), test['state'].to_numpy()

/Users/ivankoh/Library/CloudStorage/OneDrive-NationalUniversityofSingapore/1D/NUS Y3S1/BT4222/Data/test/kickstarter_test_final_20221022-162938.csv


In [7]:
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(24756, 578)
(24756,)
(4369, 578)
(4369,)


In [8]:
best_clf = SVC(**svc_randomcv.best_params_)

start_time = time.time()
best_clf.fit(X_train, y_train)
y_pred_test = best_clf.predict(X_test)

time_taken = time.time() - start_time
print("Total time taken for the program execution", time_taken) # seconds

print(classification_report(y_test, y_pred_test, digits=5))
roc_auc_score(y_test, y_pred_test)

Total time taken for the program execution 163.0077760219574
              precision    recall  f1-score   support

           0    0.93557   0.26053   0.40757      1282
           1    0.76371   0.99255   0.86322      3087

    accuracy                        0.77775      4369
   macro avg    0.84964   0.62654   0.63539      4369
weighted avg    0.81414   0.77775   0.72952      4369



0.6265399109647574