## In this notebook, we are going to explore the Support Vector classifier's hyperparameter space, take the best configuration of hyperparameters based on UCB algorithmy, compare with randomly chosen hyperparameter configuration, compare their performance and evaluate the validation error.

### Import necessary libraries

In [1]:
import numpy as np
import pandas as pd
import math
import random
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns 
import xlrd
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from utils import *

### Read the data

In [3]:
data = pd.read_csv('Titanic_dataset.csv') 

### Preprocess the data

In [4]:
na_value = data.isna()
na_counts = na_value.sum()
print(na_counts)

pclass          0
survived        0
name            0
sex             0
age           263
sibsp           0
parch           0
ticket          0
fare            1
cabin        1014
embarked        2
boat          823
body         1188
home.dest     564
dtype: int64


In [5]:
columns_to_drop = ['name', 'ticket', 'cabin', 'boat', 'body', 'home.dest']
data = data.drop(columns_to_drop, axis=1)

data['age'].fillna(data['age'].median(), inplace=True)
data['fare'].fillna(data['fare'].mean(), inplace=True)
data['embarked'].fillna(data['embarked'].mode()[0], inplace=True)

data = pd.get_dummies(data, columns=['sex', 'embarked'])

### Split the data into train and test sets

In [6]:
X = data.drop('survived', axis=1)
y = data['survived']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Define the hyperparameter space for SVC

In [7]:
C = [0.001, 0.01, 0.1, 1, 10, 100]
kernel = ['linear', 'rbf']
gamma = [0.1, 0.2, 0.5, 1.0]
degree = [2, 3, 4]
probability = [True, False]
shrinking = [True, False]
cache_size = [100, 200, 500]

svc_param_space = []

for c in C:
    for k in kernel:
        for g in gamma:
            for d in degree:
                for p in probability:
                    for s in shrinking:
                        for cache in cache_size:
                            config_svc = {
                                'C': c,
                                'kernel': k,
                                'gamma': g,
                                'degree': d,
                                'probability': p,
                                'shrinking': s,
                                'cache_size': cache
                            }
                            svc_param_space.append(config_svc)

print(len(svc_param_space))

1728


### Evaluate the UCB on SVC and Random Strategy

In [8]:
best_config_svc, best_config_random_svc, pred_rand_svc = SVC_Random(X_train, y_train, X_test, y_test, svc_param_space)

Dataset: Titanic Dataset

UCB Strategy for SVM:
Best validation error: 0.2442748091603053
Best hyperparameter configuration: {'C': 0.01, 'kernel': 'linear', 'gamma': 0.1, 'degree': 2, 'probability': True, 'shrinking': True, 'cache_size': 100}

Random Strategy for SVM:
Best validation error: 0.2442748091603053
Best hyperparameter configuration: {'C': 10, 'kernel': 'linear', 'gamma': 0.2, 'degree': 2, 'probability': False, 'shrinking': False, 'cache_size': 100}


### Train the final SVC model using the best hyperparameters from UCB strategy

In [23]:
SVC_UCB_test = SVC(**best_config_svc)
SVC_UCB_test.fit(X_train, y_train) 

### Evaluate performance on the test set using the best hyperparameters from UCB strategy

In [24]:
test_predictions_svc = SVC_UCB_test.predict(X_test)
test_accuracy_svc = accuracy_score(y_test, test_predictions_svc)
test_precision_svc = precision_score(y_test, test_predictions_svc)
test_recall_svc = recall_score(y_test, test_predictions_svc)
test_f1_svc = f1_score(y_test, test_predictions_svc)

### Evaluate performance on the test set using the best hyperparameters from random strategy


In [25]:
test_precision_random_svc = pred_rand_svc
test_accuracy_random_svc = accuracy_score(y_test, test_precision_random_svc)
test_precision_random_svc = precision_score(y_test, test_precision_random_svc)
# test_recall_random_svc = recall_score(y_test, test_precision_random_svc)
# test_f1_random_svc = f1_score(y_test, test_precision_random_svc) 

### Compare the performances of UCB-selected and Random-Selected hyperparameters for SVC  

In [27]:
print("UCB-selected hyperparameters for XGBoosting: ", best_config_svc)
print("UCB-selected performance:")
print(f"  - Accuracy: {test_accuracy_svc}")
print(f"  - Precision: {test_precision_svc}")
print(f"  - Recall: {test_recall_svc}")
print(f"  - F1-score: {test_f1_svc}")

print("\nRandom-selected hyperparameters for XGBoosting: ", best_config_random_svc)
print("Random-selected performance:")
print(f"  - Accuracy: {test_accuracy_random_svc}")
print(f"  - Precision: {test_precision_random_svc}")
# print(f"  - Recall: {test_recall_random_svc}")
# print(f"  - F1-score: {test_f1_random_svc}")

UCB-selected hyperparameters for XGBoosting:  {'C': 0.01, 'kernel': 'linear', 'gamma': 0.1, 'degree': 2, 'probability': True, 'shrinking': True, 'cache_size': 100}
UCB-selected performance:
  - Accuracy: 0.7557251908396947
  - Precision: 0.78125
  - Recall: 0.635593220338983
  - F1-score: 0.7009345794392523

Random-selected hyperparameters for XGBoosting:  {'C': 10, 'kernel': 'linear', 'gamma': 0.2, 'degree': 2, 'probability': False, 'shrinking': False, 'cache_size': 100}
Random-selected performance:
  - Accuracy: 0.7557251908396947
  - Precision: 0.78125
