In [1]:
import math
import random
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [2]:
# Load the Titanic dataset
data = pd.read_excel('Titanic_dataset.xls')

# Drop irrelevant columns
columns_to_drop = ['name', 'ticket', 'cabin', 'boat', 'body', 'home.dest']
data = data.drop(columns_to_drop, axis=1)

# Handle missing values
data['age'].fillna(data['age'].median(), inplace=True)
data['fare'].fillna(data['fare'].mean(), inplace=True)
data['embarked'].fillna(data['embarked'].mode()[0], inplace=True)

# Convert categorical variables into dummy/indicator variables
data = pd.get_dummies(data, columns=['sex', 'embarked'])

In [3]:
# Split the data into features (X) and target variable (y)
X = data.drop('survived', axis=1)
y = data['survived']

# Split the data into train, validation, and test sets
X_trainval, X_test, y_trainval, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_trainval, y_trainval, test_size=0.2, random_state=42)

In [4]:
# Define the parameter space for SVM and Random Forest
svm_param_space = [
    {'C': 0.001, 'kernel': 'linear'},
    {'C': 0.01, 'kernel': 'linear'},
    {'C': 0.1, 'kernel': 'linear'},
    {'C': 1, 'kernel': 'linear'},
    {'C': 10, 'kernel': 'linear'},
    {'C': 100, 'kernel': 'linear'},
    {'C': 0.001, 'kernel': 'rbf'},
    {'C': 0.01, 'kernel': 'rbf'},
    {'C': 0.1, 'kernel': 'rbf'},
    {'C': 1, 'kernel': 'rbf'},
    {'C': 10, 'kernel': 'rbf'},
    {'C': 100, 'kernel': 'rbf'},
]

rf_param_space = [
    {'n_estimators': 50, 'max_depth': 3},
    {'n_estimators': 100, 'max_depth': 3},
    {'n_estimators': 150, 'max_depth': 3},
    {'n_estimators': 50, 'max_depth': 6},
    {'n_estimators': 100, 'max_depth': 6},
    {'n_estimators': 150, 'max_depth': 6},
    {'n_estimators': 50, 'max_depth': 9},
    {'n_estimators': 100, 'max_depth': 9},
    {'n_estimators': 150, 'max_depth': 9},
]

In [22]:
len(svm_param_space)

12

In [5]:
def ucb_selection(rewards, attempts, t, c):
    ucb_values = []
    n = len(rewards)

    for i in range(n):
        if attempts[i] == 0:
            # Exploration term when no attempts have been made
            ucb_value = float('inf')
        else:
            exploitation = rewards[i] / attempts[i]
            exploration = c * math.sqrt(math.log(t) / attempts[i])
            ucb_value = exploitation + exploration

        ucb_values.append(ucb_value)

    # Select the hyperparameters with the highest UCB value
    selected_index = max(range(n), key=lambda i: ucb_values[i])

    return selected_index

In [23]:
rewards_svm = [0] * len(svm_param_space)
attempts_svm = [0] * len(svm_param_space)
best_config_ucb_svm = None
best_error_ucb_svm = float('inf')

max_iterations = 12
c = 2  # Exploration parameter

# Perform cross-validation for evaluation
cv = 5  # Number of cross-validation folds

for t in range(1, max_iterations + 1):
    # UCB Strategy for SVM
    config_idx_ucb_svm = ucb_selection(rewards_svm, attempts_svm, t, c)
    config_ucb_svm = svm_param_space[config_idx_ucb_svm]
    svm_model = SVC(**config_ucb_svm)

    # Train the SVM model on the training set
    svm_model.fit(X_train, y_train)

    # Perform cross-validation on the validation set
    scores_svm = cross_val_score(svm_model, X_val, y_val, cv=cv)
    error_ucb_svm = np.mean(scores_svm)

    rewards_svm[config_idx_ucb_svm] += error_ucb_svm
    attempts_svm[config_idx_ucb_svm] += 1

    if error_ucb_svm < best_error_ucb_svm:
        best_config_ucb_svm = config_ucb_svm
        best_error_ucb_svm = error_ucb_svm

print("Dataset: Titanic Dataset")

print("\nUCB Strategy for SVM:")
print("Best validation error:", best_error_ucb_svm)
print("Best hyperparameter configuration:", best_config_ucb_svm)

Dataset: Titanic Dataset

UCB Strategy for SVM:
Best validation error: 0.5523809523809524
Best hyperparameter configuration: {'C': 0.001, 'kernel': 'rbf'}


In [18]:
rewards_rf = [0] * len(rf_param_space)
attempts_rf = [0] * len(rf_param_space)
best_config_ucb_rf = None
best_error_ucb_rf = float('inf')

max_iterations = 4
c = 2  # Exploration parameter

# Perform cross-validation for evaluation
cv = 5  # Number of cross-validation folds

for t in range(1, max_iterations + 1):
    # UCB Strategy for Random Forest
    config_idx_ucb_rf = ucb_selection(rewards_rf, attempts_rf, t, c)
    config_ucb_rf = rf_param_space[config_idx_ucb_rf]
    rf_model = RandomForestClassifier(**config_ucb_rf)

    # Train the Random Forest model on the training set
    rf_model.fit(X_train, y_train)

    # Perform cross-validation on the validation set
    scores_rf = cross_val_score(rf_model, X_val, y_val, cv=cv)
    error_ucb_rf = np.mean(scores_rf)

    rewards_rf[config_idx_ucb_rf] += error_ucb_rf
    attempts_rf[config_idx_ucb_rf] += 1

    if error_ucb_rf < best_error_ucb_rf:
        best_config_ucb_rf = config_ucb_rf
        best_error_ucb_rf = error_ucb_rf

print("Dataset: Titanic Dataset")

print("\nUCB Strategy for Random Forest:")
print("Best validation error:", best_error_ucb_rf)
print("Best hyperparameter configuration:", best_config_ucb_rf)

Dataset: Titanic Dataset

UCB Strategy for Random Forest:
Best validation error: 0.7333333333333333
Best hyperparameter configuration: {'n_estimators': 50, 'max_depth': 6}


In [19]:
# Train the final SVM model using the best hyperparameters from UCB strategy
svm_model.set_params(**best_config_ucb_svm)
svm_model.fit(X_trainval, y_trainval)

# Evaluate performance on the test set using the best hyperparameters from UCB strategy
test_predictions_svm = svm_model.predict(X_test)
test_accuracy_svm = accuracy_score(y_test, test_predictions_svm)
test_precision_svm = precision_score(y_test, test_predictions_svm)
test_recall_svm = recall_score(y_test, test_predictions_svm)
test_f1_svm = f1_score(y_test, test_predictions_svm)
test_roc_auc_svm = roc_auc_score(y_test, svm_model.decision_function(X_test))

In [20]:
# Train the final Random Forest model using the best hyperparameters from UCB strategy
rf_model.set_params(**best_config_ucb_rf)
rf_model.fit(X_trainval, y_trainval)

# Evaluate performance on the test set using the best hyperparameters from UCB strategy
test_predictions_rf = rf_model.predict(X_test)
test_accuracy_rf = accuracy_score(y_test, test_predictions_rf)
test_precision_rf = precision_score(y_test, test_predictions_rf)
test_recall_rf = recall_score(y_test, test_predictions_rf)
test_f1_rf = f1_score(y_test, test_predictions_rf)
test_roc_auc_rf = roc_auc_score(y_test, rf_model.predict_proba(X_test)[:, 1])

In [21]:
print("\nUCB-selected hyperparameters for SVM: ", best_config_ucb_svm)
print("UCB-selected performance for SVM:")
print(f"  - Accuracy: {test_accuracy_svm}")
print(f"  - Precision: {test_precision_svm}")
print(f"  - Recall: {test_recall_svm}")
print(f"  - F1-score: {test_f1_svm}")
print(f"  - ROC AUC: {test_roc_auc_svm}")

print("\nUCB-selected hyperparameters for Random Forest: ", best_config_ucb_rf)
print("UCB-selected performance for Random Forest:")
print(f"  - Accuracy: {test_accuracy_rf}")
print(f"  - Precision: {test_precision_rf}")
print(f"  - Recall: {test_recall_rf}")
print(f"  - F1-score: {test_f1_rf}")
print(f"  - ROC AUC: {test_roc_auc_rf}")


UCB-selected hyperparameters for SVM:  {'C': 0.001, 'kernel': 'linear'}
UCB-selected performance for SVM:
  - Accuracy: 0.6145038167938931
  - Precision: 0.8695652173913043
  - Recall: 0.1694915254237288
  - F1-score: 0.28368794326241137
  - ROC AUC: 0.8230049435028248

UCB-selected hyperparameters for Random Forest:  {'n_estimators': 50, 'max_depth': 6}
UCB-selected performance for Random Forest:
  - Accuracy: 0.767175572519084
  - Precision: 0.8518518518518519
  - Recall: 0.5847457627118644
  - F1-score: 0.6934673366834171
  - ROC AUC: 0.8556967984934086
