In [67]:
import pandas as pd
import numpy as np
import random
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.decomposition import PCA

### Setting a seed value for reproducibility

In [69]:
seed_value = 42
np.random.seed(seed_value)
random.seed(seed_value)

X_train = pd.read_csv("X_train.csv")
X_val = pd.read_csv("X_val.csv")
X_test = pd.read_csv("X_test.csv")
y_train = pd.read_csv("y_train.csv").values.ravel()
y_val = pd.read_csv("y_val.csv").values.ravel()
y_test = pd.read_csv("y_test.csv").values.ravel()

In [70]:
# Helper function to evaluate the model
def evaluate_model(model, X, y):
    y_pred = model.predict(X)
    accuracy = accuracy_score(y, y_pred)
    precision = precision_score(y, y_pred)
    recall = recall_score(y, y_pred)
    f1 = f1_score(y, y_pred)
    print(f"Accuracy: {accuracy:.2f}")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1-Score: {f1:.2f}")
    return accuracy, precision, recall, f1

In [71]:
# Experiment 1: Default RBF kernel with different regularization values
print("SVM with RBF Kernel:")
for C in [0.01, 0.1, 1, 10, 100, 1000]:
    print(f"Regularization C={C}")
    svm_rbf = SVC(kernel='rbf', C=C, gamma='scale', random_state=42)
    svm_rbf.fit(X_train, y_train)
    print("Validation Metrics:")
    evaluate_model(svm_rbf, X_val, y_val)
    print("-" * 40)

SVM with RBF Kernel:
Regularization C=0.01
Validation Metrics:
Accuracy: 0.62
Precision: 0.62
Recall: 1.00
F1-Score: 0.77
----------------------------------------
Regularization C=0.1
Validation Metrics:
Accuracy: 0.62
Precision: 0.62
Recall: 1.00
F1-Score: 0.77
----------------------------------------
Regularization C=1
Validation Metrics:
Accuracy: 0.62
Precision: 0.62
Recall: 1.00
F1-Score: 0.77
----------------------------------------
Regularization C=10
Validation Metrics:
Accuracy: 0.62
Precision: 0.62
Recall: 1.00
F1-Score: 0.77
----------------------------------------
Regularization C=100
Validation Metrics:
Accuracy: 0.62
Precision: 0.62
Recall: 1.00
F1-Score: 0.77
----------------------------------------
Regularization C=1000
Validation Metrics:
Accuracy: 0.63
Precision: 0.63
Recall: 1.00
F1-Score: 0.77
----------------------------------------


In [72]:
# Experiment 2: Polynomial kernel with degree 2
print("\nSVM with Polynomial Kernel (Degree 2):")
for C in [0.01, 0.1, 1, 10, 100, 1000]:
    print(f"Regularization C={C}")
    svm_poly = SVC(kernel='poly', degree=2, C=C, gamma='scale', random_state=42)
    svm_poly.fit(X_train, y_train)
    print("Validation Metrics:")
    evaluate_model(svm_poly, X_val, y_val)
    print("-" * 40)


SVM with Polynomial Kernel (Degree 2):
Regularization C=0.01
Validation Metrics:
Accuracy: 0.62
Precision: 0.62
Recall: 1.00
F1-Score: 0.77
----------------------------------------
Regularization C=0.1
Validation Metrics:
Accuracy: 0.62
Precision: 0.62
Recall: 1.00
F1-Score: 0.77
----------------------------------------
Regularization C=1
Validation Metrics:
Accuracy: 0.62
Precision: 0.62
Recall: 1.00
F1-Score: 0.77
----------------------------------------
Regularization C=10
Validation Metrics:
Accuracy: 0.62
Precision: 0.62
Recall: 1.00
F1-Score: 0.77
----------------------------------------
Regularization C=100
Validation Metrics:
Accuracy: 0.62
Precision: 0.62
Recall: 1.00
F1-Score: 0.77
----------------------------------------
Regularization C=1000
Validation Metrics:
Accuracy: 0.70
Precision: 0.68
Recall: 0.99
F1-Score: 0.80
----------------------------------------


In [73]:
# Experiment 3: Linear kernel. Code is not executing skip this
"""
print("\nSVM with Linear Kernel:")
for C in [0.01, 0.1, 1, 10, 100, 1000]:
    print(f"Regularization C={C}")
    svm_linear = SVC(kernel='linear', C=C, random_state=42)
    svm_linear.fit(X_train, y_train)
    print("Validation Metrics:")
    evaluate_model(svm_linear, X_val, y_val)
    print("-" * 40)
"""

'\nprint("\nSVM with Linear Kernel:")\nfor C in [0.01, 0.1, 1, 10, 100, 1000]:\n    print(f"Regularization C={C}")\n    svm_linear = SVC(kernel=\'linear\', C=C, random_state=42)\n    svm_linear.fit(X_train, y_train)\n    print("Validation Metrics:")\n    evaluate_model(svm_linear, X_val, y_val)\n    print("-" * 40)\n'

In [74]:
# Experiment 4: PCA Transformation + RBF Kernel
print("\nSVM with RBF Kernel and PCA Transformation:")
pca = PCA(n_components=5)  # Reduce to 5 components for experimentation
X_train_pca = pca.fit_transform(X_train)
X_val_pca = pca.transform(X_val)

for C in [0.01, 0.1, 1, 10, 100, 1000]:
    print(f"Regularization C={C}")
    svm_rbf_pca = SVC(kernel='rbf', C=C, gamma='scale', random_state=42)
    svm_rbf_pca.fit(X_train_pca, y_train)
    print("Validation Metrics:")
    evaluate_model(svm_rbf_pca, X_val_pca, y_val)
    print("-" * 40)


SVM with RBF Kernel and PCA Transformation:
Regularization C=0.01
Validation Metrics:
Accuracy: 0.62
Precision: 0.62
Recall: 1.00
F1-Score: 0.77
----------------------------------------
Regularization C=0.1
Validation Metrics:
Accuracy: 0.62
Precision: 0.62
Recall: 1.00
F1-Score: 0.77
----------------------------------------
Regularization C=1
Validation Metrics:
Accuracy: 0.62
Precision: 0.62
Recall: 1.00
F1-Score: 0.77
----------------------------------------
Regularization C=10
Validation Metrics:
Accuracy: 0.62
Precision: 0.62
Recall: 1.00
F1-Score: 0.77
----------------------------------------
Regularization C=100
Validation Metrics:
Accuracy: 0.62
Precision: 0.62
Recall: 1.00
F1-Score: 0.77
----------------------------------------
Regularization C=1000
Validation Metrics:
Accuracy: 0.87
Precision: 0.86
Recall: 0.94
F1-Score: 0.90
----------------------------------------


### Lets do a final test on testing data

In [76]:
# PCA Transformation
pca = PCA(n_components=0.95)  # Use same PCA setup as before
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Best SVM Model (RBF Kernel, C=1000)
best_svm = SVC(kernel='rbf', C=1000, random_state=42)
best_svm.fit(X_train_pca, y_train)

# Evaluate on Test Set
y_test_pred = best_svm.predict(X_test_pca)

accuracy = accuracy_score(y_test, y_test_pred)
precision = precision_score(y_test, y_test_pred)
recall = recall_score(y_test, y_test_pred)
f1 = f1_score(y_test, y_test_pred)

print(f"Test Set Metrics for SVM (RBF + PCA, C=1000):")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")

Test Set Metrics for SVM (RBF + PCA, C=1000):
Accuracy: 0.64
Precision: 0.64
Recall: 1.00
F1-Score: 0.78
