## **Imports**

In [28]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV

In [29]:
X_train_pca = pd.read_csv('X_train_pca.csv')
X_test_pca = pd.read_csv('X_test_pca.csv')
y_train = pd.read_csv('y_train.csv')
y_test = pd.read_csv('y_test.csv')
X_train_pca.shape, X_test_pca.shape, y_train.shape, y_test.shape

((22413, 15), (5604, 15), (22413, 1), (5604, 1))

In [30]:
y_train = y_train.values.ravel()
y_test = y_test.values.ravel()

In [31]:
svc_default = SVC()
svc_default.fit(X_train_pca, y_train)
y_pred_default = svc_default.predict(X_test_pca)
print(classification_report(y_test, y_pred_default))
print("Default sklearn SVC Accuracy:", accuracy_score(y_test, y_pred_default))

              precision    recall  f1-score   support

           0       0.84      0.95      0.89      4341
           1       0.68      0.36      0.47      1263

    accuracy                           0.82      5604
   macro avg       0.76      0.66      0.68      5604
weighted avg       0.80      0.82      0.80      5604

Default sklearn SVC Accuracy: 0.8179871520342612


In [32]:
X_sample, _, y_sample, _ = train_test_split(X_train_pca, y_train, test_size=0.8, stratify=y_train, random_state=1)
X_sample.shape, y_sample.shape

((4482, 15), (4482,))

In [37]:
# Define kernel types to train
kernel_types = ['linear', 'rbf', 'sigmoid']

for kernel in kernel_types:
    print(f"Training with {kernel} kernel")

    # Define the hyperparameter grid for the current kernel
    param_grid = {
        'C': [0.1, 1, 10, 25],
        'kernel': [kernel],
        'gamma': [0.01, 0.1, 1, 'scale', 'auto'],
        'tol': [0.001, 0.01, 0.1],
        'class_weight': ['balanced', None]
    }

    svc = SVC()
    grid_search = GridSearchCV(svc, param_grid, cv=3, n_jobs=-1, scoring='accuracy')

    grid_search.fit(X_sample, y_sample)

    best_params = grid_search.best_params_
    best_model = grid_search.best_estimator_

    y_pred = best_model.predict(X_test_pca)

    # Print Best Parameters and Model Performance
    print(f"Best Parameters for {kernel} kernel:", best_params)
    print(f"{kernel} SVC best Accuracy:", accuracy_score(y_test, y_pred))
    print(f"Classification Report for {kernel} kernel:\n", classification_report(y_test, y_pred))

Training with linear kernel
Best Parameters for linear kernel: {'C': 10, 'class_weight': None, 'gamma': 0.01, 'kernel': 'linear', 'tol': 0.1}
linear SVC best Accuracy: 0.8081727337615988
Classification Report for linear kernel:
               precision    recall  f1-score   support

           0       0.82      0.96      0.89      4341
           1       0.67      0.29      0.40      1263

    accuracy                           0.81      5604
   macro avg       0.75      0.62      0.65      5604
weighted avg       0.79      0.81      0.78      5604

Training with rbf kernel
Best Parameters for rbf kernel: {'C': 25, 'class_weight': None, 'gamma': 0.01, 'kernel': 'rbf', 'tol': 0.1}
rbf SVC best Accuracy: 0.8145967166309779
Classification Report for rbf kernel:
               precision    recall  f1-score   support

           0       0.83      0.95      0.89      4341
           1       0.67      0.35      0.46      1263

    accuracy                           0.81      5604
   macro avg

In [34]:
param_grid = {
    'C': [0.01, 0.1, 1, 10, 25],
    'kernel': ['linear'],
    'gamma': [0.01, 0.1, 1, 'scale', 'auto'],
    'class_weight': ['balanced', None],
    'tol': [0.001, 0.01, 0.1]
}

grid_search = GridSearchCV(svc, param_grid, cv=3, n_jobs=-1, scoring='accuracy')

grid_search.fit(X_sample, y_sample)

best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

y_pred = best_model.predict(X_test_pca)

print("Best Hyperparameters:", best_params)
print("Linear sklearn SVC Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Best Hyperparameters: {'C': 10, 'class_weight': None, 'gamma': 0.01, 'kernel': 'linear', 'tol': 0.1}
Linear sklearn SVC Accuracy: 0.8081727337615988
              precision    recall  f1-score   support

           0       0.82      0.96      0.89      4341
           1       0.67      0.29      0.40      1263

    accuracy                           0.81      5604
   macro avg       0.75      0.62      0.65      5604
weighted avg       0.79      0.81      0.78      5604



In [38]:
svc_linear = SVC(C=10, gamma='scale', kernel='linear')
svc_linear.fit(X_train_pca, y_train)
y_pred = svc_linear.predict(X_test_pca)

print("Linear sklearn SVC Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Linear sklearn SVC Accuracy: 0.8083511777301927
              precision    recall  f1-score   support

           0       0.82      0.96      0.89      4341
           1       0.67      0.29      0.41      1263

    accuracy                           0.81      5604
   macro avg       0.75      0.62      0.65      5604
weighted avg       0.79      0.81      0.78      5604

