In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from svm_scratch import LinearSVM_Dual
from sklearn.model_selection import GridSearchCV

In [2]:
# Read both pre-encoded CSV splits. The second file is named "test_set" on
# disk but is used as the validation set throughout this notebook.
df_train = pd.read_csv('../train_set_encoded.csv')
df_val = pd.read_csv('../test_set_encoded.csv')

# Training split: every column except 'Depression' is a feature; 'Depression'
# is the target.
X = df_train.drop(columns='Depression')
y = df_train['Depression']

# Validation split: same feature/target separation.
X_val = df_val.drop(columns='Depression')
y_val = df_val['Depression']

In [3]:
# Search space for the RBF-kernel SVM: 4 values of C x 5 values of gamma
# (20 candidate combinations).
param_grid = dict(
    C=[0.1, 1, 10, 100],
    gamma=['scale', 'auto', 0.01, 0.1, 1],
)

# Base estimator whose hyperparameters are tuned below.
svc = SVC(kernel='rbf', random_state=42)

# Exhaustive search over param_grid, scored by accuracy with 5-fold
# cross-validation on the training data; n_jobs=-1 requests parallel fits.
grid_search = GridSearchCV(
    estimator=svc,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=1,
)
grid_search.fit(X, y)

# Report the winning combination and its cross-validated accuracy.
print("Best parameters:", grid_search.best_params_)
print("Best cross-validation accuracy:", grid_search.best_score_)

# Take the estimator fitted with the best parameters and predict on the
# validation features.
svm_model = grid_search.best_estimator_
val_predictions = svm_model.predict(X_val)

# Validation metrics: overall accuracy, then the per-class report.
print("\nValidation Set Results:")
print("Accuracy on validation set:", accuracy_score(y_val, val_predictions))
print("\nClassification Report on Validation Set:")
print(classification_report(y_val, val_predictions))

Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best parameters: {'C': 1, 'gamma': 0.1}
Best cross-validation accuracy: 0.845237403999592

Validation Set Results:
Accuracy on validation set: 0.8449820788530465

Classification Report on Validation Set:
              precision    recall  f1-score   support

           0       0.85      0.77      0.81      2348
           1       0.84      0.90      0.87      3232

    accuracy                           0.84      5580
   macro avg       0.85      0.83      0.84      5580
weighted avg       0.85      0.84      0.84      5580

