# SVM Model with scikit-learn


In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from svm_scratch import LinearSVM_Dual
from sklearn.model_selection import GridSearchCV

In [3]:

# Read both splits from disk first (file names suggest they are already encoded).
df_train = pd.read_csv('../train_set_encoded.csv')
df_val = pd.read_csv('../test_set_encoded.csv')

# Training split: 'Depression' is the label, every other column is a feature.
y = df_train['Depression']
X = df_train.drop(columns='Depression')

# Held-out split (loaded from the "test" file) that later cells use for validation.
y_val = df_val['Depression']
X_val = df_val.drop(columns='Depression')

In [11]:
# Candidate values of the regularisation strength C to search over.
param_grid = {'C': [0.01, 0.1, 1, 10, 100]}

# Linear-kernel SVC tuned with a 5-fold cross-validated grid search,
# scored by accuracy and run on all available cores (n_jobs=-1).
svc = SVC(kernel='linear', random_state=42)
grid_search = GridSearchCV(
    estimator=svc,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X, y)

# Report the best C and its cross-validated accuracy.
print("Best parameters:", grid_search.best_params_)
print("Best cross-validation accuracy:", grid_search.best_score_)

# Take the best estimator found by the search and predict on the held-out split.
svm_model = grid_search.best_estimator_
val_predictions = svm_model.predict(X_val)

# Overall accuracy followed by the per-class precision/recall/F1 breakdown.
print("\nValidation Set Results:")
print("Accuracy on validation set:", accuracy_score(y_val, val_predictions))
print("\nClassification Report on Validation Set:")
print(classification_report(y_val, val_predictions))

Best parameters: {'C': 1}
Best cross-validation accuracy: 0.8463574556067129

Validation Set Results:
Accuracy on validation set: 0.843010752688172

Classification Report on Validation Set:
              precision    recall  f1-score   support

           0       0.84      0.77      0.81      2348
           1       0.84      0.90      0.87      3232

    accuracy                           0.84      5580
   macro avg       0.84      0.83      0.84      5580
weighted avg       0.84      0.84      0.84      5580



## Custom SVM Implementation (from scratch)

### Train and Evaluate Custom SVM

In [None]:
# Hyperparameters for the from-scratch dual-form linear SVM.
C_value = 1.0  # Regularisation strength; should be the best C found by GridSearchCV
tol_value = 1e-3  # Tolerance; 1e-4 or 1e-3 are the usual choices
max_iter_value = 500  # Maximum number of iterations

# The custom implementation is fed plain NumPy arrays, so convert every
# DataFrame/Series once up front. The validation features are converted too,
# so that predict() receives the same array type that fit() was trained on
# (a raw DataFrame can break positional row indexing in hand-written code).
X_train_array = X.to_numpy()
y_train_array = y.to_numpy()
X_val_array = X_val.to_numpy()

# NOTE(review): dual SVM formulations commonly expect labels in {-1, +1},
# while 'Depression' is encoded as 0/1 here — confirm that LinearSVM_Dual
# handles the label mapping internally.

# Train the model on the full training set.
svm_scratch = LinearSVM_Dual(C=C_value, tol=tol_value, max_iter=max_iter_value)
svm_scratch.fit(X_train_array, y_train_array)

# Evaluate on the validation set.
y_pred_scratch = svm_scratch.predict(X_val_array)
print('Accuracy (scratch):', accuracy_score(y_val, y_pred_scratch))
print('\nClassification Report:')
print(classification_report(y_val, y_pred_scratch))