# In [None]:
import joblib
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from imblearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# Read the three pre-split CSV files (training, validation, testing) into
# DataFrames, in that order.
train_data, validation_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/content/drive/MyDrive/training_data_with_final.csv',
        '/content/drive/MyDrive/validation_data_with_final.csv',
        '/content/drive/MyDrive/testing_data_with_final.csv',
    )
)

# Every column except 'Name' and 'category' is used as a model input;
# 'category' is the prediction target. The feature list is derived from the
# training frame and then applied unchanged to the validation and test frames.
excluded_columns = ('Name', 'category')
features = [column for column in train_data.columns if column not in excluded_columns]

X_train, y_train = train_data[features], train_data['category']
X_val, y_val = validation_data[features], validation_data['category']
X_test, y_test = test_data[features], test_data['category']

# Handle class imbalance in training data.
# The balanced copies are still produced (same names, same random_state as
# before) so anything that inspects them keeps working, but they are
# deliberately NOT passed to the grid search below: oversampling before the
# cross-validation split puts duplicated rows of one sample into both the fit
# and the validation part of a fold, which inflates the CV scores.
oversampler = RandomOverSampler(random_state=42)
X_train_balanced, y_train_balanced = oversampler.fit_resample(X_train, y_train)

# Initialize SVM classifier (probability=True is required for predict_proba,
# which the ROC-AUC evaluation relies on).
# NOTE(review): RBF SVMs are sensitive to feature scale; confirm the CSV
# features are already standardized, otherwise add a scaler step.
svm_model = SVC(probability=True, random_state=42)

# Resample inside the pipeline so that, within each CV split, only the fitting
# portion is oversampled and the held-out portion stays untouched. Samplers in
# an imblearn Pipeline are applied during fit only, never during predict.
resampled_svm = Pipeline([
    ('oversample', oversampler),
    ('svm', svm_model),
])

# Hyperparameter tuning using GridSearchCV
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['rbf']
}
# Pipeline parameters are addressed as '<step name>__<parameter>'.
pipeline_param_grid = {f'svm__{name}': values for name, values in param_grid.items()}

# The held-out folds now keep the original (imbalanced) class distribution.
# Balanced accuracy (mean per-class recall) weights every class equally, which
# is what plain accuracy measured on the previously class-balanced folds.
grid_search = GridSearchCV(
    resampled_svm,
    pipeline_param_grid,
    cv=3,
    scoring='balanced_accuracy',
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Best model: the refit pipeline oversamples the full training set (same
# sampler settings as above) and fits the SVC on it. Extract that fitted SVC
# so `best_svm_model` remains a plain SVC for evaluation and saving.
best_svm_model = grid_search.best_estimator_.named_steps['svm']

# --- Training split: accuracy on the original (non-resampled) training data ---
train_predictions = best_svm_model.predict(X_train)
train_accuracy = accuracy_score(y_true=y_train, y_pred=train_predictions)
print(f"Training Accuracy: {train_accuracy:.2f}")

# --- Validation split: accuracy ---
val_predictions = best_svm_model.predict(X_val)
val_accuracy = accuracy_score(y_true=y_val, y_pred=val_predictions)
print(f"Validation Accuracy: {val_accuracy:.2f}")

# --- Validation split: ROC-AUC ---
# The true labels are one-hot encoded and scored against the per-class
# probabilities. Assumes the dummy columns line up with the column order of
# predict_proba -- TODO confirm every class occurs in this split.
val_probs = best_svm_model.predict_proba(X_val)
val_true_onehot = pd.get_dummies(y_val)
val_roc_auc = roc_auc_score(
    y_true=val_true_onehot,
    y_score=val_probs,
    multi_class='ovr',
)
print(f"Validation ROC-AUC: {val_roc_auc:.2f}")

# --- Test split: accuracy ---
test_predictions = best_svm_model.predict(X_test)
test_accuracy = accuracy_score(y_true=y_test, y_pred=test_predictions)
print(f"Test Accuracy: {test_accuracy:.2f}")

# --- Test split: ROC-AUC (same one-hot scheme as for validation) ---
test_probs = best_svm_model.predict_proba(X_test)
test_true_onehot = pd.get_dummies(y_test)
test_roc_auc = roc_auc_score(
    y_true=test_true_onehot,
    y_score=test_probs,
    multi_class='ovr',
)
print(f"Test ROC-AUC: {test_roc_auc:.2f}")

# Per-class precision/recall/F1 tables for the train, validation and test
# splits, printed in that order.
for report_header, true_labels, predicted_labels in (
    ("\nClassification Report (Train):", y_train, train_predictions),
    ("\nClassification Report (Validation):", y_val, val_predictions),
    ("\nClassification Report (Test):", y_test, test_predictions),
):
    print(report_header)
    print(classification_report(true_labels, predicted_labels))

# Confusion matrices for the two held-out splits (validation, then test).
for matrix_header, true_labels, predicted_labels in (
    ("\nConfusion Matrix (Validation):", y_val, val_predictions),
    ("\nConfusion Matrix (Test):", y_test, test_predictions),
):
    print(matrix_header)
    print(confusion_matrix(true_labels, predicted_labels))

# Persist the tuned classifier to the current working directory with joblib,
# then confirm on stdout.
joblib.dump(value=best_svm_model, filename='improved_svm_model.joblib')
print("Improved SVM model saved as 'improved_svm_model.joblib'")
