In [2]:
import sys


from sklearn import svm
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
import pickle

# Balance the dataset
from imblearn.over_sampling import SMOTE


In [None]:
# Load the pickled dataset of (feature, label) pairs.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load 'svm_dataset.pkl' if it comes from a trusted source.
with open('svm_dataset.pkl', 'rb') as f:
    svm_dataset = pickle.load(f)

# sys.getsizeof on the container alone only counts its own pointer array, not
# the samples it refers to, so walk one level down and add each sample and its
# members. Objects exposing `nbytes` (array-like buffers) report their data
# size through it; everything else falls back to its shallow size. This is an
# estimate: objects shared between samples are counted once per reference.
dataset_bytes = sys.getsizeof(svm_dataset)
for sample in svm_dataset:
    dataset_bytes += sys.getsizeof(sample)
    for part in sample:
        dataset_bytes += getattr(part, 'nbytes', None) or sys.getsizeof(part)

print('Size of svm_dataset:', dataset_bytes / 1024 / 1024, 'MB')


In [23]:
# Unzip the dataset's (feature, label) pairs into two parallel tuples.
features, labels = zip(*svm_dataset)
classes = {label for label in labels}  # distinct class labels present in the data

# Collapse every sample to a 1-D vector so the samples stack into a 2-D matrix
# (assumes each sample exposes an array-style .flatten() — TODO confirm type).
flattened = []
for sample in features:
    flattened.append(sample.flatten())
features = flattened
del flattened  # keep the notebook namespace identical to before

# Standardize the features.
# NOTE(review): the scaler is fit on ALL samples here, before the later
# train/test split, so test-set statistics leak into the training features.
scaler = StandardScaler()
features_scaled = scaler.fit(features).transform(features)


In [12]:
# Train one binary (one-vs-rest) SVM per class; the fitted grid search for each
# class is stored in `svm_classifiers`, keyed by the class label.
svm_classifiers = {}

for class_label in classes:
    print(f"Training SVM for class: {class_label}")
    # One-vs-rest target: 1 for samples of the current class, 0 for all others
    binary_labels = [1 if label == class_label else 0 for label in labels]

    # Split data. Stratify on the binary target so the positive class keeps the
    # same proportion in both splits; an unstratified split of a skewed
    # one-vs-rest target can leave the test set with few or no positives and
    # the training set with too few minority samples for SMOTE.
    X_train, X_test, y_train, y_test = train_test_split(
        features_scaled,
        binary_labels,
        test_size=0.2,
        random_state=42,
        stratify=binary_labels,
    )

    # Oversample the training split only; the test split keeps its natural
    # class ratio so the report below reflects real-world imbalance.
    smote = SMOTE(random_state=42)
    X_train, y_train = smote.fit_resample(X_train, y_train)

    # Define the SVM and perform grid search
    # NOTE(review): SMOTE runs before cross-validation, so synthetic samples
    # derived from a fold's validation points can appear in its training part
    # and inflate CV scores. Putting SMOTE and the SVC into an
    # imblearn.pipeline.Pipeline inside GridSearchCV would avoid that.
    parameters = {'C': [0.1, 1, 10, 100], 'kernel': ['linear', 'rbf']}
    # NOTE(review): the training data was already rebalanced by SMOTE above, so
    # class_weight='balanced' presumably has little additional effect — confirm.
    svc = svm.SVC(class_weight='balanced')
    clf = GridSearchCV(svc, parameters, cv=5)
    clf.fit(X_train, y_train)

    # Store the classifier
    svm_classifiers[class_label] = clf

    # Output the best parameters and held-out performance metrics
    print(f"Best parameters for {class_label}: {clf.best_params_}")
    print("Classification Report:")
    print(classification_report(y_test, clf.predict(X_test)))


