In [6]:
import os
import joblib
import pandas as pd
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, recall_score, precision_score, f1_score

# Module-level switch read by compute_svm_metrics: when True, every fitted
# model is pickled to disk with joblib under the caller-supplied model name.
save = True

# Define function to compute the performance metrics for SVM
def compute_svm_metrics(X, y, smote=False, model_name='model', kernel='linear'):
    """Train an SVC on a hold-out split of (X, y) and score it on the test part.

    The data is split 80/20 with a fixed seed. Features are min-max scaled
    with a scaler fitted on the training split only and then applied to the
    test split. When ``smote`` is true, the scaled training split is
    oversampled with SMOTE; the test split is never resampled.

    When the module-level ``save`` flag is true, the fitted classifier is
    written to ``{model_name}.pkl`` and the fitted scaler to
    ``{model_name}_scaler.pkl``. Both are needed to score new raw data,
    because the classifier only ever saw scaled features.

    Parameters
    ----------
    X : feature table accepted by ``train_test_split``.
    y : target labels aligned with ``X``.
        Assumes binary labels, since the recall/precision/F1 calls use the
        scorers' default settings -- TODO confirm against the dataset.
    smote : bool, default False
        Oversample the training split with SMOTE before fitting.
    model_name : str, default 'model'
        Base name (without extension) of the files written when saving.
    kernel : str, default 'linear'
        Kernel passed straight through to ``SVC``.

    Returns
    -------
    tuple
        ``(auc, recall, precision, f1)`` computed on the test split.
    """
    # Split data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Fit the scaler on the training split only so that no test-set
    # statistics leak into training.
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    if smote:
        # Oversample after the split and after scaling, on training data only.
        sm = SMOTE(random_state=27)
        X_train, y_train = sm.fit_resample(X_train, y_train)

    # Train SVM model (probability=True is required for predict_proba below)
    model = SVC(probability=True, random_state=42, kernel=kernel)
    model.fit(X_train, y_train)

    # Persist the fitted artefacts. This happens for every call, regardless
    # of how well the model scores.
    if save:
        joblib.dump(model, f'{model_name}.pkl')
        # The classifier was trained on scaled features, so the scaler must be
        # stored too; without it the pickled model cannot be applied to raw
        # inputs after reloading.
        joblib.dump(scaler, f'{model_name}_scaler.pkl')

    # Predict on test set
    y_pred = model.predict(X_test)
    # Column 1 is the probability of the second entry of model.classes_
    # (presumably the positive label -- verify against the data).
    y_prob = model.predict_proba(X_test)[:, 1]

    # Compute metrics: AUC from the probabilities, the rest from hard labels
    auc = roc_auc_score(y_test, y_prob)
    recall = recall_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    return auc, recall, precision, f1

def load_data(file_name):
    """Read a comma-separated file from the ``data`` directory next to ``models``.

    The data directory is derived from the current working directory by
    swapping the last path component that is exactly ``models`` for ``data``.
    Only a whole component is swapped, so directories whose names merely
    contain the text "models" (e.g. ``models_archive``) are left alone. If no
    component is named ``models``, the working directory itself is used.

    Parameters
    ----------
    file_name : str
        Name of the CSV file inside the data directory.

    Returns
    -------
    pandas.DataFrame
        The parsed file contents.
    """
    parts = os.getcwd().split(os.sep)
    # Walk from the deepest component upwards and swap the first exact match.
    for idx in range(len(parts) - 1, -1, -1):
        if parts[idx] == 'models':
            parts[idx] = 'data'
            break
    curr_file = os.path.join(os.sep.join(parts), file_name)
    return pd.read_csv(curr_file, delimiter=",")

def run_svm(kernel):
    """Evaluate an SVM with the given kernel, with and without SMOTE.

    Loads ``data.csv``, uses every column except ``Outcome`` as features and
    ``Outcome`` as the target, then runs ``compute_svm_metrics`` twice: once
    on the plain training data and once with SMOTE oversampling.

    NOTE(review): the model names passed down ('svm_data', 'svm_smote') do not
    depend on ``kernel``, so when saving is enabled, calling this function for
    several kernels rewrites the same files and only the last call's models
    remain on disk.

    Parameters
    ----------
    kernel : str
        Kernel name forwarded to ``compute_svm_metrics``.

    Returns
    -------
    pandas.DataFrame
        One row per run ('data', 'data_smote') with the columns
        Dataset, AUC, Recall, Precision and F1.
    """
    frame = load_data('data.csv')
    features = frame.drop('Outcome', axis=1)
    target = frame['Outcome']

    # (row label, extra keyword arguments) for each run, in output order.
    runs = (
        ('data', {'model_name': 'svm_data'}),
        ('data_smote', {'smote': True, 'model_name': 'svm_smote'}),
    )

    rows = []
    for label, options in runs:
        auc, recall, precision, f1 = compute_svm_metrics(
            features, target, kernel=kernel, **options
        )
        rows.append({
            'Dataset': label,
            'AUC': auc,
            'Recall': recall,
            'Precision': precision,
            'F1': f1,
        })

    return pd.DataFrame(rows, columns=['Dataset', 'AUC', 'Recall', 'Precision', 'F1'])


In [7]:
# Visualize the metrics table for the SVM with a linear kernel
display(run_svm('linear'))

Unnamed: 0,Dataset,AUC,Recall,Precision,F1
0,data,0.847176,0.627907,0.771429,0.692308
1,data_smote,0.848686,0.837209,0.666667,0.742268


In [8]:
# Visualize the metrics table for the SVM with a sigmoid kernel
# NOTE(review): the recorded output pairs a high AUC with very low recall and
# precision. AUC comes from predict_proba while the other metrics come from
# predict, and the two can disagree for SVC(probability=True) -- worth
# verifying before trusting either number for this kernel.
display(run_svm('sigmoid'))

Unnamed: 0,Dataset,AUC,Recall,Precision,F1
0,data,0.867714,0.069767,0.088235,0.077922
1,data_smote,0.868922,0.186047,0.111111,0.13913


In [9]:
# Visualize the metrics table for the SVM with a polynomial kernel
display(run_svm('poly'))

Unnamed: 0,Dataset,AUC,Recall,Precision,F1
0,data,0.84476,0.581395,0.735294,0.649351
1,data_smote,0.863787,0.813953,0.729167,0.769231


In [10]:
# Visualize the metrics table for the SVM with an RBF kernel
display(run_svm('rbf'))

Unnamed: 0,Dataset,AUC,Recall,Precision,F1
0,data,0.867412,0.604651,0.742857,0.666667
1,data_smote,0.860163,0.837209,0.631579,0.72
