In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import *
from sklearn.pipeline import Pipeline

# SVM classification using the defined metrics for measuring the importance/quality of a speech
## Accuracy: 78.72%
## Precision: 0.74
## Sensitivity: 0.86
## AUC: 0.78

In [None]:
# Load the merged tabular dataset; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="results/dataset_tabular_merged.csv")

In [None]:
# Positional column layout: the first five columns are skipped, the last column
# holds the label, and every column in between is used as a feature.
# NOTE(review): the meaning of the five skipped columns is not visible here - confirm
# against the CSV schema.
features_frame = df.iloc[:, 5:-1]
labels_series = df.iloc[:, -1]

data = np.array(features_frame)
target = np.array(labels_series)

In [None]:
# Hold out 30% of the samples for testing. Stratifying on the labels keeps the class
# proportions the same in both partitions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    test_size=0.3,
    random_state=109,
    stratify=target,
)

In [None]:
# Hyper-parameter grid for the search. Keys use the "<step>__<param>" form so they
# address the pipeline step named "svm".
param_grid = dict(
    # Regularisation strength, swept over powers of ten.
    svm__C=[0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000],
    # RBF kernel coefficient candidates (order kept as-is; it affects tie-breaking).
    svm__gamma=[100, 10, 1, 0.1, 0.2, 0.02, 0.002, 0.0002, 0.01, 0.001, 0.0001],
    # Only the RBF kernel is explored.
    svm__kernel=['rbf'],
)

In [None]:
# Scale the features and then fit an SVC. Keeping the scaler inside the pipeline means
# it is re-fitted on the training folds only during cross-validation.
# The step name "svm" must match the "svm__" prefix used by the grid keys.
pipeline = Pipeline(
    [
        ("StandardScaler", StandardScaler()),
        ("svm", svm.SVC()),
    ]
)

# Exhaustive 5-fold grid search scored on accuracy; the best configuration is refitted
# on the full training set so `search` can be used directly for prediction.
search = GridSearchCV(
    estimator=pipeline,
    param_grid=param_grid,
    scoring="accuracy",
    cv=5,
    refit=True,
    verbose=0,
    n_jobs=5,
)

In [None]:
# Run the grid search on the training split (fit returns the fitted search object),
# then predict the held-out test split with the refitted best pipeline.
y_pred = search.fit(X_train, y_train).predict(X_test)

In [None]:
# Threshold-based metrics are computed from the hard class predictions.
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
print("Precision:", metrics.precision_score(y_test, y_pred))
print("Sensitivity:", metrics.recall_score(y_test, y_pred))

# AUC is a ranking metric and needs continuous scores, not hard labels: feeding
# `y_pred` collapses the ROC curve to a single operating point. Use the signed
# distance to the SVM decision boundary instead (larger values favour
# `search.classes_[1]`). This value will generally differ from an AUC computed
# on hard labels.
y_score = search.decision_function(X_test)
print("AUC:", metrics.roc_auc_score(y_test, y_score))

In [None]:
# Pin the row/column order to the classes known to the fitted search so the matrix
# layout does not depend on which labels happen to appear in the test split, and
# show the real class values on the axes instead of positional indices.
class_labels = search.classes_
cm = metrics.confusion_matrix(y_test, y_pred, labels=class_labels)
cm_display = metrics.ConfusionMatrixDisplay(cm, display_labels=class_labels).plot()

In [None]:
# A ROC curve is traced by sweeping a threshold over continuous scores. Hard class
# predictions yield only a degenerate three-point curve, so use the SVM decision
# function (larger values favour `search.classes_[1]`, the positive label below).
y_score = search.decision_function(X_test)
fpr, tpr, _ = metrics.roc_curve(y_test, y_score, pos_label=search.classes_[1])

# Passing the area under the curve makes it appear in the plot legend.
roc_display = metrics.RocCurveDisplay(fpr=fpr, tpr=tpr, roc_auc=metrics.auc(fpr, tpr)).plot()