# SVM MODEL

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.svm import SVC
from sklearn import metrics
from sklearn.metrics import ConfusionMatrixDisplay, classification_report, roc_curve, fbeta_score

## Data

In [4]:
# Read the training and test tables from their CSV files.
train_csv = "../../data/csev/model_6/fulltrainCSEV.csv"
test_csv = "../../data/csev/model_6/fulltestCSEV.csv"

full_train = pd.read_csv(train_csv)
full_test = pd.read_csv(test_csv)

In [5]:
# Separate the training table into features (everything except C_SEV)
# and the C_SEV target column.
X_train = full_train.drop(columns=['C_SEV'])
Y_train = full_train['C_SEV']

# Display how many training rows fall in each target class.
Y_train.value_counts()

1    27796
0    27794
Name: C_SEV, dtype: int64

In [6]:
# Separate the test table into features (everything except C_SEV)
# and the C_SEV target column.
X_test = full_test.drop(columns=['C_SEV'])
Y_test = full_test['C_SEV']

# Display how many test rows fall in each target class.
Y_test.value_counts()

1    351139
0      6949
Name: C_SEV, dtype: int64

## SVC

In [7]:
# Support vector classifier with every setting left at its default except
# gamma, which is switched to scikit-learn's 'auto' option.
svc = SVC(gamma="auto")

In [8]:
# Train the classifier on the training features and target; the fitted
# estimator is the cell's displayed value.
svc.fit(X=X_train, y=Y_train)

SVC(gamma='auto')

In [9]:
# Predicted class label for every row of the test features; reused by the
# evaluation cells below.
Y_pred = svc.predict(X=X_test)

## Evaluation

In [None]:
# Fraction of test rows whose predicted label equals the true label.
# NOTE(review): the test target counts shown above are heavily imbalanced,
# so read this value together with the per-class metrics further down.
metrics.accuracy_score(y_true=Y_test, y_pred=Y_pred)

In [None]:
# Plot the raw and the row-normalized ("true") confusion matrices for the
# test set, and print the underlying arrays.
#
# The matrices are built from the already-computed Y_pred instead of
# from_estimator(svc, X_test, ...): from_estimator would call svc.predict on
# the full test set again on every loop iteration and produce the same
# matrix, which is needlessly slow for an SVC on a large test set.
titles_options = [
    ("Matriz de confusión", None),
    ("Matriz de confusión normalizada", "true"),
]
for title, normalize in titles_options:
    disp = ConfusionMatrixDisplay.from_predictions(
        Y_test,
        Y_pred,
        # NOTE(review): tick labels are applied in sorted class order, so
        # 'Con Fall.' is shown for class 0 and 'Sin Fall.' for class 1 —
        # confirm this matches the C_SEV encoding.
        display_labels=['Con Fall.', 'Sin Fall.'],
        cmap=plt.cm.Blues,
        normalize=normalize,
    )
    disp.ax_.set_title(title)

    print(title)
    print(disp.confusion_matrix)

plt.show()

In [None]:
# Per-class precision / recall / F1 summary for the test predictions.
report_text = classification_report(y_true=Y_test, y_pred=Y_pred)

print("Classification Report")
print(report_text)

In [None]:
# ROC curve
# roc_curve sweeps a threshold over continuous scores. Feeding it the hard
# 0/1 labels from predict() leaves only one real operating point, so the
# plotted "curve" is just two straight segments. The SVC above was created
# without probability=True, so predict_proba is unavailable; use the
# decision_function output (higher value => class 1) as the score instead.
yhat = svc.decision_function(X_test)
# calculate the ROC curve points over all score thresholds
fpr, tpr, thresholds = roc_curve(Y_test, yhat)
# diagonal reference line for a classifier with no skill
plt.plot([0,1], [0,1], linestyle='--', label='No Skill')
# ROC curve of the fitted model
plt.plot(fpr, tpr, marker='.', label='SVM')
# axis labels
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend()
# show the plot
plt.show()

In [None]:
# F-beta score on the test predictions with beta=2, i.e. recall is weighted
# more heavily than precision.
# NOTE(review): average='binary' with no pos_label scores the default
# positive class (label 1), which is the majority class in the test counts
# shown above — confirm that is the class of interest.
fbeta_score(y_true=Y_test, y_pred=Y_pred, beta=2, average='binary')