# Model selection

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
dataset = pd.read_csv('social_network_ads.csv')
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

## Splitting the dataset

In [3]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

## Feature scaling

In [4]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train_fs = sc.fit_transform(X_train)
X_test_fs = sc.transform(X_test)

## Training the Kernel SVM model

In [5]:
from sklearn.svm import SVC
classifier = SVC(kernel = 'rbf', random_state = 0, probability = True)
classifier.fit(X_train_fs, y_train)

SVC(probability=True, random_state=0)

## Applying the model on the training set

In [6]:
y_train_re = y_train.reshape(len(y_train), 1)

y_train_pred = classifier.predict(X_train_fs)
y_train_pred = y_train_pred.reshape(len(y_train_pred), 1)

y_train_proba = classifier.predict_proba(X_train_fs)

## Model preformance on the training set

In [7]:
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import accuracy_score, precision_score, cohen_kappa_score, matthews_corrcoef, roc_auc_score

cm_train = confusion_matrix(y_train, y_train_pred)
cr_train = classification_report(y_train, y_train_pred)
print(cm_train)
print(cr_train)

acc_train = accuracy_score(y_train, y_train_pred)
prec_train = precision_score(y_train, y_train_pred)
ck_train = cohen_kappa_score(y_train, y_train_pred)
mcc_train = matthews_corrcoef(y_train, y_train_pred)
roc_auc_train = roc_auc_score(y_train, y_train_pred)

print(f'Accuracy = {acc_train:.2f}; Precision = {prec_train:.2f}; Kappa = {ck_train:.2f}; MCC = {mcc_train:.2f}; ROC AUC = {roc_auc_train:.2f}')

[[171  18]
 [  8 103]]
              precision    recall  f1-score   support

           0       0.96      0.90      0.93       189
           1       0.85      0.93      0.89       111

    accuracy                           0.91       300
   macro avg       0.90      0.92      0.91       300
weighted avg       0.92      0.91      0.91       300

Accuracy = 0.91; Precision = 0.85; Kappa = 0.82; MCC = 0.82; ROC AUC = 0.92


## Applying Cross-Validation

In [8]:
from sklearn.model_selection import cross_val_score
acc_cv = cross_val_score(estimator = classifier, cv = 10, X = X_train_fs, y = y_train)
acc_cv_mean = acc_cv.mean() * 100
acc_cv_stdev = acc_cv.std() * 100
print(f'Accuracy for 10-fold Cross-Validation: {acc_cv_mean:.2f} \u00B1 {acc_cv_stdev:.2f}')

Accuracy for 10-fold Cross-Validation: 90.33 ± 6.57


## Applying Grid Search

In [9]:
from sklearn.model_selection import GridSearchCV
parameters = [
    {'C': [0.25, 0.5, 0.75, 1], 'kernel': ['linear']},
    {'C': [0.25, 0.5, 0.75, 1], 'kernel': ['rbf'], 'gamma': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]}
]
grid_search = GridSearchCV(estimator = classifier, param_grid = parameters, scoring = 'accuracy', cv = 10, n_jobs = 6)
grid_search.fit(X_train_fs, y_train)
acc_grid_best = grid_search.best_score_
best_params = grid_search.best_params_
print(f'The best model uses the following parameters: {best_params}')
print(f'The accuracy of this model is: {acc_grid_best * 100:.2f}')

The best model uses the following parameters: {'C': 0.5, 'gamma': 0.6, 'kernel': 'rbf'}
The accuracy of this model is: 90.67


## Apply the model to a test set

In [10]:
y_test_re = y_test.reshape(len(y_test), 1)

y_test_pred = classifier.predict(X_test_fs)
y_test_pred = y_test_pred.reshape(len(y_test_pred), 1)

y_test_proba = classifier.predict_proba(X_test_fs)

## Model performance on the test set

In [11]:
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import accuracy_score, precision_score, cohen_kappa_score, matthews_corrcoef, roc_auc_score

cm_test = confusion_matrix(y_test, y_test_pred)
cr_test = classification_report(y_test, y_test_pred)
print(cm_test)
print(cr_test)

acc_test = accuracy_score(y_test, y_test_pred)
prec_test = precision_score(y_test, y_test_pred)
ck_test = cohen_kappa_score(y_test, y_test_pred)
mcc_test = matthews_corrcoef(y_test, y_test_pred)
roc_auc_test = roc_auc_score(y_test, y_test_pred)

print(f'Accuracy = {acc_test:.2f}; Precision = {prec_test:.2f}; Kappa = {ck_test:.2f}; MCC = {mcc_test:.2f}; ROC AUC = {roc_auc_test:.2f}')

[[64  4]
 [ 3 29]]
              precision    recall  f1-score   support

           0       0.96      0.94      0.95        68
           1       0.88      0.91      0.89        32

    accuracy                           0.93       100
   macro avg       0.92      0.92      0.92       100
weighted avg       0.93      0.93      0.93       100

Accuracy = 0.93; Precision = 0.88; Kappa = 0.84; MCC = 0.84; ROC AUC = 0.92
