In [1]:
import os
import time
import pickle

import numpy as np
import pandas as pd
import plotly_express as px
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
import sklearn.metrics as metrics
from sklearn import model_selection
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV

In [2]:
# Feature subsets used for training. Every list also carries the target
# column 'label', which prepare_dataset separates from the features.
TOP5_FEATURES = [
    'net_acc_std',
    'net_acc_max',
    'EDA_tonic_mean',
    'EDA_tonic_min',
    'EDA_tonic_max',
    'label',
]
TOP10_FEATURES = [
    # accelerometer magnitude
    'net_acc_std', 'net_acc_max',
    # EDA tonic component
    'EDA_tonic_mean', 'EDA_tonic_min', 'EDA_tonic_max',
    # EDA smna component
    'EDA_smna_mean', 'EDA_smna_std', 'EDA_smna_min', 'EDA_smna_max',
    # EDA phasic component
    'EDA_phasic_min',
    'label',
]
TOP_EDA_FEATURES = [
    'EDA_smna_mean',
    'EDA_phasic_min',
    'EDA_tonic_mean',
    'EDA_tonic_min',
    'EDA_tonic_max',
    'label',
]

# Human-readable names for the integer class labels.
STRESS = {
    0: "amusement",
    1: "baseline",
    2: "stress",
}

In [3]:
def prepare_dataset(dataset_name, features, test_size=0.3, random_state=42,
                    data_root='../../data/03_primary'):
    """Load a preprocessed dataset and return a stratified train/test split.

    Reads ``{data_root}/{dataset_name}/combined_subjects.csv``, keeps only the
    columns named in ``features``, drops every row whose ``label`` equals 0
    (the class the notebook's ``STRESS`` mapping calls "amusement"), and
    splits the remaining rows.

    Parameters
    ----------
    dataset_name : str
        Name of the dataset directory below ``data_root``.
    features : list of str
        Column names to keep. The list has to contain ``'label'``, which is
        used as the target and removed from the feature frame.
    test_size : float, default 0.3
        Fraction of rows placed in the test split.
    random_state : int, default 42
        Seed for the split, so repeated calls return the same partition.
    data_root : str, default '../../data/03_primary'
        Directory that contains the dataset directories.

    Returns
    -------
    X_train, X_test, y_train, y_test
        Result of ``train_test_split``, stratified on the label column.
    """
    import warnings

    data = pd.read_csv(f'{data_root}/{dataset_name}/combined_subjects.csv')

    # Index.intersection silently ignores names that are absent from the CSV,
    # so a misspelled feature would vanish without notice. Surface that.
    missing = [name for name in features if name not in data.columns]
    if missing:
        warnings.warn(
            f"Requested columns not found in dataset '{dataset_name}' "
            f"and therefore ignored: {missing}",
            stacklevel=2,
        )

    df = data.loc[:, data.columns.intersection(features)]
    df = df[df.label != 0]
    Y_ = df.label
    X_ = df.drop(columns=['label'])
    return model_selection.train_test_split(
        X_, Y_, test_size=test_size, random_state=random_state, stratify=Y_
    )

In [4]:
def train(m, dataset_name, features):
    """Fit ``m`` on the training split and print metrics for the test split.

    The split comes from ``prepare_dataset(dataset_name, features)``. The
    printed duration covers fitting plus predicting on the test split.
    F1, precision and recall are macro-averaged.

    Returns the fitted estimator ``m``.
    """
    X_fit, X_eval, y_fit, y_eval = prepare_dataset(dataset_name, features)

    start_time = time.time()
    predicted = m.fit(X_fit, y_fit).predict(X_eval)
    print(f"Finished in: {time.time() - start_time:.2f} seconds")

    accuracy_pct = metrics.accuracy_score(y_eval, predicted) * 100
    print("Accuracy: {:.2f}%".format(accuracy_pct))

    balanced_pct = metrics.balanced_accuracy_score(y_eval, predicted) * 100
    print("Balanced Accuracy: {:.2f}%".format(balanced_pct))

    # Macro-averaged scores, reported in the same order as before.
    for template, scorer in (
        ("F1 Score: {0:.2f}", metrics.f1_score),
        ("Precision: {0:.2f}", metrics.precision_score),
        ("Recall: {0:.2f}", metrics.recall_score),
    ):
        print(template.format(scorer(y_eval, predicted, average='macro')))

    r2 = metrics.r2_score(y_eval, predicted)
    print("R2 Score: {0:.2f}".format(r2))

    return m

In [5]:
# Notebook-level split on the top-5 feature subset; later cells reuse it.
X_train, X_test, y_train, y_test = prepare_dataset(
    dataset_name='WESAD_preprocessed_int15_add15',
    features=TOP5_FEATURES,
)

In [6]:
# Preview the first rows of the training features (label column already removed).
X_train.head()

Unnamed: 0,net_acc_std,net_acc_max,EDA_tonic_mean,EDA_tonic_min,EDA_tonic_max
882,0.001381,0.260257,-0.666074,-0.670798,-0.658636
1128,0.006178,0.406848,-0.770063,-0.780377,-0.758878
1627,0.024405,0.379145,-1.34455,-3.401457,1.278204
1710,0.003674,0.308428,-0.920008,-0.936975,-0.904069
441,0.016577,0.771997,-0.89871,-0.961893,-0.887457


In [13]:
# Hyper-parameter search for LinearSVC on the notebook-level training split.
svc = LinearSVC()

# LinearSVC only implements the 'hinge' and 'squared_hinge' losses. Names such
# as 'log_loss' or 'perceptron' belong to SGDClassifier; LinearSVC rejects
# them, so every grid point using one would fail to fit and waste search time.
param_grid = {
    'C': [0.5, 1, 2],
    'loss': ['hinge', 'squared_hinge'],
    'random_state': [42],
    'class_weight': [None, 'balanced'],
    'tol': [1e-06, 1e-05, 1e-04, 1e-03],
    'max_iter': [250, 500, 1000, 2000],
}

# n_jobs=-1 evaluates candidates in parallel on all available cores.
model = GridSearchCV(svc, param_grid, return_train_score=True, n_jobs=-1)
search = model.fit(X_train, y_train)

Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to converge, increase the number of iterations.
Liblinear failed to conve

KeyboardInterrupt: 

In [14]:
# Best hyper-parameter combination found by the grid search. `search` only
# exists once the grid-search cell above has run to completion.
search.best_params_

NameError: name 'search' is not defined

In [7]:
# LinearSVC trained and evaluated on the top-5 feature subset.
top5_model = train(
    m=LinearSVC(
        loss='hinge',
        C=2,
        tol=1e-06,
        max_iter=250,
        class_weight='balanced',
        random_state=42,
    ),
    dataset_name='WESAD_preprocessed_int15_add15',
    features=TOP5_FEATURES,
)

Finished in: 0.01 seconds
Accuracy: 89.20%
Balanced Accuracy: 89.23%
F1 Score: 0.88
Precision: 0.88
Recall: 0.89
R2 Score: 0.53




In [9]:
# LinearSVC trained and evaluated on the top-10 feature subset.
# NOTE(review): the recorded run of this cell emitted a liblinear convergence
# warning; a larger max_iter or feature scaling may be needed. Confirm before
# relying on these numbers.
top10_model = train(
    m=LinearSVC(
        dual=True,
        C=2,
        tol=1e-05,
        max_iter=250,
        class_weight='balanced',
        random_state=42,
    ),
    dataset_name='WESAD_preprocessed_int15_add15',
    features=TOP10_FEATURES,
)

Finished in: 0.02 seconds
Accuracy: 91.10%
Balanced Accuracy: 91.18%
F1 Score: 0.90
Precision: 0.90
Recall: 0.91
R2 Score: 0.61


Liblinear failed to converge, increase the number of iterations.


In [None]:
# LinearSVC trained and evaluated on the EDA-only feature subset.
# NOTE(review): `to5_eda_model` looks like a typo for `top5_eda_model`; the
# name is kept unchanged in case later cells refer to it.
to5_eda_model = train(
    m=LinearSVC(
        dual=True,
        C=2,
        tol=1e-05,
        max_iter=250,
        class_weight='balanced',
        random_state=42,
    ),
    dataset_name='WESAD_preprocessed_int15_add15',
    features=TOP_EDA_FEATURES,
)

Finished in: 0.01 seconds
Accuracy: 89.77%
Balanced Accuracy: 88.82%
F1 Score: 0.89
Precision: 0.89
Recall: 0.89
R2 Score: 0.55




## XAI
In this section we interpret the trained models using SHAP-based explanations from the `explainerdashboard` package.

In [8]:
# Run a full garbage-collection pass before the XAI section. The value shown
# as the cell output is what gc.collect() returns.
import gc
gc.collect()

0

In [9]:
import explainerdashboard as expdb
from explainerdashboard import ExplainerDashboard, RegressionExplainer
from explainerdashboard import InlineExplainer
from explainerdashboard.custom import (ImportancesComposite,
                                       IndividualPredictionsComposite,
                                       WhatIfComposite,
                                       ShapDependenceComposite,
                                       ShapInteractionsComposite,
                                       DecisionTreesComposite)

Build the explainer for the top-5-feature model and start the interactive dashboard to inspect its explanations.

In [10]:
# Wrap the top-5-feature model with the held-out split from the notebook-level
# prepare_dataset call (same dataset, features and seed that train() used),
# then build the dashboard in inline mode.
# NOTE(review): top5_model is a LinearSVC classifier, yet it is wrapped in a
# RegressionExplainer with model_output='logodds'. Confirm this is intentional
# and that the explainer actually honours that model_output value.
explainer = RegressionExplainer(top5_model, X_test, y_test, model_output='logodds')

ed = ExplainerDashboard(explainer, mode='inline')

Generating self.shap_explainer = shap.KernelExplainer(model, X)...
Building ExplainerDashboard..
For this type of model and model_output interactions don't work, so setting shap_interaction=False...
The explainer object has no decision_trees property. so setting decision_trees=False...
Generating layout...
Calculating shap values...


  0%|          | 0/528 [00:00<?, ?it/s]

Calculating predictions...
Calculating residuals...
Calculating absolute residuals...
Calculating dependencies...
Calculating importances...
Reminder: you can store the explainer (including calculated dependencies) with explainer.dump('explainer.joblib') and reload with e.g. ClassifierExplainer.from_file('explainer.joblib')
Registering callbacks...


In [11]:
# Serve the dashboard inline on port 8765; per the library's own log message
# it can be stopped with ExplainerDashboard.terminate(8765).
ed.run(8765)

Starting ExplainerDashboard inline (terminate it with ExplainerDashboard.terminate(8765))


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]