In [1]:
import os
import time
import pickle

import numpy as np
import pandas as pd
import plotly_express as px
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
import sklearn.metrics as metrics
from sklearn import model_selection
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV

In [12]:
TOP5_FEATURES = ['net_acc_std', 'net_acc_max', 'EDA_tonic_mean', 'EDA_tonic_min',  'EDA_tonic_max', 'label']
TOP10_FEATURES = [
    "net_acc_std",
    "net_acc_max",
    "EDA_tonic_mean",
    "EDA_tonic_min",
    "EDA_tonic_max",
    "EDA_smna_mean",
    "EDA_smna_std",
    "EDA_smna_min",
    "EDA_smna_max",
    "EDA_phasic_min",
    "label"
]
TOP_EDA_FEATURES = ["EDA_smna_mean", "EDA_phasic_min", "EDA_tonic_mean", "EDA_tonic_min", "EDA_tonic_max", "label"]

STRESS = {
    0: "amusement", 1: "baseline", 2: "stress"
}
STRESS_MAP = {
    1: 0,
    2: 1
}

In [13]:
def prepare_dataset(dataset_name, features):
    data = pd.read_csv(f'../../data/03_primary/{dataset_name}/combined_subjects.csv')
    df = data.loc[:, data.columns.intersection(features)]
    df = df[df.label != 0]
    Y_ = df.label.map(STRESS_MAP)
    X_ = df.drop(columns=['label'])
    return model_selection.train_test_split(X_, Y_, test_size=0.25, random_state=42, stratify=Y_)

In [14]:
def train(m, dataset_name, features):
    X, x, Y, y = prepare_dataset(dataset_name, features)
    start_time = time.time()
    result = m.fit(X, Y).predict(x)
    print(f"Finished in: {time.time() - start_time:.2f} seconds")
    print("Accuracy: {:.2f}%".format(metrics.accuracy_score(y, result) * 100))
    print("Balanced Accuracy: {:.2f}%".format(metrics.balanced_accuracy_score(y, result) * 100))
    print("F1 Score: {0:.2f}".format(metrics.f1_score(y, result, average='macro')))
    print("Precision: {0:.2f}".format(metrics.precision_score(y, result, average='macro')))
    print("Recall: {0:.2f}".format(metrics.recall_score(y, result, average='macro')))
    print("R2 Score: {0:.2f}".format(metrics.r2_score(y, result)))

In [5]:
X_train, X_test, y_train, y_test = prepare_dataset('WESAD_preprocessed_int15_add15')

In [6]:
svc = SGDClassifier()

param_grid={
               'loss': ['hinge', 'log_loss', 'modified_huber', 'squared_hinge', 'perceptron', 'squared_error', 'huber', 'epsilon_insensitive', 'squared_epsilon_insensitive'],
               'random_state': [42],
               'class_weight': [None, 'balanced'],
               'tol': [1e-06, 1e-05, 1e-04, 1e-03],
               'max_iter': [250, 500, 1000, 2000],
               'penalty': ['l2', 'l1', 'elasticnet'],
               'learning_rate': ['constant', 'optimal', 'invscaling', 'adaptive']
           },

model = GridSearchCV(svc, param_grid, return_train_score=True)
search = model.fit(X_train, y_train)

12960 fits failed out of a total of 17280.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
12960 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\ratus\anaconda3\envs\ex\lib\site-packages\sklearn\model_selection\_validation.py", line 686, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\ratus\anaconda3\envs\ex\lib\site-packages\sklearn\linear_model\_stochastic_gradient.py", line 890, in fit
    return self._fit(
  File "C:\Users\ratus\anaconda3\envs\ex\lib\site-packages\sklearn\linear_model\_stochastic_gradient.py", line 658, in _fit
    self._validate_params()
  File "C:\Users\ratus\anaconda3\envs\ex\lib\site-packages\sklearn\linear_model\_stochastic_gradient.p

In [7]:
search.best_params_

{'class_weight': None,
 'learning_rate': 'optimal',
 'loss': 'modified_huber',
 'max_iter': 250,
 'penalty': 'elasticnet',
 'random_state': 42,
 'tol': 1e-06}

In [8]:
train(SGDClassifier(
    random_state=42,
    class_weight='balanced',
    loss='modified_huber',
    tol=1e-6,
    max_iter=250,
    penalty='elasticnet',
    learning_rate='optimal'
), 'WESAD_preprocessed_int10_add10', TOP5_FEATURES)

Finished in: 0.03 seconds
Accuracy: 87.16%
Balanced Accuracy: 87.00%
F1 Score: 0.86
Precision: 0.86
Recall: 0.87
R2 Score: 0.44


In [9]:
train(SGDClassifier(
    random_state=42,
    class_weight='balanced',
    loss='modified_huber',
    tol=1e-6,
    max_iter=250,
    penalty='l1',
    learning_rate='optimal'
), 'WESAD_preprocessed_int10_add15', TOP5_FEATURES)

Finished in: 0.02 seconds
Accuracy: 87.10%
Balanced Accuracy: 87.01%
F1 Score: 0.86
Precision: 0.86
Recall: 0.87
R2 Score: 0.44


In [15]:
train(SGDClassifier(
    random_state=42,
    class_weight='balanced',
    loss='modified_huber',
    tol=0.00001,
    max_iter=1000,
    penalty='l1',
    learning_rate='optimal'
), 'WESAD_preprocessed_int15_add15', TOP5_FEATURES)

Finished in: 0.01 seconds
Accuracy: 88.64%
Balanced Accuracy: 86.72%
F1 Score: 0.87
Precision: 0.88
Recall: 0.87
R2 Score: 0.50


In [16]:
train(SGDClassifier(
    random_state=42,
    class_weight='balanced',
    loss='modified_huber',
    tol=0.00001,
    max_iter=1000,
    penalty='l1',
    learning_rate='optimal'
), 'WESAD_preprocessed_int15_add15', TOP10_FEATURES)

Finished in: 0.01 seconds
Accuracy: 90.45%
Balanced Accuracy: 89.57%
F1 Score: 0.90
Precision: 0.90
Recall: 0.90
R2 Score: 0.58


In [17]:
train(SGDClassifier(
    random_state=42,
    class_weight='balanced',
    loss='modified_huber',
    tol=0.00001,
    max_iter=1000,
    penalty='l1',
    learning_rate='optimal'
), 'WESAD_preprocessed_int15_add15', TOP_EDA_FEATURES)

Finished in: 0.01 seconds
Accuracy: 86.36%
Balanced Accuracy: 86.40%
F1 Score: 0.85
Precision: 0.85
Recall: 0.86
R2 Score: 0.40
