# Creación del modelo a partir de los datos medidos

In [1]:
# Importamos los módulos
import pandas as pd
import numpy as np

In [2]:
# Leemos los valores
df = pd.read_csv('HRV_Analysis.csv')

In [3]:
# Creamos la serie y con la variable independiente
y_data = df['Stress']
# Creamos el X con las variables dependientes que nos interesan
x_data = df[['Mean RR (ms)','SDNN','Mean HR (bpm)','RMSSD (ms)','pNN50 (%)','VLF','LF','HF','LF/HF']]

In [4]:
# Separamos los datasets de entrenamiento y test
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.2, random_state=1,stratify=y_data)

print("numero de casos de test:", x_test.shape[0])
print("numero de casos de entrenamiento:",x_train.shape[0])

numero de casos de test: 29
numero de casos de entrenamiento: 113


In [5]:
# Importamos los módulos
from sklearn.svm import SVC
from sklearn.metrics import roc_auc_score, accuracy_score, f1_score, confusion_matrix, classification_report

In [6]:
# Creamos el modelo con los valores óptimos (se han calculado aparte)
svc = SVC(C=1, gamma=1, kernel='poly', degree=3)
# Entrenamos el modelo con los datos de entrenamiento
svc.fit(x_train, y_train)

SVC(C=1, gamma=1, kernel='poly')

In [7]:
# Predecimos los valores con el modelo creado en ambos datasets
preds_train = svc.predict(x_train)
preds_test = svc.predict(x_test)
# Obtenemos la precisión del modelo en ambos datasets (% acierto)
print('accuracy in train:', accuracy_score(y_train, preds_train))
print('accuracy in test:', accuracy_score(y_test, preds_test))

accuracy in train: 0.9823008849557522
accuracy in test: 0.8275862068965517


In [8]:
# Obtenemos el informe de clasificación
print('Classification report of our model\n')
print(classification_report(y_test,preds_test))

Classification report of our model

              precision    recall  f1-score   support

           0       0.89      0.84      0.86        19
           1       0.73      0.80      0.76        10

    accuracy                           0.83        29
   macro avg       0.81      0.82      0.81        29
weighted avg       0.83      0.83      0.83        29



In [9]:
# Obtenemos la matriz de confusión
print(confusion_matrix(y_test, preds_test))

[[16  3]
 [ 2  8]]


In [10]:
# Exportamos el modelo
from joblib import dump
dump(svc,'AW_svc.joblib')

['AW_svc.joblib']

In [15]:
test = pd.read_csv('SWELL_test.csv')
train = pd.read_csv('SWELL_train.csv')

In [16]:
# Convertimos la variable categórica (condition) en variables binarias
train=pd.concat([train,pd.get_dummies(train[['condition']])],axis=1)
test=pd.concat([test,pd.get_dummies(test[['condition']])],axis=1)
# Añadimos una nueva columna (condicion) con los valores 0, no estrés y 1, estrés (interruption y time pressure)
test['condicion'] = test['condition_no stress']
train['condicion'] = train['condition_no stress']
test['condicion'] = test['condicion'].replace([0,1],[1,0])
train['condicion'] = train['condicion'].replace([0,1],[1,0])

In [17]:
# Creamos los datasets X con las variables dependientes que nos interesan
X_test = test[['MEAN_RR','SDRR','RMSSD','pNN50','HR','VLF','LF','HF','LF_HF']]
X_train = train[['MEAN_RR','SDRR','RMSSD','pNN50','HR','VLF','LF','HF','LF_HF']]

In [18]:
# Creamos las series y con la variable independiente
y_train = train['condicion']
y_test = test['condicion']

In [19]:
# Predecimos las situaciones de estrés de los datasets X
preds1 = svc.predict(X_test)
preds2 = svc.predict(X_train)

In [20]:
# Obtenemos la precisión del modelo en ambos datasets (% acierto)
print('Accuracy in SWELL_train:', accuracy_score(y_test, preds1))
print('Accuracy in SWELL_test:', accuracy_score(y_train, preds2))

Accuracy in SWELL_train: 0.4979406818901859
Accuracy in SWELL_test: 0.49399250993124627
