In [36]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
import os
from verbio import readers, features, settings, preprocessing, temporal, visualize, utils
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import f1_score, recall_score, precision_score, accuracy_score

# Sliding-window parameters handed to every windowing call below
# (win_len / win_stride); the loop comments treat them as seconds.
win_len = 10.0
win_stride = 5.0

# Participant folders P001..P073 and the eight session folders per participant.
pt_range = range(1, 74)
pt_names = ['P{:03d}'.format(pt_id) for pt_id in pt_range]
sessions = ['TEST{:02d}'.format(session_idx) for session_idx in range(1, 9)]

# Root of the raw data tree: <base_dir>/<participant>/<session>/<workbook>.
base_dir = '/home/jason/hubbs/project_verbio/data/raw_data/'

# Workbook names expected inside every session folder.
annotation_fname = 'MANUAL_ANNOTATION_PPT.xlsx'
eda_fname = 'E4_EDA_PPT.xlsx'
hr_fname = 'E4_HR_PPT.xlsx'

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Column-oriented accumulator for the evaluation results: every key maps to its
# own list, and the training loop appends one value per key for each run.
# Key order is preserved because it becomes the DataFrame column order.
results_dict = {
    column: []
    for column in (
        'pt', 'n_sessions',
        'acc', 'f1', 'rec', 'prec',
        'n_pos_train', 'n_neg_train',
        'n_pos_test', 'n_neg_test',
    )
}
# Per-participant experiment: build windowed features (SCR peak count and mean
# HR gradient) and binarized annotation labels for every session, then train on
# the first j sessions (j = 1..4) and evaluate on the remaining sessions.
for pt in pt_names:
    pt_valid = True
    x = []  # one feature matrix per session, in session order
    y = []  # one label vector per session, in session order
    for session in sessions:
        # Build paths to data
        session_dir = os.path.join(base_dir, pt, session)
        eda_path = os.path.join(session_dir, eda_fname)
        hr_path = os.path.join(session_dir, hr_fname)
        annotation_path = os.path.join(session_dir, annotation_fname)
        # Drop the whole participant if any file of any session is missing:
        # the train/test split below relies on all sessions being present.
        if not all(os.path.exists(path) for path in (eda_path, hr_path, annotation_path)):
            pt_valid = False
            break

        # Read in dfs
        eda_df = readers.read_excel(eda_path)
        hr_df = readers.read_excel(hr_path)
        annotation_df = readers.read_excel(annotation_path)
        # Convert EDA signals to numpy
        eda_signal = eda_df['EDA'].to_numpy()
        eda_times = eda_df[settings.time_key].to_numpy()
        # Get EDA features; only the SCR peak column is kept.
        # NOTE(review): sr=4 is presumably the EDA sampling rate in Hz - confirm.
        eda_fx = features.eda_features(
            signal=eda_signal,
            times=eda_times,
            sr=4,
            win_len=win_len,
            win_stride=win_stride
        )[['SCR_Peaks']]
        # Convert HR signals to numpy; the feature is the gradient of HR,
        # not the raw HR value.
        hr_signal = np.gradient(hr_df['HR'].to_numpy())
        hr_times = hr_df[settings.time_key].to_numpy()
        # Window HR (mean gradient per window)
        hr_fx = preprocessing.window_timed(
            x=hr_signal,
            times=hr_times,
            win_len=win_len,
            win_stride=win_stride,
            win_fn=lambda x: np.mean(x)
        )
        hr_fx = np.array(hr_fx)
        # Convert annotation signals to numpy
        annotation_r1 = annotation_df['R1'].to_numpy()
        annotation_r2 = annotation_df['R2'].to_numpy()
        annotation_r4 = annotation_df['R4'].to_numpy()
        annotation_r5 = annotation_df['R5'].to_numpy()
        annotation_times = annotation_df[settings.time_key].to_numpy()
        # Combine the four annotators (R1, R2, R4, R5) into a per-sample mean
        annotation_mixed = np.vstack([annotation_r1, annotation_r2, annotation_r4, annotation_r5])
        annotation_mean = np.mean(annotation_mixed, axis=0)
        # Window annotations: each window's label is its mean rating binarized
        # at 2.5. The input must be the averaged rating computed above (the
        # previous code referenced an undefined `annotation_bin`).
        annotation_fx = preprocessing.window_timed(
            x=annotation_mean,
            times=annotation_times,
            win_len=win_len,
            win_stride=win_stride,
            win_fn=lambda x: preprocessing.binarize(np.mean(x), threshold=2.5)
        )
        # Shift annotations back in time so features previous to that annotation
        # are used for prediction. If we have a window length of 10 seconds and
        # a window stride of 5 seconds, we need to shift back two time quanta,
        # and cut off the last two elements since we no longer need them
        # We must also truncate the EDA and HR features to the same length
        annotation_fx = np.array(annotation_fx, dtype='int')
        assert win_len%win_stride < 0.1 # Assert that they're at least somewhat divisible
        shift_num = -int(win_len//win_stride)
        annotation_fx = temporal.shift(annotation_fx, shift_num)[:shift_num]
        eda_fx = eda_fx.iloc[:annotation_fx.shape[0]]
        hr_fx = hr_fx[:annotation_fx.shape[0]]
        # Combine X, add to session data, combine y, add to session data
        x_df = eda_fx.copy(deep=True)
        x_df['HR'] = hr_fx
        x.append(x_df.to_numpy())
        y.append(annotation_fx)

    if pt_valid:
        # The held-out test set (every session after the fourth) is the same
        # for all j, so build it and its class counts once.
        x_test = np.concatenate(x[4:], axis=0)
        y_test = np.concatenate(y[4:], axis=0)
        n_pos_test = int(np.sum(y_test))
        n_neg_test = y_test.shape[0] - n_pos_test

        for j in range(1,5):
            # Train on the first j sessions only
            x_train = np.concatenate(x[:j], axis=0)
            y_train = np.concatenate(y[:j], axis=0)

            # Fixed seed so repeated runs of this cell give identical scores
            clf = DecisionTreeClassifier(random_state=0)
            clf.fit(x_train, y_train)
            y_hat = clf.predict(x_test)

            f1 = f1_score(y_test, y_hat)
            prec = precision_score(y_test, y_hat)
            rec = recall_score(y_test, y_hat)
            acc = accuracy_score(y_test, y_hat)

            # Labels are 0/1, so the sum is the number of positive windows
            n_pos_train = int(np.sum(y_train))
            n_neg_train = y_train.shape[0] - n_pos_train

            results_dict['pt'].append(pt)
            results_dict['n_sessions'].append(j)
            results_dict['n_pos_train'].append(n_pos_train)
            results_dict['n_neg_train'].append(n_neg_train)
            results_dict['n_pos_test'].append(n_pos_test)
            results_dict['n_neg_test'].append(n_neg_test)
            results_dict['acc'].append(acc)
            results_dict['prec'].append(prec)
            results_dict['rec'].append(rec)
            results_dict['f1'].append(f1)
            print(f'=={pt}-{j}==\nF1 Score: {f1:.3f}\nPrecision: {prec:.3f}\nRecall: {rec:.3f}\nAcc: {acc:.3f}\n\n')
        

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


==P004-1==
F1 Score: 0.467
Precision: 0.372
Recall: 0.625
Acc: 0.672


==P004-2==
F1 Score: 0.440
Precision: 0.365
Recall: 0.554
Acc: 0.676


==P004-3==
F1 Score: 0.372
Precision: 0.329
Recall: 0.429
Acc: 0.668


==P004-4==
F1 Score: 0.375
Precision: 0.307
Recall: 0.482
Acc: 0.631




  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


==P005-1==
F1 Score: 0.000
Precision: 0.000
Recall: 0.000
Acc: 1.000


==P005-2==
F1 Score: 0.000
Precision: 0.000
Recall: 0.000
Acc: 1.000


==P005-3==
F1 Score: 0.000
Precision: 0.000
Recall: 0.000
Acc: 1.000


==P005-4==
F1 Score: 0.000
Precision: 0.000
Recall: 0.000
Acc: 1.000




  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'recall', 'true', average, warn_for)
  'recall', 'true', average, warn_for)
  'recall', 'true', average, warn_for)
  'recall', 'true', average, warn_for)
  'recall', 'true', average, warn_for)
  'recall', 'true', average, warn_for)


==P008-1==
F1 Score: 0.000
Precision: 0.000
Recall: 0.000
Acc: 1.000


==P008-2==
F1 Score: 0.000
Precision: 0.000
Recall: 0.000
Acc: 0.767


==P008-3==
F1 Score: 0.000
Precision: 0.000
Recall: 0.000
Acc: 0.827


==P008-4==
F1 Score: 0.000
Precision: 0.000
Recall: 0.000
Acc: 0.940




  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


==P016-1==
F1 Score: 0.000
Precision: 0.000
Recall: 0.000
Acc: 0.854


==P016-2==
F1 Score: 0.000
Precision: 0.000
Recall: 0.000
Acc: 0.845


==P016-3==
F1 Score: 0.087
Precision: 0.143
Recall: 0.062
Acc: 0.808


==P016-4==
F1 Score: 0.032
Precision: 0.033
Recall: 0.031
Acc: 0.726




  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


==P020-1==
F1 Score: 0.000
Precision: 0.000
Recall: 0.000
Acc: 1.000


==P020-2==
F1 Score: 0.000
Precision: 0.000
Recall: 0.000
Acc: 1.000


==P020-3==
F1 Score: 0.000
Precision: 0.000
Recall: 0.000
Acc: 1.000


==P020-4==
F1 Score: 0.000
Precision: 0.000
Recall: 0.000
Acc: 1.000




  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


==P021-1==
F1 Score: 0.000
Precision: 0.000
Recall: 0.000
Acc: 1.000


==P021-2==
F1 Score: 0.000
Precision: 0.000
Recall: 0.000
Acc: 1.000


==P021-3==
F1 Score: 0.000
Precision: 0.000
Recall: 0.000
Acc: 1.000


==P021-4==
F1 Score: 0.000
Precision: 0.000
Recall: 0.000
Acc: 1.000




  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


KeyboardInterrupt: 

In [50]:
# Turn the accumulated per-run metrics into a table, preview the first rows,
# and save the full table as an Excel workbook.
df = pd.DataFrame(results_dict)
print(df.head())
# to_excel does not create missing folders, so make sure 'results/' exists.
os.makedirs('results', exist_ok=True)
# NOTE(review): the filename says "random_forest" but the training cell in this
# notebook fits a DecisionTreeClassifier - confirm the name matches the run.
df.to_excel('results/grad_10_5_random_forest_7.xlsx')

     pt  n_sessions       acc        f1       rec      prec  n_pos_train  \
0  P004           1  0.512295  0.363636  0.280992  0.515152           11   
1  P004           2  0.500000  0.483051  0.471074  0.495652           57   
2  P004           3  0.479508  0.409302  0.363636  0.468085           68   
3  P004           4  0.459016  0.297872  0.231405  0.417910           81   
4  P005           1  0.378378  0.163636  0.091837  0.750000            5   

   n_neg_train  n_pos_test  n_neg_test  
0           57         121         123  
1           80         121         123  
2          120         121         123  
3          180         121         123  
4           33          98          50  
