In [2]:
#%load_ext autoreload
#%autoreload 2
import project_path
from sleep_stages.sleep_stage_config import Config
import pandas as pd
import pickle
from glob import glob
import os
from scipy.stats import ttest_ind
import numpy as np

cfg = Config()

# binterval is the "Manual Annotation alg" and "sleep" is the "device algo"
baselines = ["stages", "always1", "always0"]
defaultml = ["Random_forest_300", "SGD_perceptron", "SGD_log", "SGD_hinge"]  # "SGD_huber"
defaultdl = ["LSTM_20", "LSTM_50_raw", "LSTM_100_raw", "CNN_20_raw", "CNN_50_raw", "CNN_100_raw"]

# The result sub-folder is looked up in the project config for the chosen period.
period = "sleep_period"
sub_folder = cfg.ANALYSIS_SUB_FOLDER[period]

# Raw algorithm name -> label shown in the tables.
# NOTE(review): "Perception" is presumably a typo for "Perceptron"; it is kept unchanged here
# because the same label is repeated in the other label dictionaries of this notebook.
readable_algs_dict = {
    "stages": "Ground Truth",
    "always_0": "Always wake",
    "always_1": "Always sleep",
    "CNN_20_ENMO_HRV": "CNN(20)",
    "CNN_50_ENMO_HRV": "CNN(50)",
    "CNN_100_ENMO_HRV": "CNN(100)",
    "LSTM_20_ENMO_HRV": "LSTM(20)",
    "LSTM_50_ENMO_HRV": "LSTM(50)",
    "LSTM_100_ENMO_HRV": "LSTM(100)",
    "SGD_hinge": "Linear SVM",
    "SGD_log": "Logistic Regression",
    "SGD_perceptron": "Perception",
    "Random_forest_300": "Random Forest",
}


In [3]:
def get_group_average(t, metrics):
    """
    Calculate the group average of a results data frame and render it as LaTeX.

    Each column named in ``metrics`` holds strings of the form "<value>+-<spread>";
    only the part before "+-" is parsed. The mean and standard deviation of every
    numeric column are then formatted as "<mean> +- <std>" with one decimal place.

    :param t: data frame holding the metric columns; it is left unmodified
    :param metrics: names of the columns of ``t`` to parse into numbers
    :return: LaTeX source of a single-row table labelled "Group Average"
    """
    # Work on a copy so the caller's frame keeps its original "+-" strings.
    values = t.copy()
    for m in metrics:
        # Built-in float replaces np.float, an alias that current NumPy no longer provides.
        values[m] = values[m].apply(lambda x: float(x.split("+-")[0]))
    # numeric_only=True leaves out text columns, which mean()/std() reject on recent pandas.
    stats = pd.concat(
        (values.mean(numeric_only=True), values.std(numeric_only=True)), axis=1
    )
    stats.columns = ["Mean", "Std"]
    formatted = stats.apply(lambda x: "%.1f +- %.1f" % (x["Mean"], x["Std"]), axis=1)
    return formatted.to_frame("Group Average").T.to_latex()

def pvalue(results, alg1, alg2, metric):
    """
    Statistical significance test between two algorithms on one metric.

    :param results: mapping indexed first by algorithm name, then by metric name,
                    giving the sample of values for that pair
    :param alg1: name of the first algorithm
    :param alg2: name of the second algorithm
    :param metric: name of the metric to compare
    :return: p-value reported by scipy's ``ttest_ind`` for the two samples
    """
    first_sample = results[alg1][metric]
    second_sample = results[alg2][metric]
    outcome = ttest_ind(first_sample, second_sample)
    return outcome.pvalue

In [20]:
def load_task(sub_folder, num_classes=2, feature_type="ENMO_HRV", stages_clf_columns=None):
    """
    Load the result tables of one classification task and merge them per algorithm.

    Reads the minutes summary CSV and the metrics summary CSV found under
    ``cfg.STAGE_OUTPUT_FOLDER_HRV30s[num_classes]/<sub_folder>`` and joins them on the
    "algorithms" column.

    :param sub_folder: sub-folder of the result folder to read from
    :param num_classes: number of stages of the task; selects the result folder and file names
    :param feature_type: feature set name used as the suffix of the result file names
    :param stages_clf_columns: columns (and their order) of the returned frame; ``None``
                               keeps every merged column
    :return: data frame with one row per algorithm present in both CSV files
    """
    result_folder = cfg.STAGE_OUTPUT_FOLDER_HRV30s[num_classes]

    def task_file(name_template):
        # Every result file of a task is named "<num_classes>_..._<feature_type>.<ext>".
        return os.path.join(result_folder, sub_folder, name_template % (num_classes, feature_type))

    mins_df = pd.read_csv(task_file("%d_stages_minutes_summary_%s.csv"))
    mins_df = mins_df.rename(columns={"Unnamed: 0": "algorithms"})

    summary = pd.read_csv(task_file("%d_stages_summary_%s.csv"))
    print("summary csv is loaded")
    summary = summary.rename(columns={summary.columns[0]: "algs"})
    summary = summary.set_index("algs")

    # NOTE(review): the unpickled object is not used below; the load is kept so a missing or
    # unreadable pickle still fails here. pickle.load can execute arbitrary code, so this must
    # only be pointed at result files produced by this project.
    with open(task_file("%d_stages_results_%s.pkl"), "rb") as f:
        pickle.load(f)
    print("Pickle file is loaded")

    # Turn the algorithm names from the index back into a regular column to merge on.
    summary['algorithms'] = summary.index
    summary = summary.reset_index(drop=True)
    stages_combined = pd.merge(summary, mins_df, on=['algorithms'])
    if stages_clf_columns is not None:
        stages_combined = stages_combined.reindex(columns=stages_clf_columns)
    return stages_combined

In [5]:
def ttest(results1, results2, alg1, alg2, metrics):
    """
    Print the t-test p-value between two algorithms for each metric.

    :param results1: mapping (algorithm -> metric -> sample) holding ``alg1``
    :param results2: mapping (algorithm -> metric -> sample) holding ``alg2``
    :param alg1: algorithm name looked up in ``results1``
    :param alg2: algorithm name looked up in ``results2``
    :param metrics: metric names to test, one printed line per metric
    :return: None
    """
    first, second = results1[alg1], results2[alg2]
    for metric in metrics:
        p_value = ttest_ind(first[metric], second[metric]).pvalue
        print("T-Test for metric: %s, the p value = %.3f" % (metric, p_value))

## 2 stages 30s hrv_len, with feature ENMO_HRV

### load minute prediction

In [7]:
# Task shown in this section: 2 stages with the ENMO_HRV feature set.
num_classes, feature_type = 2, 'ENMO_HRV'
result_folder = cfg.STAGE_OUTPUT_FOLDER_HRV30s[num_classes]

# Minutes summary of the task; its unnamed first CSV column carries the algorithm names.
minutes_path = os.path.join(
    result_folder,
    sub_folder,
    "%d_stages_minutes_summary_%s.csv" % (num_classes, feature_type),
)
mins_df = pd.read_csv(minutes_path).rename(columns={"Unnamed: 0": "algorithms"})
mins_df.head()

Unnamed: 0,algorithms,Sleep,Wake
0,always_0,-363.5 +- 8.9,363.5 +- 8.9
1,always_1,88.9 +- 6.6,-88.9 +- 6.6
2,CNN_20_ENMO_HRV,22.2 +- 5.4,-22.2 +- 5.4
3,CNN_50_ENMO_HRV,19.6 +- 5.8,-19.6 +- 5.8
4,CNN_100_ENMO_HRV,17.4 +- 5.9,-17.4 +- 5.9


### load classifier performance prediction

In [9]:
# Metrics summary table and pickled results of the task selected above.
SUMMARY_FILE = os.path.join(
    result_folder, sub_folder, "%d_stages_summary_%s.csv" % (num_classes, feature_type)
)
PICKLE_RESULT_FILE = os.path.join(
    result_folder, sub_folder, "%d_stages_results_%s.pkl" % (num_classes, feature_type)
)

summary = pd.read_csv(SUMMARY_FILE)
print("summary csv is loaded")
# The first CSV column holds the algorithm names; use it as the index for now.
summary = summary.rename(columns={summary.columns[0]: "algs"}).set_index("algs")

with open(PICKLE_RESULT_FILE, "rb") as f:
    results = pickle.load(f)
print("Pickle file is loaded")

metrics = ["accuracy", "specificity", "precision", "recall", "f1-score", "cohen-kappa"]

# Expose the algorithm names as a trailing "algorithms" column and drop the named index.
summary = summary.assign(algorithms=summary.index).reset_index(drop=True)


summary csv is loaded
Pickle file is loaded


In [10]:
# Preview the first rows of the per-algorithm metrics summary.
summary.head()

Unnamed: 0,accuracy,cohen-kappa,f1-score,precision,recall,specificity,algorithms
0,19.3 +- 1.3,0.0 +- 0.0,0.3 +- 0.6,0.3 +- 0.6,0.3 +- 0.6,99.7 +- 0.6,always_0
1,80.9 +- 1.2,0.0 +- 0.0,88.9 +- 0.8,80.9 +- 1.2,100.0 +- 0.0,0.0 +- 0.0,always_1
2,84.9 +- 1.0,44.0 +- 2.0,90.1 +- 0.9,88.3 +- 1.0,93.1 +- 1.0,48.9 +- 2.2,CNN_20_ENMO_HRV
3,85.1 +- 1.1,44.7 +- 2.2,90.0 +- 1.0,88.7 +- 1.0,92.8 +- 1.2,50.1 +- 2.5,CNN_50_ENMO_HRV
4,85.1 +- 1.1,44.8 +- 2.2,90.0 +- 1.0,88.9 +- 1.0,92.5 +- 1.2,50.9 +- 2.5,CNN_100_ENMO_HRV


### combine predicted minutes and metrics

In [11]:
# Column selection and order of the combined 2-stage table.
stages_2_clf_columns = [
    'algorithms',
    "accuracy", "specificity", "precision", "recall", "f1-score", "cohen-kappa",
    "Wake", "Sleep",
]

# Join metrics and predicted minutes per algorithm, then keep the columns listed above.
stages_2_combined = (
    summary
    .merge(mins_df, on='algorithms')
    .reindex(columns=stages_2_clf_columns)
)

In [None]:
# Show up to 110 rows of the combined 2-stage table.
stages_2_combined.head(110)

In [13]:
# Two-column lookup table: raw algorithm name next to its table label.
readable_df = pd.DataFrame(
    {
        "old_alg_name": list(readable_algs_dict.keys()),
        "new_alg_name": list(readable_algs_dict.values()),
    }
)
print(readable_df)

         old_alg_name         new_alg_name
0              stages         Ground Truth
1            always_0          Always wake
2            always_1         Always sleep
3     CNN_20_ENMO_HRV              CNN(20)
4     CNN_50_ENMO_HRV              CNN(50)
5    CNN_100_ENMO_HRV             CNN(100)
6    LSTM_20_ENMO_HRV             LSTM(20)
7    LSTM_50_ENMO_HRV             LSTM(50)
8   LSTM_100_ENMO_HRV            LSTM(100)
9           SGD_hinge           Linear SVM
10            SGD_log  Logistic Regression
11     SGD_perceptron           Perception
12  Random_forest_300        Random Forest


In [14]:
# Spot-check one entry of the label dictionary.
readable_algs_dict['always_0']

'Always wake'

In [16]:
stages_2_baselines = ['stages', 'always_0', 'always_1']
# Swap the raw algorithm names for their table labels; an unknown name raises KeyError.
stages_2_combined['algorithms'] = stages_2_combined['algorithms'].map(
    lambda raw_name: readable_algs_dict[raw_name]
)



In [18]:
# Inspect five randomly chosen rows of the relabelled 2-stage table.
stages_2_combined.sample(5)

Unnamed: 0,algorithms,accuracy,specificity,precision,recall,f1-score,cohen-kappa,Wake,Sleep
1,Always sleep,80.9 +- 1.2,0.0 +- 0.0,80.9 +- 1.2,100.0 +- 0.0,88.9 +- 0.8,0.0 +- 0.0,-88.9 +- 6.6,88.9 +- 6.6
4,CNN(100),85.1 +- 1.1,50.9 +- 2.5,88.9 +- 1.0,92.5 +- 1.2,90.0 +- 1.0,44.8 +- 2.2,-17.4 +- 5.9,17.4 +- 5.9
5,LSTM(20),83.5 +- 1.0,56.5 +- 2.3,89.6 +- 0.9,89.4 +- 1.1,88.9 +- 0.9,43.5 +- 2.1,-1.0 +- 5.6,1.0 +- 5.6
2,CNN(20),84.9 +- 1.0,48.9 +- 2.2,88.3 +- 1.0,93.1 +- 1.0,90.1 +- 0.9,44.0 +- 2.0,-22.2 +- 5.4,22.2 +- 5.4
6,LSTM(50),83.6 +- 1.1,54.3 +- 2.4,89.5 +- 0.9,89.3 +- 1.2,88.9 +- 0.9,42.4 +- 2.2,-0.9 +- 5.5,0.9 +- 5.5


In [29]:
# Distinct table labels, used below to select the rows that go into the LaTeX table.
bls = readable_df["new_alg_name"].unique().tolist()

In [32]:
# stages_2_combined

### Produce the LaTeX table

In [None]:
# Numeric sort key: the value before "+-" in each accuracy string.
# Built-in float is used because the np.float alias no longer exists in current NumPy.
stages_2_combined['acc_for_sort'] = stages_2_combined['accuracy'].apply(lambda x: float(x.split("+-")[0]))
print(bls)
# LaTeX table of the rows whose label is in bls, ordered by descending accuracy.
print(stages_2_combined[stages_2_combined['algorithms'].isin(bls)].sort_values(by="acc_for_sort", ascending=False)[stages_2_clf_columns].to_latex(index=False))

In [None]:
# Same selection as the LaTeX table, displayed as a data frame.
stages_2_combined[stages_2_combined['algorithms'].isin(bls)].sort_values(by="acc_for_sort", ascending=False)[stages_2_clf_columns]

### this is the 2 stages ENMO alone and HRV alone in one table

In [23]:
num_classes = 2
stages_2_clf_columns = [
    'algorithms',
    "accuracy", "specificity", "precision", "recall", "f1-score", "cohen-kappa",
    "Sleep", "Wake",
]
# 2-stage results for the ENMO feature set.
stage_2_df = load_task(
    sub_folder,
    num_classes=num_classes,
    feature_type='ENMO',
    stages_clf_columns=stages_2_clf_columns,
)

# Raw algorithm name -> table label for the ENMO runs.
readable_2_algs_dict = {
    "stages": "ground truth",
    "always_0": "always wake",
    "always_1": "always sleep",
    "CNN_20_ENMO": "CNN 20",
    "CNN_50_ENMO": "CNN 50",
    "CNN_100_ENMO": "CNN 100",
    "LSTM_20_ENMO": "LSTM 20",
    "LSTM_50_ENMO": "LSTM 50",
    "LSTM_100_ENMO": "LSTM 100",
    "SGD_hinge": "Linear SVM",
    "SGD_log": "Logistic Regression",
    "SGD_perceptron": "Perception",
    "Random_forest_300": "Random Forest",
}
readable_2stages_df = pd.DataFrame(
    list(readable_2_algs_dict.items()),
    columns=["old_alg_name", "new_alg_name"],
)



summary csv is loaded
Pickle file is loaded


In [39]:
readable_df = pd.DataFrame(list(readable_algs_dict.items()), columns=["old_alg_name","new_alg_name"])
stages_2_baselines = ['stages', 'always_0', 'always_1']
# Swap the raw algorithm names for their table labels.
stage_2_df['algorithms'] = stage_2_df['algorithms'].apply(lambda x: readable_2_algs_dict[x])
# The label list must come from the dictionary used for the rename just above:
# readable_df holds the "CNN(20)"-style labels, which do not occur in stage_2_df.
bls = readable_2stages_df.new_alg_name.unique().tolist()

### 2 stages HRV 

In [46]:
num_classes = 2
stages_2_clf_columns=  ['algorithms', "accuracy", "specificity", "precision", "recall", "f1-score", "cohen-kappa", "Sleep", "Wake"]
stage_2_df = load_task(sub_folder, num_classes=num_classes, feature_type='HRV', stages_clf_columns=stages_2_clf_columns)

# Raw algorithm name -> table label for the HRV runs. This has to be defined here: the
# dictionary left over from the ENMO section only knows "*_ENMO" names, so renaming the
# HRV results with it raises KeyError.
readable_2_algs_dict = {
    "stages": "ground truth",
    "always_0": "always wake",
    "always_1": "always sleep",
    "CNN_20_HRV": "CNN 20",
    "CNN_50_HRV": "CNN 50",
    "CNN_100_HRV": "CNN 100",
    "LSTM_20_HRV": "LSTM 20",
    "LSTM_50_HRV": "LSTM 50",
    "LSTM_100_HRV": "LSTM 100",
    "SGD_hinge": "Linear SVM",
    "SGD_log": "Logistic Regression",
    "SGD_perceptron": "Perception",
    "Random_forest_300": "Random Forest",
}
readable_2stages_df = pd.DataFrame(list(readable_2_algs_dict.items()), columns=["old_alg_name","new_alg_name"])


summary csv is loaded
Pickle file is loaded


In [None]:
# Display the loaded 2-stage HRV results.
stage_2_df

In [48]:
readable_df = pd.DataFrame(list(readable_algs_dict.items()), columns=["old_alg_name","new_alg_name"])
stages_2_baselines = ['stages', 'always_0', 'always_1']
# Swap the raw algorithm names for their table labels.
stage_2_df['algorithms'] = stage_2_df['algorithms'].apply(lambda x: readable_2_algs_dict[x])
# Build the filter list from the same dictionary as the rename above. The labels in
# readable_df ("CNN(20)", "Ground Truth", ...) differ from the ones now in stage_2_df, so
# filtering with them would drop the CNN, LSTM and baseline rows from the LaTeX table.
stage_2_bls = readable_2stages_df.new_alg_name.unique().tolist()

In [49]:
# Numeric sort key: the value before "+-" in each accuracy string.
# Built-in float is used because the np.float alias no longer exists in current NumPy.
stage_2_df['acc_for_sort'] = stage_2_df['accuracy'].apply(lambda x: float(x.split("+-")[0]))
stage_2_df.sort_values(by="acc_for_sort", ascending=False)

Unnamed: 0,algorithms,accuracy,specificity,precision,recall,f1-score,cohen-kappa,Sleep,Wake,acc_for_sort
12,ground truth,100.0 +- 0.0,99.7 +- 0.6,100.0 +- 0.0,100.0 +- 0.0,100.0 +- 0.0,100.0 +- 0.0,364.0 +- 8.7,88.9 +- 6.6,100.0
3,CNN 50,81.7 +- 1.2,43.0 +- 2.3,86.3 +- 1.2,91.3 +- 1.3,87.9 +- 1.1,35.3 +- 1.9,23.2 +- 7.3,-23.2 +- 7.3,81.7
4,CNN 100,81.7 +- 1.3,43.7 +- 2.3,86.5 +- 1.1,91.0 +- 1.4,87.8 +- 1.2,35.7 +- 2.0,21.4 +- 7.4,-21.4 +- 7.4,81.7
1,always sleep,80.9 +- 1.2,0.0 +- 0.0,80.9 +- 1.2,100.0 +- 0.0,88.9 +- 0.8,0.0 +- 0.0,88.9 +- 6.6,-88.9 +- 6.6,80.9
5,LSTM 20,80.9 +- 1.1,48.4 +- 2.2,87.1 +- 1.2,89.0 +- 1.2,87.3 +- 1.0,35.4 +- 1.8,8.9 +- 6.8,-8.9 +- 6.8,80.9
8,Linear SVM,80.6 +- 1.3,15.4 +- 1.8,82.6 +- 1.2,96.6 +- 1.0,88.2 +- 1.0,14.7 +- 1.4,65.3 +- 7.7,-65.3 +- 7.7,80.6
2,CNN 20,80.3 +- 1.3,44.7 +- 2.3,86.2 +- 1.3,89.5 +- 1.4,86.9 +- 1.1,33.3 +- 1.9,15.9 +- 7.6,-15.9 +- 7.6,80.3
6,LSTM 50,80.2 +- 1.2,46.4 +- 2.4,87.2 +- 1.1,88.3 +- 1.3,86.8 +- 1.1,33.1 +- 2.0,6.5 +- 7.5,-6.5 +- 7.5,80.2
7,LSTM 100,80.2 +- 1.2,42.6 +- 2.5,86.8 +- 1.1,89.0 +- 1.3,87.0 +- 1.0,30.6 +- 2.1,12.1 +- 7.3,-12.1 +- 7.3,80.2
9,Logistic Regression,77.6 +- 1.6,32.6 +- 2.7,84.4 +- 1.3,89.1 +- 1.9,84.8 +- 1.5,22.4 +- 1.8,25.4 +- 10.3,-25.4 +- 10.3,77.6


In [203]:
# LaTeX table of the rows whose label is in stage_2_bls, ordered by descending accuracy.
print(
    stage_2_df.loc[stage_2_df['algorithms'].isin(stage_2_bls)]
    .sort_values(by="acc_for_sort", ascending=False)
    .loc[:, stages_2_clf_columns]
    .to_latex(index=False)
)

\begin{tabular}{lllllllll}
\toprule
          algorithms &      accuracy &   specificity &     precision &        recall &      f1-score &   cohen-kappa &           Sleep &            Wake \\
\midrule
        ground truth &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &    365.1 +- 8.7 &    187.4 +- 8.6 \\
            LSTM 100 &   79.5 +- 1.2 &   62.2 +- 2.1 &   81.8 +- 1.4 &   88.9 +- 1.3 &   84.1 +- 1.1 &   51.5 +- 2.2 &     35.3 +- 8.7 &    -35.3 +- 8.7 \\
             CNN 100 &   79.1 +- 1.2 &   57.0 +- 2.1 &   79.9 +- 1.5 &   91.0 +- 1.4 &   83.9 +- 1.3 &   49.8 +- 2.1 &     54.4 +- 9.2 &    -54.4 +- 9.2 \\
             LSTM 50 &   78.6 +- 1.2 &   61.1 +- 2.0 &   81.2 +- 1.4 &   88.2 +- 1.3 &   83.4 +- 1.2 &   49.5 +- 2.1 &     34.5 +- 8.9 &    -34.5 +- 8.9 \\
              CNN 50 &   78.2 +- 1.2 &   54.0 +- 2.1 &   78.9 +- 1.5 &   91.3 +- 1.3 &   83.5 +- 1.2 &   47.1 +- 2.1 &     61.2 +- 9.2 &    -61.2 +- 9.2 \\
             LSTM 2

### This is the ENMO alone

In [22]:
num_classes = 2
stages_2_clf_columns = [
    'algorithms',
    "accuracy", "specificity", "precision", "recall", "f1-score", "cohen-kappa",
    "Sleep", "Wake",
]
# 2-stage results for the ENMO feature set.
stage_2_df = load_task(
    sub_folder,
    num_classes=num_classes,
    feature_type='ENMO',
    stages_clf_columns=stages_2_clf_columns,
)

# Raw algorithm name -> table label for the ENMO runs.
readable_2_algs_dict = {
    "stages": "ground truth",
    "always_0": "always wake",
    "always_1": "always sleep",
    "CNN_20_ENMO": "CNN 20",
    "CNN_50_ENMO": "CNN 50",
    "CNN_100_ENMO": "CNN 100",
    "LSTM_20_ENMO": "LSTM 20",
    "LSTM_50_ENMO": "LSTM 50",
    "LSTM_100_ENMO": "LSTM 100",
    "SGD_hinge": "Linear SVM",
    "SGD_log": "Logistic Regression",
    "SGD_perceptron": "Perception",
    "Random_forest_300": "Random Forest",
}
readable_2stages_df = pd.DataFrame(
    list(readable_2_algs_dict.items()),
    columns=["old_alg_name", "new_alg_name"],
)


summary csv is loaded
Pickle file is loaded


In [None]:
readable_df = pd.DataFrame(list(readable_algs_dict.items()), columns=["old_alg_name","new_alg_name"])
stages_2_baselines = ['stages', 'always_0', 'always_1']
# Swap the raw algorithm names for their table labels.
stage_2_df['algorithms'] = stage_2_df['algorithms'].apply(lambda x: readable_2_algs_dict[x])
# Build the filter list from the same dictionary as the rename above. The labels in
# readable_df ("CNN(20)", "Ground Truth", ...) differ from the ones now in stage_2_df, so
# filtering with them would drop the CNN, LSTM and baseline rows from the LaTeX table.
stage_2_bls = readable_2stages_df.new_alg_name.unique().tolist()
# Numeric sort key; built-in float replaces the np.float alias missing from current NumPy.
stage_2_df['acc_for_sort'] = stage_2_df['accuracy'].apply(lambda x: float(x.split("+-")[0]))
stage_2_df.sort_values(by="acc_for_sort", ascending=False)

In [None]:
# LaTeX table of the rows whose label is in stage_2_bls, ordered by descending accuracy.
print(
    stage_2_df.loc[stage_2_df['algorithms'].isin(stage_2_bls)]
    .sort_values(by="acc_for_sort", ascending=False)
    .loc[:, stages_2_clf_columns]
    .to_latex(index=False)
)

## ======================== 3 stages =======================

## This is the 3 stages ENMO + HRV

In [57]:
num_classes = 3
stages_3_clf_columns = [
    'algorithms',
    "accuracy", "specificity", "precision", "recall", "f1-score", "cohen-kappa",
    "Non-REM sleep", "REM sleep", "Wake",
]
# 3-stage results for the ENMO_HRV feature set.
stage_3_df = load_task(
    sub_folder,
    num_classes=num_classes,
    feature_type='ENMO_HRV',
    stages_clf_columns=stages_3_clf_columns,
)

# Raw algorithm name -> table label for the 3-stage ENMO_HRV runs.
readable_3stages_algs_dict = {
    "stages": "ground truth",
    "always_0": "always wake",
    "always_1": "always Non-REM sleep",
    "always_2": "always REM sleep",
    "CNN_20_ENMO_HRV": "CNN 20",
    "CNN_50_ENMO_HRV": "CNN 50",
    "CNN_100_ENMO_HRV": "CNN 100",
    "LSTM_20_ENMO_HRV": "LSTM 20",
    "LSTM_50_ENMO_HRV": "LSTM 50",
    "LSTM_100_ENMO_HRV": "LSTM 100",
    "SGD_hinge": "Linear SVM",
    "SGD_log": "Logistic Regression",
    "SGD_perceptron": "Perception",
    "Random_forest_300": "Random Forest",
}
readable_3stages_df = pd.DataFrame(
    list(readable_3stages_algs_dict.items()),
    columns=["old_alg_name", "new_alg_name"],
)

summary csv is loaded
Pickle file is loaded


In [None]:
# Plain-text dump of the loaded 3-stage results (raw algorithm names at this point).
print(stage_3_df)

In [59]:

# Show the raw-name -> label lookup table for the 3-stage task.
print(readable_3stages_df)

         old_alg_name          new_alg_name
0              stages          ground truth
1            always_0           always wake
2            always_1  always Non-REM sleep
3            always_2      always REM sleep
4     CNN_20_ENMO_HRV                CNN 20
5     CNN_50_ENMO_HRV                CNN 50
6    CNN_100_ENMO_HRV               CNN 100
7    LSTM_20_ENMO_HRV               LSTM 20
8    LSTM_50_ENMO_HRV               LSTM 50
9   LSTM_100_ENMO_HRV              LSTM 100
10          SGD_hinge            Linear SVM
11            SGD_log   Logistic Regression
12     SGD_perceptron            Perception
13  Random_forest_300         Random Forest


In [None]:
stages_3_baselines = ['stages', 'always_0', 'always_1', 'always_2']
# Swap the raw algorithm names for their table labels; an unknown name raises KeyError.
stage_3_df['algorithms'] = stage_3_df['algorithms'].map(
    lambda raw_name: readable_3stages_algs_dict[raw_name]
)
# Distinct labels, used to select the rows of the LaTeX table.
stage_3_bls = readable_3stages_df["new_alg_name"].unique().tolist()

In [66]:
# Numeric sort key: the value before "+-" in each accuracy string.
# Built-in float is used because the np.float alias no longer exists in current NumPy.
stage_3_df['acc_for_sort'] = stage_3_df['accuracy'].apply(lambda x: float(x.split("+-")[0]))

In [68]:
# 3-stage results ordered by descending accuracy, with the per-stage columns reordered.
stage_3_df.sort_values(by="acc_for_sort", ascending=False)[
    [
        'algorithms',
        "accuracy", "specificity", "precision", "recall", "f1-score", "cohen-kappa",
        "Wake", "REM sleep", "Non-REM sleep",
        'acc_for_sort',
    ]
]

Unnamed: 0,algorithms,accuracy,specificity,precision,recall,f1-score,cohen-kappa,Wake,REM sleep,Non-REM sleep,acc_for_sort
13,ground truth,100.0 +- 0.0,99.7 +- 0.6,100.0 +- 0.0,100.0 +- 0.0,100.0 +- 0.0,100.0 +- 0.0,88.9 +- 6.6,67.4 +- 3.0,296.7 +- 7.1,100.0
5,CNN 100,74.9 +- 1.1,82.1 +- 0.8,68.7 +- 1.3,64.5 +- 1.3,62.9 +- 1.3,47.0 +- 2.0,-14.4 +- 5.8,-0.5 +- 4.3,14.9 +- 6.7,74.9
7,LSTM 50,74.2 +- 1.1,81.7 +- 0.8,66.5 +- 1.4,62.8 +- 1.2,61.5 +- 1.3,45.1 +- 2.1,0.3 +- 5.6,-12.5 +- 3.7,12.2 +- 6.2,74.2
4,CNN 50,73.6 +- 1.2,82.5 +- 0.8,67.7 +- 1.3,65.5 +- 1.3,62.6 +- 1.4,47.0 +- 2.1,-4.7 +- 5.7,7.5 +- 5.8,-2.9 +- 7.5,73.6
6,LSTM 20,73.4 +- 1.0,81.4 +- 0.7,65.7 +- 1.3,62.6 +- 1.2,60.7 +- 1.2,43.1 +- 2.0,4.1 +- 5.6,-14.8 +- 3.9,10.8 +- 6.2,73.4
8,LSTM 100,73.4 +- 1.0,80.6 +- 0.8,65.5 +- 1.4,60.4 +- 1.2,59.4 +- 1.3,42.4 +- 2.1,7.7 +- 5.8,-24.4 +- 3.2,16.7 +- 6.3,73.4
3,CNN 20,73.1 +- 1.1,80.5 +- 0.8,66.8 +- 1.3,61.4 +- 1.2,60.1 +- 1.2,43.0 +- 1.9,-18.7 +- 5.4,-4.4 +- 5.2,23.0 +- 7.1,73.1
9,Linear SVM,68.9 +- 1.0,72.2 +- 0.7,44.0 +- 1.1,45.7 +- 0.9,42.3 +- 0.9,27.9 +- 1.8,-15.1 +- 6.4,-67.4 +- 3.0,82.5 +- 7.1,68.9
12,Random Forest,68.7 +- 1.0,73.6 +- 0.7,54.1 +- 1.6,48.3 +- 0.8,44.6 +- 0.9,30.2 +- 1.7,-0.2 +- 6.2,-63.5 +- 2.9,63.7 +- 6.8,68.7
10,Logistic Regression,68.5 +- 1.0,73.1 +- 0.7,43.8 +- 1.0,47.4 +- 0.9,43.2 +- 0.9,29.2 +- 1.7,-1.6 +- 7.0,-67.3 +- 3.0,68.9 +- 7.6,68.5


In [75]:
# LaTeX table (row index included) of the rows whose label is in stage_3_bls,
# ordered by descending accuracy.
print(
    stage_3_df.loc[stage_3_df['algorithms'].isin(stage_3_bls)]
    .sort_values(by="acc_for_sort", ascending=False)
    .loc[:, stages_3_clf_columns]
    .to_latex()
)

\begin{tabular}{lllllllllll}
\toprule
{} &            algorithms &      accuracy &   specificity &     precision &        recall &      f1-score &   cohen-kappa &  Non-REM sleep &     REM sleep &           Wake \\
\midrule
13 &          ground truth &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &   297.5 +- 7.1 &   67.6 +- 3.0 &   187.4 +- 8.6 \\
7  &               LSTM 50 &   76.2 +- 1.0 &   85.6 +- 0.5 &   72.2 +- 1.3 &   68.8 +- 1.2 &   67.9 +- 1.3 &   58.4 +- 1.8 &    23.9 +- 7.1 &  -10.7 +- 3.8 &   -13.2 +- 6.8 \\
8  &              LSTM 100 &   76.1 +- 0.9 &   85.1 +- 0.5 &   71.9 +- 1.4 &   66.8 +- 1.2 &   66.4 +- 1.3 &   57.4 +- 1.9 &    26.5 +- 7.0 &  -23.3 +- 3.4 &    -3.2 +- 6.8 \\
5  &               CNN 100 &   76.0 +- 1.0 &   85.6 +- 0.6 &   72.2 +- 1.2 &   69.7 +- 1.3 &   68.1 +- 1.3 &   58.6 +- 1.9 &    30.2 +- 7.7 &    2.5 +- 4.5 &   -32.7 +- 7.2 \\
4  &                CNN 50 &   75.2 +- 1.1 &   85.7 +- 0.6 &   71.7 +- 1.

### Task 2: 3-stage classification with ENMO alone

In [69]:
num_classes = 3
stages_clf_columns = [
    'algorithms',
    "accuracy", "specificity", "precision", "recall", "f1-score", "cohen-kappa",
    "Non-REM sleep", "REM sleep", "Wake",
]
# 3-stage results for the ENMO feature set.
stage_3_df = load_task(
    sub_folder,
    num_classes=num_classes,
    feature_type='ENMO',
    stages_clf_columns=stages_clf_columns,
)

# Raw algorithm name -> table label for the 3-stage ENMO runs.
readable_3stages_algs_dict = {
    "stages": "ground truth",
    "always_0": "always wake",
    "always_1": "always Non-REM sleep",
    "always_2": "always REM sleep",
    "CNN_20_ENMO": "CNN 20",
    "CNN_50_ENMO": "CNN 50",
    "CNN_100_ENMO": "CNN 100",
    "LSTM_20_ENMO": "LSTM 20",
    "LSTM_50_ENMO": "LSTM 50",
    "LSTM_100_ENMO": "LSTM 100",
    "SGD_hinge": "Linear SVM",
    "SGD_log": "Logistic Regression",
    "SGD_perceptron": "Perception",
    "Random_forest_300": "Random Forest",
}
readable_3stages_df = pd.DataFrame(
    list(readable_3stages_algs_dict.items()),
    columns=["old_alg_name", "new_alg_name"],
)

summary csv is loaded
Pickle file is loaded


In [70]:
# Swap the raw algorithm names for their table labels.
stage_3_df['algorithms'] = stage_3_df['algorithms'].apply(lambda x: readable_3stages_algs_dict[x])
# Numeric sort key: the value before "+-" in each accuracy string.
# Built-in float is used because the np.float alias no longer exists in current NumPy.
stage_3_df['acc_for_sort'] = stage_3_df['accuracy'].apply(lambda x: float(x.split("+-")[0]))

In [72]:
# 3-stage ENMO results ordered by descending accuracy, with the per-stage columns reordered.
stage_3_df.sort_values(by="acc_for_sort", ascending=False)[
    [
        'algorithms',
        "accuracy", "specificity", "precision", "recall", "f1-score", "cohen-kappa",
        "Wake", "REM sleep", "Non-REM sleep",
        'acc_for_sort',
    ]
]

Unnamed: 0,algorithms,accuracy,specificity,precision,recall,f1-score,cohen-kappa,Wake,REM sleep,Non-REM sleep,acc_for_sort
13,ground truth,100.0 +- 0.0,99.7 +- 0.6,100.0 +- 0.0,100.0 +- 0.0,100.0 +- 0.0,100.0 +- 0.0,88.9 +- 6.6,67.4 +- 3.0,296.7 +- 7.1,100.0
9,Linear SVM,68.7 +- 1.0,71.5 +- 0.7,43.3 +- 1.1,43.9 +- 0.9,40.8 +- 0.9,25.1 +- 1.8,-24.3 +- 6.1,-67.4 +- 3.0,91.7 +- 6.7,68.7
4,CNN 50,68.4 +- 1.0,71.6 +- 0.7,42.2 +- 1.1,43.8 +- 0.9,40.4 +- 1.0,24.5 +- 2.0,-17.4 +- 6.2,-67.3 +- 3.0,84.7 +- 7.0,68.4
5,CNN 100,68.4 +- 1.0,71.4 +- 0.7,42.5 +- 1.1,43.5 +- 0.9,40.1 +- 1.0,24.1 +- 2.0,-19.4 +- 6.3,-67.3 +- 3.0,86.7 +- 7.1,68.4
3,CNN 20,68.2 +- 1.0,71.6 +- 0.7,42.4 +- 1.0,44.0 +- 0.9,40.7 +- 0.9,24.5 +- 1.8,-17.3 +- 6.0,-67.3 +- 3.0,84.6 +- 6.8,68.2
10,Logistic Regression,68.2 +- 1.0,72.0 +- 0.7,42.4 +- 1.0,45.1 +- 0.9,41.5 +- 0.9,26.8 +- 1.8,-10.0 +- 6.3,-67.4 +- 3.0,77.4 +- 7.0,68.2
7,LSTM 50,68.1 +- 1.0,72.1 +- 0.7,41.8 +- 1.0,45.0 +- 0.9,41.3 +- 0.9,26.6 +- 1.9,-5.1 +- 6.1,-67.3 +- 3.0,72.5 +- 6.9,68.1
8,LSTM 100,68.1 +- 1.0,72.1 +- 0.7,44.2 +- 1.3,45.0 +- 0.9,41.4 +- 1.0,26.8 +- 2.0,-4.4 +- 6.3,-66.8 +- 3.0,71.2 +- 7.0,68.1
6,LSTM 20,67.7 +- 1.0,72.1 +- 0.7,41.6 +- 1.0,45.2 +- 0.9,41.4 +- 0.9,26.5 +- 1.8,-3.7 +- 6.0,-67.4 +- 3.0,71.1 +- 6.8,67.7
12,Random Forest,66.5 +- 0.9,72.1 +- 0.7,46.8 +- 1.2,45.0 +- 0.8,41.8 +- 0.8,25.7 +- 1.7,-0.1 +- 5.8,-60.8 +- 2.9,60.9 +- 6.5,66.5


In [66]:
# Distinct 3-stage table labels, used to select the rows of the LaTeX table.
bls = readable_3stages_df["new_alg_name"].unique().tolist()

['ground truth',
 'always wake',
 'always Non-REM sleep',
 'always REM sleep',
 'CNN 20',
 'CNN 50',
 'CNN 100',
 'LSTM 20',
 'LSTM 50',
 'LSTM 100',
 'Linear SVM',
 'Logistic Regression',
 'Perception',
 'Random Forest']

In [68]:
# LaTeX table of the rows whose label is in bls, ordered by descending accuracy.
print(
    stage_3_df.loc[stage_3_df['algorithms'].isin(bls)]
    .sort_values(by="acc_for_sort", ascending=False)
    .loc[:, stages_clf_columns]
    .to_latex(index=False)
)

\begin{tabular}{llllllllll}
\toprule
           algorithms &      accuracy &   specificity &     precision &        recall &      f1-score &   cohen-kappa &  Non-REM sleep &      REM sleep &           Wake \\
\midrule
         ground truth &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &   297.5 +- 7.1 &    67.6 +- 3.0 &   187.4 +- 8.6 \\
             LSTM 100 &   73.8 +- 1.2 &   84.3 +- 0.6 &   69.8 +- 1.5 &   66.1 +- 1.3 &   64.9 +- 1.5 &   50.0 +- 2.2 &    36.4 +- 8.1 &    -8.6 +- 4.2 &   -27.8 +- 8.5 \\
              LSTM 50 &   72.9 +- 1.1 &   83.8 +- 0.6 &   67.9 +- 1.4 &   64.1 +- 1.3 &   62.9 +- 1.4 &   45.5 +- 2.1 &    34.1 +- 8.3 &   -16.2 +- 4.3 &   -17.9 +- 8.5 \\
              CNN 100 &   71.0 +- 1.2 &   83.6 +- 0.6 &   66.3 +- 1.4 &   65.4 +- 1.4 &   62.7 +- 1.4 &   46.1 +- 2.0 &    10.6 +- 9.1 &     2.3 +- 5.0 &   -12.9 +- 9.4 \\
              LSTM 20 &   70.4 +- 1.0 &   82.3 +- 0.6 &   65.3 +- 1.4 &   60.8 +- 1.2 &   59.6

### Task 2: 3-stage classification with HRV alone

In [74]:
num_classes = 3
stages_3_clf_columns= ['algorithms', "accuracy", "specificity", "precision", "recall", "f1-score", "cohen-kappa", "Non-REM sleep","REM sleep","Wake"]
# 3-stage results for the HRV feature set.
stage_3_df = load_task(sub_folder, num_classes=num_classes, feature_type='HRV', stages_clf_columns=stages_3_clf_columns)
# Raw algorithm name -> table label for the 3-stage HRV runs.
readable_3stages_algs_dict= {"stages":"ground truth","always_0":"always wake", "always_1":"always Non-REM sleep", "always_2":"always REM sleep", "CNN_20_HRV": "CNN 20", "CNN_50_HRV":"CNN 50", "CNN_100_HRV":"CNN 100", "LSTM_20_HRV":"LSTM 20", "LSTM_50_HRV":"LSTM 50","LSTM_100_HRV":"LSTM 100", "SGD_hinge":"Linear SVM", "SGD_log":"Logistic Regression", "SGD_perceptron":"Perception", "Random_forest_300": "Random Forest"}
readable_3stages_df = pd.DataFrame(list(readable_3stages_algs_dict.items()), columns=["old_alg_name","new_alg_name"])
stage_3_df['algorithms'] = stage_3_df['algorithms'].apply(lambda x: readable_3stages_algs_dict[x])
# Numeric sort key; built-in float replaces the np.float alias missing from current NumPy.
stage_3_df['acc_for_sort'] = stage_3_df['accuracy'].apply(lambda x: float(x.split("+-")[0]))

summary csv is loaded
Pickle file is loaded


In [76]:
# 3-stage HRV results ordered by descending accuracy, with the per-stage columns reordered.
stage_3_df.sort_values(by="acc_for_sort", ascending=False)[
    [
        'algorithms',
        "accuracy", "specificity", "precision", "recall", "f1-score", "cohen-kappa",
        "Wake", "REM sleep", "Non-REM sleep",
        'acc_for_sort',
    ]
]

Unnamed: 0,algorithms,accuracy,specificity,precision,recall,f1-score,cohen-kappa,Wake,REM sleep,Non-REM sleep,acc_for_sort
13,ground truth,100.0 +- 0.0,99.7 +- 0.6,100.0 +- 0.0,100.0 +- 0.0,100.0 +- 0.0,100.0 +- 0.0,88.9 +- 6.6,67.4 +- 3.0,296.7 +- 7.1,100.0
8,LSTM 100,72.9 +- 1.3,81.0 +- 0.8,64.7 +- 1.5,60.1 +- 1.3,58.7 +- 1.5,37.2 +- 2.4,-6.6 +- 7.0,-11.8 +- 4.0,18.4 +- 6.9,72.9
7,LSTM 50,72.4 +- 1.2,81.1 +- 0.8,63.0 +- 1.5,59.4 +- 1.3,57.6 +- 1.4,32.9 +- 2.3,5.7 +- 6.9,-19.1 +- 4.1,13.4 +- 6.8,72.4
4,CNN 50,71.1 +- 1.2,80.3 +- 0.8,63.6 +- 1.5,59.3 +- 1.3,56.6 +- 1.4,33.2 +- 2.1,-5.5 +- 7.3,-12.0 +- 5.5,17.5 +- 7.9,71.1
6,LSTM 20,70.8 +- 1.1,80.6 +- 0.7,62.0 +- 1.4,58.5 +- 1.1,55.9 +- 1.3,29.5 +- 2.0,13.9 +- 7.1,-24.3 +- 3.9,10.4 +- 6.7,70.8
5,CNN 100,70.3 +- 1.3,81.5 +- 0.8,62.3 +- 1.4,62.0 +- 1.4,58.4 +- 1.5,36.0 +- 2.1,11.7 +- 7.7,-2.5 +- 4.7,-9.3 +- 7.8,70.3
3,CNN 20,69.1 +- 1.3,79.4 +- 0.8,60.9 +- 1.4,57.3 +- 1.2,54.3 +- 1.3,28.9 +- 1.9,8.6 +- 7.9,-18.8 +- 5.0,10.2 +- 8.2,69.1
1,always Non-REM sleep,66.1 +- 1.1,66.3 +- 0.4,22.4 +- 0.6,33.7 +- 0.4,26.7 +- 0.5,0.0 +- 0.0,-88.9 +- 6.6,-67.4 +- 3.0,156.3 +- 6.6,66.1
9,Linear SVM,65.2 +- 1.3,70.0 +- 0.6,44.0 +- 1.1,40.7 +- 0.7,36.1 +- 0.9,11.7 +- 1.2,-32.9 +- 10.0,-67.4 +- 3.0,100.3 +- 10.0,65.2
10,Logistic Regression,63.3 +- 1.4,71.2 +- 0.6,42.6 +- 1.1,42.3 +- 0.8,36.7 +- 0.9,11.8 +- 1.3,-0.4 +- 11.6,-67.4 +- 3.0,67.8 +- 11.6,63.3


### produce latex

In [79]:
# Distinct 3-stage table labels, then the LaTeX table of the matching rows
# ordered by descending accuracy.
bls = readable_3stages_df["new_alg_name"].unique().tolist()
print(
    stage_3_df.loc[stage_3_df['algorithms'].isin(bls)]
    .sort_values(by="acc_for_sort", ascending=False)
    .loc[:, stages_3_clf_columns]
    .to_latex(index=False)
)

\begin{tabular}{llllllllll}
\toprule
           algorithms &      accuracy &   specificity &     precision &        recall &      f1-score &   cohen-kappa &  Non-REM sleep &      REM sleep &           Wake \\
\midrule
         ground truth &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &   297.5 +- 7.1 &    67.6 +- 3.0 &   187.4 +- 8.6 \\
             LSTM 100 &   73.8 +- 1.2 &   84.3 +- 0.6 &   69.8 +- 1.5 &   66.1 +- 1.3 &   64.9 +- 1.5 &   50.0 +- 2.2 &    36.4 +- 8.1 &    -8.6 +- 4.2 &   -27.8 +- 8.5 \\
              LSTM 50 &   72.9 +- 1.1 &   83.8 +- 0.6 &   67.9 +- 1.4 &   64.1 +- 1.3 &   62.9 +- 1.4 &   45.5 +- 2.1 &    34.1 +- 8.3 &   -16.2 +- 4.3 &   -17.9 +- 8.5 \\
              CNN 100 &   71.0 +- 1.2 &   83.6 +- 0.6 &   66.3 +- 1.4 &   65.4 +- 1.4 &   62.7 +- 1.4 &   46.1 +- 2.0 &    10.6 +- 9.1 &     2.3 +- 5.0 &   -12.9 +- 9.4 \\
              LSTM 20 &   70.4 +- 1.0 &   82.3 +- 0.6 &   65.3 +- 1.4 &   60.8 +- 1.2 &   59.6

## ================== Task 3: 4 stages ===================

### 4 stages ENMO & HRV 

In [80]:
stages_4_clf_columns= ['algorithms', "accuracy", "specificity", "precision", "recall", "f1-score","cohen-kappa", "Deep sleep","Light sleep","REM sleep","Wake"]
# 4-stage results for the ENMO_HRV feature set.
stage_4_df = load_task(sub_folder, num_classes=4, feature_type='ENMO_HRV', stages_clf_columns=stages_4_clf_columns)
# Raw algorithm name -> table label for the 4-stage ENMO_HRV runs.
readable_4stages_algs_dict= {"stages":"ground truth","always_0":"always wake", "always_1":"always light sleep", "always_2":"always deep sleep", "always_3":"always REM sleep", "CNN_20_ENMO_HRV": "CNN 20", "CNN_50_ENMO_HRV":"CNN 50", "CNN_100_ENMO_HRV":"CNN 100", "LSTM_20_ENMO_HRV":"LSTM 20", "LSTM_50_ENMO_HRV":"LSTM 50","LSTM_100_ENMO_HRV":"LSTM 100", "SGD_hinge":"Linear SVM", "SGD_log":"Logistic Regression", "SGD_perceptron":"Perception", "Random_forest_300": "Random Forest"}
readable_4stages_df = pd.DataFrame(list(readable_4stages_algs_dict.items()), columns=["old_alg_name","new_alg_name"])
stages_4_baselines = ['stages', 'always_0', 'always_1', 'always_2','always_3']
stage_4_df['algorithms'] = stage_4_df['algorithms'].apply(lambda x: readable_4stages_algs_dict[x])
# get ready the rename df
bls = readable_4stages_df.new_alg_name.unique().tolist()
# Numeric sort key; built-in float replaces the np.float alias missing from current NumPy.
stage_4_df['acc_for_sort'] = stage_4_df['accuracy'].apply(lambda x: float(x.split("+-")[0]))

summary csv is loaded
Pickle file is loaded


In [85]:
# 4-stage results ordered by descending accuracy, with the per-stage columns reordered.
stage_4_df.sort_values(by="acc_for_sort", ascending=False)[
    [
        'algorithms',
        "accuracy", "specificity", "precision", "recall", "f1-score", "cohen-kappa",
        "Wake", "REM sleep", "Deep sleep", "Light sleep",
        "acc_for_sort",
    ]
]


Unnamed: 0,algorithms,accuracy,specificity,precision,recall,f1-score,cohen-kappa,Wake,REM sleep,Deep sleep,Light sleep,acc_for_sort
14,ground truth,100.0 +- 0.0,99.7 +- 0.6,100.0 +- 0.0,100.0 +- 0.0,100.0 +- 0.0,100.0 +- 0.0,88.9 +- 6.6,67.4 +- 3.0,39.2 +- 3.6,257.5 +- 7.0,100.0
8,LSTM 50,66.5 +- 1.1,84.1 +- 0.7,53.8 +- 1.4,50.2 +- 1.0,47.4 +- 1.1,44.8 +- 2.2,10.5 +- 5.8,-7.5 +- 3.9,-36.2 +- 3.5,33.2 +- 6.7,66.5
9,LSTM 100,66.5 +- 1.1,82.9 +- 0.6,55.5 +- 1.6,47.5 +- 1.1,46.2 +- 1.2,40.6 +- 2.1,-7.3 +- 5.5,-25.7 +- 3.6,-32.5 +- 3.5,65.5 +- 6.8,66.5
6,CNN 100,65.6 +- 1.2,83.7 +- 0.7,54.8 +- 1.5,49.8 +- 1.1,47.1 +- 1.1,42.6 +- 2.0,-1.7 +- 6.3,1.5 +- 4.7,-34.6 +- 3.5,34.7 +- 7.5,65.6
7,LSTM 20,65.4 +- 1.1,83.4 +- 0.6,51.4 +- 1.3,48.4 +- 0.9,45.6 +- 1.0,41.4 +- 1.9,5.6 +- 5.6,-9.1 +- 4.0,-37.7 +- 3.5,41.2 +- 6.9,65.4
4,CNN 20,64.7 +- 1.1,82.8 +- 0.6,49.5 +- 1.2,47.3 +- 1.0,44.5 +- 1.0,39.9 +- 1.9,-10.8 +- 5.4,-2.2 +- 5.2,-38.8 +- 3.6,51.9 +- 7.6,64.7
5,CNN 50,64.6 +- 1.2,83.8 +- 0.6,51.6 +- 1.4,50.1 +- 1.0,46.1 +- 1.1,43.3 +- 2.0,1.9 +- 5.7,14.6 +- 6.0,-37.7 +- 3.5,21.2 +- 8.1,64.6
10,Linear SVM,60.4 +- 1.2,77.9 +- 0.6,31.1 +- 1.0,35.5 +- 0.8,31.0 +- 0.9,18.5 +- 1.3,-9.4 +- 6.6,-67.4 +- 3.0,-39.2 +- 3.6,115.9 +- 7.9,60.4
11,Logistic Regression,60.3 +- 1.2,78.1 +- 0.6,31.6 +- 1.1,35.8 +- 0.8,31.1 +- 0.9,18.6 +- 1.3,-5.3 +- 6.8,-67.3 +- 3.0,-39.0 +- 3.6,111.7 +- 8.1,60.3
13,Random Forest,60.1 +- 1.1,78.7 +- 0.6,40.8 +- 1.4,36.7 +- 0.7,32.5 +- 0.8,20.8 +- 1.3,3.8 +- 6.3,-61.3 +- 2.9,-38.8 +- 3.6,96.3 +- 7.6,60.1


In [89]:
# LaTeX table of the rows whose label is in bls, ordered by descending accuracy.
print(
    stage_4_df.loc[stage_4_df['algorithms'].isin(bls)]
    .sort_values(by="acc_for_sort", ascending=False)
    .loc[:, stages_4_clf_columns]
    .to_latex(index=False)
)

\begin{tabular}{lllllllllll}
\toprule
          algorithms &      accuracy &   specificity &     precision &        recall &      f1-score &   cohen-kappa &    Deep sleep &    Light sleep &     REM sleep &           Wake \\
\midrule
        ground truth &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &   39.3 +- 3.6 &   258.2 +- 7.0 &   67.6 +- 3.0 &   187.4 +- 8.6 \\
             LSTM 50 &   70.3 +- 1.0 &   87.4 +- 0.4 &   57.9 +- 1.3 &   54.0 +- 1.0 &   51.9 +- 1.0 &   53.8 +- 1.9 &  -36.2 +- 3.5 &    42.8 +- 7.4 &   -5.6 +- 4.0 &    -1.0 +- 6.9 \\
            LSTM 100 &   70.2 +- 1.0 &   86.9 +- 0.4 &   59.9 +- 1.5 &   52.4 +- 1.0 &   51.3 +- 1.1 &   51.7 +- 1.8 &  -32.4 +- 3.5 &    76.0 +- 7.3 &  -24.7 +- 3.7 &   -18.9 +- 6.6 \\
             CNN 100 &   69.0 +- 1.0 &   87.0 +- 0.4 &   58.0 +- 1.4 &   53.7 +- 1.0 &   51.2 +- 1.1 &   51.6 +- 1.8 &  -34.5 +- 3.5 &    46.1 +- 8.1 &    4.4 +- 4.8 &   -15.9 +- 7.5 \\
             LSTM 20 & 

### Task 3: 4-stage classification with HRV as a single modality

In [87]:
num_classes = 4
stages_4_clf_columns = [
    'algorithms',
    "accuracy", "specificity", "precision", "recall", "f1-score", "cohen-kappa",
    "Deep sleep", "Light sleep", "REM sleep", "Wake",
]
# 4-stage results for the HRV feature set.
stage_4_df = load_task(
    sub_folder,
    num_classes=num_classes,
    feature_type='HRV',  # here to change
    stages_clf_columns=stages_4_clf_columns,
)

# Raw algorithm name -> table label for the 4-stage HRV runs.
readable_4stages_algs_dict = {
    "stages": "ground truth",
    "always_0": "always wake",
    "always_1": "always light sleep",
    "always_2": "always deep sleep",
    "always_3": "always REM sleep",
    "CNN_20_HRV": "CNN 20",
    "CNN_50_HRV": "CNN 50",
    "CNN_100_HRV": "CNN 100",
    "LSTM_20_HRV": "LSTM 20",
    "LSTM_50_HRV": "LSTM 50",
    "LSTM_100_HRV": "LSTM 100",
    "SGD_hinge": "Linear SVM",
    "SGD_log": "Logistic Regression",
    "SGD_perceptron": "Perception",
    "Random_forest_300": "Random Forest",
}
readable_4stages_df = pd.DataFrame(
    list(readable_4stages_algs_dict.items()),
    columns=["old_alg_name", "new_alg_name"],
)
# Distinct labels, available for selecting table rows.
bls = readable_4stages_df["new_alg_name"].unique().tolist()

summary csv is loaded
Pickle file is loaded


In [88]:
# Dump the summary as loaded (raw algorithm identifiers) for a quick visual check.
print(stage_4_df)

           algorithms      accuracy  specificity     precision        recall  \
0            always_0   19.3 +- 1.3  74.1 +- 0.5    5.2 +- 0.6   25.9 +- 0.5   
1            always_1   57.4 +- 1.2  74.1 +- 0.5   15.0 +- 0.6   25.9 +- 0.5   
2            always_2    9.0 +- 0.9  74.7 +- 0.4    2.5 +- 0.6   23.8 +- 0.7   
3            always_3   15.1 +- 0.8  74.3 +- 0.5    4.0 +- 0.6   25.3 +- 0.5   
4          CNN_20_HRV   60.9 +- 1.3  81.7 +- 0.6   44.6 +- 1.2   43.7 +- 1.0   
5          CNN_50_HRV   62.7 +- 1.3  82.6 +- 0.6   49.5 +- 1.5   46.0 +- 1.1   
6         CNN_100_HRV   62.5 +- 1.3  83.0 +- 0.7   51.8 +- 1.6   47.5 +- 1.1   
7         LSTM_20_HRV   62.8 +- 1.2  82.7 +- 0.6   47.9 +- 1.4   45.5 +- 0.9   
8         LSTM_50_HRV   64.3 +- 1.2  83.1 +- 0.6   51.0 +- 1.5   46.5 +- 1.0   
9        LSTM_100_HRV   64.5 +- 1.3  83.2 +- 0.6   51.8 +- 1.6   47.0 +- 1.1   
10          SGD_hinge   54.9 +- 1.5  77.0 +- 0.6   30.2 +- 1.1   32.0 +- 0.8   
11            SGD_log   54.4 +- 1.5  77.

In [89]:
# Show the identifier -> display-label mapping as a two-column table.
print(readable_4stages_df)

         old_alg_name         new_alg_name
0              stages         ground truth
1            always_0          always wake
2            always_1   always light sleep
3            always_2    always deep sleep
4            always_3     always REM sleep
5          CNN_20_HRV               CNN 20
6          CNN_50_HRV               CNN 50
7         CNN_100_HRV              CNN 100
8         LSTM_20_HRV              LSTM 20
9         LSTM_50_HRV              LSTM 50
10       LSTM_100_HRV             LSTM 100
11          SGD_hinge           Linear SVM
12            SGD_log  Logistic Regression
13     SGD_perceptron           Perception
14  Random_forest_300        Random Forest


In [90]:
# Swap the raw algorithm identifiers for their display labels. The dict is indexed
# directly so an identifier without a label raises KeyError instead of becoming NaN.
# Consequence: this cell is not re-runnable without reloading stage_4_df, because
# the already-renamed labels are not keys of the dict.
stage_4_df['algorithms'] = stage_4_df['algorithms'].apply(lambda x: readable_4stages_algs_dict[x])
# Metrics are stored as "<value> +- <value>" strings; parse the leading number so
# the table can be ranked numerically. Builtin float replaces np.float, an alias
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
stage_4_df['acc_for_sort'] = stage_4_df['accuracy'].apply(lambda x: float(x.split("+-")[0]))

In [93]:
# Display every algorithm ranked by accuracy, best first.
(
    stage_4_df.loc[:, [
        'algorithms',
        "accuracy", "specificity", "precision", "recall", "f1-score", "cohen-kappa",
        "Wake", "REM sleep", "Deep sleep", "Light sleep",
        "acc_for_sort",
    ]]
    .sort_values(by="acc_for_sort", ascending=False)
)

Unnamed: 0,algorithms,accuracy,specificity,precision,recall,f1-score,cohen-kappa,Wake,REM sleep,Deep sleep,Light sleep,acc_for_sort
14,ground truth,100.0 +- 0.0,99.7 +- 0.6,100.0 +- 0.0,100.0 +- 0.0,100.0 +- 0.0,100.0 +- 0.0,88.9 +- 6.6,67.4 +- 3.0,39.2 +- 3.6,257.5 +- 7.0,100.0
9,LSTM 100,64.5 +- 1.3,83.2 +- 0.6,51.8 +- 1.6,47.0 +- 1.1,44.7 +- 1.3,35.0 +- 2.3,4.7 +- 7.0,-16.1 +- 3.6,-33.7 +- 3.5,45.2 +- 7.2,64.5
8,LSTM 50,64.3 +- 1.2,83.1 +- 0.6,51.0 +- 1.5,46.5 +- 1.0,43.7 +- 1.2,33.2 +- 2.3,8.9 +- 6.6,-18.1 +- 3.9,-36.3 +- 3.5,45.5 +- 6.9,64.3
7,LSTM 20,62.8 +- 1.2,82.7 +- 0.6,47.9 +- 1.4,45.5 +- 0.9,42.0 +- 1.0,30.5 +- 2.0,16.7 +- 6.9,-18.8 +- 4.0,-38.5 +- 3.6,40.6 +- 7.2,62.8
5,CNN 50,62.7 +- 1.3,82.6 +- 0.6,49.5 +- 1.5,46.0 +- 1.1,42.4 +- 1.2,32.4 +- 2.1,-4.2 +- 7.5,-2.7 +- 5.6,-37.4 +- 3.5,44.3 +- 8.3,62.7
6,CNN 100,62.5 +- 1.3,83.0 +- 0.7,51.8 +- 1.6,47.5 +- 1.1,44.0 +- 1.2,34.3 +- 2.1,3.0 +- 7.9,2.6 +- 4.8,-34.9 +- 3.5,29.3 +- 8.0,62.5
4,CNN 20,60.9 +- 1.3,81.7 +- 0.6,44.6 +- 1.2,43.7 +- 1.0,39.8 +- 1.1,27.5 +- 1.9,9.6 +- 8.1,-15.8 +- 5.2,-39.0 +- 3.6,45.2 +- 8.9,60.9
1,always light sleep,57.4 +- 1.2,74.1 +- 0.5,15.0 +- 0.6,25.9 +- 0.5,18.8 +- 0.6,0.0 +- 0.0,-88.9 +- 6.6,-67.4 +- 3.0,-39.2 +- 3.6,195.5 +- 7.4,57.4
10,Linear SVM,54.9 +- 1.5,77.0 +- 0.6,30.2 +- 1.1,32.0 +- 0.8,25.9 +- 0.9,4.9 +- 0.9,8.9 +- 13.1,-67.3 +- 3.0,-39.2 +- 3.6,97.7 +- 13.1,54.9
11,Logistic Regression,54.4 +- 1.5,77.3 +- 0.6,29.5 +- 1.0,32.8 +- 0.8,26.5 +- 0.9,6.0 +- 0.9,22.9 +- 13.1,-67.4 +- 3.0,-39.2 +- 3.6,83.6 +- 13.1,54.4


In [119]:
# Emit the LaTeX table: keep rows whose label is listed in `bls`, rank them by
# accuracy (best first) and restrict the output to the reporting columns.
print(
    stage_4_df.loc[stage_4_df['algorithms'].isin(bls)]
    .sort_values(by="acc_for_sort", ascending=False)
    .loc[:, stages_4_clf_columns]
    .to_latex(index=False)
)

\begin{tabular}{lllllllllll}
\toprule
          algorithms &      accuracy &   specificity &     precision &        recall &      f1-score &   cohen-kappa &     Deep sleep &     Light sleep &      REM sleep &           Wake \\
\midrule
        ground truth &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &    39.3 +- 3.6 &    258.2 +- 7.0 &    67.6 +- 3.0 &   187.4 +- 8.6 \\
            LSTM 100 &   67.4 +- 1.2 &   86.2 +- 0.4 &   56.2 +- 1.6 &   51.3 +- 1.1 &   49.5 +- 1.2 &   44.6 +- 2.2 &   -33.7 +- 3.5 &     60.4 +- 8.1 &   -13.5 +- 3.8 &   -13.1 +- 8.4 \\
             LSTM 50 &   66.2 +- 1.1 &   85.6 +- 0.4 &   54.4 +- 1.5 &   49.5 +- 1.1 &   47.4 +- 1.1 &   41.2 +- 2.1 &   -36.4 +- 3.5 &     65.9 +- 7.9 &   -14.6 +- 4.1 &   -15.0 +- 8.1 \\
             CNN 100 &   64.3 +- 1.1 &   85.3 +- 0.4 &   54.4 +- 1.6 &   50.2 +- 1.1 &   47.1 +- 1.1 &   40.9 +- 2.1 &   -34.8 +- 3.5 &     49.6 +- 8.9 &     8.2 +- 5.1 &   -23.0 +- 9.5 \\
        

### Task 3: 4 stages, single modality (ENMO)

In [94]:
# Task 3, ENMO only: load the 4-class summary and declare the labels used in the tables.
num_classes = 4
stages_4_clf_columns = [
    'algorithms',
    "accuracy", "specificity", "precision", "recall", "f1-score", "cohen-kappa",
    "Deep sleep", "Light sleep", "REM sleep", "Wake",
]
stage_4_df = load_task(
    sub_folder,
    num_classes=num_classes,
    feature_type='ENMO',
    stages_clf_columns=stages_4_clf_columns,
)

# Raw algorithm identifier -> label shown in the tables (insertion order matters:
# it determines the row order of readable_4stages_df and the order of `bls`).
# NOTE(review): "Perception" is probably meant to be "Perceptron"; left unchanged
# here so the label stays consistent with the other dictionaries in the notebook.
readable_4stages_algs_dict = {
    "stages": "ground truth",
    "always_0": "always wake",
    "always_1": "always light sleep",
    "always_2": "always deep sleep",
    "always_3": "always REM sleep",
    "CNN_20_ENMO": "CNN 20",
    "CNN_50_ENMO": "CNN 50",
    "CNN_100_ENMO": "CNN 100",
    "LSTM_20_ENMO": "LSTM 20",
    "LSTM_50_ENMO": "LSTM 50",
    "LSTM_100_ENMO": "LSTM 100",
    "SGD_hinge": "Linear SVM",
    "SGD_log": "Logistic Regression",
    "SGD_perceptron": "Perception",
    "Random_forest_300": "Random Forest",
}
readable_4stages_df = pd.DataFrame(
    list(readable_4stages_algs_dict.items()),
    columns=["old_alg_name", "new_alg_name"],
)
# Distinct display labels, used later to filter the rows of the LaTeX table.
bls = readable_4stages_df["new_alg_name"].unique().tolist()

summary csv is loaded
Pickle file is loaded


In [95]:
# Dump the summary as loaded (raw algorithm identifiers) for a quick visual check.
print(stage_4_df)

           algorithms      accuracy  specificity     precision        recall  \
0            always_0   19.3 +- 1.3  74.1 +- 0.5    5.2 +- 0.6   25.9 +- 0.5   
1            always_1   57.4 +- 1.2  74.1 +- 0.5   15.0 +- 0.6   25.9 +- 0.5   
2            always_2    9.0 +- 0.9  74.7 +- 0.4    2.5 +- 0.6   23.8 +- 0.7   
3            always_3   15.1 +- 0.8  74.3 +- 0.5    4.0 +- 0.6   25.3 +- 0.5   
4         CNN_20_ENMO   59.8 +- 1.1  77.2 +- 0.6   30.2 +- 1.0   33.5 +- 0.8   
5         CNN_50_ENMO   59.3 +- 1.1  77.5 +- 0.6   29.6 +- 1.0   33.9 +- 0.8   
6        CNN_100_ENMO   59.6 +- 1.1  77.4 +- 0.6   30.4 +- 1.0   33.5 +- 0.8   
7        LSTM_20_ENMO   60.1 +- 1.1  77.3 +- 0.6   30.1 +- 1.0   33.5 +- 0.8   
8        LSTM_50_ENMO   60.0 +- 1.1  77.3 +- 0.6   29.9 +- 1.0   33.4 +- 0.8   
9       LSTM_100_ENMO   59.9 +- 1.1  77.2 +- 0.6   29.2 +- 1.0   33.1 +- 0.8   
10          SGD_hinge   60.0 +- 1.1  77.3 +- 0.6   30.3 +- 1.0   33.8 +- 0.8   
11            SGD_log   59.9 +- 1.1  77.

In [96]:
# Show the identifier -> display-label mapping as a two-column table.
print(readable_4stages_df)

         old_alg_name         new_alg_name
0              stages         ground truth
1            always_0          always wake
2            always_1   always light sleep
3            always_2    always deep sleep
4            always_3     always REM sleep
5         CNN_20_ENMO               CNN 20
6         CNN_50_ENMO               CNN 50
7        CNN_100_ENMO              CNN 100
8        LSTM_20_ENMO              LSTM 20
9        LSTM_50_ENMO              LSTM 50
10      LSTM_100_ENMO             LSTM 100
11          SGD_hinge           Linear SVM
12            SGD_log  Logistic Regression
13     SGD_perceptron           Perception
14  Random_forest_300        Random Forest


In [97]:
# Swap the raw algorithm identifiers for their display labels. The dict is indexed
# directly so an identifier without a label raises KeyError instead of becoming NaN.
# Consequence: this cell is not re-runnable without reloading stage_4_df, because
# the already-renamed labels are not keys of the dict.
stage_4_df['algorithms'] = stage_4_df['algorithms'].apply(lambda x: readable_4stages_algs_dict[x])
# Metrics are stored as "<value> +- <value>" strings; parse the leading number so
# the table can be ranked numerically. Builtin float replaces np.float, an alias
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
stage_4_df['acc_for_sort'] = stage_4_df['accuracy'].apply(lambda x: float(x.split("+-")[0]))

In [98]:
# Display every algorithm ranked by accuracy, best first.
(
    stage_4_df.loc[:, [
        'algorithms',
        "accuracy", "specificity", "precision", "recall", "f1-score", "cohen-kappa",
        "Wake", "REM sleep", "Deep sleep", "Light sleep",
        "acc_for_sort",
    ]]
    .sort_values(by="acc_for_sort", ascending=False)
)

Unnamed: 0,algorithms,accuracy,specificity,precision,recall,f1-score,cohen-kappa,Wake,REM sleep,Deep sleep,Light sleep,acc_for_sort
14,ground truth,100.0 +- 0.0,99.7 +- 0.6,100.0 +- 0.0,100.0 +- 0.0,100.0 +- 0.0,100.0 +- 0.0,88.9 +- 6.6,67.4 +- 3.0,39.2 +- 3.6,257.5 +- 7.0,100.0
7,LSTM 20,60.1 +- 1.1,77.3 +- 0.6,30.1 +- 1.0,33.5 +- 0.8,29.5 +- 0.9,15.9 +- 1.3,-21.6 +- 5.5,-67.3 +- 3.0,-39.2 +- 3.6,128.1 +- 7.3,60.1
8,LSTM 50,60.0 +- 1.1,77.3 +- 0.6,29.9 +- 1.0,33.4 +- 0.8,29.3 +- 0.9,16.2 +- 1.4,-18.2 +- 5.7,-67.2 +- 3.0,-39.2 +- 3.6,124.6 +- 7.5,60.0
10,Linear SVM,60.0 +- 1.1,77.3 +- 0.6,30.3 +- 1.0,33.8 +- 0.8,29.7 +- 0.9,16.7 +- 1.4,-18.2 +- 6.2,-67.3 +- 3.0,-39.2 +- 3.6,124.7 +- 7.7,60.0
9,LSTM 100,59.9 +- 1.1,77.2 +- 0.6,29.2 +- 1.0,33.1 +- 0.8,28.9 +- 0.9,15.7 +- 1.4,-17.6 +- 6.2,-67.4 +- 3.0,-39.2 +- 3.6,124.1 +- 7.9,59.9
11,Logistic Regression,59.9 +- 1.1,77.5 +- 0.6,30.2 +- 1.0,34.0 +- 0.8,29.8 +- 0.9,17.0 +- 1.4,-12.7 +- 6.3,-67.4 +- 3.0,-39.1 +- 3.6,119.1 +- 7.8,59.9
4,CNN 20,59.8 +- 1.1,77.2 +- 0.6,30.2 +- 1.0,33.5 +- 0.8,29.4 +- 0.9,15.9 +- 1.4,-18.2 +- 6.0,-67.3 +- 3.0,-39.2 +- 3.6,124.6 +- 7.6,59.8
6,CNN 100,59.6 +- 1.1,77.4 +- 0.6,30.4 +- 1.0,33.5 +- 0.8,29.2 +- 0.9,16.2 +- 1.4,-11.8 +- 6.4,-67.2 +- 3.0,-39.2 +- 3.6,118.2 +- 8.1,59.6
5,CNN 50,59.3 +- 1.1,77.5 +- 0.6,29.6 +- 1.0,33.9 +- 0.8,29.4 +- 0.8,17.0 +- 1.4,-3.5 +- 6.5,-67.3 +- 3.0,-39.2 +- 3.6,109.9 +- 8.1,59.3
13,Random Forest,57.7 +- 1.1,77.8 +- 0.6,34.7 +- 1.0,33.8 +- 0.7,30.1 +- 0.7,17.2 +- 1.3,4.1 +- 5.9,-59.5 +- 2.9,-37.1 +- 3.5,92.5 +- 7.3,57.7


In [209]:
# Emit the LaTeX table: keep rows whose label is listed in `bls`, rank them by
# accuracy (best first) and restrict the output to the reporting columns.
print(
    stage_4_df.loc[stage_4_df['algorithms'].isin(bls)]
    .sort_values(by="acc_for_sort", ascending=False)
    .loc[:, stages_4_clf_columns]
    .to_latex(index=False)
)

\begin{tabular}{lllllllllll}
\toprule
          algorithms &      accuracy &   specificity &     precision &        recall &      f1-score &   cohen-kappa &    Deep sleep &    Light sleep &     REM sleep &           Wake \\
\midrule
        ground truth &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &   39.3 +- 3.6 &   258.2 +- 7.0 &   67.6 +- 3.0 &   187.4 +- 8.6 \\
            LSTM 100 &   64.1 +- 1.0 &   82.9 +- 0.5 &   35.6 +- 0.7 &   39.6 +- 0.7 &   35.8 +- 0.7 &   33.5 +- 1.4 &  -39.3 +- 3.6 &   139.4 +- 8.5 &  -67.6 +- 3.0 &   -32.5 +- 7.4 \\
             CNN 100 &   63.9 +- 1.0 &   83.0 +- 0.4 &   36.3 +- 0.9 &   39.6 +- 0.7 &   35.7 +- 0.7 &   33.5 +- 1.4 &  -39.3 +- 3.6 &   133.2 +- 8.7 &  -67.5 +- 3.0 &   -26.4 +- 7.6 \\
             LSTM 50 &   63.6 +- 1.0 &   82.7 +- 0.4 &   35.6 +- 0.8 &   39.3 +- 0.7 &   35.5 +- 0.7 &   33.0 +- 1.4 &  -39.3 +- 3.6 &   143.0 +- 8.2 &  -67.3 +- 3.0 &   -36.3 +- 7.1 \\
              CNN 50 & 

## ================ Task 4 5 stages ==================== 

###  ENMO&HRV

In [100]:
# Task 4, ENMO + HRV: load the 5-class summary and declare the labels used in the tables.
num_classes = 5
stages_5_clf_columns = [
    'algorithms',
    "accuracy", "specificity", "precision", "recall", "f1-score", "cohen-kappa",
    "N1 sleep", "N2 sleep", "N3 sleep", "REM sleep", "Wake",
]
stage_5_df = load_task(
    sub_folder,
    num_classes=num_classes,
    feature_type='ENMO_HRV',
    stages_clf_columns=stages_5_clf_columns,
)

# Raw algorithm identifier -> label shown in the tables (insertion order fixes the
# row order of readable_5stages_df).
# NOTE(review): "Perception" is probably meant to be "Perceptron"; left unchanged
# here so the label stays consistent with the other dictionaries in the notebook.
readable_5stages_algs_dict = {
    "stages": "ground truth",
    "always_0": "always wake",
    "always_1": "always N1 sleep",
    "always_2": "always N2 sleep",
    "always_3": "always N3 sleep",
    "always_4": "always REM sleep",
    "CNN_20_ENMO_HRV": "CNN 20",
    "CNN_50_ENMO_HRV": "CNN 50",
    "CNN_100_ENMO_HRV": "CNN 100",
    "LSTM_20_ENMO_HRV": "LSTM 20",
    "LSTM_50_ENMO_HRV": "LSTM 50",
    "LSTM_100_ENMO_HRV": "LSTM 100",
    "SGD_hinge": "Linear SVM",
    "SGD_log": "Logistic Regression",
    "SGD_perceptron": "Perception",
    "Random_forest_300": "Random Forest",
}
readable_5stages_df = pd.DataFrame(
    list(readable_5stages_algs_dict.items()),
    columns=["old_alg_name", "new_alg_name"],
)
print(readable_5stages_df)

summary csv is loaded
Pickle file is loaded
         old_alg_name         new_alg_name
0              stages         ground truth
1            always_0          always wake
2            always_1      always N1 sleep
3            always_2      always N2 sleep
4            always_3      always N3 sleep
5            always_4     always REM sleep
6     CNN_20_ENMO_HRV               CNN 20
7     CNN_50_ENMO_HRV               CNN 50
8    CNN_100_ENMO_HRV              CNN 100
9    LSTM_20_ENMO_HRV              LSTM 20
10   LSTM_50_ENMO_HRV              LSTM 50
11  LSTM_100_ENMO_HRV             LSTM 100
12          SGD_hinge           Linear SVM
13            SGD_log  Logistic Regression
14     SGD_perceptron           Perception
15  Random_forest_300        Random Forest


In [101]:
# Identifiers of the non-learned baselines for the 5-class task (not used within this cell).
stages_5_baselines = ['stages', 'always_0', 'always_1', 'always_2','always_3',"always_4"]
# Swap the raw algorithm identifiers for their display labels. The dict is indexed
# directly so an identifier without a label raises KeyError instead of becoming NaN.
# Consequence: this cell is not re-runnable without reloading stage_5_df, because
# the already-renamed labels are not keys of the dict.
stage_5_df['algorithms'] = stage_5_df['algorithms'].apply(lambda x: readable_5stages_algs_dict[x])
# Distinct display labels, used later to filter the rows of the LaTeX table.
bls = readable_5stages_df.new_alg_name.unique().tolist()
# Metrics are stored as "<value> +- <value>" strings; parse the leading number so
# the table can be ranked numerically. Builtin float replaces np.float, an alias
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
stage_5_df['acc_for_sort'] = stage_5_df['accuracy'].apply(lambda x: float(x.split("+-")[0]))


In [102]:
# Display every algorithm ranked by accuracy, best first.
(
    stage_5_df.loc[:, [
        'algorithms',
        "accuracy", "specificity", "precision", "recall", "f1-score", "cohen-kappa",
        "Wake", "REM sleep", "N3 sleep", "N2 sleep", "N1 sleep",
        "acc_for_sort",
    ]]
    .sort_values(by="acc_for_sort", ascending=False)
)

Unnamed: 0,algorithms,accuracy,specificity,precision,recall,f1-score,cohen-kappa,Wake,REM sleep,N3 sleep,N2 sleep,N1 sleep,acc_for_sort
15,ground truth,100.0 +- 0.0,99.7 +- 0.6,100.0 +- 0.0,100.0 +- 0.0,100.0 +- 0.0,100.0 +- 0.0,88.9 +- 6.6,67.4 +- 3.0,39.2 +- 3.6,207.9 +- 6.3,49.6 +- 3.3,100.0
7,CNN 100,59.2 +- 1.2,86.5 +- 0.6,49.7 +- 1.4,42.0 +- 1.0,39.1 +- 1.0,45.1 +- 1.9,-8.6 +- 5.8,5.0 +- 4.8,-34.3 +- 3.5,78.2 +- 7.4,-40.3 +- 3.1,59.2
6,CNN 50,58.8 +- 1.2,86.4 +- 0.6,46.6 +- 1.4,41.5 +- 0.9,37.8 +- 1.0,44.7 +- 2.0,-4.7 +- 5.7,11.5 +- 5.7,-37.9 +- 3.5,73.1 +- 7.9,-42.0 +- 3.1,58.8
10,LSTM 100,58.4 +- 1.1,85.9 +- 0.6,44.3 +- 1.4,39.9 +- 0.9,36.7 +- 1.0,43.5 +- 2.0,7.7 +- 5.7,-17.5 +- 3.8,-32.3 +- 3.5,88.0 +- 7.1,-45.9 +- 3.0,58.4
9,LSTM 50,58.3 +- 1.1,86.2 +- 0.6,43.7 +- 1.5,40.6 +- 0.9,36.5 +- 0.9,44.0 +- 2.0,31.9 +- 6.1,-14.3 +- 3.8,-35.2 +- 3.5,63.4 +- 7.2,-45.8 +- 3.0,58.3
8,LSTM 20,57.9 +- 1.1,86.3 +- 0.6,40.6 +- 1.2,41.1 +- 0.8,36.2 +- 0.8,43.6 +- 1.9,25.4 +- 5.8,0.5 +- 4.2,-37.7 +- 3.5,57.3 +- 6.8,-45.4 +- 3.0,57.9
5,CNN 20,57.4 +- 1.2,86.1 +- 0.6,42.4 +- 1.2,40.6 +- 0.8,36.3 +- 0.9,42.6 +- 1.8,2.2 +- 5.8,14.1 +- 5.8,-39.0 +- 3.6,66.0 +- 7.7,-43.3 +- 3.0,57.4
14,Random Forest,51.3 +- 1.2,83.2 +- 0.5,33.2 +- 1.2,31.1 +- 0.6,25.5 +- 0.7,32.0 +- 1.7,33.9 +- 6.9,-55.2 +- 3.1,-38.6 +- 3.6,108.2 +- 7.9,-48.3 +- 3.2,51.3
12,Logistic Regression,51.1 +- 1.2,82.7 +- 0.6,25.8 +- 1.1,30.3 +- 0.7,23.8 +- 0.7,30.9 +- 1.7,33.4 +- 7.8,-66.9 +- 3.0,-39.1 +- 3.6,120.1 +- 8.8,-47.5 +- 3.1,51.1
11,Linear SVM,49.9 +- 1.3,82.9 +- 0.5,27.9 +- 1.0,30.1 +- 0.6,23.9 +- 0.7,29.7 +- 1.7,46.5 +- 8.1,-63.2 +- 3.0,-38.6 +- 3.6,100.0 +- 9.1,-44.7 +- 3.1,49.9


In [138]:
# Emit the LaTeX table: keep rows whose label is listed in `bls`, rank them by
# accuracy (best first) and restrict the output to the reporting columns.
print(
    stage_5_df.loc[stage_5_df['algorithms'].isin(bls)]
    .sort_values(by="acc_for_sort", ascending=False)
    .loc[:, stages_5_clf_columns]
    .to_latex(index=False)
)

\begin{tabular}{llllllllllll}
\toprule
          algorithms &      accuracy &   specificity &     precision &        recall &      f1-score &   cohen-kappa &      N1 sleep &       N2 sleep &      N3 sleep &     REM sleep &           Wake \\
\midrule
        ground truth &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &   49.4 +- 3.2 &   208.7 +- 6.2 &   39.3 +- 3.6 &   67.6 +- 3.0 &   187.4 +- 8.6 \\
             LSTM 50 &   63.7 +- 1.0 &   88.7 +- 0.3 &   47.1 +- 1.4 &   43.0 +- 0.8 &   39.9 +- 0.8 &   56.3 +- 1.8 &  -46.0 +- 3.0 &    71.9 +- 7.5 &  -35.2 +- 3.5 &  -12.9 +- 3.9 &    22.2 +- 7.1 \\
            LSTM 100 &   63.6 +- 1.0 &   88.7 +- 0.3 &   47.8 +- 1.3 &   43.3 +- 0.8 &   40.5 +- 0.9 &   57.0 +- 1.8 &  -46.2 +- 3.0 &    97.7 +- 7.5 &  -32.3 +- 3.5 &  -15.9 +- 3.9 &    -3.3 +- 6.8 \\
             CNN 100 &   63.1 +- 1.1 &   88.8 +- 0.3 &   51.5 +- 1.4 &   44.7 +- 0.9 &   41.9 +- 0.9 &   56.2 +- 1.8 &  -40.2 +- 3.1 &    92.4 +

### Task 4: 5 stages, single modality (ENMO)

In [111]:
# Task 4, ENMO only: load the 5-class summary and declare the labels used in the tables.
num_classes = 5
stages_clf_columns = [
    'algorithms',
    "accuracy", "specificity", "precision", "recall", "f1-score", "cohen-kappa",
    "N1 sleep", "N2 sleep", "N3 sleep", "REM sleep", "Wake",
]
stage_5_df = load_task(
    sub_folder,
    num_classes=num_classes,
    feature_type='ENMO',
    stages_clf_columns=stages_clf_columns,
)

# Raw algorithm identifier -> label shown in the tables (insertion order matters:
# it determines the row order of readable_5stages_df and the order of `bls`).
# NOTE(review): "Perception" is probably meant to be "Perceptron"; left unchanged
# here so the label stays consistent with the other dictionaries in the notebook.
readable_5stages_algs_dict = {
    "stages": "ground truth",
    "always_0": "always wake",
    "always_1": "always N1 sleep",
    "always_2": "always N2 sleep",
    "always_3": "always N3 sleep",
    "always_4": "always REM sleep",
    "CNN_20_ENMO": "CNN 20",
    "CNN_50_ENMO": "CNN 50",
    "CNN_100_ENMO": "CNN 100",
    "LSTM_20_ENMO": "LSTM 20",
    "LSTM_50_ENMO": "LSTM 50",
    "LSTM_100_ENMO": "LSTM 100",
    "SGD_hinge": "Linear SVM",
    "SGD_log": "Logistic Regression",
    "SGD_perceptron": "Perception",
    "Random_forest_300": "Random Forest",
}
readable_5stages_df = pd.DataFrame(
    list(readable_5stages_algs_dict.items()),
    columns=["old_alg_name", "new_alg_name"],
)
# Distinct display labels, used later to filter the rows of the LaTeX table.
bls = readable_5stages_df["new_alg_name"].unique().tolist()

summary csv is loaded
Pickle file is loaded


In [115]:
# print(stage_5_df)
# print(readable_5stages_df)

In [113]:
# Swap the raw algorithm identifiers for their display labels. The dict is indexed
# directly so an identifier without a label raises KeyError instead of becoming NaN.
# Consequence: this cell is not re-runnable without reloading stage_5_df, because
# the already-renamed labels are not keys of the dict.
stage_5_df['algorithms'] = stage_5_df['algorithms'].apply(lambda x: readable_5stages_algs_dict[x])
# Metrics are stored as "<value> +- <value>" strings; parse the leading number so
# the table can be ranked numerically. Builtin float replaces np.float, an alias
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
stage_5_df['acc_for_sort'] = stage_5_df['accuracy'].apply(lambda x: float(x.split("+-")[0]))

In [116]:
# Display every algorithm ranked by accuracy, best first.
(
    stage_5_df.loc[:, [
        'algorithms',
        "accuracy", "specificity", "precision", "recall", "f1-score", "cohen-kappa",
        "Wake", "REM sleep", "N3 sleep", "N2 sleep", "N1 sleep",
        "acc_for_sort",
    ]]
    .sort_values(by="acc_for_sort", ascending=False)
)

Unnamed: 0,algorithms,accuracy,specificity,precision,recall,f1-score,cohen-kappa,Wake,REM sleep,N3 sleep,N2 sleep,N1 sleep,acc_for_sort
15,ground truth,100.0 +- 0.0,99.7 +- 0.6,100.0 +- 0.0,100.0 +- 0.0,100.0 +- 0.0,100.0 +- 0.0,88.9 +- 6.6,67.4 +- 3.0,39.2 +- 3.6,207.9 +- 6.3,49.6 +- 3.3,100.0
9,LSTM 50,51.3 +- 1.1,82.0 +- 0.6,21.8 +- 0.9,28.2 +- 0.8,22.7 +- 0.8,27.8 +- 1.9,2.3 +- 6.2,-67.3 +- 3.0,-39.2 +- 3.6,153.3 +- 8.1,-49.1 +- 3.2,51.3
5,CNN 20,50.9 +- 1.2,82.0 +- 0.6,21.5 +- 0.9,28.1 +- 0.7,22.5 +- 0.8,27.4 +- 1.8,4.6 +- 6.5,-67.1 +- 3.0,-39.2 +- 3.6,150.7 +- 8.1,-49.1 +- 3.2,50.9
8,LSTM 20,50.9 +- 1.1,82.0 +- 0.6,21.1 +- 0.9,28.3 +- 0.8,22.7 +- 0.8,27.8 +- 1.8,5.1 +- 6.1,-67.4 +- 3.0,-39.1 +- 3.6,150.5 +- 7.9,-49.1 +- 3.2,50.9
12,Logistic Regression,50.8 +- 1.2,82.0 +- 0.6,21.2 +- 0.9,28.6 +- 0.8,22.8 +- 0.8,29.1 +- 1.8,10.6 +- 6.8,-67.4 +- 3.0,-39.1 +- 3.6,144.9 +- 8.3,-49.1 +- 3.2,50.8
10,LSTM 100,50.7 +- 1.1,81.9 +- 0.6,20.6 +- 0.9,28.0 +- 0.8,22.4 +- 0.8,27.4 +- 1.9,8.8 +- 6.6,-67.4 +- 3.0,-39.2 +- 3.6,146.8 +- 8.4,-49.1 +- 3.2,50.7
7,CNN 100,50.3 +- 1.2,82.2 +- 0.6,23.3 +- 1.0,28.3 +- 0.7,22.6 +- 0.8,28.0 +- 1.9,20.4 +- 7.3,-65.5 +- 3.0,-39.2 +- 3.6,133.4 +- 8.8,-49.1 +- 3.2,50.3
6,CNN 50,50.1 +- 1.2,82.2 +- 0.6,21.1 +- 0.9,28.6 +- 0.7,22.5 +- 0.8,28.5 +- 1.8,29.8 +- 7.5,-67.0 +- 3.0,-39.2 +- 3.6,125.5 +- 8.8,-49.1 +- 3.2,50.1
11,Linear SVM,48.5 +- 1.2,82.5 +- 0.5,27.6 +- 0.9,28.5 +- 0.6,23.0 +- 0.7,28.5 +- 1.8,42.1 +- 7.7,-60.4 +- 3.0,-37.6 +- 3.5,100.4 +- 8.7,-44.5 +- 3.2,48.5
14,Random Forest,48.3 +- 1.1,82.5 +- 0.5,28.0 +- 0.9,28.2 +- 0.6,23.4 +- 0.7,27.2 +- 1.7,32.1 +- 6.5,-56.3 +- 2.8,-36.7 +- 3.5,105.2 +- 7.6,-44.3 +- 3.2,48.3


In [147]:
# Emit the LaTeX table: keep rows whose label is listed in `bls`, rank them by
# accuracy (best first) and restrict the output to the reporting columns.
print(
    stage_5_df.loc[stage_5_df['algorithms'].isin(bls)]
    .sort_values(by="acc_for_sort", ascending=False)
    .loc[:, stages_clf_columns]
    .to_latex(index=False)
)

\begin{tabular}{llllllllllll}
\toprule
          algorithms &      accuracy &   specificity &     precision &        recall &      f1-score &   cohen-kappa &      N1 sleep &       N2 sleep &      N3 sleep &     REM sleep &           Wake \\
\midrule
        ground truth &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &   49.4 +- 3.2 &   208.7 +- 6.2 &   39.3 +- 3.6 &   67.6 +- 3.0 &   187.4 +- 8.6 \\
             LSTM 50 &   56.9 +- 1.0 &   85.7 +- 0.4 &   26.1 +- 0.8 &   32.2 +- 0.6 &   27.1 +- 0.7 &   46.9 +- 1.7 &  -49.4 +- 3.2 &   169.2 +- 8.5 &  -39.3 +- 3.6 &  -67.6 +- 3.0 &   -12.9 +- 7.5 \\
            LSTM 100 &   56.9 +- 1.0 &   85.7 +- 0.4 &   25.3 +- 0.7 &   32.3 +- 0.6 &   27.1 +- 0.7 &   47.1 +- 1.7 &  -49.4 +- 3.2 &   159.7 +- 8.7 &  -39.3 +- 3.6 &  -67.6 +- 3.0 &    -3.3 +- 7.5 \\
             CNN 100 &   56.8 +- 1.1 &   85.8 +- 0.3 &   27.7 +- 0.9 &   32.2 +- 0.5 &   27.2 +- 0.6 &   46.9 +- 1.7 &  -49.4 +- 3.2 &   144.7 +

### Task 4: 5 stages, single modality (HRV)

In [117]:
# Task 4, HRV only: load the 5-class summary and declare the labels used in the tables.
num_classes = 5
stages_clf_columns = [
    'algorithms',
    "accuracy", "specificity", "precision", "recall", "f1-score", "cohen-kappa",
    "N1 sleep", "N2 sleep", "N3 sleep", "REM sleep", "Wake",
]
stage_5_df = load_task(
    sub_folder,
    num_classes=num_classes,
    feature_type='HRV',
    stages_clf_columns=stages_clf_columns,
)

# Raw algorithm identifier -> label shown in the tables (insertion order matters:
# it determines the row order of readable_5stages_df and the order of `bls`).
# NOTE(review): "Perception" is probably meant to be "Perceptron"; left unchanged
# here so the label stays consistent with the other dictionaries in the notebook.
readable_5stages_algs_dict = {
    "stages": "ground truth",
    "always_0": "always wake",
    "always_1": "always N1 sleep",
    "always_2": "always N2 sleep",
    "always_3": "always N3 sleep",
    "always_4": "always REM sleep",
    "CNN_20_HRV": "CNN 20",
    "CNN_50_HRV": "CNN 50",
    "CNN_100_HRV": "CNN 100",
    "LSTM_20_HRV": "LSTM 20",
    "LSTM_50_HRV": "LSTM 50",
    "LSTM_100_HRV": "LSTM 100",
    "SGD_hinge": "Linear SVM",
    "SGD_log": "Logistic Regression",
    "SGD_perceptron": "Perception",
    "Random_forest_300": "Random Forest",
}
# Fixed: the frame was previously bound to the misspelled name `rreadable_5stages_df`,
# so `bls` below was computed from whatever `readable_5stages_df` an earlier cell had
# left in the kernel (and raised NameError on a fresh kernel).
readable_5stages_df = pd.DataFrame(
    list(readable_5stages_algs_dict.items()),
    columns=["old_alg_name", "new_alg_name"],
)
# Distinct display labels, used later to filter the rows of the LaTeX table.
bls = readable_5stages_df.new_alg_name.unique().tolist()

summary csv is loaded
Pickle file is loaded


In [118]:
# print(stage_5_df)
# print(readable_5stages_df)

In [119]:
# Swap the raw algorithm identifiers for their display labels. The dict is indexed
# directly so an identifier without a label raises KeyError instead of becoming NaN.
# Consequence: this cell is not re-runnable without reloading stage_5_df, because
# the already-renamed labels are not keys of the dict.
stage_5_df['algorithms'] = stage_5_df['algorithms'].apply(lambda x: readable_5stages_algs_dict[x])
# Metrics are stored as "<value> +- <value>" strings; parse the leading number so
# the table can be ranked numerically. Builtin float replaces np.float, an alias
# deprecated in NumPy 1.20 and removed in NumPy 1.24.
stage_5_df['acc_for_sort'] = stage_5_df['accuracy'].apply(lambda x: float(x.split("+-")[0]))

In [120]:
# Display every algorithm ranked by accuracy, best first.
(
    stage_5_df.loc[:, [
        'algorithms',
        "accuracy", "specificity", "precision", "recall", "f1-score", "cohen-kappa",
        "Wake", "REM sleep", "N3 sleep", "N2 sleep", "N1 sleep",
        "acc_for_sort",
    ]]
    .sort_values(by="acc_for_sort", ascending=False)
)

Unnamed: 0,algorithms,accuracy,specificity,precision,recall,f1-score,cohen-kappa,Wake,REM sleep,N3 sleep,N2 sleep,N1 sleep,acc_for_sort
15,ground truth,100.0 +- 0.0,99.7 +- 0.6,100.0 +- 0.0,100.0 +- 0.0,100.0 +- 0.0,100.0 +- 0.0,88.9 +- 6.6,67.4 +- 3.0,39.2 +- 3.6,207.9 +- 6.3,49.6 +- 3.3,100.0
5,CNN 20,53.8 +- 1.3,85.0 +- 0.6,38.9 +- 1.3,36.7 +- 0.9,32.1 +- 1.0,28.7 +- 1.8,18.8 +- 8.2,-9.4 +- 5.4,-39.0 +- 3.6,74.0 +- 8.7,-44.4 +- 3.0,53.8
7,CNN 100,52.4 +- 1.3,85.1 +- 0.6,42.8 +- 1.4,37.6 +- 1.0,33.6 +- 1.1,26.8 +- 1.9,28.9 +- 9.0,-15.7 +- 4.5,-29.8 +- 3.5,57.6 +- 8.4,-41.0 +- 3.1,52.4
6,CNN 50,50.4 +- 1.3,84.6 +- 0.6,39.2 +- 1.4,35.2 +- 0.9,30.0 +- 1.1,22.9 +- 1.9,62.0 +- 10.4,-30.8 +- 4.8,-36.4 +- 3.5,46.2 +- 9.8,-40.9 +- 3.0,50.4
8,LSTM 20,49.3 +- 1.2,81.8 +- 0.5,27.8 +- 1.1,27.8 +- 0.7,22.5 +- 0.8,16.6 +- 1.5,-9.7 +- 8.8,-58.9 +- 3.1,-39.2 +- 3.6,155.7 +- 9.7,-48.0 +- 3.1,49.3
2,always N2 sleep,46.6 +- 1.2,79.2 +- 0.6,9.9 +- 0.6,20.8 +- 0.6,13.2 +- 0.6,0.0 +- 0.0,-88.9 +- 6.6,-67.4 +- 3.0,-39.2 +- 3.6,244.6 +- 8.1,-49.1 +- 3.2,46.6
10,LSTM 100,46.6 +- 1.2,80.9 +- 0.5,20.0 +- 1.0,24.7 +- 0.7,19.0 +- 0.7,10.4 +- 1.5,-5.9 +- 8.6,-65.8 +- 3.0,-39.2 +- 3.6,160.0 +- 9.6,-49.1 +- 3.2,46.6
9,LSTM 50,45.0 +- 1.3,81.8 +- 0.6,20.1 +- 1.0,27.4 +- 0.7,20.1 +- 0.8,13.8 +- 1.5,70.6 +- 11.9,-66.7 +- 3.0,-39.2 +- 3.6,84.3 +- 12.3,-49.0 +- 3.2,45.0
12,Logistic Regression,44.4 +- 1.4,81.6 +- 0.6,20.8 +- 1.0,27.1 +- 0.7,19.9 +- 0.8,11.7 +- 1.3,67.6 +- 13.8,-67.3 +- 3.0,-39.2 +- 3.6,87.1 +- 13.9,-48.2 +- 3.2,44.4
14,Random Forest,43.1 +- 1.3,81.8 +- 0.5,28.5 +- 1.1,27.0 +- 0.6,20.4 +- 0.7,10.2 +- 1.1,83.9 +- 12.2,-61.7 +- 3.1,-38.8 +- 3.6,62.8 +- 12.1,-46.1 +- 3.1,43.1


In [156]:
# Emit the LaTeX table: keep rows whose label is listed in `bls`, rank them by
# accuracy (best first) and restrict the output to the reporting columns.
print(
    stage_5_df.loc[stage_5_df['algorithms'].isin(bls)]
    .sort_values(by="acc_for_sort", ascending=False)
    .loc[:, stages_clf_columns]
    .to_latex(index=False)
)

\begin{tabular}{llllllllllll}
\toprule
          algorithms &      accuracy &   specificity &     precision &        recall &      f1-score &   cohen-kappa &      N1 sleep &       N2 sleep &       N3 sleep &     REM sleep &           Wake \\
\midrule
        ground truth &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &  100.0 +- 0.0 &   49.4 +- 3.2 &   208.7 +- 6.2 &    39.3 +- 3.6 &   67.6 +- 3.0 &   187.4 +- 8.6 \\
              CNN 20 &   55.6 +- 1.1 &   86.4 +- 0.3 &   40.4 +- 1.2 &   37.3 +- 0.8 &   33.6 +- 0.9 &   36.2 +- 1.8 &  -44.4 +- 3.0 &   103.0 +- 9.8 &   -39.1 +- 3.6 &   -3.9 +- 5.8 &  -15.6 +- 10.1 \\
             CNN 100 &   55.6 +- 1.1 &   86.7 +- 0.3 &   44.9 +- 1.4 &   38.9 +- 0.9 &   35.9 +- 1.0 &   37.1 +- 1.8 &  -40.8 +- 3.1 &    81.2 +- 9.4 &   -29.5 +- 3.6 &  -12.0 +- 4.8 &    1.1 +- 10.7 \\
              CNN 50 &   54.2 +- 1.1 &   86.0 +- 0.3 &   41.2 +- 1.3 &   35.6 +- 0.8 &   32.1 +- 1.0 &   32.3 +- 1.9 &  -40.6 +- 3.1 &   69.

### Print the LaTeX table of HRV feature descriptions

In [177]:
# Render the HRV feature-description CSV as a LaTeX table.
# NOTE(review): absolute, machine-specific path; consider deriving it from the
# project configuration so the cell runs on other machines.
# NOTE(review): the recorded output shows the first feature (SDNN) in the header
# position, which suggests the CSV has no header row; confirm whether
# header=None is wanted here.
hrv_description = pd.read_csv("C:/tmp/hrv_feature-description.csv")
# Raise the per-cell width limit (presumably so long descriptions are not cut
# off). The fully qualified option name is used because abbreviated option names
# can become ambiguous when pandas adds new options.
pd.set_option('display.max_colwidth', 500)
# float_format="%s" prints floats through plain string formatting.
print(hrv_description.to_latex(index=False, float_format="%s"))


\begin{tabular}{ll}
\toprule
             SDNN &                                                                                                                                                     Standard deviation of Normal-to-Normal interval \\
\midrule
             SDSD &                                                                                                                                                               Standard deviation of NNI differences \\
             NN50 &                                                                                                                                                      Number of NN interval differences greater 50ms \\
            pNN50 &                                                                                                                                                 Ratio between NN50 and total number of NN intervals \\
             NN20 &                                                                   

### This is for task 4: 5 stages tasks multimodality HRV & ENMO

In [219]:
# ml_dl_algorithms = []
# print(summary.loc[traditional].sort_values(by="Accuracy", ascending=False)[metrics].to_latex())

# t = summary.loc[traditional].sort_values(by="Accuracy", ascending=False)[metrics]
# print(get_group_average(t, metrics))

In [None]:
# LaTeX table and group average for the "resc_"-prefixed traditional algorithms,
# ranked by accuracy.
# NOTE(review): `summary`, `metrics` and `defaultalgs` are not defined in the part
# of the notebook reviewed here and this cell was never executed; confirm they exist.
traditionalresc = ["resc_" + alg for alg in defaultalgs]
t = summary.loc[traditionalresc].sort_values(by="Accuracy", ascending=False)[metrics]
print(t.to_latex())
print(get_group_average(t, metrics))

In [None]:
# LaTeX table and group average for the default ML algorithms, ranked by accuracy.
# NOTE(review): `summary` and `metrics` are not defined in the part of the notebook
# reviewed here and this cell was never executed; confirm they exist.
ml = list(defaultml)
t = summary.loc[ml].sort_values(by="Accuracy", ascending=False)[metrics]
print(t.to_latex())
print(get_group_average(t, metrics))

In [None]:
# LaTeX table and group average for the "resc_"-prefixed ML algorithms, ranked by accuracy.
# NOTE(review): `summary` and `metrics` are not defined in the part of the notebook
# reviewed here and this cell was never executed; confirm they exist.
mlresc = ["resc_" + alg for alg in defaultml]
t = summary.loc[mlresc].sort_values(by="Accuracy", ascending=False)[metrics]
print(t.to_latex())
print(get_group_average(t, metrics))

In [None]:
# LaTeX table and group average for the default deep-learning algorithms, ranked by accuracy.
# NOTE(review): `summary` and `metrics` are not defined in the part of the notebook
# reviewed here and this cell was never executed; confirm they exist.
dl = list(defaultdl)
t = summary.loc[dl].sort_values(by="Accuracy", ascending=False)[metrics]
print(t.to_latex())
print(get_group_average(t, metrics))

In [None]:
# LaTeX table and group average for the "resc_"-prefixed deep-learning algorithms,
# ranked by accuracy.
# NOTE(review): `summary` and `metrics` are not defined in the part of the notebook
# reviewed here and this cell was never executed; confirm they exist.
dlresc = ["resc_" + alg for alg in defaultdl]
t = summary.loc[dlresc].sort_values(by="Accuracy", ascending=False)[metrics]
print(t.to_latex())
print(get_group_average(t, metrics))

# T-tests start here

## Ensemble method

##### Task 1: compare F1, accuracy, and kappa between the ensemble method and the 2-stage ENMO_HRV models

In [277]:
# Comparison setup for the 2-class task with the combined ENMO_HRV features.
num_classes, feature_type = 2, 'ENMO_HRV'
result_folder = cfg.STAGE_OUTPUT_FOLDER_HRV30s[num_classes]

# Paths of the per-model results and of the ensemble results for this task.
ENMO_HRV_PICKLE_RESULT_FILE = os.path.join(
    result_folder, sub_folder,
    "%d_stages_results_%s.pkl" % (num_classes, feature_type))
ENSEMBLE_PICKLE_RESULT_FILE = os.path.join(
    result_folder, "ensemble_all",
    "%d_stages_ensemble_results_%s.pkl" % (num_classes, feature_type))

# NOTE: unpickling runs arbitrary code; only load files produced by this project.
with open(ENMO_HRV_PICKLE_RESULT_FILE, "rb") as f:
    task_results = pickle.load(f)
with open(ENSEMBLE_PICKLE_RESULT_FILE, "rb") as f:
    ensemble_results = pickle.load(f)

# Sanity output: shape of the mean-ensemble result, then the keys of both result sets.
print("test dataset size is: ")
print(ensemble_results['mean_ensemble'].shape)
print("non ensemble", task_results.keys(), ensemble_results.keys(), sep="\n")

test dataset size is: 
(348, 6)
non ensemble
dict_keys(['always_0', 'always_1', 'CNN_20_ENMO_HRV', 'CNN_50_ENMO_HRV', 'CNN_100_ENMO_HRV', 'LSTM_20_ENMO_HRV', 'LSTM_50_ENMO_HRV', 'LSTM_100_ENMO_HRV', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])
dict_keys(['stages', 'CNN_20', 'CNN_50', 'CNN_100', 'LSTM_20', 'LSTM_50', 'LSTM_100', 'max_ensemble', 'mean_ensemble'])


In [278]:
# Compare CNN_100_ENMO_HRV with the mean ensemble; ttest prints one p-value per
# listed metric. NOTE(review): ttest is defined elsewhere in the notebook; it
# presumably looks up the first key in task_results and the second in
# ensemble_results — confirm.
ttest(task_results, ensemble_results,'CNN_100_ENMO_HRV', 'mean_ensemble', ['f1-score', 'accuracy','cohen-kappa'])

T-Test for metric: f1-score, the p value = 0.267
T-Test for metric: accuracy, the p value = 0.191
T-Test for metric: cohen-kappa, the p value = 0.112


In [286]:
# Compare LSTM_100_ENMO_HRV with the mean ensemble on f1-score only; ttest prints
# the p-value for that metric.
ttest(task_results, ensemble_results,'LSTM_100_ENMO_HRV', 'mean_ensemble', ['f1-score'])

T-Test for metric: f1-score, the p value = 0.218


##### Task 2: compare F1, accuracy, and kappa between the ensemble method and the 3-stage ENMO_HRV models

In [288]:
# Comparison setup for the 3-class task with the combined ENMO_HRV features.
num_classes, feature_type = 3, 'ENMO_HRV'
result_folder = cfg.STAGE_OUTPUT_FOLDER_HRV30s[num_classes]

# Paths of the per-model results and of the ensemble results for this task.
ENMO_HRV_PICKLE_RESULT_FILE = os.path.join(
    result_folder, sub_folder,
    "%d_stages_results_%s.pkl" % (num_classes, feature_type))
ENSEMBLE_PICKLE_RESULT_FILE = os.path.join(
    result_folder, "ensemble_all",
    "%d_stages_ensemble_results_%s.pkl" % (num_classes, feature_type))

# NOTE: unpickling runs arbitrary code; only load files produced by this project.
with open(ENMO_HRV_PICKLE_RESULT_FILE, "rb") as f:
    task_results = pickle.load(f)
with open(ENSEMBLE_PICKLE_RESULT_FILE, "rb") as f:
    ensemble_results = pickle.load(f)

# Sanity output: shape of the mean-ensemble result, then the keys of both result sets.
print("test dataset size is: ")
print(ensemble_results['mean_ensemble'].shape)
print("non ensemble", task_results.keys(), ensemble_results.keys(), sep="\n")

test dataset size is: 
(348, 6)
non ensemble
dict_keys(['always_0', 'always_1', 'always_2', 'CNN_20_ENMO_HRV', 'CNN_50_ENMO_HRV', 'CNN_100_ENMO_HRV', 'LSTM_20_ENMO_HRV', 'LSTM_50_ENMO_HRV', 'LSTM_100_ENMO_HRV', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])
dict_keys(['stages', 'CNN_20', 'CNN_50', 'CNN_100', 'LSTM_20', 'LSTM_50', 'LSTM_100', 'max_ensemble', 'mean_ensemble'])


In [289]:
# Compare LSTM_50_ENMO_HRV with the mean ensemble; ttest prints one p-value per
# listed metric.
ttest(task_results, ensemble_results,'LSTM_50_ENMO_HRV', 'mean_ensemble', ['f1-score', 'accuracy','cohen-kappa'])

T-Test for metric: f1-score, the p value = 0.041
T-Test for metric: accuracy, the p value = 0.004
T-Test for metric: cohen-kappa, the p value = 0.009


In [290]:
# CNN_100_ENMO_HRV vs. the mean ensemble; task_results / ensemble_results are expected
# to be the 3-class results loaded by the setup cell above.
ttest(
    task_results,
    ensemble_results,
    'CNN_100_ENMO_HRV',
    'mean_ensemble',
    ['f1-score', 'accuracy', 'cohen-kappa'],
)

T-Test for metric: f1-score, the p value = 0.065
T-Test for metric: accuracy, the p value = 0.002
T-Test for metric: cohen-kappa, the p value = 0.015


##### Compare F1, accuracy, and kappa between the ensemble method and the 4-stage ENMO_HRV models (task 3)

In [281]:
# Comparison setup: 4-class (task 3) results on ENMO_HRV features, single models vs. ensemble.
num_classes = 4
feature_type = 'ENMO_HRV'
result_folder = cfg.STAGE_OUTPUT_FOLDER_HRV30s[num_classes]

# Per-model results for the 4-class task.
ENMO_HRV_PICKLE_RESULT_FILE = os.path.join(
    result_folder,
    sub_folder,
    "%d_stages_results_%s.pkl" % (num_classes, feature_type),
)
with open(ENMO_HRV_PICKLE_RESULT_FILE, "rb") as f:
    task_results = pickle.load(f)

# Ensemble results for the same task, stored under "ensemble_all".
ENSEMBLE_PICKLE_RESULT_FILE = os.path.join(
    result_folder,
    "ensemble_all",
    "%d_stages_ensemble_results_%s.pkl" % (num_classes, feature_type),
)
with open(ENSEMBLE_PICKLE_RESULT_FILE, "rb") as f:
    ensemble_results = pickle.load(f)

# Sanity output: shape of the mean-ensemble table, then the keys of both result dicts.
print(
    "test dataset size is: ",
    ensemble_results['mean_ensemble'].shape,
    "non ensemble",
    task_results.keys(),
    ensemble_results.keys(),
    sep="\n",
)

test dataset size is: 
(348, 6)
non ensemble
dict_keys(['always_0', 'always_1', 'always_2', 'always_3', 'CNN_20_ENMO_HRV', 'CNN_50_ENMO_HRV', 'CNN_100_ENMO_HRV', 'LSTM_20_ENMO_HRV', 'LSTM_50_ENMO_HRV', 'LSTM_100_ENMO_HRV', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])
dict_keys(['stages', 'CNN_20', 'CNN_50', 'CNN_100', 'LSTM_20', 'LSTM_50', 'LSTM_100', 'max_ensemble', 'mean_ensemble'])


In [282]:
# LSTM_50_ENMO_HRV vs. the mean ensemble; task_results / ensemble_results are expected
# to be the 4-class results loaded by the setup cell above.
ttest(
    task_results,
    ensemble_results,
    'LSTM_50_ENMO_HRV',
    'mean_ensemble',
    ['f1-score', 'accuracy', 'cohen-kappa'],
)

T-Test for metric: f1-score, the p value = 0.772
T-Test for metric: accuracy, the p value = 0.067
T-Test for metric: cohen-kappa, the p value = 0.087


##### Compare F1, accuracy, and kappa between the ensemble method and the 5-stage ENMO_HRV models (task 4)

In [284]:
# Comparison setup: 5-class (task 4) results on ENMO_HRV features, single models vs. ensemble.
num_classes = 5
feature_type = 'ENMO_HRV'
result_folder = cfg.STAGE_OUTPUT_FOLDER_HRV30s[num_classes]

# Per-model results for the 5-class task.
ENMO_HRV_PICKLE_RESULT_FILE = os.path.join(
    result_folder,
    sub_folder,
    "%d_stages_results_%s.pkl" % (num_classes, feature_type),
)
with open(ENMO_HRV_PICKLE_RESULT_FILE, "rb") as f:
    task_results = pickle.load(f)

# Ensemble results for the same task, stored under "ensemble_all".
ENSEMBLE_PICKLE_RESULT_FILE = os.path.join(
    result_folder,
    "ensemble_all",
    "%d_stages_ensemble_results_%s.pkl" % (num_classes, feature_type),
)
with open(ENSEMBLE_PICKLE_RESULT_FILE, "rb") as f:
    ensemble_results = pickle.load(f)

# Sanity output: shape of the mean-ensemble table, then the keys of both result dicts.
print(
    "test dataset size is: ",
    ensemble_results['mean_ensemble'].shape,
    "non ensemble",
    task_results.keys(),
    ensemble_results.keys(),
    sep="\n",
)

test dataset size is: 
(348, 6)
non ensemble
dict_keys(['always_0', 'always_1', 'always_2', 'always_3', 'always_4', 'CNN_20_ENMO_HRV', 'CNN_50_ENMO_HRV', 'CNN_100_ENMO_HRV', 'LSTM_20_ENMO_HRV', 'LSTM_50_ENMO_HRV', 'LSTM_100_ENMO_HRV', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])
dict_keys(['stages', 'CNN_20', 'CNN_50', 'CNN_100', 'LSTM_20', 'LSTM_50', 'LSTM_100', 'max_ensemble', 'mean_ensemble'])


In [285]:
# LSTM_50_ENMO_HRV vs. the mean ensemble; task_results / ensemble_results are expected
# to be the 5-class results loaded by the setup cell above.
ttest(
    task_results,
    ensemble_results,
    'LSTM_50_ENMO_HRV',
    'mean_ensemble',
    ['f1-score', 'accuracy', 'cohen-kappa'],
)

T-Test for metric: f1-score, the p value = 0.028
T-Test for metric: accuracy, the p value = 0.016
T-Test for metric: cohen-kappa, the p value = 0.004


### Non-ensemble method: Task 1

##### Compare combined sensing (ENMO + HRV) vs ENMO

In [297]:
# Comparison setup for the 2-class task: combined ENMO_HRV features vs. ENMO alone.
num_classes = 2
feature_type1 = 'ENMO_HRV'
feature_type2 = 'ENMO'
result_folder = cfg.STAGE_OUTPUT_FOLDER_HRV30s[num_classes]

# First group: per-model results for the combined ENMO_HRV features.
ENMO_HRV_PICKLE_RESULT_FILE1 = os.path.join(
    result_folder,
    sub_folder,
    "%d_stages_results_%s.pkl" % (num_classes, feature_type1),
)
with open(ENMO_HRV_PICKLE_RESULT_FILE1, "rb") as f:
    task_results1 = pickle.load(f)

# Second group: per-model results for feature_type2 (ENMO only). The variable keeps
# its ENMO_HRV_ prefix so that the existing name stays available.
ENMO_HRV_PICKLE_RESULT_FILE2 = os.path.join(
    result_folder,
    sub_folder,
    "%d_stages_results_%s.pkl" % (num_classes, feature_type2),
)
with open(ENMO_HRV_PICKLE_RESULT_FILE2, "rb") as f:
    task_results2 = pickle.load(f)

# List the algorithms available in each group. The former "test dataset size is:"
# label was removed because no size was ever printed after it.
print("non ensemble")
print(task_results1.keys())
print(task_results2.keys())

test dataset size is: 
non ensemble
dict_keys(['always_0', 'always_1', 'CNN_20_ENMO_HRV', 'CNN_50_ENMO_HRV', 'CNN_100_ENMO_HRV', 'LSTM_20_ENMO_HRV', 'LSTM_50_ENMO_HRV', 'LSTM_100_ENMO_HRV', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])
dict_keys(['always_0', 'always_1', 'CNN_20_ENMO', 'CNN_50_ENMO', 'CNN_100_ENMO', 'LSTM_20_ENMO', 'LSTM_50_ENMO', 'LSTM_100_ENMO', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])


In [298]:
# Same architecture, different inputs: CNN_100 on ENMO_HRV (task_results1) vs.
# CNN_100 on ENMO (task_results2).
ttest(
    task_results1,
    task_results2,
    'CNN_100_ENMO_HRV',
    'CNN_100_ENMO',
    ['f1-score', 'accuracy', 'cohen-kappa'],
)

T-Test for metric: f1-score, the p value = 0.347
T-Test for metric: accuracy, the p value = 0.499
T-Test for metric: cohen-kappa, the p value = 0.506


##### Compare combined sensing (ENMO + HRV) vs HRV

In [None]:
# Placeholder cell: it has no execution count and contains only a bare list of metric
# names; the comparison announced by the heading above is not implemented here.
# NOTE(review): either complete this comparison or remove the cell.
['f1-score', 'accuracy','cohen-kappa', 'precision', 'recall', 'specificity']

### Non-ensemble method: Task 2

In [8]:
# Comparison setup for the 3-class task: per-model results on ENMO_HRV features.
num_classes = 3
feature_type1 = 'ENMO_HRV'
feature_type2 = 'ENMO_HRV'
result_folder = cfg.STAGE_OUTPUT_FOLDER_HRV30s[num_classes]

# First group of per-model results.
ENMO_HRV_PICKLE_RESULT_FILE1 = os.path.join(
    result_folder,
    sub_folder,
    "%d_stages_results_%s.pkl" % (num_classes, feature_type1),
)
with open(ENMO_HRV_PICKLE_RESULT_FILE1, "rb") as f:
    task_results1 = pickle.load(f)

# Second group. feature_type2 equals feature_type1 here, so this is the same path and
# task_results2 ends up as an independently loaded copy of task_results1.
ENMO_HRV_PICKLE_RESULT_FILE2 = os.path.join(
    result_folder,
    sub_folder,
    "%d_stages_results_%s.pkl" % (num_classes, feature_type2),
)
with open(ENMO_HRV_PICKLE_RESULT_FILE2, "rb") as f:
    task_results2 = pickle.load(f)

# List the algorithms available in each group. The former "test dataset size is:"
# label was removed because no size was ever printed after it.
print("non ensemble")
print(task_results1.keys())
print(task_results2.keys())

test dataset size is: 
non ensemble
dict_keys(['always_0', 'always_1', 'always_2', 'CNN_20_ENMO_HRV', 'CNN_50_ENMO_HRV', 'CNN_100_ENMO_HRV', 'LSTM_20_ENMO_HRV', 'LSTM_50_ENMO_HRV', 'LSTM_100_ENMO_HRV', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])
dict_keys(['always_0', 'always_1', 'always_2', 'CNN_20_ENMO_HRV', 'CNN_50_ENMO_HRV', 'CNN_100_ENMO_HRV', 'LSTM_20_ENMO_HRV', 'LSTM_50_ENMO_HRV', 'LSTM_100_ENMO_HRV', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])


In [322]:
# LSTM_50_ENMO_HRV vs. Random_forest_300 within the same result set (task_results1 is
# expected to hold the 3-class ENMO_HRV results loaded above).
ttest(
    task_results1,
    task_results1,
    'LSTM_50_ENMO_HRV',
    'Random_forest_300',
    ['f1-score', 'accuracy', 'cohen-kappa', 'precision', 'recall', 'specificity'],
)

T-Test for metric: f1-score, the p value = 0.000
T-Test for metric: accuracy, the p value = 0.000
T-Test for metric: cohen-kappa, the p value = 0.000
T-Test for metric: precision, the p value = 0.000
T-Test for metric: recall, the p value = 0.000
T-Test for metric: specificity, the p value = 0.000


In [321]:
# LSTM_50_ENMO_HRV vs. CNN_20_ENMO_HRV within the same result set (task_results1 is
# expected to hold the 3-class ENMO_HRV results loaded above).
ttest(
    task_results1,
    task_results1,
    'LSTM_50_ENMO_HRV',
    'CNN_20_ENMO_HRV',
    ['f1-score', 'accuracy', 'cohen-kappa', 'precision', 'recall', 'specificity'],
)

T-Test for metric: f1-score, the p value = 0.000
T-Test for metric: accuracy, the p value = 0.000
T-Test for metric: cohen-kappa, the p value = 0.002
T-Test for metric: precision, the p value = 0.027
T-Test for metric: recall, the p value = 0.001
T-Test for metric: specificity, the p value = 0.000


In [315]:
# LSTM_50_ENMO_HRV vs. LSTM_100_ENMO_HRV within the same result set (task_results1 is
# expected to hold the 3-class ENMO_HRV results loaded above).
ttest(
    task_results1,
    task_results1,
    'LSTM_50_ENMO_HRV',
    'LSTM_100_ENMO_HRV',
    ['f1-score', 'accuracy', 'cohen-kappa', 'precision', 'recall', 'specificity'],
)

T-Test for metric: f1-score, the p value = 0.108
T-Test for metric: accuracy, the p value = 0.806
T-Test for metric: cohen-kappa, the p value = 0.470
T-Test for metric: precision, the p value = 0.755
T-Test for metric: recall, the p value = 0.021
T-Test for metric: specificity, the p value = 0.223


In [316]:
# LSTM_50_ENMO_HRV vs. LSTM_20_ENMO_HRV within the same result set (task_results1 is
# expected to hold the 3-class ENMO_HRV results loaded above).
ttest(
    task_results1,
    task_results1,
    'LSTM_50_ENMO_HRV',
    'LSTM_20_ENMO_HRV',
    ['f1-score', 'accuracy', 'cohen-kappa', 'precision', 'recall', 'specificity'],
)

T-Test for metric: f1-score, the p value = 0.051
T-Test for metric: accuracy, the p value = 0.078
T-Test for metric: cohen-kappa, the p value = 0.033
T-Test for metric: precision, the p value = 0.099
T-Test for metric: recall, the p value = 0.059
T-Test for metric: specificity, the p value = 0.068


In [318]:
# LSTM_50_ENMO_HRV vs. CNN_50_ENMO_HRV within the same result set (task_results1 is
# expected to hold the 3-class ENMO_HRV results loaded above).
ttest(
    task_results1,
    task_results1,
    'LSTM_50_ENMO_HRV',
    'CNN_50_ENMO_HRV',
    ['f1-score', 'accuracy', 'cohen-kappa', 'precision', 'recall', 'specificity'],
)

T-Test for metric: f1-score, the p value = 0.725
T-Test for metric: accuracy, the p value = 0.177
T-Test for metric: cohen-kappa, the p value = 0.966
T-Test for metric: precision, the p value = 0.592
T-Test for metric: recall, the p value = 0.126
T-Test for metric: specificity, the p value = 0.720


In [9]:
# LSTM_50_ENMO_HRV vs. CNN_100_ENMO_HRV within the same result set (task_results1 is
# expected to hold the 3-class ENMO_HRV results loaded above).
# NOTE(review): the stored output reports p = nan for cohen-kappa; check the kappa
# columns for missing values before relying on this comparison.
ttest(
    task_results1,
    task_results1,
    'LSTM_50_ENMO_HRV',
    'CNN_100_ENMO_HRV',
    ['f1-score', 'accuracy', 'cohen-kappa', 'precision', 'recall', 'specificity'],
)

T-Test for metric: f1-score, the p value = 0.138
T-Test for metric: accuracy, the p value = 0.364
T-Test for metric: cohen-kappa, the p value = nan
T-Test for metric: precision, the p value = 0.031
T-Test for metric: recall, the p value = 0.063
T-Test for metric: specificity, the p value = 0.399


  return (self.a < x) & (x < self.b)
  return (self.a < x) & (x < self.b)
  cond2 = cond0 & (x <= self.a)


### Compare between multi modality task2 vs task 1 best classifier metrics 

In [323]:
# Comparison setup across tasks: 3-class vs. 2-class results, both on ENMO_HRV features.
num_classes1 = 3
num_classes2 = 2
feature_type1 = 'ENMO_HRV'
feature_type2 = 'ENMO_HRV'

# First group: 3-class per-model results.
result_folder1 = cfg.STAGE_OUTPUT_FOLDER_HRV30s[num_classes1]
ENMO_HRV_PICKLE_RESULT_FILE1 = os.path.join(
    result_folder1,
    sub_folder,
    "%d_stages_results_%s.pkl" % (num_classes1, feature_type1),
)
with open(ENMO_HRV_PICKLE_RESULT_FILE1, "rb") as f:
    task_results1 = pickle.load(f)

# Second group: 2-class per-model results.
result_folder2 = cfg.STAGE_OUTPUT_FOLDER_HRV30s[num_classes2]
ENMO_HRV_PICKLE_RESULT_FILE2 = os.path.join(
    result_folder2,
    sub_folder,
    "%d_stages_results_%s.pkl" % (num_classes2, feature_type2),
)
with open(ENMO_HRV_PICKLE_RESULT_FILE2, "rb") as f:
    task_results2 = pickle.load(f)

# List the algorithms available in each group. The former "test dataset size is:"
# label was removed because no size was ever printed after it.
print("non ensemble")
print(task_results1.keys())
print(task_results2.keys())

test dataset size is: 
non ensemble
dict_keys(['always_0', 'always_1', 'always_2', 'CNN_20_ENMO_HRV', 'CNN_50_ENMO_HRV', 'CNN_100_ENMO_HRV', 'LSTM_20_ENMO_HRV', 'LSTM_50_ENMO_HRV', 'LSTM_100_ENMO_HRV', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])
dict_keys(['always_0', 'always_1', 'CNN_20_ENMO_HRV', 'CNN_50_ENMO_HRV', 'CNN_100_ENMO_HRV', 'LSTM_20_ENMO_HRV', 'LSTM_50_ENMO_HRV', 'LSTM_100_ENMO_HRV', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])


In [324]:
# Same model key in both groups: LSTM_50_ENMO_HRV from task_results1 (3-class, per the
# setup cell above) vs. LSTM_50_ENMO_HRV from task_results2 (2-class).
ttest(
    task_results1,
    task_results2,
    'LSTM_50_ENMO_HRV',
    'LSTM_50_ENMO_HRV',
    ['f1-score', 'accuracy', 'cohen-kappa', 'precision', 'recall', 'specificity'],
)

T-Test for metric: f1-score, the p value = 0.000
T-Test for metric: accuracy, the p value = 0.000
T-Test for metric: cohen-kappa, the p value = 0.009
T-Test for metric: precision, the p value = 0.000
T-Test for metric: recall, the p value = 0.000
T-Test for metric: specificity, the p value = 0.000


In [325]:
# Same model key in both groups: LSTM_100_ENMO_HRV from task_results1 (3-class, per the
# setup cell above) vs. LSTM_100_ENMO_HRV from task_results2 (2-class).
ttest(
    task_results1,
    task_results2,
    'LSTM_100_ENMO_HRV',
    'LSTM_100_ENMO_HRV',
    ['f1-score', 'accuracy', 'cohen-kappa', 'precision', 'recall', 'specificity'],
)

T-Test for metric: f1-score, the p value = 0.000
T-Test for metric: accuracy, the p value = 0.000
T-Test for metric: cohen-kappa, the p value = 0.004
T-Test for metric: precision, the p value = 0.000
T-Test for metric: recall, the p value = 0.000
T-Test for metric: specificity, the p value = 0.000


In [326]:
# Same model key in both groups: LSTM_20_ENMO_HRV from task_results1 (3-class, per the
# setup cell above) vs. LSTM_20_ENMO_HRV from task_results2 (2-class).
ttest(
    task_results1,
    task_results2,
    'LSTM_20_ENMO_HRV',
    'LSTM_20_ENMO_HRV',
    ['f1-score', 'accuracy', 'cohen-kappa', 'precision', 'recall', 'specificity'],
)

T-Test for metric: f1-score, the p value = 0.000
T-Test for metric: accuracy, the p value = 0.000
T-Test for metric: cohen-kappa, the p value = 0.001
T-Test for metric: precision, the p value = 0.000
T-Test for metric: recall, the p value = 0.000
T-Test for metric: specificity, the p value = 0.000


#### Compare the best multi-modality classifier on the 3-stage task (task 2) vs the best single-modality (HRV) classifier

In [327]:
# Comparison setup for the 3-class task: combined ENMO_HRV features vs. HRV alone.
num_classes1 = 3
num_classes2 = 3
feature_type1 = 'ENMO_HRV'
feature_type2 = 'HRV'

# First group: per-model results for the combined ENMO_HRV features.
result_folder1 = cfg.STAGE_OUTPUT_FOLDER_HRV30s[num_classes1]
ENMO_HRV_PICKLE_RESULT_FILE1 = os.path.join(
    result_folder1,
    sub_folder,
    "%d_stages_results_%s.pkl" % (num_classes1, feature_type1),
)
with open(ENMO_HRV_PICKLE_RESULT_FILE1, "rb") as f:
    task_results1 = pickle.load(f)

# Second group: per-model results for feature_type2 (HRV only). The variable keeps
# its ENMO_HRV_ prefix so that the existing name stays available.
result_folder2 = cfg.STAGE_OUTPUT_FOLDER_HRV30s[num_classes2]
ENMO_HRV_PICKLE_RESULT_FILE2 = os.path.join(
    result_folder2,
    sub_folder,
    "%d_stages_results_%s.pkl" % (num_classes2, feature_type2),
)
with open(ENMO_HRV_PICKLE_RESULT_FILE2, "rb") as f:
    task_results2 = pickle.load(f)

# List the algorithms available in each group. The former "test dataset size is:"
# label was removed because no size was ever printed after it.
print("non ensemble")
print(task_results1.keys())
print(task_results2.keys())

test dataset size is: 
non ensemble
dict_keys(['always_0', 'always_1', 'always_2', 'CNN_20_ENMO_HRV', 'CNN_50_ENMO_HRV', 'CNN_100_ENMO_HRV', 'LSTM_20_ENMO_HRV', 'LSTM_50_ENMO_HRV', 'LSTM_100_ENMO_HRV', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])
dict_keys(['always_0', 'always_1', 'always_2', 'CNN_20_HRV', 'CNN_50_HRV', 'CNN_100_HRV', 'LSTM_20_HRV', 'LSTM_50_HRV', 'LSTM_100_HRV', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])


In [329]:
# Question: for three-stage classification, is combined sensing better than the single
# HRV modality when each group is represented by its best classifier?
# Here: LSTM_50_ENMO_HRV (task_results1) vs. LSTM_100_HRV (task_results2).
ttest(
    task_results1,
    task_results2,
    'LSTM_50_ENMO_HRV',
    'LSTM_100_HRV',
    ['f1-score', 'accuracy', 'cohen-kappa', 'precision', 'recall', 'specificity'],
)

T-Test for metric: f1-score, the p value = 0.002
T-Test for metric: accuracy, the p value = 0.002
T-Test for metric: cohen-kappa, the p value = 0.000
T-Test for metric: precision, the p value = 0.018
T-Test for metric: recall, the p value = 0.003
T-Test for metric: specificity, the p value = 0.001


### Non-ensemble method: Task 3

#### Compare between multi modality task3 best classifier vs the rest

In [11]:
# Comparison setup for the 4-class task: per-model results on ENMO_HRV features.
num_classes = 4
feature_type1 = 'ENMO_HRV'
feature_type2 = 'ENMO_HRV'
result_folder = cfg.STAGE_OUTPUT_FOLDER_HRV30s[num_classes]

# First group of per-model results.
ENMO_HRV_PICKLE_RESULT_FILE1 = os.path.join(
    result_folder,
    sub_folder,
    "%d_stages_results_%s.pkl" % (num_classes, feature_type1),
)
with open(ENMO_HRV_PICKLE_RESULT_FILE1, "rb") as f:
    task_results1 = pickle.load(f)

# Second group. feature_type2 equals feature_type1 here, so this is the same path and
# task_results2 ends up as an independently loaded copy of task_results1.
ENMO_HRV_PICKLE_RESULT_FILE2 = os.path.join(
    result_folder,
    sub_folder,
    "%d_stages_results_%s.pkl" % (num_classes, feature_type2),
)
with open(ENMO_HRV_PICKLE_RESULT_FILE2, "rb") as f:
    task_results2 = pickle.load(f)

# List the algorithms available in each group. The former "test dataset size is:"
# label was removed because no size was ever printed after it.
print("non ensemble")
print(task_results1.keys())
print(task_results2.keys())

test dataset size is: 
non ensemble
dict_keys(['always_0', 'always_1', 'always_2', 'always_3', 'CNN_20_ENMO_HRV', 'CNN_50_ENMO_HRV', 'CNN_100_ENMO_HRV', 'LSTM_20_ENMO_HRV', 'LSTM_50_ENMO_HRV', 'LSTM_100_ENMO_HRV', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])
dict_keys(['always_0', 'always_1', 'always_2', 'always_3', 'CNN_20_ENMO_HRV', 'CNN_50_ENMO_HRV', 'CNN_100_ENMO_HRV', 'LSTM_20_ENMO_HRV', 'LSTM_50_ENMO_HRV', 'LSTM_100_ENMO_HRV', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])


In [331]:
# LSTM_50_ENMO_HRV vs. LSTM_100_ENMO_HRV within the same result set (task_results1 is
# expected to hold the 4-class ENMO_HRV results loaded above).
ttest(
    task_results1,
    task_results1,
    'LSTM_50_ENMO_HRV',
    'LSTM_100_ENMO_HRV',
    ['f1-score', 'accuracy', 'cohen-kappa', 'precision', 'recall', 'specificity'],
)

T-Test for metric: f1-score, the p value = 0.462
T-Test for metric: accuracy, the p value = 0.950
T-Test for metric: cohen-kappa, the p value = 0.107
T-Test for metric: precision, the p value = 0.053
T-Test for metric: recall, the p value = 0.026
T-Test for metric: specificity, the p value = 0.087


In [332]:
# LSTM_50_ENMO_HRV vs. CNN_100_ENMO_HRV within the same result set (task_results1 is
# expected to hold the 4-class ENMO_HRV results loaded above).
ttest(
    task_results1,
    task_results1,
    'LSTM_50_ENMO_HRV',
    'CNN_100_ENMO_HRV',
    ['f1-score', 'accuracy', 'cohen-kappa', 'precision', 'recall', 'specificity'],
)

T-Test for metric: f1-score, the p value = 0.344
T-Test for metric: accuracy, the p value = 0.079
T-Test for metric: cohen-kappa, the p value = 0.087
T-Test for metric: precision, the p value = 0.918
T-Test for metric: recall, the p value = 0.663
T-Test for metric: specificity, the p value = 0.147


In [334]:
# LSTM_50_ENMO_HRV vs. LSTM_20_ENMO_HRV within the same result set (task_results1 is
# expected to hold the 4-class ENMO_HRV results loaded above).
ttest(
    task_results1,
    task_results1,
    'LSTM_50_ENMO_HRV',
    'LSTM_20_ENMO_HRV',
    ['f1-score', 'accuracy', 'cohen-kappa', 'precision', 'recall', 'specificity'],
)

T-Test for metric: f1-score, the p value = 0.000
T-Test for metric: accuracy, the p value = 0.007
T-Test for metric: cohen-kappa, the p value = 0.003
T-Test for metric: precision, the p value = 0.002
T-Test for metric: recall, the p value = 0.000
T-Test for metric: specificity, the p value = 0.001


In [12]:
# CNN_20_ENMO_HRV vs. Random_forest_300 within the same result set (task_results1 is
# expected to hold the 4-class ENMO_HRV results loaded above).
# NOTE(review): the stored output reports p = nan for cohen-kappa; check the kappa
# columns for missing values before relying on this comparison.
ttest(
    task_results1,
    task_results1,
    'CNN_20_ENMO_HRV',
    'Random_forest_300',
    ['f1-score', 'accuracy', 'cohen-kappa', 'precision', 'recall', 'specificity'],
)

T-Test for metric: f1-score, the p value = 0.000
T-Test for metric: accuracy, the p value = 0.000
T-Test for metric: cohen-kappa, the p value = nan
T-Test for metric: precision, the p value = 0.000
T-Test for metric: recall, the p value = 0.000
T-Test for metric: specificity, the p value = 0.000


In [336]:
# LSTM_50_ENMO_HRV vs. CNN_20_ENMO_HRV within the same result set (task_results1 is
# expected to hold the 4-class ENMO_HRV results loaded above).
ttest(
    task_results1,
    task_results1,
    'LSTM_50_ENMO_HRV',
    'CNN_20_ENMO_HRV',
    ['f1-score', 'accuracy', 'cohen-kappa', 'precision', 'recall', 'specificity'],
)

T-Test for metric: f1-score, the p value = 0.000
T-Test for metric: accuracy, the p value = 0.000
T-Test for metric: cohen-kappa, the p value = 0.000
T-Test for metric: precision, the p value = 0.000
T-Test for metric: recall, the p value = 0.000
T-Test for metric: specificity, the p value = 0.000


#### Compare the 4 stages ENMO_HRV vs HRV

In [337]:
# Comparison setup for the 4-class task: combined ENMO_HRV features vs. HRV alone.
num_classes1 = 4
num_classes2 = 4
feature_type1 = 'ENMO_HRV'
feature_type2 = 'HRV'

# First group: per-model results for the combined ENMO_HRV features.
result_folder1 = cfg.STAGE_OUTPUT_FOLDER_HRV30s[num_classes1]
ENMO_HRV_PICKLE_RESULT_FILE1 = os.path.join(
    result_folder1,
    sub_folder,
    "%d_stages_results_%s.pkl" % (num_classes1, feature_type1),
)
with open(ENMO_HRV_PICKLE_RESULT_FILE1, "rb") as f:
    task_results1 = pickle.load(f)

# Second group: per-model results for feature_type2 (HRV only). The variable keeps
# its ENMO_HRV_ prefix so that the existing name stays available.
result_folder2 = cfg.STAGE_OUTPUT_FOLDER_HRV30s[num_classes2]
ENMO_HRV_PICKLE_RESULT_FILE2 = os.path.join(
    result_folder2,
    sub_folder,
    "%d_stages_results_%s.pkl" % (num_classes2, feature_type2),
)
with open(ENMO_HRV_PICKLE_RESULT_FILE2, "rb") as f:
    task_results2 = pickle.load(f)

# List the algorithms available in each group. The former "test dataset size is:"
# label was removed because no size was ever printed after it.
print("non ensemble")
print(task_results1.keys())
print(task_results2.keys())

test dataset size is: 
non ensemble
dict_keys(['always_0', 'always_1', 'always_2', 'always_3', 'CNN_20_ENMO_HRV', 'CNN_50_ENMO_HRV', 'CNN_100_ENMO_HRV', 'LSTM_20_ENMO_HRV', 'LSTM_50_ENMO_HRV', 'LSTM_100_ENMO_HRV', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])
dict_keys(['always_0', 'always_1', 'always_2', 'always_3', 'CNN_20_HRV', 'CNN_50_HRV', 'CNN_100_HRV', 'LSTM_20_HRV', 'LSTM_50_HRV', 'LSTM_100_HRV', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])


In [338]:
# Question: for four-stage classification, is combined sensing better than the single
# HRV modality when each group is represented by its best classifier?
# Here: LSTM_50_ENMO_HRV (task_results1) vs. LSTM_100_HRV (task_results2).
ttest(
    task_results1,
    task_results2,
    'LSTM_50_ENMO_HRV',
    'LSTM_100_HRV',
    ['f1-score', 'accuracy', 'cohen-kappa', 'precision', 'recall', 'specificity'],
)

T-Test for metric: f1-score, the p value = 0.004
T-Test for metric: accuracy, the p value = 0.000
T-Test for metric: cohen-kappa, the p value = 0.000
T-Test for metric: precision, the p value = 0.113
T-Test for metric: recall, the p value = 0.001
T-Test for metric: specificity, the p value = 0.000


### Non-ensemble method: Task 4

In [339]:
# Comparison setup for the 5-class task: per-model results on ENMO_HRV features.
num_classes = 5
feature_type1 = 'ENMO_HRV'
feature_type2 = 'ENMO_HRV'
result_folder = cfg.STAGE_OUTPUT_FOLDER_HRV30s[num_classes]

# First group of per-model results.
ENMO_HRV_PICKLE_RESULT_FILE1 = os.path.join(
    result_folder,
    sub_folder,
    "%d_stages_results_%s.pkl" % (num_classes, feature_type1),
)
with open(ENMO_HRV_PICKLE_RESULT_FILE1, "rb") as f:
    task_results1 = pickle.load(f)

# Second group. feature_type2 equals feature_type1 here, so this is the same path and
# task_results2 ends up as an independently loaded copy of task_results1.
ENMO_HRV_PICKLE_RESULT_FILE2 = os.path.join(
    result_folder,
    sub_folder,
    "%d_stages_results_%s.pkl" % (num_classes, feature_type2),
)
with open(ENMO_HRV_PICKLE_RESULT_FILE2, "rb") as f:
    task_results2 = pickle.load(f)

# List the algorithms available in each group. The former "test dataset size is:"
# label was removed because no size was ever printed after it.
print("non ensemble")
print(task_results1.keys())
print(task_results2.keys())

test dataset size is: 
non ensemble
dict_keys(['always_0', 'always_1', 'always_2', 'always_3', 'always_4', 'CNN_20_ENMO_HRV', 'CNN_50_ENMO_HRV', 'CNN_100_ENMO_HRV', 'LSTM_20_ENMO_HRV', 'LSTM_50_ENMO_HRV', 'LSTM_100_ENMO_HRV', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])
dict_keys(['always_0', 'always_1', 'always_2', 'always_3', 'always_4', 'CNN_20_ENMO_HRV', 'CNN_50_ENMO_HRV', 'CNN_100_ENMO_HRV', 'LSTM_20_ENMO_HRV', 'LSTM_50_ENMO_HRV', 'LSTM_100_ENMO_HRV', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])


In [340]:
# LSTM_50_ENMO_HRV vs. LSTM_100_ENMO_HRV within the same result set (task_results1 is
# expected to hold the 5-class ENMO_HRV results loaded above).
ttest(
    task_results1,
    task_results1,
    'LSTM_50_ENMO_HRV',
    'LSTM_100_ENMO_HRV',
    ['f1-score', 'accuracy', 'cohen-kappa', 'precision', 'recall', 'specificity'],
)

T-Test for metric: f1-score, the p value = 0.364
T-Test for metric: accuracy, the p value = 0.928
T-Test for metric: cohen-kappa, the p value = 0.550
T-Test for metric: precision, the p value = 0.471
T-Test for metric: recall, the p value = 0.655
T-Test for metric: specificity, the p value = 0.866


In [341]:
# LSTM_50_ENMO_HRV vs. CNN_100_ENMO_HRV within the same result set (task_results1 is
# expected to hold the 5-class ENMO_HRV results loaded above).
ttest(
    task_results1,
    task_results1,
    'LSTM_50_ENMO_HRV',
    'CNN_100_ENMO_HRV',
    ['f1-score', 'accuracy', 'cohen-kappa', 'precision', 'recall', 'specificity'],
)

T-Test for metric: f1-score, the p value = 0.001
T-Test for metric: accuracy, the p value = 0.432
T-Test for metric: cohen-kappa, the p value = 0.940
T-Test for metric: precision, the p value = 0.000
T-Test for metric: recall, the p value = 0.005
T-Test for metric: specificity, the p value = 0.520


In [342]:
# LSTM_50_ENMO_HRV vs. CNN_50_ENMO_HRV within the same result set (task_results1 is
# expected to hold the 5-class ENMO_HRV results loaded above).
ttest(
    task_results1,
    task_results1,
    'LSTM_50_ENMO_HRV',
    'CNN_50_ENMO_HRV',
    ['f1-score', 'accuracy', 'cohen-kappa', 'precision', 'recall', 'specificity'],
)

T-Test for metric: f1-score, the p value = 0.254
T-Test for metric: accuracy, the p value = 0.290
T-Test for metric: cohen-kappa, the p value = 0.810
T-Test for metric: precision, the p value = 0.083
T-Test for metric: recall, the p value = 0.050
T-Test for metric: specificity, the p value = 0.795


In [343]:
# LSTM_50_ENMO_HRV vs. SGD_log within the same result set (task_results1 is expected
# to hold the 5-class ENMO_HRV results loaded above).
ttest(
    task_results1,
    task_results1,
    'LSTM_50_ENMO_HRV',
    'SGD_log',
    ['f1-score', 'accuracy', 'cohen-kappa', 'precision', 'recall', 'specificity'],
)

T-Test for metric: f1-score, the p value = 0.000
T-Test for metric: accuracy, the p value = 0.000
T-Test for metric: cohen-kappa, the p value = 0.000
T-Test for metric: precision, the p value = 0.000
T-Test for metric: recall, the p value = 0.000
T-Test for metric: specificity, the p value = 0.000


In [344]:
# LSTM_50_ENMO_HRV vs. LSTM_20_ENMO_HRV within the same result set (task_results1 is
# expected to hold the 5-class ENMO_HRV results loaded above).
ttest(
    task_results1,
    task_results1,
    'LSTM_50_ENMO_HRV',
    'LSTM_20_ENMO_HRV',
    ['f1-score', 'accuracy', 'cohen-kappa', 'precision', 'recall', 'specificity'],
)

T-Test for metric: f1-score, the p value = 0.215
T-Test for metric: accuracy, the p value = 0.120
T-Test for metric: cohen-kappa, the p value = 0.107
T-Test for metric: precision, the p value = 0.000
T-Test for metric: recall, the p value = 0.891
T-Test for metric: specificity, the p value = 0.381


In [345]:
# LSTM_50_ENMO_HRV vs. CNN_20_ENMO_HRV within the same result set (task_results1 is
# expected to hold the 5-class ENMO_HRV results loaded above).
ttest(
    task_results1,
    task_results1,
    'LSTM_50_ENMO_HRV',
    'CNN_20_ENMO_HRV',
    ['f1-score', 'accuracy', 'cohen-kappa', 'precision', 'recall', 'specificity'],
)

T-Test for metric: f1-score, the p value = 0.067
T-Test for metric: accuracy, the p value = 0.001
T-Test for metric: cohen-kappa, the p value = 0.013
T-Test for metric: precision, the p value = 0.009
T-Test for metric: recall, the p value = 0.632
T-Test for metric: specificity, the p value = 0.055


## This is the data science way to T test

In [5]:
# Comparison setup: 3-class results on ENMO_HRV features, single models vs. ensemble.
# NOTE(review): this cell repeats the 3-class ensemble setup cell found earlier in the notebook.
num_classes = 3
feature_type = 'ENMO_HRV'
result_folder = cfg.STAGE_OUTPUT_FOLDER_HRV30s[num_classes]

# Per-model results, stored under the analysis sub-folder selected at the top of the notebook.
ENMO_HRV_PICKLE_RESULT_FILE = os.path.join(
    result_folder,
    sub_folder,
    "%d_stages_results_%s.pkl" % (num_classes, feature_type),
)
with open(ENMO_HRV_PICKLE_RESULT_FILE, "rb") as f:
    task_results = pickle.load(f)

# Ensemble results, stored under the fixed "ensemble_all" folder.
ENSEMBLE_PICKLE_RESULT_FILE = os.path.join(
    result_folder,
    "ensemble_all",
    "%d_stages_ensemble_results_%s.pkl" % (num_classes, feature_type),
)
with open(ENSEMBLE_PICKLE_RESULT_FILE, "rb") as f:
    ensemble_results = pickle.load(f)

# Sanity output: shape of the mean-ensemble table, then the keys of both result dicts.
print(
    "test dataset size is: ",
    ensemble_results['mean_ensemble'].shape,
    "non ensemble",
    task_results.keys(),
    ensemble_results.keys(),
    sep="\n",
)

test dataset size is: 
(348, 6)
non ensemble
dict_keys(['always_0', 'always_1', 'always_2', 'CNN_20_ENMO_HRV', 'CNN_50_ENMO_HRV', 'CNN_100_ENMO_HRV', 'LSTM_20_ENMO_HRV', 'LSTM_50_ENMO_HRV', 'LSTM_100_ENMO_HRV', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])
dict_keys(['stages', 'CNN_20', 'CNN_50', 'CNN_100', 'LSTM_20', 'LSTM_50', 'LSTM_100', 'max_ensemble', 'mean_ensemble'])


## This is the T test for time deviation 

In [6]:
# Setup for the time-deviation t-tests: 3-class per-model "minutes" results on ENMO_HRV.
num_classes = 3
feature_type1 = 'ENMO_HRV'
feature_type2 = 'ENMO_HRV'
result_folder = cfg.STAGE_OUTPUT_FOLDER_HRV30s[num_classes]

# The sub-folder mapping is read from the notebook's Config instance (cfg), the same
# way sub_folder is derived in the setup cell; the bare ANALYSIS_SUB_FOLDER name used
# previously is not defined by that setup.
# First group of minutes results, taken from the "recording_period" analysis folder.
ENMO_HRV_PICKLE_RESULT_FILE1 = os.path.join(
    result_folder,
    cfg.ANALYSIS_SUB_FOLDER["recording_period"],
    "%d_stages_minutes_results_%s.pkl" % (num_classes, feature_type1),
)
with open(ENMO_HRV_PICKLE_RESULT_FILE1, "rb") as f:
    task_results1 = pickle.load(f)

# Second group. feature_type2 equals feature_type1 here, so this is the same path and
# task_results2 ends up as an independently loaded copy of task_results1.
ENMO_HRV_PICKLE_RESULT_FILE2 = os.path.join(
    result_folder,
    cfg.ANALYSIS_SUB_FOLDER["recording_period"],
    "%d_stages_minutes_results_%s.pkl" % (num_classes, feature_type2),
)
with open(ENMO_HRV_PICKLE_RESULT_FILE2, "rb") as f:
    task_results2 = pickle.load(f)

# List the algorithms available in each group. The former "test dataset size is:"
# label was removed because no size was ever printed after it.
print("non ensemble")
print(task_results1.keys())
print(task_results2.keys())

test dataset size is: 
non ensemble
dict_keys(['always_0', 'always_1', 'always_2', 'CNN_20_ENMO_HRV', 'CNN_50_ENMO_HRV', 'CNN_100_ENMO_HRV', 'LSTM_20_ENMO_HRV', 'LSTM_50_ENMO_HRV', 'LSTM_100_ENMO_HRV', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])
dict_keys(['always_0', 'always_1', 'always_2', 'CNN_20_ENMO_HRV', 'CNN_50_ENMO_HRV', 'CNN_100_ENMO_HRV', 'LSTM_20_ENMO_HRV', 'LSTM_50_ENMO_HRV', 'LSTM_100_ENMO_HRV', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])


In [7]:
# CNN_100_ENMO_HRV vs. LSTM_100_ENMO_HRV on the columns named '0', '1' and '2'
# (task_results1 is expected to hold the 3-class minutes results loaded above).
# NOTE(review): the columns presumably correspond to the three stage labels - confirm.
ttest(
    task_results1,
    task_results1,
    'CNN_100_ENMO_HRV',
    'LSTM_100_ENMO_HRV',
    ['0', '1', '2'],
)

T-Test for metric: 0, the p value = 0.000
T-Test for metric: 1, the p value = 0.485
T-Test for metric: 2, the p value = 0.000


In [13]:
# Setup for the time-deviation t-tests: 4-class per-model "minutes" results on ENMO_HRV.
num_classes = 4
feature_type1 = 'ENMO_HRV'
feature_type2 = 'ENMO_HRV'
result_folder = cfg.STAGE_OUTPUT_FOLDER_HRV30s[num_classes]

# The sub-folder mapping is read from the notebook's Config instance (cfg), the same
# way sub_folder is derived in the setup cell; the bare ANALYSIS_SUB_FOLDER name used
# previously is not defined by that setup.
# First group of minutes results, taken from the "recording_period" analysis folder.
ENMO_HRV_PICKLE_RESULT_FILE1 = os.path.join(
    result_folder,
    cfg.ANALYSIS_SUB_FOLDER["recording_period"],
    "%d_stages_minutes_results_%s.pkl" % (num_classes, feature_type1),
)
with open(ENMO_HRV_PICKLE_RESULT_FILE1, "rb") as f:
    task_results1 = pickle.load(f)

# Second group. feature_type2 equals feature_type1 here, so this is the same path and
# task_results2 ends up as an independently loaded copy of task_results1.
ENMO_HRV_PICKLE_RESULT_FILE2 = os.path.join(
    result_folder,
    cfg.ANALYSIS_SUB_FOLDER["recording_period"],
    "%d_stages_minutes_results_%s.pkl" % (num_classes, feature_type2),
)
with open(ENMO_HRV_PICKLE_RESULT_FILE2, "rb") as f:
    task_results2 = pickle.load(f)

# List the algorithms available in each group. The former "test dataset size is:"
# label was removed because no size was ever printed after it.
print("non ensemble")
print(task_results1.keys())
print(task_results2.keys())

test dataset size is: 
non ensemble
dict_keys(['always_0', 'always_1', 'always_2', 'always_3', 'CNN_20_ENMO_HRV', 'CNN_50_ENMO_HRV', 'CNN_100_ENMO_HRV', 'LSTM_20_ENMO_HRV', 'LSTM_50_ENMO_HRV', 'LSTM_100_ENMO_HRV', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])
dict_keys(['always_0', 'always_1', 'always_2', 'always_3', 'CNN_20_ENMO_HRV', 'CNN_50_ENMO_HRV', 'CNN_100_ENMO_HRV', 'LSTM_20_ENMO_HRV', 'LSTM_50_ENMO_HRV', 'LSTM_100_ENMO_HRV', 'SGD_hinge', 'SGD_log', 'SGD_perceptron', 'Random_forest_300', 'stages'])


In [14]:
# CNN_100_ENMO_HRV vs. LSTM_50_ENMO_HRV on the columns named '0' to '3'
# (task_results1 is expected to hold the 4-class minutes results loaded above).
# NOTE(review): the columns presumably correspond to the four stage labels - confirm.
ttest(
    task_results1,
    task_results1,
    'CNN_100_ENMO_HRV',
    'LSTM_50_ENMO_HRV',
    ['0', '1', '2', '3'],
)

T-Test for metric: 0, the p value = 0.004
T-Test for metric: 1, the p value = 0.558
T-Test for metric: 2, the p value = 0.501
T-Test for metric: 3, the p value = 0.002
