## Single channel sleep scoring comparison sheet

Calculations carried out to fairly compare papers

Initial formatting:
* Confusion matrices are gathered from original articles and formatted in the order W, S1, S2, S3(&4), R, transposed if necessary
* If only the 6-class confmat is available, classes S3 and S4 are merged
* Some studies leave many more W stages than other stages which articifially increases claimed performance. For those studies, we bring down the number of Wake epochs (according to expert label) to the cardinal of the second biggest class.

Then we calculate some metrics for each of the studies:
* accuracy
* kappa
* f1-micro
* f1-macro

And aggregate everything in a pandas dataframe which we can export to the latex article. 

In [1]:
import numpy as np
import sklearn.metrics
import pandas as pd

In [2]:
# order: W, S1, S2, S3(&4), REM
# if matrix in article is not given in this order, we copy it as such and reorder it later to avoid errors
def reorder(cm, new_order=[4, 0, 1, 2, 3]):
    cm = cm[new_order, :]
    cm = cm[:, new_order]
    return cm

# of only 6-class CM is available, transform it into 5-class by merging 3 and 4
def merge(cm, indices_keep=[0, 1, 4, 5], indices_merge=[2, 3]):
    # lines
    cm_keep = cm[indices_keep, :]
    cm_ind = cm[indices_merge, :]
    cm_ind = np.sum(cm_ind, axis=0, keepdims=True)
    ind_toinsert = np.min(np.where((np.array(indices_keep)>indices_merge[0])==1)[0])
    cm = np.vstack([cm_keep[:ind_toinsert, :], cm_ind, cm_keep[ind_toinsert:, :]])
    # same for columns
    cm_keep = cm[:, indices_keep]
    cm_ind = cm[:, indices_merge]
    cm_ind = np.sum(cm_ind, axis=1, keepdims=True)
    cm = np.hstack([cm_keep[:, :ind_toinsert], cm_ind, cm_keep[:, ind_toinsert:]])
    return cm

def reduce_wake(cm):
    # for confmat W, 1, 2, 3, R: if number of true wake epochs is higher than 
    # the biggest other class (always stage 2), limit the number of wake epochs to 
    # the number of S2 epochs
    n_eps = np.sum(cm, axis=1)
    n_wake = n_eps[0]
    n_s2 = n_eps[2]
    if n_wake > n_s2:
        cm_wake = cm[0, :]
        cm_wake = [np.round(n*n_s2/float(n_wake)) for n in cm_wake]
        cm[0, :] = cm_wake
    return cm

In [3]:
def get_y_true_pred(cm):
    # get lists y_true, y_pred from confmat
    y_true = []
    y_pred = []
    for i, elt_i in enumerate(cm): #elmt_i is the line
        for j, elt_j in enumerate(elt_i): #elmt_j is the value (number of occurences)
            y_true += [i]*elt_j
            y_pred += [j]*elt_j
    return y_true, y_pred

In [4]:
def get_metrics(y_true, y_pred):
    acc = sklearn.metrics.accuracy_score(y_true, y_pred)
    kappa = sklearn.metrics.cohen_kappa_score(y_true, y_pred)
    f1_micro = sklearn.metrics.f1_score(y_true, y_pred, average='micro')
    f1_macro = sklearn.metrics.f1_score(y_true, y_pred, average='macro')
    sklearn_report = sklearn.metrics.classification_report(y_true, y_pred, target_names=['W', 'S1', 'S2', 'S3(&4)', 'R'])
    return {
        'accuracy': acc,
        'kappa': kappa,
        'f1_micro': f1_micro,
        'f1_macro': f1_macro,
        'sklearn report': sklearn_report,
    }

def add_infos(d, citation, dataset, nb_patients, scoring_rule, channel, model, split_type, cross_val, nb_raters_per_rec):
    d_ = {
        'dataset': dataset,
        'patients': nb_patients,
        'scoring rule': scoring_rule,
        'channel': channel, 
        'model': model,
        'split type': split_type,
        'cross_val': cross_val,
        'raters per record': nb_raters_per_rec,
    }
    # return {'article': '\cite{%s}' %citation, **d_, **d} #python 3.5
    return dict({'article': citation}.items() + d_.items() + d.items())

def print_metrics(d, print_infos=True, print_metrics=True):
    if print_infos:
        print("Dataset: ", d['dataset'])
        print("Number of patients: ", d['patients'])
        print("Scoring rule: ", d['scoring rule'])
        print("Channel: ", d['channel'])
        print("Model: ", d['model'])
        print("Split type: ", d['split type'])
        print("Cross_val: ", d['cross_val'])
        print("Number of raters per record: ", d['raters per record'])
        print('\n')
    if print_metrics:
        print("Accuracy: ", d['accuracy'])
        print("Kappa: ", d['kappa'])
        print('f1-micro: ', d['f1_micro'])
        print('f1_macro: ', d['f1_macro'])
        print('sklearn_report: ', '\n', d['sklearn report'])

## Tsinalis_2016
Tsinalis, O., Matthews, P. M., Guo, Y., & Zafeiriou, S. (2016). Automatic sleep stage scoring with single-channel EEG using convolutional neural networks. arXiv preprint arXiv:1610.01683.

In [5]:
cm_tsinalis2016 = np.array([[1657, 259, 9, 427, 410],
                            [1534, 12858, 1263, 1257, 666],
                            [9, 399, 5097, 1, 85],
                            [1019, 643, 3, 5686, 360],
                            [605, 171, 47, 175, 2382]])
# order: 1, 2, 3, R, W
cm_tsinalis2016 = reorder(cm_tsinalis2016, [4, 0, 1, 2, 3])
print(cm_tsinalis2016, '\n')
d_tsinalis_2016 = get_metrics(*get_y_true_pred(cm_tsinalis2016))
d_tsinalis_2016 = add_infos(d_tsinalis_2016, 'tsinalis_2016', 'Sleep-EDF', 20, 'R&K', 'Fpz-Cz', 'CNN', 'record', '20-fold CV', 1)
print_metrics(d_tsinalis_2016)

(array([[ 2382,   605,   171,    47,   175],
       [  410,  1657,   259,     9,   427],
       [  666,  1534, 12858,  1263,  1257],
       [   85,     9,   399,  5097,     1],
       [  360,  1019,   643,     3,  5686]]), '\n')
('Dataset: ', 'Sleep-EDF')
('Number of patients: ', 20)
('Scoring rule: ', 'R&K')
('Channel: ', 'Fpz-Cz')
('Model: ', 'CNN')
('Split type: ', 'record')
('Cross_val: ', '20-fold CV')
('Number of raters per record: ', 1)


('Accuracy: ', 0.74766355140186913)
('Kappa: ', 0.65349732580287201)
('f1-micro: ', 0.74766355140186913)
('f1_macro: ', 0.69821619127089174)
('sklearn_report: ', '\n', '             precision    recall  f1-score   support\n\n          W       0.61      0.70      0.65      3380\n         S1       0.34      0.60      0.44      2762\n         S2       0.90      0.73      0.81     17578\n     S3(&4)       0.79      0.91      0.85      5591\n          R       0.75      0.74      0.75      7711\n\navg / total       0.78      0.75      0.76     37022\

## Supratak 2017

SUPRATAK, Akara, DONG, Hao, WU, Chao, et al. DeepSleepNet: a Model for Automatic Sleep Stage Scoring based on Raw Single-Channel EEG. arXiv preprint arXiv:1703.04046, 2017.

Rem: particularity: use of EOG:
The channel used is F4-EOG: clever!

In [6]:
cm_supratak_2017 = np.array([[5433, 572, 107, 13, 102],
                            [452, 2802, 827, 4, 639],
                            [185, 906, 26786, 1158, 499],
                            [18, 4, 1552, 6077, 0],
                            [132, 356, 533, 1, 9442]])
print(cm_supratak_2017, '\n')
d_supratak_2017 = get_metrics(*get_y_true_pred(cm_supratak_2017))
d_supratak_2017 = add_infos(d_supratak_2017, 'supratak_2017', 'MASS', 32, 'AASM', 'F4-EOG', 'CNN-LSTM', 'record', '31-fold CV', 1)
print_metrics(d_supratak_2017)

(array([[ 5433,   572,   107,    13,   102],
       [  452,  2802,   827,     4,   639],
       [  185,   906, 26786,  1158,   499],
       [   18,     4,  1552,  6077,     0],
       [  132,   356,   533,     1,  9442]]), '\n')
('Dataset: ', 'MASS')
('Number of patients: ', 32)
('Scoring rule: ', 'AASM')
('Channel: ', 'F4-EOG')
('Model: ', 'CNN-LSTM')
('Split type: ', 'record')
('Cross_val: ', '31-fold CV')
('Number of raters per record: ', 1)


('Accuracy: ', 0.86245733788395906)
('Kappa: ', 0.79692134756215127)
('f1-micro: ', 0.86245733788395906)
('f1_macro: ', 0.81655429597108298)
('sklearn_report: ', '\n', '             precision    recall  f1-score   support\n\n          W       0.87      0.87      0.87      6227\n         S1       0.60      0.59      0.60      4724\n         S2       0.90      0.91      0.90     29534\n     S3(&4)       0.84      0.79      0.82      7651\n          R       0.88      0.90      0.89     10464\n\navg / total       0.86      0.86      0.86     58600

## Liang 2012

Liang, S. F., Kuo, C. E., Hu, Y. H., Pan, Y. H., & Wang, Y. H. (2012). Automatic stage scoring of single-channel sleep EEG by using multiscale entropy and autoregressive models. IEEE Transactions on Instrumentation and Measurement, 61(6), 1649-1657.

In [7]:
cm_liang_2012 = np.array([[195, 24, 4, 0, 3],
                     [31, 72, 48, 3, 69],
                     [12, 103, 4078, 216, 220],
                     [1, 4, 196, 1309, 0],
                     [8, 8, 22, 6, 1818]])
print(cm_liang_2012, '\n')
d_liang_2012 = get_metrics(*get_y_true_pred(cm_liang_2012))
d_liang_2012 = add_infos(d_liang_2012, 'liang_2012', 'Custom', 20, 'R&K', 'C3-A2', 'Multiscale entropy, AR features, smoothing rules', 'epoch', '50/50', 2)
print_metrics(d_liang_2012)

(array([[ 195,   24,    4,    0,    3],
       [  31,   72,   48,    3,   69],
       [  12,  103, 4078,  216,  220],
       [   1,    4,  196, 1309,    0],
       [   8,    8,   22,    6, 1818]]), '\n')
('Dataset: ', 'Custom')
('Number of patients: ', 20)
('Scoring rule: ', 'R&K')
('Channel: ', 'C3-A2')
('Model: ', 'Multiscale entropy, AR features, smoothing rules')
('Split type: ', 'epoch')
('Cross_val: ', '50/50')
('Number of raters per record: ', 2)


('Accuracy: ', 0.88426035502958578)
('Kappa: ', 0.81605727956860696)
('f1-micro: ', 0.88426035502958578)
('f1_macro: ', 0.76806520447137139)
('sklearn_report: ', '\n', '             precision    recall  f1-score   support\n\n          W       0.79      0.86      0.82       226\n         S1       0.34      0.32      0.33       223\n         S2       0.94      0.88      0.91      4629\n     S3(&4)       0.85      0.87      0.86      1510\n          R       0.86      0.98      0.92      1862\n\navg / total       0.89      0.88      0.88 

## Zhu 2014

Zhu, G., Li, Y., & Wen, P. P. (2014). Analysis and classification of sleep stages based on difference visibility graphs from a single-channel EEG signal. IEEE journal of biomedical and health informatics, 18(6), 1813-1821.

In [8]:
cm_zhu_2014 = np.array([[3863, 66, 23, 4, 20],
                   [11, 50, 5, 3, 12],
                   [5, 80, 1619, 139, 159],
                   [1, 1, 74, 504, 0],
                   [31, 120, 75, 2, 612]])
cm_zhu_2014 = np.transpose(cm_zhu_2014)
# REM: unusually low proportion of S1 !
#print(cm_zhu_2014, '\n')
d_zhu_2014 = get_metrics(*get_y_true_pred(cm_zhu_2014))
#print_metrics(d_zhu_2014)

In [9]:
cm_rw_zhu_2014 = reduce_wake(cm_zhu_2014)
print(cm_rw_zhu_2014, '\n')
d_rw_zhu_2014 = get_metrics(*get_y_true_pred(cm_rw_zhu_2014))
d_rw_zhu_2014 = add_infos(d_rw_zhu_2014, 'zhu_2014', 'Sleep-EDF', 20, 'R&K', 'Pz-Oz', 'Difference visibility graph, SVM', 'epoch', '10-fold CV', 1)
print_metrics(d_rw_zhu_2014)

(array([[1774,    5,    2,    0,   14],
       [  66,   50,   80,    1,  120],
       [  23,    5, 1619,   74,   75],
       [   4,    3,  139,  504,    2],
       [  20,   12,  159,    0,  612]]), '\n')
('Dataset: ', 'Sleep-EDF')
('Number of patients: ', 20)
('Scoring rule: ', 'R&K')
('Channel: ', 'Pz-Oz')
('Model: ', 'Difference visibility graph, SVM')
('Split type: ', 'epoch')
('Cross_val: ', '10-fold CV')
('Number of raters per record: ', 1)


('Accuracy: ', 0.85008390826030211)
('Kappa: ', 0.7919216100492571)
('f1-micro: ', 0.85008390826030211)
('f1_macro: ', 0.72871014034756576)
('sklearn_report: ', '\n', '             precision    recall  f1-score   support\n\n          W       0.94      0.99      0.96      1795\n         S1       0.67      0.16      0.26       317\n         S2       0.81      0.90      0.85      1796\n     S3(&4)       0.87      0.77      0.82       652\n          R       0.74      0.76      0.75       803\n\navg / total       0.84      0.85      0.84      5363

## Fraiwan 2013

Fraiwan, L., Lweesy, K., Khasawneh, N., Wenz, H., & Dickhaus, H. (2012). Automated sleep stage identification system based on time–frequency analysis of a single EEG channel and random forest classifier. Computer methods and programs in biomedicine, 108(1), 10-19.

In [10]:
cm_fraiwan_2012 = np.array([[2407, 56, 69, 14, 33],
                           [89, 185, 85, 9, 60],
                           [11, 52, 1897, 86, 92],
                           [38, 8, 174, 482, 3],
                           [40, 48, 131, 3, 719]])
cm_fraiwan_2012 = np.transpose(cm_fraiwan_2012)
#print(cm_fraiwan_2012, '\n')
d_fraiwan_2012 = get_metrics(*get_y_true_pred(cm_fraiwan_2012))
#print_metrics(d_fraiwan_2012)

In [11]:
cm_rw_fraiwan_2012 = reduce_wake(cm_fraiwan_2012)
print(cm_rw_zhu_2014, '\n')
d_rw_fraiwan_2012 = get_metrics(*get_y_true_pred(cm_rw_fraiwan_2012))
d_rw_fraiwan_2012 = add_infos(d_rw_fraiwan_2012, 'fraiwan_2012', 'Custom', 16, 'AASM', 'C3-A1', 'Time-frequency feat., random forest', 'epoch', '67/33', 3)
print_metrics(d_rw_fraiwan_2012)

(array([[1774,    5,    2,    0,   14],
       [  66,   50,   80,    1,  120],
       [  23,    5, 1619,   74,   75],
       [   4,    3,  139,  504,    2],
       [  20,   12,  159,    0,  612]]), '\n')
('Dataset: ', 'Custom')
('Number of patients: ', 16)
('Scoring rule: ', 'AASM')
('Channel: ', 'C3-A1')
('Model: ', 'Time-frequency feat., random forest')
('Split type: ', 'epoch')
('Cross_val: ', '67/33')
('Number of raters per record: ', 3)


('Accuracy: ', 0.83465406888143856)
('Kappa: ', 0.77060637491945616)
('f1-micro: ', 0.83465406888143856)
('f1_macro: ', 0.75569797764478752)
('sklearn_report: ', '\n', '             precision    recall  f1-score   support\n\n          W       0.93      0.93      0.93      2356\n         S1       0.44      0.53      0.48       349\n         S2       0.89      0.81      0.84      2356\n     S3(&4)       0.69      0.81      0.74       594\n          R       0.77      0.79      0.78       907\n\navg / total       0.84      0.83      0.84      6562\n'

## Hassan 2016

Hassan, A. R., & Bhuiyan, M. I. H. (2016). Automatic sleep scoring using statistical features in the EMD domain and ensemble methods. Biocybernetics and Biomedical Engineering, 36(1), 248-255.

In [12]:
cm_hassan_2016 = np.array([[120, 60, 1, 33, 88],
                        [4, 1622, 61, 75, 49],
                        [2, 93, 519, 35, 1],
                        [18, 35, 3, 3945, 16],
                        [19, 130, 1, 26, 629]])
#order: 1, 2, 3, W, R
cm_hassan_2016 = reorder(cm_hassan_2016, [3, 0, 1, 2, 4])
#print(cm_hassan_2016, '\n')
d_hassan_2016 = get_metrics(*get_y_true_pred(cm_hassan_2016))
#print_metrics(d_hassan_2016)

In [13]:
cm_rw_hassan_2016 = reduce_wake(cm_hassan_2016)
print(cm_rw_hassan_2016, '\n')
d_rw_hassan_2016 = get_metrics(*get_y_true_pred(cm_rw_hassan_2016))
d_rw_hassan_2016 = add_infos(d_rw_hassan_2016, 'hassan_2016', 'Sleep-EDF', 20, 'R&K', 'Pz-Oz', 'EMD domain, ensemble', 'epoch', '0.6/0.05/0.35', 1)
print_metrics(d_rw_hassan_2016)

(array([[1779,    8,   16,    1,    7],
       [  33,  120,   60,    1,   88],
       [  75,    4, 1622,   61,   49],
       [  35,    2,   93,  519,    1],
       [  26,   19,  130,    1,  629]]), '\n')
('Dataset: ', 'Sleep-EDF')
('Number of patients: ', 20)
('Scoring rule: ', 'R&K')
('Channel: ', 'Pz-Oz')
('Model: ', 'EMD domain, ensemble')
('Split type: ', 'epoch')
('Cross_val: ', '0.6/0.05/0.35')
('Number of raters per record: ', 1)


('Accuracy: ', 0.86800520542851833)
('Kappa: ', 0.8170816167477446)
('f1-micro: ', 0.86800520542851844)
('f1_macro: ', 0.79635915965370885)
('sklearn_report: ', '\n', '             precision    recall  f1-score   support\n\n          W       0.91      0.98      0.95      1811\n         S1       0.78      0.40      0.53       302\n         S2       0.84      0.90      0.87      1811\n     S3(&4)       0.89      0.80      0.84       650\n          R       0.81      0.78      0.80       805\n\navg / total       0.86      0.87      0.86      5379\n')


## Hassan 2016_2

Hassan, A. R., & Bhuiyan, M. I. H. (2016). Computer-aided sleep staging using complete ensemble empirical mode decomposition with adaptive noise and bootstrap aggregating. Biomedical Signal Processing and Control, 24, 1-10.

In [14]:
cm_hassan_2016_2 = np.array([[142, 55, 1, 32, 72],
                          [3, 1673, 45, 54, 36],
                          [4, 43, 585, 16, 2],
                          [42, 79, 18, 3838, 51],
                          [17, 109, 4, 24, 651]])
cm_hassan_2016_2 = reorder(cm_hassan_2016_2, [3, 0, 1, 2, 4])
#print(cm_hassan_2016_2, '\n')
d_hassan_2016_2 = get_metrics(*get_y_true_pred(cm_hassan_2016_2))
#print_metrics(d_hassan_2016_2)

In [15]:
cm_rw_hassan_2016_2 = reduce_wake(cm_hassan_2016_2)
print(cm_rw_hassan_2016_2, '\n')
d_rw_hassan_2016_2 = get_metrics(*get_y_true_pred(cm_rw_hassan_2016_2))
d_rw_hassan_2016_2 = add_infos(d_rw_hassan_2016_2, 'hassan_2016_2', 'Sleep-EDF', 20, 'R&K', 'Pz-Oz', 'EMD, bootstrap aggregating', 'epoch', '50/50', 1)
print_metrics(d_rw_hassan_2016_2)

(array([[1726,   19,   36,    8,   23],
       [  32,  142,   55,    1,   72],
       [  54,    3, 1673,   45,   36],
       [  16,    4,   43,  585,    2],
       [  24,   17,  109,    4,  651]]), '\n')
('Dataset: ', 'Sleep-EDF')
('Number of patients: ', 20)
('Scoring rule: ', 'R&K')
('Channel: ', 'Pz-Oz')
('Model: ', 'EMD, bootstrap aggregating')
('Split type: ', 'epoch')
('Cross_val: ', '50/50')
('Number of raters per record: ', 1)


('Accuracy: ', 0.88791821561338291)
('Kappa: ', 0.84561829962843393)
('f1-micro: ', 0.8879182156133828)
('f1_macro: ', 0.82946612262690778)
('sklearn_report: ', '\n', '             precision    recall  f1-score   support\n\n          W       0.93      0.95      0.94      1812\n         S1       0.77      0.47      0.58       302\n         S2       0.87      0.92      0.90      1811\n     S3(&4)       0.91      0.90      0.90       650\n          R       0.83      0.81      0.82       805\n\navg / total       0.89      0.89      0.88      5380\n')


## Hassan_2016_3

Hassan, A. R., & Bhuiyan, M. I. H. (2016). A decision support system for automatic sleep staging from EEG signals using tunable Q-factor wavelet transform and spectral features. Journal of neuroscience methods, 271, 107-118.

In [16]:
cm_hassan_2016_3 = np.array([[113, 41, 1, 74, 73],
                          [7, 1656, 59, 49, 40],
                          [0, 96, 527, 27, 0],
                          [24, 5, 3, 3945, 21],
                          [33, 70, 0, 41, 661]])
cm_hassan_2016_3 = reorder(cm_hassan_2016_3, [3, 0, 1, 2, 4])
#print(cm_hassan_2016_3, '\n')
d_hassan_2016_3 = get_metrics(*get_y_true_pred(cm_hassan_2016_3))
#print_metrics(d_hassan_2016_3)

In [17]:
cm_rw_hassan_2016_3 = reduce_wake(cm_hassan_2016_3)
print(cm_rw_hassan_2016_3, '\n')
d_rw_hassan_2016_3 = get_metrics(*get_y_true_pred(cm_rw_hassan_2016_3))
d_rw_hassan_2016_3 = add_infos(d_rw_hassan_2016_3, 'hassan_2016_3', 'Sleep-EDF', 20, 'R&K', 'Pz-Oz', 'Wavelet transform, spectral features, random forest', 'epoch', '50/50, 20-fold average', 1)
print_metrics(d_rw_hassan_2016_3)

(array([[1787,   11,    2,    1,   10],
       [  74,  113,   41,    1,   73],
       [  49,    7, 1656,   59,   40],
       [  27,    0,   96,  527,    0],
       [  41,   33,   70,    0,  661]]), '\n')
('Dataset: ', 'Sleep-EDF')
('Number of patients: ', 20)
('Scoring rule: ', 'R&K')
('Channel: ', 'Pz-Oz')
('Model: ', 'Wavelet transform, spectral features, random forest')
('Split type: ', 'epoch')
('Cross_val: ', '50/50, 20-fold average')
('Number of raters per record: ', 1)


('Accuracy: ', 0.88194831753113967)
('Kappa: ', 0.8366579599753623)
('f1-micro: ', 0.88194831753113967)
('f1_macro: ', 0.80251152739785669)
('sklearn_report: ', '\n', '             precision    recall  f1-score   support\n\n          W       0.90      0.99      0.94      1811\n         S1       0.69      0.37      0.48       302\n         S2       0.89      0.91      0.90      1811\n     S3(&4)       0.90      0.81      0.85       650\n          R       0.84      0.82      0.83       805\n\navg / total       0.8

## Hassan 2017

Hassan, A. R., & Bhuiyan, M. I. H. (2017). Automated identification of sleep states from EEG signals by means of ensemble empirical mode decomposition and random under sampling boosting. Computer Methods and Programs in Biomedicine, 140, 201-210.

In [18]:
cm_hassan_2017 = np.array([[127, 16, 0, 0, 17, 142],
                        [65, 1440, 89, 36, 34, 147],
                        [0, 4, 291, 32, 9, 0],
                        [37, 80, 29, 151, 17, 0],
                        [37, 52, 7, 16, 3833, 83],
                        [10, 68, 0, 32, 47, 648]])
cm_hassan_2017 = merge(cm_hassan_2017)
cm_hassan_2017 = reorder(cm_hassan_2017, [3, 0, 1, 2, 4])
#print(cm_hassan_2017, '\n')
d_hassan_2017 = get_metrics(*get_y_true_pred(cm_hassan_2017))
#print_metrics(d_hassan_2017)

In [19]:
cm_rw_hassan_2017 = reduce_wake(cm_hassan_2017)
print(cm_rw_hassan_2017, '\n')
d_rw_hassan_2017 = get_metrics(*get_y_true_pred(cm_rw_hassan_2017))
d_rw_hassan_2017 = add_infos(d_rw_hassan_2017, 'hassan_2017', 'Sleep-EDF', 20, 'R&K', 'Pz-Oz', 'EMD, random undersampling boosting', 'epoch', '50/50, 20-fold average', 1)
print_metrics(d_rw_hassan_2017)

(array([[1723,   17,   23,   10,   37],
       [  17,  127,   16,    0,  142],
       [  34,   65, 1440,  125,  147],
       [  26,   37,   84,  503,    0],
       [  47,   10,   68,   32,  648]]), '\n')
('Dataset: ', 'Sleep-EDF')
('Number of patients: ', 20)
('Scoring rule: ', 'R&K')
('Channel: ', 'Pz-Oz')
('Model: ', 'EMD, random undersampling boosting')
('Split type: ', 'epoch')
('Cross_val: ', '50/50, 20-fold average')
('Number of raters per record: ', 1)


('Accuracy: ', 0.82577166232800292)
('Kappa: ', 0.7637422816458439)
('f1-micro: ', 0.82577166232800292)
('f1_macro: ', 0.74496855416338315)
('sklearn_report: ', '\n', '             precision    recall  f1-score   support\n\n          W       0.93      0.95      0.94      1810\n         S1       0.50      0.42      0.46       302\n         S2       0.88      0.80      0.84      1811\n     S3(&4)       0.75      0.77      0.76       650\n          R       0.67      0.80      0.73       805\n\navg / total       0.83      0.83      

## Sharma 2017

Sharma, R., Pachori, R. B., & Upadhyay, A. (2017). Automatic sleep stages classification based on iterative filtering of electroencephalogram signals. Neural Computing and Applications, 1-20.

In [20]:
cm_sharma_2017 = np.array([[7944, 11, 12, 6, 30],
                        [183, 113, 123, 4, 181],
                        [48, 4, 3334, 149, 86],
                        [13, 0, 198, 1088, 0],
                        [51, 11, 207, 0, 1339],])
#print(cm_sharma_2017, '\n')
d_sharma_2017 = get_metrics(*get_y_true_pred(cm_sharma_2017))
#print_metrics(d_sharma_2017)

In [21]:
cm_rw_sharma_2017 = reduce_wake(cm_sharma_2017)
print(cm_rw_sharma_2017, '\n')
d_rw_sharma_2017 = get_metrics(*get_y_true_pred(cm_rw_sharma_2017))
d_rw_sharma_2017 = add_infos(d_rw_sharma_2017, 'sharma_2017', 'Sleep-EDF', 20, 'R&K', 'Pz-Oz', 'Iterative filtering', 'epoch', '10-fold CV', 1)
print_metrics(d_rw_sharma_2017)

(array([[3594,    5,    5,    3,   14],
       [ 183,  113,  123,    4,  181],
       [  48,    4, 3334,  149,   86],
       [  13,    0,  198, 1088,    0],
       [  51,   11,  207,    0, 1339]]), '\n')
('Dataset: ', 'Sleep-EDF')
('Number of patients: ', 20)
('Scoring rule: ', 'R&K')
('Channel: ', 'Pz-Oz')
('Model: ', 'Iterative filtering')
('Split type: ', 'epoch')
('Cross_val: ', '10-fold CV')
('Number of raters per record: ', 1)


('Accuracy: ', 0.88049846554449918)
('Kappa: ', 0.83400538566890225)
('f1-micro: ', 0.88049846554449918)
('f1_macro: ', 0.76791239788784527)
('sklearn_report: ', '\n', '             precision    recall  f1-score   support\n\n          W       0.92      0.99      0.96      3621\n         S1       0.85      0.19      0.31       604\n         S2       0.86      0.92      0.89      3621\n     S3(&4)       0.87      0.84      0.86      1299\n          R       0.83      0.83      0.83      1608\n\navg / total       0.88      0.88      0.87     10753\n')


## Hsu 2013

Hsu, Y. L., Yang, Y. T., Wang, J. S., & Hsu, C. Y. (2013). Automatic sleep stage recurrent neural classifier using energy features of EEG signals. Neurocomputing, 104, 105-114.

In [22]:
cm_hsu_2013 = np.array([[20, 23, 3, 9, 0],
                     [4, 574, 8, 1, 3],
                     [0, 3, 26, 0, 0],
                     [5, 13, 4, 213, 3],
                     [2, 7, 2, 3, 34]])
cm_hsu_2013 = reorder(cm_hsu_2013, [4, 0, 1, 2, 3])
print(cm_hsu_2013, '\n')
d_hsu_2013 = get_metrics(*get_y_true_pred(cm_hsu_2013))
d_hsu_2013 = add_infos(d_hsu_2013, 'hsu_2013', 'Sleep-EDF', 8, 'R&K', 'Fpz-Cz', 'Energy features, recurrent neural classifier', 'epoch', '10-fold CV', 1)
print_metrics(d_hsu_2013)

(array([[ 34,   2,   7,   2,   3],
       [  0,  20,  23,   3,   9],
       [  3,   4, 574,   8,   1],
       [  0,   0,   3,  26,   0],
       [  3,   5,  13,   4, 213]]), '\n')
('Dataset: ', 'Sleep-EDF')
('Number of patients: ', 8)
('Scoring rule: ', 'R&K')
('Channel: ', 'Fpz-Cz')
('Model: ', 'Energy features, recurrent neural classifier')
('Split type: ', 'epoch')
('Cross_val: ', '10-fold CV')
('Number of raters per record: ', 1)


('Accuracy: ', 0.90312499999999996)
('Kappa: ', 0.82041276099287908)
('f1-micro: ', 0.90312499999999996)
('f1_macro: ', 0.76538591057472727)
('sklearn_report: ', '\n', '             precision    recall  f1-score   support\n\n          W       0.85      0.71      0.77        48\n         S1       0.65      0.36      0.47        55\n         S2       0.93      0.97      0.95       590\n     S3(&4)       0.60      0.90      0.72        29\n          R       0.94      0.89      0.92       238\n\navg / total       0.90      0.90      0.90       960\n')


## Ours

In [23]:
cm_ours_old_old = np.array([[212900, 2704, 9679, 853, 3830], 
                   [7306, 11044, 7470, 11, 5650],
                   [7996, 4308, 300692, 14691, 11442],
                   [983, 2, 15259, 94259, 70],
                   [6858, 1979, 8364, 41, 103993]])

cm_ours_old = np.array([[269094,   4122,  12261,   1273,   3576],
       [  7812,  14015,  11533,     42,   5588],
       [ 10595,   4161, 380571,  16394,  10085],
       [   847,      2,  21560, 119745,    171],
       [  6545,   2558,  15748,    156, 128586]])

cm_ours = np.array([[408881, 7010, 19800, 3740, 7904],
                        [11161, 20614, 18152, 55, 9047],
                        [16547, 6425, 582886, 28899, 18438],
                        [1816, 7, 30050, 183631, 336],
                        [8793, 3477, 20914, 186, 201983]])
cm_ours_normalized = cm_ours.astype(np.float) / np.sum(cm_ours, axis=1)[:, np.newaxis]
print('cm_ours_normalized: ')
print(np.around(cm_ours_normalized, decimals=2))
#print(cm_ours, '\n')
d_ours = get_metrics(*get_y_true_pred(cm_ours))
d_ours = add_infos(d_ours, 'ours', 'SHHS-1', 5728, 'R&K', 'C4-A1', 'CNN', 'record', '0.5/0.2/0.3', 1)
print_metrics(d_ours)

cm_ours_normalized: 
[[ 0.91  0.02  0.04  0.01  0.02]
 [ 0.19  0.35  0.31  0.    0.15]
 [ 0.03  0.01  0.89  0.04  0.03]
 [ 0.01  0.    0.14  0.85  0.  ]
 [ 0.04  0.01  0.09  0.    0.86]]
('Dataset: ', 'SHHS-1')
('Number of patients: ', 5728)
('Scoring rule: ', 'R&K')
('Channel: ', 'C4-A1')
('Model: ', 'CNN')
('Split type: ', 'record')
('Cross_val: ', '0.5/0.2/0.3')
('Number of raters per record: ', 1)


('Accuracy: ', 0.86791448962968853)
('Kappa: ', 0.81483393589670505)
('f1-micro: ', 0.86791448962968853)
('f1_macro: ', 0.78487205539183036)
('sklearn_report: ', '\n', '             precision    recall  f1-score   support\n\n          W       0.91      0.91      0.91    447335\n         S1       0.55      0.35      0.43     59029\n         S2       0.87      0.89      0.88    653195\n     S3(&4)       0.85      0.85      0.85    215840\n          R       0.85      0.86      0.85    235353\n\navg / total       0.86      0.87      0.86   1610752\n')


In [35]:
print '             precision    recall  f1-score   support\n\n          W       0.91      0.91      0.91    447335\n         S1       0.55      0.35      0.43     59029\n         S2       0.87      0.89      0.88    653195\n     S3(&4)       0.85      0.85      0.85    215840\n          R       0.85      0.86      0.85    235353\n\navg / total       0.86      0.87      0.86   1610752\n'

             precision    recall  f1-score   support

          W       0.91      0.91      0.91    447335
         S1       0.55      0.35      0.43     59029
         S2       0.87      0.89      0.88    653195
     S3(&4)       0.85      0.85      0.85    215840
          R       0.85      0.86      0.85    235353

avg / total       0.86      0.87      0.86   1610752



In [26]:
d_ours.keys()

['split type',
 'kappa',
 'scoring rule',
 'f1_macro',
 'dataset',
 'sklearn report',
 'patients',
 'f1_micro',
 'raters per record',
 'article',
 'model',
 'cross_val',
 'channel',
 'accuracy']

In [27]:
col = ['article', 'dataset', 'channel', 'patients', 'raters per record', 'scoring rule', 'model', 'split type', 'cross_val', 'accuracy', 'kappa', 'f1_micro', 'f1_macro']

In [28]:
df_ours = pd.DataFrame([d_ours], columns=col)#, columns=d_ours.keys())
df_ours

Unnamed: 0,article,dataset,channel,patients,raters per record,scoring rule,model,split type,cross_val,accuracy,kappa,f1_micro,f1_macro
0,ours,SHHS-1,C4-A1,5728,1,R&K,CNN,record,0.5/0.2/0.3,0.867914,0.814834,0.867914,0.784872


### Assemble results in a pd.DataFrame


In [29]:
studies_results = [d_tsinalis_2016, d_supratak_2017, d_liang_2012, d_rw_zhu_2014, d_rw_fraiwan_2012, d_rw_hassan_2016, 
                   d_rw_hassan_2016_2, d_rw_hassan_2016_3, d_rw_hassan_2017, d_rw_sharma_2017, d_hsu_2013, d_ours]

df = pd.DataFrame([d_tsinalis_2016], columns=col).round(2)
for r in range(1, len(studies_results)):
    df_ = pd.DataFrame([studies_results[r]], columns=col).round(2)
    df = pd.concat([df, df_])
#del df['sklearn report']

In [30]:
df

Unnamed: 0,article,dataset,channel,patients,raters per record,scoring rule,model,split type,cross_val,accuracy,kappa,f1_micro,f1_macro
0,tsinalis_2016,Sleep-EDF,Fpz-Cz,20,1,R&K,CNN,record,20-fold CV,0.75,0.65,0.75,0.7
0,supratak_2017,MASS,F4-EOG,32,1,AASM,CNN-LSTM,record,31-fold CV,0.86,0.8,0.86,0.82
0,liang_2012,Custom,C3-A2,20,2,R&K,"Multiscale entropy, AR features, smoothing rules",epoch,50/50,0.88,0.82,0.88,0.77
0,zhu_2014,Sleep-EDF,Pz-Oz,20,1,R&K,"Difference visibility graph, SVM",epoch,10-fold CV,0.85,0.79,0.85,0.73
0,fraiwan_2012,Custom,C3-A1,16,3,AASM,"Time-frequency feat., random forest",epoch,67/33,0.83,0.77,0.83,0.76
0,hassan_2016,Sleep-EDF,Pz-Oz,20,1,R&K,"EMD domain, ensemble",epoch,0.6/0.05/0.35,0.87,0.82,0.87,0.8
0,hassan_2016_2,Sleep-EDF,Pz-Oz,20,1,R&K,"EMD, bootstrap aggregating",epoch,50/50,0.89,0.85,0.89,0.83
0,hassan_2016_3,Sleep-EDF,Pz-Oz,20,1,R&K,"Wavelet transform, spectral features, random f...",epoch,"50/50, 20-fold average",0.88,0.84,0.88,0.8
0,hassan_2017,Sleep-EDF,Pz-Oz,20,1,R&K,"EMD, random undersampling boosting",epoch,"50/50, 20-fold average",0.83,0.76,0.83,0.74
0,sharma_2017,Sleep-EDF,Pz-Oz,20,1,R&K,Iterative filtering,epoch,10-fold CV,0.88,0.83,0.88,0.77


In [31]:
print(df.to_latex(index=False))

\begin{tabular}{lllrrllllrrrr}
\toprule
       article &    dataset & channel &  patients &  raters per record & scoring rule &                                              model & split type &               cross\_val &  accuracy &  kappa &  f1\_micro &  f1\_macro \\
\midrule
 tsinalis\_2016 &  Sleep-EDF &  Fpz-Cz &        20 &                  1 &          R\&K &                                                CNN &     record &              20-fold CV &      0.75 &   0.65 &      0.75 &      0.70 \\
 supratak\_2017 &       MASS &  F4-EOG &        32 &                  1 &         AASM &                                           CNN-LSTM &     record &              31-fold CV &      0.86 &   0.80 &      0.86 &      0.82 \\
    liang\_2012 &     Custom &   C3-A2 &        20 &                  2 &          R\&K &   Multiscale entropy, AR features, smoothing rules &      epoch &                   50/50 &      0.88 &   0.82 &      0.88 &      0.77 \\
      zhu\_2014 &  Sleep-EDF &   Pz-Oz 

## More details stats for our results

In [32]:
import pandas_ml

In [33]:
y_true_ours, y_pred_ours = get_y_true_pred(cm_ours)
cm = pandas_ml.ConfusionMatrix(y_true_ours, y_pred_ours)
print(cm)

Predicted       0      1       2       3       4  __all__
Actual                                                   
0          408881   7010   19800    3740    7904   447335
1           11161  20614   18152      55    9047    59029
2           16547   6425  582886   28899   18438   653195
3            1816      7   30050  183631     336   215840
4            8793   3477   20914     186  201983   235353
__all__    447198  37533  671802  216511  237708  1610752


In [34]:
print('PPV(Precision) for Wake, N1, N2, N3, REM, Total:') 
print(cm.PPV)
print('TPR(Sensitivity=Recall) for Wake, N1, N2, N3, REM, Total:') 
print(cm.TPR)
print('TNR(Specificity) for Wake, N1, N2, N3, REM, Total:')
print(cm.TNR)

PPV(Precision) for Wake, N1, N2, N3, REM, Total:
0.914317595338
0.549223350119
0.867645526509
0.848137046155
0.84971056927
0.863703362147
TPR(Sensitivity=Recall) for Wake, N1, N2, N3, REM, Total:
0.914037578101
0.349218180894
0.892361392846
0.850773721275
0.858212982201
0.86791448963
TNR(Specificity) for Wake, N1, N2, N3, REM, Total:
0.967065119385
0.989096636449
0.907142864602
0.976428620587
0.974025719082
0.945844470476
