# Master RNN notebook

In [1]:
from __future__ import absolute_import, division, print_function 
#import os
#import re
#import sys
#import time

#sys.path.append(os.path.join('.', '..')) 
#import utils
#import utils_DL
import utils_s160159 as u_s

#import matplotlib.pyplot as plt
import numpy as np
from sklearn.metrics import confusion_matrix

In [2]:
# https://machinelearningmastery.com/classification-accuracy-is-not-enough-more-performance-measures-you-can-use/
# https://stats.stackexchange.com/questions/51296/how-do-you-calculate-precision-and-recall-for-multiclass-classification-using-co
def performance_measure(cm):
    TP = np.diag(cm)
    FP = np.sum(cm, axis=0) - np.diag(cm)
    FN = np.sum(cm,axis=1) - np.diag(cm)
    TN = np.sum(cm) - (FP+FN+TP)
    
    precision = u_s.safe_div(TP, TP + FP)
    recall = u_s.safe_div(TP, TP + FN)
    F1 = np.multiply(2, u_s.safe_div(np.multiply(precision, recall),
                                     np.add(precision, recall)))
    
    acc = (TP+TN)/(TP+FP+FN+TN)
    
    #
    return TP, FP, precision, recall, F1, acc

# http://www.dummies.com/education/science/biology/the-bootstrap-method-for-standard-errors-and-confidence-intervals/
def bootstrap_CI(values, n_sim = 100000, p = 0.05):
    x_hat_list = []
    # create X simulations
    for i in range(n_sim):
        # draw numbers from distribution
        x_hat = np.random.choice(values, 
                                 size=values.shape[0], 
                                 replace=True)
        # append mean value to list
        x_hat_list.append(np.mean(x_hat))
    # calculate the SD
    sd_hat = np.std(x_hat_list)
    mean_hat = np.mean(x_hat_list)
    # sort list
    x_hat_list = np.sort(x_hat_list)
    # remove lower and upper p/2 quantiles
    x_hat_list = x_hat_list[int(p/2*n_sim):int(n_sim-p/2*n_sim)]
    
    # calculate CI
    ci_l = mean_hat-sd_hat*(np.max(x_hat_list)-np.min(x_hat_list))
    ci_u = mean_hat+sd_hat*(np.max(x_hat_list)-np.min(x_hat_list))
    return(ci_l, mean_hat, ci_u)


def array_to_latex(tbl):
    for ii in range(tbl.shape[0]):
        tmp_str = ''
        for jj in range(tbl.shape[1]):
            if jj != 0:
                tmp_str += ' & ' + "{:.0f}".format(tbl[ii,jj])  
            else:
                tmp_str += "{:.0f}".format(tbl[ii,jj]) 

        tmp_str += ' \\\\ '
        print(tmp_str)

## Load data

In [3]:
data_dir = './../Data'
NUM_SUBJECTS = 6
NUM_CLASSES = 6
VAL_TRAIN_ID = NUM_SUBJECTS - 4
# Training Loop
MAX_EPOCHS = 1 # 50
BATCH_SIZE = 25 # 30 works on AWS 

In [4]:
cm = 'cm_val'
cap_master = dict(np.load('./../Code/models/master/Version_4.0/capture_dict.npz'))['arr_0'].tolist()[1]
cap_rnn = dict(np.load('./../Code/models/rnn/Version_4.0/capture_dict.npz'))['arr_0'].tolist()[1]
cm_master = cap_master[cm]
cm_rnn = cap_rnn[cm]
cm_master_norm = cm_master / cm_master.astype(np.float).sum(axis=1, keepdims=True) * 100
cm_rnn_norm = cm_rnn / cm_rnn.astype(np.float).sum(axis=1, keepdims=True) * 100

### Table 1

In [5]:
table_res_1 = np.zeros((12,16))
TP, FP, precision, recall, F1, Acc = performance_measure(cm_master)
print('--------------------------------------------')
print('Average for all classes')
print('Precision: %f' %(np.mean(precision)))
print('Recall:    %f' %(np.mean(recall)))
print('F1:        %f' %(np.mean(F1)))

table_res_1[0:6,0:6] = cm_master
table_res_1[0:6,6:12] = cm_master_norm
table_res_1[0:6,12] = precision * 100
table_res_1[0:6,13] = recall * 100
table_res_1[0:6,14] = F1 * 100
table_res_1[0:6,15] = Acc * 100

--------------------------------------------
Average for all classes
Precision: 0.678847
Recall:    0.713575
F1:        0.687712


In [6]:
TP, FP, precision, recall, F1, Acc = performance_measure(cm_rnn)
print('--------------------------------------------')
print('Average for all classes')
print('Precision: %f' %(np.mean(precision)))
print('Recall:    %f' %(np.mean(recall)))
print('F1:        %f' %(np.mean(F1)))

table_res_1[6:12,0:6] = cm_rnn
table_res_1[6:12,6:12] = cm_rnn_norm
table_res_1[6:12,12] = precision * 100
table_res_1[6:12,13] = recall * 100
table_res_1[6:12,14] = F1 * 100
table_res_1[6:12,15] = Acc * 100


--------------------------------------------
Average for all classes
Precision: 0.645518
Recall:    0.659172
F1:        0.641919


In [7]:
array_to_latex(table_res_1)

495 & 145 & 29 & 11 & 1 & 20 & 71 & 21 & 4 & 2 & 0 & 3 & 91 & 71 & 80 & 93 \\ 
25 & 211 & 43 & 0 & 0 & 62 & 7 & 62 & 13 & 0 & 0 & 18 & 43 & 62 & 51 & 89 \\ 
4 & 51 & 1313 & 104 & 17 & 68 & 0 & 3 & 84 & 7 & 1 & 4 & 91 & 84 & 88 & 90 \\ 
0 & 2 & 11 & 164 & 64 & 0 & 0 & 1 & 5 & 68 & 27 & 0 & 49 & 68 & 57 & 93 \\ 
0 & 0 & 0 & 54 & 91 & 0 & 0 & 0 & 0 & 37 & 63 & 0 & 53 & 63 & 57 & 96 \\ 
17 & 80 & 46 & 0 & 0 & 591 & 2 & 11 & 6 & 0 & 0 & 81 & 80 & 81 & 80 & 92 \\ 
578 & 39 & 26 & 7 & 1 & 43 & 83 & 6 & 4 & 1 & 0 & 6 & 89 & 83 & 86 & 95 \\ 
38 & 107 & 64 & 0 & 0 & 132 & 11 & 31 & 19 & 0 & 0 & 39 & 55 & 31 & 40 & 91 \\ 
8 & 13 & 1314 & 102 & 28 & 92 & 1 & 1 & 84 & 7 & 2 & 6 & 90 & 84 & 87 & 89 \\ 
3 & 0 & 18 & 125 & 95 & 0 & 1 & 0 & 7 & 52 & 39 & 0 & 43 & 52 & 47 & 92 \\ 
0 & 0 & 1 & 60 & 84 & 0 & 0 & 0 & 1 & 41 & 58 & 0 & 40 & 58 & 48 & 95 \\ 
19 & 36 & 43 & 0 & 0 & 636 & 3 & 5 & 6 & 0 & 0 & 87 & 70 & 87 & 78 & 90 \\ 


In [8]:
master_norm_diag = np.diag(table_res_1[0:6,6:12])
rnn_norm_diag = np.diag(table_res_1[6:12,6:12])

In [9]:
pp = "{:.2f}"
tmp_str = ''
tmp_str += pp.format(np.min(master_norm_diag)) + ' - ' + \
pp.format(np.mean(master_norm_diag)) + ' - ' + \
pp.format(np.max(master_norm_diag))
print(tmp_str)

tmp_str = ''
tmp_str += pp.format(np.min(rnn_norm_diag)) + ' - ' + \
pp.format(np.mean(rnn_norm_diag)) + ' - ' + \
pp.format(np.max(rnn_norm_diag))
print(tmp_str)

61.88 - 71.36 - 84.33
31.38 - 65.92 - 86.65


### Table 2

In [10]:
pp = "{:.1f}"
_, _, precision, recall, F1, Acc = performance_measure(cm_master)
tmp_str = ''
tmp = bootstrap_CI(precision)
tmp_str += pp.format(tmp[0]*100) + '-\\textbf{' + \
pp.format(tmp[1]*100) + '}-' + \
pp.format(tmp[2]*100) + ' & '

tmp = bootstrap_CI(recall)
tmp_str += pp.format(tmp[0]*100) + '-\\textbf{' + \
pp.format(tmp[1]*100) + '}-' + \
pp.format(tmp[2]*100) + ' & '

tmp = bootstrap_CI(F1)
tmp_str += pp.format(tmp[0]*100) + '-\\textbf{' + \
pp.format(tmp[1]*100) + '}-' + \
pp.format(tmp[2]*100) + ' & '

tmp = bootstrap_CI(Acc)
tmp_str += pp.format(tmp[0]*100) + '-\\textbf{' + \
pp.format(tmp[1]*100) + '}-' + \
pp.format(tmp[2]*100) + '\\\\'
print(tmp_str)

65.3-\textbf{67.8}-70.4 & 70.9-\textbf{71.3}-71.8 & 67.5-\textbf{68.8}-70.0 & 92.3-\textbf{92.3}-92.4\\


In [11]:
_, _, precision, recall, F1, Acc = performance_measure(cm_rnn)
tmp_str = ''
tmp = bootstrap_CI(precision)
tmp_str += pp.format(tmp[0]*100) + '-\\textbf{' + \
pp.format(tmp[1]*100) + '}-' + \
pp.format(tmp[2]*100) + ' & '

tmp = bootstrap_CI(recall)
tmp_str += pp.format(tmp[0]*100) + '-\\textbf{' + \
pp.format(tmp[1]*100) + '}-' + \
pp.format(tmp[2]*100) + ' & '

tmp = bootstrap_CI(F1)
tmp_str += pp.format(tmp[0]*100) + '-\\textbf{' + \
pp.format(tmp[1]*100) + '}-' + \
pp.format(tmp[2]*100) + ' & '

tmp = bootstrap_CI(Acc)
tmp_str += pp.format(tmp[0]*100) + '-\\textbf{' + \
pp.format(tmp[1]*100) + '}-' + \
pp.format(tmp[2]*100) + '\\\\'
print(tmp_str)

61.9-\textbf{64.5}-67.1 & 63.2-\textbf{65.9}-68.6 & 61.8-\textbf{64.2}-66.6 & 92.2-\textbf{92.2}-92.2\\


In [12]:
## rnn
# test
rnn_test = np.array([[566,107,0,0,3,18],
 [122,125,11,0,4,79],
 [130,216,475,150,551,35],
 [9,2,15,31,184,0],
 [7,0,7,27,104,0],
 [128,278,10,0,1,317]])

# acc
rnn_acc = np.array([[113,52,3,1,1,25],
[25,64,5,0,1,22],
[16,48,221,99,234,5],
[6,9,24,31,125,0],
[4,2,4,44,268,0],
[32,113,23,0,0,172]])


cnn_test = np.array([[457,189,6,0,0,49],
[13,175,10,0,0,143],
[11,61,1225,69,34,157],
[9,1,18,87,126,0],
[5,0,0,13,127,0],
[1,52,4,0,0,677]])

# acc
cnn_acc = np.array([[126,66,0,1,1,19],
[6,48,4,1,0,58],
[2,32,440,80,2,67],
[3,0,20,97,75,0],
[1,0,0,11,309,1],
[0,8,3,0,0,329]])


In [13]:
rnn_acc

array([[113,  52,   3,   1,   1,  25],
       [ 25,  64,   5,   0,   1,  22],
       [ 16,  48, 221,  99, 234,   5],
       [  6,   9,  24,  31, 125,   0],
       [  4,   2,   4,  44, 268,   0],
       [ 32, 113,  23,   0,   0, 172]])

In [14]:
TP, FP, precision, recall, F1 = performance_measure(cnn_test)
print('--------------------------------------------')
print('Average for all classes')
print('Precision: %f' %(np.mean(precision)))
print('Recall:    %f' %(np.mean(recall)))
print('F1:        %f' %(np.mean(F1)))

ValueError: too many values to unpack (expected 5)

In [None]:
TP, FP, precision, recall, F1 = performance_measure(rnn_test)
print('--------------------------------------------')
print('Average for all classes')
print('Precision: %f' %(np.mean(precision)))
print('Recall:    %f' %(np.mean(recall)))
print('F1:        %f' %(np.mean(F1)))

In [None]:


tmp = np.random.normal(size=20, loc=100, scale=5)  
bootstrap_CI(tmp)
