In [76]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import numpy as np
from scipy.stats import sem
from sklearn import metrics
import pandas as pd
target_names = ['non-patient', 'patient']

In [83]:
def CI(y_true, y_pred):
    n_bootstraps = 1000
    rng_seed = 42  # control reproducibility
    bootstrapped_scores = []

    rng = np.random.RandomState(rng_seed)
    for i in range(n_bootstraps):
        # bootstrap by sampling with replacement on the prediction indices
        indices = rng.randint(0, len(y_pred), len(y_pred))
        if len(np.unique(y_true[indices])) < 2:
            # We need at least one positive and one negative sample for ROC AUC
            # to be defined: reject the sample
            continue

        score = roc_auc_score(y_true[indices], y_pred[indices])
        bootstrapped_scores.append(score)
        #print("Bootstrap #{} ROC area: {:0.3f}".format(i + 1, score))

    sorted_scores = np.array(bootstrapped_scores)
    sorted_scores.sort()
    confidence_lower = sorted_scores[int(0.05 * len(sorted_scores))]
    confidence_upper = sorted_scores[int(0.95 * len(sorted_scores))]
    print("Confidence interval for the score: [{:0.3f} - {:0.3}]".format(
        confidence_lower, confidence_upper))

In [11]:
fivefolds_1k = pd.read_csv('5folds1000.csv')
fivefolds_1k['result'] = np.where(fivefolds_1k['hyps'] >= 0.5,1,0)
fivefolds_1k.head(20)

Unnamed: 0,name,hyps,truth,ifcorrect,result
0,54CTT,0.432602,0.0,True,0
1,78CTT,0.489566,0.0,True,0
2,20CTT,0.416978,1.0,False,0
3,74CTT,0.426852,0.0,True,0
4,67CTT,0.463892,1.0,False,0
5,58CTT,0.5359,1.0,True,1
6,68CTT,0.485576,0.0,True,0
7,19CTT,0.467883,1.0,False,0
8,40CTT,0.510572,1.0,True,1
9,94CTT,0.45665,0.0,True,0


In [12]:
fivefolds_1_result = fivefolds_1k['result'].tolist()
fivefolds_1_hyps = fivefolds_1k['hyps'].tolist()
fivefolds_1_truth = [int(i) for i in fivefolds_1k['truth'].tolist()]

In [14]:
print(classification_report(fivefolds_1_truth, fivefolds_1_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fivefolds_1_truth, fivefolds_1_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_1_truth), np.array(fivefolds_1_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))


              precision    recall  f1-score   support

 non-patient       0.63      0.70      0.67        57
     patient       0.53      0.45      0.49        42

    accuracy                           0.60        99
   macro avg       0.58      0.58      0.58        99
weighted avg       0.59      0.60      0.59        99

sensitivuty:  0.4523809523809524
specificity:  0.7017543859649122
AUC:  0.6052631578947368


In [15]:
fivefolds_1k_steps = pd.read_csv('5folds1000withSteps.csv')
fivefolds_1k_steps['result'] = np.where(fivefolds_1k_steps['hyps'] >= 0.5,1,0)
fivefolds_1k_steps.head(20)

Unnamed: 0,name,hyps,truth,ifcorrect,result
0,54CTT,0.432602,0.0,True,0
1,78CTT,0.489566,0.0,True,0
2,20CTT,0.416978,1.0,False,0
3,74CTT,0.426852,0.0,True,0
4,67CTT,0.463892,1.0,False,0
5,58CTT,0.5359,1.0,True,1
6,68CTT,0.485576,0.0,True,0
7,19CTT,0.467883,1.0,False,0
8,40CTT,0.510572,1.0,True,1
9,94CTT,0.45665,0.0,True,0


In [16]:
fivefolds_2_result = fivefolds_1k_steps['result'].tolist()
fivefolds_2_hyps = fivefolds_1k_steps['hyps'].tolist()
fivefolds_2_truth = [int(i) for i in fivefolds_1k_steps['truth'].tolist()]

In [17]:
print(classification_report(fivefolds_2_truth, fivefolds_2_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fivefolds_2_truth, fivefolds_2_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_2_truth), np.array(fivefolds_2_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))

              precision    recall  f1-score   support

 non-patient       0.63      0.70      0.67        57
     patient       0.53      0.45      0.49        42

    accuracy                           0.60        99
   macro avg       0.58      0.58      0.58        99
weighted avg       0.59      0.60      0.59        99

sensitivuty:  0.4523809523809524
specificity:  0.7017543859649122
AUC:  0.6052631578947368


In [18]:
fourfolds5val1 = pd.read_csv('4folds10005val_lstm4.csv')
fourfolds5val1['result'] = np.where(fourfolds5val1['hyps'] >= 0.5,1,0)
print(fourfolds5val1.head(20))
fourfolds5val1_result = fourfolds5val1['result'].tolist()
fourfolds5val1_hyps = fourfolds5val1['hyps'].tolist()
fourfolds5val1_truth = [int(i) for i in fourfolds5val1['truth'].tolist()]
print(classification_report(fourfolds5val1_truth, fourfolds5val1_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fourfolds5val1_truth, fourfolds5val1_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fourfolds5val1_truth), np.array(fourfolds5val1_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))

     name      hyps  truth  ifcorrect  result
0   78CTT  0.431806    0.0       True       0
1   33CTT  0.464343    1.0      False       0
2   14CTT  0.431905    0.0       True       0
3   85CTT  0.422415    0.0       True       0
4   58CTT  0.417322    1.0      False       0
5   68CTT  0.436241    0.0       True       0
6   86CTT  0.418060    0.0       True       0
7   35CTT  0.422093    1.0      False       0
8   65CTT  0.420795    1.0      False       0
9   93CTT  0.418396    0.0       True       0
10  92CTT  0.427285    0.0       True       0
11  15CTT  0.447985    0.0       True       0
12  82CTT  0.424984    0.0       True       0
13  18CTT  0.458170    1.0      False       0
14  48CTT  0.434692    1.0      False       0
15  46CTT  0.428632    0.0       True       0
16  77CTT  0.426853    0.0       True       0
17  59CTT  0.421778    1.0      False       0
18  64CTT  0.435982    1.0      False       0
19   5CTT  0.461857    1.0      False       0
              precision    recall 

In [22]:
fourfolds5val1.head(40)

Unnamed: 0,name,hyps,truth,ifcorrect,result
39,56CTT,0.392114,0.0,True,0
40,26CTT,0.394826,1.0,False,0
41,3CTT,0.066328,0.0,True,0
42,53CTT,0.124903,0.0,True,0
43,57CTT,0.907933,1.0,True,1
44,51CTT,0.959751,0.0,False,1
45,12CTT,0.002034,1.0,False,0
46,20CTT,0.030451,1.0,False,0
47,38CTT,0.999664,1.0,True,1
48,34CTT,0.434587,0.0,True,0


In [None]:
fourfolds5val1.tail(40)

In [23]:
fivefolds_1k_nonall = pd.read_csv('5folds1000nonall.csv')
fivefolds_1k_nonall['result'] = np.where(fivefolds_1k_nonall['hyps'] >= 0.5,1,0)
print(fivefolds_1k_nonall.head(20))
fivefolds_3_result = fivefolds_1k_nonall['result'].tolist()
fivefolds_3_hyps = fivefolds_1k_nonall['hyps'].tolist()
fivefolds_3_truth = [int(i) for i in fivefolds_1k_nonall['truth'].tolist()]
print(classification_report(fivefolds_3_truth, fivefolds_3_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fivefolds_3_truth, fivefolds_3_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_3_truth), np.array(fivefolds_3_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))

     name      hyps  truth  ifcorrect  result
0   54CTT  0.000156    0.0       True       0
1   78CTT  0.977957    0.0      False       1
2   20CTT  0.032852    1.0      False       0
3   74CTT  0.000174    0.0       True       0
4   67CTT  0.968559    1.0       True       1
5   58CTT  0.000012    1.0      False       0
6   68CTT  0.998936    0.0      False       1
7   19CTT  0.241037    1.0      False       0
8   40CTT  0.003358    1.0      False       0
9   94CTT  0.000146    0.0       True       0
10  93CTT  0.003290    0.0       True       0
11  92CTT  0.000222    0.0       True       0
12  71CTT  0.160873    0.0       True       0
13  49CTT  0.035031    0.0       True       0
14  81CTT  0.911862    0.0      False       1
15  28CTT  0.993975    1.0       True       1
16  47CTT  0.001348    0.0       True       0
17  27CTT  0.999105    1.0       True       1
18  77CTT  0.000524    0.0       True       0
19  23CTT  0.000743    1.0      False       0
              precision    recall 

In [24]:
fourfolds5val2 = pd.read_csv('4folds5val_lstm4_vgg1.csv')
fourfolds5val2['result'] = np.where(fourfolds5val2['hyps'] >= 0.5,1,0)
print(fourfolds5val2.head(20))
fourfolds5val2_result = fourfolds5val2['result'].tolist()
fourfolds5val2_hyps = fourfolds5val2['hyps'].tolist()
fourfolds5val2_truth = [int(i) for i in fourfolds5val2['truth'].tolist()]
print(classification_report(fourfolds5val2_truth, fourfolds5val2_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fourfolds5val2_truth, fourfolds5val2_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fourfolds5val2_truth), np.array(fourfolds5val2_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))

     name      hyps  truth  ifcorrect  result
0   78CTT  0.429831    0.0       True       0
1   33CTT  0.470351    1.0      False       0
2   14CTT  0.429200    0.0       True       0
3   85CTT  0.418456    0.0       True       0
4   58CTT  0.408208    1.0      False       0
5   68CTT  0.434698    0.0       True       0
6   86CTT  0.411929    0.0       True       0
7   35CTT  0.415375    1.0      False       0
8   65CTT  0.414084    1.0      False       0
9   93CTT  0.410918    0.0       True       0
10  92CTT  0.422259    0.0       True       0
11  15CTT  0.448471    0.0       True       0
12  82CTT  0.418585    0.0       True       0
13  18CTT  0.461450    1.0      False       0
14  48CTT  0.432062    1.0      False       0
15  46CTT  0.423687    0.0       True       0
16  77CTT  0.421214    0.0       True       0
17  59CTT  0.416831    1.0      False       0
18  64CTT  0.432610    1.0      False       0
19   5CTT  0.468605    1.0      False       0
              precision    recall 

In [25]:
fourfolds5val3 = pd.read_csv('4folds5val_nonall.csv')
fourfolds5val3['result'] = np.where(fourfolds5val3['hyps'] >= 0.5,1,0)
print(fourfolds5val3.head(20))
fourfolds5val3_result = fourfolds5val3['result'].tolist()
fourfolds5val3_hyps = fourfolds5val3['hyps'].tolist()
fourfolds5val3_truth = [int(i) for i in fourfolds5val3['truth'].tolist()]
print(classification_report(fourfolds5val3_truth, fourfolds5val3_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fourfolds5val3_truth, fourfolds5val3_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fourfolds5val3_truth), np.array(fourfolds5val3_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))

     name      hyps  truth  ifcorrect  result
0   78CTT  0.414156    0.0       True       0
1   33CTT  0.424831    1.0      False       0
2   14CTT  0.400427    0.0       True       0
3   85CTT  0.398345    0.0       True       0
4   58CTT  0.386795    1.0      False       0
5   68CTT  0.415819    0.0       True       0
6   86CTT  0.403452    0.0       True       0
7   35CTT  0.397682    1.0      False       0
8   65CTT  0.396779    1.0      False       0
9   93CTT  0.401245    0.0       True       0
10  92CTT  0.407140    0.0       True       0
11  15CTT  0.409370    0.0       True       0
12  82CTT  0.397888    0.0       True       0
13  18CTT  0.413650    1.0      False       0
14  48CTT  0.404053    1.0      False       0
15  46CTT  0.402056    0.0       True       0
16  77CTT  0.400753    0.0       True       0
17  59CTT  0.385368    1.0      False       0
18  64CTT  0.419795    1.0      False       0
19   5CTT  0.421042    1.0      False       0
              precision    recall 

In [26]:
fivefolds_7k4lstm = pd.read_csv('5folds7k_4lstm.csv')
fivefolds_7k4lstm['result'] = np.where(fivefolds_7k4lstm['hyps'] >= 0.5,1,0)
print(fivefolds_7k4lstm.head(20))
fivefolds_4_result = fivefolds_7k4lstm['result'].tolist()
fivefolds_4_hyps = fivefolds_7k4lstm['hyps'].tolist()
fivefolds_4_truth = [int(i) for i in fivefolds_7k4lstm['truth'].tolist()]
print(classification_report(fivefolds_4_truth, fivefolds_4_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fivefolds_4_truth, fivefolds_4_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_4_truth), np.array(fivefolds_4_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))

     name      hyps  truth  ifcorrect  result
0   54CTT  0.115835    0.0       True       0
1   78CTT  0.761912    0.0      False       1
2   20CTT  0.178502    1.0      False       0
3   74CTT  0.148600    0.0       True       0
4   67CTT  0.314825    1.0      False       0
5   58CTT  0.883505    1.0       True       1
6   68CTT  0.375281    0.0       True       0
7   19CTT  0.451197    1.0      False       0
8   40CTT  0.393623    1.0      False       0
9   94CTT  0.056580    0.0       True       0
10  93CTT  0.144084    0.0       True       0
11  92CTT  0.177906    0.0       True       0
12  71CTT  0.309278    0.0       True       0
13  49CTT  0.328642    0.0       True       0
14  81CTT  0.726019    0.0      False       1
15  28CTT  0.760827    1.0       True       1
16  47CTT  0.255017    0.0       True       0
17  27CTT  0.738734    1.0       True       1
18  77CTT  0.064880    0.0       True       0
19  23CTT  0.875328    1.0       True       1
              precision    recall 

In [27]:
fivefolds_7knonall = pd.read_csv('5folds7k_nonall.csv')
fivefolds_7knonall['result'] = np.where(fivefolds_7knonall['hyps'] >= 0.5,1,0)
print(fivefolds_7knonall.head(20))
fivefolds_5_result = fivefolds_7knonall['result'].tolist()
fivefolds_5_hyps = fivefolds_7knonall['hyps'].tolist()
fivefolds_5_truth = [int(i) for i in fivefolds_7knonall['truth'].tolist()]
print(classification_report(fivefolds_5_truth, fivefolds_5_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fivefolds_5_truth, fivefolds_5_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_5_truth), np.array(fivefolds_5_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))

     name          hyps  truth  ifcorrect  result
0   54CTT  1.347936e-06    0.0       True       0
1   78CTT  9.241211e-01    0.0      False       1
2   20CTT  1.035075e-03    1.0      False       0
3   74CTT  1.160441e-06    0.0       True       0
4   67CTT  9.492790e-01    1.0       True       1
5   58CTT  2.737322e-07    1.0      False       0
6   68CTT  9.999597e-01    0.0      False       1
7   19CTT  1.037929e-01    1.0      False       0
8   40CTT  5.059901e-06    1.0      False       0
9   94CTT  1.036750e-06    0.0       True       0
10  93CTT  3.451124e-05    0.0       True       0
11  92CTT  1.051064e-06    0.0       True       0
12  71CTT  1.413063e-03    0.0       True       0
13  49CTT  5.146757e-03    0.0       True       0
14  81CTT  9.776317e-01    0.0      False       1
15  28CTT  9.998216e-01    1.0       True       1
16  47CTT  7.882735e-06    0.0       True       0
17  27CTT  9.999844e-01    1.0       True       1
18  77CTT  4.324657e-06    0.0       True       0


In [28]:
fivefolds_8k4lstm = pd.read_csv('5folds8k_4lstm.csv')
fivefolds_8k4lstm['result'] = np.where(fivefolds_8k4lstm['hyps'] >= 0.5,1,0)
print(fivefolds_8k4lstm.head(20))
fivefolds_6_result = fivefolds_8k4lstm['result'].tolist()
fivefolds_6_hyps = fivefolds_8k4lstm['hyps'].tolist()
fivefolds_6_truth = [int(i) for i in fivefolds_8k4lstm['truth'].tolist()]
print(classification_report(fivefolds_6_truth, fivefolds_6_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fivefolds_6_truth, fivefolds_6_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_6_truth), np.array(fivefolds_6_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))

     name      hyps  truth  ifcorrect  result
0   54CTT  0.210616    0.0       True       0
1   78CTT  0.948521    0.0      False       1
2   20CTT  0.381920    1.0      False       0
3   74CTT  0.301089    0.0       True       0
4   67CTT  0.573395    1.0       True       1
5   58CTT  0.983040    1.0       True       1
6   68CTT  0.626803    0.0      False       1
7   19CTT  0.784302    1.0       True       1
8   40CTT  0.617786    1.0       True       1
9   94CTT  0.067816    0.0       True       0
10  93CTT  0.257867    0.0       True       0
11  92CTT  0.389124    0.0       True       0
12  71CTT  0.574900    0.0      False       1
13  49CTT  0.490658    0.0       True       0
14  81CTT  0.927198    0.0      False       1
15  28CTT  0.921287    1.0       True       1
16  47CTT  0.443595    0.0       True       0
17  27CTT  0.931855    1.0       True       1
18  77CTT  0.100259    0.0       True       0
19  23CTT  0.973423    1.0       True       1
              precision    recall 

In [29]:
fivefolds_9k4lstm = pd.read_csv('5folds9k_4lstm.csv')
fivefolds_9k4lstm['result'] = np.where(fivefolds_9k4lstm['hyps'] >= 0.5,1,0)
print(fivefolds_9k4lstm.head(20))
fivefolds_7_result = fivefolds_9k4lstm['result'].tolist()
fivefolds_7_hyps = fivefolds_9k4lstm['hyps'].tolist()
fivefolds_7_truth = [int(i) for i in fivefolds_9k4lstm['truth'].tolist()]
print(classification_report(fivefolds_7_truth, fivefolds_7_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fivefolds_7_truth, fivefolds_7_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_7_truth), np.array(fivefolds_7_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))

     name      hyps  truth  ifcorrect  result
0   54CTT  0.192693    0.0       True       0
1   78CTT  0.981313    0.0      False       1
2   20CTT  0.436469    1.0      False       0
3   74CTT  0.305558    0.0       True       0
4   67CTT  0.641180    1.0       True       1
5   58CTT  0.994372    1.0       True       1
6   68CTT  0.661424    0.0      False       1
7   19CTT  0.892173    1.0       True       1
8   40CTT  0.634885    1.0       True       1
9   94CTT  0.032853    0.0       True       0
10  93CTT  0.213742    0.0       True       0
11  92CTT  0.474178    0.0       True       0
12  71CTT  0.671643    0.0      False       1
13  49CTT  0.487799    0.0       True       0
14  81CTT  0.967107    0.0      False       1
15  28CTT  0.960544    1.0       True       1
16  47CTT  0.470555    0.0       True       0
17  27CTT  0.971593    1.0       True       1
18  77CTT  0.063675    0.0       True       0
19  23CTT  0.992646    1.0       True       1
              precision    recall 

In [30]:
fivefolds_10k4lstm = pd.read_csv('5folds10k_4lstm.csv')
fivefolds_10k4lstm['result'] = np.where(fivefolds_10k4lstm['hyps'] >= 0.5,1,0)
print(fivefolds_10k4lstm.head(20))
fivefolds_8_result = fivefolds_10k4lstm['result'].tolist()
fivefolds_8_hyps = fivefolds_10k4lstm['hyps'].tolist()
fivefolds_8_truth = [int(i) for i in fivefolds_10k4lstm['truth'].tolist()]
print(classification_report(fivefolds_8_truth, fivefolds_8_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fivefolds_8_truth, fivefolds_8_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_8_truth), np.array(fivefolds_8_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))

     name      hyps  truth  ifcorrect  result
0   54CTT  0.157280    0.0       True       0
1   78CTT  0.993375    0.0      False       1
2   20CTT  0.482180    1.0      False       0
3   74CTT  0.271970    0.0       True       0
4   67CTT  0.680198    1.0       True       1
5   58CTT  0.997789    1.0       True       1
6   68CTT  0.680310    0.0      False       1
7   19CTT  0.947697    1.0       True       1
8   40CTT  0.619387    1.0       True       1
9   94CTT  0.013126    0.0       True       0
10  93CTT  0.146771    0.0       True       0
11  92CTT  0.550083    0.0      False       1
12  71CTT  0.748883    0.0      False       1
13  49CTT  0.460513    0.0       True       0
14  81CTT  0.983649    0.0      False       1
15  28CTT  0.978200    1.0       True       1
16  47CTT  0.476623    0.0       True       0
17  27CTT  0.987696    1.0       True       1
18  77CTT  0.034698    0.0       True       0
19  23CTT  0.998037    1.0       True       1
              precision    recall 

In [31]:
fivefolds_11k4lstm = pd.read_csv('5folds11k_4lstm.csv')
fivefolds_11k4lstm['result'] = np.where(fivefolds_11k4lstm['hyps'] >= 0.5,1,0)
print(fivefolds_11k4lstm.head(20))
fivefolds_9_result = fivefolds_11k4lstm['result'].tolist()
fivefolds_9_hyps = fivefolds_11k4lstm['hyps'].tolist()
fivefolds_9_truth = [int(i) for i in fivefolds_11k4lstm['truth'].tolist()]
print(classification_report(fivefolds_9_truth, fivefolds_9_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fivefolds_9_truth, fivefolds_9_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_9_truth), np.array(fivefolds_9_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))

     name      hyps  truth  ifcorrect  result
0   54CTT  0.103514    0.0       True       0
1   78CTT  0.997206    0.0      False       1
2   20CTT  0.479850    1.0      False       0
3   74CTT  0.194349    0.0       True       0
4   67CTT  0.660756    1.0       True       1
5   58CTT  0.998751    1.0       True       1
6   68CTT  0.667437    0.0      False       1
7   19CTT  0.969406    1.0       True       1
8   40CTT  0.570083    1.0       True       1
9   94CTT  0.004408    0.0       True       0
10  93CTT  0.077424    0.0       True       0
11  92CTT  0.574446    0.0      False       1
12  71CTT  0.783153    0.0      False       1
13  49CTT  0.411204    0.0       True       0
14  81CTT  0.988923    0.0      False       1
15  28CTT  0.985029    1.0       True       1
16  47CTT  0.453715    0.0       True       0
17  27CTT  0.993467    1.0       True       1
18  77CTT  0.015487    0.0       True       0
19  23CTT  0.999382    1.0       True       1
              precision    recall 

In [32]:
fivefolds_7k_1VGG_4LSTM = pd.read_csv('5folds7k_1VGG_4LSTM.csv')
fivefolds_7k_1VGG_4LSTM['result'] = np.where(fivefolds_7k_1VGG_4LSTM['hyps'] >= 0.5,1,0)
print(fivefolds_7k_1VGG_4LSTM.head(20))
fivefolds_10_result = fivefolds_7k_1VGG_4LSTM['result'].tolist()
fivefolds_10_hyps = fivefolds_7k_1VGG_4LSTM['hyps'].tolist()
fivefolds_10_truth = [int(i) for i in fivefolds_7k_1VGG_4LSTM['truth'].tolist()]
print(classification_report(fivefolds_10_truth, fivefolds_10_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fivefolds_10_truth, fivefolds_10_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_10_truth), np.array(fivefolds_10_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))

     name      hyps  truth  ifcorrect  result
0   54CTT  0.000132    0.0       True       0
1   78CTT  0.962653    0.0      False       1
2   20CTT  0.030665    1.0      False       0
3   74CTT  0.000862    0.0       True       0
4   67CTT  0.446263    1.0      False       0
5   58CTT  0.997323    1.0       True       1
6   68CTT  0.276241    0.0       True       0
7   19CTT  0.255637    1.0      False       0
8   40CTT  0.000911    1.0      False       0
9   94CTT  0.000297    0.0       True       0
10  93CTT  0.007178    0.0       True       0
11  92CTT  0.000988    0.0       True       0
12  71CTT  0.053469    0.0       True       0
13  49CTT  0.018950    0.0       True       0
14  81CTT  0.690208    0.0      False       1
15  28CTT  0.994514    1.0       True       1
16  47CTT  0.003737    0.0       True       0
17  27CTT  0.957831    1.0       True       1
18  77CTT  0.000200    0.0       True       0
19  23CTT  0.999269    1.0       True       1
              precision    recall 

In [49]:
fivefolds_8k_1VGG_4LSTM = pd.read_csv('5folds8k_1VGG_4LSTM.csv')
fivefolds_8k_1VGG_4LSTM['result'] = np.where(fivefolds_8k_1VGG_4LSTM['hyps'] >= 0.5,1,0)
print(fivefolds_8k_1VGG_4LSTM.head(20))
fivefolds_11_result = fivefolds_8k_1VGG_4LSTM['result'].tolist()
fivefolds_11_hyps = fivefolds_8k_1VGG_4LSTM['hyps'].tolist()
fivefolds_11_truth = [int(i) for i in fivefolds_8k_1VGG_4LSTM['truth'].tolist()]
print(classification_report(fivefolds_11_truth, fivefolds_11_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fivefolds_11_truth, fivefolds_11_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_11_truth), np.array(fivefolds_11_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))

     name      hyps  truth  ifcorrect  result
0   54CTT  0.000129    0.0       True       0
1   78CTT  0.993702    0.0      False       1
2   20CTT  0.066818    1.0      False       0
3   74CTT  0.001028    0.0       True       0
4   67CTT  0.763499    1.0       True       1
5   58CTT  0.999826    1.0       True       1
6   68CTT  0.572132    0.0      False       1
7   19CTT  0.553481    1.0       True       1
8   40CTT  0.001348    1.0      False       0
9   94CTT  0.000317    0.0       True       0
10  93CTT  0.012175    0.0       True       0
11  92CTT  0.001528    0.0       True       0
12  71CTT  0.108665    0.0       True       0
13  49CTT  0.035728    0.0       True       0
14  81CTT  0.877097    0.0      False       1
15  28CTT  0.999350    1.0       True       1
16  47CTT  0.005755    0.0       True       0
17  27CTT  0.991450    1.0       True       1
18  77CTT  0.000167    0.0       True       0
19  23CTT  0.999947    1.0       True       1
              precision    recall 

In [34]:
print(fivefolds_8k_1VGG_4LSTM.head(20))

     name      hyps  truth  ifcorrect  result
0   54CTT  0.000129    0.0       True       0
1   78CTT  0.993702    0.0      False       1
2   20CTT  0.066818    1.0      False       0
3   74CTT  0.001028    0.0       True       0
4   67CTT  0.763499    1.0       True       1
5   58CTT  0.999826    1.0       True       1
6   68CTT  0.572132    0.0      False       1
7   19CTT  0.553481    1.0       True       1
8   40CTT  0.001348    1.0      False       0
9   94CTT  0.000317    0.0       True       0
10  93CTT  0.012175    0.0       True       0
11  92CTT  0.001528    0.0       True       0
12  71CTT  0.108665    0.0       True       0
13  49CTT  0.035728    0.0       True       0
14  81CTT  0.877097    0.0      False       1
15  28CTT  0.999350    1.0       True       1
16  47CTT  0.005755    0.0       True       0
17  27CTT  0.991450    1.0       True       1
18  77CTT  0.000167    0.0       True       0
19  23CTT  0.999947    1.0       True       1


In [44]:
inval = ['23CTT', '26CTT', '27CTT', '36CTT', '48CTT', '49CTT', '59CTT', '60CTT', '62CTT', '66CTT','61CTT']
for i in inval:
    print(fivefolds_8k_1VGG_4LSTM.loc[lambda df: df['name'] == i ])

     name      hyps  truth  ifcorrect  result
19  23CTT  0.999947    1.0       True       1
     name     hyps  truth  ifcorrect  result
60  26CTT  0.99517    1.0       True       1
     name     hyps  truth  ifcorrect  result
17  27CTT  0.99145    1.0       True       1
     name     hyps  truth  ifcorrect  result
71  36CTT  0.99993    1.0       True       1
     name     hyps  truth  ifcorrect  result
92  48CTT  0.33416    1.0      False       0
     name      hyps  truth  ifcorrect  result
13  49CTT  0.035728    0.0       True       0
     name      hyps  truth  ifcorrect  result
34  59CTT  0.000707    1.0      False       0
     name      hyps  truth  ifcorrect  result
29  60CTT  0.076052    1.0      False       0
     name      hyps  truth  ifcorrect  result
83  62CTT  0.014395    1.0      False       0
     name     hyps  truth  ifcorrect  result
44  66CTT  0.04747    1.0      False       0
     name      hyps  truth  ifcorrect  result
28  61CTT  0.537788    1.0       True       

In [46]:
inval_idx = [19,60,17,71,92,13,34,29,83,44,28]
print(fivefolds_8k_1VGG_4LSTM.iloc[inval_idx])
print(fivefolds_7k_1VGG_4LSTM.iloc[inval_idx])
print(fivefolds_11k4lstm.iloc[inval_idx])
print(fivefolds_10k4lstm.iloc[inval_idx])
print(fivefolds_9k4lstm.iloc[inval_idx])
print(fivefolds_8k4lstm.iloc[inval_idx])
print(fivefolds_7k4lstm.iloc[inval_idx])

     name      hyps  truth  ifcorrect  result
19  23CTT  0.999947    1.0       True       1
60  26CTT  0.995170    1.0       True       1
17  27CTT  0.991450    1.0       True       1
71  36CTT  0.999930    1.0       True       1
92  48CTT  0.334160    1.0      False       0
13  49CTT  0.035728    0.0       True       0
34  59CTT  0.000707    1.0      False       0
29  60CTT  0.076052    1.0      False       0
83  62CTT  0.014395    1.0      False       0
44  66CTT  0.047470    1.0      False       0
28  61CTT  0.537788    1.0       True       1
     name      hyps  truth  ifcorrect  result
19  23CTT  0.999269    1.0       True       1
60  26CTT  0.981345    1.0       True       1
17  27CTT  0.957831    1.0       True       1
71  36CTT  0.999165    1.0       True       1
92  48CTT  0.231517    1.0      False       0
13  49CTT  0.018950    0.0       True       0
34  59CTT  0.006572    1.0      False       0
29  60CTT  0.420433    1.0      False       0
83  62CTT  0.014548    1.0      Fa

In [47]:
inval = ['17CTT', '45CTT', '31CTT', '11CTT']
for i in inval:
    print(fivefolds_8k_1VGG_4LSTM.loc[lambda df: df['name'] == i ])

     name    hyps  truth  ifcorrect  result
90  17CTT  0.9883    1.0       True       1
     name      hyps  truth  ifcorrect  result
84  45CTT  0.034262    1.0      False       0
     name     hyps  truth  ifcorrect  result
57  31CTT  0.99958    1.0       True       1
     name      hyps  truth  ifcorrect  result
96  11CTT  0.006082    1.0      False       0


In [48]:
inval_idx = [90,84,57,96]
print(fivefolds_8k_1VGG_4LSTM.iloc[inval_idx])
print(fivefolds_7k_1VGG_4LSTM.iloc[inval_idx])
print(fivefolds_11k4lstm.iloc[inval_idx])
print(fivefolds_10k4lstm.iloc[inval_idx])
print(fivefolds_9k4lstm.iloc[inval_idx])
print(fivefolds_8k4lstm.iloc[inval_idx])
print(fivefolds_7k4lstm.iloc[inval_idx])

     name      hyps  truth  ifcorrect  result
90  17CTT  0.988300    1.0       True       1
84  45CTT  0.034262    1.0      False       0
57  31CTT  0.999580    1.0       True       1
96  11CTT  0.006082    1.0      False       0
     name      hyps  truth  ifcorrect  result
90  17CTT  0.978688    1.0       True       1
84  45CTT  0.025781    1.0      False       0
57  31CTT  0.997488    1.0       True       1
96  11CTT  0.006742    1.0      False       0
     name      hyps  truth  ifcorrect  result
90  17CTT  0.999188    1.0       True       1
84  45CTT  0.040345    1.0      False       0
57  31CTT  0.979773    1.0       True       1
96  11CTT  0.019549    1.0      False       0
     name      hyps  truth  ifcorrect  result
90  17CTT  0.998889    1.0       True       1
84  45CTT  0.062374    1.0      False       0
57  31CTT  0.983586    1.0       True       1
96  11CTT  0.045811    1.0      False       0
     name      hyps  truth  ifcorrect  result
90  17CTT  0.998580    1.0       T

In [51]:
fivefolds_hubert3k = pd.read_csv('hubert3k.csv')
fivefolds_hubert3k['result'] = np.where(fivefolds_hubert3k['hyps'] >= 0.5,1,0)
print(fivefolds_hubert3k.head(20))
hubert_1_result = fivefolds_hubert3k['result'].tolist()
hubert_1_hyps = fivefolds_hubert3k['hyps'].tolist()
hubert_1_truth = [int(i) for i in fivefolds_hubert3k['truth'].tolist()]
print(classification_report(hubert_1_truth, hubert_1_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(hubert_1_truth, hubert_1_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(hubert_1_truth), np.array(hubert_1_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))

     name      hyps  truth  ifcorrect  result
0   54CTT  0.066423    0.0       True       0
1   78CTT  0.256771    0.0       True       0
2   20CTT  0.467185    1.0      False       0
3   74CTT  0.298173    0.0       True       0
4   67CTT  0.401482    1.0      False       0
5   58CTT  0.553135    1.0       True       1
6   68CTT  0.272976    0.0       True       0
7   19CTT  0.350811    1.0      False       0
8   40CTT  0.298203    1.0      False       0
9   94CTT  0.050120    0.0       True       0
10  93CTT  0.106917    0.0       True       0
11  92CTT  0.140541    0.0       True       0
12  71CTT  0.466619    0.0       True       0
13  81CTT  0.287619    0.0       True       0
14  28CTT  0.107407    1.0      False       0
15  47CTT  0.258503    0.0       True       0
16  77CTT  0.205399    0.0       True       0
17  63CTT  0.710787    1.0       True       1
18  13CTT  0.204820    0.0       True       0
19  51CTT  0.204120    0.0       True       0
              precision    recall 

In [52]:
fivefolds_7knew_4LSTM = pd.read_csv('5folds7knew_4LSTM.csv')
fivefolds_7knew_4LSTM['result'] = np.where(fivefolds_7knew_4LSTM['hyps'] >= 0.5,1,0)
print(fivefolds_7knew_4LSTM.head(20))
fivefolds_12_result = fivefolds_7knew_4LSTM['result'].tolist()
fivefolds_12_hyps = fivefolds_7knew_4LSTM['hyps'].tolist()
fivefolds_12_truth = [int(i) for i in fivefolds_7knew_4LSTM['truth'].tolist()]
print(classification_report(fivefolds_12_truth, fivefolds_12_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fivefolds_12_truth, fivefolds_12_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_12_truth), np.array(fivefolds_12_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))

     name      hyps  truth  ifcorrect  result
0   54CTT  0.062993    0.0       True       0
1   78CTT  0.074852    0.0       True       0
2   20CTT  0.391101    1.0      False       0
3   74CTT  0.014599    0.0       True       0
4   67CTT  0.358763    1.0      False       0
5   58CTT  0.993051    1.0       True       1
6   68CTT  0.011558    0.0       True       0
7   19CTT  0.174241    1.0      False       0
8   40CTT  0.217162    1.0      False       0
9   94CTT  0.002765    0.0       True       0
10  93CTT  0.022063    0.0       True       0
11  92CTT  0.012617    0.0       True       0
12  71CTT  0.040289    0.0       True       0
13  81CTT  0.096050    0.0       True       0
14  28CTT  0.603356    1.0       True       1
15  47CTT  0.025052    0.0       True       0
16  77CTT  0.003681    0.0       True       0
17  63CTT  0.373348    1.0      False       0
18  13CTT  0.049029    0.0       True       0
19  51CTT  0.002303    0.0       True       0
              precision    recall 

In [54]:
fivefolds_hubert2k = pd.read_csv('hubert2k.csv')
fivefolds_hubert2k['result'] = np.where(fivefolds_hubert2k['hyps'] >= 0.5,1,0)
print(fivefolds_hubert2k.head(20))
hubert_2_result = fivefolds_hubert2k['result'].tolist()
hubert_2_hyps = fivefolds_hubert2k['hyps'].tolist()
hubert_2_truth = [int(i) for i in fivefolds_hubert2k['truth'].tolist()]
print(classification_report(hubert_2_truth, hubert_2_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(hubert_2_truth, hubert_2_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(hubert_2_truth), np.array(hubert_2_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))

     name      hyps  truth  ifcorrect  result
0   54CTT  0.103136    0.0       True       0
1   78CTT  0.293934    0.0       True       0
2   20CTT  0.412575    1.0      False       0
3   74CTT  0.303758    0.0       True       0
4   67CTT  0.360668    1.0      False       0
5   58CTT  0.464984    1.0      False       0
6   68CTT  0.265517    0.0       True       0
7   19CTT  0.337914    1.0      False       0
8   40CTT  0.305112    1.0      False       0
9   94CTT  0.080226    0.0       True       0
10  93CTT  0.153516    0.0       True       0
11  92CTT  0.184220    0.0       True       0
12  71CTT  0.439798    0.0       True       0
13  81CTT  0.290587    0.0       True       0
14  28CTT  0.142722    1.0      False       0
15  47CTT  0.267733    0.0       True       0
16  77CTT  0.231820    0.0       True       0
17  63CTT  0.627377    1.0       True       1
18  13CTT  0.238240    0.0       True       0
19  51CTT  0.286941    0.0       True       0
              precision    recall 

In [55]:
fivefolds_hubert5k = pd.read_csv('hubert5k.csv')
fivefolds_hubert5k['result'] = np.where(fivefolds_hubert5k['hyps'] >= 0.5,1,0)
print(fivefolds_hubert5k.head(20))
hubert_3_result = fivefolds_hubert5k['result'].tolist()
hubert_3_hyps = fivefolds_hubert5k['hyps'].tolist()
hubert_3_truth = [int(i) for i in fivefolds_hubert5k['truth'].tolist()]
print(classification_report(hubert_3_truth, hubert_3_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(hubert_3_truth, hubert_3_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(hubert_3_truth), np.array(hubert_3_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))

     name      hyps  truth  ifcorrect  result
0   54CTT  0.029614    0.0       True       0
1   78CTT  0.173512    0.0       True       0
2   20CTT  0.540424    1.0       True       1
3   74CTT  0.257507    0.0       True       0
4   67CTT  0.445544    1.0      False       0
5   58CTT  0.665380    1.0       True       1
6   68CTT  0.245527    0.0       True       0
7   19CTT  0.348396    1.0      False       0
8   40CTT  0.266473    1.0      False       0
9   94CTT  0.023858    0.0       True       0
10  93CTT  0.053705    0.0       True       0
11  92CTT  0.083837    0.0       True       0
12  71CTT  0.487067    0.0       True       0
13  81CTT  0.248502    0.0       True       0
14  28CTT  0.068065    1.0      False       0
15  47CTT  0.223797    0.0       True       0
16  77CTT  0.146106    0.0       True       0
17  63CTT  0.780789    1.0       True       1
18  13CTT  0.147916    0.0       True       0
19  51CTT  0.159851    0.0       True       0
              precision    recall 

In [56]:
fivefolds_hubert6k = pd.read_csv('hubert6k.csv')
fivefolds_hubert6k['result'] = np.where(fivefolds_hubert6k['hyps'] >= 0.5,1,0)
print(fivefolds_hubert6k.head(20))
hubert_4_result = fivefolds_hubert6k['result'].tolist()
hubert_4_hyps = fivefolds_hubert6k['hyps'].tolist()
hubert_4_truth = [int(i) for i in fivefolds_hubert6k['truth'].tolist()]
print(classification_report(hubert_4_truth, hubert_4_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(hubert_4_truth, hubert_4_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(hubert_4_truth), np.array(hubert_4_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))

     name      hyps  truth  ifcorrect  result
0   54CTT  0.021347    0.0       True       0
1   78CTT  0.147694    0.0       True       0
2   20CTT  0.572990    1.0       True       1
3   74CTT  0.237934    0.0       True       0
4   67CTT  0.472948    1.0      False       0
5   58CTT  0.707474    1.0       True       1
6   68CTT  0.232758    0.0       True       0
7   19CTT  0.351879    1.0      False       0
8   40CTT  0.257183    1.0      False       0
9   94CTT  0.018060    0.0       True       0
10  93CTT  0.040171    0.0       True       0
11  92CTT  0.069463    0.0       True       0
12  71CTT  0.493347    0.0       True       0
13  81CTT  0.234224    0.0       True       0
14  28CTT  0.058134    1.0      False       0
15  47CTT  0.209277    0.0       True       0
16  77CTT  0.126106    0.0       True       0
17  63CTT  0.808166    1.0       True       1
18  13CTT  0.125736    0.0       True       0
19  51CTT  0.145319    0.0       True       0
              precision    recall 

In [91]:
fivefolds_hubert7k = pd.read_csv('hubert7k.csv')
fivefolds_hubert7k['result'] = np.where(fivefolds_hubert7k['hyps'] >= 0.5,1,0)
print(fivefolds_hubert7k.head(20))
hubert_5_result = fivefolds_hubert7k['result'].tolist()
hubert_5_hyps = fivefolds_hubert7k['hyps'].tolist()
hubert_5_truth = [int(i) for i in fivefolds_hubert7k['truth'].tolist()]
print(classification_report(hubert_5_truth, hubert_5_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(hubert_5_truth, hubert_5_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(hubert_5_truth), np.array(hubert_5_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))
print('acc: ',metrics.accuracy_score(hubert_5_truth, hubert_5_result))
CI(np.array(hubert_5_truth).astype(int), np.array(hubert_5_hyps))

     name      hyps  truth  ifcorrect  result
0   54CTT  0.016313    0.0       True       0
1   78CTT  0.126271    0.0       True       0
2   20CTT  0.604310    1.0       True       1
3   74CTT  0.220144    0.0       True       0
4   67CTT  0.502500    1.0       True       1
5   58CTT  0.746159    1.0       True       1
6   68CTT  0.221024    0.0       True       0
7   19CTT  0.359000    1.0      False       0
8   40CTT  0.254162    1.0      False       0
9   94CTT  0.014340    0.0       True       0
10  93CTT  0.031508    0.0       True       0
11  92CTT  0.060317    0.0       True       0
12  71CTT  0.498663    0.0       True       0
13  81CTT  0.223320    0.0       True       0
14  28CTT  0.051604    1.0      False       0
15  47CTT  0.197791    0.0       True       0
16  77CTT  0.112139    0.0       True       0
17  63CTT  0.831532    1.0       True       1
18  13CTT  0.107796    0.0       True       0
19  51CTT  0.129674    0.0       True       0
              precision    recall 

In [62]:
fivefolds_8knew_4LSTM = pd.read_csv('5folds8knew_4LSTM.csv')
fivefolds_8knew_4LSTM['result'] = np.where(fivefolds_8knew_4LSTM['hyps'] >= 0.5,1,0)
print(fivefolds_8knew_4LSTM.head(20))
fivefolds_13_result = fivefolds_8knew_4LSTM['result'].tolist()
fivefolds_13_hyps = fivefolds_8knew_4LSTM['hyps'].tolist()
fivefolds_13_truth = [int(i) for i in fivefolds_8knew_4LSTM['truth'].tolist()]
print(classification_report(fivefolds_13_truth, fivefolds_13_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fivefolds_13_truth, fivefolds_13_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_13_truth), np.array(fivefolds_13_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))
print('acc: ',metrics.accuracy_score(fivefolds_13_truth, fivefolds_13_result))

     name      hyps  truth  ifcorrect  result
0   54CTT  0.063968    0.0       True       0
1   78CTT  0.056447    0.0       True       0
2   20CTT  0.593411    1.0       True       1
3   74CTT  0.008770    0.0       True       0
4   67CTT  0.628695    1.0       True       1
5   58CTT  0.998784    1.0       True       1
6   68CTT  0.004664    0.0       True       0
7   19CTT  0.269115    1.0      False       0
8   40CTT  0.310748    1.0      False       0
9   94CTT  0.001171    0.0       True       0
10  93CTT  0.015581    0.0       True       0
11  92CTT  0.008354    0.0       True       0
12  71CTT  0.038350    0.0       True       0
13  81CTT  0.088141    0.0       True       0
14  28CTT  0.708043    1.0       True       1
15  47CTT  0.010463    0.0       True       0
16  77CTT  0.001342    0.0       True       0
17  63CTT  0.520765    1.0       True       1
18  13CTT  0.054156    0.0       True       0
19  51CTT  0.001222    0.0       True       0
              precision    recall 

In [61]:
fivefolds_7knew_1VGG_4LSTM = pd.read_csv('5folds7knew_1VGG_4LSTM.csv')
fivefolds_7knew_1VGG_4LSTM['result'] = np.where(fivefolds_7knew_1VGG_4LSTM['hyps'] >= 0.5,1,0)
print(fivefolds_7knew_1VGG_4LSTM.head(20))
fivefolds_14_result = fivefolds_7knew_1VGG_4LSTM['result'].tolist()
fivefolds_14_hyps = fivefolds_7knew_1VGG_4LSTM['hyps'].tolist()
fivefolds_14_truth = [int(i) for i in fivefolds_7knew_1VGG_4LSTM['truth'].tolist()]
print(classification_report(fivefolds_14_truth, fivefolds_14_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fivefolds_14_truth, fivefolds_14_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_14_truth), np.array(fivefolds_14_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))
print('acc: ',metrics.accuracy_score(fivefolds_14_truth, fivefolds_14_result))


     name          hyps  truth  ifcorrect  result
0   54CTT  1.450609e-03    0.0       True       0
1   78CTT  4.628095e-05    0.0       True       0
2   20CTT  9.881721e-01    1.0       True       1
3   74CTT  1.114335e-05    0.0       True       0
4   67CTT  9.978326e-01    1.0       True       1
5   58CTT  1.000000e+00    1.0       True       1
6   68CTT  4.555696e-06    0.0       True       0
7   19CTT  1.997055e-01    1.0      False       0
8   40CTT  4.232090e-03    1.0      False       0
9   94CTT  2.202225e-06    0.0       True       0
10  93CTT  3.787169e-05    0.0       True       0
11  92CTT  7.925399e-06    0.0       True       0
12  71CTT  2.637093e-04    0.0       True       0
13  81CTT  1.418594e-03    0.0       True       0
14  28CTT  9.943456e-01    1.0       True       1
15  47CTT  2.009057e-04    0.0       True       0
16  77CTT  5.019429e-07    0.0       True       0
17  63CTT  9.904920e-01    1.0       True       1
18  13CTT  8.291458e-04    0.0       True       0


In [64]:
fivefolds_9knew_4LSTM = pd.read_csv('5folds9knew_4LSTM.csv')
fivefolds_9knew_4LSTM['result'] = np.where(fivefolds_9knew_4LSTM['hyps'] >= 0.5,1,0)
print(fivefolds_9knew_4LSTM.head(20))
fivefolds_15_result = fivefolds_9knew_4LSTM['result'].tolist()
fivefolds_15_hyps = fivefolds_9knew_4LSTM['hyps'].tolist()
fivefolds_15_truth = [int(i) for i in fivefolds_9knew_4LSTM['truth'].tolist()]
print(classification_report(fivefolds_15_truth, fivefolds_15_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fivefolds_15_truth, fivefolds_15_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_15_truth), np.array(fivefolds_15_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))
print('acc: ',metrics.accuracy_score(fivefolds_15_truth, fivefolds_15_result))

     name      hyps  truth  ifcorrect  result
0   54CTT  0.035178    0.0       True       0
1   78CTT  0.021808    0.0       True       0
2   20CTT  0.706751    1.0       True       1
3   74CTT  0.002983    0.0       True       0
4   67CTT  0.742031    1.0       True       1
5   58CTT  0.999510    1.0       True       1
6   68CTT  0.000793    0.0       True       0
7   19CTT  0.265650    1.0      False       0
8   40CTT  0.237525    1.0      False       0
9   94CTT  0.000228    0.0       True       0
10  93CTT  0.005798    0.0       True       0
11  92CTT  0.003108    0.0       True       0
12  71CTT  0.019268    0.0       True       0
13  81CTT  0.038683    0.0       True       0
14  28CTT  0.599781    1.0       True       1
15  47CTT  0.003341    0.0       True       0
16  77CTT  0.000252    0.0       True       0
17  63CTT  0.597576    1.0       True       1
18  13CTT  0.028581    0.0       True       0
19  51CTT  0.000330    0.0       True       0
              precision    recall 

In [65]:
fivefolds_10knew_4LSTM = pd.read_csv('5folds10knew_4LSTM.csv')
fivefolds_10knew_4LSTM['result'] = np.where(fivefolds_10knew_4LSTM['hyps'] >= 0.5,1,0)
print(fivefolds_10knew_4LSTM.head(20))
fivefolds_16_result = fivefolds_10knew_4LSTM['result'].tolist()
fivefolds_16_hyps = fivefolds_10knew_4LSTM['hyps'].tolist()
fivefolds_16_truth = [int(i) for i in fivefolds_10knew_4LSTM['truth'].tolist()]
print(classification_report(fivefolds_16_truth, fivefolds_16_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fivefolds_16_truth, fivefolds_16_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_16_truth), np.array(fivefolds_16_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))
print('acc: ',metrics.accuracy_score(fivefolds_16_truth, fivefolds_16_result))

     name      hyps  truth  ifcorrect  result
0   54CTT  0.020113    0.0       True       0
1   78CTT  0.010459    0.0       True       0
2   20CTT  0.840241    1.0       True       1
3   74CTT  0.001149    0.0       True       0
4   67CTT  0.862413    1.0       True       1
5   58CTT  0.999867    1.0       True       1
6   68CTT  0.000158    0.0       True       0
7   19CTT  0.319566    1.0      False       0
8   40CTT  0.206404    1.0      False       0
9   94CTT  0.000050    0.0       True       0
10  93CTT  0.002548    0.0       True       0
11  92CTT  0.001333    0.0       True       0
12  71CTT  0.011274    0.0       True       0
13  81CTT  0.017805    0.0       True       0
14  28CTT  0.526960    1.0       True       1
15  47CTT  0.001342    0.0       True       0
16  77CTT  0.000056    0.0       True       0
17  63CTT  0.732828    1.0       True       1
18  13CTT  0.013778    0.0       True       0
19  51CTT  0.000091    0.0       True       0
              precision    recall 

In [66]:
fivefolds_8knew_1VGG_4LSTM = pd.read_csv('5folds8knew_1VGG_4LSTM.csv')
fivefolds_8knew_1VGG_4LSTM['result'] = np.where(fivefolds_8knew_1VGG_4LSTM['hyps'] >= 0.5,1,0)
print(fivefolds_8knew_1VGG_4LSTM.head(20))
fivefolds_16_result = fivefolds_8knew_1VGG_4LSTM['result'].tolist()
fivefolds_16_hyps = fivefolds_8knew_1VGG_4LSTM['hyps'].tolist()
fivefolds_16_truth = [int(i) for i in fivefolds_8knew_1VGG_4LSTM['truth'].tolist()]
print(classification_report(fivefolds_16_truth, fivefolds_16_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fivefolds_16_truth, fivefolds_16_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_16_truth), np.array(fivefolds_16_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))
print('acc: ',metrics.accuracy_score(fivefolds_16_truth, fivefolds_16_result))

     name          hyps  truth  ifcorrect  result
0   54CTT  6.829200e-04    0.0       True       0
1   78CTT  2.900924e-05    0.0       True       0
2   20CTT  9.983218e-01    1.0       True       1
3   74CTT  2.876987e-06    0.0       True       0
4   67CTT  9.995310e-01    1.0       True       1
5   58CTT  1.000000e+00    1.0       True       1
6   68CTT  2.159361e-06    0.0       True       0
7   19CTT  2.616837e-01    1.0      False       0
8   40CTT  4.192036e-03    1.0      False       0
9   94CTT  8.138483e-07    0.0       True       0
10  93CTT  2.055760e-05    0.0       True       0
11  92CTT  2.916012e-06    0.0       True       0
12  71CTT  1.338671e-04    0.0       True       0
13  81CTT  8.193093e-04    0.0       True       0
14  28CTT  9.982400e-01    1.0       True       1
15  47CTT  5.992237e-05    0.0       True       0
16  77CTT  2.193921e-07    0.0       True       0
17  63CTT  9.977006e-01    1.0       True       1
18  13CTT  1.406565e-04    0.0       True       0


In [68]:
fivefolds_9knew_1VGG_4LSTM = pd.read_csv('5folds9knew_1VGG_4LSTM.csv')
fivefolds_9knew_1VGG_4LSTM['result'] = np.where(fivefolds_8knew_1VGG_4LSTM['hyps'] >= 0.5,1,0)
print(fivefolds_9knew_1VGG_4LSTM.head(20))
fivefolds_16_result = fivefolds_9knew_1VGG_4LSTM['result'].tolist()
fivefolds_16_hyps = fivefolds_9knew_1VGG_4LSTM['hyps'].tolist()
fivefolds_16_truth = [int(i) for i in fivefolds_9knew_1VGG_4LSTM['truth'].tolist()]
print(classification_report(fivefolds_16_truth, fivefolds_16_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fivefolds_16_truth, fivefolds_16_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_16_truth), np.array(fivefolds_16_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))
print('acc: ',metrics.accuracy_score(fivefolds_16_truth, fivefolds_16_result))

     name          hyps  truth  ifcorrect  result
0   54CTT  2.692573e-04    0.0       True       0
1   78CTT  2.515412e-05    0.0       True       0
2   20CTT  9.993308e-01    1.0       True       1
3   74CTT  9.757995e-07    0.0       True       0
4   67CTT  9.997886e-01    1.0       True       1
5   58CTT  1.000000e+00    1.0       True       1
6   68CTT  1.736897e-06    0.0       True       0
7   19CTT  2.781648e-01    1.0      False       0
8   40CTT  3.130357e-03    1.0      False       0
9   94CTT  3.040484e-07    0.0       True       0
10  93CTT  1.273282e-05    0.0       True       0
11  92CTT  1.333850e-06    0.0       True       0
12  71CTT  7.111462e-05    0.0       True       0
13  81CTT  4.233341e-04    0.0       True       0
14  28CTT  9.988599e-01    1.0       True       1
15  47CTT  1.973417e-05    0.0       True       0
16  77CTT  1.448096e-07    0.0       True       0
17  63CTT  9.990877e-01    1.0       True       1
18  13CTT  8.942179e-05    0.0       True       0


In [69]:
fivefolds_11knew_4LSTM = pd.read_csv('5folds11knew_4LSTM.csv')
fivefolds_11knew_4LSTM['result'] = np.where(fivefolds_10knew_4LSTM['hyps'] >= 0.5,1,0)
print(fivefolds_11knew_4LSTM.head(20))
fivefolds_16_result = fivefolds_11knew_4LSTM['result'].tolist()
fivefolds_16_hyps = fivefolds_11knew_4LSTM['hyps'].tolist()
fivefolds_16_truth = [int(i) for i in fivefolds_11knew_4LSTM['truth'].tolist()]
print(classification_report(fivefolds_16_truth, fivefolds_16_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fivefolds_16_truth, fivefolds_16_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_16_truth), np.array(fivefolds_16_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))
print('acc: ',metrics.accuracy_score(fivefolds_16_truth, fivefolds_16_result))

     name      hyps  truth  ifcorrect  result
0   54CTT  0.010710    0.0       True       0
1   78CTT  0.005914    0.0       True       0
2   20CTT  0.921953    1.0       True       1
3   74CTT  0.000453    0.0       True       0
4   67CTT  0.931284    1.0       True       1
5   58CTT  0.999971    1.0       True       1
6   68CTT  0.000038    0.0       True       0
7   19CTT  0.401512    1.0      False       0
8   40CTT  0.188788    1.0      False       0
9   94CTT  0.000012    0.0       True       0
10  93CTT  0.001193    0.0       True       0
11  92CTT  0.000604    0.0       True       0
12  71CTT  0.007128    0.0       True       0
13  81CTT  0.008317    0.0       True       0
14  28CTT  0.500136    1.0       True       1
15  47CTT  0.000674    0.0       True       0
16  77CTT  0.000014    0.0       True       0
17  63CTT  0.848390    1.0       True       1
18  13CTT  0.006211    0.0       True       0
19  51CTT  0.000027    0.0       True       0
              precision    recall 

In [84]:
fivefolds_12knew_4LSTM = pd.read_csv('5folds12knew_4LSTM.csv')
fivefolds_12knew_4LSTM['result'] = np.where(fivefolds_10knew_4LSTM['hyps'] >= 0.5,1,0)
print(fivefolds_12knew_4LSTM.head(20))
fivefolds_16_result = fivefolds_12knew_4LSTM['result'].tolist()
fivefolds_16_hyps = fivefolds_12knew_4LSTM['hyps'].tolist()
fivefolds_16_truth = [int(i) for i in fivefolds_12knew_4LSTM['truth'].tolist()]
print(classification_report(fivefolds_16_truth, fivefolds_16_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(fivefolds_16_truth, fivefolds_16_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_16_truth), np.array(fivefolds_16_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))
print('acc: ',metrics.accuracy_score(fivefolds_16_truth, fivefolds_16_result))
CI(np.array(fivefolds_16_truth).astype(int), np.array(fivefolds_16_hyps))

     name      hyps  truth  ifcorrect  result
0   54CTT  0.004043    0.0       True       0
1   78CTT  0.002495    0.0       True       0
2   20CTT  0.947666    1.0       True       1
3   74CTT  0.000135    0.0       True       0
4   67CTT  0.952488    1.0       True       1
5   58CTT  0.999990    1.0       True       1
6   68CTT  0.000007    0.0       True       0
7   19CTT  0.400553    1.0      False       0
8   40CTT  0.129056    1.0      False       0
9   94CTT  0.000002    0.0       True       0
10  93CTT  0.000420    0.0       True       0
11  92CTT  0.000211    0.0       True       0
12  71CTT  0.003488    0.0       True       0
13  81CTT  0.002903    0.0       True       0
14  28CTT  0.393806    1.0      False       1
15  47CTT  0.000298    0.0       True       0
16  77CTT  0.000003    0.0       True       0
17  63CTT  0.888502    1.0       True       1
18  13CTT  0.002643    0.0       True       0
19  51CTT  0.000008    0.0       True       0
              precision    recall 

In [72]:
fivefolds_hubert8k = pd.read_csv('hubert8k.csv')
fivefolds_hubert8k['result'] = np.where(fivefolds_hubert8k['hyps'] >= 0.5,1,0)
print(fivefolds_hubert8k.head(20))
hubert_5_result = fivefolds_hubert8k['result'].tolist()
hubert_5_hyps = fivefolds_hubert8k['hyps'].tolist()
hubert_5_truth = [int(i) for i in fivefolds_hubert8k['truth'].tolist()]
print(classification_report(hubert_5_truth, hubert_5_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(hubert_5_truth, hubert_5_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(hubert_5_truth), np.array(hubert_5_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))
print('acc: ',metrics.accuracy_score(hubert_5_truth, hubert_5_result))

     name      hyps  truth  ifcorrect  result
0   54CTT  0.012861    0.0       True       0
1   78CTT  0.108073    0.0       True       0
2   20CTT  0.632107    1.0       True       1
3   74CTT  0.202457    0.0       True       0
4   67CTT  0.530283    1.0       True       1
5   58CTT  0.778579    1.0       True       1
6   68CTT  0.208427    0.0       True       0
7   19CTT  0.366086    1.0      False       0
8   40CTT  0.253161    1.0      False       0
9   94CTT  0.011712    0.0       True       0
10  93CTT  0.025295    0.0       True       0
11  92CTT  0.053716    0.0       True       0
12  71CTT  0.501822    0.0      False       1
13  81CTT  0.212715    0.0       True       0
14  28CTT  0.046798    1.0      False       0
15  47CTT  0.187449    0.0       True       0
16  77CTT  0.100794    0.0       True       0
17  63CTT  0.850330    1.0       True       1
18  13CTT  0.092615    0.0       True       0
19  51CTT  0.115007    0.0       True       0
              precision    recall 

In [73]:
fivefolds_hubert9k = pd.read_csv('hubert9k.csv')
fivefolds_hubert9k['result'] = np.where(fivefolds_hubert9k['hyps'] >= 0.5,1,0)
print(fivefolds_hubert9k.head(20))
hubert_5_result = fivefolds_hubert9k['result'].tolist()
hubert_5_hyps = fivefolds_hubert9k['hyps'].tolist()
hubert_5_truth = [int(i) for i in fivefolds_hubert9k['truth'].tolist()]
print(classification_report(hubert_5_truth, hubert_5_result, target_names=target_names))
tn, fp, fn, tp = confusion_matrix(hubert_5_truth, hubert_5_result).ravel()
print('sensitivuty: ',tp / (fn+tp))
print('specificity: ',tn / (fp+tn))
fpr, tpr, thresholds = metrics.roc_curve(np.array(hubert_5_truth), np.array(hubert_5_hyps), pos_label=1)
print('AUC: ',metrics.auc(fpr, tpr))
print('acc: ',metrics.accuracy_score(hubert_5_truth, hubert_5_result))

     name      hyps  truth  ifcorrect  result
0   54CTT  0.010377    0.0       True       0
1   78CTT  0.092956    0.0       True       0
2   20CTT  0.656797    1.0       True       1
3   74CTT  0.185290    0.0       True       0
4   67CTT  0.556344    1.0       True       1
5   58CTT  0.805548    1.0       True       1
6   68CTT  0.195515    0.0       True       0
7   19CTT  0.372748    1.0      False       0
8   40CTT  0.253181    1.0      False       0
9   94CTT  0.009772    0.0       True       0
10  93CTT  0.020642    0.0       True       0
11  92CTT  0.048664    0.0       True       0
12  71CTT  0.503271    0.0      False       1
13  81CTT  0.202300    0.0       True       0
14  28CTT  0.043055    1.0      False       0
15  47CTT  0.178176    0.0       True       0
16  77CTT  0.091276    0.0       True       0
17  63CTT  0.865737    1.0       True       1
18  13CTT  0.080094    0.0       True       0
19  51CTT  0.102079    0.0       True       0
              precision    recall 

In [88]:
best_acc = 0
best_auc = 0
best_acc_file = ''
best_auc_file = ''
for i in range(7, 13):
    if i == 11:
        continue
    fivefolds_12knew_4LSTM = pd.read_csv(f'5folds{i}knew_4LSTM.csv')
    print(f'5folds{i}knew_4LSTM.csv')
    fivefolds_12knew_4LSTM['result'] = np.where(fivefolds_10knew_4LSTM['hyps'] >= 0.5,1,0)
    #print(fivefolds_12knew_4LSTM.head(20))
    fivefolds_16_result = fivefolds_12knew_4LSTM['result'].tolist()
    fivefolds_16_hyps = fivefolds_12knew_4LSTM['hyps'].tolist()
    fivefolds_16_truth = [int(i) for i in fivefolds_12knew_4LSTM['truth'].tolist()]
    print(classification_report(fivefolds_16_truth, fivefolds_16_result, target_names=target_names))
    tn, fp, fn, tp = confusion_matrix(fivefolds_16_truth, fivefolds_16_result).ravel()
    print('sensitivuty: ',tp / (fn+tp))
    print('specificity: ',tn / (fp+tn))
    fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_16_truth), np.array(fivefolds_16_hyps), pos_label=1)
    auc = metrics.auc(fpr, tpr)
    print('AUC: ',auc)
    if auc > best_auc:
        best_auc = auc
        best_auc_file = f'5folds{i}knew_4LSTM.csv'
    acc = metrics.accuracy_score(fivefolds_16_truth, fivefolds_16_result)
    print('acc: ',acc)
    if acc > best_acc:
        best_acc = acc
        best_acc_file = f'5folds{i}knew_4LSTM.csv'
    CI(np.array(fivefolds_16_truth).astype(int), np.array(fivefolds_16_hyps))
print('best_acc', best_acc)
print('best_acc_file', best_acc_file)
print('best_auc', best_auc)
print('best_auc_file', best_auc_file)


5folds7knew_4LSTM.csv
              precision    recall  f1-score   support

 non-patient       0.88      0.95      0.91        56
     patient       0.89      0.78      0.83        32

    accuracy                           0.89        88
   macro avg       0.89      0.86      0.87        88
weighted avg       0.89      0.89      0.88        88

sensitivuty:  0.78125
specificity:  0.9464285714285714
AUC:  0.9095982142857142
acc:  0.8863636363636364
Confidence interval for the score: [0.856 - 0.956]
5folds8knew_4LSTM.csv
              precision    recall  f1-score   support

 non-patient       0.88      0.95      0.91        56
     patient       0.89      0.78      0.83        32

    accuracy                           0.89        88
   macro avg       0.89      0.86      0.87        88
weighted avg       0.89      0.89      0.88        88

sensitivuty:  0.78125
specificity:  0.9464285714285714
AUC:  0.9386160714285714
acc:  0.8863636363636364
Confidence interval for the score: [0.894

In [89]:
best_acc = 0
best_auc = 0
best_acc_file = ''
best_auc_file = ''
for i in range(6, 15):
    fivefolds_12knew_4LSTM = pd.read_csv(f'5folds{i}knew2_1VGG_4LSTM.csv')
    print(f'5folds{i}knew2_1VGG_4LSTM.csv')
    fivefolds_12knew_4LSTM['result'] = np.where(fivefolds_10knew_4LSTM['hyps'] >= 0.5,1,0)
    #print(fivefolds_12knew_4LSTM.head(20))
    fivefolds_16_result = fivefolds_12knew_4LSTM['result'].tolist()
    fivefolds_16_hyps = fivefolds_12knew_4LSTM['hyps'].tolist()
    fivefolds_16_truth = [int(i) for i in fivefolds_12knew_4LSTM['truth'].tolist()]
    print(classification_report(fivefolds_16_truth, fivefolds_16_result, target_names=target_names))
    tn, fp, fn, tp = confusion_matrix(fivefolds_16_truth, fivefolds_16_result).ravel()
    print('sensitivuty: ',tp / (fn+tp))
    print('specificity: ',tn / (fp+tn))
    fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_16_truth), np.array(fivefolds_16_hyps), pos_label=1)
    auc = metrics.auc(fpr, tpr)
    print('AUC: ',auc)
    if auc > best_auc:
        best_auc = auc
        best_auc_file = f'5folds{i}knew2_1VGG_4LSTM.csv'
    acc = metrics.accuracy_score(fivefolds_16_truth, fivefolds_16_result)
    print('acc: ',acc)
    if acc > best_acc:
        best_acc = acc
        best_acc_file = f'5folds{i}knew2_1VGG_4LSTM.csv'
    CI(np.array(fivefolds_16_truth).astype(int), np.array(fivefolds_16_hyps))
print('best_acc', best_acc)
print('best_acc_file', best_acc_file)
print('best_auc', best_auc)
print('best_auc_file', best_auc_file)

5folds6knew2_1VGG_4LSTM.csv
              precision    recall  f1-score   support

 non-patient       0.88      0.95      0.91        56
     patient       0.89      0.78      0.83        32

    accuracy                           0.89        88
   macro avg       0.89      0.86      0.87        88
weighted avg       0.89      0.89      0.88        88

sensitivuty:  0.78125
specificity:  0.9464285714285714
AUC:  0.9185267857142857
acc:  0.8863636363636364
Confidence interval for the score: [0.862 - 0.965]
5folds7knew2_1VGG_4LSTM.csv
              precision    recall  f1-score   support

 non-patient       0.88      0.95      0.91        56
     patient       0.89      0.78      0.83        32

    accuracy                           0.89        88
   macro avg       0.89      0.86      0.87        88
weighted avg       0.89      0.89      0.88        88

sensitivuty:  0.78125
specificity:  0.9464285714285714
AUC:  0.9202008928571429
acc:  0.8863636363636364
Confidence interval for the s

In [90]:
best_acc = 0
best_auc = 0
best_acc_file = ''
best_auc_file = ''
for i in range(6, 15):
    fivefolds_12knew_4LSTM = pd.read_csv(f'5folds{i}knew2_4LSTM.csv')
    print(f'5folds{i}knew2_4LSTM5.csv')
    fivefolds_12knew_4LSTM['result'] = np.where(fivefolds_10knew_4LSTM['hyps'] >= 0.5,1,0)
    #print(fivefolds_12knew_4LSTM.head(20))
    fivefolds_16_result = fivefolds_12knew_4LSTM['result'].tolist()
    fivefolds_16_hyps = fivefolds_12knew_4LSTM['hyps'].tolist()
    fivefolds_16_truth = [int(i) for i in fivefolds_12knew_4LSTM['truth'].tolist()]
    print(classification_report(fivefolds_16_truth, fivefolds_16_result, target_names=target_names))
    tn, fp, fn, tp = confusion_matrix(fivefolds_16_truth, fivefolds_16_result).ravel()
    print('sensitivuty: ',tp / (fn+tp))
    print('specificity: ',tn / (fp+tn))
    fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_16_truth), np.array(fivefolds_16_hyps), pos_label=1)
    auc = metrics.auc(fpr, tpr)
    print('AUC: ',auc)
    if auc > best_auc:
        best_auc = auc
        best_auc_file = f'5folds{i}knew2_4LSTM.csv'
    acc = metrics.accuracy_score(fivefolds_16_truth, fivefolds_16_result)
    print('acc: ',acc)
    if acc > best_acc:
        best_acc = acc
        best_acc_file = f'5folds{i}knew2_4LSTM.csv'
    CI(np.array(fivefolds_16_truth).astype(int), np.array(fivefolds_16_hyps))
print('best_acc', best_acc)
print('best_acc_file', best_acc_file)
print('best_auc', best_auc)
print('best_auc_file', best_auc_file)

5folds6knew2_4LSTM5.csv
              precision    recall  f1-score   support

 non-patient       0.88      0.95      0.91        56
     patient       0.89      0.78      0.83        32

    accuracy                           0.89        88
   macro avg       0.89      0.86      0.87        88
weighted avg       0.89      0.89      0.88        88

sensitivuty:  0.78125
specificity:  0.9464285714285714
AUC:  0.9001116071428571
acc:  0.8863636363636364
Confidence interval for the score: [0.845 - 0.949]
5folds7knew2_4LSTM5.csv
              precision    recall  f1-score   support

 non-patient       0.88      0.95      0.91        56
     patient       0.89      0.78      0.83        32

    accuracy                           0.89        88
   macro avg       0.89      0.86      0.87        88
weighted avg       0.89      0.89      0.88        88

sensitivuty:  0.78125
specificity:  0.9464285714285714
AUC:  0.9207589285714286
acc:  0.8863636363636364
Confidence interval for the score: [0

In [92]:
best_acc = 0
best_auc = 0
best_acc_file = ''
best_auc_file = ''
for i in range(6, 15):
    fivefolds_12knew_4LSTM = pd.read_csv(f'5folds{i}knew_oldASR_1VGG_4LSTM.csv')
    print(f'5folds{i}knew_oldASR_1VGG_4LSTM.csv')
    fivefolds_12knew_4LSTM['result'] = np.where(fivefolds_10knew_4LSTM['hyps'] >= 0.5,1,0)
    #print(fivefolds_12knew_4LSTM.head(20))
    fivefolds_16_result = fivefolds_12knew_4LSTM['result'].tolist()
    fivefolds_16_hyps = fivefolds_12knew_4LSTM['hyps'].tolist()
    fivefolds_16_truth = [int(i) for i in fivefolds_12knew_4LSTM['truth'].tolist()]
    print(classification_report(fivefolds_16_truth, fivefolds_16_result, target_names=target_names))
    tn, fp, fn, tp = confusion_matrix(fivefolds_16_truth, fivefolds_16_result).ravel()
    print('sensitivuty: ',tp / (fn+tp))
    print('specificity: ',tn / (fp+tn))
    fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_16_truth), np.array(fivefolds_16_hyps), pos_label=1)
    auc = metrics.auc(fpr, tpr)
    print('AUC: ',auc)
    if auc > best_auc:
        best_auc = auc
        best_auc_file = f'5folds{i}knew_oldASR_1VGG_4LSTM.csv'
    acc = metrics.accuracy_score(fivefolds_16_truth, fivefolds_16_result)
    print('acc: ',acc)
    if acc > best_acc:
        best_acc = acc
        best_acc_file = f'5folds{i}knew_oldASR_1VGG_4LSTM.csv'
    CI(np.array(fivefolds_16_truth).astype(int), np.array(fivefolds_16_hyps))
print('best_acc', best_acc)
print('best_acc_file', best_acc_file)
print('best_auc', best_auc)
print('best_auc_file', best_auc_file)

5folds6knew_oldASR_1VGG_4LSTM.csv
              precision    recall  f1-score   support

 non-patient       0.88      0.95      0.91        56
     patient       0.89      0.78      0.83        32

    accuracy                           0.89        88
   macro avg       0.89      0.86      0.87        88
weighted avg       0.89      0.89      0.88        88

sensitivuty:  0.78125
specificity:  0.9464285714285714
AUC:  0.9151785714285714
acc:  0.8863636363636364
Confidence interval for the score: [0.856 - 0.967]
5folds7knew_oldASR_1VGG_4LSTM.csv
              precision    recall  f1-score   support

 non-patient       0.88      0.95      0.91        56
     patient       0.89      0.78      0.83        32

    accuracy                           0.89        88
   macro avg       0.89      0.86      0.87        88
weighted avg       0.89      0.89      0.88        88

sensitivuty:  0.78125
specificity:  0.9464285714285714
AUC:  0.9202008928571429
acc:  0.8863636363636364
Confidence interv

In [93]:
best_acc = 0
best_auc = 0
best_acc_file = ''
best_auc_file = ''
for i in range(6, 15):
    fivefolds_12knew_4LSTM = pd.read_csv(f'5folds{i}knew_oldASR_4LSTM.csv')
    print(f'5folds{i}knew_oldASR_4LSTM.csv')
    fivefolds_12knew_4LSTM['result'] = np.where(fivefolds_10knew_4LSTM['hyps'] >= 0.5,1,0)
    #print(fivefolds_12knew_4LSTM.head(20))
    fivefolds_16_result = fivefolds_12knew_4LSTM['result'].tolist()
    fivefolds_16_hyps = fivefolds_12knew_4LSTM['hyps'].tolist()
    fivefolds_16_truth = [int(i) for i in fivefolds_12knew_4LSTM['truth'].tolist()]
    print(classification_report(fivefolds_16_truth, fivefolds_16_result, target_names=target_names))
    tn, fp, fn, tp = confusion_matrix(fivefolds_16_truth, fivefolds_16_result).ravel()
    print('sensitivuty: ',tp / (fn+tp))
    print('specificity: ',tn / (fp+tn))
    fpr, tpr, thresholds = metrics.roc_curve(np.array(fivefolds_16_truth), np.array(fivefolds_16_hyps), pos_label=1)
    auc = metrics.auc(fpr, tpr)
    print('AUC: ',auc)
    if auc > best_auc:
        best_auc = auc
        best_auc_file = f'5folds{i}knew_oldASR_4LSTM.csv'
    acc = metrics.accuracy_score(fivefolds_16_truth, fivefolds_16_result)
    print('acc: ',acc)
    if acc > best_acc:
        best_acc = acc
        best_acc_file = f'5folds{i}knew_oldASR_4LSTM.csv'
    CI(np.array(fivefolds_16_truth).astype(int), np.array(fivefolds_16_hyps))
print('best_acc', best_acc)
print('best_acc_file', best_acc_file)
print('best_auc', best_auc)
print('best_auc_file', best_auc_file)

5folds6knew_oldASR_4LSTM.csv
              precision    recall  f1-score   support

 non-patient       0.88      0.95      0.91        56
     patient       0.89      0.78      0.83        32

    accuracy                           0.89        88
   macro avg       0.89      0.86      0.87        88
weighted avg       0.89      0.89      0.88        88

sensitivuty:  0.78125
specificity:  0.9464285714285714
AUC:  0.8761160714285714
acc:  0.8863636363636364
Confidence interval for the score: [0.812 - 0.932]
5folds7knew_oldASR_4LSTM.csv
              precision    recall  f1-score   support

 non-patient       0.88      0.95      0.91        56
     patient       0.89      0.78      0.83        32

    accuracy                           0.89        88
   macro avg       0.89      0.86      0.87        88
weighted avg       0.89      0.89      0.88        88

sensitivuty:  0.78125
specificity:  0.9464285714285714
AUC:  0.8738839285714286
acc:  0.8863636363636364
Confidence interval for the