-
Notifications
You must be signed in to change notification settings - Fork 0
/
plot_results.py
163 lines (144 loc) · 7 KB
/
plot_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
"""Script to plot results as reported in the paper."""
import numpy as np
import pandas as pd
import json
import os
import matplotlib.pyplot as plt
import seaborn as sns
def load_auc_ici(folders, dependence_on="n_samples", n_folds=10):
    """Load AUC and ICI (computed on test set) for a list of folders, for all the models considered in the study.

    For every model ("cnn", "xgb", "lr") and every folder, this reads
    ``results/<model>/<folder>/config.json`` together with the ``auc_test.npy``
    and ``ici_test.npy`` arrays stored in the same directory.

    Input:
        - folders (iterable of str): folder names (any iterable, e.g. a list or ``dict.values()``)
        - dependence_on (str): config key used to index the results ("n_samples" for dependence on size,
          "ratio" for dependence on balancing ratio). Default "n_samples"
        - n_folds (int): number of folds in training, i.e. the length of each stored array (Default 10)
    Return:
        - two dictionaries (AUC first, ICI second) with model names as keys and dataframes as values.
          Each dataframe has one row per fold and one column per folder, labelled with the value of
          ``dependence_on`` read from that folder's config.
    """
    # Materialise once so that generators and dict views can be sized and re-iterated for every model.
    folders = list(folders)
    auc_dict = {}
    ici_dict = {}
    for model_name in ["cnn", "xgb", "lr"]:
        # Fresh buffers for every model: pandas may keep a view of the array used for the column labels,
        # so sharing one buffer across models would let a later model overwrite an earlier model's labels.
        sizes_or_ratios = np.zeros(len(folders), dtype=float)
        auc = np.zeros((len(folders), n_folds))
        ici = np.zeros((len(folders), n_folds))
        for i, folder in enumerate(folders):
            path = os.path.join('results', model_name, folder)
            with open(os.path.join(path, 'config.json')) as file:
                config = json.load(file)
            value = config[dependence_on]
            if dependence_on == "ratio" and value is None:
                # A missing ratio is mapped to 0.125 -- presumably the original (un-balanced) event
                # fraction of the dataset; confirm against the training configuration.
                value = 0.125
            sizes_or_ratios[i] = value
            auc[i, :] = np.load(os.path.join(path, "auc_test.npy"))
            ici[i, :] = np.load(os.path.join(path, "ici_test.npy"))
        # Transpose so that folds run along the rows and folders along the columns.
        auc_dict[model_name] = pd.DataFrame(auc.transpose(), columns=sizes_or_ratios)
        ici_dict[model_name] = pd.DataFrame(ici.transpose(), columns=sizes_or_ratios)
    return auc_dict, ici_dict
def _plot_models(axis, results, models, palette, ci_factor, **errorbar_kwargs):
    """Draw one error-bar curve (fold mean +/- std * ci_factor) per model on ``axis``.

    ``results`` maps model name to a dataframe with one row per fold; its column labels are used as x values.
    """
    for color, model in zip(palette, models):
        frame = results[model]
        axis.errorbar(frame.columns, frame.mean(), frame.std() * ci_factor, label=model, color=color,
                      **errorbar_kwargs)


def main():
    """
    Plot AUC and ICI against training-set size and against balancing ratio, saving PNGs in "results/figures/".

    Before running this script, one has to create a json file with the names of the directories that have to be
    analyzed. The file has to be structured as follows: keys are strings indicating the case-study
    ("size_dependence_original", "size_dependence_balanced" or "balance_dependence"); values are dictionaries with
    folders to be analyzed as values and corresponding sizes/ratios as keys.
    Example:
    {
        "size_dependence_original" : {
            "250": "N250_2023-05-29",
            "300": "N300_2023-05-29"
        },
        "size_dependence_balanced" : {
            "250": "N250_balanced05_2023-05-29",
            "300": "N300_balanced05_2023-05-29"
        },
        "balance_dependence" : {
            "0.125": "N250_2023-05-29",
            "0.25": "N250_balanced025_2023-05-29",
            "0.375": "N250_balanced0375_2023-05-29",
            "0.5": "N250_balanced05_2023-05-29"
        }
    }
    """
    folders_json = 'results/folders2use.json'  # json file with directory names. Change it if necessary.
    with open(folders_json) as file:
        folders_dict = json.load(file)

    # Factor to compute 95% CI from standard deviation
    n_folds = 10
    ci_factor = 1.96 / np.sqrt(n_folds)

    # Set plot configurations
    figsize = (16, 6)
    labels_size = 15
    legend_size = 14
    dpi = 600
    errorbar_style = dict(fmt='.-', markersize=10, capsize=3, linewidth=2.5)
    figures_dir = "results/figures/"
    os.makedirs(figures_dir, exist_ok=True)
    palette = sns.color_palette("deep")
    sns.set_style("white")
    models = ["cnn", "xgb", "lr"]

    # --- SIZE DEPENDENCE ---
    folders = folders_dict["size_dependence_original"].values()  # folder names (size dependence, original ratio)
    auc, ici = load_auc_ici(folders, n_folds=n_folds)
    folders_bal = folders_dict["size_dependence_balanced"].values()  # folder names (size dependence, balanced)
    auc_bal, ici_bal = load_auc_ici(folders_bal, n_folds=n_folds)

    # One two-panel figure per metric: original event fraction on the left, balanced on the right.
    size_figures = (
        ("AUC", auc, auc_bal, "lower right", 'auc_vs_size.png'),
        ("ICI", ici, ici_bal, "best", 'ici_vs_size.png'),
    )
    for metric, original, balanced, legend_loc, filename in size_figures:
        fig, ax = plt.subplots(1, 2, figsize=figsize, sharey=True, dpi=dpi)
        _plot_models(ax[0], original, models, palette, ci_factor, **errorbar_style)
        # Each panel is plotted against the sizes of its own runs, so the balanced curve stays correct
        # even when the two case-studies do not list exactly the same sizes.
        _plot_models(ax[1], balanced, models, palette, ci_factor, **errorbar_style)
        ax[0].set_title("Original fraction", fontsize=labels_size)
        ax[1].set_title("Balanced to 0.5", fontsize=labels_size)
        ax[0].set_ylabel(metric, fontsize=labels_size)
        for axes in ax:
            axes.set_xlabel("# of ECGs", fontsize=labels_size)
            axes.set_xscale('log')
            axes.legend(loc=legend_loc, fontsize=legend_size)
            axes.grid(axis='y', alpha=0.5)
        plt.autoscale()
        fig.subplots_adjust(wspace=0.1)
        fig.savefig(figures_dir + filename, bbox_inches='tight')
        plt.close(fig)  # pyplot keeps figures alive until closed; release the 600-dpi canvas

    # --- BALANCE RATIO DEPENDENCE ---
    folders = folders_dict["balance_dependence"].values()
    auc, ici = load_auc_ici(folders, dependence_on="ratio", n_folds=n_folds)

    # AUC and ICI plot
    fig, ax = plt.subplots(1, 2, figsize=figsize, dpi=dpi)
    _plot_models(ax[0], auc, models, palette, ci_factor, **errorbar_style)
    # The ICI error bars come from the ICI spread across folds (not from the AUC spread).
    _plot_models(ax[1], ici, models, palette, ci_factor, **errorbar_style)
    for axes in ax:
        axes.set_xlabel("Event fraction in training", fontsize=labels_size)
        axes.set_xscale('log')
        axes.legend(loc="best", fontsize=legend_size)
        axes.grid(axis='y', alpha=0.5)
    ax[0].set_ylabel("AUC", fontsize=labels_size)
    ax[1].set_ylabel("ICI", fontsize=labels_size)
    plt.autoscale()
    fig.savefig(figures_dir + 'auc_ici_vs_ratio.png', bbox_inches='tight')
    plt.close(fig)
# Generate the figures only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()