# Experiments on real-world data

In [1]:
%load_ext autoreload
%autoreload 2

from utils.train_medical import run_medical_experiments
from utils.results import (
    get_joint_medical_coverages, 
    get_medical_interval_widths, 
    load_medical_results, 
    get_uncorrected_medical_results
)

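To reproduce the results presented in the paper, run the three dataset sections below (MIMIC-III, EEG and COVID-19); each one trains the baselines and then reports their joint coverage and interval widths.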

## MIMIC-III dataset

In [2]:
# Train every baseline on MIMIC-III for seeds 0-4, saving both the model and
# the results of each run.
# NOTE: the recorded run of this cell stopped with a FileNotFoundError for
# 'data/mimic.p', so that file has to be present before this cell can finish.
for baseline in ('CFRNN', 'AdaptiveCFRNN', 'QRNN', 'DPRNN'):
    for seed in range(5):
        run_medical_experiments(
            dataset='mimic',
            baseline=baseline,
            seed=seed,
            save_model=True,
            save_results=True,
        )

Training CFRNN


FileNotFoundError: [Errno 2] No such file or directory: 'data/mimic.p'

In [15]:
# For each baseline, print its name followed by the joint coverage on
# MIMIC-III (seeds 0-4) as a LaTeX-ready "mean \(\pm\) std\%" string,
# then a blank separator line.
for baseline in ('CFRNN', 'AdaptiveCFRNN', 'QRNN', 'DPRNN'):
    print(baseline)
    coverages_mean, coverages_std = get_joint_medical_coverages(
        baseline, 'mimic', seeds=range(5)
    )
    print('{:.1f} \\(\\pm\\) {:.1f}\\%'.format(coverages_mean, coverages_std), end='\n\n')

CFRNN
94.0 \(\pm\) 1.2\%

AdaptiveCFRNN
75.0 \(\pm\) 37.5\%

QRNN
89.3 \(\pm\) 1.2\%

DPRNN
40.2 \(\pm\) 13.9\%



In [21]:
# For each baseline, print its name and then the two values returned by
# get_medical_interval_widths for MIMIC-III (seeds 0-4), unpacked here as
# mean and std, one per line and unformatted, followed by a blank line.
for baseline in ('CFRNN', 'DPRNN', 'QRNN'):
    print(baseline)
    widths_mean, widths_std = get_medical_interval_widths(
        baseline, 'mimic', seeds=range(5)
    )
    print(widths_mean)
    print(widths_std, end='\n\n')

CFRNN
20.59477424621582
3.076612983230133

DPRNN
3.594958412647247
0.8972864178894718

QRNN
16.159785747528076
3.921973974254315



## EEG dataset

In [23]:
# Train every baseline on the EEG dataset for seeds 0-4, saving both the
# model and the results of each run.
for baseline in ('CFRNN', 'QRNN', 'DPRNN'):
    for seed in range(5):
        run_medical_experiments(
            dataset='eeg',
            baseline=baseline,
            seed=seed,
            save_model=True,
            save_results=True,
        )

Training CFRNN
Epoch: 0	Train loss: 176.828530839346
Epoch: 50	Train loss: 72.87019629617339
Training CFRNN
Epoch: 0	Train loss: 178.46980163426076
Epoch: 50	Train loss: 69.71923409619377
Training CFRNN
Epoch: 0	Train loss: 171.19047687123123
Epoch: 50	Train loss: 77.84114200629077
Training CFRNN
Epoch: 0	Train loss: 172.37090971863387
Epoch: 50	Train loss: 139.98008124342243
Training CFRNN
Epoch: 0	Train loss: 172.20736364716464
Epoch: 50	Train loss: 118.57290893850974
Training QRNN
Epoch:  0 | train loss: 2.8654
Epoch:  1 | train loss: 2.9459
Epoch:  2 | train loss: 2.0752
Epoch:  3 | train loss: 1.5364
Epoch:  4 | train loss: 1.5655
Epoch:  5 | train loss: 1.7958
Epoch:  6 | train loss: 1.8150
Epoch:  7 | train loss: 1.4566
Epoch:  8 | train loss: 1.5117
Epoch:  9 | train loss: 1.2796
Training QRNN
Epoch:  0 | train loss: 2.8155
Epoch:  1 | train loss: 3.4734
Epoch:  2 | train loss: 3.2736
Epoch:  3 | train loss: 3.0898
Epoch:  4 | train loss: 3.3527
Epoch:  5 | train loss: 1.7739
E

In [25]:
# For each baseline, print its name followed by the joint coverage on the
# EEG dataset (seeds 0-4) as a LaTeX-ready "mean \(\pm\) std\%" string,
# then a blank separator line.
for baseline in ('CFRNN', 'QRNN', 'DPRNN'):
    print(baseline)
    coverages_mean, coverages_std = get_joint_medical_coverages(
        baseline, 'eeg', seeds=range(5)
    )
    print('{:.1f} \\(\\pm\\) {:.1f}\\%'.format(coverages_mean, coverages_std), end='\n\n')

CFRNN
96.5 \(\pm\) 1.0\%

QRNN
48.0 \(\pm\) 4.0\%

DPRNN
3.3 \(\pm\) 0.7\%



In [27]:
# For each baseline, print its name and then the two values returned by
# get_medical_interval_widths for the EEG dataset (seeds 0-4), unpacked here
# as mean and std, one per line and unformatted, followed by a blank line.
for baseline in ('CFRNN', 'DPRNN', 'QRNN'):
    print(baseline)
    widths_mean, widths_std = get_medical_interval_widths(
        baseline, 'eeg', seeds=range(5)
    )
    print(widths_mean)
    print(widths_std, end='\n\n')

CFRNN
61.863339309692385
18.023400935029958

DPRNN
7.387410955429077
0.7367469770792188

QRNN
21.385921783447266
2.356940865461466



## COVID-19 dataset

In [29]:
# Train every baseline on the COVID-19 dataset for seeds 0-4, saving both
# the model and the results of each run.
for baseline in ('CFRNN', 'QRNN', 'DPRNN'):
    for seed in range(5):
        run_medical_experiments(
            dataset='covid',
            baseline=baseline,
            seed=seed,
            save_model=True,
            save_results=True,
        )

Training CFRNN
Epoch: 0	Train loss: 19849.4248046875
Epoch: 50	Train loss: 14987.1103515625
Epoch: 100	Train loss: 12784.7021484375
Epoch: 150	Train loss: 16437.63427734375
Epoch: 200	Train loss: 11722.68603515625
Epoch: 250	Train loss: 15581.306396484375
Epoch: 300	Train loss: 10658.34130859375
Epoch: 350	Train loss: 8696.23583984375
Epoch: 400	Train loss: 9109.50390625
Epoch: 450	Train loss: 10849.78125
Epoch: 500	Train loss: 11905.59814453125
Epoch: 550	Train loss: 8048.343505859375
Epoch: 600	Train loss: 7777.374267578125
Epoch: 650	Train loss: 7038.0400390625
Epoch: 700	Train loss: 10878.801025390625
Epoch: 750	Train loss: 5562.97900390625
Epoch: 800	Train loss: 5780.314453125
Epoch: 850	Train loss: 6390.28076171875
Epoch: 900	Train loss: 5894.050537109375
Epoch: 950	Train loss: 10112.148315429688
Training CFRNN
Epoch: 0	Train loss: 22146.2509765625
Epoch: 50	Train loss: 18750.36279296875
Epoch: 100	Train loss: 16809.369140625
Epoch: 150	Train loss: 14085.14453125
Epoch: 200	Train

In [31]:
# For each baseline, print its name followed by the joint coverage on the
# COVID-19 dataset (seeds 0-4) as a LaTeX-ready "mean \(\pm\) std\%" string,
# then a blank separator line.
for baseline in ('CFRNN', 'QRNN', 'DPRNN'):
    print(baseline)
    coverages_mean, coverages_std = get_joint_medical_coverages(
        baseline, 'covid', seeds=range(5)
    )
    print('{:.1f} \\(\\pm\\) {:.1f}\\%'.format(coverages_mean, coverages_std), end='\n\n')

CFRNN
89.7 \(\pm\) 5.3\%

QRNN
15.0 \(\pm\) 5.9\%

DPRNN
0.0 \(\pm\) 0.0\%



In [32]:
# For each baseline, print its name and then the two values returned by
# get_medical_interval_widths for the COVID-19 dataset (seeds 0-4), unpacked
# here as mean and std, one per line and unformatted, followed by a blank line.
for baseline in ('CFRNN', 'DPRNN', 'QRNN'):
    print(baseline)
    widths_mean, widths_std = get_medical_interval_widths(
        baseline, 'covid', seeds=range(5)
    )
    print(widths_mean)
    print(widths_std, end='\n\n')

CFRNN
733.9547253723144
582.5152458938113

DPRNN
61.18421086502075
32.372609877768895

QRNN
136.56350823974608
63.3235278998503



## Ablation: Uncorrected calibration scores

#### MIMIC-III

In [53]:
# CFRNN joint coverage on MIMIC-III (seeds 0-4) with correct_conformal=True,
# printed as a LaTeX-ready "mean \(\pm\) std\%" string.
coverages_mean, coverages_std = get_joint_medical_coverages(
    'CFRNN', 'mimic', seeds=range(5), correct_conformal=True
)
print('{:.1f} \\(\\pm\\) {:.1f}\\%'.format(coverages_mean, coverages_std))

94.0 \(\pm\) 1.2\%


In [54]:
# CFRNN joint coverage on MIMIC-III (seeds 0-4) with correct_conformal=False,
# printed as a LaTeX-ready "mean \(\pm\) std\%" string.
# NOTE(review): presumably this is the "uncorrected calibration scores"
# variant named in the section heading - confirm against utils.results.
coverages_mean, coverages_std = get_joint_medical_coverages(
    'CFRNN', 'mimic', seeds=range(5), correct_conformal=False
)
print('{:.1f} \\(\\pm\\) {:.1f}\\%'.format(coverages_mean, coverages_std))

89.0 \(\pm\) 1.4\%


In [62]:
# For each seed, load the saved CFRNN results on MIMIC-III and print the
# smallest and largest 'Mean independent coverage' entry, scaled by 100 and
# formatted as a LaTeX-ready "[min\%, max\%]" range.
for seed in range(5):
    results = load_medical_results(dataset='mimic', baseline='CFRNN', seed=seed)
    independent_coverages = results['Mean independent coverage']
    print('[{:.1f}\\%, {:.1f}\\%]'.format(
        independent_coverages.min() * 100,
        independent_coverages.max() * 100,
    ))

[94.0\%, 94.0\%]
[95.2\%, 95.4\%]
[94.0\%, 95.2\%]
[96.0\%, 96.8\%]
[93.8\%, 94.0\%]


In [55]:
# For each seed, fetch the uncorrected results on MIMIC-III and print the
# smallest and largest 'Mean independent coverage' entry, scaled by 100 and
# formatted as a LaTeX-ready "[min\%, max\%]" range.
for seed in range(5):
    uncorrected_mimic_results = get_uncorrected_medical_results(dataset='mimic', seed=seed)
    independent_coverages = uncorrected_mimic_results['Mean independent coverage']
    print('[{:.1f}\\%, {:.1f}\\%]'.format(
        independent_coverages.min() * 100,
        independent_coverages.max() * 100,
    ))

[89.4\%, 89.8\%]
[91.2\%, 91.4\%]
[89.0\%, 90.0\%]
[90.8\%, 91.4\%]
[90.4\%, 91.2\%]


#### EEG

In [51]:
# CFRNN joint coverage on the EEG dataset (seeds 0-4) with
# correct_conformal=True, printed as a LaTeX-ready "mean \(\pm\) std\%" string.
coverages_mean, coverages_std = get_joint_medical_coverages(
    'CFRNN', 'eeg', seeds=range(5), correct_conformal=True
)
print('{:.1f} \\(\\pm\\) {:.1f}\\%'.format(coverages_mean, coverages_std))

96.5 \(\pm\) 1.0\%


In [52]:
# CFRNN joint coverage on the EEG dataset (seeds 0-4) with
# correct_conformal=False, printed as a LaTeX-ready "mean \(\pm\) std\%" string.
# NOTE(review): presumably this is the "uncorrected calibration scores"
# variant named in the section heading - confirm against utils.results.
coverages_mean, coverages_std = get_joint_medical_coverages(
    'CFRNN', 'eeg', seeds=range(5), correct_conformal=False
)
print('{:.1f} \\(\\pm\\) {:.1f}\\%'.format(coverages_mean, coverages_std))

59.4 \(\pm\) 2.4\%


In [61]:
# For each seed, load the saved CFRNN results on the EEG dataset and print
# the smallest and largest 'Mean independent coverage' entry, scaled by 100
# and formatted as a LaTeX-ready "[min\%, max\%]" range.
for seed in range(5):
    results = load_medical_results(dataset='eeg', baseline='CFRNN', seed=seed)
    independent_coverages = results['Mean independent coverage']
    print('[{:.1f}\\%, {:.1f}\\%]'.format(
        independent_coverages.min() * 100,
        independent_coverages.max() * 100,
    ))

[98.5\%, 99.8\%]
[98.5\%, 99.5\%]
[98.3\%, 99.7\%]
[98.9\%, 99.8\%]
[98.3\%, 99.3\%]


In [56]:
# For each seed, fetch the uncorrected results on the EEG dataset and print
# the smallest and largest 'Mean independent coverage' entry, scaled by 100
# and formatted as a LaTeX-ready "[min\%, max\%]" range.
# The results variable is named after the dataset it actually holds (EEG);
# it was previously a copy-pasted MIMIC name.
for seed in range(5):
    uncorrected_eeg_results = get_uncorrected_medical_results(dataset='eeg', seed=seed)
    independent_coverages = uncorrected_eeg_results['Mean independent coverage']
    print('[{:.1f}\\%, {:.1f}\\%]'.format(
        independent_coverages.min() * 100,
        independent_coverages.max() * 100,
    ))

[86.3\%, 91.4\%]
[85.7\%, 91.2\%]
[86.0\%, 90.6\%]
[87.4\%, 91.6\%]
[85.5\%, 90.8\%]


#### COVID-19

In [57]:
# CFRNN joint coverage on the COVID-19 dataset (seeds 0-4) with
# correct_conformal=True, printed as a LaTeX-ready "mean \(\pm\) std\%" string.
coverages_mean, coverages_std = get_joint_medical_coverages(
    'CFRNN', 'covid', seeds=range(5), correct_conformal=True
)
print('{:.1f} \\(\\pm\\) {:.1f}\\%'.format(coverages_mean, coverages_std))

89.7 \(\pm\) 5.3\%


In [58]:
# CFRNN joint coverage on the COVID-19 dataset (seeds 0-4) with
# correct_conformal=False, printed as a LaTeX-ready "mean \(\pm\) std\%" string.
# NOTE(review): presumably this is the "uncorrected calibration scores"
# variant named in the section heading - confirm against utils.results.
coverages_mean, coverages_std = get_joint_medical_coverages(
    'CFRNN', 'covid', seeds=range(5), correct_conformal=False
)
print('{:.1f} \\(\\pm\\) {:.1f}\\%'.format(coverages_mean, coverages_std))

55.5 \(\pm\) 8.0\%


In [63]:
# For each seed, load the saved CFRNN results on the COVID-19 dataset and
# print the smallest and largest 'Mean independent coverage' entry, scaled
# by 100 and formatted as a LaTeX-ready "[min\%, max\%]" range.
for seed in range(5):
    results = load_medical_results(dataset='covid', baseline='CFRNN', seed=seed)
    independent_coverages = results['Mean independent coverage']
    print('[{:.1f}\\%, {:.1f}\\%]'.format(
        independent_coverages.min() * 100,
        independent_coverages.max() * 100,
    ))

[95.0\%, 100.0\%]
[93.8\%, 100.0\%]
[96.2\%, 100.0\%]
[87.5\%, 98.8\%]
[96.2\%, 100.0\%]


In [59]:
# For each seed, fetch the uncorrected results on the COVID-19 dataset and
# print the smallest and largest 'Mean independent coverage' entry, scaled
# by 100 and formatted as a LaTeX-ready "[min\%, max\%]" range.
# The results variable is named after the dataset it actually holds
# (COVID-19); it was previously a copy-pasted MIMIC name.
for seed in range(5):
    uncorrected_covid_results = get_uncorrected_medical_results(dataset='covid', seed=seed)
    independent_coverages = uncorrected_covid_results['Mean independent coverage']
    print('[{:.1f}\\%, {:.1f}\\%]'.format(
        independent_coverages.min() * 100,
        independent_coverages.max() * 100,
    ))

[81.2\%, 98.8\%]
[81.2\%, 98.8\%]
[87.5\%, 98.8\%]
[77.5\%, 95.0\%]
[85.0\%, 96.2\%]
