In [53]:
CALCULATE_NEW_METRICS = True
filename = 'all_statistics.pkl'

In [2]:
from pathlib import Path
import pickle as pkl
import glob
import numpy as np

import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

from neuralhydrology.evaluation import metrics

In [3]:
%matplotlib notebook
%matplotlib notebook

In [4]:
metrics_list = metrics.get_available_metrics()
metrics_list.remove('MSE')
metrics_list.remove('RMSE')
metrics_list.remove('FHV')
metrics_list.remove('FMS')
metrics_list.remove('FLV')
metrics_list

['NSE',
 'KGE',
 'Alpha-NSE',
 'Pearson-r',
 'Beta-NSE',
 'Peak-Timing-Error',
 'Peak-Timing-Abs-Error',
 'Missed-Peaks',
 'Peak-Abs-Bias']

In [5]:
basin_file = '../531_basin_list.txt'
autoregression_50_holdout_dir = '../runs/autoregression_50_holdout/test'
autoregression_holdout_dir = '../runs/autoregression_holdout/test'
autoregression_noholdout_dir = '../runs/autoregression_noholdout/test'
assimilation_dir = '../runs/simulation/assimilation/'
simulation_dir = '../runs/simulation'

In [6]:
with Path(basin_file).open('r') as fp:
    basins = sorted(basin.strip() for basin in fp if basin.strip())
print(f"There are {len(basins)} basins.")

There are 531 basins.


In [7]:
simulation_run_dirs = glob.glob(simulation_dir + '/simulation_*')
for i, run_dir in enumerate(simulation_run_dirs):
    simulation_run_dirs[i] = run_dir.split('/')[-1]
print(f"There are {len(simulation_run_dirs)} simulation runs.")

There are 1 simulation runs.


In [8]:
assimilation_run_dirs = glob.glob(assimilation_dir + '/**')
for i, run_dir in enumerate(assimilation_run_dirs):
    assimilation_run_dirs[i] = run_dir.split('/')[-1]
print(f"There are {len(assimilation_run_dirs)} assimilation runs.")

There are 60 assimilation runs.


In [9]:
autoregression_noholdout_run_dirs = glob.glob(autoregression_noholdout_dir + '/**')
for i, run_dir in enumerate(autoregression_noholdout_run_dirs):
    autoregression_noholdout_run_dirs[i] = run_dir.split('/')[-1]
print(f"There are {len(autoregression_noholdout_run_dirs)} no-holdout autoregression runs.")

There are 60 no-holdout autoregression runs.


In [10]:
autoregression_50_holdout_run_dirs = glob.glob(autoregression_50_holdout_dir + '/**')
for i, run_dir in enumerate(autoregression_50_holdout_run_dirs):
    autoregression_50_holdout_run_dirs[i] = run_dir.split('/')[-1]
print(f"There are {len(autoregression_50_holdout_run_dirs)} 50% holdout autoregression runs.")

There are 60 50% holdout autoregression runs.


In [48]:
autoregression_holdout_run_dirs = glob.glob(autoregression_holdout_dir + '/**')
for i, run_dir in enumerate(autoregression_holdout_run_dirs):
    autoregression_holdout_run_dirs[i] = run_dir.split('/')[-1]
print(f"There are {len(autoregression_holdout_run_dirs)} variable fraction holdout autoregression runs.")

There are 100 variable fraction holdout autoregression runs.


In [12]:
lead_times = []
holdout_fractions = []
ensembles = []
for run in autoregression_50_holdout_run_dirs:
    holdout_fractions.append(float(run.split('_')[0]))    
    lead_times.append(int(run.split('_')[1]))  
    ensembles.append(int(run.split('_')[2]))

holdout_fractions = sorted(list(set(holdout_fractions)))
lead_times = sorted(list(set(lead_times)))
ensembles = sorted(list(set(ensembles)))

In [13]:
run_index = pd.MultiIndex.from_product((holdout_fractions, lead_times, ensembles))
fracs_index = pd.MultiIndex.from_product((holdout_fractions, holdout_fractions, ensembles))

autoregression_holdout_statistics = {}
autoregression_noholdout_statistics = {}
autoregression_fractional_holdout_statistics = {}
assimilation_statistics = {}
simulation_statistics = {}

for metric in metrics_list:
    autoregression_fractional_holdout_statistics[metric] = pd.DataFrame(index=basins, 
                                                                        columns=fracs_index,
                                                                        dtype=np.float64)

    autoregression_holdout_statistics[metric] = pd.DataFrame(index=basins, 
                                                             columns=run_index,
                                                             dtype=np.float64)
    autoregression_noholdout_statistics[metric] = pd.DataFrame(index=basins, 
                                                               columns=run_index,
                                                               dtype=np.float64)
    assimilation_statistics[metric] = pd.DataFrame(index=basins, 
                                                   columns=run_index,
                                                   dtype=np.float64)
    assimilation_statistics[metric] = pd.DataFrame(index=basins, 
                                                   columns=run_index,
                                                   dtype=np.float64)
    simulation_statistics[metric] = pd.DataFrame(index=basins, 
                                                 columns=run_index,
                                                 dtype=np.float64)

In [14]:
xr = {}
for run in assimilation_run_dirs:
    hf = float(run.split('_')[0])   
    if hf == 0.0:
        try:
            with open(assimilation_dir + '/' + run + '/test/model_epoch030/test_results.p', 'rb') as f:
                run_data = pkl.load(f)
            for basin in basins:
                xr[basin] = run_data[basin]['1D']['xr']['QObs(mm/d)_obs']
            break
        except:
            pass

In [64]:
if CALCULATE_NEW_METRICS:
    for run in simulation_run_dirs:

        en = int(run.split('_')[1])
        with open(simulation_dir + '/' + run + '/test/model_epoch030/test_results.p', 'rb') as f:
            run_data = pkl.load(f)

        for basin in basins:

            sim = run_data[basin]['1D']['xr'].stack(datetime=['date', 'time_step'])['QObs(mm/d)_sim']
            sim['datetime'] = sim.coords['date']# + sim.coords['time_step']
            obs = xr[basin].stack(datetime=['date', 'time_step'])
            obs['datetime'] = obs.coords['date']# + obs.coords['time_step']
            basin_metrics = metrics.calculate_metrics(obs=obs, sim=sim, metrics=metrics_list)

            for metric in metrics_list:
                simulation_statistics[metric].loc[basin, (0, 1, en)] = basin_metrics[metric]

In [16]:
if CALCULATE_NEW_METRICS:
    for run in tqdm(autoregression_50_holdout_run_dirs):
        hf = float(run.split('_')[0])   
        lt = int(run.split('_')[1])
        en = int(run.split('_')[2])

        with open(autoregression_50_holdout_dir + '/' + run + '/test/model_epoch030/test_results.p', 'rb') as f:
            run_data = pkl.load(f)

        for basin in basins:
            sim = run_data[basin]['1D']['xr'].stack(datetime=['date', 'time_step'])['QObs(mm/d)_sim']
            sim['datetime'] = sim.coords['date']# + sim.coords['time_step']
            obs = xr[basin].stack(datetime=['date', 'time_step'])
            obs['datetime'] = obs.coords['date']# + obs.coords['time_step']
            basin_metrics = metrics.calculate_metrics(obs=obs, sim=sim, metrics=metrics_list)

            for metric in metrics_list:
                autoregression_holdout_statistics[metric].loc[basin, (hf, lt, en)] = basin_metrics[metric]

In [17]:
if CALCULATE_NEW_METRICS:
    for run in tqdm(autoregression_noholdout_run_dirs):
        hf = float(run.split('_')[0])   
        lt = int(run.split('_')[1])
        en = int(run.split('_')[2])

        with open(autoregression_noholdout_dir + '/' + run + '/test/model_epoch030/test_results.p', 'rb') as f:
            run_data = pkl.load(f)

        for basin in basins:
            sim = run_data[basin]['1D']['xr'].stack(datetime=['date', 'time_step'])['QObs(mm/d)_sim']
            sim['datetime'] = sim.coords['date']# + sim.coords['time_step']
            obs = xr[basin].stack(datetime=['date', 'time_step'])
            obs['datetime'] = obs.coords['date']# + obs.coords['time_step']
            basin_metrics = metrics.calculate_metrics(obs=obs, sim=sim, metrics=metrics_list)

            for metric in metrics_list:
                autoregression_noholdout_statistics[metric].loc[basin, (hf, lt, en)] = basin_metrics[metric]

In [18]:
if CALCULATE_NEW_METRICS:
    for run in tqdm(assimilation_run_dirs):
        hf = float(run.split('_')[0])   
        lt = int(run.split('_')[1])
        en = int(run.split('_')[2])
        with open(assimilation_dir + '/' + run + '/test/model_epoch030/test_results.p', 'rb') as f:
            run_data = pkl.load(f)

        for basin in basins:
            sim = run_data[basin]['1D']['xr'].stack(datetime=['date', 'time_step'])['QObs(mm/d)_sim']
            sim['datetime'] = sim.coords['date']# + sim.coords['time_step']
            obs = xr[basin].stack(datetime=['date', 'time_step'])
            obs['datetime'] = obs.coords['date']# + obs.coords['time_step']
            basin_metrics = metrics.calculate_metrics(obs=obs, sim=sim, metrics=metrics_list)

            for metric in metrics_list:
                assimilation_statistics[metric].loc[basin, (hf, lt, en)] = basin_metrics[metric]

In [55]:
if CALCULATE_NEW_METRICS:
    for run in tqdm(autoregression_holdout_run_dirs):
        hf1 = float(run.split('_')[0])   
        hf2 = float(run.split('_')[1])
        en = int(run.split('_')[2])

        with open(autoregression_holdout_dir + '/' + run + '/test/model_epoch030/test_results.p', 'rb') as f:
            run_data = pkl.load(f)

        for basin in basins:
            sim = run_data[basin]['1D']['xr'].stack(datetime=['date', 'time_step'])['QObs(mm/d)_sim']
            sim['datetime'] = sim.coords['date']# + sim.coords['time_step']
            obs = xr[basin].stack(datetime=['date', 'time_step'])
            obs['datetime'] = obs.coords['date']# + obs.coords['time_step']
            basin_metrics = metrics.calculate_metrics(obs=obs, sim=sim, metrics=metrics_list)

            for metric in metrics_list:
                autoregression_fractional_holdout_statistics[metric].loc[basin, (hf1, hf2, en)] = basin_metrics[metric]


HBox(children=(HTML(value=''), FloatProgress(value=0.0), HTML(value='')))




In [20]:
if CALCULATE_NEW_METRICS:
    with open(filename, 'wb') as f:
        pkl.dump([
            simulation_statistics,
            autoregression_holdout_statistics,
            autoregression_noholdout_statistics,
            autoregression_fractional_noholdout_statistics, 
            assimilation_statistics
        ], f)

In [21]:
with open(filename, 'rb') as f:
    simulation_statistics, \
    autoregression_holdout_statistics, \
    autoregression_noholdout_statistics, \
    autoregression_fractional_noholdout_statistics, \
    assimilation_statistics = pkl.load(f)

EOFError: Ran out of input

In [85]:
autoregression_holdout_medians = {}
autoregression_noholdout_medians = {}
for metric in metrics_list:
    autoregression_holdout_medians[metric] = pd.DataFrame(index=holdout_fractions, 
                                                          columns=lead_times,
                                                          dtype=np.float64)
    autoregression_noholdout_medians[metric] = pd.DataFrame(index=holdout_fractions, 
                                                            columns=lead_times,
                                                            dtype=np.float64)
    for hf in holdout_fractions:
        for lt in lead_times:
            autoregression_holdout_medians[metric].loc[hf, lt] = \
            autoregression_holdout_statistics[metric][(hf, lt, 0)].median()

            autoregression_noholdout_medians[metric].loc[hf, lt] = \
            autoregression_noholdout_statistics[metric][(hf, lt, 0)].median()
            
assimilation_medians = {}
for metric in metrics_list:
    assimilation_medians[metric] = pd.DataFrame(index=holdout_fractions, 
                                                columns=lead_times,
                                                dtype=np.float64)
    for hf in holdout_fractions:
        for lt in lead_times:
            assimilation_medians[metric].loc[hf, lt] = \
            assimilation_statistics[metric][(hf, lt, 0)].median()
            
simulation_medians = {}
for metric in metrics_list:
    simulation_medians[metric] = pd.DataFrame(index=holdout_fractions, 
                                              columns=lead_times,
                                              dtype=np.float64)
    for hf in holdout_fractions:
        for lt in lead_times:
            simulation_medians[metric].loc[hf, lt] = \
            simulation_statistics[metric][(0.0, 1, 0)].median()

simulation_fraction_medians = {}
for metric in metrics_list:
    simulation_fraction_medians[metric] = pd.DataFrame(index=holdout_fractions, 
                                                       columns=holdout_fractions,
                                                       dtype=np.float64)
    for hf1 in holdout_fractions:
        for hf2 in holdout_fractions:
            simulation_fraction_medians[metric].loc[hf1, hf2] = \
            simulation_statistics[metric][(0.0, 1, 0)].median()
            
autoregression_fractional_holdout_medians = {}
for metric in metrics_list:
    autoregression_fractional_holdout_medians[metric] = pd.DataFrame(index=holdout_fractions, 
                                                                     columns=holdout_fractions,
                                                                     dtype=np.float64)
    for hf1 in holdout_fractions:
        for hf2 in holdout_fractions:
            autoregression_fractional_holdout_medians[metric].loc[hf1, hf2] = \
            autoregression_fractional_holdout_statistics[metric][(hf1, hf2, 0)].median()

In [86]:
simulation_medians['NSE']

Unnamed: 0,1,2,3,5,7,10
0.0,0.795836,0.795836,0.795836,0.795836,0.795836,0.795836
0.1,0.795836,0.795836,0.795836,0.795836,0.795836,0.795836
0.2,0.795836,0.795836,0.795836,0.795836,0.795836,0.795836
0.3,0.795836,0.795836,0.795836,0.795836,0.795836,0.795836
0.4,0.795836,0.795836,0.795836,0.795836,0.795836,0.795836
0.5,0.795836,0.795836,0.795836,0.795836,0.795836,0.795836
0.6,0.795836,0.795836,0.795836,0.795836,0.795836,0.795836
0.7,0.795836,0.795836,0.795836,0.795836,0.795836,0.795836
0.8,0.795836,0.795836,0.795836,0.795836,0.795836,0.795836
0.9,0.795836,0.795836,0.795836,0.795836,0.795836,0.795836


In [67]:
autoregression_holdout_medians['NSE']

Unnamed: 0,1,2,3,5,7,10
0.0,,,,,,
0.1,,,,,,
0.2,,,,,,
0.3,,,,,,
0.4,,,,,,
0.5,,,,,,
0.6,,,,,,
0.7,,,,,,
0.8,,,,,,
0.9,,,,,,


In [68]:
autoregression_noholdout_medians['NSE']

Unnamed: 0,1,2,3,5,7,10
0.0,,,,,,
0.1,,,,,,
0.2,,,,,,
0.3,,,,,,
0.4,,,,,,
0.5,,,,,,
0.6,,,,,,
0.7,,,,,,
0.8,,,,,,
0.9,,,,,,


In [69]:
assimilation_medians['NSE']

Unnamed: 0,1,2,3,5,7,10
0.0,,,,,,
0.1,,,,,,
0.2,,,,,,
0.3,,,,,,
0.4,,,,,,
0.5,,,,,,
0.6,,,,,,
0.7,,,,,,
0.8,,,,,,
0.9,,,,,,


In [70]:
autoregression_holdout_statistics[metric]

Unnamed: 0_level_0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.1,0.1,0.1,...,0.8,0.8,0.8,0.8,0.9,0.9,0.9,0.9,0.9,0.9
Unnamed: 0_level_1,1,2,3,5,7,10,1,2,3,5,...,3,5,7,10,1,2,3,5,7,10
Unnamed: 0_level_2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
01022500,,,,,,,,,,,...,,,,,,,,,,
01031500,,,,,,,,,,,...,,,,,,,,,,
01047000,,,,,,,,,,,...,,,,,,,,,,
01052500,,,,,,,,,,,...,,,,,,,,,,
01054200,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14309500,,,,,,,,,,,...,,,,,,,,,,
14316700,,,,,,,,,,,...,,,,,,,,,,
14325000,,,,,,,,,,,...,,,,,,,,,,
14362250,,,,,,,,,,,...,,,,,,,,,,


In [71]:
autoregression_fractional_holdout_medians['NSE']

Unnamed: 0,0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9
0.0,0.878501,0.87292,0.86989,0.858554,0.85247,0.84198,0.830374,0.814944,0.794482,0.750122
0.1,0.882055,0.878759,0.869568,0.865491,0.85949,0.84747,0.839602,0.825282,0.797894,0.758827
0.2,0.876557,0.871145,0.865122,0.860374,0.856844,0.845791,0.835636,0.819786,0.796523,0.7598
0.3,0.878067,0.875887,0.8716,0.865655,0.863002,0.852731,0.842244,0.833677,0.820268,0.785528
0.4,0.874398,0.871183,0.868071,0.863059,0.858313,0.851756,0.841619,0.835318,0.817382,0.7875
0.5,0.875754,0.872269,0.867504,0.860482,0.858966,0.849772,0.84159,0.832781,0.823043,0.802042
0.6,0.8696,0.866992,0.860621,0.858861,0.850248,0.850435,0.841452,0.832197,0.820044,0.80243
0.7,0.863723,0.85932,0.857949,0.852556,0.852556,0.843768,0.842063,0.834977,0.826287,0.81277
0.8,0.864046,0.862237,0.85896,0.855775,0.85297,0.847522,0.844575,0.839145,0.83191,0.821611
0.9,0.850892,0.84701,0.846288,0.843279,0.841805,0.8364,0.831936,0.829434,0.823682,0.815008


In [94]:
metric = 'NSE'

fig, ax = plt.subplots(subplot_kw={"projection": "3d"})
pltdata = simulation_fraction_medians[metric]
X = pltdata.columns.values
Y = pltdata.index.values
X, Y = np.meshgrid(X, Y)
ax.plot_surface(X, Y, pltdata.values, label='simulation')

pltdata = autoregression_fractional_holdout_medians[metric]
X = pltdata.columns.values
Y = pltdata.index.values
X, Y = np.meshgrid(X, Y)
ax.plot_surface(X, Y, pltdata.values, label='autoregression with training holdout')

# -------------
metric = 'NSE'      
fig, axes = plt.subplots(2, 1, figsize=(10, 8))
for hf in holdout_fractions:
    axes[0].plot(autoregression_fractional_holdout_medians[metric].loc[hf, :], label=hf)
axes[0].plot(simulation_fraction_medians[metric].loc[hf, :], 'k--', label='Sim')
axes[0].set_xlabel('test holdout fraction')
axes[0].legend(title="train holdout fraction", prop={'size': 6})
axes[0].set_ylabel(metric)
axes[0].grid()

for hf in holdout_fractions:
    axes[1].plot(autoregression_fractional_holdout_medians[metric][hf], label=hf)
axes[1].plot(simulation_fraction_medians[metric].loc[hf, :], 'k--', label='Sim')
axes[1].set_xlabel('train holdout fraction')
axes[1].legend(title="test holdout fraction", prop={'size': 6})
axes[1].set_ylabel(metric)
axes[1].grid()


# plot_file_name = f"./plots/{metric}_plots.png"
# plt.savefig(plot_file_name)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
metric = 'NSE'
fig, axes = plt.subplots(1,2, figsize=(10,4))

xdata = autoregression_noholdout_statistics[metric][0.000, 1, 0]
ydata = assimilation_statistics[metric][0.000, 1, 0]
axes[0].scatter(xdata, ydata)
axes[0].plot([0, 1], [0, 1], 'k--')
axes[0].set_xlabel('AR w/o Holdout')
axes[0].set_ylabel('Variational Assimilation')
axes[0].set_title('NSE (1-day Lag, 0% Missing Data)')
axes[0].grid()

xdata = autoregression_noholdout_statistics[metric][0.000, 1, 0]
ydata = autoregression_holdout_statistics[metric][0.000, 1, 0]
axes[1].scatter(xdata, ydata)
axes[1].plot([0, 1], [0, 1], 'k--')
axes[1].set_xlabel('AR w/o Holdout')
axes[1].set_ylabel('AR w Holdout')
axes[1].set_title('NSE (1-day Lag, 0% Missing Data)')
axes[1].grid()

plot_file_name = f"./plots/scatterplot.png"
plt.savefig(plot_file_name)

In [None]:
metric = 'NSE'
fig, ax = plt.subplots(subplot_kw={"projection": "3d"})

pltdata = simulation_medians[metric]
X = pltdata.columns.values
Y = pltdata.index.values
X, Y = np.meshgrid(X, Y)
ax.plot_surface(X, Y, pltdata.values, label='simulation')

pltdata = autoregression_holdout_medians[metric]
X = pltdata.columns.values
Y = pltdata.index.values
X, Y = np.meshgrid(X, Y)
ax.plot_surface(X, Y, pltdata.values, label='autoregression with training holdout')

pltdata = autoregression_noholdout_medians[metric]
X = pltdata.columns.values
Y = pltdata.index.values
X, Y = np.meshgrid(X, Y)
ax.plot_surface(X, Y, pltdata.values, label='autoregression without training holdout')

pltdata = assimilation_medians[metric]
X = pltdata.columns.values
Y = pltdata.index.values
X, Y = np.meshgrid(X, Y)
ax.plot_surface(X, Y, pltdata.values, label='assimilation')

plot_file_name = f"./plots/3d-surfaceplot.png"
plt.savefig(plot_file_name)

In [None]:
for metric in metrics_list:
    
    ymin = pd.concat((simulation_medians[metric], 
                      assimilation_medians[metric], 
                      autoregression_holdout_medians[metric], 
                      autoregression_noholdout_medians[metric])).min().min() * 0.99
    ymax = pd.concat((simulation_medians[metric], 
                      assimilation_medians[metric], 
                      autoregression_holdout_medians[metric], 
                      autoregression_noholdout_medians[metric])).max().max()*1.01

    fig, axes = plt.subplots(3,2, figsize=(8,7))
    for i, lead_time in enumerate(lead_times):
        axes.flatten()[i].plot(simulation_medians[metric][lead_time], label='Sim')
        axes.flatten()[i].plot(autoregression_holdout_medians[metric][lead_time], label='AR w/ holdout')
        axes.flatten()[i].plot(autoregression_noholdout_medians[metric][lead_time], label='AR w/o holdout')
        axes.flatten()[i].plot(assimilation_medians[metric][lead_time], label='DA')
        if i == 5: axes.flatten()[i].legend()
        if i >= 4: 
            axes.flatten()[i].set_xlabel('fraction of missing data')
        else:
            axes.flatten()[i].set_xticks([])
        if i%2 == 0: 
            axes.flatten()[i].set_ylabel(metric)
        else:
            axes.flatten()[i].set_yticks([])
        axes.flatten()[i].set_ylim([ymin, ymax])
        axes.flatten()[i].set_title(f'{lead_time} Days')
        
    plot_file_name = f"./plots/{metric}_plots.png"
    plt.savefig(plot_file_name)

In [None]:
def make_table(holdout_frac, lag_time):
    models_list = ['Simulation', 'AR w/o holdout', 'AR w/ holdout', 'Assimilation']
    table_df = pd.DataFrame(index=metrics_list, columns=models_list)
    for metric in metrics_list:
        table_df.loc[metric, 'Simulation'] = simulation_medians[metric].loc[holdout_frac, lag_time]
        table_df.loc[metric, 'AR w/o holdout'] = autoregression_noholdout_medians[metric].loc[holdout_frac, lag_time]
        table_df.loc[metric, 'AR w/ holdout'] = autoregression_holdout_medians[metric].loc[holdout_frac, lag_time]
        table_df.loc[metric, 'Assimilation'] = assimilation_medians[metric].loc[holdout_frac, lag_time]
    return table_df.to_latex()

In [None]:
supplemental_table_filename = 'tables/all_metrics_tables.txt'
pd.set_option('display.float_format', lambda x: '%.3f' % x)
with open(supplemental_table_filename, 'wt') as f:
    for holdout_frac in tqdm(holdout_fractions):
        for lag_time in lead_times:
            f.write(f'Missing Data Fraction: {holdout_frac} -- Observation Lag Time {lag_time} [days] \n')
            f.write(make_table(holdout_frac, lag_time))
            f.write('\n\n\n\pagebreak \n\n\n')

In [None]:
supplemental_table_filename = 'tables/zero_lag_zero_holdout_metrics_table.txt'
pd.set_option('display.float_format', lambda x: '%.3f' % x)
with open(supplemental_table_filename, 'wt') as f:
    f.write(make_table(holdout_fractions[0], lead_times[0]))