In [1]:
import warnings

import pandas as pd
import requests
from tqdm.auto import tqdm
tqdm.pandas()

In [2]:
def next_monday(date):
    """Return the first Monday on or after `date`.

    The search window starts at `date` itself and spans seven days, so a
    date that already is a Monday is returned unchanged.
    """
    window_end = date + pd.offsets.Day(6)
    mondays = pd.date_range(start=date, end=window_end, freq='W-MON')
    return mondays[0]

# Settings

In [3]:
# Timezero (a Monday) of the forecast round that is used as the test set.
# An earlier run of this notebook used '2021-03-22'.
test_date = pd.to_datetime('2021-04-19')

# Forecast files are matched on timezero, which is always a Monday; any other
# weekday would silently select no test files at all.
assert test_date.dayofweek == 0, "test_date must be a Monday"

In [4]:
# Models whose files are dropped from the file list by name
models_to_exclude = [
    'COVIDhub-ensemble',
    'COVIDhub-trained_ensemble',
    'CU-nochange',
    'CU-scenario_high',
    'CU-scenario_low',
    'CU-scenario_mid',
]

# Two-character location codes that are removed from the data
# (abbreviation, full name)
locations_to_exclude = [
    "11",  # DC, District of Columbia
    "60",  # AS, American Samoa
    "66",  # GU, Guam
    "69",  # MP, Northern Mariana Islands
    "72",  # PR, Puerto Rico
    "74",  # UM, U.S. Minor Outlying Islands
    "78",  # VI, Virgin Islands
]

In [5]:
# Arguments shared by every pd.read_csv call on a forecast file:
# the column dtypes and the columns that are parsed as dates.
dtype = dict(target=str, location=str, type=str, quantile=float, value=float)
parse_dates = ['forecast_date', 'target_end_date']

# Load Files

In [6]:
# List every path of the forecast hub repository with one recursive Git Trees API call.
url = "https://api.github.com/repos/reichlab/covid19-forecast-hub/git/trees/master?recursive=1"
r = requests.get(url, timeout=60)
# Stop here on an HTTP error (e.g. rate limiting) instead of failing later
# with a KeyError when res["tree"] is read.
r.raise_for_status()
res = r.json()

# GitHub limits the size of a recursive listing and marks a cut-off response.
if res.get("truncated"):
    warnings.warn("GitHub returned a truncated tree listing; some forecast files may be missing.")

In [7]:
# Paths of all CSV files located under data-processed/
tree_paths = (entry["path"] for entry in res["tree"])
files = [
    path
    for path in tree_paths
    if path.startswith('data-processed/') and path.endswith('.csv')
]

In [10]:
files[-5:]

['data-processed/epiforecasts-ensemble1/2021-03-22-epiforecasts-ensemble1.csv',
 'data-processed/epiforecasts-ensemble1/2021-03-29-epiforecasts-ensemble1.csv',
 'data-processed/epiforecasts-ensemble1/2021-04-05-epiforecasts-ensemble1.csv',
 'data-processed/epiforecasts-ensemble1/2021-04-12-epiforecasts-ensemble1.csv',
 'data-processed/epiforecasts-ensemble1/2021-04-19-epiforecasts-ensemble1.csv']

In [11]:
# One row per forecast file. Paths look like
# data-processed/<model>/<YYYY-MM-DD>-<model>.csv, so the model is the second
# path component and the forecast date the first 10 characters of the file name.
split_paths = [path.split('/') for path in files]

df_files = pd.DataFrame({
    'filename': files,
    'model': [parts[1] for parts in split_paths],
    'forecast_date': pd.to_datetime([parts[2][:10] for parts in split_paths]),
})

# timezero: the first Monday on or after the forecast date
df_files['timezero'] = df_files['forecast_date'].map(next_monday)

# drop the files of all excluded models
df_files = df_files.loc[~df_files['model'].isin(models_to_exclude)]

In [12]:
df_files

Unnamed: 0,filename,model,forecast_date,timezero
0,data-processed/AIpert-pwllnod/2020-12-21-AIper...,AIpert-pwllnod,2020-12-21,2020-12-21
1,data-processed/AIpert-pwllnod/2020-12-28-AIper...,AIpert-pwllnod,2020-12-28,2020-12-28
2,data-processed/AIpert-pwllnod/2021-01-04-AIper...,AIpert-pwllnod,2021-01-04,2021-01-04
3,data-processed/AIpert-pwllnod/2021-01-11-AIper...,AIpert-pwllnod,2021-01-11,2021-01-11
4,data-processed/AIpert-pwllnod/2021-01-18-AIper...,AIpert-pwllnod,2021-01-18,2021-01-18
...,...,...,...,...
3165,data-processed/epiforecasts-ensemble1/2021-03-...,epiforecasts-ensemble1,2021-03-22,2021-03-22
3166,data-processed/epiforecasts-ensemble1/2021-03-...,epiforecasts-ensemble1,2021-03-29,2021-03-29
3167,data-processed/epiforecasts-ensemble1/2021-04-...,epiforecasts-ensemble1,2021-04-05,2021-04-05
3168,data-processed/epiforecasts-ensemble1/2021-04-...,epiforecasts-ensemble1,2021-04-12,2021-04-12


## Test Data

In [13]:
# Targets kept from every test file: horizons 1 to 4 weeks for incident deaths,
# cumulative deaths and incident cases (in that order).
VALID_TARGETS = [
    f"{horizon} wk ahead {kind}"
    for kind in ("inc death", "cum death", "inc case")
    for horizon in range(1, 5)
]

In [14]:
# files whose timezero is the test date
is_test_round = df_files['timezero'] == test_date
df_test_files = df_files.loc[is_test_round]

In [15]:
df_test_files.head()

Unnamed: 0,filename,model,forecast_date,timezero
17,data-processed/AIpert-pwllnod/2021-04-19-AIper...,AIpert-pwllnod,2021-04-19,2021-04-19
58,data-processed/BPagano-RtDriven/2021-04-18-BPa...,BPagano-RtDriven,2021-04-18,2021-04-19
102,data-processed/CEID-Walk/2021-04-19-CEID-Walk.csv,CEID-Walk,2021-04-19,2021-04-19
142,data-processed/CMU-TimeSeries/2021-04-19-CMU-T...,CMU-TimeSeries,2021-04-19,2021-04-19
197,data-processed/COVIDhub-baseline/2021-04-19-CO...,COVIDhub-baseline,2021-04-19,2021-04-19


In [16]:
# Download every forecast file of the test round and keep only the valid targets.
dfs_test = []

# itertuples yields one named tuple per file; this also avoids binding `_`,
# which IPython uses for the last cell output.
for row in tqdm(df_test_files.itertuples(index=False), total=df_test_files.shape[0]):
    df_temp = pd.read_csv('https://github.com/reichlab/covid19-forecast-hub/raw/master/' + row.filename,
                          dtype=dtype, parse_dates=parse_dates)
    # .assign returns a new frame, so the model column is not written into a
    # filtered slice of df_temp (which triggers pandas' SettingWithCopyWarning).
    df_temp = df_temp[df_temp.target.isin(VALID_TARGETS)].assign(model=row.model)
    dfs_test.append(df_temp)

  0%|          | 0/56 [00:00<?, ?it/s]

In [17]:
# Stack the per-model frames; each frame keeps its own row labels,
# so the index of the result is not unique.
df_test = pd.concat(objs=dfs_test)

In [18]:
df_test.shape

(2299271, 8)

In [19]:
df_test

Unnamed: 0,forecast_date,target,target_end_date,quantile,type,value,location,model
0,2021-04-19,1 wk ahead cum death,2021-04-24,0.025,quantile,570235.090388,US,AIpert-pwllnod
1,2021-04-19,1 wk ahead cum death,2021-04-24,0.250,quantile,571226.816316,US,AIpert-pwllnod
2,2021-04-19,1 wk ahead cum death,2021-04-24,0.750,quantile,576744.447118,US,AIpert-pwllnod
3,2021-04-19,1 wk ahead cum death,2021-04-24,0.975,quantile,586268.442502,US,AIpert-pwllnod
4,2021-04-19,1 wk ahead cum death,2021-04-24,,point,571747.177912,US,AIpert-pwllnod
...,...,...,...,...,...,...,...,...
10939,2021-04-19,4 wk ahead cum death,2021-05-15,0.850,quantile,592731.100000,US,epiforecasts-ensemble1
10940,2021-04-19,4 wk ahead cum death,2021-05-15,0.900,quantile,593953.833333,US,epiforecasts-ensemble1
10941,2021-04-19,4 wk ahead cum death,2021-05-15,0.950,quantile,595722.750000,US,epiforecasts-ensemble1
10942,2021-04-19,4 wk ahead cum death,2021-05-15,0.975,quantile,596946.266667,US,epiforecasts-ensemble1


In [22]:
# only consider US + 50 states:
# keep two-character location codes that are not on the exclusion list
has_two_char_code = df_test['location'].str.len() == 2
is_excluded = df_test['location'].isin(locations_to_exclude)
df_test = df_test[has_two_char_code & ~is_excluded]

In [23]:
df_test.shape

(362655, 8)

In [24]:
# ensure that for all targets each model provides forecasts for all locations
# (51 = US + 50 states)
locations_per_model = df_test.groupby(['target', 'model'])['location'].transform('nunique')
df_test = df_test[locations_per_model == 51]

In [25]:
# keep the quantile rows only (drops the point forecasts)
is_quantile_row = df_test['type'].eq('quantile')
df_test = df_test.loc[is_quantile_row]

In [26]:
# Number of distinct quantile levels in each single forecast
# (model / target / target_end_date / location).
# .assign builds a new frame instead of writing a column into the filtered
# slice held by df_test, which avoids pandas' SettingWithCopyWarning.
df_test = df_test.assign(
    no_quantiles=df_test.groupby(['model', 'target', 'target_end_date', 'location'])['quantile'].transform('nunique')
)

# Smallest of these counts that a model reaches for a target.
df_test = df_test.assign(
    no_quantiles=df_test.groupby(['target', 'model'])['no_quantiles'].transform('min')
)

In [27]:
# Keep a target/model combination if it always provides 23 quantile levels,
# or 7 levels in the case of an 'inc case' target.
has_23_levels = df_test['no_quantiles'] == 23
is_case_with_7_levels = df_test['target'].str.contains('inc case') & (df_test['no_quantiles'] == 7)

df_test = (
    df_test[has_23_levels | is_case_with_7_levels]
    .drop(columns='no_quantiles')
    .reset_index(drop=True)
)

In [28]:
df_test.shape

(279786, 8)

In [29]:
# dict of the models available for each target (target -> array of model names)
models_per_target = df_test.groupby(['target'])['model'].unique()
available_models = {target: models_per_target[target] for target in models_per_target.keys()}

In [30]:
available_models

{'1 wk ahead cum death': array(['BPagano-RtDriven', 'CEID-Walk', 'COVIDhub-baseline', 'CU-select',
        'Columbia_UNC-SurvCon', 'Covid19Sim-Simulator',
        'CovidAnalytics-DELPHI', 'DDS-NBDS', 'IHME-CurveFit',
        'JHUAPL-Bucky', 'JHU_CSSE-DECOM', 'Karlen-pypm', 'LANL-GrowthRate',
        'LNQ-ens1', 'MIT_CritData-GBCF', 'MOBS-GLEAM_COVID',
        'Microsoft-DeepSTIA', 'OliverWyman-Navigator',
        'RobertWalraven-ESG', 'SteveMcConnell-CovidComplete',
        'UA-EpiCovDA', 'UCSD_NEU-DeepGLEAM', 'UMass-MechBayes',
        'USC-SI_kJalpha', 'epiforecasts-ensemble1'], dtype=object),
 '1 wk ahead inc case': array(['BPagano-RtDriven', 'CEID-Walk', 'COVIDhub-baseline', 'CU-select',
        'Covid19Sim-Simulator', 'CovidAnalytics-DELPHI',
        'IEM_MED-CovidProject', 'JHUAPL-Bucky', 'JHU_CSSE-DECOM',
        'Karlen-pypm', 'LANL-GrowthRate', 'LNQ-ens1', 'MOBS-GLEAM_COVID',
        'Microsoft-DeepSTIA', 'RobertWalraven-ESG',
        'UChicagoCHATTOPADHYAY-UnIT', 'USC-SI_kJal

## Training Data

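For a test date d and a horizon of h weeks, the training forecasts are those with a timezero from d - 4 weeks - (h - 1) weeks up to and including d - h weeks.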

In [31]:
# Earliest timezero needed for training: the window of the longest horizon (4 weeks)
# starts 4 weeks + (4 - 1) weeks before the test date.
training_window = pd.Timedelta(weeks=4)
longest_horizon_shift = pd.Timedelta(weeks=(4 - 1))
lower_bound = test_date - training_window - longest_horizon_shift

In [32]:
lower_bound

Timestamp('2021-03-01 00:00:00')

In [33]:
# From here on df_files only holds the candidate training files:
# timezero from lower_bound (inclusive) up to, but excluding, the test date.
not_too_old = df_files['timezero'] >= lower_bound
before_test = df_files['timezero'] < test_date
df_files = df_files.loc[not_too_old & before_test].copy()

In [34]:
# assigns to each horizon the corresponding training forecast dates for the test date,
# stored as [first timezero, last timezero] (both inclusive)
h_dict = {
    h: [test_date - pd.Timedelta(weeks=4) - pd.Timedelta(weeks=(h - 1)),
        test_date - pd.Timedelta(weeks=h)]
    for h in range(1, 5)
}

In [35]:
h_dict

{1: [Timestamp('2021-03-22 00:00:00'), Timestamp('2021-04-12 00:00:00')],
 2: [Timestamp('2021-03-15 00:00:00'), Timestamp('2021-04-05 00:00:00')],
 3: [Timestamp('2021-03-08 00:00:00'), Timestamp('2021-03-29 00:00:00')],
 4: [Timestamp('2021-03-01 00:00:00'), Timestamp('2021-03-22 00:00:00')]}

In [36]:
def relevant_horizons(d, horizon_windows=None):
    """Return the horizons whose training window contains `d`.

    Parameters
    ----------
    d : timestamp-like
        Date to classify; it is compared against the window bounds.
    horizon_windows : dict, optional
        Maps each horizon to a ``[first, last]`` pair of inclusive bounds.
        Defaults to the notebook-level ``h_dict``.

    Returns
    -------
    list
        The horizons, in the iteration order of `horizon_windows`, whose
        window contains `d`. Empty if no window matches.
    """
    if horizon_windows is None:
        # fall back to the windows derived from the notebook's test date
        horizon_windows = h_dict
    return [h for h, (first, last) in horizon_windows.items() if first <= d <= last]

In [37]:
# list of horizons for which each file serves as training data
df_files['horizons'] = df_files['timezero'].map(relevant_horizons)

In [38]:
df_files

Unnamed: 0,filename,model,forecast_date,timezero,horizons
10,data-processed/AIpert-pwllnod/2021-03-01-AIper...,AIpert-pwllnod,2021-03-01,2021-03-01,[4]
11,data-processed/AIpert-pwllnod/2021-03-08-AIper...,AIpert-pwllnod,2021-03-08,2021-03-08,"[3, 4]"
12,data-processed/AIpert-pwllnod/2021-03-15-AIper...,AIpert-pwllnod,2021-03-15,2021-03-15,"[2, 3, 4]"
13,data-processed/AIpert-pwllnod/2021-03-22-AIper...,AIpert-pwllnod,2021-03-22,2021-03-22,"[1, 2, 3, 4]"
14,data-processed/AIpert-pwllnod/2021-03-29-AIper...,AIpert-pwllnod,2021-03-29,2021-03-29,"[1, 2, 3]"
...,...,...,...,...,...
3164,data-processed/epiforecasts-ensemble1/2021-03-...,epiforecasts-ensemble1,2021-03-15,2021-03-15,"[2, 3, 4]"
3165,data-processed/epiforecasts-ensemble1/2021-03-...,epiforecasts-ensemble1,2021-03-22,2021-03-22,"[1, 2, 3, 4]"
3166,data-processed/epiforecasts-ensemble1/2021-03-...,epiforecasts-ensemble1,2021-03-29,2021-03-29,"[1, 2, 3]"
3167,data-processed/epiforecasts-ensemble1/2021-04-...,epiforecasts-ensemble1,2021-04-05,2021-04-05,"[1, 2]"


In [39]:
# only keep relevant training data: files that are used for at least one horizon
horizon_count = df_files['horizons'].map(len)
df_files = df_files[horizon_count > 0]

In [40]:
# Download every training file and keep only the targets of the horizons
# it is used for.
dfs = []
for row in tqdm(df_files.itertuples(index=False), total=df_files.shape[0]):
    # Kept in a local name: reusing VALID_TARGETS here would overwrite the
    # notebook-level list that the test-data cells filter on.
    row_targets = [f"{h} wk ahead {kind}"
                   for kind in ("inc death", "cum death", "inc case")
                   for h in row.horizons]
    df_temp = pd.read_csv('https://github.com/reichlab/covid19-forecast-hub/raw/master/' + row.filename,
                          dtype=dtype, parse_dates=parse_dates)
    # .assign returns a new frame, so the model column is not written into a
    # filtered slice of df_temp (which triggers pandas' SettingWithCopyWarning).
    df_temp = df_temp[df_temp.target.isin(row_targets)].assign(model=row.model)
    dfs.append(df_temp)

  0%|          | 0/428 [00:00<?, ?it/s]

In [41]:
# Stack the per-file frames; each frame keeps its own row labels,
# so the index of the result is not unique.
df = pd.concat(objs=dfs)

In [42]:
df.shape

(9179789, 8)

# Data Cleaning

In [43]:
# keep two-character location codes that are not on the exclusion list
has_two_char_code = df['location'].str.len() == 2
is_excluded = df['location'].isin(locations_to_exclude)
df = df[has_two_char_code & ~is_excluded]

In [44]:
df.location.nunique() # US + 50 states

51

In [45]:
# keep the quantile rows only (drops the point forecasts)
is_quantile_row = df['type'].eq('quantile')
df = df.loc[is_quantile_row]

In [46]:
# how many forecasts (distinct target end dates) for each target/model/location?
# should be 4 for every location
# .assign builds a new frame instead of writing a column into the filtered
# slice held by df, which avoids pandas' SettingWithCopyWarning.
df = df.assign(
    no_forecasts=df.groupby(['target', 'model', 'location'])['target_end_date'].transform('nunique')
)

In [47]:
# Keep a target/model combination only if every one of its locations has 4 forecasts.
fewest_forecasts = df.groupby(['target', 'model'])['no_forecasts'].transform('min')
df = (
    df[fewest_forecasts == 4]
    .drop(columns='no_forecasts')
    .reset_index(drop=True)
)

In [48]:
# df = df[df.groupby(['target', 'model', 'location'])['target_end_date'].transform('nunique') == 4]
# The filter above would drop only the individual locations with fewer than 4 forecasts, but we want to drop the whole model (for that target) as soon as any one location has fewer than 4.

In [49]:
# Keep the rows of a target/model/target_end_date combination only if it
# covers all 51 locations.
locations_covered = df.groupby(['target', 'model', 'target_end_date'])['location'].transform('nunique')
df = df[locations_covered == 51]

In [50]:
# Number of distinct quantile levels in each single forecast
# (model / target / target_end_date / location).
# .assign builds a new frame instead of writing a column into the filtered
# slice held by df, which avoids pandas' SettingWithCopyWarning.
df = df.assign(
    no_quantiles=df.groupby(['model', 'target', 'target_end_date', 'location'])['quantile'].transform('nunique')
)

# Smallest of these counts that a model reaches for a target.
df = df.assign(
    no_quantiles=df.groupby(['target', 'model'])['no_quantiles'].transform('min')
)

In [51]:
df[(df.target.str.contains('inc case') & (df.no_quantiles == 7))]

Unnamed: 0,forecast_date,target,target_end_date,quantile,type,value,location,model,no_quantiles
8874,2021-02-28,4 wk ahead inc case,2021-03-27,0.025,quantile,168081.870850,US,BPagano-RtDriven,7
8875,2021-02-28,4 wk ahead inc case,2021-03-27,0.100,quantile,202546.871190,US,BPagano-RtDriven,7
8876,2021-02-28,4 wk ahead inc case,2021-03-27,0.250,quantile,233868.695470,US,BPagano-RtDriven,7
8877,2021-02-28,4 wk ahead inc case,2021-03-27,0.500,quantile,268784.628640,US,BPagano-RtDriven,7
8878,2021-02-28,4 wk ahead inc case,2021-03-27,0.750,quantile,303700.561800,US,BPagano-RtDriven,7
...,...,...,...,...,...,...,...,...,...
1090696,2021-04-12,1 wk ahead inc case,2021-04-17,0.250,quantile,355932.360438,US,UVA-Ensemble,7
1090697,2021-04-12,1 wk ahead inc case,2021-04-17,0.500,quantile,470727.890841,US,UVA-Ensemble,7
1090698,2021-04-12,1 wk ahead inc case,2021-04-17,0.750,quantile,585523.421244,US,UVA-Ensemble,7
1090699,2021-04-12,1 wk ahead inc case,2021-04-17,0.900,quantile,688842.979562,US,UVA-Ensemble,7


In [52]:
# Keep a target/model combination if it always provides 23 quantile levels,
# or 7 levels in the case of an 'inc case' target.
has_23_levels = df['no_quantiles'] == 23
is_case_with_7_levels = df['target'].str.contains('inc case') & (df['no_quantiles'] == 7)

df = (
    df[has_23_levels | is_case_with_7_levels]
    .drop(columns='no_quantiles')
    .reset_index(drop=True)
)

In [53]:
df.groupby('target').model.nunique()

target
1 wk ahead cum death    23
1 wk ahead inc case     19
1 wk ahead inc death    24
2 wk ahead cum death    25
2 wk ahead inc case     18
2 wk ahead inc death    25
3 wk ahead cum death    24
3 wk ahead inc case     19
3 wk ahead inc death    23
4 wk ahead cum death    23
4 wk ahead inc case     19
4 wk ahead inc death    23
Name: model, dtype: int64

In [54]:
# check if there are models used for training that are not available for the test date
models_in_training = df.groupby('target').model.unique()
train_models = {target: models_in_training[target] for target in models_in_training.keys()}

# flatten both dicts into lists of (target, model) pairs
a = [(target, model) for target, models in train_models.items() for model in models]
b = [(target, model) for target, models in available_models.items() for model in models]

# pairs present in the training data but missing on the test date
[pair for pair in a if pair not in b]

[('1 wk ahead cum death', 'IUPUI-HkPrMobiDyR'),
 ('1 wk ahead cum death', 'SigSci-TS'),
 ('1 wk ahead inc case', 'IUPUI-HkPrMobiDyR'),
 ('1 wk ahead inc case', 'SigSci-TS'),
 ('1 wk ahead inc death', 'IUPUI-HkPrMobiDyR'),
 ('1 wk ahead inc death', 'SigSci-TS'),
 ('2 wk ahead cum death', 'IUPUI-HkPrMobiDyR'),
 ('2 wk ahead cum death', 'PSI-DRAFT'),
 ('2 wk ahead cum death', 'WalmartLabsML-LogForecasting'),
 ('2 wk ahead inc case', 'IUPUI-HkPrMobiDyR'),
 ('2 wk ahead inc death', 'IUPUI-HkPrMobiDyR'),
 ('2 wk ahead inc death', 'PSI-DRAFT'),
 ('3 wk ahead cum death', 'PSI-DRAFT'),
 ('3 wk ahead cum death', 'UCLA-SuEIR'),
 ('3 wk ahead cum death', 'WalmartLabsML-LogForecasting'),
 ('3 wk ahead inc case', 'DDS-NBDS'),
 ('3 wk ahead inc case', 'UCLA-SuEIR'),
 ('3 wk ahead inc death', 'PSI-DRAFT'),
 ('3 wk ahead inc death', 'UCLA-SuEIR'),
 ('4 wk ahead cum death', 'PSI-DRAFT'),
 ('4 wk ahead cum death', 'UCLA-SuEIR'),
 ('4 wk ahead inc case', 'DDS-NBDS'),
 ('4 wk ahead inc case', 'UCLA-SuEIR')

In [55]:
# Keep only the (target, model) combinations that are also available for the test date.
# The pairs are collected in a set once and looked up per row; this replaces a
# row-wise DataFrame.apply and no longer raises a KeyError for a target that
# is missing from available_models (such rows are simply dropped).
test_pairs = {(target, model) for target, models in available_models.items() for model in models}
in_test = pd.Series([pair in test_pairs for pair in zip(df['target'], df['model'])],
                    dtype=bool).to_numpy()
df = df[in_test]

In [56]:
df.groupby('target').model.nunique()

target
1 wk ahead cum death    21
1 wk ahead inc case     17
1 wk ahead inc death    22
2 wk ahead cum death    22
2 wk ahead inc case     17
2 wk ahead inc death    23
3 wk ahead cum death    21
3 wk ahead inc case     17
3 wk ahead inc death    21
4 wk ahead cum death    21
4 wk ahead inc case     17
4 wk ahead inc death    21
Name: model, dtype: int64

In [57]:
df.shape

(928455, 8)

In [58]:
# Long-format table of (target, model) pairs. train_models was built before df
# was restricted to the models available for the test date, so the table
# lists the unrestricted set.
target_model_pairs = [(target, model) for target, models in train_models.items() for model in models]
models_by_target = pd.DataFrame(target_model_pairs, columns=['target', 'model'])

In [59]:
models_by_target[models_by_target.target == '4 wk ahead cum death']

Unnamed: 0,target,model
200,4 wk ahead cum death,BPagano-RtDriven
201,4 wk ahead cum death,CEID-Walk
202,4 wk ahead cum death,COVIDhub-baseline
203,4 wk ahead cum death,CU-select
204,4 wk ahead cum death,Covid19Sim-Simulator
205,4 wk ahead cum death,CovidAnalytics-DELPHI
206,4 wk ahead cum death,DDS-NBDS
207,4 wk ahead cum death,JHUAPL-Bucky
208,4 wk ahead cum death,Karlen-pypm
209,4 wk ahead cum death,LANL-GrowthRate


In [60]:
# Drop excluded models from the training data (the same list was already
# applied to the file list before downloading).
is_excluded_model = df['model'].isin(models_to_exclude)
df = df.loc[~is_excluded_model]

In [61]:
df

Unnamed: 0,forecast_date,target,target_end_date,quantile,type,value,location,model
0,2021-02-28,4 wk ahead inc death,2021-03-27,0.010,quantile,3692.05707,US,BPagano-RtDriven
1,2021-02-28,4 wk ahead inc death,2021-03-27,0.025,quantile,4128.22709,US,BPagano-RtDriven
2,2021-02-28,4 wk ahead inc death,2021-03-27,0.050,quantile,4518.14009,US,BPagano-RtDriven
3,2021-02-28,4 wk ahead inc death,2021-03-27,0.100,quantile,4974.71545,US,BPagano-RtDriven
4,2021-02-28,4 wk ahead inc death,2021-03-27,0.150,quantile,5284.77041,US,BPagano-RtDriven
...,...,...,...,...,...,...,...,...
1022902,2021-04-12,1 wk ahead cum death,2021-04-17,0.850,quantile,570159.00000,US,epiforecasts-ensemble1
1022903,2021-04-12,1 wk ahead cum death,2021-04-17,0.900,quantile,570489.00000,US,epiforecasts-ensemble1
1022904,2021-04-12,1 wk ahead cum death,2021-04-17,0.950,quantile,570974.00000,US,epiforecasts-ensemble1
1022905,2021-04-12,1 wk ahead cum death,2021-04-17,0.975,quantile,571359.00000,US,epiforecasts-ensemble1


In [62]:
# dict of the models left in the training data for each target (target -> array of model names)
train_models_per_target = df.groupby(['target'])['model'].unique()
available_train_models = {target: train_models_per_target[target] for target in train_models_per_target.keys()}

In [63]:
available_train_models

{'1 wk ahead cum death': array(['BPagano-RtDriven', 'CEID-Walk', 'COVIDhub-baseline', 'CU-select',
        'Covid19Sim-Simulator', 'CovidAnalytics-DELPHI', 'DDS-NBDS',
        'JHUAPL-Bucky', 'Karlen-pypm', 'LANL-GrowthRate', 'LNQ-ens1',
        'MIT_CritData-GBCF', 'MOBS-GLEAM_COVID', 'Microsoft-DeepSTIA',
        'RobertWalraven-ESG', 'SteveMcConnell-CovidComplete',
        'UA-EpiCovDA', 'UCSD_NEU-DeepGLEAM', 'UMass-MechBayes',
        'USC-SI_kJalpha', 'epiforecasts-ensemble1'], dtype=object),
 '1 wk ahead inc case': array(['BPagano-RtDriven', 'CEID-Walk', 'COVIDhub-baseline', 'CU-select',
        'Covid19Sim-Simulator', 'CovidAnalytics-DELPHI',
        'IEM_MED-CovidProject', 'JHUAPL-Bucky', 'JHU_CSSE-DECOM',
        'Karlen-pypm', 'LANL-GrowthRate', 'LNQ-ens1', 'MOBS-GLEAM_COVID',
        'Microsoft-DeepSTIA', 'RobertWalraven-ESG', 'USC-SI_kJalpha',
        'UVA-Ensemble'], dtype=object),
 '1 wk ahead inc death': array(['BPagano-RtDriven', 'CEID-Walk', 'COVIDhub-baseline', 'CU-se

In [64]:
df_test.shape

(279786, 8)

In [65]:
# Keep only the (target, model) combinations that are also part of the training data.
# The pairs are collected in a set once and looked up per row; this replaces a
# row-wise DataFrame.apply and no longer raises a KeyError for a target that
# is missing from available_train_models (such rows are simply dropped).
train_pairs = {(target, model) for target, models in available_train_models.items() for model in models}
in_train = pd.Series([pair in train_pairs for pair in zip(df_test['target'], df_test['model'])],
                     dtype=bool).to_numpy()
df_test = df_test[in_train]

In [66]:
df_test.head()

Unnamed: 0,forecast_date,target,target_end_date,quantile,type,value,location,model
0,2021-04-18,1 wk ahead inc death,2021-04-24,0.01,quantile,3251.19694,US,BPagano-RtDriven
1,2021-04-18,1 wk ahead inc death,2021-04-24,0.025,quantile,3575.34252,US,BPagano-RtDriven
2,2021-04-18,1 wk ahead inc death,2021-04-24,0.05,quantile,3865.11159,US,BPagano-RtDriven
3,2021-04-18,1 wk ahead inc death,2021-04-24,0.1,quantile,4204.42168,US,BPagano-RtDriven
4,2021-04-18,1 wk ahead inc death,2021-04-24,0.15,quantile,4434.84318,US,BPagano-RtDriven


In [67]:
df.groupby(['target'])['model'].unique()

target
1 wk ahead cum death    [BPagano-RtDriven, CEID-Walk, COVIDhub-baselin...
1 wk ahead inc case     [BPagano-RtDriven, CEID-Walk, COVIDhub-baselin...
1 wk ahead inc death    [BPagano-RtDriven, CEID-Walk, COVIDhub-baselin...
2 wk ahead cum death    [BPagano-RtDriven, CEID-Walk, COVIDhub-baselin...
2 wk ahead inc case     [BPagano-RtDriven, CEID-Walk, COVIDhub-baselin...
2 wk ahead inc death    [BPagano-RtDriven, CEID-Walk, COVIDhub-baselin...
3 wk ahead cum death    [BPagano-RtDriven, CEID-Walk, COVIDhub-baselin...
3 wk ahead inc case     [BPagano-RtDriven, CEID-Walk, COVIDhub-baselin...
3 wk ahead inc death    [BPagano-RtDriven, CEID-Walk, COVIDhub-baselin...
4 wk ahead cum death    [BPagano-RtDriven, CEID-Walk, COVIDhub-baselin...
4 wk ahead inc case     [BPagano-RtDriven, CEID-Walk, COVIDhub-baselin...
4 wk ahead inc death    [BPagano-RtDriven, CEID-Walk, COVIDhub-baselin...
Name: model, dtype: object

# Export

In [79]:
# write the training data, with the test date in the file name
train_path = 'data/ensemble_data/df_train_{}.csv'.format(test_date.date())
df.to_csv(train_path, index=False)

In [80]:
# write the test data, with the test date in the file name
test_path = 'data/ensemble_data/df_test_{}.csv'.format(test_date.date())
df_test.to_csv(test_path, index=False)