# Example Predictor: Linear Rollout Predictor

This example contains basic functionality for training and evaluating a linear predictor that rolls out predictions day-by-day.

First, a training data set is created from historical case and npi data.

Second, a linear model is trained to predict future cases from prior case data along with prior and future npi data.
The model is an off-the-shelf sklearn Lasso model, that uses a positive weight constraint to enforce the assumption that increased npis has a negative correlation with future cases.

Third, a sample evaluation set is created, and the predictor is applied to this evaluation set to produce prediction results in the correct format.

## Training

In [1]:
import pickle
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split

### Copy the data locally

In [2]:
# Main source for the training data
DATA_URL = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/OxCGRT_latest.csv'
# Local file
DATA_FILE = 'data/OxCGRT_latest.csv'

In [3]:
import os
import urllib.request
if not os.path.exists('data'):
    os.mkdir('data')
urllib.request.urlretrieve(DATA_URL, DATA_FILE)

('data/OxCGRT_latest.csv', <http.client.HTTPMessage at 0x20029bcd9c8>)

In [22]:
# Load historical data from local file
df = pd.read_csv(DATA_FILE, 
                 parse_dates=['Date'],
                 encoding="ISO-8859-1",
                 dtype={"RegionName": str,
                        "RegionCode": str},
                 error_bad_lines=False)

In [5]:
df.columns

Index(['CountryName', 'CountryCode', 'RegionName', 'RegionCode',
       'Jurisdiction', 'Date', 'C1_School closing', 'C1_Flag',
       'C2_Workplace closing', 'C2_Flag', 'C3_Cancel public events', 'C3_Flag',
       'C4_Restrictions on gatherings', 'C4_Flag', 'C5_Close public transport',
       'C5_Flag', 'C6_Stay at home requirements', 'C6_Flag',
       'C7_Restrictions on internal movement', 'C7_Flag',
       'C8_International travel controls', 'E1_Income support', 'E1_Flag',
       'E2_Debt/contract relief', 'E3_Fiscal measures',
       'E4_International support', 'H1_Public information campaigns',
       'H1_Flag', 'H2_Testing policy', 'H3_Contact tracing',
       'H4_Emergency investment in healthcare', 'H5_Investment in vaccines',
       'H6_Facial Coverings', 'H6_Flag', 'M1_Wildcard', 'ConfirmedCases',
       'ConfirmedDeaths', 'StringencyIndex', 'StringencyIndexForDisplay',
       'StringencyLegacyIndex', 'StringencyLegacyIndexForDisplay',
       'GovernmentResponseIndex', 'Gove

In [6]:
# For testing, restrict training data to that before a hypothetical predictor submission date
HYPOTHETICAL_SUBMISSION_DATE = np.datetime64("2020-07-31")
df = df[df.Date <= HYPOTHETICAL_SUBMISSION_DATE]

In [7]:
# Add RegionID column that combines CountryName and RegionName for easier manipulation of data
df['GeoID'] = df['CountryName'] + '__' + df['RegionName'].astype(str)

In [8]:
# Add new cases column
df['NewCases'] = df.groupby('GeoID').ConfirmedCases.diff().fillna(0)

In [9]:
# Keep only columns of interest
id_cols = ['CountryName',
           'RegionName',
           'GeoID',
           'Date']
cases_col = ['NewCases']
npi_cols = ['C1_School closing',
            'C2_Workplace closing',
            'C3_Cancel public events',
            'C4_Restrictions on gatherings',
            'C5_Close public transport',
            'C6_Stay at home requirements',
            'C7_Restrictions on internal movement',
            'C8_International travel controls',
            'H1_Public information campaigns',
            'H2_Testing policy',
            'H3_Contact tracing',
            'H6_Facial Coverings']
df = df[id_cols + cases_col + npi_cols]

In [10]:
# Fill any missing case values by interpolation and setting NaNs to 0
df.update(df.groupby('GeoID').NewCases.apply(
    lambda group: group.interpolate()).fillna(0))

In [11]:
# Fill any missing NPIs by assuming they are the same as previous day
for npi_col in npi_cols:
    df.update(df.groupby('GeoID')[npi_col].ffill().fillna(0))

In [12]:
df

Unnamed: 0,CountryName,RegionName,GeoID,Date,NewCases,C1_School closing,C2_Workplace closing,C3_Cancel public events,C4_Restrictions on gatherings,C5_Close public transport,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls,H1_Public information campaigns,H2_Testing policy,H3_Contact tracing,H6_Facial Coverings
0,Aruba,,Aruba__nan,2020-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Aruba,,Aruba__nan,2020-01-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Aruba,,Aruba__nan,2020-01-03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Aruba,,Aruba__nan,2020-01-04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Aruba,,Aruba__nan,2020-01-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87064,Zimbabwe,,Zimbabwe__nan,2020-07-27,78.0,3.0,1.0,2.0,3.0,1.0,2.0,2.0,4.0,2.0,1.0,1.0,4.0
87065,Zimbabwe,,Zimbabwe__nan,2020-07-28,192.0,3.0,1.0,2.0,3.0,1.0,2.0,2.0,4.0,2.0,1.0,1.0,4.0
87066,Zimbabwe,,Zimbabwe__nan,2020-07-29,113.0,3.0,1.0,2.0,3.0,1.0,2.0,2.0,4.0,2.0,1.0,1.0,4.0
87067,Zimbabwe,,Zimbabwe__nan,2020-07-30,62.0,3.0,1.0,2.0,3.0,1.0,2.0,2.0,4.0,2.0,1.0,1.0,4.0


In [13]:
# Set number of past days to use to make predictions
nb_lookback_days = 30

# Create training data across all countries for predicting one day ahead
X_cols = cases_col + npi_cols
y_col = cases_col
X_samples = []
y_samples = []
geo_ids = df.GeoID.unique()
for g in geo_ids:
    gdf = df[df.GeoID == g]
    all_case_data = np.array(gdf[cases_col])
    all_npi_data = np.array(gdf[npi_cols])

    # Create one sample for each day where we have enough data
    # Each sample consists of cases and npis for previous nb_lookback_days
    nb_total_days = len(gdf)
    for d in range(nb_lookback_days, nb_total_days - 1):
        X_cases = all_case_data[d-nb_lookback_days:d]

        # Take negative of npis to support positive
        # weight constraint in Lasso.
        X_npis = -all_npi_data[d - nb_lookback_days:d]

        # Flatten all input data so it fits Lasso input format.
        X_sample = np.concatenate([X_cases.flatten(),
                                   X_npis.flatten()])
        y_sample = all_case_data[d + 1]
        X_samples.append(X_sample)
        y_samples.append(y_sample)

X_samples = np.array(X_samples)
y_samples = np.array(y_samples).flatten()

In [14]:
# Helpful function to compute mae
def mae(pred, true):
    return np.mean(np.abs(pred - true))

In [15]:
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_samples,
                                                    y_samples,
                                                    test_size=0.2,
                                                    random_state=301)

In [16]:
# Create and train Lasso model.
# Set positive=True to enforce assumption that cases are positively correlated
# with future cases and npis are negatively correlated.
model = Lasso(alpha=0.1,
              precompute=True,
              max_iter=10000,
              positive=True,
              selection='random')
# Fit model
model.fit(X_train, y_train)

Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=10000,
      normalize=False, positive=True, precompute=True, random_state=None,
      selection='random', tol=0.0001, warm_start=False)

In [17]:
# Evaluate model
train_preds = model.predict(X_train)
train_preds = np.maximum(train_preds, 0) # Don't predict negative cases
print('Train MAE:', mae(train_preds, y_train))

test_preds = model.predict(X_test)
test_preds = np.maximum(test_preds, 0) # Don't predict negative cases
print('Test MAE:', mae(test_preds, y_test))

Train MAE: 140.7115130708164
Test MAE: 152.4960998257208


In [18]:
# Inspect the learned feature coefficients for the model
# to see what features it's paying attention to.

# Give names to the features
x_col_names = []
for d in range(-nb_lookback_days, 0):
    x_col_names.append('Day ' + str(d) + ' ' + cases_col[0])
for d in range(-nb_lookback_days, 1):
    for col_name in npi_cols:
        x_col_names.append('Day ' + str(d) + ' ' + col_name)

# View non-zero coefficients
for (col, coeff) in zip(x_col_names, list(model.coef_)):
    if coeff != 0.:
        print(col, coeff)
print('Intercept', model.intercept_)

Day -7 NewCases 0.0010981690962886179
Day -6 NewCases 0.4395939174551574
Day -5 NewCases 0.21708595513072407
Day -4 NewCases 0.05899975513969943
Day -3 NewCases 0.06945280743161403
Day -2 NewCases 0.05194441075319036
Day -1 NewCases 0.2383894816999364
Day -26 C6_Stay at home requirements 4.31299480136158
Day -22 C2_Workplace closing 9.716853002891929
Day -17 C2_Workplace closing 5.766094345069354
Intercept 26.55687525188671


In [19]:
# Save model to file
if not os.path.exists('models'):
    os.mkdir('models')
with open('models/model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

## Evaluation

Now that the predictor has been trained and saved, this section contains the functionality for evaluating it on sample evaluation data.

In [20]:
# Reload the module to get the latest changes
import predict
from importlib import reload
reload(predict)
from predict import predict_df

In [30]:
# df_eval = pd.read_csv('data/2020-09-30_historical_ip.csv')

# row_indexs = df[df['CountryName'] == 'Anguilla'].index
# row_indexs

path_to_ips_file="data/2020-09-30_historical_ip.csv"
list_countries = sorted(list(set(df.CountryName)))
hist_ips_df = pd.read_csv(path_to_ips_file,
                              parse_dates=['Date'],
                              encoding="ISO-8859-1",
                              dtype={"RegionName": str},
                              error_bad_lines=True)
hist_ips_df = hist_ips_df[hist_ips_df.CountryName.isin(list_countries)]
hist_ips_df.to_csv("data/2020-09-30_historical_ip_new.csv" , index = False)

In [31]:
%%time
preds_df = predict_df("2020-08-01", "2020-08-31", path_to_ips_file="data/2020-09-30_historical_ip_new.csv", verbose=True)


Predicting for Aruba__nan
2020-08-01: 58.832013097676466
2020-08-02: 71.32297323510508
2020-08-03: 78.9884411385919
2020-08-04: 90.24123842546683
2020-08-05: 87.8117472014118
2020-08-06: 98.7600349484055
2020-08-07: 131.043712129859
2020-08-08: 147.02330387430604
2020-08-09: 158.9529114875828
2020-08-10: 169.9426640501474
2020-08-11: 177.51782587473804
2020-08-12: 193.4843233415486
2020-08-13: 216.8238449408644
2020-08-14: 234.0413552003931
2020-08-15: 248.5614891807204
2020-08-16: 261.9688949069497
2020-08-17: 275.3003157177194
2020-08-18: 293.29289528071473
2020-08-19: 314.0776214183259
2020-08-20: 332.4304943890407
2020-08-21: 349.2338921371767
2020-08-22: 365.50191374821793
2020-08-23: 382.5349053058303
2020-08-24: 407.89251745148283
2020-08-25: 430.0842793419013
2020-08-26: 450.5729646715425
2020-08-27: 466.0014796361783
2020-08-28: 484.648520643598
2020-08-29: 515.3546084163565
2020-08-30: 541.9068844107129
2020-08-31: 566.2680616270064

Predicting for Afghanistan__nan
2020-08-0

2020-08-21: 7910.84537081261
2020-08-22: 7860.659200471844
2020-08-23: 7807.965424691464
2020-08-24: 7994.498827564898
2020-08-25: 8369.232526121434
2020-08-26: 8630.390825116065
2020-08-27: 8742.663609313939
2020-08-28: 8786.624652833447
2020-08-29: 8860.458626042933
2020-08-30: 9066.839840404056
2020-08-31: 9351.180796573675

Predicting for Azerbaijan__nan
2020-08-01: 2143.905508864326
2020-08-02: 2777.115434179791
2020-08-03: 3002.283203813526
2020-08-04: 2959.1106050606804
2020-08-05: 2639.3525037681984
2020-08-06: 1674.8565956871844
2020-08-07: 2515.0067092880695
2020-08-08: 2970.024579963173
2020-08-09: 3126.5901473337626
2020-08-10: 3100.8489962529766
2020-08-11: 2834.0265596383724
2020-08-12: 2564.8487562942364
2020-08-13: 2961.312889098826
2020-08-14: 3256.72709783517
2020-08-15: 3377.044136433693
2020-08-16: 3363.6585710101017
2020-08-17: 3234.8695147372355
2020-08-18: 3208.235017564942
2020-08-19: 3439.482666855574
2020-08-20: 3639.9084674373753
2020-08-21: 3740.560764483733

2020-08-12: 1175.4373465056506
2020-08-13: 1408.9578552954586
2020-08-14: 1502.5692836350077
2020-08-15: 1525.8530719076202
2020-08-16: 1491.9720669482945
2020-08-17: 1438.3819367637952
2020-08-18: 1489.537375319962
2020-08-19: 1620.9651836969206
2020-08-20: 1695.6945168657362
2020-08-21: 1723.7103507499196
2020-08-22: 1719.9150119042924
2020-08-23: 1720.9199044274983
2020-08-24: 1778.277004191289
2020-08-25: 1867.4455935629344
2020-08-26: 1930.6044309928125
2020-08-27: 1957.2832946137587
2020-08-28: 1975.0815736919785
2020-08-29: 2003.2467950824603
2020-08-30: 2061.037070764019
2020-08-31: 2132.0587311890354

Predicting for Belarus__nan
2020-08-01: 1204.9756095394246
2020-08-02: 1465.4856591021608
2020-08-03: 1541.6280481334315
2020-08-04: 1617.1840586861654
2020-08-05: 1378.408580939371
2020-08-06: 920.1021009700008
2020-08-07: 1392.7190497155634
2020-08-08: 1601.8250014185369
2020-08-09: 1680.465067242601
2020-08-10: 1697.3212982907103
2020-08-11: 1543.4586636759345
2020-08-12: 1426

2020-08-09: 231.62158240389508
2020-08-10: 219.0780145826726
2020-08-11: 231.0635236390047
2020-08-12: 266.63492628815874
2020-08-13: 324.9463260095422
2020-08-14: 350.0876429416763
2020-08-15: 332.8528332560628
2020-08-16: 333.2197209456309
2020-08-17: 350.5754582667661
2020-08-18: 383.32698713988026
2020-08-19: 422.1749993602521
2020-08-20: 441.73883734818236
2020-08-21: 444.2501956055553
2020-08-22: 454.40558640575694
2020-08-23: 475.3475388524662
2020-08-24: 505.04586861756536
2020-08-25: 535.4272964111753
2020-08-26: 555.4542009113054
2020-08-27: 568.4348070328069
2020-08-28: 585.445012060351
2020-08-29: 609.0219550333837
2020-08-30: 637.2827415238758
2020-08-31: 664.9274295961122

Predicting for Central African Republic__nan
2020-08-01: 52.42141290904502
2020-08-02: 67.10802689864575
2020-08-03: 71.15272221074937
2020-08-04: 74.75896665844084
2020-08-05: 79.93724114044045
2020-08-06: 93.88638611768256
2020-08-07: 124.20218969464617
2020-08-08: 140.1179120513649
2020-08-09: 149.33

2020-08-25: 523.2783448917694
2020-08-26: 551.7702035883752
2020-08-27: 569.7526142879846
2020-08-28: 589.475559964872
2020-08-29: 611.5719444948255
2020-08-30: 638.5091102373596
2020-08-31: 670.7195773722983

Predicting for Congo__nan
2020-08-01: 57.568783553915985
2020-08-02: 89.78866142637972
2020-08-03: 126.49334954427641
2020-08-04: 89.61131477153927
2020-08-05: 88.23144833664318
2020-08-06: 102.93427003307653
2020-08-07: 138.27296852834803
2020-08-08: 167.3841602083289
2020-08-09: 185.26331920398542
2020-08-10: 177.8871612315016
2020-08-11: 183.70900617093798
2020-08-12: 201.80632433321946
2020-08-13: 228.83597200312005
2020-08-14: 252.90598715754686
2020-08-15: 267.93873995115285
2020-08-16: 273.75867603726766
2020-08-17: 285.6732487452306
2020-08-18: 310.8757781765562
2020-08-19: 335.9210360500456
2020-08-20: 358.24568283941187
2020-08-21: 380.9862086413999
2020-08-22: 395.95476136544494
2020-08-23: 424.1644567221687
2020-08-24: 451.0923626668222
2020-08-25: 477.2421086074804
2

2020-08-21: 22362.63228871747
2020-08-22: 21886.43308297664
2020-08-23: 21741.77935847247
2020-08-24: 22485.631946735724
2020-08-25: 23703.961909355843
2020-08-26: 24419.770611798027
2020-08-27: 24561.012000284252
2020-08-28: 24519.569514787483
2020-08-29: 24735.99096120987
2020-08-30: 25428.789543922496
2020-08-31: 26302.4215666645

Predicting for Djibouti__nan
2020-08-01: 52.0971613995611
2020-08-02: 64.06676516226548
2020-08-03: 70.15408357062816
2020-08-04: 76.29740424930436
2020-08-05: 83.10001545053086
2020-08-06: 94.4087762604437
2020-08-07: 123.73618563227132
2020-08-08: 138.79032116217115
2020-08-09: 149.11196274140656
2020-08-10: 159.24259941983934
2020-08-11: 170.42175623812167
2020-08-12: 186.56333844415602
2020-08-13: 208.47720315430135
2020-08-14: 224.80442817697804
2020-08-15: 238.36869717518437
2020-08-16: 251.8162522449079
2020-08-17: 266.58300543821423
2020-08-18: 290.3384528003493
2020-08-19: 311.69814994009056
2020-08-20: 329.98906093719273
2020-08-21: 346.880031481

2020-08-24: 6773.79676450099
2020-08-25: 7206.18025742905
2020-08-26: 7106.702592962968
2020-08-27: 6866.251304834782
2020-08-28: 6873.581124772037
2020-08-29: 7160.339714123943
2020-08-30: 7575.1503203019165
2020-08-31: 7844.418301132438

Predicting for Estonia__nan
2020-08-01: 319.4151530548073
2020-08-02: 428.02971526817123
2020-08-03: 413.93979291632195
2020-08-04: 401.3607540546632
2020-08-05: 370.16776392600633
2020-08-06: 283.4850951735127
2020-08-07: 423.10399906343554
2020-08-08: 494.0147445916044
2020-08-09: 501.50550900382945
2020-08-10: 499.2406118127775
2020-08-11: 479.7085664062113
2020-08-12: 471.80852871146
2020-08-13: 545.8694676339903
2020-08-14: 594.5756672358949
2020-08-15: 611.2117881456896
2020-08-16: 617.1577774292239
2020-08-17: 616.8881089692995
2020-08-18: 627.9790002187145
2020-08-19: 675.124931530329
2020-08-20: 712.3759668699124
2020-08-21: 733.1169886277879
2020-08-22: 746.4987390527714
2020-08-23: 748.7137839459438
2020-08-24: 768.6851703646014
2020-08-25

2020-08-26: 775.3576621027291
2020-08-27: 785.4153683036259
2020-08-28: 802.4429703651732
2020-08-29: 832.2744097586091
2020-08-30: 869.9457291792924
2020-08-31: 907.2706098842872

Predicting for United Kingdom__Scotland
2020-08-01: 717.7471077804687
2020-08-02: 753.4983163242173
2020-08-03: 614.7275064837404
2020-08-04: 386.2905492201383
2020-08-05: 269.6127374055672
2020-08-06: 377.96787161802564
2020-08-07: 696.963152020685
2020-08-08: 743.4344167521737
2020-08-09: 661.17030717386
2020-08-10: 546.6203703902696
2020-08-11: 509.0685718897132
2020-08-12: 607.9482301054215
2020-08-13: 767.1957536217527
2020-08-14: 803.8488510152555
2020-08-15: 764.5315542973943
2020-08-16: 715.3589476629493
2020-08-17: 718.3676746880744
2020-08-18: 799.7246617978583
2020-08-19: 891.6104138301124
2020-08-20: 922.8009706443468
2020-08-21: 912.9193324132539
2020-08-22: 902.3595912096174
2020-08-23: 935.5634032275561
2020-08-24: 999.7986979377869
2020-08-25: 1062.7728443325354
2020-08-26: 1094.4718790792754

2020-08-27: 1215.3308996655337
2020-08-28: 1234.3790257704977
2020-08-29: 1265.0849267713556
2020-08-30: 1311.823616975979
2020-08-31: 1364.1221435813127

Predicting for Guam__nan
2020-08-01: 124.08135443047581
2020-08-02: 124.67814176778063
2020-08-03: 123.7926931520585
2020-08-04: 137.9509632649901
2020-08-05: 106.06874823892275
2020-08-06: 126.00731687440023
2020-08-07: 184.71041222678002
2020-08-08: 198.5677229641051
2020-08-09: 207.10918361223614
2020-08-10: 214.4203892248653
2020-08-11: 211.36158585810307
2020-08-12: 233.8964608936549
2020-08-13: 268.95702528689003
2020-08-14: 286.71485930613494
2020-08-15: 299.5111340699424
2020-08-16: 309.8079765736735
2020-08-17: 319.7846290707121
2020-08-18: 342.1482575926552
2020-08-19: 368.75999874979385
2020-08-20: 388.18867092794346
2020-08-21: 404.22439561832505
2020-08-22: 418.93033632824955
2020-08-23: 435.44031930795757
2020-08-24: 458.0189363869797
2020-08-25: 487.9332200389001
2020-08-26: 510.3026960595782
2020-08-27: 525.6814583756

2020-08-22: 54012.22174644183
2020-08-23: 53422.79547076941
2020-08-24: 54231.571265587176
2020-08-25: 56603.51190790104
2020-08-26: 58492.18512735734
2020-08-27: 59472.32709993527
2020-08-28: 59813.278358496624
2020-08-29: 60132.862927626506
2020-08-30: 61276.06453892258
2020-08-31: 63100.283850420965

Predicting for Ireland__nan
2020-08-01: 364.2604660715715
2020-08-02: 433.96785906475907
2020-08-03: 410.9108523219283
2020-08-04: 415.7599112588939
2020-08-05: 362.92002992014875
2020-08-06: 292.34649232622803
2020-08-07: 446.6641532509875
2020-08-08: 502.4397880345909
2020-08-09: 506.72647546752455
2020-08-10: 507.83508270860955
2020-08-11: 482.7572219328768
2020-08-12: 482.84345275316696
2020-08-13: 561.7589743373636
2020-08-14: 604.5184398706023
2020-08-15: 619.5237841539598
2020-08-16: 625.8560009114358
2020-08-17: 624.7666018033551
2020-08-18: 645.5426032563029
2020-08-19: 695.7372545426996
2020-08-20: 731.2211790568509
2020-08-21: 751.6840328847754
2020-08-22: 765.6808811314802
2

2020-08-20: 2676.52436455136
2020-08-21: 2710.951156814347
2020-08-22: 2666.4250471187343
2020-08-23: 2638.351819895663
2020-08-24: 2728.2341807073453
2020-08-25: 2880.3270962566085
2020-08-26: 2987.456394622825
2020-08-27: 3026.7938707697213
2020-08-28: 3031.9726929563158
2020-08-29: 3058.7869880357284
2020-08-30: 3146.999091125692
2020-08-31: 3262.3154592718965

Predicting for Kazakhstan__nan
2020-08-01: 860.695771354821
2020-08-02: 995.0961228230009
2020-08-03: 981.4588708921254
2020-08-04: 929.0106087205941
2020-08-05: 847.8235575683636
2020-08-06: 615.7646218679107
2020-08-07: 958.3035624879668
2020-08-08: 1076.2402857200982
2020-08-09: 1084.007942987284
2020-08-10: 1061.3892134841376
2020-08-11: 998.6777161845362
2020-08-12: 962.3103010260179
2020-08-13: 1125.1961366574187
2020-08-14: 1210.3540147441463
2020-08-15: 1231.5239047031791
2020-08-16: 1226.6129585844465
2020-08-17: 1206.5792892577274
2020-08-18: 1233.1135384788217
2020-08-19: 1329.3565908914488
2020-08-20: 1394.2065362

2020-08-14: 230.6535019464471
2020-08-15: 243.03723877156347
2020-08-16: 255.09590685179944
2020-08-17: 269.8227437798241
2020-08-18: 294.84763275042235
2020-08-19: 317.25848471792085
2020-08-20: 335.55835812977716
2020-08-21: 351.77149335634147
2020-08-22: 368.1315753470455
2020-08-23: 397.10351625249086
2020-08-24: 422.9477155951263
2020-08-25: 446.55825718054655
2020-08-26: 468.0954150155597
2020-08-27: 488.65920820775943
2020-08-28: 511.3437537724466
2020-08-29: 539.072785034669
2020-08-30: 566.0786158790929
2020-08-31: 591.8285004737295

Predicting for Libya__nan
2020-08-01: 501.36795628552693
2020-08-02: 653.18543579554
2020-08-03: 645.4547955025232
2020-08-04: 494.50916713131033
2020-08-05: 723.213340433236
2020-08-06: 442.0802645711358
2020-08-07: 628.2424012046322
2020-08-08: 730.6070102134461
2020-08-09: 722.6479505408473
2020-08-10: 705.6957583716859
2020-08-11: 758.6752927203976
2020-08-12: 692.9911436001352
2020-08-13: 782.1866943060286
2020-08-14: 846.1929501140778
2020-0

2020-08-30: 5386.097794947003
2020-08-31: 5603.744598574533

Predicting for Moldova__nan
2020-08-01: 1310.9425481927806
2020-08-02: 1471.5447368254327
2020-08-03: 1453.630798803527
2020-08-04: 1412.1461379923312
2020-08-05: 1194.1685894926081
2020-08-06: 882.4377843497301
2020-08-07: 1402.6355219775683
2020-08-08: 1561.0162980157997
2020-08-09: 1574.578545468264
2020-08-10: 1538.1998494134418
2020-08-11: 1408.3840213388926
2020-08-12: 1361.4872673010887
2020-08-13: 1604.553604663135
2020-08-14: 1722.0382950320645
2020-08-15: 1749.9936402293329
2020-08-16: 1732.7171631139536
2020-08-17: 1685.2644275723374
2020-08-18: 1719.7022180862045
2020-08-19: 1858.1996385130888
2020-08-20: 1946.6709799316386
2020-08-21: 1983.215422737324
2020-08-22: 1990.3084241164497
2020-08-23: 2004.5274047634098
2020-08-24: 2061.1957805060006
2020-08-25: 2158.218591609137
2020-08-26: 2232.674290531609
2020-08-27: 2269.3139055677616
2020-08-28: 2298.242801957883
2020-08-29: 2336.4981243987686
2020-08-30: 2400.047

2020-08-26: 398.0292913011799
2020-08-27: 405.8986847263586
2020-08-28: 418.9435032275379
2020-08-29: 430.52030209611604
2020-08-30: 445.4845129151756
2020-08-31: 462.4491915472598

Predicting for Malawi__nan
2020-08-01: 53.16080155123458
2020-08-02: 64.87904851500448
2020-08-03: 69.33658988773828
2020-08-04: 74.25829495623412
2020-08-05: 79.61236329187494
2020-08-06: 93.68579778145092
2020-08-07: 123.83672238741104
2020-08-08: 138.59497567417918
2020-08-09: 148.01349803124157
2020-08-10: 157.28051879872862
2020-08-11: 168.19698366946932
2020-08-12: 185.543418292836
2020-08-13: 207.91841389301766
2020-08-14: 224.02373702357627
2020-08-15: 237.04243220900435
2020-08-16: 250.01385567832153
2020-08-17: 264.7956898553764
2020-08-18: 283.34238420827427
2020-08-19: 303.5517570748405
2020-08-20: 321.0553562930764
2020-08-21: 336.99457823029354
2020-08-22: 353.1267675190288
2020-08-23: 370.74696347773676
2020-08-24: 390.48155969754634
2020-08-25: 416.63235127979715
2020-08-26: 437.243944474881

2020-08-10: 1949.273505365556
2020-08-11: 1772.2226502338171
2020-08-12: 1676.464099988701
2020-08-13: 1977.522816793086
2020-08-14: 2167.9554525385006
2020-08-15: 2203.7633272572
2020-08-16: 2168.7182442996454
2020-08-17: 2094.5183121589284
2020-08-18: 2123.3303686850604
2020-08-19: 2299.6019368387906
2020-08-20: 2427.7158334185874
2020-08-21: 2473.3787178013854
2020-08-22: 2473.3874842253854
2020-08-23: 2488.0914293631595
2020-08-24: 2553.1774570951193
2020-08-25: 2677.4830544235765
2020-08-26: 2777.943240725842
2020-08-27: 2833.9525300074333
2020-08-28: 2862.476257908905
2020-08-29: 2907.089662894626
2020-08-30: 2984.63637504492
2020-08-31: 3087.249655381487

Predicting for New Zealand__nan
2020-08-01: 53.282515029391604
2020-08-02: 66.6612005765598
2020-08-03: 74.03750122263823
2020-08-04: 79.87597696718794
2020-08-05: 82.21577355024607
2020-08-06: 95.05848723525405
2020-08-07: 125.40709081841045
2020-08-08: 141.35695324512892
2020-08-09: 152.2904029983368
2020-08-10: 161.673437132

2020-08-30: 29608.381310024444
2020-08-31: 30542.471202371024

Predicting for Puerto Rico__nan
2020-08-01: 658.4454981957956
2020-08-02: 877.141119416678
2020-08-03: 1209.6422862752606
2020-08-04: 1387.9847553813006
2020-08-05: 1329.3670546538683
2020-08-06: 720.3324550175718
2020-08-07: 939.0703714405367
2020-08-08: 1135.0719274359399
2020-08-09: 1332.522556719286
2020-08-10: 1435.071292964421
2020-08-11: 1338.5076391948592
2020-08-12: 1125.7849096857117
2020-08-13: 1226.8661623585103
2020-08-14: 1368.5219079292597
2020-08-15: 1496.3455599447411
2020-08-16: 1552.9794080200418
2020-08-17: 1500.4069084644216
2020-08-18: 1442.1431086123666
2020-08-19: 1511.950299110054
2020-08-20: 1615.3859238208502
2020-08-21: 1705.1621079866445
2020-08-22: 1746.9709798503736
2020-08-23: 1746.9238906757091
2020-08-24: 1750.9063019935234
2020-08-25: 1813.1309915715788
2020-08-26: 1895.6704900635254
2020-08-27: 1967.5078477394259
2020-08-28: 2011.9445172904102
2020-08-29: 2036.562881131616
2020-08-30: 206

2020-08-28: 545.3525987028851
2020-08-29: 572.8782935209277
2020-08-30: 599.787851204639
2020-08-31: 626.2181382223973

Predicting for Saudi Arabia__nan
2020-08-01: 291.65455358517966
2020-08-02: 351.4250766912296
2020-08-03: 336.8577955898945
2020-08-04: 315.57154964756523
2020-08-05: 283.71936836705083
2020-08-06: 242.3835138672476
2020-08-07: 369.47600156490626
2020-08-08: 417.59093280547205
2020-08-09: 419.95361361553853
2020-08-10: 413.1163156251834
2020-08-11: 399.45041197050745
2020-08-12: 408.22433363286524
2020-08-13: 475.5393715988312
2020-08-14: 512.493295493548
2020-08-15: 524.2096278406085
2020-08-16: 528.1453899038355
2020-08-17: 532.1200952112891
2020-08-18: 554.7211730596634
2020-08-19: 598.9032008404336
2020-08-20: 629.9800893179175
2020-08-21: 647.5331257598287
2020-08-22: 660.3397358815075
2020-08-23: 675.727540118093
2020-08-24: 702.6446580305966
2020-08-25: 737.9791326172559
2020-08-26: 767.1452547621269
2020-08-27: 784.9284021743622
2020-08-28: 803.714376617738
20

2020-08-31: 481.9071996337944

Predicting for Serbia__nan
2020-08-01: 4879.236137704834
2020-08-02: 5950.704573717736
2020-08-03: 6026.787394000714
2020-08-04: 5764.798600699814
2020-08-05: 4641.270404928724
2020-08-06: 3290.911333849758
2020-08-07: 5268.9298573999395
2020-08-08: 6069.721344957095
2020-08-09: 6181.0426134360905
2020-08-10: 5947.897840286505
2020-08-11: 5283.093064217869
2020-08-12: 5002.034689145367
2020-08-13: 5932.758378876908
2020-08-14: 6458.467212366203
2020-08-15: 6572.596051347279
2020-08-16: 6428.483245213525
2020-08-17: 6137.9670534782545
2020-08-18: 6183.699970336616
2020-08-19: 6699.191886324914
2020-08-20: 7052.671786587116
2020-08-21: 7169.213290368133
2020-08-22: 7127.564690649774
2020-08-23: 7070.4312664561085
2020-08-24: 7215.288140405111
2020-08-25: 7554.228896472764
2020-08-26: 7817.381213755937
2020-08-27: 7942.674690713952
2020-08-28: 7987.716184171354
2020-08-29: 8049.521136990201
2020-08-30: 8228.017083800429
2020-08-31: 8490.581648764146

Predict

2020-08-16: 332.2127809601851
2020-08-17: 343.02005276566814
2020-08-18: 367.20806562993596
2020-08-19: 394.9757439914224
2020-08-20: 417.551809995227
2020-08-21: 435.79337814377254
2020-08-22: 451.9633696976392
2020-08-23: 479.70278910122255
2020-08-24: 506.4271706060006
2020-08-25: 533.5722011102396
2020-08-26: 558.2268623554744
2020-08-27: 576.2479671610535
2020-08-28: 598.4368120021852
2020-08-29: 625.989705799204
2020-08-30: 654.0879906441181
2020-08-31: 682.1361822500733

Predicting for Chad__nan
2020-08-01: 56.364066087854965
2020-08-02: 68.21016300999356
2020-08-03: 74.54675147306766
2020-08-04: 80.9322730345858
2020-08-05: 85.85838775796063
2020-08-06: 96.78512272742124
2020-08-07: 127.80221346548534
2020-08-08: 143.12777495250668
2020-08-09: 153.62666590651003
2020-08-10: 163.60785246181393
2020-08-11: 173.97156823391202
2020-08-12: 190.13612845174984
2020-08-13: 212.81446809787107
2020-08-14: 229.41932839843525
2020-08-15: 243.0887502879918
2020-08-16: 256.387721415814
2020-

2020-08-01: 3891.590690950056
2020-08-02: 4831.718272609557
2020-08-03: 5208.799687531824
2020-08-04: 5557.295227731273
2020-08-05: 4862.325541084902
2020-08-06: 2996.7168259952523
2020-08-07: 4470.527652802044
2020-08-08: 5196.661332530502
2020-08-09: 5518.193997517431
2020-08-10: 5627.594891838918
2020-08-11: 5097.644049837545
2020-08-12: 4541.235357126653
2020-08-13: 5211.096885315429
2020-08-14: 5702.153226405699
2020-08-15: 5949.990958703577
2020-08-16: 5981.676466867724
2020-08-17: 5722.099112826478
2020-08-18: 5608.2913942463065
2020-08-19: 5984.957708843173
2020-08-20: 6323.0834754456955
2020-08-21: 6516.401246902716
2020-08-22: 6557.346146001402
2020-08-23: 6484.075873899354
2020-08-24: 6539.332490095352
2020-08-25: 6801.806140138054
2020-08-26: 7055.592529522541
2020-08-27: 7223.482571715662
2020-08-28: 7300.483712203132
2020-08-29: 7350.221068584709
2020-08-30: 7473.900831911765
2020-08-31: 7691.75774291782

Predicting for Taiwan__nan
2020-08-01: 50.66581220257115
2020-08-02

2020-08-27: 3131.007389811509
2020-08-28: 3151.84096410591
2020-08-29: 3191.480772436838
2020-08-30: 3278.0588148891416
2020-08-31: 3388.2551183462474

Predicting for United States__Arkansas
2020-08-01: 1742.896707916435
2020-08-02: 1938.5647400027233
2020-08-03: 1796.5256998833256
2020-08-04: 1517.8847201085891
2020-08-05: 1191.854014044974
2020-08-06: 1032.2607332157704
2020-08-07: 1757.0739691197682
2020-08-08: 1939.5821357577488
2020-08-09: 1867.7065615024953
2020-08-10: 1707.5552594140067
2020-08-11: 1542.8102771525018
2020-08-12: 1587.8263191675628
2020-08-13: 1932.704679297307
2020-08-14: 2061.789765206644
2020-08-15: 2037.721133206619
2020-08-16: 1959.0530873808798
2020-08-17: 1905.537867350794
2020-08-18: 1994.8805938638297
2020-08-19: 2186.1938072967187
2020-08-20: 2279.9823340512685
2020-08-21: 2287.8095079337722
2020-08-22: 2266.8798238507393
2020-08-23: 2285.598842569405
2020-08-24: 2375.7983609969415
2020-08-25: 2501.839912151913
2020-08-26: 2579.77562487101
2020-08-27: 2

2020-08-08: 3938.851885037862
2020-08-09: 4046.3380306009403
2020-08-10: 3365.484901316742
2020-08-11: 2953.0866369330283
2020-08-12: 2994.955749558452
2020-08-13: 3668.6454955418767
2020-08-14: 4097.914701088404
2020-08-15: 4114.041291714546
2020-08-16: 3800.737021353062
2020-08-17: 3623.5014823439674
2020-08-18: 3755.623909538597
2020-08-19: 4146.489924027446
2020-08-20: 4408.681724564747
2020-08-21: 4429.75482144274
2020-08-22: 4307.155469448248
2020-08-23: 4270.720856178748
2020-08-24: 4415.336817124129
2020-08-25: 4669.533177675448
2020-08-26: 4848.140920330925
2020-08-27: 4894.754497636198
2020-08-28: 4879.55108486424
2020-08-29: 4920.993422977347
2020-08-30: 5062.5731102069285
2020-08-31: 5250.846405370834

Predicting for United States__Hawaii
2020-08-01: 147.29519899774107
2020-08-02: 187.98308679953612
2020-08-03: 207.62542258961022
2020-08-04: 198.4953391012975
2020-08-05: 180.3244165487706
2020-08-06: 161.5749164814241
2020-08-07: 230.14488751261916
2020-08-08: 266.028599612

2020-08-21: 2945.14169070707
2020-08-22: 2947.2219242660794
2020-08-23: 2947.8446313782765
2020-08-24: 3056.652005297896
2020-08-25: 3218.1103479188905
2020-08-26: 3307.130818317678
2020-08-27: 3341.2983196616146
2020-08-28: 3372.7523814334513
2020-08-29: 3421.630809308488
2020-08-30: 3525.4240720564094
2020-08-31: 3647.3273904863254

Predicting for United States__Massachusetts
2020-08-01: 2222.060959503015
2020-08-02: 2616.03460996217
2020-08-03: 2891.3504161428204
2020-08-04: 2603.2698236769847
2020-08-05: 1916.5990317203382
2020-08-06: 1482.2712636780016
2020-08-07: 2399.682516144957
2020-08-08: 2766.53072424543
2020-08-09: 2889.8807268356145
2020-08-10: 2701.0306215076585
2020-08-11: 2345.5646401848803
2020-08-12: 2288.701712454522
2020-08-13: 2733.292202293197
2020-08-14: 2989.543233862155
2020-08-15: 3062.433146386134
2020-08-16: 2960.594732033702
2020-08-17: 2815.3196167835263
2020-08-18: 2872.472264663967
2020-08-19: 3126.783477644014
2020-08-20: 3303.2371570346895
2020-08-21: 

2020-08-02: 3659.0241770514567
2020-08-03: 3726.6344667757994
2020-08-04: 3877.3668413089254
2020-08-05: 2690.693659528012
2020-08-06: 2105.5144532979266
2020-08-07: 3463.4904418450815
2020-08-08: 3822.5362440658105
2020-08-09: 3930.757328878009
2020-08-10: 3843.721536218235
2020-08-11: 3285.128618979386
2020-08-12: 3212.3979195946135
2020-08-13: 3840.6432030915166
2020-08-14: 4125.520296082497
2020-08-15: 4217.131008537805
2020-08-16: 4133.705879799106
2020-08-17: 3913.990260737907
2020-08-18: 3990.01311816484
2020-08-19: 4334.269820096103
2020-08-20: 4545.91158649552
2020-08-21: 4629.037733610194
2020-08-22: 4603.9729138582625
2020-08-23: 4566.868922175874
2020-08-24: 4682.893095978566
2020-08-25: 4909.149387046618
2020-08-26: 5076.517524032368
2020-08-27: 5161.058107319829
2020-08-28: 5193.483253652569
2020-08-29: 5239.426735145435
2020-08-30: 5367.889580632669
2020-08-31: 5544.061745395258

Predicting for United States__North Dakota
2020-08-01: 1170.7487697027623
2020-08-02: 1396.3

2020-08-17: 9876.138483711084
2020-08-18: 9650.002779866501
2020-08-19: 10259.138451244593
2020-08-20: 10764.533378180453
2020-08-21: 11043.858706324838
2020-08-22: 11164.74553587387
2020-08-23: 11101.848149417863
2020-08-24: 11174.833604444957
2020-08-25: 11591.079598454988
2020-08-26: 11980.338420371054
2020-08-27: 12241.387958803089
2020-08-28: 12396.848542143682
2020-08-29: 12487.389867092004
2020-08-30: 12680.522122823253
2020-08-31: 13025.026950396385

Predicting for United States__Oklahoma
2020-08-01: 2606.266689582806
2020-08-02: 3201.5071424935195
2020-08-03: 3511.182594975134
2020-08-04: 3505.6398529537696
2020-08-05: 3006.5437536934196
2020-08-06: 1951.9180468501909
2020-08-07: 2963.4923502799584
2020-08-08: 3446.6208630150536
2020-08-09: 3647.2278546283983
2020-08-10: 3617.7370610508983
2020-08-11: 3266.014320591125
2020-08-12: 2978.5160556194787
2020-08-13: 3449.901175838974
2020-08-14: 3778.2126165666396
2020-08-15: 3922.5594488903916
2020-08-16: 3900.703112376714
2020-08

2020-08-01: 3333.245367887409
2020-08-02: 3947.972010817269
2020-08-03: 3488.84327881831
2020-08-04: 3215.198603101952
2020-08-05: 2459.1792467956466
2020-08-06: 2005.2242633104634
2020-08-07: 3407.8999344497797
2020-08-08: 3824.2701617985213
2020-08-09: 3659.6966608450434
2020-08-10: 3427.81078573791
2020-08-11: 3044.469291169825
2020-08-12: 3058.290286788274
2020-08-13: 3722.354999782284
2020-08-14: 3989.9212119295016
2020-08-15: 3944.316053658835
2020-08-16: 3808.945022571046
2020-08-17: 3666.299044442675
2020-08-18: 3793.460778590616
2020-08-19: 4154.291399024711
2020-08-20: 4337.470439005201
2020-08-21: 4351.156124202112
2020-08-22: 4305.972183222518
2020-08-23: 4304.3898335452595
2020-08-24: 4447.497727937205
2020-08-25: 4677.724438925723
2020-08-26: 4821.1575952582525
2020-08-27: 4869.250940428613
2020-08-28: 4892.408598680961
2020-08-29: 4954.29397930594
2020-08-30: 5094.939060652214
2020-08-31: 5268.628570490043

Predicting for United States__Virginia
2020-08-01: 1918.14010302

2020-08-17: 471.7830316976523
2020-08-18: 491.6491641895873
2020-08-19: 524.9603942388293
2020-08-20: 550.0768307231419
2020-08-21: 567.610843085466
2020-08-22: 587.60249149031
2020-08-23: 619.5081495550946
2020-08-24: 646.837824545383
2020-08-25: 677.5508868411483
2020-08-26: 704.5716234839787
2020-08-27: 728.4643368444756
2020-08-28: 755.042969779906
2020-08-29: 786.2892344270618
2020-08-30: 817.0886307163855
2020-08-31: 848.7066869513257

Predicting for Venezuela__nan
2020-08-01: 318.2055101062801
2020-08-02: 342.90450384636677
2020-08-03: 273.9616160435894
2020-08-04: 488.20098851203176
2020-08-05: 583.842675187388
2020-08-06: 324.4420121481896
2020-08-07: 422.7282950422875
2020-08-08: 448.2073888778224
2020-08-09: 463.24210003947354
2020-08-10: 574.5364772032209
2020-08-11: 595.3838506038588
2020-08-12: 516.0928778692054
2020-08-13: 555.3427394840324
2020-08-14: 583.1672346692476
2020-08-15: 618.359715952358
2020-08-16: 679.7089393063504
2020-08-17: 692.4838651362011
2020-08-18: 6

Wall time: 1min 6s


In [32]:
# Check the predictions
preds_df.head()

Unnamed: 0,CountryName,RegionName,Date,PredictedDailyNewCases
213,Aruba,,2020-08-01,58.832013
214,Aruba,,2020-08-02,71.322973
215,Aruba,,2020-08-03,78.988441
216,Aruba,,2020-08-04,90.241238
217,Aruba,,2020-08-05,87.811747


# Validation
This is how the predictor is going to be called during the competition.  
!!! PLEASE DO NOT CHANGE THE API !!!

In [37]:
!python predict.py -s 2020-08-01 -e 2020-08-04 -ip data/2020-09-30_historical_ip_new.csv -o predictions/2020-08-01_2020-08-04.csv

Generating predictions from 2020-08-01 to 2020-08-04...
Saved predictions to predictions/2020-08-01_2020-08-04.csv
Done!


In [45]:
!head predictions/2020-08-01_2020-08-04.csv

'head' 不是内部或外部命令，也不是可运行的程序
或批处理文件。


# Test cases
We can generate a prediction file. Let's validate a few cases...

In [40]:
import os
from covid_xprize.validation.predictor_validation import validate_submission

def validate(start_date, end_date, ip_file, output_file):
    # First, delete any potential old file
    try:
        os.remove(output_file)
    except OSError:
        pass
    
    # Then generate the prediction, calling the official API
    !python predict.py -s {start_date} -e {end_date} -ip {ip_file} -o {output_file}
    
    # And validate it
    errors = validate_submission(start_date, end_date, ip_file, output_file)
    if errors:
        for error in errors:
            print(error)
    else:
        print("All good!")

ModuleNotFoundError: No module named 'covid_xprize'

## 4 days, no gap
- All countries and regions
- Official number of cases is known up to start_date
- Intervention Plans are the official ones

In [None]:
validate(start_date="2020-08-01",
         end_date="2020-08-04",
         ip_file="../../../validation/data/2020-09-30_historical_ip.csv",
         output_file="predictions/val_4_days.csv")

## 1 month in the future
- 2 countries only
- there's a gap between date of last known number of cases and start_date
- For future dates, Intervention Plans contains scenarios for which predictions are requested to answer the question: what will happen if we apply these plans?

In [None]:
%%time
validate(start_date="2021-01-01",
         end_date="2021-01-31",
         ip_file="../../../validation/data/future_ip.csv",
         output_file="predictions/val_1_month_future.csv")

## 180 days, from a future date, all countries and regions
- Prediction start date is 1 week from now. (i.e. assuming submission date is 1 week from now)  
- Prediction end date is 6 months after start date.  
- Prediction is requested for all available countries and regions.  
- Intervention plan scenario: freeze last known intervention plans for each country and region.  

As the number of cases is not known yet between today and start date, but the model relies on them, the model has to predict them in order to use them.  
This test is the most demanding test. It should take less than 1 hour to generate the prediction file.

### Generate the scenario

In [None]:
from datetime import datetime, timedelta

start_date = datetime.now() + timedelta(days=7)
start_date_str = start_date.strftime('%Y-%m-%d')
end_date = start_date + timedelta(days=180)
end_date_str = end_date.strftime('%Y-%m-%d')
print(f"Start date: {start_date_str}")
print(f"End date: {end_date_str}")

In [None]:
from covid_xprize.validation.scenario_generator import get_raw_data, generate_scenario, NPI_COLUMNS
DATA_FILE = 'data/OxCGRT_latest.csv'
latest_df = get_raw_data(DATA_FILE, latest=True)
scenario_df = generate_scenario(start_date_str, end_date_str, latest_df, countries=None, scenario="Freeze")
scenario_file = "predictions/180_days_future_scenario.csv"
scenario_df.to_csv(scenario_file, index=False)
print(f"Saved scenario to {scenario_file}")

### Check it

In [None]:
%%time
validate(start_date=start_date_str,
         end_date=end_date_str,
         ip_file=scenario_file,
         output_file="predictions/val_6_month_future.csv")