# Example Predictor: Linear Rollout Predictor

This example contains basic functionality for training and evaluating a linear predictor that rolls out predictions day-by-day.

First, a training data set is created from historical case and npi data.

Second, a linear model is trained to predict future cases from prior case data along with prior and future npi data.
The model is an off-the-shelf sklearn Lasso model, that uses a positive weight constraint to enforce the assumption that increased npis has a negative correlation with future cases.

Third, a sample evaluation set is created, and the predictor is applied to this evaluation set to produce prediction results in the correct format.

## Training

In [1]:
import pickle
import numpy as np
import pandas as pd
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split

### Copy the data locally

In [2]:
# Main source for the training data
DATA_URL = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/OxCGRT_latest.csv'
# Local file
DATA_FILE = 'data/OxCGRT_latest.csv'

In [3]:
import os
import urllib.request
if not os.path.exists('data'):
    os.mkdir('data')
urllib.request.urlretrieve(DATA_URL, DATA_FILE)

('data/OxCGRT_latest.csv', <http.client.HTTPMessage at 0x17e410cf888>)

In [4]:
# Load historical data from local file
df = pd.read_csv(DATA_FILE, 
                 parse_dates=['Date'],
                 encoding="ISO-8859-1",
                 dtype={"RegionName": str,
                        "RegionCode": str},
                 error_bad_lines=False)

In [5]:
df.columns

Index(['CountryName', 'CountryCode', 'RegionName', 'RegionCode',
       'Jurisdiction', 'Date', 'C1_School closing', 'C1_Flag',
       'C2_Workplace closing', 'C2_Flag', 'C3_Cancel public events', 'C3_Flag',
       'C4_Restrictions on gatherings', 'C4_Flag', 'C5_Close public transport',
       'C5_Flag', 'C6_Stay at home requirements', 'C6_Flag',
       'C7_Restrictions on internal movement', 'C7_Flag',
       'C8_International travel controls', 'E1_Income support', 'E1_Flag',
       'E2_Debt/contract relief', 'E3_Fiscal measures',
       'E4_International support', 'H1_Public information campaigns',
       'H1_Flag', 'H2_Testing policy', 'H3_Contact tracing',
       'H4_Emergency investment in healthcare', 'H5_Investment in vaccines',
       'H6_Facial Coverings', 'H6_Flag', 'M1_Wildcard', 'ConfirmedCases',
       'ConfirmedDeaths', 'StringencyIndex', 'StringencyIndexForDisplay',
       'StringencyLegacyIndex', 'StringencyLegacyIndexForDisplay',
       'GovernmentResponseIndex', 'Gove

In [6]:
# For testing, restrict training data to that before a hypothetical predictor submission date
HYPOTHETICAL_SUBMISSION_DATE = np.datetime64("2020-07-31")
df = df[df.Date <= HYPOTHETICAL_SUBMISSION_DATE]

In [7]:
# Add RegionID column that combines CountryName and RegionName for easier manipulation of data
df['GeoID'] = df['CountryName'] + '__' + df['RegionName'].astype(str)

In [8]:
# Add new cases column
df['NewCases'] = df.groupby('GeoID').ConfirmedCases.diff().fillna(0)

In [9]:
# Keep only columns of interest
id_cols = ['CountryName',
           'RegionName',
           'GeoID',
           'Date']
cases_col = ['NewCases']
npi_cols = ['C1_School closing',
            'C2_Workplace closing',
            'C3_Cancel public events',
            'C4_Restrictions on gatherings',
            'C5_Close public transport',
            'C6_Stay at home requirements',
            'C7_Restrictions on internal movement',
            'C8_International travel controls',
            'H1_Public information campaigns',
            'H2_Testing policy',
            'H3_Contact tracing',
            'H6_Facial Coverings']
df = df[id_cols + cases_col + npi_cols]

In [10]:
# Fill any missing case values by interpolation and setting NaNs to 0
df.update(df.groupby('GeoID').NewCases.apply(
    lambda group: group.interpolate()).fillna(0))

In [11]:
# Fill any missing NPIs by assuming they are the same as previous day
for npi_col in npi_cols:
    df.update(df.groupby('GeoID')[npi_col].ffill().fillna(0))

In [12]:
df

Unnamed: 0,CountryName,RegionName,GeoID,Date,NewCases,C1_School closing,C2_Workplace closing,C3_Cancel public events,C4_Restrictions on gatherings,C5_Close public transport,C6_Stay at home requirements,C7_Restrictions on internal movement,C8_International travel controls,H1_Public information campaigns,H2_Testing policy,H3_Contact tracing,H6_Facial Coverings
0,Aruba,,Aruba__nan,2020-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Aruba,,Aruba__nan,2020-01-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Aruba,,Aruba__nan,2020-01-03,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Aruba,,Aruba__nan,2020-01-04,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Aruba,,Aruba__nan,2020-01-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87064,Zimbabwe,,Zimbabwe__nan,2020-07-27,78.0,3.0,1.0,2.0,3.0,1.0,2.0,2.0,4.0,2.0,1.0,1.0,4.0
87065,Zimbabwe,,Zimbabwe__nan,2020-07-28,192.0,3.0,1.0,2.0,3.0,1.0,2.0,2.0,4.0,2.0,1.0,1.0,4.0
87066,Zimbabwe,,Zimbabwe__nan,2020-07-29,113.0,3.0,1.0,2.0,3.0,1.0,2.0,2.0,4.0,2.0,1.0,1.0,4.0
87067,Zimbabwe,,Zimbabwe__nan,2020-07-30,62.0,3.0,1.0,2.0,3.0,1.0,2.0,2.0,4.0,2.0,1.0,1.0,4.0


In [13]:
# Set number of past days to use to make predictions
nb_lookback_days = 30

# Create training data across all countries for predicting one day ahead
X_cols = cases_col + npi_cols
y_col = cases_col
X_samples = []
y_samples = []
geo_ids = df.GeoID.unique()
for g in geo_ids:
    gdf = df[df.GeoID == g]
    all_case_data = np.array(gdf[cases_col])
    all_npi_data = np.array(gdf[npi_cols])

    # Create one sample for each day where we have enough data
    # Each sample consists of cases and npis for previous nb_lookback_days
    nb_total_days = len(gdf)
    for d in range(nb_lookback_days, nb_total_days - 1):
        X_cases = all_case_data[d-nb_lookback_days:d]

        # Take negative of npis to support positive
        # weight constraint in Lasso.
        X_npis = -all_npi_data[d - nb_lookback_days:d]

        # Flatten all input data so it fits Lasso input format.
        X_sample = np.concatenate([X_cases.flatten(),
                                   X_npis.flatten()])
        y_sample = all_case_data[d + 1]
        X_samples.append(X_sample)
        y_samples.append(y_sample)

X_samples = np.array(X_samples)
y_samples = np.array(y_samples).flatten()

In [14]:
# Helpful function to compute mae
def mae(pred, true):
    return np.mean(np.abs(pred - true))

In [15]:
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X_samples,
                                                    y_samples,
                                                    test_size=0.2,
                                                    random_state=301)

In [16]:
# Create and train Lasso model.
# Set positive=True to enforce assumption that cases are positively correlated
# with future cases and npis are negatively correlated.
model = Lasso(alpha=0.1,
              precompute=True,
              max_iter=10000,
              positive=True,
              selection='random')
# Fit model
model.fit(X_train, y_train)

Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=10000,
      normalize=False, positive=True, precompute=True, random_state=None,
      selection='random', tol=0.0001, warm_start=False)

In [17]:
# Evaluate model
train_preds = model.predict(X_train)
train_preds = np.maximum(train_preds, 0) # Don't predict negative cases
print('Train MAE:', mae(train_preds, y_train))

test_preds = model.predict(X_test)
test_preds = np.maximum(test_preds, 0) # Don't predict negative cases
print('Test MAE:', mae(test_preds, y_test))

Train MAE: 140.71142554508796
Test MAE: 152.4961604640549


In [18]:
# Inspect the learned feature coefficients for the model
# to see what features it's paying attention to.

# Give names to the features
x_col_names = []
for d in range(-nb_lookback_days, 0):
    x_col_names.append('Day ' + str(d) + ' ' + cases_col[0])
for d in range(-nb_lookback_days, 1):
    for col_name in npi_cols:
        x_col_names.append('Day ' + str(d) + ' ' + col_name)

# View non-zero coefficients
for (col, coeff) in zip(x_col_names, list(model.coef_)):
    if coeff != 0.:
        print(col, coeff)
print('Intercept', model.intercept_)

Day -7 NewCases 0.0011608475677650214
Day -6 NewCases 0.43952450027599643
Day -5 NewCases 0.21711030732343317
Day -4 NewCases 0.059028719919464595
Day -3 NewCases 0.06940508018514298
Day -2 NewCases 0.05198015541172006
Day -1 NewCases 0.23835895029729234
Day -26 C6_Stay at home requirements 4.315284773631653
Day -22 C2_Workplace closing 9.716177378999028
Day -17 C2_Workplace closing 5.765630705654963
Intercept 26.556349932951832


In [19]:
# Save model to file
if not os.path.exists('models'):
    os.mkdir('models')
with open('models/model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

## Evaluation

Now that the predictor has been trained and saved, this section contains the functionality for evaluating it on sample evaluation data.

In [20]:
# Reload the module to get the latest changes
import predict
from importlib import reload
reload(predict)
from predict import predict_df

In [21]:
list_countries = sorted(list(set(df.CountryName)))
hist_ips_df = pd.read_csv("data/2020-09-30_historical_ip.csv",
                              parse_dates=['Date'],
                              encoding="ISO-8859-1",
                              dtype={"RegionName": str},
                              error_bad_lines=True)
hist_ips_df = hist_ips_df[hist_ips_df.CountryName.isin(list_countries)]
hist_ips_df.to_csv("data/2020-09-30_historical_ip_new.csv" , index = False)

In [22]:
%%time
preds_df = predict_df("2020-08-01", "2020-08-31", path_to_ips_file="data/2020-09-30_historical_ip_new.csv", verbose=True)


Predicting for Aruba__nan
2020-08-01: 58.83415407877576
2020-08-02: 71.32515221885238
2020-08-03: 78.99290276184884
2020-08-04: 90.24148802296129
2020-08-05: 87.81474353574718
2020-08-06: 98.76529261660889
2020-08-07: 131.04548858897294
2020-08-08: 147.02767952954667
2020-08-09: 158.958731678789
2020-08-10: 169.946226662304
2020-08-11: 177.52399757978435
2020-08-12: 193.49142350203726
2020-08-13: 216.82870610844768
2020-08-14: 234.0482504086842
2020-08-15: 248.56932611676996
2020-08-16: 261.97622729928634
2020-08-17: 275.3094472671903
2020-08-18: 293.3023526062957
2020-08-19: 314.08623682222384
2020-08-20: 332.44049891970025
2020-08-21: 349.24472728944994
2020-08-22: 365.5131260236332
2020-08-23: 382.54734140390616
2020-08-24: 407.9047871685246
2020-08-25: 430.0962569233913
2020-08-26: 450.5860861516399
2020-08-27: 466.0129087227012
2020-08-28: 484.66028347426965
2020-08-29: 515.3663534295372
2020-08-30: 541.9178913415958
2020-08-31: 566.2793263685082

Predicting for Afghanistan__nan


2020-08-21: 7910.986024204321
2020-08-22: 7860.781264957484
2020-08-23: 7808.159044090948
2020-08-24: 7994.6898787791715
2020-08-25: 8369.320083567198
2020-08-26: 8630.511544506488
2020-08-27: 8742.821056496457
2020-08-28: 8786.793996076804
2020-08-29: 8860.665892848101
2020-08-30: 9067.032153550104
2020-08-31: 9351.325749395939

Predicting for Azerbaijan__nan
2020-08-01: 2144.0217949319003
2020-08-02: 2777.044526099755
2020-08-03: 3002.3690737418406
2020-08-04: 2959.1302554376925
2020-08-05: 2639.26260112625
2020-08-06: 1675.0809907652488
2020-08-07: 2515.0083286818744
2020-08-08: 2969.979608882416
2020-08-09: 3126.668068017902
2020-08-10: 3100.8569432371796
2020-08-11: 2834.032844518544
2020-08-12: 2565.026737903932
2020-08-13: 2961.3231926289277
2020-08-14: 3256.7232064271375
2020-08-15: 3377.1060273197554
2020-08-16: 3363.6881974824732
2020-08-17: 3234.9328995501833
2020-08-18: 3208.3645148261076
2020-08-19: 3439.5178743160486
2020-08-20: 3639.9379430403437
2020-08-21: 3740.6219709

2020-08-01: 1226.6276183159891
2020-08-02: 1258.2139362985847
2020-08-03: 1286.0992825082685
2020-08-04: 1238.7120274941826
2020-08-05: 880.8257797488732
2020-08-06: 755.6662321794918
2020-08-07: 1250.7678056481016
2020-08-08: 1355.9986359303377
2020-08-09: 1379.0092256515513
2020-08-10: 1318.442194925048
2020-08-11: 1157.2021500010464
2020-08-12: 1175.4950673010653
2020-08-13: 1408.9393697152652
2020-08-14: 1502.5898064925234
2020-08-15: 1525.8846736044718
2020-08-16: 1491.9863481218326
2020-08-17: 1438.4293057104735
2020-08-18: 1489.582487057113
2020-08-19: 1620.9719532264976
2020-08-20: 1695.7208636288156
2020-08-21: 1723.744170160842
2020-08-22: 1719.9479506035282
2020-08-23: 1720.9703494744786
2020-08-24: 1778.3209239324951
2020-08-25: 1867.4718235164974
2020-08-26: 1930.6403531536703
2020-08-27: 1957.320921249188
2020-08-28: 1975.1228123889903
2020-08-29: 2003.295956263569
2020-08-30: 2061.080895379176
2020-08-31: 2132.09447741659

Predicting for Belarus__nan
2020-08-01: 1205.022

2020-08-23: 390.72664663245297
2020-08-24: 415.87203695531144
2020-08-25: 438.89261167106304
2020-08-26: 460.3701416023775
2020-08-27: 476.8973117653649
2020-08-28: 498.74554704719833
2020-08-29: 525.8083651202487
2020-08-30: 551.8930413018284
2020-08-31: 576.8195327320223

Predicting for Botswana__nan
2020-08-01: 157.26988846067482
2020-08-02: 303.96194272724205
2020-08-03: 131.8656665632026
2020-08-04: 108.81540766865888
2020-08-05: 113.84019496029461
2020-08-06: 144.6993441003789
2020-08-07: 241.529985886933
2020-08-08: 292.4960606827778
2020-08-09: 231.6412496043434
2020-08-10: 219.0873017805376
2020-08-11: 231.06640539924712
2020-08-12: 266.6461204298091
2020-08-13: 324.9465726427689
2020-08-14: 350.0838509634558
2020-08-15: 332.868928876142
2020-08-16: 333.2311864106621
2020-08-17: 350.58401446990484
2020-08-18: 383.337756346538
2020-08-19: 422.1792162487164
2020-08-20: 441.7443190789353
2020-08-21: 444.2658632481009
2020-08-22: 454.41981404399115
2020-08-23: 475.36043711517146
2

2020-08-04: 139.70429446310726
2020-08-05: 134.9006722097803
2020-08-06: 133.16292576592227
2020-08-07: 187.72001449684083
2020-08-08: 220.2956431519963
2020-08-09: 230.8822229129866
2020-08-10: 228.91902551516083
2020-08-11: 231.97079862243658
2020-08-12: 246.3293767187623
2020-08-13: 281.4488249915752
2020-08-14: 307.3417194207196
2020-08-15: 320.7803835883265
2020-08-16: 328.4265502048696
2020-08-17: 339.27422617547495
2020-08-18: 358.65773545271185
2020-08-19: 386.23992254983267
2020-08-20: 415.13080180996707
2020-08-21: 433.03330771791275
2020-08-22: 447.5922303405837
2020-08-23: 464.61142493257177
2020-08-24: 486.8932906733539
2020-08-25: 523.2904917349895
2020-08-26: 551.7820048927194
2020-08-27: 569.7639754394238
2020-08-28: 589.4873808543507
2020-08-29: 611.5841490383752
2020-08-30: 638.5215849481347
2020-08-31: 670.7304432124315

Predicting for Congo__nan
2020-08-01: 57.575087795446485
2020-08-02: 89.79417101190788
2020-08-03: 126.48910045382537
2020-08-04: 89.61787448789536


2020-08-30: 6252.52647894774
2020-08-31: 6486.730316925278

Predicting for Germany__nan
2020-08-01: 18155.241645844788
2020-08-02: 21467.163022607485
2020-08-03: 20290.785162186497
2020-08-04: 16566.990693058284
2020-08-05: 12409.155322580524
2020-08-06: 10499.42098122571
2020-08-07: 18186.31777831851
2020-08-08: 20632.246721395284
2020-08-09: 19915.168485302176
2020-08-10: 17751.39333763707
2020-08-11: 15575.455618163487
2020-08-12: 15863.642081894019
2020-08-13: 19534.122073542065
2020-08-14: 21073.531735206478
2020-08-15: 20740.702723056776
2020-08-16: 19588.866822733384
2020-08-17: 18724.201009049397
2020-08-18: 19452.80067088272
2020-08-19: 21429.75585392671
2020-08-20: 22419.458461831375
2020-08-21: 22363.078844440886
2020-08-22: 21886.92986898535
2020-08-23: 21742.416158697768
2020-08-24: 22486.178575263657
2020-08-25: 23704.214653963085
2020-08-26: 24420.12178266467
2020-08-27: 24561.53004063731
2020-08-28: 24520.170158777197
2020-08-29: 24736.657220351528
2020-08-30: 25429.364

2020-08-22: 382.0717917933774
2020-08-23: 421.92330672346606
2020-08-24: 453.2144919629348
2020-08-25: 479.58036583819177
2020-08-26: 503.4619319058132
2020-08-27: 526.5626907102858
2020-08-28: 553.3084268265115
2020-08-29: 588.4276961745004
2020-08-30: 620.7257512327305
2020-08-31: 650.2793909768088

Predicting for Spain__nan
2020-08-01: 10493.775621382096
2020-08-02: 9232.231434083353
2020-08-03: 2814.3156144868503
2020-08-04: 1929.7002064267772
2020-08-05: 1917.116511198503
2020-08-06: 3626.538177985205
2020-08-07: 7931.479110741105
2020-08-08: 7057.671648372882
2020-08-09: 4176.708239006163
2020-08-10: 3445.281503562402
2020-08-11: 3679.230261945444
2020-08-12: 5131.406279662342
2020-08-13: 6973.2805937737285
2020-08-14: 6456.30494615879
2020-08-15: 5117.349531847584
2020-08-16: 4710.848933255292
2020-08-17: 5034.459031158301
2020-08-18: 6011.211757947906
2020-08-19: 6852.594685757387
2020-08-20: 6586.5962212291115
2020-08-21: 5976.46290377025
2020-08-22: 5823.296030079627
2020-08-

2020-08-15: 7251.27417775563
2020-08-16: 6351.580032928087
2020-08-17: 6409.235404854421
2020-08-18: 7381.2320478512875
2020-08-19: 8428.753393061957
2020-08-20: 8503.170117753294
2020-08-21: 7987.740305981238
2020-08-22: 7614.686573768478
2020-08-23: 7811.015706906219
2020-08-24: 8461.7513476676
2020-08-25: 9048.444518266833
2020-08-26: 9135.737642751
2020-08-27: 8927.71253522762
2020-08-28: 8839.85746533138
2020-08-29: 9075.933399390728
2020-08-30: 9531.97159325201
2020-08-31: 9912.14026279138

Predicting for United Kingdom__Northern Ireland
2020-08-01: 333.6219401401492
2020-08-02: 385.2520720887035
2020-08-03: 346.27872538818434
2020-08-04: 275.643078148731
2020-08-05: 181.06326201164265
2020-08-06: 227.36194799306514
2020-08-07: 384.1228114505379
2020-08-08: 427.7794045265769
2020-08-09: 411.5587037072689
2020-08-10: 371.94899082789135
2020-08-11: 342.3476674402814
2020-08-12: 388.95821511465283
2020-08-13: 473.25525466737787
2020-08-14: 507.2268755520059
2020-08-15: 505.515375525

2020-08-31: 3888.5547179033974

Predicting for Greenland__nan
2020-08-01: 50.66872756486913
2020-08-02: 62.74607228013082
2020-08-03: 68.25858382217515
2020-08-04: 73.71698964161766
2020-08-05: 79.13372993618964
2020-08-06: 92.80479676709288
2020-08-07: 121.94148168748873
2020-08-08: 136.85917814639006
2020-08-09: 146.82003876284764
2020-08-10: 156.38047517770934
2020-08-11: 167.2875789505857
2020-08-12: 184.2971760275767
2020-08-13: 206.2309809552635
2020-08-14: 222.41773174962532
2020-08-15: 235.71152483398706
2020-08-16: 248.8296351831384
2020-08-17: 263.5636311844709
2020-08-18: 276.1208529367354
2020-08-19: 294.7495342624648
2020-08-20: 311.66568782262937
2020-08-21: 327.1172051664737
2020-08-22: 342.6937441221115
2020-08-23: 348.9847233272453
2020-08-24: 362.945647400063
2020-08-25: 380.4685490778571
2020-08-26: 397.53844228110376
2020-08-27: 405.4205698906502
2020-08-28: 418.4569589136973
2020-08-29: 430.0072868915677
2020-08-30: 444.94059172797733
2020-08-31: 461.89453083250874

2020-08-15: 4817.329192831495
2020-08-16: 4707.749181979713
2020-08-17: 4498.110888910601
2020-08-18: 4558.699374887526
2020-08-19: 4946.029674022285
2020-08-20: 5193.47737795684
2020-08-21: 5272.534653482982
2020-08-22: 5241.1056179067555
2020-08-23: 5198.647388045106
2020-08-24: 5317.467066747179
2020-08-25: 5570.102067504118
2020-08-26: 5758.067223108952
2020-08-27: 5845.638847499889
2020-08-28: 5877.890534942543
2020-08-29: 5925.187517727517
2020-08-30: 6062.3354177861565
2020-08-31: 6256.878400331814

Predicting for Indonesia__nan
2020-08-01: 3827.741016684826
2020-08-02: 4714.8589099711035
2020-08-03: 4827.7178078726065
2020-08-04: 4861.049421510859
2020-08-05: 3935.5994374099537
2020-08-06: 2690.9157341562322
2020-08-07: 4225.0315993814775
2020-08-08: 4882.595889038857
2020-08-09: 5035.931000059326
2020-08-10: 4953.519781411304
2020-08-11: 4401.089000521862
2020-08-12: 4099.5149234143555
2020-08-13: 4817.848629188584
2020-08-14: 5254.275022034674
2020-08-15: 5392.365746680464
20

2020-08-01: 117.38953212454538
2020-08-02: 131.06656898046356
2020-08-03: 137.7017665174686
2020-08-04: 147.19513404450564
2020-08-05: 128.3801974172455
2020-08-06: 131.75197123071476
2020-08-07: 187.14213934580425
2020-08-08: 207.36291006751003
2020-08-09: 219.17861468324105
2020-08-10: 227.18481638645642
2020-08-11: 226.853824898481
2020-08-12: 242.69068221057765
2020-08-13: 276.4408587182401
2020-08-14: 297.2754759634172
2020-08-15: 312.03054703907907
2020-08-16: 323.36857642704456
2020-08-17: 333.57848616434813
2020-08-18: 347.3775627922712
2020-08-19: 372.2311224981051
2020-08-20: 392.65033957283265
2020-08-21: 409.34074263951396
2020-08-22: 424.1370955351843
2020-08-23: 429.19619122564023
2020-08-24: 445.00775608961726
2020-08-25: 466.424701389248
2020-08-26: 486.20330221563376
2020-08-27: 503.9989851520838
2020-08-28: 519.3097378874226
2020-08-29: 532.1947919666507
2020-08-30: 555.8354447938211
2020-08-31: 577.9790386054149

Predicting for Jordan__nan
2020-08-01: 5563.2621411080

2020-08-21: 846.0417379212324
2020-08-22: 855.2994209298236
2020-08-23: 866.7682903357897
2020-08-24: 897.890689780415
2020-08-25: 943.2652934955006
2020-08-26: 980.114995885885
2020-08-27: 1000.9944319299234
2020-08-28: 1019.4562019804882
2020-08-29: 1041.9865973254637
2020-08-30: 1075.4845589101585
2020-08-31: 1115.1338869306899

Predicting for Laos__nan
2020-08-01: 50.66872756486913
2020-08-02: 62.74607228013082
2020-08-03: 68.25858382217515
2020-08-04: 73.71698964161766
2020-08-05: 79.13372993618964
2020-08-06: 92.80479676709288
2020-08-07: 121.94148168748873
2020-08-08: 136.85917814639006
2020-08-09: 146.82003876284764
2020-08-10: 156.38047517770934
2020-08-11: 167.2875789505857
2020-08-12: 184.2971760275767
2020-08-13: 206.2309809552635
2020-08-14: 222.41773174962532
2020-08-15: 235.71152483398706
2020-08-16: 248.8296351831384
2020-08-17: 263.5636311844709
2020-08-18: 287.6521143480453
2020-08-19: 309.02937503937824
2020-08-20: 327.20007385218986
2020-08-21: 343.893822991029
2020

2020-08-06: 338.0883954416001
2020-08-07: 504.9421201434408
2020-08-08: 565.7086223388624
2020-08-09: 613.6261943679162
2020-08-10: 636.1086255369887
2020-08-11: 570.5017545122496
2020-08-12: 554.4362169751703
2020-08-13: 637.9971460039171
2020-08-14: 691.1586682244213
2020-08-15: 729.198638486158
2020-08-16: 741.5736513087936
2020-08-17: 722.8251220112297
2020-08-18: 741.5479338829
2020-08-19: 796.3908120690467
2020-08-20: 841.5872182254705
2020-08-21: 874.8716382405303
2020-08-22: 891.4789328248851
2020-08-23: 909.0967448953866
2020-08-24: 939.2516761654636
2020-08-25: 984.4116464473836
2020-08-26: 1026.101443283644
2020-08-27: 1055.490978356008
2020-08-28: 1080.7405771390584
2020-08-29: 1108.1556215097519
2020-08-30: 1143.582376874633
2020-08-31: 1185.8741796780157

Predicting for Macao__nan
2020-08-01: 50.66872756486913
2020-08-02: 62.74607228013082
2020-08-03: 68.25858382217515
2020-08-04: 73.71698964161766
2020-08-05: 79.13372993618964
2020-08-06: 92.80479676709288
2020-08-07: 12

2020-08-21: 387.72193682289384
2020-08-22: 403.72988585817006
2020-08-23: 431.7377990118066
2020-08-24: 457.52946931840086
2020-08-25: 482.49649615966104
2020-08-26: 505.78706410903186
2020-08-27: 518.8951904315954
2020-08-28: 539.6232447320058
2020-08-29: 566.2639640988557
2020-08-30: 592.7652312480739
2020-08-31: 618.7394049471957

Predicting for Mozambique__nan
2020-08-01: 104.03484979808064
2020-08-02: 187.93237039020858
2020-08-03: 194.62594414647003
2020-08-04: 172.09635998855393
2020-08-05: 145.7247275150263
2020-08-06: 141.60259837666905
2020-08-07: 201.95657032775202
2020-08-08: 251.4170213057625
2020-08-09: 262.6487678659715
2020-08-10: 256.222378140448
2020-08-11: 249.75775411404788
2020-08-12: 262.8430149623464
2020-08-13: 303.10358127872894
2020-08-14: 336.79987189719924
2020-08-15: 351.04982833943274
2020-08-16: 355.54961155699186
2020-08-17: 362.07023785760106
2020-08-18: 381.3212274178307
2020-08-19: 412.428788048904
2020-08-20: 439.5132981091469
2020-08-21: 456.5864005

2020-08-23: 365.9082219117647
2020-08-24: 379.3620502900508
2020-08-25: 397.5366232220369
2020-08-26: 415.88478825909164
2020-08-27: 424.9533990766577
2020-08-28: 438.0736150703245
2020-08-29: 449.0032569016351
2020-08-30: 463.8674786308966
2020-08-31: 481.41190521097803

Predicting for Netherlands__nan
2020-08-01: 4354.0552753882685
2020-08-02: 5588.328919751633
2020-08-03: 5850.927042000665
2020-08-04: 5877.721568262965
2020-08-05: 4773.2390122438965
2020-08-06: 3181.451772308352
2020-08-07: 4937.424186770986
2020-08-08: 5802.72018658461
2020-08-09: 6047.241119162824
2020-08-10: 5950.702475582891
2020-08-11: 5273.101148010336
2020-08-12: 4854.947106794819
2020-08-13: 5685.597767563202
2020-08-14: 6244.57277108663
2020-08-15: 6439.4847457448595
2020-08-16: 6358.706164894565
2020-08-17: 6048.691368103263
2020-08-18: 6018.654056618914
2020-08-19: 6487.243876024043
2020-08-20: 6860.055117932646
2020-08-21: 7021.6704777464265
2020-08-22: 7007.735871404524
2020-08-23: 6933.192373323964
202

2020-08-12: 1574.6470034625615
2020-08-13: 1795.3707444731394
2020-08-14: 1961.0394315358553
2020-08-15: 2058.4739320632975
2020-08-16: 2083.112935614194
2020-08-17: 2009.5479872229603
2020-08-18: 1986.255770754607
2020-08-19: 2117.119745093864
2020-08-20: 2237.675947178923
2020-08-21: 2315.6214316770192
2020-08-22: 2343.1455243452824
2020-08-23: 2342.2039043919463
2020-08-24: 2374.025278543222
2020-08-25: 2471.7374546607048
2020-08-26: 2568.3035508106996
2020-08-27: 2638.9276823423697
2020-08-28: 2681.4247537902393
2020-08-29: 2714.2222013453393
2020-08-30: 2770.0501185315334
2020-08-31: 2856.1297133774046

Predicting for Papua New Guinea__nan
2020-08-01: 51.547776565421124
2020-08-02: 62.957923172297804
2020-08-03: 68.35477348211482
2020-08-04: 73.81193981665928
2020-08-05: 79.22795477912298
2020-08-06: 93.04222356330315
2020-08-07: 122.44759887338411
2020-08-08: 137.11931952541931
2020-08-09: 146.99353273704668
2020-08-10: 156.54679508669412
2020-08-11: 167.477243770506
2020-08-12: 

2020-08-29: 1496.0907599088441
2020-08-30: 1543.3804304458008
2020-08-31: 1598.766652977376

Predicting for Romania__nan
2020-08-01: 8291.198389753808
2020-08-02: 9471.441839879904
2020-08-03: 9273.518421540675
2020-08-04: 8863.704179635944
2020-08-05: 6368.98320504982
2020-08-06: 5039.106474539866
2020-08-07: 8445.968092581783
2020-08-08: 9476.9592490741
2020-08-09: 9485.637023664513
2020-08-10: 8977.266315900026
2020-08-11: 7743.50471663726
2020-08-12: 7636.717155100646
2020-08-13: 9232.055165392023
2020-08-14: 9950.114384177145
2020-08-15: 10018.595573885514
2020-08-16: 9685.370869941227
2020-08-17: 9187.467888844743
2020-08-18: 9396.602610924121
2020-08-19: 10258.449774412851
2020-08-20: 10752.847326625171
2020-08-21: 10859.20099433796
2020-08-22: 10727.930936456423
2020-08-23: 10613.535181755427
2020-08-24: 10894.467378489418
2020-08-25: 11439.035406751398
2020-08-26: 11809.143407310828
2020-08-27: 11952.917774706126
2020-08-28: 11978.395365356759
2020-08-29: 12060.335274279041
20

2020-08-01: 236.57272189823962
2020-08-02: 222.96003049567742
2020-08-03: 196.6674104460618
2020-08-04: 193.4571062371772
2020-08-05: 273.57455071678316
2020-08-06: 204.46892413147359
2020-08-07: 291.04853292451236
2020-08-08: 302.04760855755814
2020-08-09: 296.8336637553395
2020-08-10: 314.04524893603815
2020-08-11: 343.95714710473055
2020-08-12: 340.78567734642274
2020-08-13: 382.8330041028506
2020-08-14: 399.58539060330963
2020-08-15: 408.7675677049987
2020-08-16: 428.611145923611
2020-08-17: 449.9414653413319
2020-08-18: 471.2186950691565
2020-08-19: 501.4325882975603
2020-08-20: 521.7975475647752
2020-08-21: 539.3215741648683
2020-08-22: 561.2735519266669
2020-08-23: 594.3477396175051
2020-08-24: 621.7270820982353
2020-08-25: 650.2562829909957
2020-08-26: 674.8615887117751
2020-08-27: 698.5539085592432
2020-08-28: 725.925933511478
2020-08-29: 757.5803427170424
2020-08-30: 787.9113131228069
2020-08-31: 817.9977888949679

Predicting for San Marino__nan
2020-08-01: 82.74045622065708


2020-08-16: 2941.831501977373
2020-08-17: 2895.1759079127123
2020-08-18: 3257.89798473315
2020-08-19: 3729.3760923266063
2020-08-20: 3893.8555709265793
2020-08-21: 3730.806162186246
2020-08-22: 3519.079143684166
2020-08-23: 3557.033272119877
2020-08-24: 3815.2007671502456
2020-08-25: 4097.748068002081
2020-08-26: 4206.091934620532
2020-08-27: 4140.690074962159
2020-08-28: 4080.573947176663
2020-08-29: 4159.5299621953445
2020-08-30: 4351.9400973533375
2020-08-31: 4541.882692900217

Predicting for Eswatini__nan
2020-08-01: 65.9142540191892
2020-08-02: 80.12593535054026
2020-08-03: 91.12244347853591
2020-08-04: 92.9920190860267
2020-08-05: 93.19930735608244
2020-08-06: 103.0983639442768
2020-08-07: 138.28771559572732
2020-08-08: 156.0250663266574
2020-08-09: 168.03698784326573
2020-08-10: 175.7282694807887
2020-08-11: 183.73664726179038
2020-08-12: 199.91710076430792
2020-08-13: 224.76203093153106
2020-08-14: 242.979695167263
2020-08-15: 257.17919246689445
2020-08-16: 269.3233546739026
20

2020-08-01: 50.66872756486913
2020-08-02: 62.74607228013082
2020-08-03: 68.25858382217515
2020-08-04: 73.71698964161766
2020-08-05: 79.13372993618964
2020-08-06: 92.80479676709288
2020-08-07: 121.94148168748873
2020-08-08: 136.85917814639006
2020-08-09: 146.82003876284764
2020-08-10: 156.38047517770934
2020-08-11: 167.2875789505857
2020-08-12: 184.2971760275767
2020-08-13: 206.2309809552635
2020-08-14: 222.41773174962532
2020-08-15: 235.71152483398706
2020-08-16: 248.8296351831384
2020-08-17: 263.5636311844709
2020-08-18: 276.1208529367354
2020-08-19: 294.7495342624648
2020-08-20: 311.66568782262937
2020-08-21: 327.1172051664737
2020-08-22: 342.6937441221115
2020-08-23: 348.9847233272453
2020-08-24: 362.945647400063
2020-08-25: 380.4685490778571
2020-08-26: 397.53844228110376
2020-08-27: 405.4205698906502
2020-08-28: 418.4569589136973
2020-08-29: 430.0072868915677
2020-08-30: 444.94059172797733
2020-08-31: 461.89453083250874

Predicting for Trinidad and Tobago__nan
2020-08-01: 94.57427

2020-08-04: 175387.59819818838
2020-08-05: 142232.47558814296
2020-08-06: 98897.96671640508
2020-08-07: 158535.5479441532
2020-08-08: 183500.360491652
2020-08-09: 186854.49581247228
2020-08-10: 179150.42129564006
2020-08-11: 158749.5324855645
2020-08-12: 149055.64119996177
2020-08-13: 176929.85393480657
2020-08-14: 192969.43931732004
2020-08-15: 196195.0513152499
2020-08-16: 191348.5598209002
2020-08-17: 182039.3533829665
2020-08-18: 182512.32885854997
2020-08-19: 197717.71101992187
2020-08-20: 208216.90895532226
2020-08-21: 211377.31300069668
2020-08-22: 209612.0914579001
2020-08-23: 206997.01899959458
2020-08-24: 210619.3618277683
2020-08-25: 220374.0581904348
2020-08-26: 227920.23138767967
2020-08-27: 231336.7244580752
2020-08-28: 232094.22660485771
2020-08-29: 233186.93425361085
2020-08-30: 237876.24674671458
2020-08-31: 245235.016187362

Predicting for United States__Alaska
2020-08-01: 472.7443583839919
2020-08-02: 582.168034375231
2020-08-03: 662.1290656458509
2020-08-04: 622.208

2020-08-01: 454.3370447268256
2020-08-02: 617.1887287558109
2020-08-03: 573.6485036514891
2020-08-04: 553.1479613017609
2020-08-05: 459.6835070989579
2020-08-06: 364.3461807601632
2020-08-07: 567.3516334747226
2020-08-08: 665.7388117797277
2020-08-09: 664.2097206865903
2020-08-10: 648.0682677750367
2020-08-11: 601.150615867376
2020-08-12: 596.8928684053901
2020-08-13: 702.7047188012646
2020-08-14: 766.6427216470452
2020-08-15: 780.2556987954792
2020-08-16: 776.6338867515608
2020-08-17: 765.5971642627901
2020-08-18: 794.3098140513998
2020-08-19: 861.5157825453393
2020-08-20: 909.2282869403994
2020-08-21: 930.7068127788614
2020-08-22: 940.6935628912831
2020-08-23: 962.5638990509876
2020-08-24: 999.8013386814041
2020-08-25: 1051.7058974742167
2020-08-26: 1093.8328789148347
2020-08-27: 1117.7963413469279
2020-08-28: 1140.661162651414
2020-08-29: 1171.0532817742157
2020-08-30: 1211.2970900532775
2020-08-31: 1257.4735704380594

Predicting for United States__Florida
2020-08-01: 7308.025804474

2020-08-17: 3753.0843904253816
2020-08-18: 3519.84765182799
2020-08-19: 3636.9100527582236
2020-08-20: 3747.0617679888037
2020-08-21: 3805.366001891021
2020-08-22: 4005.6427492794437
2020-08-23: 4095.926193549306
2020-08-24: 4061.50601867005
2020-08-25: 4150.43257110105
2020-08-26: 4249.136773804879
2020-08-27: 4345.14747113674
2020-08-28: 4484.999001578506
2020-08-29: 4567.865611955526
2020-08-30: 4611.660293670827
2020-08-31: 4702.255504290684

Predicting for United States__Kentucky
2020-08-01: 2962.3807426338826
2020-08-02: 3519.3311301466993
2020-08-03: 3276.8223317476104
2020-08-04: 2650.8826484571227
2020-08-05: 2211.595881967869
2020-08-06: 1796.423741247008
2020-08-07: 3037.3548316668653
2020-08-08: 3439.7050533503507
2020-08-09: 3303.527048625866
2020-08-10: 2982.830844115507
2020-08-11: 2716.54513462706
2020-08-12: 2737.1331448467363
2020-08-13: 3330.1945958044216
2020-08-14: 3583.932064735864
2020-08-15: 3531.937542089552
2020-08-16: 3376.1849636103257
2020-08-17: 3276.03408

2020-08-31: 6523.546807811873

Predicting for United States__Mississippi
2020-08-01: 1228.0866407993203
2020-08-02: 1587.5937338575254
2020-08-03: 1571.955825402438
2020-08-04: 1089.5561707547886
2020-08-05: 882.8960795252972
2020-08-06: 788.0072558126888
2020-08-07: 1337.2587894095723
2020-08-08: 1576.469423998673
2020-08-09: 1532.063104535825
2020-08-10: 1309.5196914482176
2020-08-11: 1191.1967704707338
2020-08-12: 1239.7661809889214
2020-08-13: 1520.3602480116222
2020-08-14: 1664.5537984442037
2020-08-15: 1642.3390111496126
2020-08-16: 1543.3268076493523
2020-08-17: 1503.4232661341894
2020-08-18: 1583.6305575908557
2020-08-19: 1747.1814855377977
2020-08-20: 1840.599694231374
2020-08-21: 1843.4863750829336
2020-08-22: 1812.908585910011
2020-08-23: 1831.384531340537
2020-08-24: 1910.6290476545373
2020-08-25: 2020.7860173746953
2020-08-26: 2092.5155933963706
2020-08-27: 2112.3527129458257
2020-08-28: 2123.707719803996
2020-08-29: 2164.2161815855675
2020-08-30: 2238.8404406996683
2020-0

2020-08-11: 2173.0072761777915
2020-08-12: 2011.4004161086705
2020-08-13: 2325.430264271118
2020-08-14: 2491.3286328007985
2020-08-15: 2572.505814448265
2020-08-16: 2587.5513074846535
2020-08-17: 2505.863439172439
2020-08-18: 2505.2927410318466
2020-08-19: 2680.6010376327695
2020-08-20: 2808.4815785202504
2020-08-21: 2882.352537394275
2020-08-22: 2907.713009748653
2020-08-23: 2910.5274317305657
2020-08-24: 2962.9077353224407
2020-08-25: 3086.4758098506895
2020-08-26: 3192.7927177314095
2020-08-27: 3262.1662641804014
2020-08-28: 3307.739975558585
2020-08-29: 3349.5206749368767
2020-08-30: 3422.792631227144
2020-08-31: 3527.1420020960895

Predicting for United States__New York
2020-08-01: 4611.584640717378
2020-08-02: 5584.295368129514
2020-08-03: 5772.139037255095
2020-08-04: 5695.513470887256
2020-08-05: 4970.166257916438
2020-08-06: 3269.734119253876
2020-08-07: 5063.721200450069
2020-08-08: 5821.7458004872815
2020-08-09: 6001.899392748317
2020-08-10: 5917.4392229500945
2020-08-11: 53

Predicting for United States__Tennessee
2020-08-01: 2859.832309972857
2020-08-02: 3748.5635816531967
2020-08-03: 4247.756298918712
2020-08-04: 4363.038083388452
2020-08-05: 3536.3684778160114
2020-08-06: 2262.1023444411885
2020-08-07: 3398.0568339524443
2020-08-08: 4054.33394962037
2020-08-09: 4378.040747930873
2020-08-10: 4364.705424028095
2020-08-11: 3851.0920326116657
2020-08-12: 3474.7772113416668
2020-08-13: 4016.849087232896
2020-08-14: 4450.109945724189
2020-08-15: 4665.266098324398
2020-08-16: 4637.4844310116805
2020-08-17: 4396.651609794357
2020-08-18: 4335.769831489287
2020-08-19: 4651.3942150264475
2020-08-20: 4942.877173499831
2020-08-21: 5099.357056101436
2020-08-22: 5105.870680576243
2020-08-23: 5045.032351430403
2020-08-24: 5100.422638614525
2020-08-25: 5322.089838991022
2020-08-26: 5536.4210865878285
2020-08-27: 5665.498311663127
2020-08-28: 5715.896361159529
2020-08-29: 5747.872477448999
2020-08-30: 5852.125282014015
2020-08-31: 6033.780126791267

Predicting for United

2020-08-16: 1246.6779601788999
2020-08-17: 1213.5885489392135
2020-08-18: 1256.933130827866
2020-08-19: 1369.395911939398
2020-08-20: 1437.4313787514225
2020-08-21: 1458.1105534449466
2020-08-22: 1455.092576482973
2020-08-23: 1461.636849613967
2020-08-24: 1511.920569970519
2020-08-25: 1589.5091846128698
2020-08-26: 1645.4164081078761
2020-08-27: 1670.849173220672
2020-08-28: 1688.288953952435
2020-08-29: 1721.7836924484334
2020-08-30: 1774.6930925438287
2020-08-31: 1838.0557595595046

Predicting for United States__Wyoming
2020-08-01: 787.8482248302926
2020-08-02: 854.8407273899502
2020-08-03: 659.2819052238974
2020-08-04: 914.8483952422987
2020-08-05: 964.3975707144803
2020-08-06: 596.8281091696656
2020-08-07: 877.3426082874505
2020-08-08: 931.5257412865399
2020-08-09: 906.0465933864889
2020-08-10: 1023.4201911643877
2020-08-11: 1012.6634567161329
2020-08-12: 917.0348935670293
2020-08-13: 1034.9541749749183
2020-08-14: 1082.881008147001
2020-08-15: 1107.5094025033893
2020-08-16: 1167.6

2020-08-01: 86.56843495590346
2020-08-02: 110.42804782146678
2020-08-03: 98.01058271719789
2020-08-04: 101.54880665611338
2020-08-05: 105.95280406681528
2020-08-06: 113.35263307855348
2020-08-07: 158.05224323895197
2020-08-08: 177.49737379457397
2020-08-09: 180.56743755645653
2020-08-10: 188.3460326577213
2020-08-11: 197.89421567709252
2020-08-12: 214.89760694537352
2020-08-13: 244.04483159015422
2020-08-14: 262.2630685117373
2020-08-15: 272.92511860932126
2020-08-16: 284.9356103001114
2020-08-17: 299.2348967046573
2020-08-18: 324.6612511144665
2020-08-19: 349.7141104076634
2020-08-20: 369.0647945112083
2020-08-21: 384.90322389925353
2020-08-22: 401.3197925698957
2020-08-23: 430.4133167233808
2020-08-24: 457.0742069538958
2020-08-25: 482.2577647031077
2020-08-26: 504.6074843047414
2020-08-27: 516.7290176455616
2020-08-28: 537.6520346876541
2020-08-29: 564.9017673818685
2020-08-30: 591.8646356370637
2020-08-31: 617.8276985796165

Predicting for Zimbabwe__nan
2020-08-01: 91.1317371375112

In [23]:
# Check the predictions
preds_df.head()

Unnamed: 0,CountryName,RegionName,Date,PredictedDailyNewCases
213,Aruba,,2020-08-01,58.834154
214,Aruba,,2020-08-02,71.325152
215,Aruba,,2020-08-03,78.992903
216,Aruba,,2020-08-04,90.241488
217,Aruba,,2020-08-05,87.814744


# Validation
This is how the predictor is going to be called during the competition.  
!!! PLEASE DO NOT CHANGE THE API !!!

In [24]:
!python predict.py -s 2020-08-01 -e 2020-08-04 -ip data/2020-09-30_historical_ip_new.csv -o predictions/2020-08-01_2020-08-04.csv

Generating predictions from 2020-08-01 to 2020-08-04...
Saved predictions to predictions/2020-08-01_2020-08-04.csv
Done!


In [25]:
!head predictions/2020-08-01_2020-08-04.csv

'head' 不是内部或外部命令，也不是可运行的程序
或批处理文件。


# Test cases
We can generate a prediction file. Let's validate a few cases...

In [26]:
import os
from covid_xprize.validation.predictor_validation import validate_submission

def validate(start_date, end_date, ip_file, output_file):
    # First, delete any potential old file
    try:
        os.remove(output_file)
    except OSError:
        pass
    
    # Then generate the prediction, calling the official API
    !python predict.py -s {start_date} -e {end_date} -ip {ip_file} -o {output_file}
    
    # And validate it
    errors = validate_submission(start_date, end_date, ip_file, output_file)
    if errors:
        for error in errors:
            print(error)
    else:
        print("All good!")

ModuleNotFoundError: No module named 'covid_xprize'

## 4 days, no gap
- All countries and regions
- Official number of cases is known up to start_date
- Intervention Plans are the official ones

In [None]:
validate(start_date="2020-08-01",
         end_date="2020-08-04",
         ip_file="../../../validation/data/2020-09-30_historical_ip.csv",
         output_file="predictions/val_4_days.csv")

## 1 month in the future
- 2 countries only
- there's a gap between date of last known number of cases and start_date
- For future dates, Intervention Plans contains scenarios for which predictions are requested to answer the question: what will happen if we apply these plans?

In [None]:
%%time
validate(start_date="2021-01-01",
         end_date="2021-01-31",
         ip_file="../../../validation/data/future_ip.csv",
         output_file="predictions/val_1_month_future.csv")

## 180 days, from a future date, all countries and regions
- Prediction start date is 1 week from now. (i.e. assuming submission date is 1 week from now)  
- Prediction end date is 6 months after start date.  
- Prediction is requested for all available countries and regions.  
- Intervention plan scenario: freeze last known intervention plans for each country and region.  

As the number of cases is not known yet between today and start date, but the model relies on them, the model has to predict them in order to use them.  
This test is the most demanding test. It should take less than 1 hour to generate the prediction file.

### Generate the scenario

In [None]:
from datetime import datetime, timedelta

start_date = datetime.now() + timedelta(days=7)
start_date_str = start_date.strftime('%Y-%m-%d')
end_date = start_date + timedelta(days=180)
end_date_str = end_date.strftime('%Y-%m-%d')
print(f"Start date: {start_date_str}")
print(f"End date: {end_date_str}")

In [None]:
from covid_xprize.validation.scenario_generator import get_raw_data, generate_scenario, NPI_COLUMNS
DATA_FILE = 'data/OxCGRT_latest.csv'
latest_df = get_raw_data(DATA_FILE, latest=True)
scenario_df = generate_scenario(start_date_str, end_date_str, latest_df, countries=None, scenario="Freeze")
scenario_file = "predictions/180_days_future_scenario.csv"
scenario_df.to_csv(scenario_file, index=False)
print(f"Saved scenario to {scenario_file}")

### Check it

In [None]:
%%time
validate(start_date=start_date_str,
         end_date=end_date_str,
         ip_file=scenario_file,
         output_file="predictions/val_6_month_future.csv")