In [None]:
# Example submission for a predictor that always predicts zero cases

# Example Predictor: The Zero Predictor

This example contains the minimal functionality for a predictor.

First, a sample evaluation data set is constructed to be used as the input to the predictor.

Second, a predictions file is created in the format the predictor is expected to output, with every prediction simply set to 0.

In [None]:
import numpy as np
import pandas as pd

### Create example evaluation data

In [None]:
URL = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/OxCGRT_latest.csv'
HYPOTHETICAL_SUBMISSION_DATE = np.datetime64("2020-07-31")

nb_eval_days = 31

# `error_bad_lines` was deprecated in pandas 1.3 and removed in pandas 2.0 in favour of
# `on_bad_lines`. Try the current keyword first and fall back for older pandas versions;
# an unknown keyword raises TypeError before any download happens.
read_csv_kwargs = dict(parse_dates=['Date'], encoding="ISO-8859-1")
try:
    test_df = pd.read_csv(URL, on_bad_lines="skip", **read_csv_kwargs)
except TypeError:
    test_df = pd.read_csv(URL, error_bad_lines=False, **read_csv_kwargs)

# Pull out relevant evaluation days: the nb_eval_days days following the submission date
last_eval_date = HYPOTHETICAL_SUBMISSION_DATE + np.timedelta64(nb_eval_days, 'D')
test_df = test_df[(test_df.Date > HYPOTHETICAL_SUBMISSION_DATE) &
                  (test_df.Date <= last_eval_date)]

# Only include the columns we would see during evaluation
id_cols = ['CountryName',
           'RegionName',
           'Date']
npi_cols = ['C1_School closing',
            'C2_Workplace closing',
            'C3_Cancel public events',
            'C4_Restrictions on gatherings',
            'C5_Close public transport',
            'C6_Stay at home requirements',
            'C7_Restrictions on internal movement',
            'C8_International travel controls',
            'H1_Public information campaigns',
            'H2_Testing policy',
            'H3_Contact tracing']
# Explicit copy: the frame is modified below and must not be a view on the raw download
test_df = test_df[id_cols + npi_cols].copy()

# Fill any missing NPIs by assuming they are the same as previous day.
# Group on a combined string key rather than on ["CountryName", "RegionName"] directly:
# groupby drops NaN keys by default, so rows with a missing RegionName (e.g. country-level
# rows) would get NaN back from ffill() and then have their real NPI values replaced by 0.
geo_key = test_df["CountryName"].astype(str) + "__" + test_df["RegionName"].astype(str)
for npi_col in npi_cols:
    test_df[npi_col] = test_df.groupby(geo_key)[npi_col].ffill().fillna(0)

### Predictor call

In [None]:
def predict(start_date: str, end_date: str, path_to_ips_file: str) -> None:
    """
    Generates a file with daily new cases predictions for the given countries, regions and npis, between
    start_date and end_date, included.

    This example predictor does not look at the intervention plans: it predicts 0 daily new cases
    for every row of the module-level ``test_df`` whose Date falls in the requested period.

    :param start_date: day from which to start making predictions, as a string, format YYYY-MM-DD
    :param end_date: day on which to stop making predictions, as a string, format YYYY-MM-DD
    :param path_to_ips_file: path to a csv file containing the intervention plans between start_date and end_date.
    Accepted to match the expected predictor signature, but not read by this zero predictor.
    :return: Nothing. Saves a csv file called 'start_date_end_date.csv'
    with columns "CountryName,RegionName,Date,PredictedDailyNewCases"
    """

    # Keep only the identifier columns for the requested prediction period.
    # Note: this period *might* be in the future, and test_df doesn't necessarily contain the requested rows
    in_period = (test_df.Date >= start_date) & (test_df.Date <= end_date)
    # Take an explicit copy so that adding a column below does not write into a
    # slice of test_df (which would raise SettingWithCopyWarning).
    pred_df = test_df.loc[in_period, id_cols].copy()

    # Make predictions: ignore the given NPIs and predict 0 daily new cases for every country and region
    pred_df['PredictedDailyNewCases'] = 0

    # Save to expected file name
    output_file_name = start_date + "_" + end_date + ".csv"
    pred_df.to_csv(output_file_name, index=False)
    print(f"Predictions saved to {output_file_name}")


In [None]:
# Run the zero predictor over August 2020; this writes 2020-08-01_2020-08-31.csv
# to the current working directory.
predict(
    start_date="2020-08-01",
    end_date="2020-08-31",
    path_to_ips_file="../2020-08-01_2020-08-31_npis_example.csv",
)

## Predictions

In [None]:
# Check that the results were written correctly: show the first lines of the predictions
# file produced by the predict() call above (IPython shell escape; requires a `head` command).
!head 2020-08-01_2020-08-31.csv