In [None]:
import os
import pandas as pd
import requests 
from AlphanumericsTeam.data.util import get_aug_oxford_df, filter_df_regions


# Absolute path three directory levels above the current working directory.
REPO_ROOT = os.path.abspath(os.path.join(os.getcwd(), *([os.pardir] * 3)))

In [None]:
# Sample input and output files

# IP (input) file with data until 30 Sep; RegionName is forced to str on read.
EXAMPLE_INPUT_FILE = os.path.join(REPO_ROOT, "covid_xprize/validation/data/2020-09-30_historical_ip.csv")
prediction_input_df = pd.read_csv(EXAMPLE_INPUT_FILE,
                                  parse_dates=['Date'],
                                  dtype={"RegionName": str},
                                  encoding="ISO-8859-1")

# Example prediction output covering Aug 1 to Aug 4.
EXAMPLE_OUTPUT_FILE = os.path.join(REPO_ROOT, "2020-08-01_2020-08-04_predictions_example.csv")
prediction_output_df = pd.read_csv(EXAMPLE_OUTPUT_FILE,
                                   parse_dates=['Date'],
                                   encoding="ISO-8859-1")

# Preview only the first rows (rich display as the cell's last expression)
# instead of printing the whole frame as plain text.
prediction_output_df.head()

In [None]:
# Input data for training
#
# Alternative kept for reference: read the raw Oxford tracker file directly.
#
#   DATA_URL = 'https://raw.githubusercontent.com/OxCGRT/covid-policy-tracker/master/data/OxCGRT_latest.csv'
#   df = pd.read_csv(DATA_URL,
#                    parse_dates=['Date'],
#                    encoding="ISO-8859-1",
#                    dtype={"RegionName": str,
#                           "RegionCode": str},
#                    error_bad_lines=False)
#
# The augmented frame loaded below has 6 additional columns:
#   'New Cases', 'GeoID', 'Holidays', 'pop_2020', 'area_km2', 'density_perkm2'
df = filter_df_regions(get_aug_oxford_df())

# Sanity checks: final list of 180 countries and 56 regions.
assert len(df.CountryName.unique()) == 180
# NOTE(review): the "+ 1" presumably accounts for the missing/empty region
# name carried by country-level rows -- confirm against filter_df_regions.
assert len(df.RegionName.unique()) == 56 + 1

In [None]:
# NOTE: this is not the last expression of the cell, so the random preview is
# evaluated but not rendered in the notebook output.
df.sample(3)

# Day-over-day change in ConfirmedCases within each (country, region) series.
# pandas' groupby drops rows whose key is NaN, which would leave every row with
# a missing RegionName at NaN after diff() and then at 0 after fillna(0).
# Grouping on a NaN-filled key keeps those rows in their own per-country group.
# If RegionName has no missing values this is equivalent to grouping on the
# column directly.
_region_key = df["RegionName"].fillna("")
df["DailyChangeConfirmedCases"] = (
    df.groupby([df["CountryName"], _region_key])
      .ConfirmedCases.diff()
      .fillna(0)  # first row of each series has no predecessor
)

# Spot-check the most recent rows for one region.
california_df = df[(df.CountryName == "United States") & (df.RegionName == "California")]
california_df[["CountryName", "RegionName", "Date", "ConfirmedCases", "DailyChangeConfirmedCases"]].tail(5)

In [None]:
# TRAINING
# Reload the local module so edits to xprize_predictor.py are picked up
# without restarting the kernel.
import xprize_predictor
from importlib import reload
reload(xprize_predictor)
from xprize_predictor import XPrizePredictor

DATA_PATH = os.path.join("data", 'OxCGRT_latest_aug.csv')
# None is passed where the prediction cell passes the weights file path.
predictor = XPrizePredictor(None, DATA_PATH)

predictor_model = predictor.train()

# Persist the trained weights. The target directory is derived from the file
# path and created idempotently (no check-then-create race, and nested
# directories are handled).
model_weights_file = "models/trained_model_weights.h5"
os.makedirs(os.path.dirname(model_weights_file), exist_ok=True)
predictor_model.save_weights(model_weights_file)

In [None]:
# TESTING AND PREDICTION ---
# Pick up any edits made to xprize_predictor.py since the last import.
import xprize_predictor
from importlib import reload
reload(xprize_predictor)
from xprize_predictor import XPrizePredictor

# Build a predictor from the saved weights and the augmented data file.
DATA_PATH = os.path.join("data", 'OxCGRT_latest_aug.csv')
model_weights_file = "models/trained_model_weights.h5"
predictor = XPrizePredictor(model_weights_file, DATA_PATH)

# Predict for August 2020 using the historical IP file.
NPIS_INPUT_FILE = "../../validation/data/2020-09-30_historical_ip.csv"
start_date, end_date = "2020-08-01", "2020-08-31"

preds_df = predictor.predict(start_date, end_date, NPIS_INPUT_FILE)
preds_df.head()

In [None]:
# Run the predictor through its command-line entry point (predict.py) for
# 2020-08-01..2020-08-04 with the historical IP file, writing a CSV under
# predictions/, then show the first lines of that CSV.
# NOTE: `head` is a Unix utility, so the second command needs a Unix-like shell.
!python predict.py -s 2020-08-01 -e 2020-08-04 -ip ../../validation/data/2020-09-30_historical_ip.csv -o predictions/2020-08-01_2020-08-04.csv
!head predictions/2020-08-01_2020-08-04.csv

In [None]:
# Check that the prediction file is valid
import os
from covid_xprize.validation.predictor_validation import validate_submission

def validate(start_date, end_date, ip_file, output_file):
    # First, delete any potential old file
    try:
        os.remove(output_file)
    except OSError:
        pass
    
    # Then generate the prediction, calling the official API
    !python predict.py -s {start_date} -e {end_date} -ip {ip_file} -o {output_file}
    
    # And validate it
    errors = validate_submission(start_date, end_date, ip_file, output_file)
    if errors:
        for error in errors:
            print(error)
    else:
        print("All good!")

In [None]:
# Validate a 4-day window against the historical IP file.
validate(
    start_date="2020-08-01",
    end_date="2020-08-04",
    ip_file="../../validation/data/2020-09-30_historical_ip.csv",
    output_file="predictions/val_4_days.csv",
)

In [None]:
%%time
validate(start_date="2021-01-01",
         end_date="2021-01-31",
         ip_file="../../validation/data/future_ip.csv",
         output_file="predictions/val_1_month_future.csv")

In [None]:
from datetime import datetime, timedelta

# Forecast window: starts one week from now and spans 180 days.
start_date = datetime.now() + timedelta(days=7)
end_date = start_date + timedelta(days=180)

# ISO-style date strings for the command line / scenario generator.
start_date_str, end_date_str = (
    day.strftime('%Y-%m-%d') for day in (start_date, end_date)
)
print(f"Start date: {start_date_str}", f"End date: {end_date_str}", sep="\n")

In [None]:
from covid_xprize.validation.scenario_generator import get_raw_data, generate_scenario, NPI_COLUMNS
# Build an IP scenario covering the future window and save it as CSV.
DATA_FILE = 'data/OxCGRT_latest.csv'
scenario_file = "predictions/180_days_future_scenario.csv"

latest_df = get_raw_data(DATA_FILE, latest=True)
# NOTE(review): "Freeze" presumably holds the latest interventions fixed over
# the window -- confirm against scenario_generator.
scenario_df = generate_scenario(
    start_date_str,
    end_date_str,
    latest_df,
    countries=None,
    scenario="Freeze",
)

scenario_df.to_csv(scenario_file, index=False)
print(f"Saved scenario to {scenario_file}")

In [None]:
%%time
validate(start_date=start_date_str,
         end_date=end_date_str,
         ip_file=scenario_file,
         output_file="predictions/val_6_month_future.csv")

In [None]:

#TESTING/DEBUGGING CODE
import pandas as pd
from util import add_features_df

# Load the historical IP file, replace missing region names with "", and run
# the feature helper on it.
# NOTE: this rebinds `df`, replacing the training frame loaded earlier in the
# notebook.
df = (
    pd.read_csv(
        "../../validation/data/2020-09-30_historical_ip.csv",
        parse_dates=['Date'],
        encoding="ISO-8859-1",
    )
    .assign(RegionName=lambda frame: frame["RegionName"].fillna(value=""))
)
add_features_df(df)