# Pull data

In [21]:
import requests
import numpy as np
import pandas as pd
from epiweeks import Week
from datetime import date, timedelta
import os

In [2]:
def pull_scenario_modeling_hub_predictions(model,dates):
    """Load a model's projections from the COVID-19 Scenario Modeling Hub repository.

    Every combination of candidate date and file extension is probed at
    ``data-processed/{model}/{date}-{model}{ext}`` on the hub's ``master`` branch.

    Parameters
    ----------
    model : str
        Model name, i.e. the name of the model's folder under ``data-processed``.
    dates : list or string
        List of potential dates in the iso format, e.g., 'yyyy-mm-dd', for the submission.
        A single date may also be passed as a plain string.

    Returns
    -------
    pandas.DataFrame or None
        The projections, with ``location`` read as strings and ``target_end_date``
        parsed as dates. ``None`` (after printing a message) when none of the
        candidate files could be read. When several candidates are readable, the
        one listed last in ``dates`` (and last in the extension list) is returned.
    """
    if isinstance(dates,str):
        dates = [dates]
    extensions = [".csv",".gz",".zip",".csv.zip",".csv.gz"]
    # Later candidates take precedence over earlier ones. Probing in reverse order
    # lets the search stop at the first readable file instead of requesting every
    # remaining URL after the result is already known.
    for submission_date in reversed(list(dates)):
        url = f"https://raw.githubusercontent.com/midas-network/covid19-scenario-modeling-hub/master/data-processed/{model}/{submission_date}-{model}"
        for ext in reversed(extensions):
            try:
                return pd.read_csv(url+ext,dtype={'location':str},parse_dates=['target_end_date'])
            except Exception:
                # Best effort: a missing or unreadable candidate just means "try the
                # next one". Catching Exception (not a bare except) keeps
                # KeyboardInterrupt/SystemExit working during the downloads.
                continue
    print(f"Data for model {model} and date {dates} unavailable")
    return None


def pull_surveillance_data(target='death',incidence=True):
    """Download a truth table from the reichlab covid19-forecast-hub repository.

    Parameters
    ----------
    target : str
        One of 'death', 'case' or 'hospitalization'; any other value raises KeyError.
    incidence : bool
        If truthy the 'Incident' file is read, otherwise the 'Cumulative' one.

    Returns
    -------
    pandas.DataFrame
        Contents of the selected CSV, with the ``location`` column read as strings.
    """
    measure = 'Incident' if incidence else 'Cumulative'
    outcome = {
        'death': 'Deaths',
        'case': 'Cases',
        'hospitalization': 'Hospitalizations',
    }[target]
    truth_url = (
        "https://media.githubusercontent.com/media/reichlab/covid19-forecast-hub/"
        f"master/data-truth/truth-{measure}%20{outcome}.csv"
    )
    return pd.read_csv(truth_url, dtype={'location': str})

## Surveillance data

In [11]:
# Surveillance signal to load; pull_surveillance_data accepts 'death', 'case'
# and 'hospitalization'.
target = 'hospitalization'
# target = 'death'
# True selects the incident series, False the cumulative one.
incidence = True

In [4]:
# Download the truth table selected by `target` and `incidence`.
observations = pull_surveillance_data(target,incidence)

In [16]:
# Preview the first five rows of the surveillance table.
observations.head(5)

Unnamed: 0,date,location,location_name,value
0,2021-02-11,72,Puerto Rico,12
1,2021-02-04,5,Arkansas,111
2,2021-02-01,20,Kansas,78
3,2021-01-25,27,Minnesota,67
4,2021-01-19,25,Massachusetts,244


## Forecast data

### Pull a single model

In [17]:
# Model to load, followed by the dates its submission file may carry in its name;
# every listed date is tried when looking for the file.
model = "MOBS_NEU-GLEAM_COVID"
dates = ['2021-05-01','2021-05-02','2021-05-04'] #potential submission dates
predictions = pull_scenario_modeling_hub_predictions(model,dates)

In [14]:
# Preview the first five rows of the loaded projections.
predictions.head(5)

Unnamed: 0,target,location,scenario_name,scenario_id,quantile,value,target_end_date,model_projection_date,type
0,1 wk ahead inc death,45,highVac_modNPI,A-2021-05-02,0.01,58.474616,2021-05-08,2021-05-01,quantile
1,2 wk ahead inc death,45,highVac_modNPI,A-2021-05-02,0.01,47.918443,2021-05-15,2021-05-01,quantile
2,3 wk ahead inc death,45,highVac_modNPI,A-2021-05-02,0.01,37.881073,2021-05-22,2021-05-01,quantile
3,4 wk ahead inc death,45,highVac_modNPI,A-2021-05-02,0.01,30.219431,2021-05-29,2021-05-01,quantile
4,5 wk ahead inc death,45,highVac_modNPI,A-2021-05-02,0.01,23.725748,2021-06-05,2021-05-01,quantile


### Pull multiple models and save data

In [25]:
# rd = 5 #smh round
# dates = ['2021-05-01','2021-05-02','2021-05-04'] #potential submission dates
# models = ["Ensemble","Ensemble_LOP","IHME-IHME_COVID_model_deaths_unscaled","JHUAPL-Bucky",
#           "JHU_IDD-CovidSP","Karlen-pypm","MOBS_NEU-GLEAM_COVID","UNCC-hierbin",
#           "USC-SIkJalpha","UVA-adaptive"]

In [24]:
# rd = 12
# models = ["Ensemble","Ensemble_LOP","Ensemble_LOP_untrimmed","JHU_IDD-CovidSP","MOBS_NEU-GLEAM_COVID",
#           "NCSU-COVSIM","NotreDame-FRED","UNCC-hierbin","USC-SIkJalpha","UTA-ImmunoSEIRS",
#           "UVA-EpiHiper","UVA-adaptive"]
# dates = ['2022-01-09']

In [26]:
# Scenario Modeling Hub round; it is embedded in the names of the saved files.
rd = 14
# Models to download for this round.
models = ["Ensemble","Ensemble_LOP","Ensemble_LOP_untrimmed","JHU_IDD-CovidSP","MOBS_NEU-GLEAM_COVID",
          "MOBS_NEU-GLEAM_COVID_OT","NCSU-COVSIM","UNCC-hierbin","USC-SIkJalpha","USC-SIkJalpha-update",
          "UTA-ImmunoSEIRS","UVA-adaptive"]
# Potential submission dates tried for every model.
dates = ['2022-06-05','2022-06-04']

In [28]:
# The output folder is not tracked by git, so it does not exist in a fresh
# clone; create it up front so the first to_parquet call does not fail.
os.makedirs('./dat', exist_ok=True)

# Download each model's projections and store them as one parquet file per
# model and round. Models without a readable submission are skipped (the
# pull function already prints a message for them).
for model in models:
    print(model)
    predictions = pull_scenario_modeling_hub_predictions(model,dates)
    if predictions is not None:
        predictions.to_parquet(f'./dat/{model}_rd{rd}.pq', index=False)
        # predictions.to_csv(f'./dat/{model}_rd{rd}.csv', index=False)

Ensemble
Ensemble_LOP
Ensemble_LOP_untrimmed
JHU_IDD-CovidSP
MOBS_NEU-GLEAM_COVID
MOBS_NEU-GLEAM_COVID_OT
NCSU-COVSIM
UNCC-hierbin
Data for model UNCC-hierbin and date ['2022-06-05', '2022-06-04'] unavailable
USC-SIkJalpha
USC-SIkJalpha-update
UTA-ImmunoSEIRS
UVA-adaptive


**NOTE**: anything saved in the `dat` folder is not tracked by git, so that no data ends up in the GitHub repository. The repository tracks code only.