# Pull data

In [1]:
import requests
import numpy as np
import pandas as pd
from epiweeks import Week
from datetime import date, timedelta
import os

In [2]:
def pull_scenario_modeling_hub_predictions(model,dates):
    """Load the predictions a model saved on the Scenario Modeling Hub.

    For each candidate date the file ``{date}-{model}`` in the model's
    ``data-processed`` folder of the hub's GitHub repository is requested with
    every supported extension (``.csv``, ``.gz``, ``.zip``, ``.csv.zip``,
    ``.csv.gz``). When more than one candidate can be read, the one that comes
    last in ``dates`` (and, for a given date, last in the extension list) is
    returned.

    Parameters
    ----------
    model : str
        Model name as used in the ``data-processed`` folder and in the file name.
    dates : list or string
        List of potential dates in the iso format, e.g., 'yyyy-mm-dd', for the
        submission. A single string is treated as a one-element list.

    Returns
    -------
    pandas.DataFrame or None
        The predictions, with ``location`` read as ``str`` and
        ``target_end_date`` parsed as dates. ``None`` (after printing a
        message) when no candidate file could be read.
    """
    if isinstance(dates,str):
        dates = [dates]
    extensions = [".csv",".gz",".zip",".csv.zip",".csv.gz"]
    # The last readable candidate in (dates, extensions) order is the one that
    # must be returned, so walk the candidates backwards and stop at the first
    # hit instead of requesting every remaining URL.
    for submission_date in reversed(list(dates)):
        url = f"https://raw.githubusercontent.com/midas-network/covid19-scenario-modeling-hub/master/data-processed/{model}/{submission_date}-{model}"
        for ext in reversed(extensions):
            try:
                return pd.read_csv(url+ext,dtype={'location':str},parse_dates=['target_end_date'])
            except Exception:
                # Best effort: a missing or unreadable candidate is expected.
                # `Exception` (not a bare except) lets Ctrl-C / SystemExit
                # propagate instead of being swallowed mid-download.
                continue
    print(f"Data for model {model} and date {dates} unavailable")
    return None


def pull_surveillance_data(target='death',incidence=True):
    """Download a truth table from the reichlab covid19-forecast-hub repository.

    Parameters
    ----------
    target : str
        One of 'death', 'case' or 'hospitalization'; any other value raises
        ``KeyError``.
    incidence : bool
        Truthy selects the "Incident" file, falsy the "Cumulative" one.

    Returns
    -------
    pandas.DataFrame
        The ``data-truth`` CSV for the requested series, with ``location``
        read as ``str``.
    """
    series_kind = 'Incident' if incidence else 'Cumulative'
    target_label = {
        'death': 'Deaths',
        'case': 'Cases',
        'hospitalization': 'Hospitalizations',
    }[target]
    # The file name contains a space, hence the URL-encoded %20.
    url = f"https://media.githubusercontent.com/media/reichlab/covid19-forecast-hub/master/data-truth/truth-{series_kind}%20{target_label}.csv"
    return pd.read_csv(url, dtype={'location':str})

## Surveillance data

In [73]:
# Surveillance series to download. pull_surveillance_data accepts
# 'death', 'case' or 'hospitalization' as target.
incidence = True  # True -> incident counts, False -> cumulative counts
target = 'death'

In [74]:
# Download the truth table selected by `target` / `incidence`.
observations = pull_surveillance_data(target=target, incidence=incidence)

In [75]:
# Peek at the most recent rows of the truth table.
observations.tail(n=5)

Unnamed: 0,date,location,location_name,value
3244795,2022-10-27,US,United States,539
3244796,2022-10-28,US,United States,179
3244797,2022-10-29,US,United States,30
3244798,2022-10-30,US,United States,2
3244799,2022-10-31,US,United States,123


In [77]:
# Most recent distinct dates present in the truth table.
# NOTE(review): the slice end of -1 leaves out the very latest date — confirm
# that this is intended rather than [-100:].
list(observations['date'].unique()[-100:-1])

['2022-07-24',
 '2022-07-25',
 '2022-07-26',
 '2022-07-27',
 '2022-07-28',
 '2022-07-29',
 '2022-07-30',
 '2022-07-31',
 '2022-08-01',
 '2022-08-02',
 '2022-08-03',
 '2022-08-04',
 '2022-08-05',
 '2022-08-06',
 '2022-08-07',
 '2022-08-08',
 '2022-08-09',
 '2022-08-10',
 '2022-08-11',
 '2022-08-12',
 '2022-08-13',
 '2022-08-14',
 '2022-08-15',
 '2022-08-16',
 '2022-08-17',
 '2022-08-18',
 '2022-08-19',
 '2022-08-20',
 '2022-08-21',
 '2022-08-22',
 '2022-08-23',
 '2022-08-24',
 '2022-08-25',
 '2022-08-26',
 '2022-08-27',
 '2022-08-28',
 '2022-08-29',
 '2022-08-30',
 '2022-08-31',
 '2022-09-01',
 '2022-09-02',
 '2022-09-03',
 '2022-09-04',
 '2022-09-05',
 '2022-09-06',
 '2022-09-07',
 '2022-09-08',
 '2022-09-09',
 '2022-09-10',
 '2022-09-11',
 '2022-09-12',
 '2022-09-13',
 '2022-09-14',
 '2022-09-15',
 '2022-09-16',
 '2022-09-17',
 '2022-09-18',
 '2022-09-19',
 '2022-09-20',
 '2022-09-21',
 '2022-09-22',
 '2022-09-23',
 '2022-09-24',
 '2022-09-25',
 '2022-09-26',
 '2022-09-27',
 '2022-09-

### Save data

In [71]:
# The dat folder is not tracked by git, so it does not exist on a fresh clone;
# create it first so the write below cannot fail on a missing directory.
os.makedirs("./dat", exist_ok=True)
# File name encodes the series: truth_inc_<target>.pq or truth_cum_<target>.pq
observations.to_parquet(f"./dat/truth_{'inc' if incidence else 'cum'}_{target}.pq", index=False)

## Forecast data

### Pull a single model

In [17]:
# Potential submission dates to probe for this model's file.
dates = [
    '2021-05-01',
    '2021-05-02',
    '2021-05-04',
]
model = "MOBS_NEU-GLEAM_COVID"
predictions = pull_scenario_modeling_hub_predictions(model, dates)

In [14]:
# Peek at the first rows of the pulled predictions.
predictions.head(n=5)

Unnamed: 0,target,location,scenario_name,scenario_id,quantile,value,target_end_date,model_projection_date,type
0,1 wk ahead inc death,45,highVac_modNPI,A-2021-05-02,0.01,58.474616,2021-05-08,2021-05-01,quantile
1,2 wk ahead inc death,45,highVac_modNPI,A-2021-05-02,0.01,47.918443,2021-05-15,2021-05-01,quantile
2,3 wk ahead inc death,45,highVac_modNPI,A-2021-05-02,0.01,37.881073,2021-05-22,2021-05-01,quantile
3,4 wk ahead inc death,45,highVac_modNPI,A-2021-05-02,0.01,30.219431,2021-05-29,2021-05-01,quantile
4,5 wk ahead inc death,45,highVac_modNPI,A-2021-05-02,0.01,23.725748,2021-06-05,2021-05-01,quantile


### Pull multiple models and save data

In [3]:
# SMH round 4
rd = 4
# Potential submission dates, probed in this order.
dates = [
    '2021-03-27',
    '2021-03-29',
    '2021-03-28',
]
models = [
    "Ensemble",
    "Ensemble_LOP",
    "IHME-IHME_COVID_model_deaths_unscaled",
    "JHUAPL-Bucky",
    "JHU_IDD-CovidSP",
    "Karlen-pypm",
    "MOBS_NEU-GLEAM_COVID",
    "USC-SIkJalpha",
    "UVA-adaptive",
]

In [5]:
# SMH round 5
rd = 5
# Potential submission dates, probed in this order.
dates = [
    '2021-05-01',
    '2021-05-02',
    '2021-05-04',
]
models = [
    "Ensemble",
    "Ensemble_LOP",
    "IHME-IHME_COVID_model_deaths_unscaled",
    "JHUAPL-Bucky",
    "JHU_IDD-CovidSP",
    "Karlen-pypm",
    "MOBS_NEU-GLEAM_COVID",
    "UNCC-hierbin",
    "USC-SIkJalpha",
    "UVA-adaptive",
]

In [8]:
# SMH round 6
rd = 6
# Potential submission dates, probed in this order.
dates = [
    '2021-05-28',
    '2021-05-29',
    '2021-05-30',
    '2021-06-08',
    '2021-06-05',
]
models = [
    "Ensemble",
    "Ensemble_LOP",
    "JHUAPL-Bucky",
    "JHU_IDD-CovidSP",
    "Karlen-pypm",
    "MOBS_NEU-GLEAM_COVID",
    "UNCC-hierbin",
    "NCSU-COVSIM",
    "USC-SIkJalpha",
    "UVA-adaptive",
    "UTA-ImmunoSEIRS",
    "UVA-EpiHiper",
]

In [15]:
# SMH round 7
rd = 7
# Potential submission dates, probed in this order.
dates = [
    '2021-07-03',
    '2021-07-13',
    '2021-07-04',
]
models = [
    "Ensemble",
    "Ensemble_LOP",
    "JHUAPL-Bucky",
    "JHU_IDD-CovidSP",
    "Karlen-pypm",
    "MOBS_NEU-GLEAM_COVID",
    "UNCC-hierbin",
    "USC-SIkJalpha",
    "UVA-adaptive",
    "UVA-EpiHiper",
]

In [21]:

# SMH round 9
rd = 9
# Potential submission dates, probed in this order.
dates = [
    '2021-09-11',
    '2021-09-13',
    '2021-09-14',
    '2021-09-12',
]
models = [
    "Ensemble",
    "Ensemble_LOP",
    "Ensemble_LOP_untrimmed",
    "JHUAPL-Bucky",
    "JHU_IDD-CovidSP",
    "MOBS_NEU-GLEAM_COVID",
    "UNCC-hierbin",
    "NotreDame-FRED",
    "USC-SIkJalpha",
    "UVA-adaptive",
    "UVA-EpiHiper",
]

In [24]:

# SMH round 10
rd = 10
# Potential submission dates, probed in this order.
dates = [
    '2021-11-13',
    '2021-11-20',
    '2021-11-14',
    '2021-11-09',
    '2021-11-21',
]
models = [
    "Ensemble",
    "Ensemble_LOP",
    "Ensemble_LOP_untrimmed",
    "MOBS_NEU-GLEAM_COVID",
    "UNCC-hierbin",
    "NotreDame-FRED",
    "USC-SIkJalpha",
    "UVA-adaptive",
    "UVA-EpiHiper",
]

In [27]:
# SMH round 11
rd = 11
# Potential submission dates, probed in this order.
# '2021-11-21' was dropped from this list: it is one of the round 10 candidate
# dates, and because the last existing candidate wins in
# pull_scenario_modeling_hub_predictions, a model with a file on that date
# would have had its round 10 submission saved as round 11.
dates = [
    '2021-12-18',
    '2021-12-21',
    '2021-12-17',
    '2021-12-19',
]
models = [
    "Ensemble",
    "Ensemble_LOP",
    "Ensemble_LOP_untrimmed",
    "JHU_IDD-CovidSP",
    "MOBS_NEU-GLEAM_COVID",
    "UNCC-hierbin",
    "NotreDame-FRED",
    "USC-SIkJalpha",
    "UVA-adaptive",
    "UVA-EpiHiper",
]

In [31]:
# SMH round 10, restricted to a single model.
rd = 10
models = ["NotreDame-FRED"]
# Potential submission dates, probed in this order.
dates = [
    '2021-11-13',
    '2021-11-20',
    '2021-11-14',
    '2021-11-09',
    '2021-11-21',
]

In [35]:
# SMH round 12
rd = 12
dates = ['2022-01-09']
models = [
    "Ensemble",
    "Ensemble_LOP",
    "Ensemble_LOP_untrimmed",
    "JHU_IDD-CovidSP",
    "MOBS_NEU-GLEAM_COVID",
    "NCSU-COVSIM",
    "NotreDame-FRED",
    "UNCC-hierbin",
    "USC-SIkJalpha",
    "UTA-ImmunoSEIRS",
    "UVA-EpiHiper",
    "UVA-adaptive",
]

In [38]:
# SMH round 13
rd = 13
dates = ['2022-03-13']
models = [
    "Ensemble",
    "Ensemble_LOP",
    "Ensemble_LOP_untrimmed",
    "JHU_IDD-CovidSP",
    "MOBS_NEU-GLEAM_COVID",
    "NCSU-COVSIM",
    "UNCC-hierbin",
    "USC-SIkJalpha",
    "UTA-ImmunoSEIRS",
    "UVA-EpiHiper",
    "UVA-adaptive",
]

In [60]:
# SMH round 13, restricted to a single model.
rd = 13
dates = ['2022-03-13']
models = [
    "MOBS_NEU-GLEAM_COVID_OT",
]

In [62]:
rd = 5
model = "UNCC-hierbin"
# This file is read from the working directory rather than from the hub URL,
# with the same dtype / date parsing as pull_scenario_modeling_hub_predictions.
predictions = pd.read_csv('2021-05-02-UNCC-hierbin-v5.csv', dtype={'location':str},parse_dates=['target_end_date'])
# The dat folder is not tracked by git, so it may not exist yet; create it
# before writing.
os.makedirs('./dat', exist_ok=True)
predictions.to_parquet(f'./dat/{model}_rd{rd}.pq', index=False)

In [42]:
# SMH round 14
rd = 14
# Potential submission dates, probed in this order.
dates = [
    '2022-06-05',
    '2022-06-04',
]
models = [
    "Ensemble",
    "Ensemble_LOP",
    "Ensemble_LOP_untrimmed",
    "JHU_IDD-CovidSP",
    "MOBS_NEU-GLEAM_COVID",
    "MOBS_NEU-GLEAM_COVID_OT",
    "NCSU-COVSIM",
    "UNCC-hierbin",
    "USC-SIkJalpha",
    "USC-SIkJalpha-update",
    "UTA-ImmunoSEIRS",
    "UVA-adaptive",
]

In [45]:
# SMH round 15
rd = 15
dates = ['2022-07-31']
models = [
    "Ensemble",
    "Ensemble_LOP",
    "Ensemble_LOP_untrimmed",
    "JHU_IDD-CovidSP",
    "MOBS_NEU-GLEAM_COVID",
    "MOBS_NEU-GLEAM_COVID_OT",
    "NCSU-COVSIM",
    "UNCC-hierbin",
    "USC-SIkJalpha",
    "USC-SIkJalpha-update",
    "UTA-ImmunoSEIRS",
    "UVA-adaptive",
]

In [50]:
# SMH round 16
rd = 16
dates = ['2022-10-30']
models = [
    "Ensemble",
    "Ensemble_LOP",
    "Ensemble_LOP_untrimmed",
    "JHU_IDD-CovidSP",
    "MOBS_NEU-GLEAM_COVID",
    "NCSU-COVSIM",
    "UNCC-hierbin",
    "USC-SIkJalpha",
    "UTA-ImmunoSEIRS",
    "UVA-adaptive",
]

In [59]:
rd = 13
model = "CU-AGE-ST"
# This file is read from the working directory rather than from the hub URL,
# with the same dtype / date parsing as pull_scenario_modeling_hub_predictions.
predictions = pd.read_csv('2022-03-13-CU-AGE-ST.csv', dtype={'location':str},parse_dates=['target_end_date'])
# The dat folder is not tracked by git, so it may not exist yet; create it
# before writing.
os.makedirs('./dat', exist_ok=True)
predictions.to_parquet(f'./dat/{model}_rd{rd}.pq', index=False)

In [61]:
# Pull every model of the round configured above (`rd`, `dates`, `models`)
# and store each one as ./dat/<model>_rd<rd>.pq.
# The dat folder is not tracked by git, so it may not exist yet; create it once
# up front so the writes below cannot fail on a missing directory.
os.makedirs('./dat', exist_ok=True)
for model in models:
    print(model)
    predictions = pull_scenario_modeling_hub_predictions(model,dates)
    # The puller returns None when no file was found for any candidate date;
    # such models are skipped.
    if predictions is not None:
        predictions.to_parquet(f'./dat/{model}_rd{rd}.pq', index=False)
        # predictions.to_csv(f'./dat/{model}_rd{rd}.csv', index=False)

MOBS_NEU-GLEAM_COVID_OT


**NOTE**: Anything saved in the `dat` folder is not tracked by git, so that no data ends up in the GitHub repository. The repository tracks code only.