### Get forecasts by the methods submitting to EU Covid 19 Forecast Hub

In [None]:
import os
from tqdm import tqdm
from github import Github
import pandas as pd

In [32]:
# https://github.com/settings/tokens  -> personal access token
# 5000 requests/hour
MY_GITHUB_TOKEN = ' ' # put a token here
g = Github(login_or_token=MY_GITHUB_TOKEN)

In [33]:
repo = g.get_repo("epiforecasts/covid19-forecast-hub-europe")

links = {}

models = [ 
    "EuroCOVIDhub-ensemble",
    "EuroCOVIDhub-baseline",
    "epiforecasts-EpiExpert",
    "epiforecasts-EpiNow2","RobertWalraven-ESG","UVA-Ensemble", 
    "MUNI-ARIMA","JBUD-HMXK", "IEM_Health-CovidProject","BIOCOMSC-Gompertz","ILM-EKF","Karlen-pypm",
    "USC-SIkJalpha"]
     
wk = 1 # number of forecasted weeks, values 1 and 2 are used for comparisons

type_data = "death"


for model in models:
    print(model)
    
    files = repo.get_contents(f'data-processed/{model}')
    # sort files based on date in the file name
    files = sorted(files, key = lambda x: x.name)
    
    d = {}
    
    for f in tqdm(files, leave=False):
        # only keep csv file
        if not f.name.endswith('.csv'):
            continue
        # only keep file updated before 2021-02-01
        if f.name[:10] > '2021-12-15':
            continue

        df = pd.read_csv(f.download_url)

        one_wk = df[df.target.str.startswith(str(wk)+' wk')]
        one_wk_end = one_wk.target_end_date.unique()
        assert len(one_wk_end) == 1

        # if there are multiple files with the same end date
        # only keep the first/earliest one
        if one_wk_end[0] in d:
            continue
        d[one_wk_end[0]] = f.download_url
    
    links[model] = d

EuroCOVIDhub-ensemble


                                               

EuroCOVIDhub-baseline


  0%|          | 0/29 [00:00<?, ?it/s]         

epiforecasts-EpiExpert


                                               

epiforecasts-EpiNow2


                                               

RobertWalraven-ESG


  0%|          | 0/46 [00:00<?, ?it/s]         

UVA-Ensemble


  0%|          | 0/41 [00:00<?, ?it/s]         

MUNI-ARIMA


  0%|          | 0/10 [00:00<?, ?it/s]         

JBUD-HMXK


  0%|          | 0/49 [00:00<?, ?it/s]        

IEM_Health-CovidProject


                                               

BIOCOMSC-Gompertz


  0%|          | 0/49 [00:00<?, ?it/s]         

ILM-EKF


  0%|          | 0/44 [00:00<?, ?it/s]         

Karlen-pypm


  0%|          | 0/52 [00:00<?, ?it/s]         

USC-SIkJalpha


                                               

In [34]:
df_links = pd.DataFrame(links)
df_links = df_links.sort_index().T

In [35]:
df_links.columns.name = str(wk)+'wk_end_date'

In [36]:
df_links

1wk_end_date,2021-02-20,2021-02-27,2021-03-06,2021-03-13,2021-03-20,2021-03-27,2021-04-03,2021-04-10,2021-04-17,2021-04-24,...,2021-10-16,2021-10-23,2021-10-30,2021-11-06,2021-11-13,2021-11-20,2021-11-27,2021-12-04,2021-12-11,2021-12-18
EuroCOVIDhub-ensemble,,,,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...
EuroCOVIDhub-baseline,,,,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...
epiforecasts-EpiExpert,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,...,,,,,,,,,,
epiforecasts-EpiNow2,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,,,,,,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...
RobertWalraven-ESG,,,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...
UVA-Ensemble,,,,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,,
MUNI-ARIMA,,,,,,,,,,,...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...
JBUD-HMXK,,,,,,,,,,,...,,,,,,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,,,
IEM_Health-CovidProject,,,,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...
BIOCOMSC-Gompertz,,,,,,,,,,,...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...,https://raw.githubusercontent.com/covid19-fore...


In [37]:
df_links.to_csv('../data/ensemble_eu_links.csv')

### Save to folder /paper

In [38]:
retro_data_info = pd.read_csv("../data/ensemble_eu_links.csv")

In [39]:
missingness = retro_data_info.isnull().sum(axis=1).tolist()
#models = [ 
#    "EuroCOVIDhub-ensemble",
#    "EuroCOVIDhub-baseline", 
#    "epiforecasts-EpiNow2",  "MUNI-ARIMA", "IEM_Health-CovidProject"
#]
dates = list(retro_data_info.columns)[1:]
col0  = list(retro_data_info.columns)[0]
for method,m in zip(retro_data_info.values[:,0],missingness):
    
    missing_dates = retro_data_info[retro_data_info[col0]==method].isnull().values[0]
    if method in models:
        print(method, " has ", m, " missing submissions out of " , retro_data_info.shape[1]-1, " on ", [dates[i] for i in range(len(dates)) if missing_dates[i]])

EuroCOVIDhub-ensemble  has  3  missing submissions out of  44  on  ['2021-02-27', '2021-03-06', '2021-03-13']
EuroCOVIDhub-baseline  has  3  missing submissions out of  44  on  ['2021-02-27', '2021-03-06', '2021-03-13']
epiforecasts-EpiExpert  has  16  missing submissions out of  44  on  ['2021-03-13', '2021-09-18', '2021-09-25', '2021-10-02', '2021-10-09', '2021-10-16', '2021-10-23', '2021-10-30', '2021-11-06', '2021-11-13', '2021-11-20', '2021-11-27', '2021-12-04', '2021-12-11', '2021-12-18']
epiforecasts-EpiNow2  has  6  missing submissions out of  44  on  ['2021-03-13', '2021-11-06', '2021-11-13', '2021-11-20', '2021-11-27', '2021-12-04']
RobertWalraven-ESG  has  2  missing submissions out of  44  on  ['2021-02-27', '2021-03-06']
UVA-Ensemble  has  6  missing submissions out of  44  on  ['2021-02-27', '2021-03-06', '2021-03-13', '2021-11-27', '2021-12-18']
MUNI-ARIMA  has  11  missing submissions out of  44  on  ['2021-02-27', '2021-03-06', '2021-03-13', '2021-03-20', '2021-03-27',

In [40]:
import numpy as np 
import os
import os.path 
import datetime
import pandas as pd
retro_data_info = pd.read_csv("../data/ensemble_eu_links.csv")
retro_data_info.loc[0].index.values
methods = retro_data_info.values[:,0]
hw = wk

methods = [ "UVA-Ensemble", 
    "EuroCOVIDhub-ensemble",
    "EuroCOVIDhub-baseline", 
    "epiforecasts-EpiNow2","RobertWalraven-ESG",
     "IEM_Health-CovidProject", "ILM-EKF","Karlen-pypm",
    "USC-SIkJalpha"]
methods = ["EuroCOVIDhub-baseline"]
for method_id, name in enumerate(methods):
    links = retro_data_info.loc[method_id].values[1:]
    dates = retro_data_info.loc[method_id].index.values[1:] 

 
    print(name)
    print((dates, links))
    for (date, link) in zip(dates, links): 
        

        u = datetime.datetime.strptime(date,"%Y-%m-%d")
        d = datetime.timedelta(days=7*(hw-1))
        #date = str(u + d)[:10] 
        if len(str(link))>10:
            week_forecast_ = pd.read_csv(link) 
            week_forecast = week_forecast_[(week_forecast_["target"]==str(hw)+" wk ahead inc "+type_data)].reset_index()
            try:
                date = week_forecast["target_end_date"].values[0]
            
                forecast_type = "day" 

                df = pd.pivot(week_forecast, values="value", index="location", columns="quantile")
                df["target_date"] = date
                df["type"] = forecast_type
                df.columns = df.columns.fillna("forecast_"+name)
                sorted_col = [str(x) for x in list(df.columns)].sort() 
                df = df.reindex(sorted_col, axis=1)
                folder = "../paper/horison_" + str(hw)+"_week/JHU_"+type_data+"s/hub_methods_eu/"+name 
                if not os.path.isdir(folder): 
                    os.makedirs(folder)   
                df.to_csv(folder+"/Forecast_CI_hor"+str(hw*7)+"_"+date+".csv")

            except:
                print(np.unique(week_forecast_["target"]))
                 
 

EuroCOVIDhub-baseline
(array(['2021-02-20', '2021-02-27', '2021-03-06', '2021-03-13',
       '2021-03-20', '2021-03-27', '2021-04-03', '2021-04-10',
       '2021-04-17', '2021-04-24', '2021-05-01', '2021-05-08',
       '2021-05-15', '2021-05-22', '2021-05-29', '2021-06-05',
       '2021-06-12', '2021-06-19', '2021-06-26', '2021-07-03',
       '2021-07-10', '2021-07-17', '2021-07-24', '2021-07-31',
       '2021-08-07', '2021-08-14', '2021-08-21', '2021-08-28',
       '2021-09-04', '2021-09-11', '2021-09-18', '2021-09-25',
       '2021-10-02', '2021-10-09', '2021-10-16', '2021-10-23',
       '2021-10-30', '2021-11-06', '2021-11-13', '2021-11-20',
       '2021-11-27', '2021-12-04', '2021-12-11', '2021-12-18'],
      dtype=object), array([nan, nan, nan,
       'https://raw.githubusercontent.com/covid19-forecast-hub-europe/covid19-forecast-hub-europe/main/data-processed/EuroCOVIDhub-ensemble/2021-03-08-EuroCOVIDhub-ensemble.csv',
       'https://raw.githubusercontent.com/covid19-forecast-hu