In [202]:
library(tidyverse) #Collection of packages in the tidyverse (see https://www.tidyverse.org/)
library(gsubfn)
library(downloader)
library("MMWRweek")

# install.packages(c("downloader", "MMWRweek"))

#  Johns Hopkins University data

## Mortality data

In [160]:
# Location of the JHU CSSE global deaths time series; the table is read
# straight from the remote CSV, keeping text columns as character.
url_mortality_data <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"
mortality_data <- read.csv(
    file = url(description = url_mortality_data),
    stringsAsFactors = FALSE
)

In [161]:
# Keep the Belgium row only; columns 5 onwards hold the per-day counts.
is_belgium <- mortality_data[["Country.Region"]] == "Belgium"
belgium_counts <- as.numeric(mortality_data[is_belgium, 5:NCOL(mortality_data)])
# Take the day-to-day difference to get incident mortality
daily_deaths <- diff(belgium_counts)
# The first difference is dated 2020-01-23; later days follow consecutively.
# Only observations from 1 March 2020 onwards are kept.
mortality_data <- data.frame(value = daily_deaths) %>%
    mutate(
        date = as.Date("2020-01-23", "%Y-%m-%d") + 0:(length(daily_deaths) - 1),
        location = "Belgium"
    ) %>%
    filter(date >= "2020-03-01")

In [164]:
# Save the Belgian incident-death series without a row-name column.
write.csv(
    x = mortality_data,
    file = "data-truth/truth-incident-deaths.csv",
    row.names = FALSE
)

# Sciensano data

## Mortality data

In [153]:
# Location of the Sciensano mortality CSV; read remotely, keeping text
# columns as character.
url_mortality_data <- "https://epistat.sciensano.be/Data/COVID19BE_MORT.csv"
mortality_data <- read.csv(
    file = url(description = url_mortality_data),
    stringsAsFactors = FALSE
)

In [154]:
# Rename the three columns of interest and total the deaths for every
# (date, location) pair, ignoring missing counts.
mortality_data <- mortality_data %>%
    transmute(date = DATE, location = REGION, value = DEATHS) %>%
    group_by(date, location) %>%
    summarise(value = sum(value, na.rm = TRUE))

In [155]:
# Country-level series: sum the location totals per date and label the
# result "Belgium".
mortality_data_country <- mortality_data %>%
    group_by(date) %>%
    summarise(value = sum(value, na.rm = TRUE)) %>%
    mutate(location = "Belgium")

In [156]:
# Append the country-level rows to the per-location rows, then order the
# combined table by date and location.
mortality_data <- mortality_data %>%
    bind_rows(mortality_data_country) %>%
    arrange(date, location)

In [157]:
# Save the Sciensano incident-death table without a row-name column.
write.csv(
    x = mortality_data,
    file = "data-truth/truth-incident-deaths-sciensano.csv",
    row.names = FALSE
)

# Formatting data

In [271]:
#' Reshape a daily death forecast into long point/quantile format.
#'
#' Rows dated strictly after `forecast_date` are kept (at most `horizon` of
#' them, in chronological order). For each of the mean, lower and upper
#' series the function emits one row per day ("N day ahead inc death") and
#' one row per Sunday-to-Saturday week ("N wk ahead inc death", the sum of
#' the daily values in that week).
#'
#' @param data Data frame with columns `date` (parsed with "%Y-%m-%d"),
#'   `predicted_deaths_mean`, `predicted_deaths_lower` and
#'   `predicted_deaths_upper`.
#' @param forecast_date Date the forecast was issued; copied unchanged into
#'   the `forecast_date` column of the result.
#' @param horizon Maximum number of forecast days to keep.
#' @return A data frame with columns `forecast_date`, `target`,
#'   `target_end_date`, `location`, `type`, `quantile`, `value`. Daily rows
#'   (mean, lower, upper) come first, followed by weekly rows in the same
#'   series order. The mean is emitted as type "point" with `quantile` NA;
#'   lower/upper are emitted as quantiles 0.025 and 0.975.
format_data <- function(data, forecast_date, horizon) {

    value_columns <- c("predicted_deaths_mean",
                       "predicted_deaths_lower",
                       "predicted_deaths_upper")
    stopifnot(all(c("date", value_columns) %in% names(data)))

    dates <- as.Date(data[["date"]], format = "%Y-%m-%d")

    # Rows after the forecast date, chronological, capped at `horizon`.
    # which() drops unparseable dates instead of producing all-NA rows.
    keep <- which(dates > as.Date(forecast_date))
    keep <- keep[order(dates[keep])]
    keep <- utils::head(keep, horizon)
    if (length(keep) < horizon) {
        warning("only ", length(keep), " of the ", horizon,
                " requested forecast days are available", call. = FALSE)
    }
    end_dates <- dates[keep]

    # Weeks run Sunday to Saturday (the MMWR epiweek convention). Keying each
    # day on the Sunday that starts its week, rather than on the week number,
    # keeps weeks in chronological order across a year boundary.
    week_start <- end_dates - as.POSIXlt(end_dates)$wday
    week_id <- match(week_start, unique(week_start))
    # A weekly target ends on the last forecast day that falls in that week.
    week_end <- end_dates[!duplicated(week_id, fromLast = TRUE)]

    # Build the output rows for one series at one resolution ("day" or "wk").
    make_rows <- function(values, ends, unit, type, quantile) {
        n <- length(values)
        data.frame(
            forecast_date = rep(forecast_date, n),
            # sprintf() (unlike paste) yields zero labels for zero rows
            target = sprintf("%d %s ahead inc death", seq_len(n), unit),
            target_end_date = ends,
            location = rep("Belgium", n),
            type = rep(type, n),
            quantile = rep(quantile, n),
            value = values,
            stringsAsFactors = FALSE
        )
    }

    series <- list(
        list(column = "predicted_deaths_mean",  type = "point",    quantile = NA_real_),
        list(column = "predicted_deaths_lower", type = "quantile", quantile = 0.025),
        list(column = "predicted_deaths_upper", type = "quantile", quantile = 0.975)
    )

    daily <- lapply(series, function(s) {
        make_rows(data[[s$column]][keep], end_dates, "day", s$type, s$quantile)
    })

    weekly <- lapply(series, function(s) {
        totals <- vapply(split(data[[s$column]][keep], week_id),
                         sum, numeric(1), na.rm = TRUE)
        make_rows(unname(totals), week_end, "wk", s$type, s$quantile)
    })

    formatted_data <- do.call(rbind, c(daily, weekly))
    rownames(formatted_data) <- NULL

    formatted_data
}

# YYG

In [272]:
# Download the YYG projections for Belgium issued on each forecast date,
# reshape them with format_data() and write one CSV per forecast date to
# data-processed/<model_ID>/<forecast date>-<model_ID>.csv.
model_ID <- "YYG-ParamSearch"

# recursive = TRUE so the parent "data-processed" directory is created too;
# otherwise the failure is hidden by showWarnings = FALSE and write.csv fails.
output_dir <- file.path("data-processed", model_ID)
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

horizon <- 28
# Six weekly forecast dates starting 2020-04-11
forecast_dates <- as.character(as.Date("2020-04-11", "%Y-%m-%d") + (0:5) * 7)

for (i in seq_along(forecast_dates)) {

    url_yyg_data <- paste0(
        "https://raw.githubusercontent.com/youyanggu/covid19_projections/master/projections/",
        forecast_dates[i],
        "/global/Belgium_ALL.csv"
    )
    data <- read.csv(url(url_yyg_data), stringsAsFactors = FALSE)

    data <- data %>%
        select(date, predicted_deaths_mean, predicted_deaths_lower, predicted_deaths_upper)

    formatted_data <- format_data(data, forecast_dates[i], horizon)

    # Written once: the former hard-coded "YYG-ParamSearch" path was the same
    # file as the model_ID-based path, so the second write was redundant.
    write.csv(
        x = formatted_data,
        file = file.path(output_dir, paste0(forecast_dates[i], "-", model_ID, ".csv")),
        row.names = FALSE
    )
}

# IHME

* Data are provided as ZIP archives. The archives used below are dated 2020-04-10, 2020-04-17, 2020-04-22 (used for two forecast dates), 2020-05-04 and 2020-05-12.


In [270]:
# Download the IHME archive matching each forecast date, extract it, reshape
# the Belgium rows with format_data() and write one CSV per forecast date to
# data-processed/<model_ID>/<forecast date>-<model_ID>.csv.
# note: Assumes that the CSV file containing data is the single file whose
# name starts with 'Hospitalization' in the downloaded archive

model_ID <- "IHME-CurveFit"

# recursive = TRUE so the parent "data-processed" directory is created too
output_dir <- file.path("data-processed", model_ID)
dir.create(output_dir, showWarnings = FALSE, recursive = TRUE)

horizon <- 28
forecast_dates <- as.character(as.Date("2020-04-11", "%Y-%m-%d") + (0:5) * 7)
# '2020-04-11''2020-04-18''2020-04-25''2020-05-02''2020-05-09''2020-05-16'

# One archive per forecast date, in the same order as forecast_dates.
# NOTE(review): entries 3 and 4 are the same 0422 archive, so the 2020-04-25
# and 2020-05-02 forecasts are built from identical data - confirm whether a
# later release was intended for 2020-05-02.
url_archives_ihme <- c("https://ihmecovid19storage.blob.core.windows.net/archive/2020-04-10/ihme-covid19.zip",
                       "https://ihmecovid19storage.blob.core.windows.net/archive/2020-04-17/ihme-covid19.zip",
                       "http://www.healthdata.org/sites/default/files/files/Projects/COVID/ihme-covid19-0422.zip",
                       "http://www.healthdata.org/sites/default/files/files/Projects/COVID/ihme-covid19-0422.zip",
                       "https://ihmecovid19storage.blob.core.windows.net/archive/2020-05-04/ihme-covid19.zip",
                       "https://ihmecovid19storage.blob.core.windows.net/archive/2020-05-12/ihme-covid19.zip"
                      )
stopifnot(length(url_archives_ihme) == length(forecast_dates))

# Scratch directory for the downloaded archive and its extracted contents
dir.create("temp", showWarnings = FALSE)

for (i in seq_along(forecast_dates)) {

    extract_dir <- file.path("temp", forecast_dates[i])
    download(url_archives_ihme[i], destfile = "temp/dataset.zip", mode = "wb")
    unzip("temp/dataset.zip", exdir = extract_dir)

    # `pattern` is a regular expression matched against file names, not a
    # glob: anchor it so only CSV files starting with "Hospitalization" match.
    csv_file_name <- list.files(extract_dir,
                                pattern = "^Hospitalization.*\\.csv$",
                                recursive = TRUE,
                                full.names = TRUE)
    if (length(csv_file_name) != 1) {
        stop("expected exactly one 'Hospitalization*.csv' file in ", extract_dir,
             ", found ", length(csv_file_name), call. = FALSE)
    }
    data <- read.csv(csv_file_name, stringsAsFactors = FALSE)

    data <- data[data$location_name == "Belgium", ]

    # Rename the IHME columns to the names format_data() expects
    data <- data %>%
        dplyr::transmute(date = date,
                         predicted_deaths_mean = deaths_mean,
                         predicted_deaths_lower = deaths_lower,
                         predicted_deaths_upper = deaths_upper)

    formatted_data <- format_data(data, forecast_dates[i], horizon)

    write.csv(
        x = formatted_data,
        file = file.path(output_dir, paste0(forecast_dates[i], "-", model_ID, ".csv")),
        row.names = FALSE
    )

}
