In [1]:
import datetime
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow_probability as tfp
import tensorflow_probability.python.distributions as tfd
import tensorflow_probability.python.layers as tfl

In [2]:
# ---- Notebook configuration: paths, windowing, split sizes and run options ----
# Directory holding the JHU truth CSVs (absolute, machine-specific path).
data_path = "C:/Users/pmarc/PycharmProjects/covid19-forecast-hub-europe/data-truth/JHU/"
# Column lists; target_cols is interpolated into image_path further down,
# cols is not referenced in the visible cells.
cols = ["cases"]
target_cols = ["cases"]
# Window geometry used when slicing inputs/labels out of the weekly series.
input_width = 1
label_width = 1
shift = 1  # reassigned by each horizon-specific cell below
# Train / validation / test split sizes, in percent of the available rows.
train_data_pcnt = 70
valid_data_pcnt = 20
test_data_pcnt = 10
# Number of epochs passed to every model.fit call.
epochs = 1000
# Output locations (prefixes are string-concatenated with file names).
final_model_path = "Models/Final/"
results_path = "Models/Results/"
graphs_path = "Models/Graphs/"
# Directory where create_forecasts_file writes the forecast CSVs.
processed_path = "C:/Users/pmarc/PycharmProjects/covid19-forecast-hub-europe/data-processed/UNED-CovidPredPMA/"
# Plot helpers call plt.show() when display_images is set and plt.savefig() when save_images is set.
display_images = True
save_images = False
# "weeks" makes read_truth_data resample the series to weeks ending on Saturday.
period = "weeks"
# Inclusive date window applied by read_truth_data (None disables a bound).
last_day = "2021-07-31"
first_day = "2020-01-23"
# predictions_date_init = "2021-09-13"
# predictions_date_end = "2021-09-25"
# Used in the forecast CSV file name.
# NOTE(review): the name says "Poisson" but the models built below use tfd.Normal - confirm.
team_model_name = "UNED-CovidPredPMA-Poisson"
# Countries of interest; commented-out entries are disabled.
# NOTE(review): this list is not referenced by any visible cell (only `country` is) - confirm it is still needed.
countries = [
    # 'Austria',
    # 'Belgium',
    # 'Bulgaria',
    # 'Croatia',
    # 'Cyprus',
    # 'Czechia',
    # 'Denmark',
    # 'Estonia',
    # 'Finland',
    'France',
    'Germany',
    # 'Greece',
    # 'Hungary',
    # 'Iceland',
    # 'Ireland',
    'Italy',
    # 'Latvia',
    # 'Liechtenstein',
    # 'Lithuania',
    # 'Luxembourg',
    # 'Malta',
    # 'Netherlands',
    # 'Norway',
    # 'Poland',
    # 'Portugal',
    # 'Romania',
    # 'Slovakia',
    # 'Slovenia',
    'Spain',
    # 'Sweden',
    # 'Switzerland',
    'United Kingdom'
]
# Country name -> two-letter code; create_forecasts_file writes the code
# into the "location" column of the forecast CSV.
country_codes = {
    'Austria': 'AT',
    'Belgium': 'BE',
    'Bulgaria': 'BG',
    'Croatia': 'HR',
    'Cyprus': 'CY',
    'Czechia': 'CZ',
    'Denmark': 'DK',
    'Estonia': 'EE',
    'Finland': 'FI',
    'France': 'FR',
    'Germany': 'DE',
    'Greece': 'GR',
    'Hungary': 'HU',
    'Iceland': 'IS',
    'Ireland': 'IE',
    'Italy': 'IT',
    'Latvia': 'LV',
    'Liechtenstein': 'LI',
    'Lithuania': 'LT',
    'Luxembourg': 'LU',
    'Malta': 'MT',
    'Netherlands': 'NL',
    'Norway': 'NO',
    'Poland': 'PL',
    'Portugal': 'PT',
    'Romania': 'RO',
    'Slovakia': 'SK',
    'Slovenia': 'SI',
    'Spain': 'ES',
    'Sweden': 'SE',
    'Switzerland': 'CH',
    'United Kingdom': 'GB',
}
# Percentile levels (in percent) written to the forecast files.
quantiles = [
    1.00, 2.50, 5.00,
    10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0, 45.0,
    50.0,
    55.0, 60.0, 65.0, 70.0, 75.0, 80.0, 85.0, 90.0,
    95.0, 97.5, 99.0,
]
# Positions of the median and of the bounds of the central 50% / 95% intervals
# inside `quantiles`, looked up by level instead of being hard-coded.
median_quantile_index = quantiles.index(50.0)
quantile_50_low_index = quantiles.index(25.0)
quantile_50_high_index = quantiles.index(75.0)
quantile_95_low_index = quantiles.index(2.50)
quantile_95_high_index = quantiles.index(97.5)
# Country processed by this notebook run.
country = 'Spain'

In [3]:
# Sentinel meaning "argument not supplied: use the notebook-level setting".
# None cannot play this role because None already means "no date bound".
_FROM_NOTEBOOK = object()


def read_truth_data(filepath, filepath_deaths, country=_FROM_NOTEBOOK, period=_FROM_NOTEBOOK,
                    last_day=_FROM_NOTEBOOK, first_day=_FROM_NOTEBOOK):
    """Load the incident-cases truth CSV and return a date-indexed ``cases`` frame.

    Parameters
    ----------
    filepath : str
        CSV with at least the columns ``date``, ``location``, ``location_name``
        and ``value``.
    filepath_deaths : str
        Path of the deaths CSV. Kept for call compatibility; deaths are not part
        of the returned frame, so the file is no longer read.
    country : str, optional
        Value matched against ``location_name``; ``"all"`` keeps every row.
        Defaults to the notebook-level ``country``.
    period : str, optional
        ``"weeks"`` sums the series into weeks ending on Saturday; any other
        value returns the rows un-aggregated. Defaults to the notebook-level
        ``period``.
    last_day, first_day : str or None, optional
        Inclusive date bounds; ``None`` disables the bound. Default to the
        notebook-level ``last_day`` / ``first_day``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``date`` with a ``cases`` column (negative input values are
        replaced by their absolute value).
    """
    # Resolve omitted arguments from the notebook namespace so that both the
    # two-argument form and the keyword form used by the cells work.
    settings = globals()
    if country is _FROM_NOTEBOOK:
        country = settings["country"]
    if period is _FROM_NOTEBOOK:
        period = settings["period"]
    if last_day is _FROM_NOTEBOOK:
        last_day = settings["last_day"]
    if first_day is _FROM_NOTEBOOK:
        first_day = settings["first_day"]

    df_cases = pd.read_csv(filepath)
    if country == "all":
        df_filtered = df_cases.copy()
    else:
        # .copy() so the column assignments below never write into a view.
        df_filtered = df_cases[df_cases["location_name"] == country].copy()

    # Negative corrections in the source data are flipped to positive counts.
    df_filtered["value"] = df_filtered["value"].abs()
    df_filtered["date"] = pd.to_datetime(df_filtered["date"])
    df_filtered = df_filtered.set_index("date").drop(columns=["location", "location_name"])
    df_filtered["cases"] = df_filtered.pop("value")

    # truncate() requires a monotonic index; a stable sort keeps row order
    # within a day and is a no-op for already sorted input.
    df_filtered = df_filtered.sort_index(kind="mergesort")
    if first_day is not None:
        df_filtered = df_filtered.truncate(before=first_day)
    if last_day is not None:
        df_filtered = df_filtered.truncate(after=last_day)

    if period == "weeks":
        df_filtered = df_filtered.resample("W-SAT").sum()

    return df_filtered

In [4]:
def nll(y, distr):
    """Loss function: negative log-probability of ``y`` under ``distr``."""
    log_likelihood = distr.log_prob(y)
    return -log_likelihood

In [None]:
def plot_linear_model(model=None, plot_col='cases', plot_model="random", plot_quantile=True,
                      image_path=None, x=None, y=None):
    """Plot the predictive mean (and a +/- 2 std band) against the observations.

    Parameters
    ----------
    model : callable
        Called as ``model(x)``; the result must expose ``mean()`` and
        ``stddev()``.
    plot_col : str
        Name of the plotted quantity, used in labels and the title.
    plot_model : str
        Name of the data split, used in the title.
    plot_quantile : bool
        When true, draw the ``mean + 2 * std`` and ``mean - 2 * std`` lines.
    image_path : str or None
        File prefix; ``"_predictions.png"`` is appended. Nothing is saved when
        it is ``None``, even if the notebook-level ``save_images`` is set.
    x, y : array-like
        Model inputs and the matching observations.

    Reads the notebook-level ``country``, ``save_images`` and ``display_images``.
    """
    fig, ax = plt.subplots(1, 1, figsize=(12, 8))
    num_days = len(x)
    steps = range(num_days)

    pred_cases_raw = model(x)
    # Flatten to 1-D so every series is drawn as a single line.
    mean = np.ravel(np.asarray(pred_cases_raw.mean()))
    std = np.ravel(np.asarray(pred_cases_raw.stddev()))

    ax.plot(steps, mean, '--X', color='#ff7f0e', label='Predicted mean', lw=3, markersize=6)
    if plot_quantile:
        # Distinct labels: the two lines bound the mean +/- 2 std band.
        ax.plot(steps, mean + 2 * std, color='b', label='Mean + 2 std', alpha=.4, lw=3)
        ax.plot(steps, mean - 2 * std, color='b', label='Mean - 2 std', alpha=.4, lw=3)
    ax.plot(steps, y[:num_days], '--o', color='k', markersize=6, label='Observed ' + plot_col)

    ax.xaxis.set_tick_params(rotation=45)
    ax.set_title(plot_model + " set for " + country + " " + plot_col)
    ax.set_xlabel('Day', fontsize='large')
    ax.set_ylabel(plot_col, fontsize='large')
    ax.legend(loc='upper left', fontsize='large')
    ax.axhline(y=0, color='k', linestyle='--')

    plt.tight_layout()
    if save_images and image_path is not None:
        plt.savefig(image_path + "_predictions.png")
    if display_images:
        plt.show()
    else:
        # Release the figure when it is not shown so repeated calls do not pile up.
        plt.close(fig)

    return

In [None]:
def plot_cpd(model=None, num_weeks=1, x=None, y=None):
    """Scatter observed values against the predicted mean, with a +/- 2 std band.

    Parameters
    ----------
    model : callable
        Called as ``model(x)``; the result must expose ``mean()`` and
        ``stddev()`` (as the result of calling the models in this notebook does).
    num_weeks : int
        Forecast horizon, used in the title only.
    x, y : array-like
        Model inputs and the matching observations.

    Reads the notebook-level ``country`` for the title.
    """
    # Use the predictive distribution itself: model.predict() would return one
    # random sample per row instead of the predicted average.
    prediction = model(x)
    y_hat_mean = np.ravel(np.asarray(prediction.mean()))
    y_hat_std = np.ravel(np.asarray(prediction.stddev()))

    plt.scatter(y_hat_mean, np.ravel(y), alpha=0.3)
    # Sort by predicted mean so the band is drawn left to right without zigzags.
    order = np.argsort(y_hat_mean)
    mean_sorted = y_hat_mean[order]
    std_sorted = y_hat_std[order]
    plt.plot(mean_sorted, mean_sorted + 2 * std_sorted, linestyle='dashed', c="black")
    plt.plot(mean_sorted, mean_sorted - 2 * std_sorted, linestyle='dashed', c="black")
    plt.plot(mean_sorted, mean_sorted, c="black")
    plt.title('Observed vs. Predicted for ' + country + " and " + str(num_weeks) + " predicted weeks")
    plt.xlabel('Predicted average')
    plt.ylabel('Observed number')
    plt.show()

    return

In [None]:
def plot_train_history(history, title, image_path):
    """Draw training and validation loss per epoch from a Keras ``History``.

    ``history.history`` must contain ``'loss'`` and ``'val_loss'``. The figure is
    saved to ``image_path + "_train_history.png"`` when the notebook-level
    ``save_images`` is set and shown when ``display_images`` is set.
    """
    train_curve = history.history['loss']
    valid_curve = history.history['val_loss']
    epoch_axis = range(len(train_curve))

    plt.figure()
    curves = (
        (train_curve, 'b', 'Training loss'),
        (valid_curve, 'r', 'Validation loss'),
    )
    for series, line_format, legend_label in curves:
        plt.plot(epoch_axis, series, line_format, label=legend_label)
    plt.title(title)
    plt.legend()

    if save_images:
        plt.savefig(image_path + "_train_history.png")
    if display_images:
        plt.show()

    return

In [None]:
# Load the truth series for the configured country, period and last day.
df = read_truth_data(
    filepath=data_path + "truth_JHU-Incident Cases.csv",
    filepath_deaths=data_path + "truth_JHU-Incident Deaths.csv",
    country=country,
    period=period,
    last_day=last_day,
)


In [None]:
# Output name stems. The date is read once so both names always agree.
_run_date = datetime.datetime.today().strftime("%Y%m%d")
image_path = "{0}_{1}_{2}".format(country, target_cols, _run_date)
file_path = "{0}_{1}".format(country, _run_date)

# Chronological train / validation / test split.
_n_rows = len(df)
_train_end = int(_n_rows * (train_data_pcnt / 100))
_valid_end = int(_n_rows * ((train_data_pcnt + valid_data_pcnt) / 100))
_test_rows = int(_n_rows * (test_data_pcnt / 100))
df_train = df[0:_train_end]
df_valid = df[_train_end:_valid_end]
# Slice from an explicit start position: df[-0:] would return the WHOLE frame
# when the test fraction rounds down to zero rows.
df_test = df[_n_rows - _test_rows:]

In [None]:
# shift is the gap between the week with data and the week to predict: a shift
# of 1 skips one week, so the prediction is for 2 weeks later. Here shift = 0.
shift = 0
data_train_1_w = df_train.to_numpy(dtype=np.float32)
x_train_1_w = data_train_1_w[:-(label_width + shift)]
y_train_1_w = data_train_1_w[input_width + shift:].squeeze()
data_valid_1_w = df_valid.to_numpy(dtype=np.float32)
x_valid_1_w = data_valid_1_w[:-(label_width + shift)]
y_valid_1_w = data_valid_1_w[input_width + shift:].squeeze()
data_test_1_w = df_test.to_numpy(dtype=np.float32)
x_test_1_w = data_test_1_w[:-(label_width + shift)]
y_test_1_w = data_test_1_w[input_width + shift:].squeeze()

In [None]:
# 1-week model: a single Dense layer emits two values per sample; the first is
# used as the mean (loc) of a Normal, the second goes through a softplus (plus
# a 1e-3 floor) to give a strictly positive scale.
model_lr_1_w = tf.keras.Sequential([
        tf.keras.layers.Dense(2),
        tfl.DistributionLambda(lambda t: tfd.Normal(loc=t[:, :1],
                                                    scale=1e-3 + tf.math.softplus(0.05 * t[:, 1:]))),
    ])
# Trained by minimising the negative log-likelihood of the observations.
model_lr_1_w.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01), loss=nll)
# NOTE(review): no random seed is set in the visible cells, so results vary between runs.
result_lr_1_w = model_lr_1_w.fit(x_train_1_w, y_train_1_w, epochs=epochs, validation_data=(x_valid_1_w, y_valid_1_w), verbose=True)

In [None]:
# Layer / parameter overview of the 1-week model.
model_lr_1_w.summary()

In [None]:
# Loss curves for the 1-week model. The "_1w" suffix keeps the saved figure
# from being overwritten by the other horizons, which share image_path.
plot_train_history(result_lr_1_w, 'Train history loss for ' + country + " " +
                   datetime.datetime.now().strftime("%Y-%m-%d-%H:%M"),
                   final_model_path + image_path + "_1w")

# Negative log-likelihood on each split.
train_performance_1_w = model_lr_1_w.evaluate(x_train_1_w, y_train_1_w, verbose=1)
val_performance_1_w = model_lr_1_w.evaluate(x_valid_1_w, y_valid_1_w, verbose=1)
test_performance_1_w = model_lr_1_w.evaluate(x_test_1_w, y_test_1_w, verbose=1)
print("Linear Regression NLL on training 1 week: ", train_performance_1_w)
print("Linear Regression NLL on validation 1 week: ", val_performance_1_w)
print("Linear Regression NLL on test 1 week: ", test_performance_1_w)

In [None]:
# One figure per split. Each call gets its own file prefix so that saved
# figures do not overwrite each other (they previously shared one name).
plot_linear_model(model_lr_1_w, plot_model="train", plot_col="cases", x=x_train_1_w, y=y_train_1_w,
                  image_path=graphs_path + image_path + "_1w_train")
plot_linear_model(model_lr_1_w, plot_model="val", plot_col="cases", x=x_valid_1_w, y=y_valid_1_w,
                  image_path=graphs_path + image_path + "_1w_val")
plot_linear_model(model_lr_1_w, plot_model="test", plot_col="cases", x=x_test_1_w, y=y_test_1_w,
                  image_path=graphs_path + image_path + "_1w_test")

In [None]:
# Observed-vs-predicted plots for the train, validation and test splits.
for _x_split, _y_split in (
    (x_train_1_w, y_train_1_w),
    (x_valid_1_w, y_valid_1_w),
    (x_test_1_w, y_test_1_w),
):
    plot_cpd(model_lr_1_w, 1, _x_split, _y_split)

In [None]:
# shift is the gap between the week with data and the week to predict: a shift
# of 1 skips one week, so the prediction is for 2 weeks later.
shift = 1
data_train_2_w = df_train.to_numpy(dtype=np.float32)
x_train_2_w = data_train_2_w[:-(label_width + shift)]
y_train_2_w = data_train_2_w[input_width + shift:].squeeze()
data_valid_2_w = df_valid.to_numpy(dtype=np.float32)
x_valid_2_w = data_valid_2_w[:-(label_width + shift)]
y_valid_2_w = data_valid_2_w[input_width + shift:].squeeze()
data_test_2_w = df_test.to_numpy(dtype=np.float32)
x_test_2_w = data_test_2_w[:-(label_width + shift)]
y_test_2_w = data_test_2_w[input_width + shift:].squeeze()

In [None]:
# 2-week model: a single Dense layer emits two values per sample; the first is
# used as the mean (loc) of a Normal, the second goes through a softplus (plus
# a 1e-3 floor) to give a strictly positive scale.
model_lr_2_w = tf.keras.Sequential([
        tf.keras.layers.Dense(2),
        tfl.DistributionLambda(lambda t: tfd.Normal(loc=t[:, :1],
                                                    scale=1e-3 + tf.math.softplus(0.05 * t[:, 1:]))),
    ])
# Trained by minimising the negative log-likelihood of the observations.
model_lr_2_w.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01), loss=nll)
# NOTE(review): no random seed is set in the visible cells, so results vary between runs.
result_lr_2_w = model_lr_2_w.fit(x_train_2_w, y_train_2_w, epochs=epochs, validation_data=(x_valid_2_w, y_valid_2_w), verbose=True)

In [None]:
# Layer / parameter overview of the 2-week model.
model_lr_2_w.summary()

In [None]:
# Loss curves for the 2-week model. The "_2w" suffix keeps the saved figure
# from being overwritten by the other horizons, which share image_path.
plot_train_history(result_lr_2_w, 'Train history loss for ' + country + " " +
                   datetime.datetime.now().strftime("%Y-%m-%d-%H:%M"),
                   final_model_path + image_path + "_2w")

# Negative log-likelihood on each split.
train_performance_2_w = model_lr_2_w.evaluate(x_train_2_w, y_train_2_w, verbose=1)
val_performance_2_w = model_lr_2_w.evaluate(x_valid_2_w, y_valid_2_w, verbose=1)
test_performance_2_w = model_lr_2_w.evaluate(x_test_2_w, y_test_2_w, verbose=1)
print("Linear Regression NLL on training 2 weeks: ", train_performance_2_w)
print("Linear Regression NLL on validation 2 weeks: ", val_performance_2_w)
print("Linear Regression NLL on test 2 weeks: ", test_performance_2_w)

In [None]:
# One figure per split. Each call gets its own file prefix so that saved
# figures do not overwrite each other (they previously shared one name).
plot_linear_model(model_lr_2_w, plot_model="train", plot_col="cases", x=x_train_2_w, y=y_train_2_w,
                  image_path=graphs_path + image_path + "_2w_train")
plot_linear_model(model_lr_2_w, plot_model="val", plot_col="cases", x=x_valid_2_w, y=y_valid_2_w,
                  image_path=graphs_path + image_path + "_2w_val")
plot_linear_model(model_lr_2_w, plot_model="test", plot_col="cases", x=x_test_2_w, y=y_test_2_w,
                  image_path=graphs_path + image_path + "_2w_test")

In [None]:
# Observed-vs-predicted plots for the train, validation and test splits.
for _x_split, _y_split in (
    (x_train_2_w, y_train_2_w),
    (x_valid_2_w, y_valid_2_w),
    (x_test_2_w, y_test_2_w),
):
    plot_cpd(model_lr_2_w, 2, _x_split, _y_split)

In [None]:
# shift is the gap between the week with data and the week to predict: a shift
# of 1 skips one week, so the prediction is for 2 weeks later.
shift = 2
data_train_3_w = df_train.to_numpy(dtype=np.float32)
x_train_3_w = data_train_3_w[:-(label_width + shift)]
y_train_3_w = data_train_3_w[input_width + shift:].squeeze()
data_valid_3_w = df_valid.to_numpy(dtype=np.float32)
x_valid_3_w = data_valid_3_w[:-(label_width + shift)]
y_valid_3_w = data_valid_3_w[input_width + shift:].squeeze()
data_test_3_w = df_test.to_numpy(dtype=np.float32)
x_test_3_w = data_test_3_w[:-(label_width + shift)]
y_test_3_w = data_test_3_w[input_width + shift:].squeeze()

In [None]:
# 3-week model: a single Dense layer emits two values per sample; the first is
# used as the mean (loc) of a Normal, the second goes through a softplus (plus
# a 1e-3 floor) to give a strictly positive scale.
model_lr_3_w = tf.keras.Sequential([
        tf.keras.layers.Dense(2),
        tfl.DistributionLambda(lambda t: tfd.Normal(loc=t[:, :1],
                                                    scale=1e-3 + tf.math.softplus(0.05 * t[:, 1:]))),
    ])
# Trained by minimising the negative log-likelihood of the observations.
model_lr_3_w.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01), loss=nll)
# NOTE(review): no random seed is set in the visible cells, so results vary between runs.
result_lr_3_w = model_lr_3_w.fit(x_train_3_w, y_train_3_w, epochs=epochs, validation_data=(x_valid_3_w, y_valid_3_w), verbose=True)

In [None]:
# Layer / parameter overview of the 3-week model.
model_lr_3_w.summary()

In [None]:
# Loss curves for the 3-week model. The "_3w" suffix keeps the saved figure
# from being overwritten by the other horizons, which share image_path.
plot_train_history(result_lr_3_w, 'Train history loss for ' + country + " " +
                   datetime.datetime.now().strftime("%Y-%m-%d-%H:%M"),
                   final_model_path + image_path + "_3w")

# Negative log-likelihood on each split.
train_performance_3_w = model_lr_3_w.evaluate(x_train_3_w, y_train_3_w, verbose=1)
val_performance_3_w = model_lr_3_w.evaluate(x_valid_3_w, y_valid_3_w, verbose=1)
test_performance_3_w = model_lr_3_w.evaluate(x_test_3_w, y_test_3_w, verbose=1)
print("Linear Regression NLL on training 3 weeks: ", train_performance_3_w)
print("Linear Regression NLL on validation 3 weeks: ", val_performance_3_w)
print("Linear Regression NLL on test 3 weeks: ", test_performance_3_w)

In [None]:
# One figure per split. Each call gets its own file prefix so that saved
# figures do not overwrite each other (they previously shared one name).
plot_linear_model(model_lr_3_w, plot_model="train", plot_col="cases", x=x_train_3_w, y=y_train_3_w,
                  image_path=graphs_path + image_path + "_3w_train")
plot_linear_model(model_lr_3_w, plot_model="val", plot_col="cases", x=x_valid_3_w, y=y_valid_3_w,
                  image_path=graphs_path + image_path + "_3w_val")
plot_linear_model(model_lr_3_w, plot_model="test", plot_col="cases", x=x_test_3_w, y=y_test_3_w,
                  image_path=graphs_path + image_path + "_3w_test")

In [None]:
# Observed-vs-predicted plots for the train, validation and test splits.
for _x_split, _y_split in (
    (x_train_3_w, y_train_3_w),
    (x_valid_3_w, y_valid_3_w),
    (x_test_3_w, y_test_3_w),
):
    plot_cpd(model_lr_3_w, 3, _x_split, _y_split)

In [None]:
# shift is the gap between the week with data and the week to predict: a shift
# of 1 skips one week, so the prediction is for 2 weeks later.
shift = 3
data_train_4_w = df_train.to_numpy(dtype=np.float32)
x_train_4_w = data_train_4_w[:-(label_width + shift)]
y_train_4_w = data_train_4_w[input_width + shift:].squeeze()
data_valid_4_w = df_valid.to_numpy(dtype=np.float32)
x_valid_4_w = data_valid_4_w[:-(label_width + shift)]
y_valid_4_w = data_valid_4_w[input_width + shift:].squeeze()
data_test_4_w = df_test.to_numpy(dtype=np.float32)
x_test_4_w = data_test_4_w[:-(label_width + shift)]
y_test_4_w = data_test_4_w[input_width + shift:].squeeze()

In [None]:
# 4-week model: a single Dense layer emits two values per sample; the first is
# used as the mean (loc) of a Normal, the second goes through a softplus (plus
# a 1e-3 floor) to give a strictly positive scale.
model_lr_4_w = tf.keras.Sequential([
        tf.keras.layers.Dense(2),
        tfl.DistributionLambda(lambda t: tfd.Normal(loc=t[:, :1],
                                                    scale=1e-3 + tf.math.softplus(0.05 * t[:, 1:]))),
    ])
# Trained by minimising the negative log-likelihood of the observations.
model_lr_4_w.compile(optimizer=tf.optimizers.Adam(learning_rate=0.01), loss=nll)
# NOTE(review): no random seed is set in the visible cells, so results vary between runs.
result_lr_4_w = model_lr_4_w.fit(x_train_4_w, y_train_4_w, epochs=epochs, validation_data=(x_valid_4_w, y_valid_4_w), verbose=True)

In [None]:
# Layer / parameter overview of the 4-week model.
model_lr_4_w.summary()

In [None]:
# Loss curves for the 4-week model. The "_4w" suffix keeps the saved figure
# from being overwritten by the other horizons, which share image_path.
plot_train_history(result_lr_4_w, 'Train history loss for ' + country + " " +
                   datetime.datetime.now().strftime("%Y-%m-%d-%H:%M"),
                   final_model_path + image_path + "_4w")

# Negative log-likelihood on each split.
train_performance_4_w = model_lr_4_w.evaluate(x_train_4_w, y_train_4_w, verbose=1)
val_performance_4_w = model_lr_4_w.evaluate(x_valid_4_w, y_valid_4_w, verbose=1)
test_performance_4_w = model_lr_4_w.evaluate(x_test_4_w, y_test_4_w, verbose=1)
print("Linear Regression NLL on training 4 weeks: ", train_performance_4_w)
print("Linear Regression NLL on validation 4 weeks: ", val_performance_4_w)
print("Linear Regression NLL on test 4 weeks: ", test_performance_4_w)

In [None]:
# One figure per split. Each call gets its own file prefix so that saved
# figures do not overwrite each other (they previously shared one name).
plot_linear_model(model_lr_4_w, plot_model="train", plot_col="cases", x=x_train_4_w, y=y_train_4_w,
                  image_path=graphs_path + image_path + "_4w_train")
plot_linear_model(model_lr_4_w, plot_model="val", plot_col="cases", x=x_valid_4_w, y=y_valid_4_w,
                  image_path=graphs_path + image_path + "_4w_val")
plot_linear_model(model_lr_4_w, plot_model="test", plot_col="cases", x=x_test_4_w, y=y_test_4_w,
                  image_path=graphs_path + image_path + "_4w_test")

In [None]:
# Observed-vs-predicted plots for the train, validation and test splits.
for _x_split, _y_split in (
    (x_train_4_w, y_train_4_w),
    (x_valid_4_w, y_valid_4_w),
    (x_test_4_w, y_test_4_w),
):
    plot_cpd(model_lr_4_w, 4, _x_split, _y_split)

In [None]:
# Test-set negative log-likelihood of every horizon, side by side.
print("SUMMARY:\n")
for _label, _test_nll in (
    ("Linear Regression NLL on test 1 week: ", test_performance_1_w),
    ("Linear Regression NLL on test 2 weeks: ", test_performance_2_w),
    ("Linear Regression NLL on test 3 weeks: ", test_performance_3_w),
    ("Linear Regression NLL on test 4 weeks: ", test_performance_4_w),
):
    print(_label, _test_nll)


In [None]:
def create_forecasts_file(country, values, date_init, date_end, num_weeks, quantiles=[2.5, 25, 50, 75, 97.5], plot_col_index=0):
    """Append one forecast target (point row + quantile rows) to the forecast CSV.

    The file is ``processed_path + date_init + "-" + team_model_name + ".csv"``;
    it is created with a header row when it does not exist yet, otherwise rows
    are appended.

    Parameters
    ----------
    country : str
        Key into the notebook-level ``country_codes`` mapping.
    values : sequence of numbers
        Forecast values, one per entry of ``quantiles`` (same order). Negative
        values are written with their sign flipped.
    date_init : str
        Forecast date; also part of the file name.
    date_end : str
        Target end date.
    num_weeks : int
        Horizon, written as ``"<num_weeks> wk ahead inc case"``.
    quantiles : sequence of numbers
        Quantile levels in percent; they are written divided by 100.
    plot_col_index : int
        Unused; kept for call compatibility.

    Raises
    ------
    IndexError
        If ``values`` is non-empty but has no entry at the median position.
        Raised before the file is touched, so no partial row is written.
    """
    forecast_filename = processed_path + date_init + "-" + team_model_name + ".csv"
    append_write = 'a' if os.path.exists(forecast_filename) else 'w'

    # Locate the median by level so any quantile grid containing 50 works
    # (including this function's own default); otherwise fall back to the
    # notebook-level index.
    try:
        median_index = list(quantiles).index(50)
    except ValueError:
        median_index = median_quantile_index
    if len(values) > 0 and median_index >= len(values):
        raise IndexError("values has no entry at the median position " + str(median_index))

    def _non_negative(number):
        # Same sign flip the rows have always used for negative forecasts.
        return -number if number < 0 else number

    with open(forecast_filename, append_write) as file:
        if append_write == "w":
            file.write("scenario_id,forecast_date,target,target_end_date,location,type,quantile,value\n")
        if len(values) > 0:
            row_prefix = ("forecast," + date_init + "," + str(num_weeks) + " wk ahead inc case,"
                          + date_end + "," + country_codes[country] + ",")
            # The point forecast (the median) comes first ...
            file.write(row_prefix + "point,NA," + str(_non_negative(values[median_index])) + "\n")
            # ... followed by one row per quantile level.
            for i in range(len(values)):
                file.write(row_prefix + "quantile," + str(quantiles[i] / 100.00) + ","
                           + str(_non_negative(values[i])) + "\n")

    return

In [None]:
# Generamos predicciones y los cuantiles para poder comparar con los modelos del Hub
y_hat_1_w = model_lr_1_w(x_test_1_w)
post_quantile_1_w = np.percentile(y_hat_1_w.mean(), quantiles, interpolation="midpoint")
y_hat_2_w = model_lr_2_w(x_test_2_w)
post_quantile_2_w = np.percentile(y_hat_2_w.mean(), quantiles, interpolation="midpoint")
y_hat_3_w = model_lr_3_w(x_test_3_w)
post_quantile_3_w = np.percentile(y_hat_3_w.mean(), quantiles, interpolation="midpoint")
y_hat_4_w = model_lr_4_w(x_test_4_w)
post_quantile_4_w = np.percentile(y_hat_4_w.mean(), quantiles, interpolation="midpoint")

In [None]:
# Write the 1- to 4-week-ahead forecasts issued on 2021-08-02.
for _horizon, _values, _target_end in (
    (1, post_quantile_1_w, "2021-08-07"),
    (2, post_quantile_2_w, "2021-08-14"),
    (3, post_quantile_3_w, "2021-08-21"),
    (4, post_quantile_4_w, "2021-08-28"),
):
    create_forecasts_file(country=country, values=_values, date_init="2021-08-02",
                          date_end=_target_end, num_weeks=_horizon, quantiles=quantiles,
                          plot_col_index=0)

In [None]:
# Extend the data window by one week and rebuild the most recent slice.
last_day = "2021-08-07"
df = read_truth_data(
    filepath=data_path + "truth_JHU-Incident Cases.csv",
    filepath_deaths=data_path + "truth_JHU-Incident Deaths.csv",
    country=country,
    period=period,
    last_day=last_day,
)
df_test = df.iloc[-int(len(df) * (test_data_pcnt / 100)):]

In [None]:
# Model inputs for each horizon: the latest slice minus its last label_width + shift rows.
data_test = df_test.to_numpy(dtype=np.float32)
shift = 0
x_test_1_w = data_test[:-(label_width + shift)]
shift = 1
x_test_2_w = data_test[:-(label_width + shift)]
shift = 2
x_test_3_w = data_test[:-(label_width + shift)]
shift = 3
x_test_4_w = data_test[:-(label_width + shift)]

In [None]:
# Generate predictions and their quantiles so they can be compared with the Hub models.
# NOTE(review): np.percentile is applied (without an axis) to the per-row predictive
# means, so these are percentiles ACROSS the input rows, not quantiles of one
# predictive distribution - confirm this is the intended forecast uncertainty.
y_hat_1_w = model_lr_1_w(x_test_1_w)
post_quantile_1_w = np.percentile(y_hat_1_w.mean(), quantiles, interpolation="midpoint")
y_hat_2_w = model_lr_2_w(x_test_2_w)
post_quantile_2_w = np.percentile(y_hat_2_w.mean(), quantiles, interpolation="midpoint")
y_hat_3_w = model_lr_3_w(x_test_3_w)
post_quantile_3_w = np.percentile(y_hat_3_w.mean(), quantiles, interpolation="midpoint")
y_hat_4_w = model_lr_4_w(x_test_4_w)
post_quantile_4_w = np.percentile(y_hat_4_w.mean(), quantiles, interpolation="midpoint")

In [None]:
# Write the 1- to 4-week-ahead forecasts issued on 2021-08-09.
for _horizon, _values, _target_end in (
    (1, post_quantile_1_w, "2021-08-14"),
    (2, post_quantile_2_w, "2021-08-21"),
    (3, post_quantile_3_w, "2021-08-28"),
    (4, post_quantile_4_w, "2021-09-04"),
):
    create_forecasts_file(country=country, values=_values, date_init="2021-08-09",
                          date_end=_target_end, num_weeks=_horizon, quantiles=quantiles,
                          plot_col_index=0)

In [None]:
# Extend the data window by one week and rebuild the most recent slice.
last_day = "2021-08-14"
df = read_truth_data(
    filepath=data_path + "truth_JHU-Incident Cases.csv",
    filepath_deaths=data_path + "truth_JHU-Incident Deaths.csv",
    country=country,
    period=period,
    last_day=last_day,
)
df_test = df.iloc[-int(len(df) * (test_data_pcnt / 100)):]

In [None]:
# Model inputs for each horizon: the latest slice minus its last label_width + shift rows.
data_test = df_test.to_numpy(dtype=np.float32)
shift = 0
x_test_1_w = data_test[:-(label_width + shift)]
shift = 1
x_test_2_w = data_test[:-(label_width + shift)]
shift = 2
x_test_3_w = data_test[:-(label_width + shift)]
shift = 3
x_test_4_w = data_test[:-(label_width + shift)]

In [None]:
# Generate predictions and their quantiles so they can be compared with the Hub models.
# NOTE(review): np.percentile is applied (without an axis) to the per-row predictive
# means, so these are percentiles ACROSS the input rows, not quantiles of one
# predictive distribution - confirm this is the intended forecast uncertainty.
y_hat_1_w = model_lr_1_w(x_test_1_w)
post_quantile_1_w = np.percentile(y_hat_1_w.mean(), quantiles, interpolation="midpoint")
y_hat_2_w = model_lr_2_w(x_test_2_w)
post_quantile_2_w = np.percentile(y_hat_2_w.mean(), quantiles, interpolation="midpoint")
y_hat_3_w = model_lr_3_w(x_test_3_w)
post_quantile_3_w = np.percentile(y_hat_3_w.mean(), quantiles, interpolation="midpoint")
y_hat_4_w = model_lr_4_w(x_test_4_w)
post_quantile_4_w = np.percentile(y_hat_4_w.mean(), quantiles, interpolation="midpoint")

In [None]:
# Write the 1- to 4-week-ahead forecasts issued on 2021-08-16.
for _horizon, _values, _target_end in (
    (1, post_quantile_1_w, "2021-08-21"),
    (2, post_quantile_2_w, "2021-08-28"),
    (3, post_quantile_3_w, "2021-09-04"),
    (4, post_quantile_4_w, "2021-09-11"),
):
    create_forecasts_file(country=country, values=_values, date_init="2021-08-16",
                          date_end=_target_end, num_weeks=_horizon, quantiles=quantiles,
                          plot_col_index=0)

In [None]:
# Extend the data window by one week and rebuild the most recent slice.
last_day = "2021-08-21"
df = read_truth_data(
    filepath=data_path + "truth_JHU-Incident Cases.csv",
    filepath_deaths=data_path + "truth_JHU-Incident Deaths.csv",
    country=country,
    period=period,
    last_day=last_day,
)
df_test = df.iloc[-int(len(df) * (test_data_pcnt / 100)):]

In [None]:
# Model inputs for each horizon: the latest slice minus its last label_width + shift rows.
data_test = df_test.to_numpy(dtype=np.float32)
shift = 0
x_test_1_w = data_test[:-(label_width + shift)]
shift = 1
x_test_2_w = data_test[:-(label_width + shift)]
shift = 2
x_test_3_w = data_test[:-(label_width + shift)]
shift = 3
x_test_4_w = data_test[:-(label_width + shift)]

In [None]:
# Generate predictions and their quantiles so they can be compared with the Hub models.
# NOTE(review): np.percentile is applied (without an axis) to the per-row predictive
# means, so these are percentiles ACROSS the input rows, not quantiles of one
# predictive distribution - confirm this is the intended forecast uncertainty.
y_hat_1_w = model_lr_1_w(x_test_1_w)
post_quantile_1_w = np.percentile(y_hat_1_w.mean(), quantiles, interpolation="midpoint")
y_hat_2_w = model_lr_2_w(x_test_2_w)
post_quantile_2_w = np.percentile(y_hat_2_w.mean(), quantiles, interpolation="midpoint")
y_hat_3_w = model_lr_3_w(x_test_3_w)
post_quantile_3_w = np.percentile(y_hat_3_w.mean(), quantiles, interpolation="midpoint")
y_hat_4_w = model_lr_4_w(x_test_4_w)
post_quantile_4_w = np.percentile(y_hat_4_w.mean(), quantiles, interpolation="midpoint")


In [None]:
# Write the 1- to 4-week-ahead forecasts issued on 2021-08-23.
for _horizon, _values, _target_end in (
    (1, post_quantile_1_w, "2021-08-28"),
    (2, post_quantile_2_w, "2021-09-04"),
    (3, post_quantile_3_w, "2021-09-11"),
    (4, post_quantile_4_w, "2021-09-18"),
):
    create_forecasts_file(country=country, values=_values, date_init="2021-08-23",
                          date_end=_target_end, num_weeks=_horizon, quantiles=quantiles,
                          plot_col_index=0)

In [None]:
# Extend the data window by one week and rebuild the most recent slice.
last_day = "2021-08-28"
df = read_truth_data(
    filepath=data_path + "truth_JHU-Incident Cases.csv",
    filepath_deaths=data_path + "truth_JHU-Incident Deaths.csv",
    country=country,
    period=period,
    last_day=last_day,
)
df_test = df.iloc[-int(len(df) * (test_data_pcnt / 100)):]

In [None]:
# Model inputs for each horizon: the latest slice minus its last label_width + shift rows.
data_test = df_test.to_numpy(dtype=np.float32)
shift = 0
x_test_1_w = data_test[:-(label_width + shift)]
shift = 1
x_test_2_w = data_test[:-(label_width + shift)]
shift = 2
x_test_3_w = data_test[:-(label_width + shift)]
shift = 3
x_test_4_w = data_test[:-(label_width + shift)]

In [None]:
# Generate predictions and their quantiles so they can be compared with the Hub models.
# NOTE(review): np.percentile is applied (without an axis) to the per-row predictive
# means, so these are percentiles ACROSS the input rows, not quantiles of one
# predictive distribution - confirm this is the intended forecast uncertainty.
y_hat_1_w = model_lr_1_w(x_test_1_w)
post_quantile_1_w = np.percentile(y_hat_1_w.mean(), quantiles, interpolation="midpoint")
y_hat_2_w = model_lr_2_w(x_test_2_w)
post_quantile_2_w = np.percentile(y_hat_2_w.mean(), quantiles, interpolation="midpoint")
y_hat_3_w = model_lr_3_w(x_test_3_w)
post_quantile_3_w = np.percentile(y_hat_3_w.mean(), quantiles, interpolation="midpoint")
y_hat_4_w = model_lr_4_w(x_test_4_w)
post_quantile_4_w = np.percentile(y_hat_4_w.mean(), quantiles, interpolation="midpoint")


In [None]:
# Write the 1- to 4-week-ahead forecasts issued on 2021-08-30.
for _horizon, _values, _target_end in (
    (1, post_quantile_1_w, "2021-09-04"),
    (2, post_quantile_2_w, "2021-09-11"),
    (3, post_quantile_3_w, "2021-09-18"),
    (4, post_quantile_4_w, "2021-09-25"),
):
    create_forecasts_file(country=country, values=_values, date_init="2021-08-30",
                          date_end=_target_end, num_weeks=_horizon, quantiles=quantiles,
                          plot_col_index=0)



