# Visualizing the forecasts

In [13]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd
from code import util
import matplotlib.dates as mdates

## Reading data

In [14]:
# Load the SIRDS forecasts, parse both date columns, rename the model's
# forecast column, and keep only the merge keys plus that forecast.
df_results_sirds = (
    pd.read_csv('../sirds/data/df_deaths_forecasted.csv')
    .assign(
        max_date_to_fit=lambda frame: pd.to_datetime(frame['max_date_to_fit']),
        # format='mixed' lets pandas infer the format of each value individually.
        date=lambda frame: pd.to_datetime(frame['date'], format='mixed'),
    )
    .rename(columns={'forecast': 'forecast_sirds'})
    [['municipality_id', 'municipality', 'max_date_to_fit', 'date', 'estimation', 'forecast_sirds']]
)

In [15]:
# Load the univariate LSTM forecasts, parse both date columns, and align the
# column names with the other models ('execution' becomes 'estimation') so
# the frames can later be merged on a common set of keys.
df_results_lstm = (
    pd.read_csv('../lstm/data/df_forecasting.csv')
    .assign(
        max_date_to_fit=lambda frame: pd.to_datetime(frame['max_date_to_fit']),
        # format='mixed' lets pandas infer the format of each value individually.
        date=lambda frame: pd.to_datetime(frame['date'], format='mixed'),
    )
    .rename(columns={'execution': 'estimation', 'forecast': 'forecast_lstm_univariate'})
    [['municipality_id', 'municipality', 'max_date_to_fit', 'date', 'estimation', 'forecast_lstm_univariate']]
)

In [16]:
# Load the hybrid SIRDS forecasts, parse both date columns, rename the model's
# forecast column, and keep only the merge keys plus that forecast.
df_results_sirds_hybrid = (
    pd.read_csv('../sirds_hybrid/data/df_deaths_forecasted.csv')
    .assign(
        max_date_to_fit=lambda frame: pd.to_datetime(frame['max_date_to_fit']),
        # format='mixed' lets pandas infer the format of each value individually.
        date=lambda frame: pd.to_datetime(frame['date'], format='mixed'),
    )
    .rename(columns={'forecast': 'forecast_sirds_hybrid'})
    [['municipality_id', 'municipality', 'max_date_to_fit', 'date', 'estimation', 'forecast_sirds_hybrid']]
)

In [17]:
# Load the hybrid LSTM forecasts, parse both date columns, and align the
# column names with the other models ('execution' becomes 'estimation') so
# the frames can later be merged on a common set of keys.
df_results_lstm_hybrid = (
    pd.read_csv('../lstm_hybrid_simple_without_vaccination/data/df_forecasting.csv')
    .assign(
        max_date_to_fit=lambda frame: pd.to_datetime(frame['max_date_to_fit']),
        # format='mixed' lets pandas infer the format of each value individually.
        date=lambda frame: pd.to_datetime(frame['date'], format='mixed'),
    )
    .rename(columns={'execution': 'estimation', 'forecast': 'forecast_lstm_hybrid'})
    [['municipality_id', 'municipality', 'max_date_to_fit', 'date', 'estimation', 'forecast_lstm_hybrid']]
)

### Combining forecasts

In [18]:
# Outer-join the SIRDS and univariate LSTM forecasts on the shared key columns.
# An outer join keeps rows present in only one of the two frames; the other
# model's forecast is NaN for those rows.
merge_keys = ['municipality_id', 'municipality', 'max_date_to_fit', 'date', 'estimation']
df = pd.merge(df_results_sirds, df_results_lstm, on=merge_keys, how='outer')

In [19]:
# Outer-join the two hybrid models onto the combined frame, one after the
# other, using the same key columns as the first merge.
merge_keys = ['municipality_id', 'municipality', 'max_date_to_fit', 'date', 'estimation']
for df_model in (df_results_sirds_hybrid, df_results_lstm_hybrid):
    df = df.merge(df_model, how='outer', on=merge_keys)

In [20]:
# Inspect the combined frame; pandas abbreviates the display to the first and
# last rows, which shows that some rows only carry a subset of the forecasts.
df

Unnamed: 0,municipality_id,municipality,max_date_to_fit,date,estimation,forecast_sirds,forecast_lstm_univariate,forecast_sirds_hybrid,forecast_lstm_hybrid
0,280030.0,Aracaju,2020-04-25,2020-04-26,0.0,0.026982,0.014681,0.008211,0.049671
1,280030.0,Aracaju,2020-04-25,2020-04-27,0.0,0.028788,0.009759,0.007756,0.051311
2,280030.0,Aracaju,2020-04-25,2020-04-28,0.0,0.030689,0.006912,0.007353,0.050429
3,280030.0,Aracaju,2020-04-25,2020-04-29,0.0,0.032698,0.010024,0.006966,0.052750
4,280030.0,Aracaju,2020-04-25,2020-04-30,0.0,0.034857,0.005650,0.006600,0.054124
...,...,...,...,...,...,...,...,...,...
617675,291080.0,Feira de Santana,2020-04-25,2020-05-19,19.0,,0.000326,,
617676,291080.0,Feira de Santana,2020-04-25,2020-05-20,19.0,,0.000505,,
617677,291080.0,Feira de Santana,2020-04-25,2020-05-21,19.0,,0.000579,,
617678,291080.0,Feira de Santana,2020-04-25,2020-05-22,19.0,,0.001109,,


## Treating data

## Forecasting using the mean

In [21]:
# Ensemble forecast: row-wise mean of the member models' forecasts.
# 'forecast_lstm_hybrid' is currently not part of the ensemble (the four-model
# variant was disabled). DataFrame.mean skips NaN by default, so each row is
# averaged over whichever member forecasts are present.
ensemble_members = ['forecast_sirds', 'forecast_lstm_univariate', 'forecast_sirds_hybrid']
df['forecast'] = df[ensemble_members].mean(axis=1)

In [22]:
# Keep only the rows that have an ensemble forecast, i.e. at least one member
# model produced a value. `notna()` is the idiomatic form of `isna() == False`
# and selects exactly the same rows.
df = df.loc[df['forecast'].notna()]

In [23]:
# Inspect the frame after adding the ensemble 'forecast' column and dropping
# the rows without a forecast.
df

Unnamed: 0,municipality_id,municipality,max_date_to_fit,date,estimation,forecast_sirds,forecast_lstm_univariate,forecast_sirds_hybrid,forecast_lstm_hybrid,forecast
0,280030.0,Aracaju,2020-04-25,2020-04-26,0.0,0.026982,0.014681,0.008211,0.049671,0.016625
1,280030.0,Aracaju,2020-04-25,2020-04-27,0.0,0.028788,0.009759,0.007756,0.051311,0.015434
2,280030.0,Aracaju,2020-04-25,2020-04-28,0.0,0.030689,0.006912,0.007353,0.050429,0.014985
3,280030.0,Aracaju,2020-04-25,2020-04-29,0.0,0.032698,0.010024,0.006966,0.052750,0.016563
4,280030.0,Aracaju,2020-04-25,2020-04-30,0.0,0.034857,0.005650,0.006600,0.054124,0.015702
...,...,...,...,...,...,...,...,...,...,...
617675,291080.0,Feira de Santana,2020-04-25,2020-05-19,19.0,,0.000326,,,0.000326
617676,291080.0,Feira de Santana,2020-04-25,2020-05-20,19.0,,0.000505,,,0.000505
617677,291080.0,Feira de Santana,2020-04-25,2020-05-21,19.0,,0.000579,,,0.000579
617678,291080.0,Feira de Santana,2020-04-25,2020-05-22,19.0,,0.001109,,,0.001109


## Saving

In [24]:
# Persist the combined frame with every individual forecast row.
df.to_csv('data/df_forecasting.csv')

# Collapse rows that share the same municipality, fit date and forecast date
# (i.e. average across the remaining 'estimation' dimension) into one mean
# forecast per group, then persist that summary as well.
group_keys = ['municipality', 'municipality_id', 'max_date_to_fit', 'date']
forecast_by_group = df.groupby(group_keys)['forecast'].mean()
df_forecasting_mean = forecast_by_group.reset_index()
df_forecasting_mean.to_csv('data/df_forecasting_mean.csv')