# Extracting the forecasts

In [3]:
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
from pickle import load
import datetime
import os

In [4]:
# Load the extended original dataset and parse its 'DATA' column as datetimes.
# format='mixed' makes pandas infer the format of each value individually.
df_original = pd.read_csv('../data/df_original_extended.csv').assign(
    DATA=lambda frame: pd.to_datetime(frame['DATA'], format='mixed')
)

In [5]:
# Display the column labels of the loaded dataset as a quick schema check.
df_original.columns

Index(['DATA', 'CODIGO_MUNICIPIO_6', 'NOVOS_CASOS_SRAG', 'OBITOS_NOVOS',
       'OBITOS', 'OBITOS_NOVOS_MEDIA_MOVEL_7_DIAS', 'TAXA_OBITOS_NOVOS',
       'TAXA_OBITOS_NOVOS_MEDIA_MOVEL_7_DIAS', 'TAXA_OBITOS', 'casosNovos',
       'casosAcumulado', 'CASOS_NOVOS_MEDIA_MOVEL_7_DIAS', 'MUNICIPIO',
       'SIGLA_ESTADO', 'ESTADO', 'REGIAO', 'POPULACAO_2022',
       'NUMERO_REPRODUCAO_EFETIVO_SRAG_MEDIA',
       'NUMERO_REPRODUCAO_EFETIVO_SRAG_VARIANCIA',
       'NUMERO_REPRODUCAO_EFETIVO_SRAG_QUANTIL_0.025',
       'NUMERO_REPRODUCAO_EFETIVO_SRAG_MEDIANA',
       'NUMERO_REPRODUCAO_EFETIVO_SRAG_QUANTIL_0.975',
       'NUMERO_REPRODUCAO_EFETIVO_MEDIA',
       'NUMERO_REPRODUCAO_EFETIVO_VARIANCIA',
       'NUMERO_REPRODUCAO_EFETIVO_QUANTIL_0.025',
       'NUMERO_REPRODUCAO_EFETIVO_MEDIANA',
       'NUMERO_REPRODUCAO_EFETIVO_QUANTIL_0.975',
       'NUMERO_REPRODUCAO_EFETIVO_ATRASADO_MEDIA',
       'NUMERO_REPRODUCAO_EFETIVO_ATRASADO_VARIANCIA',
       'NUMERO_REPRODUCAO_EFETIVO_ATRASADO_QUANTIL

In [6]:
# Identifiers of the forecasting moments to process. Each identifier is used
# verbatim as the suffix of the test-set CSV, the two transformer pickles and
# the saved model files.
list_id_forecasting_moment = [
    '2020-04-25 00:00:00_7',
    '2020-07-18 00:00:00_14',
    '2020-10-10 00:00:00_21',
    '2021-01-02 00:00:00_84',
    '2021-03-27 00:00:00_84',
    '2021-06-19 00:00:00_84',
    '2021-09-11 00:00:00_84',
    '2021-12-04 00:00:00_84',
    '2022-02-26 00:00:00_84',
]

# Number of saved models per forecasting moment
# ('model/best_model_<id>_0.keras' ... 'model/best_model_<id>_19.keras').
N_EXECUTIONS = 20

delta = datetime.timedelta(days=1)

# One small frame per (forecasting moment, execution, municipality) is
# collected here and concatenated a single time at the end. Growing a
# DataFrame with pd.concat inside the loop re-copies all previous rows on
# every iteration (quadratic cost).
forecast_frames = []

for id_forecasting_moment in list_id_forecasting_moment:
    df = pd.read_csv('data/df_test_'+id_forecasting_moment+'.csv')
    df['max_date_to_fit'] = pd.to_datetime(df['max_date_to_fit'])

    # NOTE(security): pickle can execute arbitrary code while loading; only
    # load transformer files that were produced by a trusted training run.
    file_path = 'data/feature_transformer_train_'+id_forecasting_moment+'.pkl'
    with open(file_path, 'rb') as file:
        feature_transformer_train = load(file)
    file_path = 'data/target_transformer_train_'+id_forecasting_moment+'.pkl'
    with open(file_path, 'rb') as file:
        target_transformer_train = load(file)

    print(id_forecasting_moment)

    if df.empty:
        # Nothing to forecast for this moment; avoid stacking an empty list.
        continue

    # The inputs do not depend on the model, so parse and transform them once
    # per forecasting moment instead of once per execution. Column 'X' holds
    # each input window as text of the form '[v1 v2 ...]'.
    # NOTE(review): transforming all rows in one call assumes the fitted
    # transformers treat rows independently (true for the usual scikit-learn
    # scalers) — confirm against the training notebook.
    X = np.vstack([np.fromstring(x_text.strip('[]'), sep=' ') for x_text in df['X']])
    X_transformed = feature_transformer_train.transform(X)
    # Add a trailing axis of size 1: (rows, window length, 1).
    X_transformed = np.reshape(X_transformed, (X.shape[0], X.shape[1], 1))

    for execution in range(N_EXECUTIONS):
        model_name = 'model/best_model_'+id_forecasting_moment+'_'+str(execution)+'.keras'
        model = load_model(model_name)

        # One batched prediction per model instead of one call per row;
        # verbose=0 silences the per-call progress output.
        y_hat = model.predict(X_transformed, verbose=0)
        y_hat = target_transformer_train.inverse_transform(y_hat)

        # y_hat[k] is the forecast for the k-th row of df, so iterating the
        # columns in parallel keeps the original row order.
        for forecast, municipality, municipality_id, max_date_to_fit in zip(
                y_hat, df['municipality'], df['municipality_id'], df['max_date_to_fit']):
            df_forecasting_municipality = pd.DataFrame({
                'forecast': forecast,
                'municipality': municipality,
                'municipality_id': municipality_id,
                'max_date_to_fit': max_date_to_fit,
                # Forecast step k (1-based) is dated k days after the fit cut-off.
                'date': [max_date_to_fit + (delta * (i+1)) for i in range(len(forecast))],
                'execution': execution,
            })

            #=========== For forecasting using stationary timeseries
            #=== Begin
            # first_date = max_date_to_fit + pd.DateOffset(days=1)
            # reference_value = df_original[(df_original['CODIGO_MUNICIPIO_6']==municipality_id) & (df_original['DATA']==first_date)]['TAXA_OBITOS_NOVOS_MEDIA_MOVEL_7_DIAS'].values[0]
            # df_forecasting_municipality['forecast'] = df_forecasting_municipality['forecast'] + reference_value
            #=== End

            forecast_frames.append(df_forecasting_municipality)

# Single concatenation; each sub-frame keeps its own 0..horizon-1 index, exactly
# as with the previous incremental concatenation.
df_forecasting = pd.concat(forecast_frames) if forecast_frames else pd.DataFrame()
df_forecasting.to_csv('data/df_forecasting.csv')

2020-04-25 00:00:00_7
















































































2020-07-18 00:00:00_14
















































































2020-10-10 00:00:00_21
















































































2021-01-02 00:00:00_84
















































































2021-03-27 00:00:00_84
















































































2021-06-19 00:00:00_84
















































































2021-09-11 00:00:00_84
















































































2021-12-04 00:00:00_84
















































































2022-02-26 00:00:00_84


















































































In [7]:
# Show the distinct (fit cut-off date, municipality) pairs present in the forecasts.
pair_columns = ['max_date_to_fit', 'municipality']
df_forecasting.loc[:, pair_columns].drop_duplicates()

Unnamed: 0,max_date_to_fit,municipality
0,2020-04-25,Aparecida de Goiânia
0,2020-04-25,Aracaju
0,2020-04-25,Belo Horizonte
0,2020-04-25,Belém
0,2020-04-25,Brasília
...,...,...
0,2022-02-26,São José dos Campos
0,2022-02-26,São Luís
0,2022-02-26,São Paulo
0,2022-02-26,Teresina


In [8]:
# Average the forecast over all rows that share the same municipality, fit
# cut-off date and target date (i.e. across executions), then persist it.
group_keys = ['municipality', 'municipality_id', 'max_date_to_fit', 'date']
df_forecasting_mean = (
    df_forecasting
    .groupby(group_keys)['forecast']
    .agg('mean')
    .reset_index()
)
df_forecasting_mean.to_csv('data/df_forecasting_mean.csv')