In [1]:
import ast 
import itertools
import numpy as np
import pandas as pd #from prophet import Prophet
import matplotlib.pyplot as plt
from neuralprophet import NeuralProphet
from sklearn.metrics import mean_squared_error as mse 
from neuralprophet import set_random_seed

set_random_seed(0)
import warnings
warnings.filterwarnings('ignore')




Neste notebook são geradas as previsões após o tuning dos parâmetros nos dados de treino e validação:

In [2]:
# Directory holding one `<state>_dengue.parquet` file per state.
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
PATH = '/Users/eduardoaraujo/Documents/Github/paper-dengue-sc/data/cases'


def get_data(state, geocode):
    """Load the log-transformed case series of one municipality.

    Reads ``{PATH}/{state}_dengue.parquet``, keeps the rows whose
    ``municipio_geocodigo`` equals ``geocode``, converts the index to
    datetimes and orders the rows by it.

    Parameters
    ----------
    state : str
        State abbreviation used to build the parquet file name.
    geocode : int
        Value matched against the ``municipio_geocodigo`` column.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``ds`` (the former ``data_iniSE`` index) and ``y``
        (natural logarithm of ``casos``).
    """
    cases = pd.read_parquet(f'{PATH}/{state}_dengue.parquet')
    cases = cases.loc[cases.municipio_geocodigo == geocode]
    cases.index = pd.to_datetime(cases.index)

    # The index must be named `data_iniSE`; otherwise the rename below does
    # not produce a `ds` column and the column selection raises KeyError.
    series = (
        cases.sort_index()
        .reset_index()
        .rename(columns={'data_iniSE': 'ds', 'casos': 'y'})[['ds', 'y']]
    )

    # Model the series on the log scale (a count of zero maps to -inf).
    series.y = np.log(series.y)

    return series

def update_columns(df):
    """Exponentiate, in place, every column whose name starts with ``'y'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with string column names; it is modified directly.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    for name in df.columns:
        if not name.startswith('y'):
            # Columns not starting with 'y' are left untouched.
            continue
        df[name] = np.exp(df[name])

    return df


def preprocess_data(preds, state, geocode):
    """Build the 2023 forecast table from a prediction frame and save it as CSV.

    Keeps the rows with ``ds >= '2023-01-01'``, selects the ``yhat4`` point
    forecast with its 2.5% / 97.5% quantiles, exponentiates the three value
    columns (``get_data`` stores the target on the log scale) and adds the
    administrative identifiers.

    Parameters
    ----------
    preds : pandas.DataFrame
        Must contain the columns ``ds``, ``yhat4``, ``yhat4 2.5%`` and
        ``yhat4 97.5%``. It is not modified.
    state : str
        Stored in the ``adm_1`` column.
    geocode : int
        Stored in the ``adm_2`` column and used in the output file name.

    Returns
    -------
    pandas.DataFrame
        Columns ``dates``, ``preds``, ``lower``, ``upper``, ``adm_0``,
        ``adm_1`` and ``adm_2``. The same table is written to
        ``./preds/preds_2023_{geocode}.csv``.
    """
    import os  # local import so this cell does not depend on an extra top-level import

    value_cols = ['lower', 'preds', 'upper']

    # Select rows and columns in one `.loc` call and copy explicitly, so the
    # assignments below act on an independent frame rather than on a slice.
    preds_df = (
        preds.loc[preds.ds >= '2023-01-01', ['ds', 'yhat4', 'yhat4 2.5%', 'yhat4 97.5%']]
        .copy()
        .rename(columns={'ds': 'dates', 'yhat4': 'preds',
                         'yhat4 2.5%': 'lower', 'yhat4 97.5%': 'upper'})
    )

    # Back-transform from the log scale.
    preds_df[value_cols] = np.exp(preds_df[value_cols])

    preds_df['adm_0'] = 'BRA'
    preds_df['adm_1'] = state
    preds_df['adm_2'] = geocode

    # `to_csv` does not create missing directories; make sure the target exists.
    os.makedirs('./preds', exist_ok=True)
    preds_df.to_csv(f'./preds/preds_2023_{geocode}.csv', index = False)
    return preds_df


In [3]:
state, geocode = 'PR', 4108304

# Load the series for this municipality and keep only observations before 2023.
df = get_data(state, geocode).loc[lambda frame: frame.ds < '2023-01-01']

df.tail()

Unnamed: 0,ds,y
673,2022-11-27,5.811141
674,2022-12-04,5.669881
675,2022-12-11,5.774552
676,2022-12-18,5.327876
677,2022-12-25,5.298317


In [4]:
# One row per geocode: the stringified parameter dict and its best RMSE.
df_par = pd.read_csv(
    'best_params.csv',
    index_col='Unnamed: 0',
)

df_par.head()

Unnamed: 0,geocode,params,best_rmse
0,2704302,"{'n_lags': 8, 'n_changepoints': 20, 'learning_...",166.46569
1,2927408,"{'n_lags': 40, 'n_changepoints': 15, 'learning...",21.762376
2,2111300,"{'n_lags': 8, 'n_changepoints': 22, 'learning_...",30.012326
3,2211001,"{'n_lags': 40, 'n_changepoints': 16, 'learning...",183.789246
4,2800308,"{'n_lags': 8, 'n_changepoints': 15, 'learning_...",45.831992


In [5]:
# Municipality selected at notebook level.
state, geocode = 'CE', 2304400

In [6]:
%%time 
cities = [2704302, 2927408, 2111300, 2211001,
            2800308, 2408102, 2304400, 2507507, 2611606]
states  = ['AL', 'BA', 'MA', 'PI', 'SE', 'RN', 'CE', 'PB', 'PE'] 

for c, s in zip(cities, states):
    df = get_data(s, c)
    
    pars = ast.literal_eval(df_par.loc[df_par.geocode == geocode].params.values[0])
    
    pars['quantiles']=[0.025, 0.975]
    
    pars['n_forecasts'] =  4
    
    pars['changepoints_range'] = 0.05
    
    df_train_val = df.loc[df.ds < '2023-01-01']
    
    m = NeuralProphet(**pars)
    
    df_train, df_val = m.split_df(df=df_train_val, freq="W", valid_p=0.2)
    
    train = m.fit(df=df_train, freq="W", epochs = 150, validation_df = df_val, early_stopping = 20, progress=None)
    
    preds = m.predict(df)

    preprocess_data(preds, s, c)


INFO - (NP.df_utils._infer_frequency) - Major frequency W-SUN corresponds to 99.558% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - W
INFO - (NP.data.processing._handle_missing_data_single_id) - 1 NaN values in column y were auto-imputed.
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils._infer_frequency) - Major frequency W-SUN corresponds to 99.818% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - W
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.
INFO - (NP.utils.set_auto_seasonalities) - Disabling weekly seasonality. Run NeuralProphet with weekly_seasonality=True to override this.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily

CPU times: user 34.4 s, sys: 5.51 s, total: 39.9 s
Wall time: 37.4 s


In [7]:
# Read back the forecast file written for geocode 2304400 to inspect it.
df = pd.read_csv(
    './preds/preds_2023_2304400.csv',
)

df

Unnamed: 0,dates,preds,lower,upper,adm_0,adm_1,adm_2
0,2023-01-01,144.221895,62.955637,212.575208,BRA,CE,2304400
1,2023-01-08,159.82516,72.226574,239.46263,BRA,CE,2304400
2,2023-01-15,205.452206,97.583973,303.946069,BRA,CE,2304400
3,2023-01-22,338.933444,169.859418,516.923186,BRA,CE,2304400
4,2023-01-29,661.070968,332.955917,1032.705271,BRA,CE,2304400
5,2023-02-05,492.918148,250.279487,749.636822,BRA,CE,2304400
6,2023-02-12,562.441335,272.742763,844.535437,BRA,CE,2304400
7,2023-02-19,419.985932,197.72113,624.268558,BRA,CE,2304400
8,2023-02-26,389.014775,180.310645,583.645448,BRA,CE,2304400
9,2023-03-05,409.923952,193.645075,624.994711,BRA,CE,2304400


In [20]:
# Columns of the raw prediction frame from the last fitted city. `preds_df`
# only exists inside `preprocess_data`, so referencing it here fails on a
# fresh kernel; `preds` is the notebook-level frame assigned in the fit loop.
preds.columns

Index(['ds', 'y', 'yhat1', 'yhat2', 'yhat3', 'yhat4', 'yhat1 2.5%',
       'yhat2 2.5%', 'yhat3 2.5%', 'yhat4 2.5%', 'yhat1 97.5%', 'yhat2 97.5%',
       'yhat3 97.5%', 'yhat4 97.5%', 'ar1', 'ar2', 'ar3', 'ar4', 'trend',
       'season_yearly'],
      dtype='object')