In [1]:
import ast 
import itertools
import numpy as np
import pandas as pd #from prophet import Prophet
import matplotlib.pyplot as plt
from neuralprophet import NeuralProphet
from sklearn.metrics import mean_squared_error as mse 
from neuralprophet import set_random_seed

set_random_seed(0)
import warnings
warnings.filterwarnings('ignore')




Neste notebook são gerados as previsões após o tunning dos parâmetros nos dados de treino e validação:

In [12]:
PATH = '/Users/eduardoaraujo/Documents/Github/paper-dengue-sc/data/cases'

def get_data(state, geocode): 
    
    df = pd.read_parquet(f'{PATH}/{state}_dengue.parquet')
    
    df = df.loc[df.municipio_geocodigo == geocode]
    
    df.index = pd.to_datetime(df.index)
    
    df = df.sort_index()
    
    df.reset_index(inplace = True)
    
    df = df.rename(columns = {'data_iniSE': 'ds', 'casos': 'y'})
    
    df = df[['ds', 'y']]
    
    df.y = np.log(df.y)
    
    return df

def update_columns(df):
    
    for col in df.columns:
        
        if col.startswith('y'):
            
            df[col] = np.exp(df[col])
            
    return df


def preprocess_data(preds, state, geocode):

    
    preds_df = preds
    
    
    preds_df = preds_df.loc[preds_df.ds >= '2022-01-01']


    preds_df = preds_df.loc[preds_df.ds < '2023-01-01']
    
    preds_df = preds_df[['ds',  'yhat4', 'yhat4 2.5%', 'yhat4 97.5%']].rename(
                            columns = {'ds':'dates',  'yhat4': 'preds', 'yhat4 2.5%':'lower', 'yhat4 97.5%': 'upper'}
    ) 
    
    
    preds_df[['lower', 'preds', 'upper']]  = np.exp(preds_df[['lower', 'preds', 'upper']])
    
    preds_df['adm_0'] = 'BRA'
    preds_df['adm_1'] = state
    preds_df['adm_2'] = geocode

    preds_df.to_csv(f'./preds/preds_2022_{geocode}.csv', index = False)
    return preds_df


In [13]:
df_par = pd.read_csv('best_params.csv', index_col = 'Unnamed: 0')

df_par.head()

Unnamed: 0,geocode,params,best_rmse
0,2704302,"{'n_lags': 8, 'n_changepoints': 19, 'learning_...",74.171573
1,2927408,"{'n_lags': 8, 'n_changepoints': 15, 'learning_...",96.310679
2,2111300,"{'n_lags': 40, 'n_changepoints': 17, 'learning...",17.309432
3,2211001,"{'n_lags': 24, 'n_changepoints': 18, 'learning...",36.72169
4,2800308,"{'n_lags': 40, 'n_changepoints': 23, 'learning...",22.996628


In [4]:
state = 'CE'
geocode = 2304400

In [5]:
%%time 
cities = [2704302, 2927408, 2111300, 2211001,
            2800308, 2408102, 2304400, 2507507, 2611606]
states  = ['AL', 'BA', 'MA', 'PI', 'SE', 'RN', 'CE', 'PB', 'PE'] 

for c, s in zip(cities, states):
    df = get_data(s, c)
    
    pars = ast.literal_eval(df_par.loc[df_par.geocode == geocode].params.values[0])
    
    pars['quantiles']=[0.025, 0.975]
    
    pars['n_forecasts'] =  4
    
    pars['changepoints_range'] = 0.05
    
    df_train_val = df.loc[df.ds < '2022-01-01']
    
    m = NeuralProphet(**pars)
    
    df_train, df_val = m.split_df(df=df_train_val, freq="W", valid_p=0.2)
    
    train = m.fit(df=df_train, freq="W", epochs = 150, validation_df = df_val, early_stopping = 20, progress=None)
    
    preds = m.predict(df)

    preprocess_data(preds, s, c)


INFO - (NP.df_utils._infer_frequency) - Major frequency W-SUN corresponds to 99.521% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - W
INFO - (NP.data.processing._handle_missing_data_single_id) - 1 NaN values in column y were auto-imputed.
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils.return_df_in_original_format) - Returning df with no ID column
INFO - (NP.df_utils._infer_frequency) - Major frequency W-SUN corresponds to 99.8% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - W
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.
INFO - (NP.utils.set_auto_seasonalities) - Disabling weekly seasonality. Run NeuralProphet with weekly_seasonality=True to override this.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_s

CPU times: user 29.8 s, sys: 749 ms, total: 30.5 s
Wall time: 30.6 s


In [9]:
df = pd.read_csv('./preds/preds_2023_2304400.csv')

df

Unnamed: 0,dates,preds,lower,upper,adm_0,adm_1,adm_2
0,2023-01-01,122.066666,61.007408,274.479285,BRA,CE,2304400
1,2023-01-08,115.340713,60.256095,257.721956,BRA,CE,2304400
2,2023-01-15,139.120956,78.958788,316.507553,BRA,CE,2304400
3,2023-01-22,219.758979,140.739125,516.587578,BRA,CE,2304400
4,2023-01-29,375.116467,245.095271,905.990323,BRA,CE,2304400
5,2023-02-05,307.08108,181.062891,575.920384,BRA,CE,2304400
6,2023-02-12,314.811669,185.627948,524.910338,BRA,CE,2304400
7,2023-02-19,320.265626,185.64158,601.876377,BRA,CE,2304400
8,2023-02-26,284.206916,152.732125,484.967112,BRA,CE,2304400
9,2023-03-05,347.531183,180.044651,563.835517,BRA,CE,2304400


In [7]:
preds_df.columns

NameError: name 'preds_df' is not defined