In [1]:
import numpy as np 
import pandas as pd 
import preprocess_data as prep
import matplotlib.pyplot as plt
from keras.models import load_model
from models import make_predictions

import warnings
warnings.simplefilter(action='ignore', category=pd.errors.SettingWithCopyWarning)


In [2]:
# Two-letter state abbreviation -> two-digit numeric state code.
# (The code for 'GO', 52, matches the leading digits of the GO geocodes in the
# macro-region table; presumably these are the IBGE state codes - confirm.)
state_to_code = dict(
    RJ=33, ES=32, PR=41,
    CE=23, MA=21, MG=31,
    SC=42, PE=26, PB=25,
    RN=24, PI=22, AL=27,
    SE=28, SP=35, RS=43,
    PA=15, AP=16, RR=14,
    RO=11, AM=13, AC=12,
    MT=51, MS=50, GO=52,
    TO=17, DF=53, BA=29,
)

In [3]:
# Run configuration.
# model_name: tag embedded in the saved-model file names that are loaded later.
# STATE: two-letter state abbreviation used to filter the input tables and to
#        build the saved-model and prediction file names.
model_name = 'att'
STATE = 'GO'

In [4]:
# Read the municipality -> health region / macro-region table and keep only the
# rows that belong to the selected state.
df_macro = (
    pd.read_csv('./data/macro_saude.csv', index_col='Unnamed: 0')
    .loc[lambda frame: frame.state == STATE]
)

df_macro.head()

Unnamed: 0,geocode,name_muni,name_region,code_region,name_macro,code_macro,state
354,5208806,Goianira,Central,52001,Centro-Oeste,5208,GO
355,5208905,Goiás,Rio Vermelho,52012,Centro-Oeste,5208,GO
356,5209101,Goiatuba,Sul,52017,Centro Sudeste,5210,GO
393,5213756,Montividiu,Sudoeste I,52015,Sudoeste,5206,GO
394,5213772,Montividiu do Norte,Norte,52008,Centro-Norte,5209,GO


In [5]:
# Shape of the array of distinct region codes, i.e. how many regions the loop below visits.
df_macro['code_region'].unique().shape

(18,)

In [6]:
# Inputs shared by every (year, region) iteration of the prediction loop.

# Flat files: one `pop_norm` value per `regional_geocode`, and the per-region,
# per-year episcanner parameters (R0, peak_week, total_cases, perc_geocode).
df_pop_region = pd.read_csv('./data/pop_regional.csv')
df_all_epi = pd.read_csv('./data/episcanner_regional.csv.gz')

# Project loaders: case data for all states (filtered by `uf` later) and the
# sea-indicator table that is merged onto the case series by index.
df_all = prep.load_cases_data()
enso = prep.load_sea_indicators()

In [7]:
# Restrict the case data to the selected state.
df = df_all[df_all['uf'] == STATE]

In [8]:
# Columns handed to the prep helpers as `columns_to_normalize`.
cols_to_norm = [
    'casos',
    'epiweek',
    'enso',
    'R0',
    'peak_week',
    'total_cases',
    'perc_geocode',
]

# Candidate sea-indicator columns; only those that are also listed in
# `cols_to_norm` are merged into the model input.
list_of_enso_indicators = ['enso', 'iod', 'pdo']
indicators = [name for name in list_of_enso_indicators if name in cols_to_norm]

In [9]:
%%time 

for test_year in np.arange(2013, 2025):

    for region in df_macro.code_region.unique(): 

        df_w = prep.aggregate_data(df, region, column = 'regional_geocode')

        #df_w['inc'] = 10*df_w['casos']/df_pop_region.loc[df_pop_region.regional_geocode==geo]['pop'].values[0]
        df_w['pop_norm'] = df_pop_region.loc[df_pop_region.regional_geocode==region]['pop_norm'].values[0]
        
        df_w = df_w.reset_index().merge( df_all_epi.loc[df_all_epi.code_region == region][['year',
                                                                  'R0', 
                                                                  'peak_week',
                                                                  'total_cases', 
                                                                  'perc_geocode']], how = 'left', left_on = 'year', right_on = 'year').set_index('date')
        
        df_w = df_w.fillna(0)
                
        data = df_w.merge(enso[indicators], left_index = True, right_index = True)
        
        X_train, y_train, norm_values = prep.get_train_data(data.loc[data.year < test_year], columns_to_normalize= cols_to_norm)
        
        X_test, y_test =  prep.get_test_data(norm_values, data, test_year, columns_to_normalize = cols_to_norm, target_1 = False) 

        dates = prep.gen_forecast_dates(test_year)

        if test_year == 2024:
            model = load_model(f'./saved_models/model_{STATE}_2023_{model_name}.keras') 
        else:
            model = load_model(f'./saved_models/model_{STATE}_2022_{model_name}.keras') 


        predicted_ = np.stack([model(X_test.astype(np.float32), training =True) for i in range(100)], axis=2)

        predicted_ = predicted_*norm_values['casos']

        pd.DataFrame(predicted_.reshape(52,100)).to_csv(f'./predictions_ensemble/{STATE}_{region}_{test_year}.csv.gz')

        #df_preds = make_predictions(model, X_test, norm_values, dates = dates)

        
        
            

CPU times: user 23min 33s, sys: 6.65 s, total: 23min 40s
Wall time: 24min 9s
