In [1]:
import sys
import numpy as np
import pandas as pd
sys.path.append('../')
from utils import *
from bayes_ensemble.bayes_ensemble import *
import warnings
warnings.filterwarnings("ignore")

In [2]:
df_preds_all, models_by_state, data_all = load_preds(rename = False)

df_preds_all = df_preds_all.loc[df_preds_all.model_id.isin(['21', '22', '25', '26', '27', '28', '30', '34'] )]

df_preds_all['model_id'] = df_preds_all['model_id'].astype(int)

df_preds_all.head()

Unnamed: 0,date,pred,lower,upper,state,model_id
0,2022-10-09,110.690113,64.291013,219.826948,AC,21
2800,2025-08-10,192.641953,142.814506,261.828157,PR,21
2801,2025-08-17,171.137457,119.943438,233.918928,PR,21
2802,2025-08-24,196.875395,132.696765,267.461459,PR,21
2803,2025-08-31,179.610112,136.303073,247.230651,PR,21


In [3]:
df_preds_all.model_id.unique()

array([21, 22, 25, 27, 28, 30, 34])

In [4]:
models_by_state

Unnamed: 0,state,model_id
0,AC,"[21, 22, 27, 28, 30, 34]"
1,AL,"[22, 27, 28, 30, 34]"
2,AM,"[21, 22, 27, 28, 30, 34]"
3,AP,"[21, 22, 27, 28, 30, 34]"
4,BA,"[21, 22, 27, 28, 30, 34]"
5,CE,"[21, 22, 27, 28, 30, 34]"
6,DF,"[21, 22, 27, 28, 30, 34]"
7,ES,"[22, 30, 34]"
8,GO,"[21, 22, 25, 27, 28, 30, 34]"
9,MA,"[21, 22, 27, 28, 30, 34]"


In [5]:
df_preds_all.groupby('state')['model_id'].agg(lambda x: list(set(x))).reset_index()


Unnamed: 0,state,model_id
0,AC,"[34, 21, 22, 27, 28, 30]"
1,AL,"[34, 22, 27, 28, 30]"
2,AM,"[34, 21, 22, 27, 28, 30]"
3,AP,"[34, 21, 22, 27, 28, 30]"
4,BA,"[34, 21, 22, 27, 28, 30]"
5,CE,"[34, 21, 22, 27, 28, 30]"
6,DF,"[34, 21, 22, 27, 28, 30]"
7,ES,"[34, 22, 30]"
8,GO,"[34, 21, 22, 25, 27, 28, 30]"
9,MA,"[34, 21, 22, 27, 28, 30]"


### Applying the CRPS

In [6]:
%%time 
df_for_val_base = pd.DataFrame()

df_23_24 = pd.DataFrame()

df_25 = pd.DataFrame()

df_w = pd.DataFrame()

metric = 'crps'
dist = 'log_normal'
fn_loss = 'median'
for state in ['AM', 'CE', 'GO', 'MG', 'PR']: 
    
    data_23, data_24, preds_23, preds_24, preds_25, models = format_data(state, models_by_state, data_all, df_preds_all)
    
    ens_23 = Ensemble(df= preds_23,
            order_models = models, dist = dist, fn_loss = fn_loss)

    weights_23 = ens_23.compute_weights(df_obs=data_23, metric = metric)
    
    df_23_in = ens_23.apply_ensemble(weights = weights_23['weights'])

    ens_24 = Ensemble(df= preds_24,
            order_models = models,  dist = dist, fn_loss = fn_loss)

    df_24_out = ens_24.apply_ensemble(weights = weights_23['weights'])

    df_23_24_ = pd.concat([df_23_in, df_24_out], ignore_index = True)

    df_23_24_['state'] = state
 
    df_23_24 = pd.concat([df_23_24, df_23_24_], ignore_index = True)

    #### Forecast 2025
    
    weights_24 = ens_24.compute_weights(df_obs=data_24, metric = metric)

    ens_25 =  Ensemble(df = preds_25,
            order_models = models,  dist = dist, fn_loss = fn_loss)

    df_25_23 = ens_25.apply_ensemble(weights = weights_23['weights'])

    df_25_24 = ens_25.apply_ensemble(weights = weights_24['weights'])


    df_25_ = df_25_23.rename(columns = {'pred':'pred_ensemble_23',
                           'lower':'lower_ensemble_23', 
                           'upper':'upper_ensemble_23'}).merge(df_25_24.rename(columns = {'pred':'pred_ensemble_24',
                           'lower':'lower_ensemble_24', 
                           'upper':'upper_ensemble_24'}), left_on = 'date', right_on = 'date' )

    df_25_['state'] = state


    df_25 = pd.concat([df_25, df_25_], ignore_index = True)

    

    df_w_ = pd.DataFrame([[weights_23['weights'], 2023, state]], 
             #[weights_24['weights'], 2024, state]], 
                          columns = ['weights', 'year', 'state'])


    df_w = pd.concat([df_w, df_w_])
    

CPU times: user 3min 2s, sys: 795 ms, total: 3min 3s
Wall time: 3min 11s


In [7]:
df_w.to_csv('weights_E2.csv', index = False)

In [9]:
df_23_24.to_csv(f'../predictions/ensemble_2023_2024_E2.csv', index = False)
df_25.to_csv(f'../predictions/ensemble_2025_E2.csv', index = False)