In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pylab as plt
from matplotlib.pylab import rcParams
import pmdarima as pm
from pmdarima import auto_arima,arima
import warnings
# get functions from utils.py
from utils import train_data,eval_metrics,plot_train_test
from statsmodels.tsa.statespace.sarimax import SARIMAX
from joblib import dump, load
import gc
from statsmodels.graphics.tsaplots import plot_acf

In [None]:
# Load the ARI dataset with explicit column dtypes, parse `truth_date` as a
# datetime, then discard the 'Unnamed: 0' column (presumably an index column
# written by an earlier export - verify) and renumber the rows from 0.
ari = (
    pd.read_csv(
        "data_ari.csv",
        sep=",",
        dtype={
            'location': str,
            'year_week': str,
            'value': np.float32,
            'relative_humidity_2m': np.float64,
            'temperature_2m_max': np.float64,
            'temperature_2m_min': np.float64,
        },
        parse_dates=['truth_date'],
    )
    .drop(columns=['Unnamed: 0'])
    .reset_index(drop=True)
)


In [None]:
# Load the ILI dataset with explicit column dtypes, parse `truth_date` as a
# datetime, then discard the 'Unnamed: 0' column (presumably an index column
# written by an earlier export - verify) and renumber the rows from 0.
ili = (
    pd.read_csv(
        "data_ili.csv",
        sep=",",
        dtype={
            'location': str,
            'year_week': str,
            'value': np.float32,
            'relative_humidity_2m': np.float64,
            'temperature_2m_max': np.float64,
            'temperature_2m_min': np.float64,
        },
        parse_dates=['truth_date'],
    )
    .drop(columns=['Unnamed: 0'])
    .reset_index(drop=True)
)

In [None]:
# Two independent, empty metric tables (one per dataset). Despite the "mape"
# name, the columns hold MAE and RMSE per location / model / horizon.
mape_ari, mape_ili = (
    pd.DataFrame(columns=['location', 'model', 'prediction_window', 'mae', 'rmse'])
    for _ in range(2)
)

In [None]:
def arima_model_auto(train,exogenous_var = None,loc=None,type=None):
    """Select a non-seasonal ARIMA order with ``pm.auto_arima`` and save the fitted model.

    Parameters
    ----------
    train : mapping or DataFrame
        Must provide a ``"value"`` entry holding the series to model.
    exogenous_var : optional
        Exogenous regressors forwarded to ``auto_arima``. When given, the model
        is saved under an ``arimax_`` prefix, otherwise under ``arima_``.
    loc, type : optional
        Only used to build the output file name
        ``models/<prefix>_model_<loc>_<type>.joblib``.

    Returns
    -------
    The ``'order'`` entry of the selected model's ``get_params()``.
    """
    # Local import: `os` is not among the notebook's top-level imports.
    import os

    # Stepwise search over p, q in [0, 4] with d chosen by the ADF test (max 2).
    # NOTE(review): newer pmdarima releases name the regressor argument `X`;
    # confirm that `exogenous=` is still honoured by the installed version.
    # NOTE(review): `start_d` does not appear to be a documented auto_arima
    # parameter - confirm it is not silently ignored.
    ARIMA_model = pm.auto_arima(
        train["value"],
        start_p=0, start_q=0,
        max_p=4, max_q=4,
        d=None,
        start_d=0,
        max_d=2,
        exogenous=exogenous_var,
        seasonal=False,
        stepwise=True,
        test='adf',
        trace=False,
        suppress_warnings=True,
        error_action='ignore'
    )
    order_arima = ARIMA_model.get_params()['order']

    # Make sure the output folder exists so the dump below cannot fail on a
    # fresh checkout where `models/` has not been created yet.
    os.makedirs("models", exist_ok=True)

    # Save the model; the prefix records whether regressors were supplied.
    prefix = "arimax" if exogenous_var is not None else "arima"
    dump(ARIMA_model, f"models/{prefix}_model_{loc}_{type}.joblib")

    return order_arima


In [None]:
def sarima_model_auto(train,sp = 52,exogenous_var = None,loc=None,type=None):
    """Select a seasonal ARIMA order with ``pm.auto_arima`` and save the fitted model.

    Parameters
    ----------
    train : mapping or DataFrame
        Must provide a ``"value"`` entry holding the series to model.
    sp : int, default 52
        Seasonal period, passed to ``auto_arima`` as ``m``.
    exogenous_var : optional
        Exogenous regressors forwarded to ``auto_arima``. When given, the model
        is saved under a ``sarimax_`` prefix, otherwise under ``sarima_``.
    loc, type : optional
        Only used to build the output file name
        ``models/<prefix>_model_<loc>_<type>.joblib``.

    Returns
    -------
    tuple
        ``(order, seasonal_order)`` taken from the selected model's
        ``get_params()``.
    """
    # Local import: `os` is not among the notebook's top-level imports.
    import os

    # Stepwise search over p, q in [0, 2] and P, Q in [0, 1]; d is chosen by the
    # ADF test (max 2) while seasonal differencing is fixed at D=0.
    # NOTE(review): newer pmdarima releases name the regressor argument `X`;
    # confirm that `exogenous=` is still honoured by the installed version.
    # NOTE(review): `start_D` does not appear to be a documented auto_arima
    # parameter - confirm it is not silently ignored.
    SARIMA_model = pm.auto_arima(
        train["value"],
        start_p=0, start_q=0,
        max_p=2, max_q=2,
        start_P=0, max_P=1,
        start_Q=0, max_Q=1,
        start_D=0,D = 0 ,
        max_d=2, max_D=1,
        m=sp,  # seasonal period (52 by default)
        d=None,
        seasonal=True,
        exogenous=exogenous_var,
        test='adf',
        trace=False,
        stepwise=True,
        suppress_warnings=True,
        error_action='ignore'
    )
    selected = SARIMA_model.get_params()
    order_sarima = selected['order']
    seasonal_order = selected['seasonal_order']

    # Make sure the output folder exists so the dump below cannot fail on a
    # fresh checkout where `models/` has not been created yet.
    os.makedirs("models", exist_ok=True)

    # Save the model; the prefix records whether regressors were supplied.
    prefix = "sarimax" if exogenous_var is not None else "sarima"
    dump(SARIMA_model, f"models/{prefix}_model_{loc}_{type}.joblib")

    return order_sarima,seasonal_order


In [None]:
def forecast_arima_sarima_model(train, test, mape, order_model, seasonal_order_model=(0,0,0,0), model_name="no_model_def", country="no_country_def",exogenous_var=None):
    """Rolling-origin 1- to 4-step-ahead SARIMAX forecasts over ``test`` plus error metrics.

    For every row ``i`` of ``test`` a SARIMAX model with the given orders is
    refitted on ``train["value"]`` extended with the first ``i`` observed test
    values, and up to four steps are forecast. The ``h``-step forecast is stored
    in column ``prediction_{h}_weeks`` on the row it is a forecast *for*
    (row ``i + h - 1``), so each prediction column is directly comparable with
    ``value`` on the same row.

    Parameters
    ----------
    train, test : DataFrame
        Must contain a ``"value"`` column and, when ``exogenous_var`` is given,
        those columns as well. ``test`` is not modified.
    mape : DataFrame
        Metrics table to which four rows (one per horizon) are appended.
    order_model : tuple
        Passed to ``SARIMAX`` as ``order``.
    seasonal_order_model : tuple, default (0, 0, 0, 0)
        Passed to ``SARIMAX`` as ``seasonal_order``.
    model_name, country : str
        Written to the ``model`` and ``location`` columns of the metric rows.
    exogenous_var : list of str or None
        Column names used as exogenous regressors. The future regressor values
        are read from ``test`` itself, i.e. treated as known at forecast time.

    Returns
    -------
    tuple
        ``(mape, test_aux)``: the extended metrics table and a copy of ``test``
        with the four prediction columns added.
    """
    max_horizon = 4
    pred_cols = [f"prediction_{h+1}_weeks" for h in range(max_horizon)]

    # Work on a copy and prepare the prediction columns.
    test_aux = test.copy()
    for col in pred_cols:
        test_aux[col] = np.nan
    # Positional column indices: writes below are purely positional, so they do
    # not depend on the index labels of `test` being unique.
    col_pos = [test_aux.columns.get_loc(col) for col in pred_cols]

    n_test = len(test_aux)

    # Rolling forecast
    for i in range(n_test):
        observed = test_aux.iloc[:i]
        # Forecast 1 to 4 steps ahead, fewer when the end of the test set is near.
        forecast_steps = min(max_horizon, n_test - i)

        # Combine train and the test values observed so far.
        train_series = pd.concat([train["value"], observed["value"]])
        if exogenous_var is not None:
            exog_train = pd.concat([train[exogenous_var], observed[exogenous_var]])
            exog_forecast = test_aux.iloc[i:i + forecast_steps][exogenous_var]
        else:
            # No regressors: `exog` must stay None (slicing it would fail).
            exog_train = None
            exog_forecast = None

        # Fit model
        model = SARIMAX(train_series, order=order_model, seasonal_order=seasonal_order_model, exog=exog_train)
        model_fit = model.fit(disp=False)
        forecast = model_fit.forecast(steps=forecast_steps, exog=exog_forecast)
        forecast_values = np.asarray(forecast, dtype=float)

        # Store each h-step forecast on the row of the week it targets.
        for h in range(forecast_steps):
            test_aux.iloc[i + h, col_pos[h]] = forecast_values[h]

    # Evaluate predictions: predictions already sit on their target row, so they
    # are compared with `value` on the same row; the first h rows of horizon
    # h+1 have no forecast and are excluded.
    actual = test_aux["value"]
    metric_rows = []
    for h, col in enumerate(pred_cols):
        preds = test_aux[col]
        valid_idx = actual.notna() & preds.notna()
        if valid_idx.any():
            mae, rmse = eval_metrics(actual[valid_idx], preds[valid_idx])
        else:
            # Test set shorter than this horizon: nothing to score.
            mae, rmse = np.nan, np.nan
        metric_rows.append([country, model_name, f"{h+1}_week", mae, rmse])

    mape = pd.concat([
        mape,
        pd.DataFrame(metric_rows,
                     columns=['location', 'model', 'prediction_window', 'mae', 'rmse'])
    ], ignore_index=True)

    return mape, test_aux


In [None]:
# Re-create an empty ARI metrics table, discarding any rows accumulated so far.
# The ILI counterpart below is left disabled.
mape_ari = pd.DataFrame(columns=['location','model','prediction_window','mae','rmse'])
#mape_ili = pd.DataFrame(columns=['location','model','prediction_window','mae','rmse'])

In [None]:
#mape_ari =pd.read_csv("mape_ari_arima_sarima.csv",index_col=0)
#mape_ili= pd.read_csv("mape_ili_arima_sarima.csv",index_col=0)


In [None]:
# Distinct location codes present in each dataset, in order of first appearance.
name_ari, name_ili = (frame["location"].unique() for frame in (ari, ili))

In [None]:
# Draw the autocorrelation plot of the ARI `value` series for every location.
for i in name_ari:
    print(f"Processing ARI for location: {i}")  
    df = ari[ari['location']==i]
    plot_acf(df['value'], lags=52)  # 52 lags are plotted (about one year if the series is weekly - TODO confirm); the previous "2 years (104 weeks)" note did not match lags=52


In [None]:
# Baseline ARIMA per ARI location: split at 2023-10-13, pick the order
# automatically, run the rolling forecast and plot train vs. predictions.
for i in name_ari:
    print(f"Processing location: {i}")
    train, test = train_data(ari, i, "2023-10-13")
    order_arima = arima_model_auto(train, loc=i, type='ARI')
    mape_ari, test_ari_arima = forecast_arima_sarima_model(
        train,
        test,
        mape_ari,
        order_arima,
        seasonal_order_model=(0, 0, 0, 0),
        model_name="ARIMA",
        country=i,
    )
    plot_train_test(train, test_ari_arima, "ARI_ARIMA", i, 'arima_sarima')


In [None]:
# Show the ARI metric rows recorded for location 'ES'.
mape_ari.loc[mape_ari['location'].eq('ES')]

In [None]:
# Disabled cell: the ILI ARIMA run below is wrapped in a bare string literal,
# so none of it executes and `mape_ili` receives no ARIMA rows from here.
'''
for i in name_ili:
    print(f"Processing location: {i}")
    train, test = train_data(ili,i, "2023-10-13")
    order_arima = arima_model_auto(train,exogenous_var=None,loc=i,type='ILI') 
    mape_ili, test_ili_arima = forecast_arima_sarima_model(train, test,mape_ili, order_arima,seasonal_order_model = (0,0,0,0), model_name="ARIMA", country=i)
    plot_train_test(train, test_ili_arima,"ILI_ARIMA",i,'arima_sarima')
'''

In [None]:
# Disabled cell: the ILI SARIMA run below is wrapped in a bare string literal,
# so none of it executes and `mape_ili` receives no SARIMA rows from here.
'''
for i in name_ili:
    print(f"Processing location: {i}")
    train, test = train_data(ili,i, "2023-10-13")
    order_sarima,seasonal_patron = sarima_model_auto(train,sp=52,exogenous_var=None,loc=i,type='ILI') 
    print(f'Procesando prediccion SARIMA para {i}')
    mape_ili, test_ili_sarima = forecast_arima_sarima_model(train, test,mape_ili, order_sarima,seasonal_order_model =seasonal_patron, model_name="SARIMA", country=i)
    plot_train_test(train, test_ili_sarima,"ILI_SARIMA",i,'arima_sarima')
'''

In [None]:
# Seasonal ARIMA per ARI location: split at 2023-10-13, pick the (seasonal)
# order automatically, run the rolling forecast and plot the outcome.
# Note: the forecast frame is stored under the name `test_ari_arima` here too.
for i in name_ari:
    print(f"Processing location: {i}")
    train, test = train_data(ari, i, "2023-10-13")
    order_sarima, seasonal_patron = sarima_model_auto(train, loc=i, type='ARI')
    mape_ari, test_ari_arima = forecast_arima_sarima_model(
        train,
        test,
        mape_ari,
        order_sarima,
        seasonal_order_model=seasonal_patron,
        model_name="SARIMA",
        country=i,
    )
    plot_train_test(train, test_ari_arima, "ARI_SARIMA", i, 'arima_sarima')

In [None]:
# Write the ARI ARIMA/SARIMA metrics to disk without the row index.
# The ILI export stays disabled.
mape_ari.to_csv(path_or_buf="mape_ari_arima_sarima.csv", index=False)
#mape_ili.to_csv("mape_ili_arima_sarima.csv",index=False)

In [None]:
# Display the accumulated ARI metrics table as the cell output.
mape_ari

In [None]:
# Two independent, empty metric tables for the models with exogenous
# regressors (ARIMAX / SARIMAX), one per dataset.
mape_arix, mape_ilix = (
    pd.DataFrame(columns=['location', 'model', 'prediction_window', 'mae', 'rmse'])
    for _ in range(2)
)

In [None]:
# ARIMAX per ARI location, using weather and covid columns as regressors.
# Fixes relative to the previous version of this cell:
#   * metrics are accumulated into `mape_arix` (it used to be rebuilt from
#     `mape_ari` on every iteration, discarding earlier ARIMAX rows);
#   * the forecast uses `order_arimax`, the order selected for this location
#     with regressors, instead of a stale `order_arima` left by another cell.
exog_cols = ['relative_humidity_2m','temperature_2m_max','temperature_2m_min','covid']
for i in name_ari:
    print(f"Processing location: {i}")
    train, test = train_data(ari,i, "2023-10-13")
    order_arimax = arima_model_auto(train, exogenous_var=train[exog_cols], loc=i, type='ARI')
    mape_arix, test_ari_arimax = forecast_arima_sarima_model(
        train,
        test,
        mape_arix,
        order_arimax,
        seasonal_order_model=(0,0,0,0),
        model_name="ARIMAX",
        country=i,
        exogenous_var=exog_cols,
    )
    plot_train_test(train, test_ari_arimax,"ARI_ARIMAX",i,'arimax_sarimax')


In [None]:
# ARIMAX per ILI location, using weather and covid columns as regressors.
# Fixes relative to the previous version of this cell:
#   * metrics are accumulated into `mape_ilix` (it used to be rebuilt from
#     `mape_ili` on every iteration, discarding earlier ARIMAX rows);
#   * the forecast uses `order_arimax`, the order selected for this location
#     with regressors, instead of a stale `order_arima` left by another cell.
exog_cols = ['relative_humidity_2m','temperature_2m_max','temperature_2m_min','covid']
for i in name_ili:
    print(f"Processing location: {i}")
    train, test = train_data(ili,i, "2023-10-13")
    order_arimax = arima_model_auto(train, loc=i, type='ILI', exogenous_var=train[exog_cols])
    mape_ilix, test_ili_arimax = forecast_arima_sarima_model(
        train,
        test,
        mape_ilix,
        order_arimax,
        seasonal_order_model=(0,0,0,0),
        model_name="ARIMAX",
        country=i,
        exogenous_var=exog_cols,
    )
    plot_train_test(train, test_ili_arimax,"ILI_ARIMAX",i,'arimax_sarimax')


In [None]:
# SARIMAX per ILI location, using weather and covid columns as regressors.
# Fix relative to the previous version of this cell: metrics are accumulated
# into `mape_ilix`; it used to be rebuilt from `mape_ili` on every iteration,
# which discarded the rows of all previously processed locations and models.
exog_cols = ['relative_humidity_2m','temperature_2m_max','temperature_2m_min','covid']
for i in name_ili:
    print(f"Processing location: {i}")
    train, test = train_data(ili,i, "2023-10-13")
    order_sarimax,seasonal_patron = sarima_model_auto(train, sp=52, loc=i, type='ILI', exogenous_var=train[exog_cols])
    print(f'Procesando prediccion SARIMA para {i}')
    mape_ilix, test_ili_sarimax = forecast_arima_sarima_model(
        train,
        test,
        mape_ilix,
        order_sarimax,
        seasonal_order_model=seasonal_patron,
        model_name="SARIMAX",
        country=i,
        exogenous_var=exog_cols,
    )
    plot_train_test(train, test_ili_sarimax,"ILI_SARIMAX_cov",i,'arimax_sarimax')


In [None]:
# SARIMAX per ARI location, using weather and covid columns as regressors.
# Fix relative to the previous version of this cell: metrics are accumulated
# into `mape_arix`; it used to be rebuilt from `mape_ari` on every iteration,
# which discarded the rows of all previously processed locations and models.
# Note: the forecast frame keeps its original name `test_ari_arimax`.
exog_cols = ['relative_humidity_2m','temperature_2m_max','temperature_2m_min','covid']
for i in name_ari:
    print(f"Processing location: {i}")
    train, test = train_data(ari,i, "2023-10-13")
    order_sarimax,seasonal_patron = sarima_model_auto(train, loc=i, type='ARI', exogenous_var=train[exog_cols])
    mape_arix, test_ari_arimax = forecast_arima_sarima_model(
        train,
        test,
        mape_arix,
        order_sarimax,
        seasonal_order_model=seasonal_patron,
        model_name="SARIMAX",
        country=i,
        exogenous_var=exog_cols,
    )
    plot_train_test(train, test_ari_arimax,"ARI_SARIMAX_cov",i,'arimax_sarimax')

In [None]:
# Write both exogenous-model metric tables to disk without the row index.
for metrics_table, csv_name in (
    (mape_arix, "mape_ari_arimax_sarimax.csv"),
    (mape_ilix, "mape_ili_arimax_sarimax.csv"),
):
    metrics_table.to_csv(csv_name, index=False)