**Models Prophet**

In [1]:
#%pip install prophet

In [2]:
import pandas as pd
from prophet import Prophet
from sklearn.preprocessing import OneHotEncoder
import pickle
import os
import numpy as np
from datetime import timedelta
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score



Importing plotly failed. Interactive plots will not work.


In [3]:
# Load the two input tables: the daily COVID matrix (one row per state and date, as used
# below) and the table that assigns each state to a cluster for every target variable.
dades_temporals, clusters_targets = (
    pd.read_csv(fitxer)
    for fitxer in ('daily_covidMatrix.csv', 'state_clusters.csv')
)

In [4]:
# Map each target variable (a column of the daily data) to the column of
# `clusters_targets` that holds the cluster label of every state for that target.
targets = dict([
    ('positiveIncrease', 'Cluster_positiveIncrease'),
    ('hospitalizedIncrease', 'Cluster_hospitalizedIncrease'),
    ('deathIncrease', 'Cluster_deathIncrease'),
])

In [5]:
# One-hot encode the `state` column so that every state becomes its own indicator column.
# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output` and 1.4 removed the old
# name, so try the current keyword first and fall back for older installations.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)

states_encoded = encoder.fit_transform(dades_temporals[['state']])
state_columns = encoder.get_feature_names_out(['state'])
states_encoded_df = pd.DataFrame(states_encoded, columns=state_columns)

# Drop indicator columns left over from a previous run of this cell before appending the
# fresh ones; otherwise re-running the cell would leave duplicate `state_*` column names
# in `dades_temporals` and break the column selections further down.
dades_temporals = pd.concat(
    [
        dades_temporals.drop(columns=list(state_columns), errors='ignore').reset_index(drop=True),
        states_encoded_df.reset_index(drop=True),
    ],
    axis=1,
)




In [6]:
# Extra regressors handed to Prophet in addition to the one-hot state indicators.
# The order is kept as-is because the list is concatenated into column selections later.
altres_regressors = [
    'totalTestResults',
    'positive',
    'death',
    'negativeIncrease',
    'total',
    'totalTestResultsIncrease',
    'posNeg',
    'Dose1_Total',
    'Dose1_65Plus',
    'Complete_Total',
    'Complete_65Plus',
    'neighbor_contagions',
]



In [7]:
# Split into train and test sets at a fixed cut-off date

dades_temporals['date'] = pd.to_datetime(dades_temporals['date'])

# Rows strictly before the cut-off are used for fitting; rows on or after it are held out.
abans_del_tall = dades_temporals['date'] < '2021-03-01'
des_del_tall = dades_temporals['date'] >= '2021-03-01'
train = dades_temporals.loc[abans_del_tall]
test = dades_temporals.loc[des_del_tall]

Entrenar els diferents models

In [8]:
# Create the folder where the fitted models are stored
os.makedirs('models_prophet', exist_ok=True)

# The regressor set is identical for every model, so build it once instead of per cluster.
regressors = list(state_columns) + altres_regressors

# Fit and pickle one Prophet model per (target variable, cluster) pair, using the training
# rows of all states that belong to the cluster.
for target_var, cluster_col in targets.items():
    # All cluster labels that exist for this target variable
    clusters = clusters_targets[cluster_col].unique()

    for cluster in clusters:
        # States assigned to this cluster
        estats_cluster = clusters_targets[clusters_targets[cluster_col] == cluster]['State'].unique()

        dades_cluster = train[train['state'].isin(estats_cluster)]

        if dades_cluster.empty:
            # Report the skip: no model file is written for this pair, and without a message
            # here the only trace would be "Model no trobat" in the prediction step.
            print(f"Sense dades d'entrenament per {target_var}, cluster {cluster}; model no entrenat")
            continue

        # Prophet expects the date column to be called `ds` and the target `y`.
        columns_model = ['date', target_var] + regressors
        df_prophet = dades_cluster[columns_model].rename(columns={'date': 'ds', target_var: 'y'})

        model = Prophet(
            changepoint_prior_scale=0.1,
            seasonality_prior_scale=10.0,
            yearly_seasonality=True,
            weekly_seasonality=True,
            seasonality_mode='multiplicative',
            interval_width=0.90,
        )

        # Every extra regressor has to be registered before calling fit().
        for reg in regressors:
            model.add_regressor(reg)

        model.fit(df_prophet)

        model_filename = f'models_prophet/prophet_{target_var}_cluster{cluster}.pkl'
        with open(model_filename, 'wb') as f:
            pickle.dump(model, f)

        print(f"Model entrenat i guardat per {target_var}, cluster {cluster}")


23:03:09 - cmdstanpy - INFO - Chain [1] start processing
23:03:25 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per positiveIncrease, cluster 3


23:03:27 - cmdstanpy - INFO - Chain [1] start processing
23:03:35 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per positiveIncrease, cluster 6


23:03:38 - cmdstanpy - INFO - Chain [1] start processing
23:03:59 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per positiveIncrease, cluster 5


23:04:01 - cmdstanpy - INFO - Chain [1] start processing
23:04:11 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per positiveIncrease, cluster 2


23:04:13 - cmdstanpy - INFO - Chain [1] start processing
23:04:27 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per positiveIncrease, cluster 4


23:04:28 - cmdstanpy - INFO - Chain [1] start processing
23:04:33 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per positiveIncrease, cluster 1


23:04:36 - cmdstanpy - INFO - Chain [1] start processing
23:04:47 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per hospitalizedIncrease, cluster 4


23:04:50 - cmdstanpy - INFO - Chain [1] start processing
23:05:04 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per hospitalizedIncrease, cluster 3
Model entrenat i guardat per hospitalizedIncrease, cluster 2


23:05:06 - cmdstanpy - INFO - Chain [1] start processing
23:05:10 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per hospitalizedIncrease, cluster 6
Model entrenat i guardat per hospitalizedIncrease, cluster 1


23:05:14 - cmdstanpy - INFO - Chain [1] start processing
23:05:28 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per hospitalizedIncrease, cluster 7


23:05:29 - cmdstanpy - INFO - Chain [1] start processing
23:05:30 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per hospitalizedIncrease, cluster 5


23:05:33 - cmdstanpy - INFO - Chain [1] start processing
23:05:42 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per deathIncrease, cluster 6


23:05:45 - cmdstanpy - INFO - Chain [1] start processing
23:06:03 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per deathIncrease, cluster 3


23:06:04 - cmdstanpy - INFO - Chain [1] start processing
23:06:09 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per deathIncrease, cluster 5


23:06:11 - cmdstanpy - INFO - Chain [1] start processing
23:06:19 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per deathIncrease, cluster 4


23:06:21 - cmdstanpy - INFO - Chain [1] start processing
23:06:35 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per deathIncrease, cluster 1


23:06:36 - cmdstanpy - INFO - Chain [1] start processing
23:06:43 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per deathIncrease, cluster 2


Predir el test

In [9]:
metriques_totals = []

# Forecast horizon in days, counted from the day after each state's last training date.
horitzo_dies = 7

# Same regressor set the models were trained with; it does not change inside the loops.
regressors = list(state_columns) + altres_regressors

# For every (target, cluster) model: forecast `horitzo_dies` days for each state of the
# cluster and compare the forecast with the held-out test rows of that state.
for target_var, cluster_col in targets.items():
    # All cluster labels that exist for this target
    clusters = clusters_targets[cluster_col].unique()

    for cluster in clusters:
        model_filename = f'models_prophet/prophet_{target_var}_cluster{cluster}.pkl'
        if not os.path.exists(model_filename):
            print(f"Model no trobat per {target_var} cluster {cluster}")
            continue

        # NOTE(security): pickle.load can execute arbitrary code. Only load model files
        # written by the training cell of this notebook, never files from untrusted sources.
        with open(model_filename, 'rb') as f:
            model = pickle.load(f)

        estats_cluster = clusters_targets[clusters_targets[cluster_col] == cluster]['State'].unique()

        for estat in estats_cluster:
            dades_estat = train[train['state'] == estat]
            if dades_estat.empty:
                continue

            columns_model = ['date', target_var] + regressors
            df_prophet = dades_estat[columns_model].rename(columns={'date': 'ds', target_var: 'y'})
            df_prophet['ds'] = pd.to_datetime(df_prophet['ds'])
            # Sort chronologically so that the last row really is the most recent
            # observation, whatever the row order of the input file was.
            df_prophet = df_prophet.sort_values('ds', kind='stable')

            # Last known regressor values; they are carried forward over the whole horizon
            # because the true future values are not available at forecast time.
            last_known = df_prophet[regressors].iloc[-1]

            # Future dates: `horitzo_dies` days starting the day after the last known date
            start_date = df_prophet['ds'].max() + timedelta(days=1)
            future_dates = pd.date_range(start=start_date, periods=horitzo_dies, freq='D')
            future = pd.DataFrame({'ds': future_dates})

            # Repeat the last known regressor values for every future date
            future_states_df = pd.DataFrame(
                [last_known.values] * horitzo_dies, columns=regressors
            ).reset_index(drop=True)
            future = pd.concat([future, future_states_df], axis=1)

            # Forecast
            forecast = model.predict(future)
            prediccions = forecast[['ds', 'yhat']].copy()
            prediccions['state'] = estat
            prediccions['target'] = target_var

            # Align the forecast with the observed test values of this state
            test_estat = test[
                (test['state'] == estat) &
                (test['date'].isin(prediccions['ds']))
            ]

            comparacio = pd.merge(prediccions, test_estat, left_on=['ds', 'state'], right_on=['date', 'state'])
            # Rows without an observed target cannot be scored.
            comparacio = comparacio.dropna(subset=[target_var])

            if comparacio.empty:
                # The sklearn metrics raise on empty input, which would abort the evaluation
                # of every remaining state; skip this state and say so instead.
                print(f"Sense dades de test per {estat} ({target_var}); mètriques no calculades")
                continue

            y_true = comparacio[target_var]
            y_pred = comparacio['yhat']

            # Metrics
            mae = mean_absolute_error(y_true, y_pred)
            mse = mean_squared_error(y_true, y_pred)
            rmse = np.sqrt(mse)
            r2 = r2_score(y_true, y_pred)

            # RMSE was computed but never stored before; it is appended as the last key so
            # the position of the existing columns does not change.
            metriques_totals.append({
                'target': target_var,
                'state': estat,
                'cluster': cluster,
                'MAE': mae,
                'MSE': mse,
                'R2': r2,
                'RMSE': rmse
            })

# Passing the column list keeps the schema stable even when no metrics were collected.
df_metriques = pd.DataFrame(
    metriques_totals,
    columns=['target', 'state', 'cluster', 'MAE', 'MSE', 'R2', 'RMSE'],
)



In [10]:
# Label every per-state row with the model that produced it: "<target>_cluster<cluster>"
df_metriques['model'] = df_metriques['target'] + '_cluster' + df_metriques['cluster'].astype(str)


def _resum_per_model(prefix, funcions):
    """Aggregate the per-state metrics by model and prefix the metric columns.

    `funcions` maps each metric column to the aggregation applied to it; every resulting
    column except `model` is renamed to `<prefix>_<column>`.
    """
    resum = df_metriques.groupby('model').agg(funcions).reset_index()
    return resum.rename(columns=lambda col: col if col == 'model' else f'{prefix}_{col}')


# Average of each metric per model
mitjanes = _resum_per_model('mitjana', {'MAE': 'mean', 'MSE': 'mean', 'R2': 'mean'})

# Best result per model (lowest MAE, lowest MSE, highest R2)
millors = _resum_per_model('millor', {'MAE': 'min', 'MSE': 'min', 'R2': 'max'})

# Worst result per model (highest MAE, highest MSE, lowest R2)
pitjors = _resum_per_model('pitjor', {'MAE': 'max', 'MSE': 'max', 'R2': 'min'})

# Join the three summaries into a single final DataFrame, one row per model
resultats_models = mitjanes.merge(millors, on='model').merge(pitjors, on='model')

resultats_models


Unnamed: 0,model,mitjana_MAE,mitjana_MSE,mitjana_R2,millor_MAE,millor_MSE,millor_R2,pitjor_MAE,pitjor_MSE,pitjor_R2
0,deathIncrease_cluster1,13.097464,475.003,-0.868948,1.750448,4.689418,-0.039769,24.280006,1332.112,-2.902653
1,deathIncrease_cluster2,123.208078,20059.81,-3441.758049,33.976026,1547.215126,-0.373395,198.162381,44483.77,-21348.191069
2,deathIncrease_cluster3,49.891932,3273.508,-312.200335,37.755046,1511.731859,0.28606,67.13605,6460.927,-1559.912296
3,deathIncrease_cluster4,11.241975,224.8633,-27.941521,5.64469,44.013273,-0.031859,25.332276,707.2586,-245.367115
4,deathIncrease_cluster5,34.320397,1730.484,-11.686677,22.663161,904.401027,0.269807,53.479714,3530.556,-32.308546
5,deathIncrease_cluster6,29.268299,1248.662,-20.086882,18.415666,418.46908,0.015084,51.081704,3240.225,-87.269185
6,hospitalizedIncrease_cluster1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0
7,hospitalizedIncrease_cluster2,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0
8,hospitalizedIncrease_cluster3,23.484497,1220.563,-2.89445,6.685122,50.974057,0.711702,51.640174,3934.542,-17.359957
9,hospitalizedIncrease_cluster4,46.180278,3512.939,-2.759072,16.822773,319.458368,0.413002,106.339794,12906.71,-17.586058
