**Models Prophet**

In [96]:
# Uncomment to install Prophet into the kernel's environment (pin a version for reproducible runs)
#%pip install prophet

In [97]:
import pandas as pd
from prophet import Prophet
from sklearn.preprocessing import OneHotEncoder
import pickle
import os
import numpy as np
from datetime import timedelta
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score



In [98]:
# Load the three CSV inputs: the daily time-varying table, the static per-state
# table (joined on `state` below), and the table that assigns each state to a
# cluster for every target variable.
dades_temporals, dades_estatiques, clusters_targets = (
    pd.read_csv(csv_name)
    for csv_name in ('daily_covidMatrix.csv', 'static_stateMatrix.csv', 'state_clusters.csv')
)

In [99]:
# Target column in the data -> column of `clusters_targets` that holds the
# cluster label of each state for that target.
targets = dict([
    ('positiveIncrease', 'Cluster_positiveIncrease'),
    ('hospitalizedIncrease', 'Cluster_hospitalizedIncrease'),
    ('deathIncrease', 'Cluster_deathIncrease'),
])

In [100]:
# Attach the static per-state attributes to every daily row; the left join
# keeps all rows of the daily table even if a state has no static record.
dades = dades_temporals.merge(dades_estatiques, how='left', on='state')

In [101]:
# One-hot encode the state identifier so it can be passed to the models as a
# set of regressor columns (named `state_<value>` by the encoder).
try:
    # `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and the old
    # keyword was removed in 1.4, so prefer the new name.
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    # Older scikit-learn (< 1.2) only accepts the original keyword.
    encoder = OneHotEncoder(sparse=False)
states_encoded = encoder.fit_transform(dades[['state']])
state_columns = encoder.get_feature_names_out(['state'])
states_encoded_df = pd.DataFrame(states_encoded, columns=state_columns)

# Drop one-hot columns left over from a previous run of this cell so that
# re-running it does not append duplicates, then add the fresh columns by
# position (both frames carry a 0..n-1 index at this point).
dades = pd.concat(
    [
        dades.drop(columns=list(state_columns), errors='ignore').reset_index(drop=True),
        states_encoded_df,
    ],
    axis=1,
)




In [102]:
# Extra covariates passed to every model in addition to the one-hot state
# columns. The grouping comments are inferred from the column names only.
altres_regressors = [
    # testing / case counters
    'totalTestResults',
    'positive',
    'death',
    'negativeIncrease',
    'total',
    'totalTestResultsIncrease',
    'posNeg',
    # vaccination
    'Dose1_Total',
    'Dose1_65Plus',
    'Complete_Total',
    'Complete_65Plus',
    # neighbouring contagions and coverage
    'neighbor_contagions',
    'no_coverage',
    # hospital beds
    'bedsState_local_government',
    'bedsNon_profit',
    'bedsFor_profit',
    'bedsTotal',
    # population
    'population_state',
    'pop_density_state',
    # SVI categories and metro flag
    'Low_SVI_CTGY',
    'Moderate_Low_SVI_CTGY',
    'Moderate_High_SVI_CTGY',
    'Metro',
]


In [103]:
# Train/test split by date: rows strictly before 2021-03-01 go to training,
# rows from that day onwards go to testing.
dades['date'] = pd.to_datetime(dades['date'])

train, test = (
    dades.loc[dades['date'] < '2021-03-01'],
    dades.loc[dades['date'] >= '2021-03-01'],
)

Entrenar un model Prophet per a cada variable objectiu i cada clúster

In [104]:
# Make sure the output folder for the fitted models exists
os.makedirs('models_prophet', exist_ok=True)

# Every model uses the same regressors: the one-hot state columns followed by
# the extra covariates.
regressors = list(state_columns) + altres_regressors

# One model is fitted per (target variable, cluster) pair
for target_var, cluster_col in targets.items():
    # Columns needed for this target: the date, the target and all regressors
    columns_model = ['date', target_var] + regressors

    # Cluster labels that occur for this target
    clusters = clusters_targets[cluster_col].unique()

    for cluster in clusters:
        # States assigned to this cluster and their training rows
        estats_cluster = clusters_targets.loc[
            clusters_targets[cluster_col] == cluster, 'State'
        ].unique()
        dades_cluster = train.loc[train['state'].isin(estats_cluster)]

        # Prophet reads the timestamp from `ds` and the value to forecast from `y`
        df_prophet = dades_cluster[columns_model].rename(
            columns={'date': 'ds', target_var: 'y'}
        )

        model = Prophet(
            changepoint_prior_scale=0.1,
            seasonality_prior_scale=10.0,
            yearly_seasonality=True,
            weekly_seasonality=True,
            interval_width=0.90,
        )
        # Regressors must be registered before fitting
        for reg in regressors:
            model.add_regressor(reg)
        model.fit(df_prophet)

        # Persist the fitted model; the evaluation cell reloads it from this path
        model_filename = f'models_prophet/prophet_{target_var}_cluster{cluster}.pkl'
        with open(model_filename, 'wb') as f:
            pickle.dump(model, f)

        print(f"Model entrenat i guardat per {target_var}, cluster {cluster}")


16:32:21 - cmdstanpy - INFO - Chain [1] start processing
16:32:24 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per positiveIncrease, cluster 3


16:32:27 - cmdstanpy - INFO - Chain [1] start processing
16:32:29 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per positiveIncrease, cluster 6


16:32:34 - cmdstanpy - INFO - Chain [1] start processing
16:32:42 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per positiveIncrease, cluster 5


16:32:45 - cmdstanpy - INFO - Chain [1] start processing
16:32:47 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per positiveIncrease, cluster 2


16:32:50 - cmdstanpy - INFO - Chain [1] start processing
16:32:51 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per positiveIncrease, cluster 4


16:32:53 - cmdstanpy - INFO - Chain [1] start processing
16:32:53 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per positiveIncrease, cluster 1


16:32:57 - cmdstanpy - INFO - Chain [1] start processing
16:32:58 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per hospitalizedIncrease, cluster 4


16:33:03 - cmdstanpy - INFO - Chain [1] start processing
16:33:06 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per hospitalizedIncrease, cluster 3
Model entrenat i guardat per hospitalizedIncrease, cluster 2


16:33:08 - cmdstanpy - INFO - Chain [1] start processing
16:33:08 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per hospitalizedIncrease, cluster 6
Model entrenat i guardat per hospitalizedIncrease, cluster 1


16:33:13 - cmdstanpy - INFO - Chain [1] start processing
16:33:14 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per hospitalizedIncrease, cluster 7


16:33:16 - cmdstanpy - INFO - Chain [1] start processing
16:33:16 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per hospitalizedIncrease, cluster 5


16:33:20 - cmdstanpy - INFO - Chain [1] start processing
16:33:23 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per deathIncrease, cluster 6


16:33:27 - cmdstanpy - INFO - Chain [1] start processing
16:33:29 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per deathIncrease, cluster 3


16:33:32 - cmdstanpy - INFO - Chain [1] start processing
16:33:39 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per deathIncrease, cluster 5


16:33:43 - cmdstanpy - INFO - Chain [1] start processing
16:33:44 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per deathIncrease, cluster 4


16:33:47 - cmdstanpy - INFO - Chain [1] start processing
16:33:49 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per deathIncrease, cluster 1


16:33:51 - cmdstanpy - INFO - Chain [1] start processing
16:33:53 - cmdstanpy - INFO - Chain [1] done processing


Model entrenat i guardat per deathIncrease, cluster 2


Predir els 7 dies posteriors al període d'entrenament i avaluar-los amb el conjunt de test

In [105]:
# Evaluate every saved model state by state.
# For each state the regressors are held constant at their most recent training
# values, the model forecasts the days right after the end of the training
# data, and the forecast is scored against the observed test values.
# Result: `df_metriques`, one row per (target, state) with MAE, MSE, RMSE and R2.
metriques_totals = []

# Forecast horizon, in days after the last training date of each state
dies_prediccio = 7

# The regressors are the same for every model
regressors = list(state_columns) + altres_regressors

for target_var, cluster_col in targets.items():
    # Columns needed for this target: the date, the target and all regressors
    columns_model = ['date', target_var] + regressors

    # Cluster labels that occur for this target
    clusters = clusters_targets[cluster_col].unique()

    for cluster in clusters:
        model_filename = f'models_prophet/prophet_{target_var}_cluster{cluster}.pkl'

        # NOTE: unpickling executes arbitrary code. Only load files written by
        # the training cell of this notebook, never files of unknown origin.
        with open(model_filename, 'rb') as f:
            model = pickle.load(f)

        estats_cluster = clusters_targets[clusters_targets[cluster_col] == cluster]['State'].unique()

        for estat in estats_cluster:
            # Sort chronologically so that the last row really is the most
            # recent observation of this state.
            dades_estat = train[train['state'] == estat].sort_values('date')
            if dades_estat.empty:
                continue

            df_prophet = dades_estat[columns_model].rename(columns={'date': 'ds', target_var: 'y'})
            df_prophet['ds'] = pd.to_datetime(df_prophet['ds'])

            # Most recent known regressor values
            last_known = df_prophet[regressors].iloc[-1]

            # Future dates: the days following the last known date
            start_date = df_prophet['ds'].max() + timedelta(days=1)
            future_dates = pd.date_range(start=start_date, periods=dies_prediccio, freq='D')
            future = pd.DataFrame({'ds': future_dates})

            # Future regressor values are unknown, so repeat the last known ones
            future_states_df = pd.DataFrame([last_known.values] * dies_prediccio, columns=regressors)
            future = pd.concat([future, future_states_df], axis=1)

            # Forecast
            forecast = model.predict(future)
            prediccions = forecast[['ds', 'yhat']].copy()
            prediccions['state'] = estat
            prediccions['target'] = target_var

            # Align the forecast with the observed test rows of this state
            test_estat = test[
                (test['state'] == estat) &
                (test['date'].isin(prediccions['ds']))
            ]
            comparacio = pd.merge(prediccions, test_estat, left_on=['ds', 'state'], right_on=['date', 'state'])

            # The metric functions raise on empty input, so skip states that
            # have no test observations inside the forecast window.
            if comparacio.empty:
                print(f"Sense dades de test per {estat} ({target_var}); s'omet l'avaluació")
                continue

            y_true = comparacio[target_var]
            y_pred = comparacio['yhat']

            # Metrics
            mae = mean_absolute_error(y_true, y_pred)
            mse = mean_squared_error(y_true, y_pred)
            rmse = np.sqrt(mse)
            r2 = r2_score(y_true, y_pred)

            metriques_totals.append({
                'target': target_var,
                'state': estat,
                'cluster': cluster,
                'MAE': mae,
                'MSE': mse,
                'RMSE': rmse,
                'R2': r2
            })

df_metriques = pd.DataFrame(metriques_totals)



In [106]:
# Identify each fitted model by target and cluster, e.g. "deathIncrease_cluster1"
df_metriques['model'] = df_metriques['target'] + '_cluster' + df_metriques['cluster'].astype(str)

# Mean of the per-state metrics of each model
mitjanes = (
    df_metriques.groupby('model')
    .agg({'MAE': 'mean', 'MSE': 'mean', 'R2': 'mean'})
    .add_prefix('mitjana_')
    .reset_index()
)

# Best per-state result of each model (lowest MAE, lowest MSE, highest R2)
millors = (
    df_metriques.groupby('model')
    .agg({'MAE': 'min', 'MSE': 'min', 'R2': 'max'})
    .add_prefix('millor_')
    .reset_index()
)

# Worst per-state result of each model (highest MAE, highest MSE, lowest R2)
pitjors = (
    df_metriques.groupby('model')
    .agg({'MAE': 'max', 'MSE': 'max', 'R2': 'min'})
    .add_prefix('pitjor_')
    .reset_index()
)

# Combine everything into one summary table with one row per model
resultats_models = mitjanes.merge(millors, on='model').merge(pitjors, on='model')

resultats_models


Unnamed: 0,model,mitjana_MAE,mitjana_MSE,mitjana_R2,millor_MAE,millor_MSE,millor_R2,pitjor_MAE,pitjor_MSE,pitjor_R2
0,deathIncrease_cluster1,27.547715,1062.147,-28.607618,14.681555,265.330497,-0.034427,51.334345,2796.822,-124.011484
1,deathIncrease_cluster2,64.387576,10647.46,-62.461935,6.1055,43.149825,0.009337,156.831437,32087.2,-316.399916
2,deathIncrease_cluster3,25.743007,1845.717,-39.077287,6.73679,65.885989,0.138727,67.782542,9075.844,-155.539757
3,deathIncrease_cluster4,11.768382,245.7216,-66.937453,4.940514,46.064301,0.548281,22.378568,816.8224,-402.681141
4,deathIncrease_cluster5,32.335051,2131.957,-4.379094,11.379598,150.999873,-0.619739,82.795791,8296.884,-17.375593
5,deathIncrease_cluster6,19.917848,708.3918,-4.881258,4.367382,38.153263,0.220611,38.07775,2025.64,-22.451568
6,hospitalizedIncrease_cluster1,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0
7,hospitalizedIncrease_cluster2,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0
8,hospitalizedIncrease_cluster3,27.875218,1103.42,-22.848073,10.569809,138.515901,0.274156,44.003224,2237.653,-165.602135
9,hospitalizedIncrease_cluster4,70.388972,7002.609,-7.7779,13.124022,212.082337,0.571533,120.213623,17208.78,-29.54464
