# Extracting the death forecasting time series

In [4]:
import pandas as pd

from sirds_model_visualization import get_sirds, get_sirds_extras
import datetime

## Reading data

In [5]:
# Load every fitted estimation and parse the fit cut-off column into datetimes
# so it can be compared and sorted chronologically further down.
df_results = pd.read_csv('data/estimation_results.csv').assign(
    max_date_to_fit=lambda frame: pd.to_datetime(frame['max_date_to_fit'])
)

In [6]:
# Distinct fit cut-off dates present in the estimation results.
df_results['max_date_to_fit'].unique()

<DatetimeArray>
['2020-04-25 00:00:00', '2020-07-18 00:00:00', '2020-10-10 00:00:00',
 '2021-01-02 00:00:00', '2021-03-27 00:00:00', '2021-06-19 00:00:00',
 '2021-09-11 00:00:00', '2021-12-04 00:00:00', '2022-02-26 00:00:00']
Length: 9, dtype: datetime64[ns]

In [7]:
# Order the rows by fit cut-off date first and municipality name second, so
# that iterating over unique values later follows this order.
df_results = df_results.sort_values(by=['max_date_to_fit', 'municipality'])

In [8]:
# Number of days forecast after each max_date_to_fit (84 days = 12 weeks).
FORECASTING_HORIZON = 84

## Simulating and forecasting from the estimation results

In [9]:
# Inspect the estimation results that drive the simulations below.
df_results

Unnamed: 0,municipality_id,max_date_to_fit,municipality,estimation,result_fun,result_nfev,result_nit,result_success,start_time,end_time,...,x_breakpoint_11,x_transition_days_between_epidemic_periods_11,x_days_between_infections_12,x_breakpoint_12,x_transition_days_between_epidemic_periods_12,x_case_fatality_probability_3,x_loss_immunity_in_days_3,x_days_between_infections_13,x_breakpoint_13,x_transition_days_between_epidemic_periods_13
0,280030.0,2020-04-25,Aracaju,0.0,1.019098,936.0,15.0,True,2024-03-20 23:46:10,2024-03-20 23:46:13,...,,,,,,,,,,
244,280030.0,2020-04-25,Aracaju,1.0,1.023177,763.0,16.0,True,2024-03-22 08:38:04,2024-03-22 08:38:05,...,,,,,,,,,,
488,280030.0,2020-04-25,Aracaju,2.0,1.023987,518.0,9.0,True,2024-03-23 06:04:44,2024-03-23 06:04:44,...,,,,,,,,,,
732,280030.0,2020-04-25,Aracaju,3.0,1.016046,1034.0,13.0,True,2024-03-23 21:19:05,2024-03-23 21:19:07,...,,,,,,,,,,
976,280030.0,2020-04-25,Aracaju,4.0,1.020951,848.0,15.0,True,2024-03-24 11:56:02,2024-03-24 11:56:03,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6847,317020.0,2022-02-26,Uberlândia,15.0,0.271926,264240.0,1187.0,True,2024-06-12 08:53:49,2024-06-12 10:07:10,...,569.928202,47.206719,3.824699,646.472339,8.005792,,,,,
6970,317020.0,2022-02-26,Uberlândia,16.0,0.284146,243800.0,1099.0,True,2024-06-14 22:42:30,2024-06-14 23:49:21,...,565.697677,49.262054,3.792183,645.475261,21.852954,,,,,
7093,317020.0,2022-02-26,Uberlândia,17.0,0.287858,192395.0,868.0,True,2024-06-17 12:46:29,2024-06-17 13:38:10,...,568.291681,55.651519,3.698293,646.873019,18.736535,,,,,
7216,317020.0,2022-02-26,Uberlândia,18.0,0.273810,315580.0,1422.0,True,2024-06-20 00:32:47,2024-06-20 02:00:52,...,567.257610,34.899812,3.846722,646.183331,23.282623,,,,,


In [10]:
# For every estimation result, re-run the SIRDS simulation and extract
#   * the death forecast for the FORECASTING_HORIZON days after the fit cut-off
#     (collected into df_deaths_forecasted), and
#   * the full simulated series from the day after date_first_case
#     (collected into df_simulations).
#
# Per-result frames are gathered in lists and concatenated once at the end.
# Concatenating onto a growing frame inside the loop re-copies every previous
# row on each iteration (quadratic cost), and seeding the accumulator with an
# empty DataFrame relies on pandas dtype handling that is deprecated.
forecast_frames = []
simulation_frames = []
delta = datetime.timedelta(days=1)

for max_date_to_fit in df_results['max_date_to_fit'].unique():
    df_results_max_data_to_fit = df_results.loc[df_results['max_date_to_fit'] == max_date_to_fit]
    # Calendar days max_date_to_fit + 1 ... max_date_to_fit + FORECASTING_HORIZON.
    date_list_forecasting = [pd.to_datetime(max_date_to_fit) + (delta * (i + 1)) for i in range(FORECASTING_HORIZON)]

    for id_municipality in df_results_max_data_to_fit.municipality_id.unique():
        df_results_municipality = df_results_max_data_to_fit.loc[df_results_max_data_to_fit.municipality_id == id_municipality]
        municipality = df_results_municipality.municipality.iloc[0]

        # One row per repeated estimation of this municipality / cut-off date.
        for _, result in df_results_municipality.iterrows():
            # Drop the columns that are NaN for this row before handing it to
            # the model helpers (presumably they only expect the parameters
            # that were actually fitted -- confirm against get_sirds).
            result = result.dropna()

            estimation = result.estimation
            date_first_case = pd.to_datetime(result.date_first_case)
            days_to_recovery = result.days_to_recovery
            period_in_days_to_fit = int(result.period_in_days)
            # Fitted period plus the forecast window.
            period_in_days_general = period_in_days_to_fit + FORECASTING_HORIZON

            S, I, R, D, I_accumulated = get_sirds(result, FORECASTING_HORIZON)

            (
                D_new_deaths,
                reproduction_number_sird,
                I_new_cases,
                epidemic_periods_with_fast_transition_fuzzy_variable,
                epidemic_periods_with_slow_transition_fuzzy_variable,
                days_between_infections_values,
                case_fatality_probability_values,
                loss_immunity_in_days_values,
                estimated_days_between_infections,
                estimated_case_fatality_probability,
                estimated_loss_immunity_in_days,
            ) = get_sirds_extras(result, S, D, I_accumulated, forecast_horizon=FORECASTING_HORIZON)

            # The trailing FORECASTING_HORIZON values of the daily-deaths
            # series are taken as the forecast.
            forecast = D_new_deaths[-FORECASTING_HORIZON:]
            df_forecasting_municipality = pd.DataFrame()
            df_forecasting_municipality['forecast'] = forecast
            df_forecasting_municipality['municipality'] = municipality
            df_forecasting_municipality['municipality_id'] = id_municipality
            df_forecasting_municipality['max_date_to_fit'] = max_date_to_fit
            df_forecasting_municipality['estimation'] = estimation
            df_forecasting_municipality['date'] = date_list_forecasting
            forecast_frames.append(df_forecasting_municipality)

            # The first sample of each state array is skipped so the columns
            # start on the day after date_first_case; D_new_deaths is used
            # whole (presumably it is already one element shorter -- confirm).
            df_simulation = pd.DataFrame()
            df_simulation['S'] = S[1:]
            df_simulation['I'] = I[1:]
            df_simulation['R'] = R[1:]
            df_simulation['D_new_deaths'] = D_new_deaths
            df_simulation['effective_reproduction_number'] = reproduction_number_sird[1:]
            # Basic reproduction number as recovery time over time between infections.
            R0 = days_to_recovery / estimated_days_between_infections
            df_simulation['basic_reproduction_number'] = R0[1:]
            # NOTE(review): the column is named infection_fatality_rate but is
            # filled from the estimated case fatality probability -- confirm naming.
            df_simulation['infection_fatality_rate'] = estimated_case_fatality_probability[1:]
            date_list_simulation = [date_first_case + (delta * (i + 1)) for i in range(period_in_days_general - 1)]
            df_simulation['date'] = date_list_simulation
            df_simulation['municipality'] = municipality
            df_simulation['municipality_id'] = id_municipality
            df_simulation['max_date_to_fit'] = max_date_to_fit
            df_simulation['estimation'] = estimation
            simulation_frames.append(df_simulation)

# Single concatenation; fall back to an empty frame when there were no results
# (pd.concat raises on an empty list).
df_deaths_forecasted = pd.concat(forecast_frames) if forecast_frames else pd.DataFrame()
df_simulations = pd.concat(simulation_frames) if simulation_frames else pd.DataFrame()

## Saving data

### Forecasted deaths 

In [11]:
# Persist every per-estimation forecast row (the DataFrame index is not written).
df_deaths_forecasted.to_csv('data/df_deaths_forecasted.csv', index=False)

In [12]:
# Average the forecast over all estimations that share the same municipality,
# fit cut-off date and target date.
forecast_keys = ['municipality', 'municipality_id', 'max_date_to_fit', 'date']
mean_forecast = df_deaths_forecasted.groupby(forecast_keys)['forecast'].mean()
df_forecasting_mean = mean_forecast.reset_index()

# NOTE(review): unlike the other exports in this notebook, this file is written
# with the DataFrame index as an extra unnamed column -- confirm whether
# downstream readers rely on it before switching to index=False.
df_forecasting_mean.to_csv('data/df_deaths_forecasted_mean.csv')

### Simulated time series

In [13]:
# Raw simulated series, one block of rows per estimation.
df_simulations.to_csv('data/df_simulations.csv', index=False)

# Mean of every simulated column across estimations, per municipality,
# fit cut-off date and calendar date; the estimation id itself is excluded
# from the averaging.
simulation_keys = ['municipality', 'municipality_id', 'max_date_to_fit', 'date']
simulations_without_id = df_simulations.drop(columns=['estimation'])
df_simulations_mean = simulations_without_id.groupby(simulation_keys).mean().reset_index()
df_simulations_mean.to_csv('data/df_simulations_mean.csv', index=False)