# Estimating parameters from univariate LSTM model and Fuzzy SIRDS 

In [4]:
import epyestim
import numpy as np
import pandas as pd
from epyestim import covid19
from scipy.stats import gamma, lognorm
from math import exp

## Loading data

In [5]:
df_output_lstm = pd.read_csv('../lstm/data/df_forecasting_mean.csv', index_col=0)
df_output_lstm['date'] = pd.to_datetime(df_output_lstm['date'])
df_output_lstm['max_date_to_fit'] = pd.to_datetime(df_output_lstm['max_date_to_fit'])
df_output_sirds = pd.read_csv('../sirds/data/df_simulations.csv')
df_output_sirds['date'] = pd.to_datetime(df_output_sirds['date'])
df_output_sirds['max_date_to_fit'] = pd.to_datetime(df_output_sirds['max_date_to_fit'])

In [6]:
df_original = pd.read_csv('../data/df_original_extended.csv')
df_original.DATA = pd.to_datetime(df_original.DATA, format='mixed')

## Estimating

In [7]:
df_output_lstm

Unnamed: 0,municipality,municipality_id,max_date_to_fit,date,forecast
0,Aparecida de Goiânia,520140,2020-04-25,2020-04-26,0.008996
1,Aparecida de Goiânia,520140,2020-04-25,2020-04-27,0.007964
2,Aparecida de Goiânia,520140,2020-04-25,2020-04-28,0.008593
3,Aparecida de Goiânia,520140,2020-04-25,2020-04-29,0.005758
4,Aparecida de Goiânia,520140,2020-04-25,2020-04-30,0.005567
...,...,...,...,...,...
24103,Uberlândia,317020,2022-02-26,2022-05-17,0.601449
24104,Uberlândia,317020,2022-02-26,2022-05-18,0.607563
24105,Uberlândia,317020,2022-02-26,2022-05-19,0.613106
24106,Uberlândia,317020,2022-02-26,2022-05-20,0.619586


In [8]:
RECOVERY_RATE = 1/8

# Regarding IFR=0.66% from Estimates of the severity of coronavirus disease 2019: a model-based analysis. Verity, Robert et al. The Lancet Infectious Diseases, Volume 20, Issue 6, 669 - 677
SURVIVAL_RATE = 100/0.66

In [9]:
# Verity, Robert, et al. "Estimates of the severity of coronavirus disease 2019: a model-based analysis." The Lancet infectious diseases 20.6 (2020): 669-677.
# Gamma distribution with delay mean equal to 18.8 days
mean_days_delay_onset_to_death = 18.8  # mean days onset to death.
s_death = 0.45
shape_death = 1/(s_death**2)
scale_death= mean_days_delay_onset_to_death * s_death ** 2
delay_onset_to_death_distribution = epyestim.discrete_distrb(gamma(a=shape_death, scale=scale_death))

#Bi, Qifang, et al. "Epidemiology and transmission of COVID-19 in 391 cases and 1286 of their close contacts in Shenzhen, China: a retrospective cohort study." The Lancet infectious diseases 20.8 (2020): 911-919.
# Lognormal distribution with incabation mean equal to 5.93 days, Lognormal(mu=1.57, sigma²=0.42)
incubation_time_distribution = epyestim.discrete_distrb(lognorm(scale=exp(1.57), s=0.65))

#Bi, Qifang, et al. "Epidemiology and transmission of COVID-19 in 391 cases and 1286 of their close contacts in Shenzhen, China: a retrospective cohort study." The Lancet infectious diseases 20.8 (2020): 911-919.
# Gamma distribution with serial interval mean equal to 6.36 days, gamma(a=2.29, b=0.36)
standard_serial_inteval_distribution = epyestim.discrete_distrb(gamma(a=2.29, scale=1/0.36))

In [10]:
window_days_onset_to_death = round(mean_days_delay_onset_to_death)
window_days_onset_to_death

19

In [11]:
df = pd.DataFrame()

df_output_lstm = df_output_lstm.sort_values(['municipality', 'date'])
df_output_sirds = df_output_sirds.sort_values(['municipality', 'max_date_to_fit', 'estimation', 'date'])

for max_date_to_fit in df_output_sirds['max_date_to_fit'].unique():    
    df_output_sirds_max_date_to_fit = df_output_sirds.loc[(df_output_sirds['max_date_to_fit']==max_date_to_fit) & (df_output_sirds['date'] <= max_date_to_fit)]
    df_output_lstm_max_date_to_fit = df_output_lstm.loc[df_output_lstm['max_date_to_fit'] == max_date_to_fit]
    for municipality in df_output_sirds_max_date_to_fit['municipality'].unique():
        try:
            df_output_sirds_municipality = df_output_sirds_max_date_to_fit.loc[df_output_sirds_max_date_to_fit['municipality']==municipality]
            df_output_lstm_municipality = df_output_lstm_max_date_to_fit.loc[df_output_lstm_max_date_to_fit['municipality'] == municipality]
            
            municipality_id = df_output_sirds_municipality['municipality_id'].unique()[0]
            
            average_estimated_deaths = df_output_lstm_municipality.iloc[:7]['forecast'].mean()
            
            df_original_temp = df_original.loc[(df_original['CODIGO_MUNICIPIO_6']==municipality_id) & (df_original['DATA']<=max_date_to_fit)]
            deaths_actual_and_forecasted = np.concatenate([df_original_temp['TAXA_OBITOS_NOVOS_MEDIA_MOVEL_7_DIAS'], df_output_lstm_municipality['forecast']])
            dates_actual_and_forecasted = np.concatenate([df_original_temp['DATA'], df_output_lstm_municipality['date']])
            
            population = df_original['POPULACAO_2022'].unique()[0]
            
            estimated_cases = deaths_actual_and_forecasted * population / 100000 * SURVIVAL_RATE
            df_estimated_cases = pd.DataFrame({'date': dates_actual_and_forecasted, 'estimated_cases': estimated_cases})
            df_estimated_cases['date'] = pd.to_datetime(df_estimated_cases['date'])
            df_estimated_cases['estimated_onset_symptoms'] = df_estimated_cases['estimated_cases'].rolling(window=window_days_onset_to_death, min_periods=1).mean().shift(-(window_days_onset_to_death - 1))
            
            df_cases = df_estimated_cases.set_index('date')['estimated_onset_symptoms'].dropna()
            
            # df_effective_reproduction_number_estimated_cases = covid19.r_covid(df_cases, smoothing_window = 28, r_window_size = 14, auto_cutoff=True, n_samples=100, delay_distribution=incubation_time_distribution, gt_distribution=standard_serial_inteval_distribution, a_prior=9.9, b_prior=1/9.25)
            df_effective_reproduction_number_estimated_cases = covid19.r_covid(df_cases, smoothing_window = 28, r_window_size = 14, auto_cutoff=True, n_samples=5, delay_distribution=incubation_time_distribution, gt_distribution=standard_serial_inteval_distribution, a_prior=9.9, b_prior=1/9.25)
            df_effective_reproduction_number_estimated_cases = df_effective_reproduction_number_estimated_cases.reset_index()
            df_effective_reproduction_number_estimated_cases['index'] = pd.to_datetime(df_effective_reproduction_number_estimated_cases['index'])
            
            average_estimated_rt = df_effective_reproduction_number_estimated_cases.loc[df_effective_reproduction_number_estimated_cases['index']>max_date_to_fit].iloc[:7]['R_mean'].mean()
            
            for estimation in df_output_sirds_municipality['estimation'].unique():
                df_output_sirds_estimation = df_output_sirds_municipality.loc[df_output_sirds_municipality['estimation']==estimation]
                
                average_estimated_infecteds = df_output_sirds_estimation.iloc[-7:]['I'].mean()            
                estimated_fatality = average_estimated_deaths / (average_estimated_infecteds * RECOVERY_RATE)
                
                average_estimated_susceptibles = df_output_sirds_estimation.iloc[-7:]['S'].mean()
                estimated_contact_rate = 100000 * average_estimated_rt / average_estimated_susceptibles * RECOVERY_RATE
                
                row = {'municipality_id': municipality_id,'municipality': municipality, 'max_date_to_fit': max_date_to_fit, 'estimation': estimation, 'estimated_fatality': estimated_fatality, 'estimated_contact_rate': estimated_contact_rate}
                df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
        except ValueError as e:            
            print(municipality)
            print(max_date_to_fit)
            print(e)

In [12]:
df.round(4)

Unnamed: 0,municipality_id,municipality,max_date_to_fit,estimation,estimated_fatality,estimated_contact_rate
0,520140.0,Aparecida de Goiânia,2020-07-18,0.0,0.0162,0.1411
1,520140.0,Aparecida de Goiânia,2020-07-18,1.0,0.0143,0.1415
2,520140.0,Aparecida de Goiânia,2020-07-18,2.0,0.0164,0.1411
3,520140.0,Aparecida de Goiânia,2020-07-18,3.0,0.0150,0.1416
4,520140.0,Aparecida de Goiânia,2020-07-18,4.0,0.0152,0.1414
...,...,...,...,...,...,...
7335,317020.0,Uberlândia,2020-04-25,15.0,0.0027,0.0730
7336,317020.0,Uberlândia,2020-04-25,16.0,0.0025,0.0731
7337,317020.0,Uberlândia,2020-04-25,17.0,0.0014,0.0731
7338,317020.0,Uberlândia,2020-04-25,18.0,0.0025,0.0731


## Saving data

In [13]:
df.to_csv('data/df_estimated_parameters.csv', index=False)