# # Forecasting the pandemic in outbreak zero with different numbers of days to fit

In [1]:
import numpy as np
import pandas as pd
from scipy.optimize import differential_evolution
from datetime import datetime

from sirds_model import get_bounds_and_arguments, sirds_objective_function

## Reading data

In [2]:
# Load the time-series CSV, using its first column as the DataFrame index.
df = pd.read_csv('data/output/df_ts_epidemic_episodes.csv', index_col=0)
# Convert the DATA column to pandas datetime values.
df['DATA'] = pd.to_datetime(df.DATA)

## Preparing data

In [3]:
# Order the rows by ascending DATA (the default sort direction).
df = df.sort_values('DATA')

In [4]:
# Express the new-cases column as a rate per 100,000 inhabitants.
df['TAXA_CASOS_NOVOS_MEDIA_MOVEL_7_DIAS_PAINEL'] = (
    df['CASOS_NOVOS_MEDIA_MOVEL_7_DIAS_PAINEL']
    .div(df['POPULACAO_2022'])
    .mul(100000)
)

In [5]:
# Treat missing rate values as zero.
df['TAXA_CASOS_NOVOS_MEDIA_MOVEL_7_DIAS_PAINEL'] = (
    df['TAXA_CASOS_NOVOS_MEDIA_MOVEL_7_DIAS_PAINEL'].fillna(value=0)
)

In [6]:
# Replace positive and negative infinity in the rate column with zero.
df['TAXA_CASOS_NOVOS_MEDIA_MOVEL_7_DIAS_PAINEL'] = (
    df['TAXA_CASOS_NOVOS_MEDIA_MOVEL_7_DIAS_PAINEL']
    .replace(to_replace=[np.inf, -np.inf], value=0)
)

## Forecasting

In [7]:
# Fit the SIRDS model for outbreak 0 on progressively longer fitting windows
# after the outbreak start date. Each finished estimation is appended to the
# results CSV immediately, so an interrupted session can be resumed without
# repeating the estimations that were already saved.

RESULTS_PATH = 'data/output/estimation_results_outbreak_0.csv'

# Resume from previously saved estimations when the results file exists.
# Only "no previous results" conditions start from scratch: any other failure
# (e.g. a corrupted or unreadable file) must surface, because the file is
# overwritten below and silently starting empty would destroy earlier results.
try:
    df_initial_results = pd.read_csv(RESULTS_PATH)
except (FileNotFoundError, pd.errors.EmptyDataError):
    df_initial_results = pd.DataFrame({'days_to_fit_within_outbreak':[]})

df_results = df_initial_results.copy()

FORECAST_HORIZON_IN_DAYS = 90
DAYS_TO_RECOVERY = 8
OUTBREAK = 0
ALPHA = 0

list_days_to_fit_within_outbreak = [28, 35, 42, 49, 56, 63, 70]

DIFFERENTIAL_EVOLUTION_POP_SIZE_FACTOR = 5
NUMBER_ESTIMATION_PER_DAYS_TO_FIT = 20

# Date of the first reported case: the earliest row with a positive count in
# either NOVOS_CASOS_SRAG or CASOS_NOVOS_PAINEL (df is already sorted by DATA).
date_first_case = min(
    df[(df.NOVOS_CASOS_SRAG > 0)].iloc[0].DATA,
    df[(df.CASOS_NOVOS_PAINEL > 0)].iloc[0].DATA,
)

# Start of the outbreak: first row whose onset column equals OUTBREAK.
row = df[df.ONSET_NUMERO_REPRODUCAO_EFETIVO_MEDIA == OUTBREAK].iloc[0]
outbreak_start_date = row['DATA']

for days_to_fit_within_outbreak in list_days_to_fit_within_outbreak:
    max_date_to_fit = outbreak_start_date + pd.DateOffset(days=days_to_fit_within_outbreak)

    print('days_to_fit_within_outbreak: ',days_to_fit_within_outbreak)
    print('max_date_to_fit: ',max_date_to_fit)

    # Period of analysis: from the first case to the end of the fitting
    # window, plus the forecast horizon.
    period_in_days = (max_date_to_fit - date_first_case).days + 1 + FORECAST_HORIZON_IN_DAYS
    max_date_to_analyze = date_first_case + pd.DateOffset(days=period_in_days)

    bounds, args = get_bounds_and_arguments(
        df,
        'DATA',
        'TAXA_OBITOS_NOVOS_MEDIA_MOVEL_7_DIAS_SIM',
        'NUMERO_REPRODUCAO_EFETIVO_SRAG_MEDIA',
        'TAXA_CASOS_NOVOS_MEDIA_MOVEL_7_DIAS_PAINEL',
        'ONSET_NUMERO_REPRODUCAO_EFETIVO_MEDIA',
        DAYS_TO_RECOVERY,
        date_first_case,
        max_date_to_fit,
        df.POPULACAO_2022.iloc[0],
        period_in_days,
    )

    # NOTE(review): positions 4, 5 and 6 of `args` are read as shown below;
    # confirm against sirds_model.get_bounds_and_arguments.
    list_breakpoints_in_slow_transition = args[4]
    quantity_outbreaks = args[5]
    quantity_outbreak_adjustments = args[6]

    # Sizes that determine how the optimizer's solution vector is laid out.
    quantity_epidemic_periods_with_slow_transition = len(list_breakpoints_in_slow_transition) + 1
    quantity_breakpoints = (quantity_outbreaks - 1) + (quantity_outbreak_adjustments)
    begin_breakpoint_parameters = 2 + 2*quantity_epidemic_periods_with_slow_transition

    # Estimations already saved for this fitting window; only the missing
    # ones are run. Each run appends exactly one row, so starting the range
    # here is equivalent to re-counting the saved rows on every iteration.
    estimations_performed = int(
        (df_results.days_to_fit_within_outbreak == days_to_fit_within_outbreak).sum()
    )

    for estimation in range(estimations_performed, NUMBER_ESTIMATION_PER_DAYS_TO_FIT):
        print('estimation: '+str(estimation))

        # Record the start time
        start_time = datetime.now()
        print(start_time)

        result = differential_evolution(
            sirds_objective_function,
            bounds,
            args=args,
            popsize=DIFFERENTIAL_EVOLUTION_POP_SIZE_FACTOR,
            maxiter=10000,
            workers=3,
            updating='deferred',
        )

        # Record the end time
        end_time = datetime.now()

        # Duration (in seconds) of the optimization
        duration = (end_time - start_time).total_seconds()
        print(duration)

        # Metadata of the run plus the first two entries of the solution.
        estimation_result = {
            'outbreak': OUTBREAK,
            'alpha': ALPHA,
            'days_to_fit_within_outbreak': days_to_fit_within_outbreak,
            'estimation': estimation,  # To differentiate between multiple estimations
            'result_fun': result.fun,
            'result_nfev': result.nfev,
            'result_nit': result.nit,
            'result_success': result.success,
            'start_time': start_time.strftime('%Y-%m-%d %H:%M:%S'),
            'end_time': end_time.strftime('%Y-%m-%d %H:%M:%S'),
            'duration_seconds': duration,
            'pop_size': DIFFERENTIAL_EVOLUTION_POP_SIZE_FACTOR,
            'period_in_days': period_in_days,
            'days_to_recovery': DAYS_TO_RECOVERY,
            'date_first_case': date_first_case,
            'outbreak_start_date': outbreak_start_date,
            'max_date_to_fit': max_date_to_fit,
            'max_date_to_analyze': max_date_to_analyze,
            'list_breakpoints_in_slow_transition': list_breakpoints_in_slow_transition,
            'x_initial_infected_population': result.x[0],
            'x_days_between_infections_0': result.x[1],
        }

        # Entries 2.. hold one case-fatality probability per slow-transition
        # period, followed by one loss-of-immunity duration per period.
        for p in range(quantity_epidemic_periods_with_slow_transition):
            estimation_result[f'x_case_fatality_probability_{p}'] = result.x[2 + p]
            estimation_result[f'x_loss_immunity_in_days_{p}'] = result.x[2 + quantity_epidemic_periods_with_slow_transition + p]

        # The remaining entries come in three consecutive groups of
        # `quantity_breakpoints` values: days between infections, breakpoint
        # position, and transition length.
        for b in range(quantity_breakpoints):
            estimation_result[f'x_days_between_infections_{b+1}'] = result.x[begin_breakpoint_parameters + b]
            estimation_result[f'x_breakpoint_{b+1}'] = result.x[begin_breakpoint_parameters + quantity_breakpoints + b]
            estimation_result[f'x_transition_days_between_epidemic_periods_{b+1}'] = result.x[begin_breakpoint_parameters + 2*quantity_breakpoints + b]

        print(estimation_result)

        # Append the new row; ignore_index avoids every appended row carrying
        # the duplicate index label 0.
        df_results = pd.concat(
            [df_results, pd.DataFrame.from_records([estimation_result])],
            ignore_index=True,
        )

        # Persist after every estimation so progress survives an interruption.
        df_results.to_csv(RESULTS_PATH, index=False)

days_to_fit_within_outbreak:  28
max_date_to_fit:  2020-03-29 00:00:00
days_to_fit_within_outbreak:  35
max_date_to_fit:  2020-04-05 00:00:00
days_to_fit_within_outbreak:  42
max_date_to_fit:  2020-04-12 00:00:00
days_to_fit_within_outbreak:  49
max_date_to_fit:  2020-04-19 00:00:00
days_to_fit_within_outbreak:  56
max_date_to_fit:  2020-04-26 00:00:00
days_to_fit_within_outbreak:  63
max_date_to_fit:  2020-05-03 00:00:00
estimation: 0
2023-12-05 20:59:20.039751
28.993767
{'outbreak': 0, 'alpha': 0, 'days_to_fit_within_outbreak': 63, 'estimation': 0, 'result_fun': 0.1458884856289184, 'result_nfev': 4343, 'result_nit': 116, 'result_success': True, 'start_time': '2023-12-05 20:59:20', 'end_time': '2023-12-05 20:59:49', 'duration_seconds': 28.993767, 'pop_size': 5, 'period_in_days': 168, 'days_to_recovery': 8, 'date_first_case': Timestamp('2020-02-16 00:00:00'), 'outbreak_start_date': Timestamp('2020-03-01 00:00:00'), 'max_date_to_fit': Timestamp('2020-05-03 00:00:00'), 'max_date_to_analy

In [8]:
# Display the first five rows of the accumulated estimation results.
df_results.head(n=5)

Unnamed: 0,days_to_fit_within_outbreak,outbreak,alpha,estimation,result_fun,result_nfev,result_nit,result_success,start_time,end_time,...,max_date_to_fit,max_date_to_analyze,list_breakpoints_in_slow_transition,x_initial_infected_population,x_days_between_infections_0,x_case_fatality_probability_0,x_loss_immunity_in_days_0,x_days_between_infections_1,x_breakpoint_1,x_transition_days_between_epidemic_periods_1
0,28.0,0.0,0.0,0.0,0.34668,3388.0,83.0,True,2023-12-05 16:11:19,2023-12-05 16:11:40,...,2020-03-29,2020-06-28,[],0.019699,3.18264,0.003837,167.281061,5.727388,41.74186,1.825831
1,28.0,0.0,0.0,1.0,0.370704,2950.0,73.0,True,2023-12-05 16:11:40,2023-12-05 16:12:01,...,2020-03-29,2020-06-28,[],0.013683,3.169989,0.006136,202.146757,3.508126,37.431551,19.232071
2,28.0,0.0,0.0,2.0,0.374529,1861.0,46.0,True,2023-12-05 16:12:01,2023-12-05 16:12:13,...,2020-03-29,2020-06-28,[],0.014271,3.174482,0.005348,222.770294,3.227308,23.851242,7.346017
3,28.0,0.0,0.0,3.0,0.350492,3644.0,91.0,True,2023-12-05 16:12:13,2023-12-05 16:12:39,...,2020-03-29,2020-06-28,[],0.006963,3.180804,0.010648,197.54804,4.542095,41.562551,2.014805
4,28.0,0.0,0.0,4.0,0.372556,2184.0,47.0,True,2023-12-05 16:12:39,2023-12-05 16:12:56,...,2020-03-29,2020-06-28,[],0.006913,3.170051,0.011561,237.373834,3.325391,28.231655,13.698966
