# Execute the fuzzy SIRDS model on data from other countries

In [1]:
import pandas as pd
from scipy.optimize import differential_evolution
from datetime import timedelta, datetime

from sirds_model import get_bounds_and_arguments, sirds_objective_function

## Reading data

In [2]:
# Load the per-country epidemic time series; the first CSV column is the index.
df = pd.read_csv(
    'data/output/df_ts_epidemic_episodes_other_countries.csv',
    index_col=0,
)
# Parse the date column so it can be compared and subtracted later on.
df['date'] = pd.to_datetime(df['date'])

In [3]:
# Keep only the countries analysed in this notebook.
countries_of_interest = ['Spain', 'United Kingdom', 'United States']
df = df[df['location'].isin(countries_of_interest)]
# Alternative kept for reference: restrict the run to a single country.
#df = df[df.location.isin(['United States'])]

In [4]:
# Last date used when fitting each country: here every country is fitted up to
# the most recent date present in the (already filtered) data set.
last_available_date = df['date'].max()
dict_max_date_to_fit = dict.fromkeys(
    ['United States', 'United Kingdom', 'Spain'],
    last_available_date,
)

In [5]:
# Per-country value forwarded unchanged to get_bounds_and_arguments as its
# last positional argument (None for the United Kingdom).
# NOTE(review): presumably controls the upper bound of the initial infected
# population (I0) -- confirm against sirds_model.
dict_cumulative_days_in_first_outbreak_to_max_bound_I0 = {
    'United States': 0,
    'United Kingdom': None,
    'Spain': 0,
}

## Executing the SIRDS model

In [6]:
# Order rows chronologically within each country so that positional lookups
# such as .iloc[0] return the earliest matching date.
sort_keys = ['location', 'date']
df = df.sort_values(sort_keys)

In [None]:
# Estimate the SIRDS parameters several times per country with differential
# evolution. Every finished estimation is written to RESULTS_PATH right away,
# and estimations already present in that file are skipped, so an interrupted
# run can simply be re-executed to continue where it stopped.
RESULTS_PATH = 'data/output/execution_other_countries_results.csv'

try:
    df_initial_results = pd.read_csv(RESULTS_PATH)
except (FileNotFoundError, pd.errors.EmptyDataError):
    # No previous run to resume from. Any other failure (e.g. a corrupt or
    # unreadable file) is deliberately left to propagate: swallowing it would
    # restart from an empty frame and overwrite the saved results below.
    df_initial_results = pd.DataFrame({'location': []})

df_results = df_initial_results.copy()

DAYS_TO_RECOVERY = 8

DIFFERENTIAL_EVOLUTION_POP_SIZE_FACTOR = 5
NUMBER_ESTIMATON_PER_COUNTRY = 20

# Column holding the effective reproduction number series passed to the model.
rt_column = 'NUMERO_REPRODUCAO_EFETIVO_ATRASADO_MEDIA'
# rt_column = 'reproduction_rate'

for estimation in range(NUMBER_ESTIMATON_PER_COUNTRY):
    for country in df.location.unique():
        print(f'country: {country}')

        max_date_to_fit = dict_max_date_to_fit[country]
        df_country = df[(df.location == country) & (df.date <= max_date_to_fit)].reset_index(drop=True)

        # Date of the first case, i.e. the start of the fitted period.
        if country == 'United Kingdom':
            # First row of outbreak 0 whose delayed Rt equals the maximum of
            # that column over the whole country series.
            # NOTE(review): if that maximum falls outside outbreak 0 the
            # selection is empty and .iloc[0] raises IndexError.
            date_first_case = df_country[
                (df_country.ONSET_NUMERO_REPRODUCAO_EFETIVO_MEDIA == 0)
                & (df_country.NUMERO_REPRODUCAO_EFETIVO_ATRASADO_MEDIA == df_country.NUMERO_REPRODUCAO_EFETIVO_ATRASADO_MEDIA.max())
            ].iloc[0].date
        else:
            # First date with a positive estimated onset derived from deaths.
            date_first_case = df_country[(df_country.estimated_onset_by_deaths > 0)].iloc[0].date

        cumulative_days_in_first_outbreak_to_max_bound_I0 = dict_cumulative_days_in_first_outbreak_to_max_bound_I0[country]

        # Period of analysis (both end dates inclusive).
        period_in_days = (max_date_to_fit - date_first_case).days + 1

        # Only run estimation number k for a country that has exactly k
        # estimations stored so far; anything else is already done (or not
        # yet due) and is skipped.
        estimations_performed = len(df_results[(df_results.location == country)])
        if estimation != estimations_performed:
            continue

        print(f'estimation: {estimation}')

        bounds, args = get_bounds_and_arguments(
            df_country,
            'date',
            'rate_new_deaths_moving_average',
            rt_column,
            'rate_new_cases_moving_average',
            'ONSET_NUMERO_REPRODUCAO_EFETIVO_MEDIA',
            DAYS_TO_RECOVERY,
            date_first_case,
            max_date_to_fit,
            df_country.population.iloc[0],
            period_in_days,
            cumulative_days_in_first_outbreak_to_max_bound_I0,
        )

        print(bounds)

        # Record the start time
        start_time = datetime.now()
        print(start_time)

        result = differential_evolution(
            sirds_objective_function,
            bounds,
            args=args,
            popsize=DIFFERENTIAL_EVOLUTION_POP_SIZE_FACTOR,
            maxiter=10000,
            workers=4,
            updating='deferred',
        )

        # Record the end time
        end_time = datetime.now()

        # Calculate the duration (in seconds) for the optimization
        duration = (end_time - start_time).total_seconds()
        print(duration)

        # Structural information needed to name the entries of result.x,
        # taken from fixed positions of the argument tuple.
        list_breakpoints_in_slow_transition = args[4]
        quantity_outbreaks = args[5]
        quantity_outbreak_adjustments = args[6]

        # Create a dictionary to store results
        estimation_result = {
            'location': country,
            'estimation': estimation,  # To differentiate between multiple estimations
            'result_fun': result.fun,
            'result_nfev': result.nfev,
            'result_nit': result.nit,
            'result_success': result.success,
            'start_time': start_time.strftime('%Y-%m-%d %H:%M:%S'),  # Format start time as a string
            'end_time': end_time.strftime('%Y-%m-%d %H:%M:%S'),  # Format end time as a string
            'duration_seconds': duration,  # Duration in seconds
            'pop_size': DIFFERENTIAL_EVOLUTION_POP_SIZE_FACTOR,
            'period_in_days': period_in_days,
            'days_to_recovery': DAYS_TO_RECOVERY,
            'cumulative_days_in_first_outbreak_to_max_bound_I0': cumulative_days_in_first_outbreak_to_max_bound_I0,
            'date_first_case': date_first_case,
            'list_breakpoints_in_slow_transition': list_breakpoints_in_slow_transition,
            'x_initial_infected_population': result.x[0],
            'x_days_between_infections_0': result.x[1],
        }

        # result.x[2:] holds one case-fatality probability per period,
        # followed by one loss-of-immunity duration per period.
        quantity_epidemic_periods_with_slow_transition = len(list_breakpoints_in_slow_transition) + 1

        for p in range(quantity_epidemic_periods_with_slow_transition):
            estimation_result[f'x_case_fatality_probability_{p}'] = result.x[2 + p]
            estimation_result[f'x_loss_immunity_in_days_{p}'] = result.x[2 + quantity_epidemic_periods_with_slow_transition + p]

        # The remaining entries are three consecutive groups of
        # quantity_breakpoints values each: days between infections,
        # breakpoint position and transition length.
        quantity_breakpoints = (quantity_outbreaks - 1) + quantity_outbreak_adjustments
        begin_breakpoint_parameters = 2 + 2 * quantity_epidemic_periods_with_slow_transition
        for b in range(quantity_breakpoints):
            estimation_result[f'x_days_between_infections_{b + 1}'] = result.x[begin_breakpoint_parameters + b]
            estimation_result[f'x_breakpoint_{b + 1}'] = result.x[begin_breakpoint_parameters + quantity_breakpoints + b]
            estimation_result[f'x_transition_days_between_epidemic_periods_{b + 1}'] = result.x[begin_breakpoint_parameters + 2 * quantity_breakpoints + b]

        print(estimation_result)

        # Append the estimation result; ignore_index keeps the row index
        # unique instead of repeating 0 for every appended record.
        df_results = pd.concat(
            [df_results, pd.DataFrame.from_records([estimation_result])],
            ignore_index=True,
        )

        # Persist after every estimation so finished work survives an interruption.
        df_results.to_csv(RESULTS_PATH, index=False)

country: Spain
country: United Kingdom
country: United States
country: Spain
country: United Kingdom
estimation: 1
[(0.0014802853812419738, 0.0014822853812419736), (1.7078268812841861, 2.9723033038735736), (0.0008495582981530875, 0.013300999999999999), (0.0014248387533001134, 0.013300999999999999), (0.0002725214617683035, 0.0032363158645566654), (0.000506092656804511, 0.002580694199344447), (0.00013576496312809819, 0.013300999999999999), (89.999999, 365.000001), (89.999999, 365.000001), (89.999999, 365.000001), (89.999999, 365.000001), (89.999999, 365.000001), (1.7078268812841861, 10.574045997561049), (1.6738519559784781, 5.579510853261595), (1.6738519559784781, 8.12002505899817), (1.9244442773351058, 6.41481859111702), (1.7646176457870097, 5.882063152623366), (1.7646176457870097, 8.655590253206244), (2.1332344534411587, 7.110785844803863), (1.9622339456507754, 6.540784152169251), (2.0600991079267277, 6.540785152169251), (1.8406668368632015, 6.135560456210672), (1.8491972564005288, 6.1

  df_results = pd.concat([df_results, pd.DataFrame.from_records([estimation_result])])


47474.572291
{'location': 'Spain', 'estimation': 2, 'result_fun': 0.31591107837202664, 'result_nfev': 631135, 'result_nit': 2318, 'result_success': True, 'start_time': '2023-11-22 04:42:32', 'end_time': '2023-11-22 17:53:47', 'duration_seconds': 47474.572291, 'pop_size': 5, 'period_in_days': 1068, 'days_to_recovery': 8, 'cumulative_days_in_first_outbreak_to_max_bound_I0': 0, 'date_first_case': Timestamp('2020-01-29 00:00:00'), 'list_breakpoints_in_slow_transition': [3, 5, 9, 12], 'x_initial_infected_population': 0.0029743821933900755, 'x_days_between_infections_0': 2.2371768497484372, 'x_case_fatality_probability_0': 0.0029038082154083577, 'x_loss_immunity_in_days_0': 137.83910816626883, 'x_case_fatality_probability_1': 0.0009782609503980865, 'x_loss_immunity_in_days_1': 172.9556632370642, 'x_case_fatality_probability_2': 0.0014471456422442057, 'x_loss_immunity_in_days_2': 200.23268401574256, 'x_case_fatality_probability_3': 0.0009292506929739372, 'x_loss_immunity_in_days_3': 134.27519

  df_results = pd.concat([df_results, pd.DataFrame.from_records([estimation_result])])


23735.090355
{'location': 'Spain', 'estimation': 3, 'result_fun': 0.31038229691230657, 'result_nfev': 320095, 'result_nit': 1166, 'result_success': True, 'start_time': '2023-11-22 21:59:48', 'end_time': '2023-11-23 04:35:23', 'duration_seconds': 23735.090355, 'pop_size': 5, 'period_in_days': 1068, 'days_to_recovery': 8, 'cumulative_days_in_first_outbreak_to_max_bound_I0': 0, 'date_first_case': Timestamp('2020-01-29 00:00:00'), 'list_breakpoints_in_slow_transition': [3, 5, 9, 12], 'x_initial_infected_population': 0.00216750532875331, 'x_days_between_infections_0': 2.1701548168378553, 'x_case_fatality_probability_0': 0.002156544041149889, 'x_loss_immunity_in_days_0': 289.1567831161725, 'x_case_fatality_probability_1': 0.0013784566628309172, 'x_loss_immunity_in_days_1': 234.0915957487661, 'x_case_fatality_probability_2': 0.0011603082112216442, 'x_loss_immunity_in_days_2': 179.87222503459634, 'x_case_fatality_probability_3': 0.00062482336359406, 'x_loss_immunity_in_days_3': 125.69606850439

In [None]:
# Inspect the bounds of the first decision variable from the most recent fit
# (result.x[0] is stored as 'x_initial_infected_population').
bounds[0]

In [None]:
# Show the main series of the last country processed by the estimation loop.
inspection_columns = [
    'date',
    'rate_new_deaths_moving_average',
    'rate_new_cases_moving_average',
    'estimated_onset_by_deaths',
    'NUMERO_REPRODUCAO_EFETIVO_ATRASADO_MEDIA',
]
df_country[inspection_columns]

In [None]:
# Earliest of: first date with a positive case rate, first date with a
# positive death-based onset estimate.
first_date_with_cases = df_country[df_country.rate_new_cases_moving_average > 0].iloc[0].date
first_date_with_onset = df_country[df_country.estimated_onset_by_deaths > 0].iloc[0].date
min(first_date_with_cases, first_date_with_onset)

In [None]:
# First date on which the moving-average case rate is positive.
has_cases = df_country['rate_new_cases_moving_average'] > 0
df_country.loc[has_cases, 'date'].iloc[0]

In [None]:
# First date on which the death-based onset estimate is positive.
has_estimated_onset = df_country['estimated_onset_by_deaths'] > 0
df_country.loc[has_estimated_onset, 'date'].iloc[0]

In [None]:
# First and last date of every outbreak identifier.
# NOTE(review): this groups the full multi-country frame `df`, not
# `df_country` -- confirm that mixing countries is intended here.
outbreak_column = 'ONSET_NUMERO_REPRODUCAO_EFETIVO_MEDIA'
date_range_per_outbreak = {'date': ['min', 'max']}
outbreaks = df.groupby(outbreak_column).agg(date_range_per_outbreak)
outbreaks

In [None]:
# Start date of the first row of `outbreaks`.
first_outbreak = outbreaks.iloc[0]
first_outbreak[('date', 'min')]

In [None]:
# Rt values of the current country inside the date range (both ends
# inclusive) of the first row of `outbreaks`.
outbreak_start = outbreaks.iloc[0][('date', 'min')]
outbreak_end = outbreaks.iloc[0][('date', 'max')]
in_first_outbreak = df_country['date'].between(outbreak_start, outbreak_end)
rt_in_outbreak = df_country.loc[in_first_outbreak, rt_column].values
rt_in_outbreak

In [None]:
# Display every estimation collected so far (previously saved rows plus the
# ones appended by the estimation loop).
df_results