# Parameter sensitivity analysis

In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler
import pickle

import util

## Loading data

In [2]:
# Covariate table from the "without collinearity, standardized" CSV;
# the first CSV column is used as the row index.
df_variables = pd.read_csv(
    filepath_or_buffer='data/output/df_without_collinearity_standardized.csv',
    index_col=0,
)

In [3]:
# Political table reduced to the single vote-share column; selecting with a
# list of labels keeps the result a one-column DataFrame rather than a Series.
df_political = pd.read_csv(
    'data/output/df_political_without_missing_points.csv',
    index_col=0,
).loc[:, ['percentual_votes_for_bolsonaro']]

In [4]:
# Vaccination table restricted to the four '% people fully vaccinated' columns,
# kept in this order because they are later addressed by position.
df_vaccination_fully_vaccinated_people = pd.read_csv(
    'data/output/df_vaccination.csv',
    index_col=0,
)[
    [
        '% people fully vaccinated (1/2020)',
        '% people fully vaccinated (2020)',
        '% people fully vaccinated (2020-2021)',
        '% people fully vaccinated (2020-2022)',
    ]
]

In [5]:
# Mortality table; the cells below read its death-count, 'population' and
# 'delta_first_death_*' columns.
df_y = pd.read_csv(
    filepath_or_buffer='data/output/df_mortality.csv',
    index_col=0,
)

In [6]:
# Per-row cluster probabilities (per the file and variable names), indexed by
# the first CSV column.
df_cluster_probabilities = pd.read_csv(
    'data/df_standardized_pca_2spherical_5_probability.csv',
    index_col=0,
)
# Relabel the five columns positionally with descriptive cluster names.
# NOTE(review): this relies on the column order stored in the CSV -- confirm
# that the order below matches the clustering output.
df_cluster_probabilities.columns = [
    'Semi-urbanized',
    'Urbanized',
    'Rural with high human development',
    'Urbanized with informal settlements',
    'Rural with low human development',
]

In [7]:
# Population column of the mortality table and its natural logarithm; the log
# is resampled and handed to the model fit in the sensitivity loop below.
list_population = df_y.loc[:, 'population']
list_offset = list_population.pipe(np.log)

## Parameter Sensitivity Analysis

In [8]:
# Bootstrap-based parameter sensitivity analysis for Model 9.
#
# For every bootstrap sample and every analysis period the cell:
#   1. loads and summarizes the previously fitted full model for that period,
#   2. resamples the rows of the design matrix with replacement,
#   3. refits the model on the resample via util.tunning_negative_binomial_model,
#   4. saves and summarizes the refitted model.

# Parallel lists: position k of each list describes the same analysis period.
list_columns_y = ['deaths_accumulated_first_semester_2020', 'deaths_accumulated_2020', 'deaths_2021', 'deaths_2022', 'deaths']
list_periods = ['2020_1', '2020', '2021', '2022', '2020_2022']
list_delta_first_death_columns = ['delta_first_death_2020-06-30', 'delta_first_death_2020-12-31', 'delta_first_death_2021-12-31', 'delta_first_death_2022-12-31', 'delta_first_death_general_period']
# To analyse only the whole period, keep just the last entry of each list above:
# 'deaths', '2020_2022' and 'delta_first_death_general_period'.

# Position (within df_vaccination_fully_vaccinated_people) of the vaccination
# column matching each period; only used by the disabled Model 10 block below.
vaccination_columns = [0, 1, 2, 3, 3]

# The lists are consumed in lockstep, so a length mismatch would silently drop periods.
assert (
    len(list_columns_y)
    == len(list_periods)
    == len(list_delta_first_death_columns)
    == len(vaccination_columns)
), 'period definition lists must all have the same length'

N_BOOTSTRAP_SAMPLES = 30
BOOTSTRAP_SEED = 42
# One seeded generator shared by all draws: every resample differs, yet
# re-running this cell from the top reproduces exactly the same resamples.
bootstrap_rng = np.random.RandomState(BOOTSTRAP_SEED)

# The Model 9 design matrix does not depend on the sample or the period, so it
# is built once. 'Urbanized' is left out of the regressors; drop() already
# returns a new frame, so no extra copy is needed.
x = sm.add_constant(df_cluster_probabilities.drop(columns=['Urbanized']))

for sample in range(N_BOOTSTRAP_SAMPLES):
    print('\n*** Sample: ', sample)
    for i, (column_y, period, column_delta_first_death) in enumerate(
        zip(list_columns_y, list_periods, list_delta_first_death_columns)
    ):
        # log(delta + 1) so that a delta of zero stays finite.
        list_offset_extra = np.log(df_y[column_delta_first_death] + 1)
        print('\n*** Period: ', period)

        y = df_y[column_y]

        print('\n*** Model 9')
        print('===>Full model:')
        # SECURITY: pickle.load can execute arbitrary code embedded in the file;
        # only load model files produced by this project.
        with open('models/model_9_' + period + '.pkl', 'rb') as file:
            full_model = pickle.load(file)
        util.summarize_results(full_model)

        # Resample rows with replacement (same size as the data) and pick the
        # matching response/offset values through the resampled index labels.
        x_bootstrap = x.sample(frac=1, replace=True, random_state=bootstrap_rng)
        y_bootstrap = y.loc[x_bootstrap.index]
        list_offset_bootstrap = list_offset.loc[x_bootstrap.index]
        list_offset_extra_bootstrap = list_offset_extra.loc[x_bootstrap.index]
        model = util.tunning_negative_binomial_model(x_bootstrap, y_bootstrap, list_offset_bootstrap, list_offset_extra_bootstrap)
        filename = 'model_9_' + period + '_sample_' + str(sample)
        util.save_model(model, filename, 'models/sensitivity_analysis/parameter')
        util.summarize_results(model)

        # --- Model 10 (disabled) ---------------------------------------------
        # Kept for reference. It uses its own design-matrix name so that
        # re-enabling it does not overwrite the Model 9 matrix `x` above.
        #
        # print('\n*** Model 10')
        # print('===>Full model:')
        # with open('models/model_10_'+period+'.pkl', 'rb') as file:
        #     full_model = pickle.load(file)
        # util.summarize_results(full_model)
        #
        # x_model_10 = df_variables.copy()
        # scaler = StandardScaler()
        # percentage_votes_for_bolsonaro_standardized = scaler.fit_transform(df_political)
        # x_model_10['percentage_votes_for_bolsonaro'] = percentage_votes_for_bolsonaro_standardized[:,0]
        # vaccination_column = vaccination_columns[i]
        # vaccination_standardized = scaler.fit_transform(df_vaccination_fully_vaccinated_people)
        # x_model_10['% people fully vaccinated'] = vaccination_standardized[:, vaccination_column]
        # x_model_10 = sm.add_constant(x_model_10)
        #
        # x_bootstrap = x_model_10.sample(frac=1, replace=True, random_state=bootstrap_rng)
        # y_bootstrap = y.loc[x_bootstrap.index]
        # list_offset_bootstrap = list_offset.loc[x_bootstrap.index]
        # list_offset_extra_bootstrap = list_offset_extra.loc[x_bootstrap.index]
        # model = util.tunning_negative_binomial_model(x_bootstrap,y_bootstrap,list_offset_bootstrap, list_offset_extra_bootstrap)
        # filename = 'model_10_'+period+'_sample_'+str(sample)
        # util.save_model(model,filename,'models/sensitivity_analysis/parameter')
        # util.summarize_results(model)


*** Sample:  0

*** Period:  2020_1
\*** Model 9
===>Full model:
                           Generalized Linear Model Regression Results                            
Dep. Variable:     deaths_accumulated_first_semester_2020   No. Observations:                 5560
Model:                                                GLM   Df Residuals:                     5555
Model Family:                            NegativeBinomial   Df Model:                            4
Link Function:                                        Log   Scale:                          1.0000
Method:                                              IRLS   Log-Likelihood:                -8535.4
Date:                                    Thu, 02 Jan 2025   Deviance:                       3482.4
Time:                                            22:39:26   Pearson chi2:                 5.38e+03
No. Iterations:                                        13   Pseudo R-squ. (CS):            0.08756
Covariance Type:                           

In [9]:
# Inspect the resampled design matrix left over from the final iteration of
# the sensitivity loop above (last sample, last period).
x_bootstrap

Unnamed: 0,const,Semi-urbanized,Rural with high human development,Urbanized with informal settlements,Rural with low human development
4617,1.0,0.000050,4.105485e-05,0.894154,0.000003
3498,1.0,0.012843,4.207260e-02,0.026432,0.000024
751,1.0,0.974890,6.228059e-04,0.002663,0.021627
3015,1.0,0.643302,3.061391e-01,0.007692,0.002113
3073,1.0,0.864527,1.043631e-05,0.009870,0.125575
...,...,...,...,...,...
1883,1.0,0.216389,1.314032e-02,0.116897,0.000844
27,1.0,0.838449,2.665368e-02,0.024461,0.002973
259,1.0,0.616270,9.919773e-05,0.343004,0.030690
262,1.0,0.973952,4.159738e-04,0.004651,0.020583


In [10]:
# Inspect the response values selected for that same final bootstrap resample.
y_bootstrap

4617    909.0
3498    134.0
751       6.0
3015     19.0
3073     16.0
        ...  
1883    204.0
27      103.0
259     115.0
262      44.0
1637     94.0
Name: deaths, Length: 5560, dtype: float64

In [11]:
# Inspect the log-population values selected for that same final bootstrap resample.
list_offset_bootstrap

4617    12.140547
3498    10.279009
751      8.392083
3015     8.733272
3073     9.768755
          ...    
1883    11.163538
27      10.239674
259     11.198475
262     10.111477
1637    10.624663
Name: population, Length: 5560, dtype: float64

In [13]:
# List the column names available in df_variables.
df_variables.columns

Index(['percentage_population_age_range_60_more',
       'percentage_urban_population', 'percentage_male_population',
       'percentage_indigenous_population',
       'density_median_effectively_domiciled_area', 'gini',
       'percentage_estimated_households_in_informal_settlements',
       'demographic_density_in_informal_settlements',
       'percentage_hospitalizations_diseases_inadequate_sanitation',
       'percentage_self_employed_workers', 'unemployment_rate',
       'percentage_workers_commerce', 'percentage_workers_services',
       'percentage_workers_industry', 'expected_years_of_schooling_at_age_18'],
      dtype='object')