# Parameter sensitivity analysis

In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler
import pickle

from util import save_model, summarize_results, \
    tunning_negative_binomial_model

## Loading data

In [2]:
# Explanatory variables, indexed by the first CSV column. They are appended
# column-wise to the Model 5 design matrix further down.
# (File name suggests they are already standardized and free of collinear
# columns - TODO confirm against the notebook that writes this file.)
df_variables = pd.read_csv(
    'data/output/df_without_collinearity_standardized.csv',
    index_col=0,
)

In [3]:
# Outcome table, indexed by the first CSV column. Later cells read its
# 'population' column, the per-period death-count columns and the
# 'delta_first_death_*' columns.
df_y = pd.read_csv(
    'data/output/df_mortality.csv',
    index_col=0,
)

In [4]:
# Population per row of df_y and its natural log; the log is what gets passed
# to the model-fitting helper as the offset argument.
list_population = df_y.loc[:, 'population']
list_offset = list_population.pipe(np.log)

In [5]:
# Ordered categorical dtype that fixes the display/sort order of the clusters.
custom_order = pd.CategoricalDtype(
    [
        'Urbanized',
        'Urbanized with informal settlements',
        'Semi-urbanized',
        'Rural with high human development',
        'Rural with low human development',
    ],
    ordered=True,
)

# Keep only the label column, cast it to the ordered dtype and sort the rows
# by that cluster order.
df_cluster_labels = (
    pd.read_csv('data/output/df_labeled_cluster.csv', index_col=0)
    .loc[:, ['cluster_label']]
    .astype({'cluster_label': custom_order})
    .sort_values('cluster_label')
)

In [6]:
# Per-row cluster membership values, one column per cluster (5 columns).
df_cluster_probabilities = pd.read_csv(
    'data/output/df_standardized_pca_all_spherical_5_probability.csv',
    index_col=0,
)
# Columns are renamed positionally.
# NOTE(review): this position -> cluster-name mapping is hard-coded; verify it
# against the clustering output whenever the CSV is regenerated.
df_cluster_probabilities.columns = [
    'Semi-urbanized',
    'Rural with high human development',
    'Urbanized with informal settlements',
    'Rural with low human development',
    'Urbanized',
]

## Parameter Sensitivity Analysis

In [7]:
# Parallel per-period configuration: entry k of each list describes the same
# period (outcome column in df_y, period label used in file names, and the
# first-death delay column in df_y). To run a subset of periods, trim all
# three lists consistently.
list_columns_y = ['deaths_accumulated_first_semester_2020', 'deaths_accumulated_2020', 'deaths_2021', 'deaths_2022', 'deaths']
list_periods = ['2020_1','2020', '2021', '2022', '2020_2022']
list_delta_first_death_columns = ['delta_first_death_2020-06-30', 'delta_first_death_2020-12-31', 'delta_first_death_2021-12-31', 'delta_first_death_2022-12-31', 'delta_first_death_general_period']
# The lists are zipped below; zip would silently drop periods on a mismatch.
assert len(list_columns_y) == len(list_periods) == len(list_delta_first_death_columns)

N_BOOTSTRAP_SAMPLES = 30  # bootstrap re-fits per period
BOOTSTRAP_SEED = 42       # base seed; every (sample, period) pair derives its own seed from it

# Only Model 5 is resampled here. Earlier revisions of this cell carried
# commented-out variants for the other models, saved as
# 'model_<id>_<period>_sample_<n>' in the same output folder:
#   * Model 3: the same design matrix as below, without df_variables.
#   * Model 2: one-hot cluster labels (first level dropped) + df_variables
#     + the standardized first-death delay column.
#
# Period is the outer loop so that the full-model summary and the design
# matrix, which depend only on the period, are produced once per period
# instead of once per bootstrap sample.
for i, (period, column_y, column_delta_first_death) in enumerate(
        zip(list_periods, list_columns_y, list_delta_first_death_columns)):
    print('\n*** Period: ', period)

    y = df_y[column_y]

    # Reference: the model previously fitted on the full data set.
    print('\n*** Model 5')
    print('===>Full model:')
    # NOTE: pickle.load can execute arbitrary code; only load model files
    # that were produced by this project.
    with open('models/model_5_'+period+'.pkl', 'rb') as file:
        model = pickle.load(file)
    summarize_results(model)

    # Design matrix: standardized cluster columns + standardized first-death
    # delay, then the explanatory variables, then an intercept column.
    # NOTE(review): if the five cluster columns are membership probabilities
    # that sum to 1 per row, they are linearly dependent together with the
    # constant - confirm this is intended (the recorded full-model summary
    # reports 100 IRLS iterations).
    x = df_cluster_probabilities.copy()
    x['delta_first_death_period'] = df_y[column_delta_first_death]
    scaler = StandardScaler()
    x_standardized = scaler.fit_transform(x)
    x = pd.DataFrame(x_standardized, index=x.index, columns=x.columns)
    x = pd.concat([x, df_variables], axis=1)
    x = sm.add_constant(x)

    for sample in range(N_BOOTSTRAP_SAMPLES):
        print('\n*** Sample: ', sample)

        # Distinct, deterministic seed per (sample, period) so that a
        # Restart & Run All reproduces exactly the same resamples.
        seed = BOOTSTRAP_SEED + sample * len(list_periods) + i

        # Resample rows with replacement (same size as the original data) and
        # pick the matching outcome / offset rows by index label.
        x_bootstrap = x.sample(frac=1, replace=True, random_state=seed)
        y_bootstrap = y.loc[x_bootstrap.index]
        list_offset_bootstrap = list_offset.loc[x_bootstrap.index]

        # Re-fit on the resampled data via the project helper (presumably a
        # negative binomial GLM with the log-population offset - see util).
        model = tunning_negative_binomial_model(x_bootstrap, y_bootstrap, list_offset_bootstrap)
        filename = 'model_5_'+period+'_sample_'+str(sample)
        save_model(model, filename, 'models/sensitivity_analysis/parameter')
        summarize_results(model)


*** Sample:  0

*** Period:  2020_1
\*** Model 5
===>Full model:
                           Generalized Linear Model Regression Results                            
Dep. Variable:     deaths_accumulated_first_semester_2020   No. Observations:                 5560
Model:                                                GLM   Df Residuals:                     5538
Model Family:                            NegativeBinomial   Df Model:                           21
Link Function:                                        Log   Scale:                          1.0000
Method:                                              IRLS   Log-Likelihood:                -9113.1
Date:                                    Tue, 18 Jun 2024   Deviance:                       4546.0
Time:                                            23:47:14   Pearson chi2:                 5.72e+03
No. Iterations:                                       100   Pseudo R-squ. (CS):             0.6496
Covariance Type:                           

In [8]:
# Inspect the resampled design matrix left in scope by the last iteration of
# the bootstrap loop above (this cell only works after that loop has run).
x_bootstrap

Unnamed: 0,const,Semi-urbanized,Rural with high human development,Urbanized with informal settlements,Rural with low human development,Urbanized,delta_first_death_period,percentage_population_age_range_60_more,percentage_urban_population,demographic_density,...,gini,percentage_estimated_households_in_informal_settlements,demographic_density_in_informal_settlements,percentage_hospitalizations_diseases_inadequate_sanitation,percentage_self_employed_workers,unemployment_rate,percentage_workers_commerce,percentage_workers_services,percentage_workers_industry,expected_years_of_schooling_at_age_18
1418,1.0,0.456604,0.828420,-0.334782,-0.324070,-0.812839,-1.105789,0.706258,-1.407957,-0.172384,...,-0.369034,-0.231671,-0.174949,0.413033,-1.718124,-0.547265,-0.672077,-0.655702,-1.016939,-0.103302
3238,1.0,-0.860770,-0.556115,0.320993,-0.333342,1.405318,1.066754,0.290904,0.474719,0.010804,...,-0.217652,0.115101,1.922059,-0.462661,-0.269622,0.626415,1.477362,1.992942,-0.302421,-0.522284
1279,1.0,1.402458,-0.563814,-0.243816,-0.278838,-0.836473,0.285840,-0.476842,0.527027,-0.137732,...,-0.369034,-0.231671,-0.174949,6.093207,-0.009765,0.205563,3.574598,-1.231788,1.669515,-0.467634
1207,1.0,1.374694,-0.565919,-0.210680,-0.220599,-0.851750,0.656274,-1.047237,-0.815832,0.078866,...,0.842020,-0.171691,-0.174949,-0.634787,-0.278758,1.238088,0.619402,0.494219,-0.015268,-0.412984
364,1.0,0.500858,-0.560549,0.957967,-0.301398,-0.423193,0.375945,-1.147015,0.796210,-0.185349,...,-0.520416,-0.231671,-0.174949,-0.533663,-1.055284,0.129758,2.101539,1.494493,-0.878971,0.525170
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
728,1.0,1.458991,-0.557718,-0.352604,-0.263566,-0.858319,0.135664,0.176606,-1.281736,-0.182279,...,0.236493,-0.231671,-0.174949,-0.204471,-0.680724,-0.910609,-1.278097,-0.810975,-0.868876,-0.340118
4826,1.0,-0.864811,1.813702,-0.321491,-0.333387,-0.127324,-1.556316,2.864454,0.318173,-0.163176,...,-1.580088,-0.231671,-0.174949,-0.361536,1.373772,-0.688420,0.444633,-0.317027,0.159717,1.390458
2526,1.0,1.099081,-0.184275,-0.319624,-0.322684,-0.737832,-0.264805,0.642422,-0.928741,-0.170555,...,-0.520416,-0.231671,-0.174949,-0.619726,-0.088940,-0.714560,-0.181814,-0.160628,-0.868876,-0.148844
4968,1.0,-0.864823,1.853968,-0.318711,-0.333387,-0.160159,0.816461,0.909690,-0.621702,-0.060999,...,-2.034234,-0.231671,-0.174949,-0.679970,1.502686,-1.287023,0.283481,-1.187906,1.488923,0.370329


In [9]:
# Inspect the resampled outcome series left in scope by the last iteration of
# the bootstrap loop above (this cell only works after that loop has run).
y_bootstrap

1418      9.0
3238    249.0
1279     22.0
1207    100.0
364      11.0
        ...  
728       7.0
4826      4.0
2526      8.0
4968     13.0
2792     49.0
Name: deaths, Length: 5560, dtype: float64

In [10]:
# Inspect the resampled log-population offsets left in scope by the last
# iteration of the bootstrap loop above (only works after that loop has run).
list_offset_bootstrap

1418     7.871693
3238    10.938023
1279     9.518780
1207    10.763970
364      8.147578
          ...    
728      8.852379
4826     8.113127
2526     8.543446
4968     8.837391
2792     9.541082
Name: population, Length: 5560, dtype: float64