# Variable sensitivity analysis

In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler
import pickle

import util

## Loading data

In [2]:
# Explanatory variables; the first CSV column is used as the row index.
# NOTE(review): the file name suggests these are already standardized and
# filtered for collinearity -- confirm against the notebook that writes it.
df_variables = pd.read_csv(
    'data/output/df_without_collinearity_standardized.csv',
    index_col=0,
)

In [3]:
# Political data, reduced to a one-column DataFrame holding the share of
# votes for Bolsonaro (kept 2-D because it is later fed to StandardScaler).
df_political = pd.read_csv(
    'data/output/df_political_without_missing_points.csv',
    index_col=0,
)[['percentual_votes_for_bolsonaro']]

In [4]:
# Share of fully vaccinated people, one column per analysis period.
# The column ORDER matters: the sensitivity loop selects columns by position.
df_vaccination_fully_vaccinated_people = pd.read_csv(
    'data/df_vaccination.csv',
    index_col=0,
)[[
    '% people fully vaccinated (1/2020)',
    '% people fully vaccinated (2020)',
    '% people fully vaccinated (2020-2021)',
    '% people fully vaccinated (2020-2022)',
]]

In [5]:
# Outcome table: read below for the 'population' column, the per-period death
# counts and the 'delta_first_death_*' columns. First CSV column is the index.
df_y = pd.read_csv(
    'data/output/df_mortality.csv',
    index_col=0,
)

In [6]:
# Population per row and its natural logarithm. The log is handed to the
# model-fitting helper in the sensitivity loop (presumably as the exposure
# offset of the count model -- confirm in util).
list_population = df_y.loc[:, 'population']
list_offset = np.log(list_population)

## Variable Sensitivity Analysis

In [7]:
# Leave-one-variable-out sensitivity analysis for "Model 10".
#
# For every analysis period: load and summarize the previously fitted full
# model, then refit the model once per explanatory variable with that variable
# removed, saving and summarizing each reduced model.
#
# The four lists below are parallel: position k in each one describes period k.
list_columns_y = ['deaths_accumulated_first_semester_2020', 'deaths_accumulated_2020', 'deaths_2021', 'deaths_2022', 'deaths']
list_periods = ['2020_1', '2020', '2021', '2022', '2020_2022']
list_delta_first_death_columns = ['delta_first_death_2020-06-30', 'delta_first_death_2020-12-31', 'delta_first_death_2021-12-31', 'delta_first_death_2022-12-31', 'delta_first_death_general_period']

# Positional index of the vaccination column used for each period; the
# whole-period run (2020_2022) reuses the last column (index 3).
vaccination_columns = [0, 1, 2, 3, 3]

# zip() would silently truncate to the shortest list, so fail loudly instead
# of quietly skipping a period if the lists ever get out of sync.
if not (len(list_columns_y) == len(list_periods)
        == len(list_delta_first_death_columns) == len(vaccination_columns)):
    raise ValueError('Per-period configuration lists must have the same length')

# Standardization does not depend on the period, so it is done once here
# instead of once per loop iteration. The same scaler object is refit for the
# second frame, exactly as before.
scaler = StandardScaler()
percentage_votes_for_bolsonaro_standardized = scaler.fit_transform(df_political)
vaccination_standardized = scaler.fit_transform(df_vaccination_fully_vaccinated_people)

# Period-independent part of the design matrix.
# NOTE(review): the standardized arrays are assigned positionally, which
# assumes df_variables, df_political and the vaccination frame share the same
# row order -- confirm upstream.
x_base = df_variables.copy()
x_base['percentage_votes_for_bolsonaro'] = percentage_votes_for_bolsonaro_standardized[:, 0]

for column_y, period, column_delta_first_death, vaccination_column in zip(
        list_columns_y, list_periods, list_delta_first_death_columns, vaccination_columns):
    # Second offset term passed to the fitting helper; the +1 keeps the log
    # finite when the delta is 0.
    list_offset_extra = np.log(df_y[column_delta_first_death] + 1)
    print('\n*** Period: ', period)

    y = df_y[column_y]

    # Model 10
    # Was '\*** Model 10': '\*' is an invalid escape sequence (warning on
    # modern Python) and printed a literal backslash; '\n' matches the other
    # section headers.
    print('\n*** Model 10')
    print('===>Full model:')
    # NOTE: pickle.load can execute arbitrary code -- only load model files
    # produced by this project.
    with open('models/model_10_' + period + '.pkl', 'rb') as file:
        model = pickle.load(file)
    util.summarize_results(model)

    # Full design matrix for this period: base variables + vote share +
    # the period's vaccination column.
    x = x_base.copy()
    x['% people fully vaccinated'] = vaccination_standardized[:, vaccination_column]

    for variable in x.columns:
        print('\n*** Removed variable: ', variable)
        # Refit without `variable`; the intercept column is added after the drop.
        x_analysis = sm.add_constant(x.drop(columns=[variable]))
        model = util.tunning_negative_binomial_model(x_analysis, y, list_offset, list_offset_extra)
        filename = 'model_10_' + period + '_' + variable
        util.save_model(model, filename, 'models/sensitivity_analysis/variable')
        util.summarize_results(model)


*** Period:  2020_1
\*** Model 10
===>Full model:
                           Generalized Linear Model Regression Results                            
Dep. Variable:     deaths_accumulated_first_semester_2020   No. Observations:                 5560
Model:                                                GLM   Df Residuals:                     5543
Model Family:                            NegativeBinomial   Df Model:                           16
Link Function:                                        Log   Scale:                          1.0000
Method:                                              IRLS   Log-Likelihood:                -8500.3
Date:                                    Mon, 30 Dec 2024   Deviance:                       3459.1
Time:                                            22:59:20   Pearson chi2:                 5.27e+03
No. Iterations:                                        13   Pseudo R-squ. (CS):             0.1012
Covariance Type:                                nonrobust 

KeyboardInterrupt: 