In [128]:
import pandas as pd
import numpy as np

In [129]:
# Display labels for the numeric fund codes; each label's position in the
# list below is its code (0, 1, 2, ...).
fund_name_dict = dict(enumerate([
    'Rural',
    'Civil servants',
    'Iranian',  # Removed self-employed
    'Universal',
    'Foreign',
    'Others',
]))

# Inverse lookup: label -> fund code.
fund_name_dict_reversed = dict(
    zip(fund_name_dict.values(), fund_name_dict.keys())
)

In [130]:
def build_string_ci_2(val, low, up):
    """Render estimates and their interval bounds as ``'v (l-u)'`` strings.

    Parameters
    ----------
    val, low, up : pandas.Series
        Numeric estimate, lower bound and upper bound. The pieces are joined
        with Series ``+``, so entries are paired by index label.

    Returns
    -------
    pandas.Series
        One string per entry with every number fixed to two decimals,
        e.g. ``'0.98 (0.96-0.99)'``.
    """
    two_decimals = '{:.2f}'.format
    estimate, lower, upper = (
        series.map(two_decimals) for series in (val, low, up)
    )
    return estimate + ' (' + lower + '-' + upper + ')'


def build_string_pvalue(val):
    """Format p-values as three-decimal strings, collapsing tiny ones.

    Parameters
    ----------
    val : pandas.Series
        Numeric p-values.

    Returns
    -------
    pandas.Series
        New Series of strings with the same index. Values below 0.0005
        (those that would print as ``'0.000'``) become ``'<0.001'``; all
        others are formatted with three decimals. ``val`` is not modified.
    """
    # Decide which entries are "tiny" on the numeric values, before they are
    # turned into text.
    is_tiny = val < 0.0005
    formatted = val.map('{:.3f}'.format)
    return formatted.mask(is_tiny, '<0.001')

## All Variables

In [131]:
# Load the all-variables GLM results workbook. The sheet uses a two-row
# column header and keeps the row labels in its first column.
all_df = pd.read_excel(
    '../GLM_all_variables_f.xlsx',
    index_col=0,
    header=[0, 1],
)
all_df

Unnamed: 0_level_0,params,params,pvalues,conf_int,conf_int,category,value
Unnamed: 0_level_1,0,exp,1,0,1,Unnamed: 6_level_1,Unnamed: 7_level_1
C(gender)[T.1],-0.024593,0.975707,0.00260188,-0.0406,-0.008586,gender,1
C(fund)[T.1],0.412595,1.510733,1.867081e-281,0.390037,0.435152,fund,1
C(fund)[T.2],0.411546,1.509149,7.929531e-185,0.383726,0.439367,fund,2
C(fund)[T.3],-0.324831,0.722649,3.63728e-118,-0.35238,-0.297283,fund,3
C(fund)[T.4],-0.465192,0.628015,4.742853e-06,-0.664446,-0.265937,fund,4
C(fund)[T.5],0.346776,1.4145,4.434949e-120,0.317606,0.375946,fund,5
"C(age_cat)[T.Interval(40, 64, closed='both')]",0.287182,1.332667,4.23169e-192,0.268144,0.306221,age_cat,"Interval(40, 64, closed='both')"
"C(age_cat)[T.Interval(65, 95, closed='both')]",0.406627,1.501744,6.894542e-257,0.383349,0.429906,age_cat,"Interval(65, 95, closed='both')"
C(cat)[T.1],0.327703,1.387777,2.5848739999999998e-161,0.303972,0.351435,cat,1
C(cat)[T.2],0.535773,1.708769,0.0,0.51166,0.559887,cat,2


In [132]:
# Exponentiate both confidence-bound columns so they match the exponentiated
# estimate stored in ('params', 'exp').
# NOTE: this overwrites all_df in place, so re-running this cell applies exp()
# a second time -- reload all_df from the workbook before running it again.
all_df['conf_int'] = np.exp(all_df['conf_int'])

In [133]:
# Show the frame again: the conf_int columns now hold the exponentiated bounds.
all_df

Unnamed: 0_level_0,params,params,pvalues,conf_int,conf_int,category,value
Unnamed: 0_level_1,0,exp,1,0,1,Unnamed: 6_level_1,Unnamed: 7_level_1
C(gender)[T.1],-0.024593,0.975707,0.00260188,0.960213,0.991451,gender,1
C(fund)[T.1],0.412595,1.510733,1.867081e-281,1.477036,1.545198,fund,1
C(fund)[T.2],0.411546,1.509149,7.929531e-185,1.467743,1.551724,fund,2
C(fund)[T.3],-0.324831,0.722649,3.63728e-118,0.703013,0.742834,fund,3
C(fund)[T.4],-0.465192,0.628015,4.742853e-06,0.514558,0.766487,fund,4
C(fund)[T.5],0.346776,1.4145,4.434949e-120,1.373835,1.456369,fund,5
"C(age_cat)[T.Interval(40, 64, closed='both')]",0.287182,1.332667,4.23169e-192,1.307535,1.358282,age_cat,"Interval(40, 64, closed='both')"
"C(age_cat)[T.Interval(65, 95, closed='both')]",0.406627,1.501744,6.894542e-257,1.46719,1.537113,age_cat,"Interval(65, 95, closed='both')"
C(cat)[T.1],0.327703,1.387777,2.5848739999999998e-161,1.355231,1.421106,cat,1
C(cat)[T.2],0.535773,1.708769,0.0,1.668057,1.750475,cat,2


In [134]:
# Empty output table sharing all_df's row index; the 'coef' and 'p_value'
# columns are filled with formatted strings further down.
all_table = pd.DataFrame(columns=['coef', 'p_value'], index=all_df.index)

In [135]:
# "estimate (lower-upper)" strings built from the exponentiated coefficient
# and the two confidence-bound columns.
exp_coef = all_df[('params', 'exp')]
ci_lower = all_df[('conf_int', 0)]
ci_upper = all_df[('conf_int', 1)]
all_table.loc[:, 'coef'] = build_string_ci_2(exp_coef, ci_lower, ci_upper)

# Three-decimal p-value strings ('<0.001' for very small values).
p_values = all_df[('pvalues', 1)]
all_table.loc[:, 'p_value'] = build_string_pvalue(p_values)

In [136]:
# Copy the two label columns across as a plain array, i.e. by position, so
# all_df's two-level column labels play no part in the assignment.
all_table[['category', 'value']] = all_df[['category', 'value']].to_numpy()

In [137]:
# Swap fund codes for their display names, but only on rows whose category is
# 'fund'; codes of other categories (e.g. gender '1') are left as they are.
# The lookup is keyed by the code as a string.
fund_labels = {str(code): label for code, label in fund_name_dict.items()}
is_fund_row = all_table['category'] == 'fund'
all_table['value'] = np.where(
    is_fund_row,
    all_table['value'].replace(fund_labels),
    all_table['value'],
)
all_table

Unnamed: 0,coef,p_value,category,value
C(gender)[T.1],0.98 (0.96-0.99),0.003,gender,1
C(fund)[T.1],1.51 (1.48-1.55),<0.001,fund,Civil servants
C(fund)[T.2],1.51 (1.47-1.55),<0.001,fund,Iranian
C(fund)[T.3],0.72 (0.70-0.74),<0.001,fund,Universal
C(fund)[T.4],0.63 (0.51-0.77),<0.001,fund,Foreign
C(fund)[T.5],1.41 (1.37-1.46),<0.001,fund,Others
"C(age_cat)[T.Interval(40, 64, closed='both')]",1.33 (1.31-1.36),<0.001,age_cat,"Interval(40, 64, closed='both')"
"C(age_cat)[T.Interval(65, 95, closed='both')]",1.50 (1.47-1.54),<0.001,age_cat,"Interval(65, 95, closed='both')"
C(cat)[T.1],1.39 (1.36-1.42),<0.001,cat,1
C(cat)[T.2],1.71 (1.67-1.75),<0.001,cat,2


In [138]:
# Replace the row index with the (category, value) pair.
# NOTE: this consumes the two columns, so the cell cannot be run twice on the
# same all_table.
all_table = all_table.set_index(['category', 'value'])

In [139]:
# Final all-variables table, indexed by (category, value).
all_table

Unnamed: 0_level_0,Unnamed: 1_level_0,coef,p_value
category,value,Unnamed: 2_level_1,Unnamed: 3_level_1
gender,1,0.98 (0.96-0.99),0.003
fund,Civil servants,1.51 (1.48-1.55),<0.001
fund,Iranian,1.51 (1.47-1.55),<0.001
fund,Universal,0.72 (0.70-0.74),<0.001
fund,Foreign,0.63 (0.51-0.77),<0.001
fund,Others,1.41 (1.37-1.46),<0.001
age_cat,"Interval(40, 64, closed='both')",1.33 (1.31-1.36),<0.001
age_cat,"Interval(65, 95, closed='both')",1.50 (1.47-1.54),<0.001
cat,1,1.39 (1.36-1.42),<0.001
cat,2,1.71 (1.67-1.75),<0.001


### Saving

In [140]:
# Saving GLM_all_variables_edited
# all_table.to_excel('../GLM_all_variables_edited_f.xlsx')

## Single Variable

In [141]:
# Load the single-variable GLM results workbook. The sheet uses a two-row
# column header and its first two columns form the row index.
single_df = pd.read_excel(
    '../GLM_single_variable_f.xlsx',
    index_col=[0, 1],
    header=[0, 1],
)
single_df

Unnamed: 0_level_0,Unnamed: 1_level_0,params,params,pvalues,conf_int,conf_int,category,value
Unnamed: 0_level_1,Unnamed: 1_level_1,0,exp,1,0,1,Unnamed: 7_level_1,Unnamed: 8_level_1
gender,C(gender)[T.1],0.145749,1.156906,2.955676e-116,0.133286,0.158213,gender,1
fund,C(fund)[T.1],0.659284,1.933408,0.0,0.643836,0.674732,fund,1
fund,C(fund)[T.2],0.607889,1.836551,0.0,0.588016,0.627763,fund,2
fund,C(fund)[T.3],-0.386155,0.679665,0.0,-0.406178,-0.366132,fund,3
fund,C(fund)[T.4],-0.514249,0.59795,3.029662e-11,-0.665925,-0.362572,fund,4
fund,C(fund)[T.5],0.643099,1.902367,0.0,0.622554,0.663643,fund,5
age_cat,"C(age_cat)[T.Interval(40, 64, closed='both')]",0.572727,1.773095,0.0,0.55829,0.587163,age_cat,"Interval(40, 64, closed='both')"
age_cat,"C(age_cat)[T.Interval(65, 95, closed='both')]",0.737035,2.089729,0.0,0.719666,0.754403,age_cat,"Interval(65, 95, closed='both')"
cat,C(cat)[T.1],0.305134,1.356807,2.9056000000000003e-213,0.285946,0.324322,cat,1
cat,C(cat)[T.2],0.517965,1.678609,0.0,0.498775,0.537156,cat,2


In [142]:
# Exponentiate both confidence-bound columns so they match the exponentiated
# estimate stored in ('params', 'exp').
# NOTE: this overwrites single_df in place, so re-running this cell applies
# exp() a second time -- reload single_df from the workbook before running it
# again.
single_df['conf_int'] = np.exp(single_df['conf_int'])

In [143]:
# Show the frame again: the conf_int columns now hold the exponentiated bounds.
single_df

Unnamed: 0_level_0,Unnamed: 1_level_0,params,params,pvalues,conf_int,conf_int,category,value
Unnamed: 0_level_1,Unnamed: 1_level_1,0,exp,1,0,1,Unnamed: 7_level_1,Unnamed: 8_level_1
gender,C(gender)[T.1],0.145749,1.156906,2.955676e-116,1.142576,1.171415,gender,1
fund,C(fund)[T.1],0.659284,1.933408,0.0,1.903771,1.963507,fund,1
fund,C(fund)[T.2],0.607889,1.836551,0.0,1.800412,1.873415,fund,2
fund,C(fund)[T.3],-0.386155,0.679665,0.0,0.666192,0.693411,fund,3
fund,C(fund)[T.4],-0.514249,0.59795,3.029662e-11,0.513798,0.695884,fund,4
fund,C(fund)[T.5],0.643099,1.902367,0.0,1.863682,1.941854,fund,5
age_cat,"C(age_cat)[T.Interval(40, 64, closed='both')]",0.572727,1.773095,0.0,1.747682,1.798878,age_cat,"Interval(40, 64, closed='both')"
age_cat,"C(age_cat)[T.Interval(65, 95, closed='both')]",0.737035,2.089729,0.0,2.053747,2.126342,age_cat,"Interval(65, 95, closed='both')"
cat,C(cat)[T.1],0.305134,1.356807,2.9056000000000003e-213,1.33102,1.383092,cat,1
cat,C(cat)[T.2],0.517965,1.678609,0.0,1.646702,1.711133,cat,2


In [144]:
# Empty output table sharing single_df's row index; the 'coef' and 'p_value'
# columns are filled with formatted strings further down.
single_table = pd.DataFrame(columns=['coef', 'p_value'], index=single_df.index)

In [145]:
# "estimate (lower-upper)" strings built from the exponentiated coefficient
# and the two confidence-bound columns.
exp_coef = single_df[('params', 'exp')]
ci_lower = single_df[('conf_int', 0)]
ci_upper = single_df[('conf_int', 1)]
single_table.loc[:, 'coef'] = build_string_ci_2(exp_coef, ci_lower, ci_upper)

# Three-decimal p-value strings ('<0.001' for very small values).
p_values = single_df[('pvalues', 1)]
single_table.loc[:, 'p_value'] = build_string_pvalue(p_values)

In [146]:
# Copy the two label columns across as a plain array, i.e. by position, so
# single_df's two-level column labels play no part in the assignment.
single_table[['category', 'value']] = single_df[['category', 'value']].to_numpy()

In [147]:
# Swap fund codes for their display names, but only on rows whose category is
# 'fund'; codes of other categories (e.g. gender '1') are left as they are.
# The lookup is keyed by the code as a string.
fund_labels = {str(code): label for code, label in fund_name_dict.items()}
is_fund_row = single_table['category'] == 'fund'
single_table['value'] = np.where(
    is_fund_row,
    single_table['value'].replace(fund_labels),
    single_table['value'],
)
single_table

Unnamed: 0,Unnamed: 1,coef,p_value,category,value
gender,C(gender)[T.1],1.16 (1.14-1.17),<0.001,gender,1
fund,C(fund)[T.1],1.93 (1.90-1.96),<0.001,fund,Civil servants
fund,C(fund)[T.2],1.84 (1.80-1.87),<0.001,fund,Iranian
fund,C(fund)[T.3],0.68 (0.67-0.69),<0.001,fund,Universal
fund,C(fund)[T.4],0.60 (0.51-0.70),<0.001,fund,Foreign
fund,C(fund)[T.5],1.90 (1.86-1.94),<0.001,fund,Others
age_cat,"C(age_cat)[T.Interval(40, 64, closed='both')]",1.77 (1.75-1.80),<0.001,age_cat,"Interval(40, 64, closed='both')"
age_cat,"C(age_cat)[T.Interval(65, 95, closed='both')]",2.09 (2.05-2.13),<0.001,age_cat,"Interval(65, 95, closed='both')"
cat,C(cat)[T.1],1.36 (1.33-1.38),<0.001,cat,1
cat,C(cat)[T.2],1.68 (1.65-1.71),<0.001,cat,2


In [148]:
# Replace the row index with the (category, value) pair.
# NOTE: this consumes the two columns, so the cell cannot be run twice on the
# same single_table.
single_table = single_table.set_index(['category', 'value'])

In [149]:
# Final single-variable table, indexed by (category, value).
single_table

Unnamed: 0_level_0,Unnamed: 1_level_0,coef,p_value
category,value,Unnamed: 2_level_1,Unnamed: 3_level_1
gender,1,1.16 (1.14-1.17),<0.001
fund,Civil servants,1.93 (1.90-1.96),<0.001
fund,Iranian,1.84 (1.80-1.87),<0.001
fund,Universal,0.68 (0.67-0.69),<0.001
fund,Foreign,0.60 (0.51-0.70),<0.001
fund,Others,1.90 (1.86-1.94),<0.001
age_cat,"Interval(40, 64, closed='both')",1.77 (1.75-1.80),<0.001
age_cat,"Interval(65, 95, closed='both')",2.09 (2.05-2.13),<0.001
cat,1,1.36 (1.33-1.38),<0.001
cat,2,1.68 (1.65-1.71),<0.001


### Saving

In [150]:
# Saving GLM_single_variable_edited
# single_table.to_excel('../GLM_single_variable_edited_f.xlsx')