In [1]:
import pandas as pd
import numpy as np
from statsmodels.stats.proportion import proportion_confint

In [2]:
# Read the prescription summary workbook. The sheet has a two-level column
# header and its first column holds the row labels ('ab', 'dm', 'total').
table = pd.read_excel('../complete_prescription_table.xlsx', header=[0, 1],
                      index_col=0)
# Keep only the raw counts under the top-level 'number' header. The explicit
# copy detaches the result from the frame that was read, so the cell
# assignment below writes to an independent object instead of a slice
# (avoids pandas' SettingWithCopy ambiguity).
table = table['number'].copy()
# Any 'ab' count recorded in the no-antibiotic column ('0') is treated as an
# outlier and forced to zero.
table.loc['ab', '0'] = 0
# Give the columns readable labels. `rename` returns a new frame; reassigning
# (rather than inplace=True) keeps the cell free of in-place mutation.
table = table.rename(
    columns={'all': 'All', '0': 'No Antibiotic', '1': 'Q1', '2': 'Q2',
             '3': 'Q3', '4': 'Q4'}
)
table

Unnamed: 0,All,No Antibiotic,Q1,Q2,Q3,Q4
ab,7455416,0,502401,1046963,1721120,4182168
dm,7783331,934428,1421837,1639246,1707564,2080256
total,99859824,6426526,12704037,17543904,22255883,40929474


In [3]:
# Collects the frames (keyed by their future top-level column label) that are
# concatenated side by side further down.
concat_dict = {}

In [4]:
# 'number' stores the raw counts unchanged; 'proportion' divides the 'ab' and
# 'dm' rows by the 'total' row of the same column.
concat_dict.update(
    number=table,
    proportion=table.loc[['ab', 'dm']].div(table.loc['total'], axis='columns'),
)

In [5]:
# Show the per-column proportions for the 'ab' and 'dm' rows.
concat_dict['proportion']

Unnamed: 0,All,No Antibiotic,Q1,Q2,Q3,Q4
ab,0.074659,0.0,0.039547,0.059677,0.077333,0.10218
dm,0.077943,0.145402,0.11192,0.093437,0.076724,0.050825


In [6]:
# Lower and upper confidence bounds for every 'ab'/'dm' proportion.
# alpha and method are the statsmodels defaults, written out so the interval
# type (95 %, normal approximation) is visible in the notebook.
concat_dict['low'], concat_dict['up'] = proportion_confint(
    count=table.loc[['ab', 'dm']],
    nobs=table.loc['total'],
    alpha=0.05,
    method='normal',
)

In [23]:
# Column labels of the count table, reused to label the confidence-bound frames.
col_names = list(table.columns)

In [19]:
def rename_cols(df, col_names):
    """Return a copy of ``df`` whose columns are relabelled to ``col_names``.

    The input frame is left untouched, so re-running a cell that calls this
    function never changes a frame that another cell has already displayed.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to relabel.
    col_names : list-like
        New column labels, one per existing column, in column order.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same data and index as ``df`` and ``col_names``
        as its columns.

    Raises
    ------
    ValueError
        Raised by pandas when ``len(col_names)`` differs from the number of
        columns in ``df``.
    """
    relabelled = df.copy()
    relabelled.columns = col_names
    return relabelled

In [27]:
# Give both confidence-bound frames the same column labels as the count table.
for bound_key in ('low', 'up'):
    concat_dict[bound_key] = rename_cols(concat_dict[bound_key], col_names=col_names)

In [28]:
# A cell only renders its last bare expression, so listing the two frames on
# separate lines would show the upper bounds alone. display() renders both:
# first the lower bounds, then the upper bounds.
display(concat_dict['low'], concat_dict['up'])

Unnamed: 0,All,No Antibiotic,Q1,Q2,Q3,Q4
ab,0.07471,0.0,0.039654,0.059788,0.077444,0.102273
dm,0.077995,0.145674,0.112093,0.093573,0.076835,0.050893


In [29]:
# Place all collected frames side by side; the dict keys become the outer
# column level. Rows missing from a frame (here 'total' in the derived
# frames) are filled with NaN.
pd.concat(concat_dict, axis='columns')

Unnamed: 0_level_0,number,number,number,number,number,number,proportion,proportion,proportion,proportion,...,low,low,low,low,up,up,up,up,up,up
Unnamed: 0_level_1,All,No Antibiotic,Q1,Q2,Q3,Q4,All,No Antibiotic,Q1,Q2,...,Q1,Q2,Q3,Q4,All,No Antibiotic,Q1,Q2,Q3,Q4
ab,7455416,0,502401,1046963,1721120,4182168,0.074659,0.0,0.039547,0.059677,...,0.039439,0.059566,0.077222,0.102087,0.07471,0.0,0.039654,0.059788,0.077444,0.102273
dm,7783331,934428,1421837,1639246,1707564,2080256,0.077943,0.145402,0.11192,0.093437,...,0.111747,0.093301,0.076614,0.050758,0.077995,0.145674,0.112093,0.093573,0.076835,0.050893
total,99859824,6426526,12704037,17543904,22255883,40929474,,,,,...,,,,,,,,,,


In [36]:
def build_string(val_df, low_df, up_df, to_percentage, decimals=2):

    if to_percentage:
        val_df = val_df * 100
        low_df = low_df * 100
        up_df = up_df * 100

    val_df = val_df.map(lambda x: f'{x:.2f}')
    low_df = low_df.map(lambda x: f'{x:.2f}')
    up_df = up_df.map(lambda x: f'{x:.2f}')



    return val_df + ' (' + low_df + '-' + up_df + ')'

In [38]:
# Build the "value (low-up)" percentage strings for the 'ab' and 'dm' rows.
temp = build_string(
    val_df=concat_dict['proportion'].loc[['ab', 'dm']],
    low_df=concat_dict['low'].loc[['ab', 'dm']],
    up_df=concat_dict['up'].loc[['ab', 'dm']],
    to_percentage=True,
)

In [39]:
# Add the raw 'total' counts as an extra row below the formatted strings;
# assigning to a label that is not yet in the index appends the row.
temp.loc['total'] = table.loc['total']

In [41]:
# Show the formatted table: percentage strings for 'ab'/'dm' plus the 'total' counts.
temp

Unnamed: 0,All,No Antibiotic,Q1,Q2,Q3,Q4
ab,7.47 (7.46-7.47),0.00 (0.00-0.00),3.95 (3.94-3.97),5.97 (5.96-5.98),7.73 (7.72-7.74),10.22 (10.21-10.23)
dm,7.79 (7.79-7.80),14.54 (14.51-14.57),11.19 (11.17-11.21),9.34 (9.33-9.36),7.67 (7.66-7.68),5.08 (5.08-5.09)
total,99859824,6426526,12704037,17543904,22255883,40929474


In [44]:
# Export the formatted table to ../prescription_table_edited.xlsx.
# The export is disabled; uncomment the line below to write the file.
# temp.to_excel('../prescription_table_edited.xlsx')