# TableOne Creation

## 0. Libraries

In [15]:
import pandas as pd
from tableone import TableOne
from IPython.display import HTML
import openpyxl

## 1. Load Data

Read the dataframe

In [16]:
df = pd.read_pickle("./data/patient_imputed_cleaned.pkl")

Columns realted to dates won't be include in the tableOne.

In [17]:
df_to_table = df.drop(columns=["death_datetime", "admission_datetime", "discharge_datetime"])

Set names more accurated to table one

In [18]:
df_to_table.rename(columns={"Status":"hospital_outcome", "Survival_in_Days":"followup_days"}, inplace=True)

A binary column is added indicating whether the patient had been vaccinated against Covid before admission or not.

In [19]:
df_to_table["vaccinated"].value_counts()

vaccinated
0    22724
1    14550
Name: count, dtype: int64

Change labels of binary variables to be more clear.

In [20]:
df_to_table.drop(columns=["id"], inplace=True)
df_to_table.info()

<class 'pandas.core.frame.DataFrame'>
Index: 37274 entries, 0 to 49968
Data columns (total 49 columns):
 #   Column                     Non-Null Count  Dtype   
---  ------                     --------------  -----   
 0   sex                        37274 non-null  category
 1   age                        37274 non-null  int64   
 2   num_shots                  37274 non-null  int64   
 3   type_center                37274 non-null  category
 4   vaccinated                 37274 non-null  category
 5   icu                        37274 non-null  category
 6   inpatient_days             37274 non-null  int64   
 7   hospital_outcome           37274 non-null  category
 8   followup_days              37274 non-null  int64   
 9   wave_3                     37274 non-null  category
 10  wave_4                     37274 non-null  category
 11  wave_5                     37274 non-null  category
 12  wave_6                     37274 non-null  category
 13  wave_7                     37274 non

In [21]:
# Sex vars
mapper = {0: 'Female', 1: 'Male'}
df_to_table['sex'] = df_to_table['sex'].map(mapper)

df_to_table['hospital_outcome'] = df_to_table['hospital_outcome'].map({0: "Survivor", 1: "Deceased"})


# Yes/No vars
mapper = {0: 'No', 1: 'Yes'}
df_to_table['icu'] = df_to_table['icu'].map(mapper)
df_to_table['vaccinated'] = df_to_table['vaccinated'].map(mapper)

list_waves_pmhx = [col for col in df_to_table.columns if col.lower().startswith(('wave', 'pmhx'))]

for var in list_waves_pmhx:
    df_to_table[var] = df_to_table[var].map(mapper)

**Atention**<br>
People who have not died have a value in `folloup_days` equal to the days spent in the hospital + 30. Their `hospital_outcome` is 0.

Columns are reorderer

In [22]:
new_order = ['id',
             'sex',
             'age',
             'num_shots',
             "type_center",
             'vaccinated',
             'icu',
             'inpatient_days',
             'admission_datetime',
             'discharge_datetime',
             'hospital_outcome',
             'death_datetime',
             'delta_days_death',
             'wave_1', 
             'wave_2',
             'wave_3','wave_4',
             'wave_5',
             'wave_6',
             'wave_7',
             'pmhx_activecancer',
             'pmhx_asthma',
             'pmhx_chf',
             'pmhx_chronicliver',
             'pmhx_ckd',
             'pmhx_copd','pmhx_dementia',
             'pmhx_diabetes',
             'pmhx_hld',
             'pmhx_htn',
             'pmhx_ihd','pmhx_obesity',
             'pmhx_stroke',
             'lab_alt',
             'lab_ast',
             'lab_creatinine',
             'lab_crp',
             'lab_ddimer',
             'lab_glucose',
             'lab_hct',
             'lab_hemoglobin',
             'lab_inr',
             'lab_ldh',
             'lab_leukocyte',
             'lab_lymphocyte',
             'lab_lymphocyte_percentage',
             'lab_mch',
             'lab_mcv',
             'lab_neutrophil',
             'lab_neutrophil_percentage',
             'lab_platelet',
             'lab_potassium',
             'lab_rbc',
             'lab_sodium',
             'lab_urea']

# Get the intersection of existing columns and the new order
columns_to_reorder = [col for col in new_order if col in df.columns]

# Reorder the DataFrame columns
df = df[columns_to_reorder]

## 2. Configure TableOne

In [23]:
# Get all columns names
columns = list(df_to_table.columns)

# Get ategorical columns
categorical = [col for col in df_to_table.columns if pd.api.types.is_categorical_dtype(df_to_table[col])]
# Remove the stratify from columns
categorical.remove("hospital_outcome")

# Variable to stratify
groupby = "hospital_outcome"

# Set order of keys and keep only yes rows from indicated variables
order = {key: ["Yes", "No"] for key in list_waves_pmhx} 
order.update({"icu": ["Yes", "No"]})

# ACTIVATE IF YOU ONLY WANT TO PLOT THE YES CASES
limit = {key: 1 for key in list_waves_pmhx}
limit.update({"icu": 1})

# All numerical variables are no normal.
nonormal = [col for col in df_to_table.columns if pd.api.types.is_numeric_dtype(df_to_table[col])]

# Rename mortality for a cleary understanding
labels = {'sex': 'Sex',
    'age': 'Age',
    'num_shots': 'Vaccine Doses Administered',
    'type_center': 'Medical Center Type',
    'icu': 'ICU Stay',
    'vaccinated': 'Vaccination Status',
    'inpatient_days': 'Inpatient Days',
    'hospital_outcome': 'Mortality',
    'followup_days': 'Followup Days',
    'wave_1': 'Wave 1',
    'wave_2': 'Wave 2', 
    'wave_3': 'Wave 3',
    'wave_4': 'Wave 4',
    'wave_5': 'Wave 5',
    'wave_6': 'Wave 6',
    'wave_7': 'Wave 7',
    'pmhx_activecancer': 'Preexisting Condition Active Cancer',
    'pmhx_asthma': 'Preexisting Condition Asthma',
    'pmhx_chf': 'Preexisting Condition Congestive Heart Failure',
    'pmhx_chronicliver': 'Preexisting Condition Chronic Liver',
    'pmhx_ckd': 'Preexisting Condition Chronic Kidney Disease',
    'pmhx_copd': 'Preexisting Condition COPD',
    'pmhx_dementia': 'Preexisting Condition Dementia',
    'pmhx_diabetes': 'Preexisting Condition Diabetes',
    'pmhx_hld': 'Preexisting Condition Hyperlipidemia',
    'pmhx_htn': 'Preexisting Condition Hypertension',
    'pmhx_ihd': 'Preexisting Condition Ischemic Heart Disease',
    'pmhx_obesity': 'Preexisting Condition Obesity',
    'pmhx_stroke': 'Preexisting Condition Stroke',
    'lab_alt': 'Alanine Transaminase (U/L)',
    'lab_ast': 'Aspartate Transaminase (U/L)',
    'lab_creatinine': 'Creatinine (mg/dL)',
    'lab_crp': 'C-Reactive Protein (mg/L)',
    'lab_ddimer': 'D-Dimer (ng/mL)',
    'lab_glucose': 'Glucose (mg/dL)',
    'lab_hct': 'Hematocrit (%)',
    'lab_hemoglobin': 'Hemoglobin (g/dL)',
    'lab_inr': 'Normalized Prothrombin Time - INR (-)',
    'lab_ldh': 'Lactate Dehydrogenase (U/L)',
    'lab_leukocyte': 'Leukocyte Count (x10^3/µL)',
    'lab_lymphocyte': 'Lymphocyte Count (x10^3/µL)',
    'lab_lymphocyte_percentage': 'Lymphocyte Percentage (%)',
    'lab_mch': 'Mean Corpuscular Hemoglobin (pg)',
    'lab_mcv': 'Mean Corpuscular Volume (fL)',
    'lab_neutrophil': 'Neutrophil Count (x10^3/µL)',
    'lab_neutrophil_percentage': 'Neutrophil Percentage (%)',
    'lab_platelet': 'Platelet Count (x10^3/µL)',
    'lab_potassium': 'Potassium (mEq/L in HM, mmol/L in other units)',
    'lab_rbc': 'Red Blood Cell Count (x10^6/µL)',
    'lab_sodium': 'Sodium (mEq/L in HM, mmol/L in other units)',
    'lab_urea': 'Urea (mg/dL)'
}

# Check the columns exists
labels = {column: dtype for column, dtype in labels.items() if column in df.columns}

# Set 0 decimals to int numeric variables
decimals = {"age": 0,
            "num_shots": 0,
            "inpatient_days": 0,
            "followup_days":0}

  categorical = [col for col in df_to_table.columns if pd.api.types.is_categorical_dtype(df_to_table[col])]
  categorical = [col for col in df_to_table.columns if pd.api.types.is_categorical_dtype(df_to_table[col])]
  categorical = [col for col in df_to_table.columns if pd.api.types.is_categorical_dtype(df_to_table[col])]
  categorical = [col for col in df_to_table.columns if pd.api.types.is_categorical_dtype(df_to_table[col])]
  categorical = [col for col in df_to_table.columns if pd.api.types.is_categorical_dtype(df_to_table[col])]
  categorical = [col for col in df_to_table.columns if pd.api.types.is_categorical_dtype(df_to_table[col])]
  categorical = [col for col in df_to_table.columns if pd.api.types.is_categorical_dtype(df_to_table[col])]
  categorical = [col for col in df_to_table.columns if pd.api.types.is_categorical_dtype(df_to_table[col])]
  categorical = [col for col in df_to_table.columns if pd.api.types.is_categorical_dtype(df_to_table[col])]
  categorical = [col for col

**Warning**: All numeric variables are considered non-normally distributed after performing the Kolmogorov test. Check Notebook `2_data_exploration`.

## 3. Generate TableOne

In [24]:
mytable = TableOne(df_to_table,
                   columns=columns, 
                   categorical=categorical,
                   groupby=groupby,
                   nonnormal =nonormal,
                   rename=labels,
                   decimals=decimals,
                   order=order,
                   limit=limit, #ACTIVATE TO USE LIMITS
                   pval=True)

  self._groupbylvls = sorted(data.groupby(groupby).groups.keys())  # type: ignore


  df_cont = pd.pivot_table(cont_data,
  df_cont = pd.pivot_table(cont_data,
  df_cont = pd.pivot_table(cont_data,
  df_cont = pd.pivot_table(cont_data,
  df_cont = pd.pivot_table(cont_data,
  df_cont = pd.pivot_table(cont_data,
  df_cont = pd.pivot_table(cont_data,
  df_cont = pd.pivot_table(cont_data,
  df_cont = pd.pivot_table(cont_data,
  df_cont = pd.pivot_table(cont_data,
  df_cont = pd.pivot_table(cont_data,
  df_cont = pd.pivot_table(cont_data,
  df_cont = pd.pivot_table(cont_data,


Let's plot the TableOne

In [25]:
HTML(mytable.tableone.to_html())

Unnamed: 0_level_0,Unnamed: 1_level_0,Grouped by Mortality,Grouped by Mortality,Grouped by Mortality,Grouped by Mortality,Grouped by Mortality
Unnamed: 0_level_1,Unnamed: 1_level_1,Missing,Overall,Deceased,Survivor,P-Value
n,,,37274,7202,30072,
"Sex, n (%)",Female,0.0,21420 (57.5),4211 (58.5),17209 (57.2),0.057
"Sex, n (%)",Male,,15854 (42.5),2991 (41.5),12863 (42.8),
"Age, median [Q1,Q3]",,0.0,"68 [54,80]","81 [72,87]","64 [51,77]",<0.001
"Vaccine Doses Administered, median [Q1,Q3]",,0.0,"0 [0,2]","0 [0,3]","0 [0,2]",<0.001
"Medical Center Type, n (%)",County Hospital,0.0,8445 (22.7),1940 (26.9),6505 (21.6),<0.001
"Medical Center Type, n (%)",Regional Hospital,,14888 (39.9),2634 (36.6),12254 (40.7),
"Medical Center Type, n (%)",Specialized Hospital,,13941 (37.4),2628 (36.5),11313 (37.6),
"Vaccination Status, n (%)",No,0.0,22724 (61.0),3726 (51.7),18998 (63.2),<0.001
"Vaccination Status, n (%)",Yes,,14550 (39.0),3476 (48.3),11074 (36.8),


Save the tableone in an html file

In [26]:
df_tableone = mytable.tableone
save_path = "./TableOne.html"

# # # # # # # # # # # # # # # # # # # # # # # # # # # 
pd.set_option('colheader_justify', 'center')   # FOR TABLE <th>

html_string = '''
<html>
  <head><title>HTML Pandas Dataframe with CSS</title></head>
  <link rel="stylesheet" type="text/css" href="df_style.css"/>
  <body>
    {table}
  </body>
</html>
'''

# OUTPUT AN HTML FILE
with open(save_path, 'w') as f:
    f.write(html_string.format(table=df_tableone.to_html(classes='mystyle')))

Save df as excel file to get a pretty table

In [27]:
excel_path = "../OUTPUT_figures_tables/Table_1.xlsx"
df_tableone.to_excel(excel_path, sheet_name="Table1")

Edit the file to looks pretty

In [28]:
wb = openpyxl.load_workbook(excel_path)
sheet = wb["Table1"]
# Clean format
for row in sheet.iter_rows():
    for cell in row:
        cell.font = openpyxl.styles.Font()
        cell.border = openpyxl.styles.Border()
        cell.fill = openpyxl.styles.fills.PatternFill()

# Width Columns
sheet.column_dimensions['A'].width = 56
sheet.column_dimensions['B'].width = 25
sheet.column_dimensions['C'].width = 12
sheet.column_dimensions['D'].width = 23
sheet.column_dimensions['E'].width = 23
sheet.column_dimensions['F'].width = 23
sheet.column_dimensions['G'].width = 12

# Align text to center in colum C
for row in sheet.iter_rows(min_row=5, min_col=3, max_col=3):
    for cell in row:
        cell.alignment = openpyxl.styles.Alignment(horizontal='center')

# Align text to center in row 4
for row in sheet.iter_rows(min_row=4, max_row=4, min_col=4, max_col=6):
    for cell in row:
        cell.alignment = openpyxl.styles.Alignment(horizontal='center')

# colors
color_green1 = openpyxl.styles.colors.Color(rgb='007932')
color_green2 = openpyxl.styles.colors.Color(rgb='368f3f')
color_green3 = openpyxl.styles.colors.Color(rgb='54995E')
color_white = openpyxl.styles.colors.Color(rgb='ffffff')
color_gray1 = openpyxl.styles.colors.Color(rgb='c9c9c9')

# Edit header
header_cell = sheet['C1']
sheet.row_dimensions[1].height=35
header_cell.alignment = openpyxl.styles.Alignment(horizontal='center', vertical='center')
header_cell.fill = openpyxl.styles.fills.PatternFill(patternType='solid', fgColor=color_green1)
header_cell.font = openpyxl.styles.Font(name="SourceSansPro", size=15, bold=True, color=color_white)


# Edit subheader cells
sheet.row_dimensions[2].height=20
for row in sheet['C2:G2']:
    for cell in row:
        cell.alignment = openpyxl.styles.Alignment(horizontal='center', vertical='center')
        cell.fill = openpyxl.styles.fills.PatternFill(patternType='solid', fgColor=color_green2)
        cell.font = openpyxl.styles.Font(name="SourceSansPro", size=12, bold=True, color=color_white)

# Add subheader for variables
sheet.row_dimensions[3].height=20
sheet['A3'] = 'Variable'
sheet['B3'] = 'Level'
for row in sheet['A3:G3']:
    for cell in row:
        cell.alignment = openpyxl.styles.Alignment(horizontal='center', vertical='center')
        cell.fill = openpyxl.styles.fills.PatternFill(patternType='solid', fgColor=color_green2)
        cell.font = openpyxl.styles.Font(name="SourceSansPro", size=12, color=color_white)

sheet.row_dimensions[4].height=20
for row in sheet['A4:G4']:
    for cell in row:
        cell.fill = openpyxl.styles.fills.PatternFill(patternType='solid', fgColor=color_green3)
        cell.font = openpyxl.styles.Font(name="SourceSansPro", size=10, color=color_white)
        cell.alignment = openpyxl.styles.Alignment(horizontal='center', vertical='center')

# Edit variables rows (alterning row color)
cells_vars = []
for row in sheet[f'A5:A{sheet.max_row}']:
    for cell in row:
        if not isinstance(cell, openpyxl.cell.cell.MergedCell):
            cells_vars.append(cell)
for i, cell in enumerate(cells_vars):
    if i % 2 == 0:
        for row in sheet[f'A{cell.row}:G{cell.row}']:
            for cell in row:
                cell.fill = openpyxl.styles.fills.PatternFill(patternType='solid', fgColor=color_gray1)

#Set font all table_body
for row in sheet[f'A5:G{sheet.max_row}']:
    for cell in row:
        cell.font = openpyxl.styles.Font(name="SourceSansPro", size=10)

# Border 
border1 = openpyxl.styles.Border(bottom=openpyxl.styles.Side(border_style='thin', color='000000'))
side =openpyxl.styles.Side(border_style='thin', color='000000')


# Body borders
cells_vars = [row[0] for row in sheet[f'A5:A{sheet.max_row}']]
for i, cell in enumerate(cells_vars):
    if isinstance(cell, openpyxl.cell.cell.MergedCell):
        cells_vars[i-1]=None
cells_vars = [cell for cell in cells_vars if cell is not None]

for cell in cells_vars:
    i = cell.row
    for row in sheet[f'A{i}:G{i}']:
        for cell in row:
            cell.border = border1

# header border
for row in sheet['A4:G4']:
    for cell in row:
        cell.border = border1

# Vertical border middle
for row in sheet[f'B3:B{sheet.max_row}']:
    for cell in row:
        prev_border = cell.border

        new_border = openpyxl.styles.Border(left=prev_border.left,
                                            right=side,
                                            top=prev_border.top,
                                            bottom=prev_border.bottom)
        
        cell.border = new_border


#Outborder left
for row in sheet[f'A3:A{sheet.max_row}']:
    for cell in row:
        prev_border = cell.border

        new_border = openpyxl.styles.Border(left=side,
                                            right=prev_border.right,
                                            top=prev_border.top,
                                            bottom=prev_border.bottom)
        
        cell.border = new_border

#Outborder bottom
for row in sheet[f'A{sheet.max_row}:G{sheet.max_row}']:
    for cell in row:
        prev_border = cell.border

        new_border = openpyxl.styles.Border(left=prev_border.left,
                                            right=prev_border.right,
                                            top=prev_border.top,
                                            bottom=side)
        
        cell.border = new_border

#Outborder right
for row in sheet[f'G{sheet.min_row}:G{sheet.max_row}']:
    for cell in row:
        prev_border = cell.border

        new_border = openpyxl.styles.Border(left=prev_border.left,
                                            right=side,
                                            top=prev_border.top,
                                            bottom=prev_border.bottom)
        
        cell.border = new_border        

#Outborder header
for row in sheet['C1:G1']:
    for cell in row:
        prev_border = cell.border

        new_border = openpyxl.styles.Border(left=prev_border.left,
                                            right=prev_border.right,
                                            top=side,
                                            bottom=prev_border.bottom)
        
        cell.border = new_border
for row in sheet['C1:C2']:
    for cell in row:
        prev_border = cell.border

        new_border = openpyxl.styles.Border(left=side,
                                            right=prev_border.right,
                                            top=prev_border.top,
                                            bottom=prev_border.bottom)
        
        cell.border = new_border 
for row in sheet['A3:B3']:
    for cell in row:
        prev_border = cell.border

        new_border = openpyxl.styles.Border(left=prev_border.top,
                                            right=prev_border.right,
                                            top=side,
                                            bottom=prev_border.bottom)
        
        cell.border = new_border 

wb.save(excel_path)