# Merge Table 5 and Table 9

To give a more compact summary of the results, we merge Table 5 and Table 9 into a single table.

In [1]:
# imports

import os

import pandas as pd

In [2]:
# load data

# Directory holding the table CSVs. The default below is specific to the
# original author's machine; set the LUXGIANT_DATA_DIR environment variable
# to point elsewhere without editing the notebook.
folder_path = os.environ.get(
    'LUXGIANT_DATA_DIR',
    '/mnt/0A2AAC152AABFBB7/CGE/luxgiant-clinical/data'
)

# Table 5: drop 'Statistical Measure' (presumably so that only Table 9's copy
# of that column survives the merge) and turn missing cells into empty strings.
df_5 = (
    pd.read_csv(os.path.join(folder_path, 'Table_5.csv'))
    .drop(columns='Statistical Measure')
    .fillna('')
)

# Table 9: keep every column; missing cells become empty strings.
df_9 = pd.read_csv(os.path.join(folder_path, 'Table_9.csv')).fillna('')

We use multi-level columns for each data frame so that, after the merge, it remains clear which table each column comes from.

In [3]:
# create multi-level columns for Table 5

# The first column is the variable name and sits under an empty top-level
# label; the remaining columns are grouped under 'Early/Late Onset'.
# Labels are assigned by position, so this assumes the CSV columns (after
# 'Statistical Measure' was dropped) arrive in exactly this order - TODO confirm.
onset_group = 'Early/Late Onset'
onset_columns = [
    'Age at onset <50 years',
    'Age at onset >=50 years',
    'p-value',
    'Total',
    'Available Samples for Analysis',
]

multi_index_5 = pd.MultiIndex.from_arrays([
    [''] + [onset_group] * len(onset_columns),  # top level
    ['Variables'] + onset_columns,              # second level
])
df_5.columns = multi_index_5

In [4]:
# create multi-level columns for Table 9

# The two identifying columns sit under an empty top-level label; the
# remaining columns are grouped under 'PD Duration Comp'.
# Labels are assigned by position, so this assumes the CSV columns arrive
# in exactly this order - TODO confirm.
key_columns = ['Variables', 'Statistical Measure']
duration_group = 'PD Duration Comp'
duration_columns = [
    'PD duration <=5 yrs',
    'PD duration >5 yrs',
    'p-value',
    'Total',
    'Available Samples for Analysis',
]

multi_index_9 = pd.MultiIndex.from_tuples(
    [('', label) for label in key_columns]
    + [(duration_group, label) for label in duration_columns]
)
df_9.columns = multi_index_9

In [5]:
# merge both dataframes and save the result

# Left join on the variable name: every Table 9 row is kept, and rows with no
# counterpart in Table 5 get empty strings in the Table 5 columns.
merge_key = ('', 'Variables')
df_mrg = df_9.merge(df_5, how='left', on=[merge_key], sort=False).fillna('')

# The merged table is written next to the input tables, without the row index.
output_path = os.path.join(folder_path, 'Table_9_5.csv')
df_mrg.to_csv(output_path, index=False)
df_mrg

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,PD Duration Comp,PD Duration Comp,PD Duration Comp,PD Duration Comp,PD Duration Comp,Early/Late Onset,Early/Late Onset,Early/Late Onset,Early/Late Onset,Early/Late Onset
Unnamed: 0_level_1,Variables,Statistical Measure,PD duration <=5 yrs,PD duration >5 yrs,p-value,Total,Available Samples for Analysis,Age at onset <50 years,Age at onset >=50 years,p-value,Total,Available Samples for Analysis
0,Age at Recruitment (years),mean (SD),59.4 (11.9),61.2 (10.6),p<0.001,60.4 (11.3),7468.0,48.8 (8.5),66.0 (7.5),p<0.001,60.4 (11.3),7468.0
1,Age at Onset (years),mean (SD),57.2 (12.0),51.7 (11.1),p<0.001,54.2 (11.8),7468.0,40.9 (6.8),60.7 (7.5),p<0.001,54.2 (11.8),7468.0
2,Body Mass Index,mean (SD),24.3 (3.6),24.2 (3.7),0.174,24.3 (3.6),6670.0,24.2 (3.7),24.3 (3.6),0.2923,24.3 (3.6),6667.0
3,Duration of Disease (years),median (IQR),2.0 (1.0 - 3.0),8.0 (6.0 - 12.0),p<0.001,5.0 (2.0 - 9.0),7473.0,6.0 (3.0 - 11.0),4.0 (2.0 - 7.0),p<0.001,5.0 (2.0 - 9.0),7468.0
4,Years of Education,median (IQR),12.0 (8.0 - 15.0),12.0 (10.0 - 15.0),p<0.001,12.0 (9.0 - 15.0),7095.0,12.0 (9.0 - 15.0),12.0 (9.2 - 15.0),0.002,12.0 (9.0 - 15.0),7091.0
5,Male,n (%),2346 (67.9),2720 (67.7),0.8809,5066 (67.8),7472.0,1591 (65.0),3473 (69.2),p<0.001,5064 (67.8),7467.0
6,Onset <50 years,n (%),854 (24.7),1596 (39.7),p<0.001,2450 (32.8),7468.0,,,,,
7,Onset <40 years,n (%),294 (8.5),539 (13.4),p<0.001,833 (11.1),7468.0,,,,,
8,PD-related Reason for Stop Working,n (%),515 (28.1),894 (37.8),p<0.001,1409 (18.9),4197.0,545 (51.6),864 (27.5),p<0.001,1409 (18.9),4194.0
9,Currently Employed,n (%),1781 (53.2),1856 (47.8),p<0.001,3637 (48.7),7229.0,1529 (64.3),2107 (43.5),p<0.001,3636 (48.7),7224.0
