# Merge Table 1 and Table 4

To give a more compact summary of the obtained results, we merge Table 1 and Table 4.

In [1]:
# imports

import os

import pandas as pd

In [2]:
# load data

# Directory holding Table_1.csv and Table_4.csv. The original absolute path is
# kept as the default so existing runs behave the same; set the
# LUXGIANT_DATA_DIR environment variable to run the notebook on a machine
# where the data lives elsewhere.
folder_path = (
    os.environ.get('LUXGIANT_DATA_DIR')
    or '/mnt/0A2AAC152AABFBB7/CGE/luxgiant-clinical/data'
)

# Table 1: missing cells are replaced with empty strings.
df_1 = pd.read_csv(os.path.join(folder_path, 'Table_1.csv')).fillna('')

# Table 4: its own 'Statistical Measure' column is dropped before the missing
# cells are blanked the same way as for Table 1.
df_4 = (
    pd.read_csv(os.path.join(folder_path, 'Table_4.csv'))
    .drop(columns='Statistical Measure')
    .fillna('')
)

We will use multi-level columns for each of the data frames so that the merged table is clearer to read.

In [3]:
# create multi-level columns for Table 1

# Top-level header placed above every cohort-specific column of Table 1.
cohort_label_1 = 'Sex Matched Cohort'

# The two descriptive columns get an empty top level; the remaining five are
# grouped under the cohort label. Labels are assigned to df_1 by position.
column_tuples_1 = [('', name) for name in ('Variables', 'Statistical Measure')]
column_tuples_1 += [
    (cohort_label_1, name)
    for name in (
        'Male',
        'Female',
        'p-value',
        'Total',
        'Available Samples for Analysis',
    )
]

multi_index_1 = pd.MultiIndex.from_tuples(column_tuples_1)
df_1.columns = multi_index_1

In [4]:
# create multi-level columns for Table 4

# Top-level header placed above every cohort-specific column of Table 4.
cohort_label_4 = 'Whole PD Cohort'

# 'Variables' keeps an empty top level; the remaining five columns are
# grouped under the cohort label. Labels are assigned to df_4 by position.
column_tuples_4 = [('', 'Variables')]
column_tuples_4 += [
    (cohort_label_4, name)
    for name in (
        'Patient',
        'Control',
        'p-value',
        'Total',
        'Available Samples for Analysis',
    )
]

multi_index_4 = pd.MultiIndex.from_tuples(column_tuples_4)
df_4.columns = multi_index_4

In [5]:
# merge both dataframes and save the result

# Left join on the ('', 'Variables') column: every Table 1 row is kept, and
# rows without a match in Table 4 get empty strings in the Table 4 columns.
merge_key = [('', 'Variables')]
df_mrg = df_1.merge(df_4, how='left', on=merge_key).fillna('')

# Write the merged table next to the input tables.
output_file = os.path.join(folder_path, 'Table_1_4.csv')
df_mrg.to_csv(output_file)

df_mrg

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Sex Matched Cohort,Sex Matched Cohort,Sex Matched Cohort,Sex Matched Cohort,Sex Matched Cohort,Whole PD Cohort,Whole PD Cohort,Whole PD Cohort,Whole PD Cohort,Whole PD Cohort
Unnamed: 0_level_1,Variables,Statistical Measure,Male,Female,p-value,Total,Available Samples for Analysis,Patient,Control,p-value,Total,Available Samples for Analysis
0,Age at Recruitment (years),mean (SD),60.9 (11.4),59.7 (11.0),p<0.001,60.5 (11.3),7905.0,56.7 (12.0),50.0 (12.6),p<0.001,53.3 (12.7),6640.0
1,Age at Onset (years),mean (SD),54.6 (11.9),53.5 (11.5),p<0.001,54.2 (11.8),7467.0,50.0 (12.6),50.0 (12.6),1.0,50.0 (12.6),6640.0
2,Body Mass Index,mean (SD),24.3 (3.5),24.2 (4.0),0.1928,24.3 (3.6),6904.0,24.2 (3.7),25.0 (3.8),p<0.001,24.6 (3.7),5979.0
3,Duration of Disease (years),median (IQR),5.0 (2.0 - 9.0),5.0 (2.0 - 9.0),0.6474,5.0 (2.0 - 9.0),7472.0,5.0 (3.0 - 10.0),2.0 (2.0 - 11.0),0.4396,5.0 (3.0 - 10.0),3325.0
4,Years of Education,median (IQR),12.0 (10.0 - 15.0),10.0 (5.0 - 15.0),p<0.001,12.0 (9.0 - 15.0),7365.0,12.0 (8.0 - 15.0),12.0 (9.0 - 15.0),0.0149,12.0 (8.0 - 15.0),6365.0
5,Onset <50 years,n (%),1591 (31.4),858 (35.7),p<0.001,2449 (30.9),7467.0,1642 (49.5),3 (60.0),0.9812,1645 (24.8),3325.0
6,Onset <40 years,n (%),565 (11.2),268 (11.2),1.0,833 (10.5),7467.0,733 (22.1),3 (60.0),0.1331,736 (11.1),3325.0
7,Currently Employed,n (%),2816 (54.7),943 (38.6),p<0.001,3759 (47.5),7585.0,1901 (58.5),2380 (76.2),p<0.001,4281 (64.5),6371.0
8,PD-related Reason for Stop Working,n (%),1036 (31.9),413 (37.6),p<0.001,1449 (18.3),4342.0,,,,,
9,Agriculture as a job,n (%),906 (16.9),246 (9.6),p<0.001,1152 (14.5),7918.0,502 (15.1),383 (11.5),p<0.001,885 (13.3),6640.0
