# Appendix 3

To give a more compact summary of the results, we merge Table 5 and Table 9 into a single table.

In [1]:
def get_library_path()->str:
    """Return the parent directory of the current working directory.

    The data paths used in this notebook are joined onto this directory.

    Returns:
        The path of the directory that contains the current working
        directory. If the working directory is already a filesystem root,
        that root is returned unchanged.
    """
    # os.path.dirname keeps the root / drive prefix intact. Splitting on
    # os.sep and re-joining from a bare separator loses the drive root on
    # Windows ('C:\\Users\\x' would be rebuilt as the drive-relative 'C:Users').
    return os.path.dirname(os.getcwd())

# imports

import os

import pandas as pd

# Base directory (parent of the current working directory); the data paths
# used below are joined onto it.
path_to_lib = get_library_path()

In [2]:
# load data

# Table 5: read the auxiliary CSV and replace missing values with empty strings.
path_table_5 = os.path.join(path_to_lib, 'data/auxiliar/aux_table_5.csv')
df_5 = pd.read_csv(path_table_5).fillna('')

# Table 9: same treatment, but its 'Statistical Measure' column is dropped first.
path_table_9 = os.path.join(path_to_lib, 'data/auxiliar/aux_table_9.csv')
df_9 = pd.read_csv(path_table_9)
df_9 = df_9.drop(columns='Statistical Measure').fillna('')

We use multi-level columns for each data frame so that, after merging, it remains clear which table every column comes from.

In [3]:
# create multi-level columns for Table 5

# The two descriptive columns get an empty top-level label; the remaining
# columns are grouped under a single top-level label.
onset_group = 'Early/Late Onset'
onset_columns = [
    'Age at onset <50 years',
    'Age at onset >=50 years',
    'p-value',
    'Total',
    'Available Samples for Analysis',
]
multi_index_5 = pd.MultiIndex.from_tuples(
    [('', 'Variables'), ('', 'Statistical Measure')]
    + [(onset_group, label) for label in onset_columns]
)
# Labels are assigned by position, so they must follow the column order of df_5.
df_5.columns = multi_index_5

In [4]:
# create multi-level columns for Table 9

# 'Variables' gets an empty top-level label; the remaining columns are grouped
# under a single top-level label.
duration_group = 'PD Duration Comp'
duration_columns = [
    'PD duration <=5 yrs',
    'PD duration >5 yrs',
    'p-value',
    'Total',
    'Available Samples for Analysis',
]
multi_index_9 = pd.MultiIndex.from_tuples(
    [('', 'Variables')] + [(duration_group, label) for label in duration_columns]
)
# Labels are assigned by position, so they must follow the column order of df_9.
df_9.columns = multi_index_9

In [5]:
# merge both dataframes and save the result

# Right join on the shared ('', 'Variables') column: every row of Table 9 is
# kept, and variables that are missing from Table 5 get '' in the Table 5 columns.
# NOTE(review): 'Statistical Measure' is only taken from Table 5, so rows that
# exist only in Table 9 end up with an empty 'Statistical Measure' -- confirm
# that this is intended.
df_mrg = pd.merge(df_5, df_9, on=[('', 'Variables')], how='right', sort=False).fillna('')

# to_csv does not create missing directories, so make sure the output folder
# exists before writing.
path_appendix_3 = os.path.join(path_to_lib, 'data/final/appendix_3.csv')
os.makedirs(os.path.dirname(path_appendix_3), exist_ok=True)
df_mrg.to_csv(path_appendix_3, index=False)
df_mrg

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Early/Late Onset,Early/Late Onset,Early/Late Onset,Early/Late Onset,Early/Late Onset,PD Duration Comp,PD Duration Comp,PD Duration Comp,PD Duration Comp,PD Duration Comp
Unnamed: 0_level_1,Variables,Statistical Measure,Age at onset <50 years,Age at onset >=50 years,p-value,Total,Available Samples for Analysis,PD duration <=5 yrs,PD duration >5 yrs,p-value,Total,Available Samples for Analysis
0,Age at Recruitment (years),mean (SD),48.8 (8.5),66.0 (7.5),p<0.001,60.4 (11.3),7468.0,59.5 (11.7),61.4 (10.6),p<0.001,60.4 (11.3),7468.0
1,Age at Onset (years),mean (SD),40.9 (6.8),60.7 (7.5),p<0.001,54.2 (11.8),7468.0,56.8 (11.8),51.0 (11.0),p<0.001,54.2 (11.8),7468.0
2,Body Mass Index,mean (SD),24.2 (3.7),24.3 (3.6),0.2923,24.3 (3.6),6667.0,24.3 (3.6),24.2 (3.7),0.1301,24.3 (3.6),6670.0
3,Duration of Disease (years),median (IQR),6.0 (3.0 - 11.0),4.0 (2.0 - 7.0),p<0.001,5.0 (2.0 - 9.0),7468.0,3.0 (1.0 - 4.0),10.0 (7.0 - 12.0),p<0.001,5.0 (2.0 - 9.0),7473.0
4,Years of Education,median (IQR),12.0 (9.0 - 15.0),12.0 (9.2 - 15.0),0.002,12.0 (9.0 - 15.0),7091.0,12.0 (8.0 - 15.0),12.0 (10.0 - 15.0),p<0.001,12.0 (9.0 - 15.0),7095.0
5,Male,n (%),1591 (65.0),3473 (69.2),p<0.001,5064 (67.8),7467.0,2807 (67.7),2259 (67.9),0.8165,5066 (67.8),7472.0
6,Onset <50 years,,,,,,,1052 (25.4),1398 (42.0),p<0.001,2450 (32.8),7468.0
7,Onset <40 years,,,,,,,351 (8.5),482 (14.5),p<0.001,833 (11.2),7468.0
8,PD-related Reason for Stop Working,n (%),545 (51.6),864 (27.5),p<0.001,1409 (33.6),4194.0,640 (28.9),769 (38.7),p<0.001,1409 (33.6),4197.0
9,Currently Employed,n (%),1529 (64.3),2107 (43.5),p<0.001,3636 (50.3),7224.0,2122 (52.9),1515 (47.1),p<0.001,3637 (50.3),7229.0
