In [1]:
def get_library_path()->str:
    """Return the parent directory of the current working directory.

    The notebook uses this directory as the project root: it is appended to
    ``sys.path`` and the ``data`` folder is looked up beneath it.

    Returns
    -------
    str
        Path of the directory that contains the current working directory.
        When the working directory is already a filesystem root, the root
        itself is returned.
    """
    # os.path.dirname keeps the drive/root prefix intact. Splitting on os.sep
    # and re-joining from os.sep gives the same answer on POSIX, but on
    # Windows it produces a drive-relative path (C:Users\...) instead of an
    # absolute one.
    return os.path.dirname(os.getcwd())

# imports

import os
import sys

# Project root = parent of the notebook's working directory.
path_to_lib = get_library_path()

# Make the project root importable. The membership check keeps re-runs of this
# cell from stacking duplicate entries onto sys.path.
if path_to_lib not in sys.path:
    sys.path.append(path_to_lib)
import luxgiant_clinical.TwoCatAnalysis as two

import pandas as pd

In [2]:
# Read the cleaned dataset from the `data` folder under the project root.

folder_path = os.path.join(path_to_lib, 'data')

# low_memory=False makes pandas parse the file in a single pass instead of in
# chunks, so column dtypes are not inferred chunk by chunk.
df = pd.read_csv(os.path.join(folder_path, 'cleaned_file.csv'), low_memory=False)

In [3]:
# Keep only patients whose age-at-onset category (`agecat_1`) is recorded.

mask_patients = df['Status'].eq('Patient')
mask_onset = df['agecat_1'].notna()

df_cases = df.loc[mask_patients & mask_onset].reset_index(drop=True)

# Replace the raw category codes with the group labels used in the report.
# Any value other than the two keys below becomes NaN.
df_cases["agecat_1"] = df_cases["agecat_1"].map(
    {
        "<50": 'Age at onset <50 years',
        ">=50": 'Age at onset >=50 years',
    }
)

# Release the unfiltered frame. After this line the cell cannot be re-run
# without first re-running the cell that loads `df`.
del df
df_cases.shape

(7468, 716)

In [4]:
# Columns kept for the 'On' analysis and the statistic reported for each.
variables = ['participant_id', 'agecat_1', 'Stage I-III', 'Stage IV-V', 'UPDRS III-ON']
stats_meas1 = {
    'n': ['Stage I-III', 'Stage IV-V'],
    'median': ['UPDRS III-ON'],
}
groups = ['Age at onset <50 years', 'Age at onset >=50 years']

# Rows of df_cases whose `hyonoff` value is 'On'.
df_on = df_cases.loc[df_cases['hyonoff'].eq('On')].reset_index(drop=True)

# Complementary 0/1 indicators built from `hystage`; any value other than
# 'Not severe' / 'Severe' becomes NaN in both columns.
df_on['Stage I-III'] = df_on['hystage'].map({'Not severe': 1, 'Severe': 0})
df_on['Stage IV-V'] = df_on['hystage'].map({'Not severe': 0, 'Severe': 1})

# Expose the UPDRS part III total under the label used in the report.
df_on['UPDRS III-ON'] = df_on['updrs_part_iii_total_score'].copy()

# Keep only the variables needed for the analysis.
df_on = df_on[variables].copy()

In [5]:
# Columns kept for the 'Off' analysis and the statistic reported for each.
# Note: this rebinds `variables`, replacing the list built for the 'On' subset.
variables = ['participant_id', 'agecat_1', 'Stage I-III', 'Stage IV-V', 'UPDRS III-OFF']
stats_meas2 = {
    'n': ['Stage I-III', 'Stage IV-V'],
    'median': ['UPDRS III-OFF'],
}

# Rows of df_cases whose `hyonoff` value is 'Off'.
df_off = df_cases.loc[df_cases['hyonoff'].eq('Off')].reset_index(drop=True)

# Complementary 0/1 indicators built from `hystage`; any value other than
# 'Not severe' / 'Severe' becomes NaN in both columns.
df_off['Stage I-III'] = df_off['hystage'].map({'Not severe': 1, 'Severe': 0})
df_off['Stage IV-V'] = df_off['hystage'].map({'Not severe': 0, 'Severe': 1})

# Expose the UPDRS part III total under the label used in the report.
df_off['UPDRS III-OFF'] = df_off['updrs_part_iii_total_score'].copy()

# Keep only the variables needed for the analysis.
df_off = df_off[variables].copy()

In [6]:
# First part of the table. The list order is the order in which the pieces are
# concatenated into the final table: stage proportions for the Off subset, stage
# proportions for the On subset, then the UPDRS III medians (On first, Off second).
summary1 = [
    # Stage I-III / IV-V counts by age-at-onset group, Off subset.
    two.report_proportion(
        data_df    =df_off,
        variables  =stats_meas2['n'],
        groups     =groups,
        grouping_by='agecat_1',
        subheader  ='H & Y OFF'
        ),
    # Same indicators for the On subset.
    two.report_proportion(
        data_df    =df_on,
        variables  =stats_meas1['n'],
        groups     =groups,
        grouping_by='agecat_1',
        subheader  ='H & Y ON'
    ),
    # NOTE(review): no subheader is passed to the two median reports, so in the
    # rendered table the UPDRS rows sit directly below the 'H & Y ON' block --
    # confirm that layout is intended.
    two.report_median_iqr(
        data_df    =df_on,
        variables  =stats_meas1['median'],
        groups     =groups,
        grouping_by='agecat_1'
    ),
    two.report_median_iqr(
        data_df    =df_off,
        variables  =stats_meas2['median'],
        groups     =groups,
        grouping_by='agecat_1'
    )
]

In [7]:
# Raw column name -> label shown in the report.
variables_dict = {
    "total_score_for_moca": "MOCA Score",
    "total_score_for_bdi": "BDI Score",
    "l_dopa_carbidopa": "L-dopa/C-dopa",
}
variables_1 = ['participant_id', 'agecat_1', *variables_dict]

# Statistic reported for each (renamed) variable.
stats_meas3 = {
    'mean': ["MOCA Score"],
    'median': ["BDI Score"],
    'n': ["L-dopa/C-dopa"],
}

groups = ['Age at onset <50 years', 'Age at onset >=50 years']

# Subset to the analysis columns and switch to the report labels.
df_cases_1 = df_cases[variables_1].copy().rename(columns=variables_dict)

# Encode the Yes/No answer as 1/0; any other value becomes NaN.
# NOTE(review): the table output visible in this notebook has no
# 'L-dopa/C-dopa' row -- confirm the raw column really holds 'Yes'/'No'.
df_cases_1["L-dopa/C-dopa"] = df_cases_1["L-dopa/C-dopa"].map({'Yes': 1, 'No': 0})

In [8]:
# Second part of the table, all computed on df_cases_1 and split by
# `agecat_1`: mean for MOCA, median for BDI, proportion for L-dopa/C-dopa.
# The list order is the row order after concatenation.
summary2 = [ 
    # Variables listed under 'mean' in stats_meas3.
    two.report_mean_std(
        data_df    =df_cases_1,
        variables  =stats_meas3['mean'],
        groups     =groups,
        grouping_by='agecat_1'
    ),
    # Variables listed under 'median' in stats_meas3.
    two.report_median_iqr(
        data_df    =df_cases_1,
        variables  =stats_meas3['median'],
        groups     =groups,
        grouping_by='agecat_1'
    ),
    # Variables listed under 'n' in stats_meas3 (the 0/1-encoded column).
    two.report_proportion(
        data_df=df_cases_1,
        variables=stats_meas3['n'],
        groups=groups,
        grouping_by='agecat_1'
    )
]

In [9]:
# Stack every summary block into a single frame, preserving list order.
df_13 = pd.concat([*summary1, *summary2], axis=0, ignore_index=True)
df_13 = two.final_formatter(df_13, groups)

# Write the formatted table into the data folder, then display it.
df_13.to_csv(os.path.join(folder_path, 'Table_13.csv'), index=False)
df_13

Unnamed: 0,Variable,Statistical Measure,Age at onset <50 years,Age at onset >=50 years,p-value,Total,Available Samples for Analysis
0,H & Y OFF,,,,,,
1,Stage I-III,n (%),783 (92.6),1564 (94.7),0.0302,2347 (94.0),2497.0
2,Stage IV-V,n (%),63 (7.4),87 (5.3),0.0302,150 (6.0),2497.0
3,H & Y ON,,,,,,
4,Stage I-III,n (%),1260 (96.1),2400 (95.5),0.3518,3660 (95.7),3825.0
5,Stage IV-V,n (%),51 (3.9),114 (4.5),0.3518,165 (4.3),3825.0
6,UPDRS III-ON,median (IQR),20.0 (12.0 - 28.0),21.0 (13.0 - 30.0),0.0038,21.0 (13.0 - 30.0),3166.0
7,UPDRS III-OFF,median (IQR),38.0 (27.0 - 47.0),37.0 (27.0 - 46.0),0.6416,37.0 (27.0 - 46.0),1546.0
8,MOCA Score,mean (SD),24.2 (5.5),23.5 (5.5),p<0.001,23.7 (5.5),5147.0
9,BDI Score,median (IQR),9.0 (6.0 - 14.0),8.0 (5.0 - 13.0),p<0.001,9.0 (5.0 - 13.0),5698.0
