In [1]:
def get_library_path()->str:
    """Return the parent directory of the current working directory.

    The notebook uses this path both as an extra ``sys.path`` entry and as
    the base of its ``data`` folder.

    ``os.path.dirname`` is used instead of splitting the path on ``os.sep``
    and re-joining the pieces onto a leading separator: on Windows that
    manual rebuild drops the root after the drive letter and produces a
    drive-relative path, whereas ``dirname`` is correct on every platform.

    Returns
    -------
    str
        Absolute path of the directory that contains the current working
        directory (the filesystem root is returned unchanged).
    """
    return os.path.dirname(os.getcwd())

# imports

import os
import sys

# parent directory of the current working directory (see get_library_path)
path_to_lib = get_library_path()

# Extend the module search path before importing the analysis package;
# presumably luxgiant_clinical lives under path_to_lib -- confirm against the repo layout.
sys.path.append(path_to_lib)
import luxgiant_clinical.ThreeCatAnalysis as thr

import pandas as pd

In [2]:
# load data

# all inputs and outputs of this notebook live in <path_to_lib>/data
folder_path = os.path.join(path_to_lib, 'data')
cleaned_file_path = os.path.join(folder_path, 'cleaned_file.csv')

df = pd.read_csv(cleaned_file_path, low_memory=False)

In [3]:
# keep only patients with a known age-at-onset category,
# leaving out the 'Onset <21 years' category

mask_patients = df['Status'].eq('Patient')
mask_agecat = df['age_category'].notna()
mask_not_under_21 = df['age_category'].ne('Onset <21 years')

df_cases = df[mask_patients & mask_agecat & mask_not_under_21].reset_index(drop=True)

# the full table is not used again below
del df

df_cases.shape

(7439, 716)

In [4]:
# number of cases in each age-at-onset category
age_category_counts = df_cases['age_category'].value_counts()
age_category_counts.reset_index()

Unnamed: 0,age_category,count
0,Onset 50-60 years,2688
1,Onset 21-49 years,2421
2,Onset >60 years,2330


In [5]:
# rename and select first group of variables

# raw column name -> readable label used in the report
variables_dict = {
    # symptoms at onset
    "initial_motor_symptom_s___1": "Onset with motor symptoms",
    "initial_motor_symptom_s___2": "Rest tremor as onset symptom",
    "initial_motor_symptom_s___12": "Bradykinesia as onset symptom",
    "initial_motor_symptom_s___13": "Rigidity as onset symptom",
    "initial_non_motor_symptom___1": "Non Motor Symp at onset",
    "initial_non_motor_symptom___18": "Constipation at onset",
    "initial_non_motor_symptom___8": "Insomnia as onset symptom",
    "initial_non_motor_symptom___4": "RBD at onset",
    "initial_non_motor_symptom___20": "Urinary urgency as onset symptom",
    "initial_non_motor_symptom___10": "Anosmia/hyposmia as onset symptom",
    "initial_non_motor_symptom___13": "Hyperhidrosis as onset symptom",
    "initial_non_motor_symptom___2": "Cognitive symptoms at onset",
    # current symptoms
    "current_motor_symptom_s___2": "Current rest tremor",
    "current_motor_symptom_s___3": "Current postural/action tremor",
    "current_motor_symptom_s___14": "Current bradykinesia",
    "current_motor_symptom_s___4": "Current rigidity",
    "current_non_motor_symptom___3": "Current NMS Psychosis",
    "current_non_motor_symptom___5": "Current depression",
    "current_non_motor_symptom___6": "Current RBD",
    "current_non_motor_symptom___7": "Current RLS",
    "current_non_motor_symptom___9": "Current anosmia/hyposmia",
    "current_non_motor_symptom___2": "Current cognitive impairment",
    "current_non_motor_symptom___10": "Current EDSS",
    "current_non_motor_symptom___11": "Current insomnia",
    "current_non_motor_symptom___14": "Current orthostasis",
    "current_non_motor_symptom___16": "Current hyperhidrosis",
    "current_non_motor_symptom___17": "Current seborrhea",
    "current_non_motor_symptom___20": "Current constipation",
    "current_non_motor_symptom___22": "Current urinary incontinence",
}

# identifier and grouping columns first, then every raw symptom column
variables = ['participant_id', 'age_category', *variables_dict]

# statistical measures
# labels listed under 'n'; the list order is kept exactly as written
# (it differs slightly from the dictionary order above)
stats_meas = {
    'n': [
        "Onset with motor symptoms",
        "Rest tremor as onset symptom",
        "Bradykinesia as onset symptom",
        "Rigidity as onset symptom",
        "Non Motor Symp at onset",
        "Constipation at onset",
        "Insomnia as onset symptom",
        "RBD at onset",
        "Urinary urgency as onset symptom",
        "Hyperhidrosis as onset symptom",
        "Anosmia/hyposmia as onset symptom",
        "Cognitive symptoms at onset",
        "Current rest tremor",
        "Current postural/action tremor",
        "Current bradykinesia",
        "Current rigidity",
        "Current NMS Psychosis",
        "Current depression",
        "Current RBD",
        "Current RLS",
        "Current anosmia/hyposmia",
        "Current cognitive impairment",
        "Current EDSS",
        "Current insomnia",
        "Current orthostasis",
        "Current hyperhidrosis",
        "Current seborrhea",
        "Current constipation",
        "Current urinary incontinence",
    ],
}

# age-at-onset categories compared in the analysis
groups = ['Onset 21-49 years', 'Onset 50-60 years', 'Onset >60 years']

In [6]:
# keep only variables for analysis, under their readable labels

df_cases_1 = df_cases.loc[:, variables].rename(columns=variables_dict)

# the wide cases table is not used again below
del df_cases

In [7]:
# recode variable values

# default coding: a checked box counts as 1
checked_as_one = {"Checked":1, "Unchecked":0}
# reversed coding for the two summary columns below
# (presumably their checkbox means "none" -- confirm against the data dictionary)
checked_as_zero = {"Checked":0, "Unchecked":1}
reversed_columns = {"Onset with motor symptoms", "Non Motor Symp at onset"}

# every renamed symptom column is recoded exactly once
for column in variables_dict.values():
    coding = checked_as_zero if column in reversed_columns else checked_as_one
    df_cases_1[column] = df_cases_1[column].map(coding)

In [8]:
# overall table: one row per symptom label, broken down by age-at-onset group
df_12 = thr.report_proportion(
    data_df=df_cases_1,
    variables=stats_meas['n'],
    groups=groups,
    grouping_by='age_category',
)

In [9]:
# pairwise comparison: early (21-49) vs late (>60) onset
early_late_groups = ['Onset 21-49 years', 'Onset >60 years']

# drop the middle category so only the two compared groups remain
df_cases_2 = df_cases_1[df_cases_1['age_category'] != 'Onset 50-60 years'].reset_index(drop=True)

df_121 = thr.bonferroni_proportions(
    data_df=df_cases_2,
    variables=stats_meas['n'],
    groups=early_late_groups,
    grouping_by='age_category',
    correc_factor=3,  # presumably the number of pairwise comparisons -- confirm
)
df_121.columns = ['Variable', 'Adjusted p-value (Early vs Late)']

In [10]:
# pairwise comparison: early (21-49) vs medium (50-60) onset
early_medium_groups = ['Onset 21-49 years', 'Onset 50-60 years']

# drop the late category so only the two compared groups remain
df_cases_3 = df_cases_1[df_cases_1['age_category'] != 'Onset >60 years'].reset_index(drop=True)

df_122 = thr.bonferroni_proportions(
    data_df=df_cases_3,
    variables=stats_meas['n'],
    groups=early_medium_groups,
    grouping_by='age_category',
    correc_factor=3,  # presumably the number of pairwise comparisons -- confirm
)
df_122.columns = ['Variable', 'Adjusted p-value (Early vs Medium)']

In [11]:
# pairwise comparison: medium (50-60) vs late (>60) onset
medium_late_groups = ['Onset 50-60 years', 'Onset >60 years']

# drop the early category so only the two compared groups remain
df_cases_4 = df_cases_1[df_cases_1['age_category'] != 'Onset 21-49 years'].reset_index(drop=True)

df_123 = thr.bonferroni_proportions(
    data_df=df_cases_4,
    variables=stats_meas['n'],
    groups=medium_late_groups,
    grouping_by='age_category',
    correc_factor=3,  # presumably the number of pairwise comparisons -- confirm
)
df_123.columns = ['Variable', 'Adjusted p-value (Medium vs Late)']

In [12]:
# combine the overall table with the three pairwise adjusted p-value tables
# NOTE: df_12 is overwritten, so re-running this cell alone feeds the
# already formatted table back into the formatter
pairwise_tables = [df_121, df_122, df_123]

df_12 = thr.final_formatter(overall_df=df_12, adjusted_df=pairwise_tables, groups=groups)

In [13]:
# save result in csv file

table_12_path = os.path.join(folder_path, 'Table_12.csv')
df_12.to_csv(table_12_path, index=False)

# display the final table
df_12

Unnamed: 0,Variable,Statistical Measure,Onset 21-49 years,Onset 50-60 years,Onset >60 years,Total,p-value,Adjusted p-value (Early vs Late),Adjusted p-value (Early vs Medium),Adjusted p-value (Medium vs Late),Available Samples for Analysis
0,Onset with motor symptoms,n (%),2414 (99.7),2681 (99.7),2315 (99.4),7410 (99.6),0.059,0.2156,0.9999,0.1211,7439
1,Rest tremor as onset symptom,n (%),1435 (59.3),1642 (61.1),1554 (66.7),4631 (62.3),p<0.001,p<0.001,0.5582,p<0.001,7439
2,Bradykinesia as onset symptom,n (%),994 (41.1),1092 (40.6),894 (38.4),2980 (40.1),0.1265,0.1751,0.9999,0.3095,7439
3,Rigidity as onset symptom,n (%),759 (31.4),870 (32.4),703 (30.2),2332 (31.3),0.2475,0.9999,0.9999,0.2841,7439
4,Non Motor Symp at onset,n (%),1026 (42.4),1442 (53.6),1435 (61.6),3903 (52.5),p<0.001,p<0.001,p<0.001,p<0.001,7439
5,Constipation at onset,n (%),280 (11.6),484 (18.0),547 (23.5),1311 (17.6),p<0.001,p<0.001,p<0.001,p<0.001,7439
6,Insomnia as onset symptom,n (%),187 (7.7),227 (8.4),165 (7.1),579 (7.8),0.1969,0.9999,0.9999,0.218,7439
7,RBD at onset,n (%),202 (8.3),304 (11.3),316 (13.6),822 (11.0),p<0.001,p<0.001,0.0012,0.0468,7439
8,Urinary urgency as onset symptom,n (%),108 (4.5),147 (5.5),188 (8.1),443 (6.0),p<0.001,p<0.001,0.2958,p<0.001,7439
9,Hyperhidrosis as onset symptom,n (%),43 (1.8),26 (1.0),12 (0.5),81 (1.1),p<0.001,p<0.001,0.0372,0.196,7439
