# Comparison between PD patients by disease duration

In this notebook we compare PD patients stratified by disease duration (<=5 years vs. >5 years), taking into account both motor and non-motor symptoms.

In [1]:
# imports

import os
import sys
sys.path.append('/mnt/0A2AAC152AABFBB7/CGE/luxgiant-clinical')
import luxgiant_clinical.TwoCatAnalysis as two

import pandas as pd

In [2]:
# load data

# directory holding the cleaned dataset; the result tables are written here too
folder_path = '/mnt/0A2AAC152AABFBB7/CGE/luxgiant-clinical/data'
cleaned_csv = os.path.join(folder_path, 'cleaned_file.csv')

# the first CSV column is used as the index
df = pd.read_csv(cleaned_csv, index_col=0, low_memory=False)

In [3]:
# filter data to keep only patients with disease duration information

# rows flagged as patients
mask_patients = df['Status'].eq('Patient')
# rows where the disease-duration category is present
mask_length = df['pdsl'].notna()

# both conditions must hold; the index is renumbered from zero
df_cases = df.loc[mask_patients & mask_length].reset_index(drop=True)

# the full table is not needed past this point
del df
df_cases.shape

(7473, 714)

In [4]:
# rename and select first group of variables

# raw column name -> readable label, for the "onset" symptom columns
onset_labels = {
    "initial_symptom_s___1": "Onset with motor symptoms",
    "initial_motor_symptom_s___2": "Rest tremor as onset symptom",
    "initial_motor_symptom_s___12": "Bradykinesia as onset symptom",
    "initial_motor_symptom_s___13": "Rigidity as onset symptom",
    "initial_symptom_s___2": "Non Motor Symp at onset",
    "initial_non_motor_symptom___18": "Constipation at onset",
    "initial_non_motor_symptom___8": "Insomnia as onset symptom",
    "initial_non_motor_symptom___4": "RBD at onset",
    "initial_non_motor_symptom___20": "Urinary urgency as onset symptom",
    "initial_non_motor_symptom___10": "Anosmia/hyposmia as onset symptom",
    "initial_non_motor_symptom___13": "Hyperhidrosis as onset symptom",
    "initial_non_motor_symptom___2": "Cognitive symptoms at onset",
}

# raw column name -> readable label, for the "current" symptom columns
current_labels = {
    "current_motor_symptom_s___2": "Current rest tremor",
    "current_motor_symptom_s___3": "Current postural/action tremor",
    "current_motor_symptom_s___14": "Current bradykinesia",
    "current_motor_symptom_s___4": "Current rigidity",
    "current_non_motor_symptom___3": "Current NMS Psychosis",
    "current_non_motor_symptom___5": "Current depression",
    "current_non_motor_symptom___6": "Current RBD",
    "current_non_motor_symptom___7": "Current RLS",
    "current_non_motor_symptom___9": "Current anosmia/hyposmia",
    "current_non_motor_symptom___2": "Current cognitive impairment",
    "current_non_motor_symptom___10": "Current EDSS",
    "current_non_motor_symptom___11": "Current insomnia",
    "current_non_motor_symptom___14": "Current orthostasis",
    "current_non_motor_symptom___16": "Current hyperhidrosis",
    "current_non_motor_symptom___17": "Current seborrhea",
    "current_non_motor_symptom___20": "Current constipation",
    "current_non_motor_symptom___22": "Current urinary incontinence",
}

# full rename mapping: onset columns first, then current columns
variables_dict = {**onset_labels, **current_labels}

# columns to keep: identifier, grouping variable, then every symptom column
variables = ['participant_id', 'pdsl', *variables_dict]

# statistical measures
# 'n' lists the variables summarised as counts; this order is NOT the same as
# the order of variables_dict, so it is spelled out explicitly.
stats_meas = {
    'n': [
        "Onset with motor symptoms",
        "Rest tremor as onset symptom",
        "Bradykinesia as onset symptom",
        "Rigidity as onset symptom",
        "Non Motor Symp at onset",
        "Constipation at onset",
        "Insomnia as onset symptom",
        "RBD at onset",
        "Urinary urgency as onset symptom",
        "Hyperhidrosis as onset symptom",
        "Anosmia/hyposmia as onset symptom",
        "Cognitive symptoms at onset",
        "Current rest tremor",
        "Current postural/action tremor",
        "Current bradykinesia",
        "Current rigidity",
        "Current NMS Psychosis",
        "Current depression",
        "Current RBD",
        "Current RLS",
        "Current anosmia/hyposmia",
        "Current cognitive impairment",
        "Current EDSS",
        "Current insomnia",
        "Current orthostasis",
        "Current hyperhidrosis",
        "Current seborrhea",
        "Current constipation",
        "Current urinary incontinence",
    ],
}

In [5]:
# keep only variables for analysis

# take an independent copy of the analysis columns, then swap the raw
# column names for their readable labels
df_cases_1 = (
    df_cases.loc[:, variables]
    .copy()
    .rename(columns=variables_dict)
)

# the wide table is not referenced again after this cell
del df_cases

In [6]:
# recode variable values

# readable labels for the two disease-duration groups
PDSL_LABELS = {"<=5": 'PD duration <=5 yrs', ">5": 'PD duration >5 yrs'}
# checkbox answers recoded as 0/1 indicators
CHECKBOX_CODES = {"Checked": 1, "Unchecked": 0}

# NOTE: Series.map turns every value that is not a key of the mapping into NaN,
# so this cell must run exactly once on freshly loaded data; re-running it on
# already-recoded columns would replace their values with NaN.
df_cases_1["pdsl"] = df_cases_1["pdsl"].map(PDSL_LABELS)

# every renamed symptom column carries the same Checked/Unchecked coding,
# so one mapping applied in a loop replaces the per-column statements
for symptom in variables_dict.values():
    df_cases_1[symptom] = df_cases_1[symptom].map(CHECKBOX_CODES)

In [7]:
# create empty dataframe for summary statistics

# pieces shared by both column layouts
id_cols = ['Variable', 'Statistical Measure']
group_cols = ['PD duration <=5 yrs', 'PD duration >5 yrs']
sample_col = 'Available Sample for Analysis'

# columns of the empty summary frame
summary_cols = [*id_cols, *group_cols, sample_col]
# column order used when selecting the final table
ordered_cols = [*id_cols, *group_cols, 'p-value', 'Total', sample_col]

df_summary = pd.DataFrame(columns=summary_cols)

In [8]:
# count and proportion of categorical variables

binary_vars = stats_meas['n']

# per-group results for each variable, grouped by 'pdsl'
res = two.count_percent(df_cases_1, binary_vars, 'pdsl')

# fill the summary frame with the two duration-group columns
by_group = two.summaryze_count_percent(
    df_summary, res, binary_vars, 'PD duration <=5 yrs', 'PD duration >5 yrs'
)

# the two extra tables are joined onto the summary by variable name
overall = two.count_simple(df_cases_1, features=binary_vars)
tests = two.chi_squared_tests(df_cases_1, binary_vars, group_var='pdsl')

df_summary = by_group.merge(overall, on='Variable').merge(tests, on='Variable')

# final table with columns in presentation order
df_10 = df_summary[ordered_cols].copy()

In [9]:
# save result in csv file

table_10_csv = os.path.join(folder_path, 'Table_10.csv')
# the row index is not written to the file
df_10.to_csv(table_10_csv, index=False)

# display the table as the cell output
df_10

Unnamed: 0,Variable,Statistical Measure,PD duration <=5 yrs,PD duration >5 yrs,p-value,Total,Available Sample for Analysis
0,Onset with motor symptoms,n (%),3199 (92.6),3763 (93.7),0.0768,6962 (93.2),7473
1,Rest tremor as onset symptom,n (%),2249 (65.1),2404 (59.8),p<0.001,4653 (62.3),7473
2,Bradykinesia as onset symptom,n (%),1429 (41.4),1565 (38.9),0.036,2994 (40.1),7473
3,Rigidity as onset symptom,n (%),1043 (30.2),1297 (32.3),0.055,2340 (31.3),7473
4,Non Motor Symp at onset,n (%),222 (6.4),283 (7.0),0.3103,505 (6.8),7473
5,Constipation at onset,n (%),585 (16.9),729 (18.1),0.1799,1314 (17.6),7473
6,Insomnia as onset symptom,n (%),284 (8.2),299 (7.4),0.2271,583 (7.8),7473
7,RBD at onset,n (%),329 (9.5),494 (12.3),p<0.001,823 (11.0),7473
8,Urinary urgency as onset symptom,n (%),194 (5.6),251 (6.2),0.2706,445 (6.0),7473
9,Hyperhidrosis as onset symptom,n (%),33 (1.0),48 (1.2),0.3763,81 (1.1),7473


In [10]:
# save result in Excel file, in the same data folder as the CSV export

# The path is built from folder_path rather than repeated as a literal.
# The context manager closes the writer even if to_excel raises.
with pd.ExcelWriter(os.path.join(folder_path, 'Table_10.xlsx')) as xlsx_writer:
    df_10.to_excel(xlsx_writer, index=False)