In [22]:
def get_library_path()->str:

    cwd = os.getcwd()
    lst = cwd.split(os.sep)[:-1]

    path_lib = f'{os.sep}'

    for elem in lst:
        path_lib = os.path.join(path_lib, elem)

    return path_lib

# imports

import os
import sys

path_to_lib = get_library_path()

sys.path.append(path_to_lib)
import luxgiant_clinical.ThreeCatAnalysis as thr

import pandas as pd
import numpy as np

In [23]:
# load data
folder_path = os.path.join(path_to_lib, 'data/source')

df = pd.read_csv(
    os.path.join(folder_path, 'cleaned_file.csv'), 
    low_memory=False
)

In [24]:
# filter data to keep only patients with PD subtype information

mask_patients= (df['Status']=='Patient')
mask_subtype = ~df['subtype'].isnull()
mask_off     = (df['on_off']=='OFF: Off is the typical functional state when patien ts have a poor response in spite of taking medications.')

df_cases = df[mask_patients & mask_off & mask_subtype].reset_index(drop=True)
del df

df_cases.shape

(1815, 716)

In [25]:
df_on = df_cases[df_cases['hyonoff']=='On'].reset_index(drop=True)

df_on['Stage I-III']= df_on['hystage'].map({'Not severe':1, 'Severe':0})
df_on['Stage IV-V'] = df_on['hystage'].map({'Not severe':0, 'Severe':1})
df_on['UPDRS III-ON']= df_on['updrs_part_iii_total_score'].copy()

variables = ['participant_id', 'subtype', 'Stage I-III', 'Stage IV-V', 'UPDRS III-ON']

stats_meas1 = {
    'n': ['Stage I-III', 'Stage IV-V'],
    'median' : ['UPDRS III-ON']
}
groups = ['Tremor Dominant', 'Indeterminate', 'Postural instability and gait difficulty']

# keep only variables for analysis

df_on = df_on[variables].copy()

In [26]:
df_off = df_cases[df_cases['hyonoff']=='Off'].reset_index(drop=True)

df_off['Stage I-III']= df_off['hystage'].map({'Not severe':1, 'Severe':0})
df_off['Stage IV-V'] = df_off['hystage'].map({'Not severe':0, 'Severe':1})
df_off['UPDRS III-OFF']= df_off['updrs_part_iii_total_score'].copy()

variables = ['participant_id', 'subtype', 'Stage I-III', 'Stage IV-V', 'UPDRS III-OFF']

stats_meas2 = {
    'n': ['Stage I-III', 'Stage IV-V'],
    'median' : ['UPDRS III-OFF']
}

# keep only variables for analysis

df_off = df_off[variables].copy()

In [27]:
dummy = {
    'Variable'            : ['H & Y ON', 'Stage I-III', 'Stage VI-V'],
    'Statistical Measure' : ['']*3,
    'Tremor Dominant'     : ['']*3, 
    'Indeterminate'       : ['']*3, 
    'Postural instability and gait difficulty': ['']*3,
    'Total'               : ['']*3,
    'p-value'             : [np.nan]*3,
    'Available Samples for Analysis'           : ['']*3
}
df_dummy = pd.DataFrame(dummy)

In [28]:
summary1 = [
    thr.report_proportion(
        data_df    =df_off,
        variables  =stats_meas2['n'],
        groups     =groups,
        grouping_by='subtype',
        subheader  ='H & Y OFF'
        ),
    thr.report_median_iqr(
        data_df    =df_on,
        variables  =stats_meas1['median'],
        groups     =groups,
        grouping_by='subtype'
    ),
    thr.report_median_iqr(
        data_df    =df_off,
        variables  =stats_meas2['median'],
        groups     =groups,
        grouping_by='subtype'
    )
]

In [29]:
variables_dict = {
    "total_score_for_moca": "MOCA Score",
    "total_score_for_bdi" : "BDI Score"
}
variables_1 = ['participant_id', 'subtype'] + list(variables_dict.keys())

# statistical measures
stats_meas3 = {
    'mean'  : ["MOCA Score"],
    'median': ["BDI Score"]
}

groups = ['Tremor Dominant', 'Indeterminate', 'Postural instability and gait difficulty']

# keep only variables for analysis

df_cases_1 = df_cases[variables_1].copy()
df_cases_1 = df_cases_1.rename(columns=variables_dict)

In [30]:
summary2 = [ 
    thr.report_mean_std(
        data_df    =df_cases_1,
        variables  =stats_meas3['mean'],
        groups     =groups,
        grouping_by='subtype'
    ),
    thr.report_median_iqr(
        data_df    =df_cases_1,
        variables  =stats_meas3['median'],
        groups     =groups,
        grouping_by='subtype'
    )
]

In [31]:
df_71 = pd.concat(
    summary1 + summary2, axis=0, ignore_index=True
)
df_71

Unnamed: 0,Variable,Statistical Measure,Tremor Dominant,Indeterminate,Postural instability and gait difficulty,Total,p-value,Available Samples for Analysis
0,H & Y OFF,,,,,,,
1,Stage I-III,n (%),588 (98.3),338 (96.3),769 (90.7),1695 (94.3),1.002783e-09,1797.0
2,Stage IV-V,n (%),10 (1.7),13 (3.7),79 (9.3),102 (5.7),1.002783e-09,1797.0
3,UPDRS III-ON,median (IQR),2.0 (2.0 - 2.0),9.0 (9.0 - 9.0),46.0 (46.0 - 46.0),9.0 (5.5 - 27.5),0.3679,3.0
4,UPDRS III-OFF,median (IQR),35.0 (25.0 - 46.0),39.0 (31.0 - 46.0),39.0 (28.0 - 48.0),38.0 (27.0 - 47.0),0.0013,1473.0
5,MOCA Score,mean (SD),25.3 (6.1),24.8 (6.0),24.2 (5.8),24.7 (6.0),0.0054,1533.0
6,BDI Score,median (IQR),8.0 (4.0 - 12.0),9.0 (5.0 - 13.0),9.0 (6.0 - 14.0),9.0 (5.0 - 13.0),0.0,1646.0


In [32]:
df_on_1 = df_on[~(df_on['subtype']=='Indeterminate')].reset_index(drop=True)
df_off_1= df_off[~(df_off['subtype']=='Indeterminate')].reset_index(drop=True)

In [33]:
summary3 = [
    thr.bonferroni_proportions(
        data_df      =df_off_1, 
        variables    =stats_meas2['n'], 
        groups       =['Tremor Dominant',  'Postural instability and gait difficulty'],
        grouping_by  ='subtype',
        subheader  ='H & Y OFF',
        correc_factor=3
    ),
    thr.bonferroni_median_iqr(
        data_df      =df_off_1, 
        variables    =stats_meas2['median'], 
        groups       =['Tremor Dominant',  'Postural instability and gait difficulty'],
        grouping_by  ='subtype',
        correc_factor=3
    ),
    thr.bonferroni_median_iqr(
        data_df      =df_on_1,
        variables    =stats_meas1['median'], 
        groups       =['Tremor Dominant',  'Postural instability and gait difficulty'],
        grouping_by  ='subtype',
        correc_factor=3
    ),
]

In [34]:
variables_dict = {
    "total_score_for_moca": "MOCA Score",
    "total_score_for_bdi" : "BDI Score"
}
variables_1 = ['participant_id', 'subtype'] + list(variables_dict.keys())

# statistical measures
stats_meas3 = {
    'mean'  : ["MOCA Score"],
    'median': ["BDI Score"]
}

groups = ['Tremor Dominant', 'Indeterminate', 'Postural instability and gait difficulty']

# keep only variables for analysis

df_cases_1 = df_cases[variables_1].copy()
df_cases_1 = df_cases_1.rename(columns=variables_dict)

In [35]:
df_cases_11 = df_cases_1[~(df_cases_1['subtype']=='Indeterminate')].reset_index(drop=True)

In [36]:
summary4 =  [
    thr.bonferroni_mean_std(
        data_df      =df_cases_11,
        variables    =stats_meas3['mean'], 
        groups       =['Tremor Dominant',  'Postural instability and gait difficulty'],
        grouping_by  ='subtype',
        correc_factor=3
    ),
    thr.bonferroni_median_iqr(
        data_df      =df_cases_11,
        variables    =stats_meas3['median'], 
        groups       =['Tremor Dominant',  'Postural instability and gait difficulty'],
        grouping_by  ='subtype',
        correc_factor=3
    ),
]

In [37]:
to_concat1 = [summary1[0], df_dummy] + summary1[1:] + summary2
df_711 = pd.concat(to_concat1, axis=0, ignore_index=True)
df_711

Unnamed: 0,Variable,Statistical Measure,Tremor Dominant,Indeterminate,Postural instability and gait difficulty,Total,p-value,Available Samples for Analysis
0,H & Y OFF,,,,,,,
1,Stage I-III,n (%),588 (98.3),338 (96.3),769 (90.7),1695 (94.3),1.002783e-09,1797.0
2,Stage IV-V,n (%),10 (1.7),13 (3.7),79 (9.3),102 (5.7),1.002783e-09,1797.0
3,H & Y ON,,,,,,,
4,Stage I-III,,,,,,,
5,Stage VI-V,,,,,,,
6,UPDRS III-ON,median (IQR),2.0 (2.0 - 2.0),9.0 (9.0 - 9.0),46.0 (46.0 - 46.0),9.0 (5.5 - 27.5),0.3679,3.0
7,UPDRS III-OFF,median (IQR),35.0 (25.0 - 46.0),39.0 (31.0 - 46.0),39.0 (28.0 - 48.0),38.0 (27.0 - 47.0),0.0013,1473.0
8,MOCA Score,mean (SD),25.3 (6.1),24.8 (6.0),24.2 (5.8),24.7 (6.0),0.0054,1533.0
9,BDI Score,median (IQR),8.0 (4.0 - 12.0),9.0 (5.0 - 13.0),9.0 (6.0 - 14.0),9.0 (5.0 - 13.0),0.0,1646.0


In [38]:
df_dummy1 = pd.DataFrame({
    'Variable'            : ['H & Y ON', 'Stage I-III', 'Stage VI-V'],
    'Adjusted p-value'    : [np.nan]*3,
})

to_concat2 = [summary3[0], df_dummy1] + summary3[1:] + summary4
df_712 = pd.concat(to_concat2, axis=0, ignore_index=True)
df_712

Unnamed: 0,Variable,Adjusted p-value
0,H & Y OFF,
1,Stage I-III,7.753166e-09
2,Stage IV-V,7.753166e-09
3,H & Y ON,
4,Stage I-III,
5,Stage VI-V,
6,UPDRS III-OFF,0.008513943
7,UPDRS III-ON,3.0
8,MOCA Score,0.003924713
9,BDI Score,1.088444e-08


In [43]:
df_71 = pd.concat([df_711, df_712[['Adjusted p-value']]], axis=1)

In [44]:
for col in df_71.columns:
    if 'p-value' in col:
        df_71[col] = df_71[col].apply(lambda x: thr.pvalue_formatter(x))

df_71 = df_71.fillna('')
ordered_cols = ['Variable', 'Statistical Measure', 'Tremor Dominant', 'Indeterminate', 'Postural instability and gait difficulty', 'Total', 'p-value', 'Adjusted p-value', 'Available Samples for Analysis']
df_71 = df_71[ordered_cols].copy()
df_71

Unnamed: 0,Variable,Statistical Measure,Tremor Dominant,Indeterminate,Postural instability and gait difficulty,Total,p-value,Adjusted p-value,Available Samples for Analysis
0,H & Y OFF,,,,,,,,
1,Stage I-III,n (%),588 (98.3),338 (96.3),769 (90.7),1695 (94.3),p<0.001,p<0.001,1797.0
2,Stage IV-V,n (%),10 (1.7),13 (3.7),79 (9.3),102 (5.7),p<0.001,p<0.001,1797.0
3,H & Y ON,,,,,,,,
4,Stage I-III,,,,,,,,
5,Stage VI-V,,,,,,,,
6,UPDRS III-ON,median (IQR),2.0 (2.0 - 2.0),9.0 (9.0 - 9.0),46.0 (46.0 - 46.0),9.0 (5.5 - 27.5),0.3679,0.0085,3.0
7,UPDRS III-OFF,median (IQR),35.0 (25.0 - 46.0),39.0 (31.0 - 46.0),39.0 (28.0 - 48.0),38.0 (27.0 - 47.0),0.0013,0.9999,1473.0
8,MOCA Score,mean (SD),25.3 (6.1),24.8 (6.0),24.2 (5.8),24.7 (6.0),0.0054,0.0039,1533.0
9,BDI Score,median (IQR),8.0 (4.0 - 12.0),9.0 (5.0 - 13.0),9.0 (6.0 - 14.0),9.0 (5.0 - 13.0),p<0.001,p<0.001,1646.0


In [45]:
df_71.to_csv(os.path.join(path_to_lib, 'data/auxiliar/aux_table_7_1.csv'), index=False)