In [None]:
def get_library_path() -> str:
    """Return the parent directory of the current working directory.

    Returns
    -------
    str
        Path of the directory one level above ``os.getcwd()``. When the
        working directory is already a filesystem root, that root is returned
        unchanged.
    """
    # os.path.dirname keeps the root/drive component intact. Splitting the
    # path on os.sep and re-joining the pieces loses the separator after a
    # Windows drive letter (os.path.join('C:', 'Users') gives 'C:Users').
    return os.path.dirname(os.getcwd())

# imports

import os
import sys

# Parent folder of the working directory; it is used both as the import root
# for the local package and as the base of the data folders.
path_to_lib = get_library_path()

# Make the local package importable. The membership check keeps sys.path free
# of duplicate entries when this cell is executed more than once.
if path_to_lib not in sys.path:
    sys.path.append(path_to_lib)
import luxgiant_clinical.OddRatios as odd

import pandas as pd
import numpy as np

In [None]:
# load data

folder_path = os.path.join(path_to_lib, 'data/source')

# Read the three source tables with identical options; the generator yields
# them in the same order as the names on the left-hand side.
df, matched_ids, matched = (
    pd.read_csv(os.path.join(folder_path, csv_name), low_memory=False)
    for csv_name in ('cleaned_file.csv', 'matched_ids.csv', 'matching_ids.csv')
)

# Attach the columns of matched_ids to the cleaned data. pandas' default is an
# inner join, so participants absent from matched_ids are dropped here.
df = pd.merge(df, matched_ids, on='participant_id')

In [None]:
# rename and select first group of variables

# raw column name -> label used for the column in the analysis tables
variables_dict_1 = {
    "sex": "Sex",
    "age_at_onset": "Age at Onset",
    "nature_of_work___1": "Agricultural job",
    "over_your_lifetime_have_yo": "Life time direct exposure to pesticide/insecticide/fungicide",
    "in_your_lifetime_have_you": "Smoked 100 or more cigarettes",
    "in_your_lifetime_have": "Regular consumption of caffeinated drinks for >6months",
    "have_you_ever_had_a_head_i": "Head injury/concussion",
    "medical_history_metabolic___1": "Diabetes",
    "medical_history_cardiovasc___2": "Hypertension",
    "medical_history_cardiovasc___3": "History of CAD",
}

# raw columns to keep: the two bookkeeping columns followed by every raw name
# above, in dictionary order
variables_1 = ['participant_id', 'Status', *variables_dict_1]

# Labels analysed as exposures: every label above except the two that are
# handled separately (sex and age at onset), keeping dictionary order.
cofounders = [
    label
    for label in variables_dict_1.values()
    if label not in ("Sex", "Age at Onset")
]

In [None]:
# keep only variables for analysis

# Independent copy restricted to the columns listed in variables_1, with the
# raw column names replaced by the labels from variables_dict_1.
df_1 = (
    df[variables_1]
    .copy()
    .rename(columns=variables_dict_1)
)

In [None]:
# recode variable values

# Shared code books. Series.map turns every value that is not a key of the
# dict into NaN, so answers not listed here end up missing as well.
checkbox_codes = {'Checked': 1, 'Unchecked': 0}
yes_no_codes = {'Yes': 1, 'No': 0, 'Dont Know': np.nan, 'Refused': np.nan}

# column label -> code book applied to it
recode_plan = {
    "Agricultural job": checkbox_codes,
    "Diabetes": checkbox_codes,
    "Hypertension": checkbox_codes,
    "History of CAD": checkbox_codes,
    "Life time direct exposure to pesticide/insecticide/fungicide": yes_no_codes,
    "Smoked 100 or more cigarettes": yes_no_codes,
    "Regular consumption of caffeinated drinks for >6months": yes_no_codes,
    # this column has one extra answer that is treated as missing
    "Head injury/concussion": {**yes_no_codes, 'Possibly': np.nan},
}

for column, codes in recode_plan.items():
    # A column that has already been recoded is numeric. Mapping it again
    # would match none of the string keys and silently wipe it to NaN, so
    # numeric columns are skipped; this makes the cell safe to re-run.
    if pd.api.types.is_numeric_dtype(df_1[column]):
        continue
    df_1[column] = df_1[column].map(codes)

In [None]:
# Run the project's report_mcnemar helper on every variable listed in
# `cofounders`, passing `matched` as the matching table and participant_id as
# the identifier column.
# NOTE(review): judging by the names, this yields unadjusted odd ratios from
# McNemar tests on the matched pairs; confirm against luxgiant_clinical.OddRatios.
unadjusted_OR = odd.report_mcnemar(
    df_1,
    df_matched=matched,
    variables=cofounders,
    id_col="participant_id",
)

In [None]:
# Run the project's adjusted_odds_ratios helper on the recoded frame for every
# variable listed in `cofounders`. 'Status' is the outcome column and
# target_code gives its numeric coding (Patient -> 1, Control -> 0).
# NOTE(review): match_1 / match_2 presumably name the matching variables the
# estimates are adjusted for; confirm against luxgiant_clinical.OddRatios.
adjusted_OR = odd.adjusted_odds_ratios(
    data=df_1, 
    target='Status', 
    target_code={'Patient':1, 'Control':0},
    variables=cofounders, 
    match_1='Sex',
    match_2='Age at Onset'
)

In [None]:
# Combine the two result tables on their shared 'Variables' column (pandas'
# default inner join: only variables present in both tables are kept).
table_4 = pd.merge(unadjusted_OR, adjusted_OR, on='Variables')

# DataFrame.to_csv does not create missing folders, so make sure the output
# directory exists before writing (a no-op when it is already there).
table_4_path = os.path.join(path_to_lib, 'data/final/table_4.csv')
os.makedirs(os.path.dirname(table_4_path), exist_ok=True)
table_4.to_csv(table_4_path)

# last expression of the cell: display the combined table
table_4