In [1]:
# Requires openpyxl (used by pd.read_excel for .xlsx files); if it is missing, run: %pip install openpyxl

In [2]:
import os
import dxpy
import pandas as pd

In [3]:
def map_meds_to_atc(meds, med_to_atc_dict):
    """Translate a "|"-delimited medication string into "|"-delimited ATC codes.

    Parameters
    ----------
    meds : str or null
        Medication names separated by "|". Values that ``pd.isnull`` reports
        as missing are returned unchanged.
    med_to_atc_dict : dict
        Maps a medication name to its ATC code string.

    Returns
    -------
    str or null
        The mapped ATC codes, with spaces removed, joined by "|" in input
        order. Medications that have no entry in the mapping, or whose entry
        is not a string, are dropped; if nothing maps, the result is "".
    """
    if pd.isnull(meds):
        return meds

    atc_codes = []
    for med in meds.split("|"):
        atc = med_to_atc_dict.get(med)
        # A mapping value that is not a string (e.g. NaN for a missing code)
        # has no .replace(); treat it as unmapped instead of raising.
        if isinstance(atc, str):
            atc_codes.append(atc.replace(" ", ""))
    return "|".join(atc_codes)

def process_med_df(med_df, med_to_atc_dict):
    """Derive a binary antipsychotic/antidepressant indicator per row.

    Parameters
    ----------
    med_df : pandas.DataFrame
        Must contain a ``medications`` column of "|"-delimited medication
        names (missing values allowed). An ``eid`` column, if present, is
        renamed to ``IID`` in the result. The input frame is not modified.
    med_to_atc_dict : dict
        Medication name -> ATC code mapping, forwarded to ``map_meds_to_atc``.

    Returns
    -------
    pandas.DataFrame
        Two columns, ``IID`` and ``meds``, on the same index as ``med_df``.
        ``meds`` is 1 when the mapped ATC string contains "N05A" or "N06A",
        otherwise 0 (including rows with missing medications).
    """
    med_mapped = med_df["medications"].apply(map_meds_to_atc, args=(med_to_atc_dict,))
    # na=False makes rows with missing medications count as "not taking",
    # without a separate fillna pass over an object-dtype result.
    # ATC code for antipsychotics: N05A
    on_antipsychotics = med_mapped.str.contains("N05A", na=False, regex=False)
    # ATC code for antidepressants: N06A
    on_antidepressants = med_mapped.str.contains("N06A", na=False, regex=False)

    # assign() returns a new frame, so the caller's DataFrame keeps its
    # original columns.
    flagged = med_df.assign(meds=(on_antipsychotics | on_antidepressants).astype(int))
    return flagged.rename(columns={"eid": "IID"}).loc[:, ["IID", "meds"]]

def upload_file_to_project(filename, proj_dir):
    """Upload a local file to a project folder and print a confirmation.

    ``filename`` is the local path handed to ``dxpy.upload_local_file``;
    ``proj_dir`` is passed as the destination ``folder``. Returns ``None``.
    """
    # NOTE(review): parents=True presumably creates the destination folder
    # when it does not exist yet; confirm against the dxpy documentation.
    dxpy.upload_local_file(filename, parents=True, folder=proj_dir)
    message = f"*********{filename} uploaded!!*********"
    print(message)



In [4]:
# Raw medication table; eid is read as a string so it can be joined on IID later.
medications_path = "/mnt/project/notebooks/bmi/data/medications_raw.csv.gz"
med_df = pd.read_csv(medications_path, dtype={"eid": str})

# Medication -> ATC code lookup sheet; only the two columns used below are read.
# NOTE(review): comment="Supplementary" presumably discards a title row that
# starts with that word; confirm against the workbook.
atc_map_path = "/mnt/project/notebooks/bmi/data/downstream/medications/medication_maps_wu_natcomm.xlsx"
med_to_atc_df = pd.read_excel(
    atc_map_path,
    usecols=["Category", "Medication ATC code"],
    comment="Supplementary",
)
# Dictionary keyed by the "Category" column, valued by the ATC code column.
med_to_atc_dict = med_to_atc_df.set_index("Category")["Medication ATC code"].to_dict()

# Reduce the raw table to the IID / meds indicator frame.
med_df = process_med_df(med_df, med_to_atc_dict)

In [5]:
# Add the medication indicator to each ancestry-specific phenotype table,
# write the result locally, and upload it to the project.
# The destination folder is the same for every ancestry, so it is set once.
proj_dir = "/notebooks/bmi/data/downstream/medications/"
for ancestry in ["afr", "amr", "eas", "eur", "sas", "mid"]:
    anc_file = f"/mnt/project/notebooks/bmi/data/processed/{ancestry}_phenotype.tsv.gz"
    anc_df = pd.read_csv(anc_file, sep="\t", dtype={"FID": str, "IID": str})
    # Inner join (the pandas default, spelled out here): rows whose IID has
    # no match in med_df are dropped from the output.
    anc_df = anc_df.merge(med_df, on="IID", how="inner")
    filename = f"{ancestry}_phenotype.tsv.gz"
    # na_rep writes missing values as "NA" at serialization time, without
    # first converting the whole frame to object dtype via fillna.
    anc_df.to_csv(filename, index=False, sep="\t", na_rep="NA")
    upload_file_to_project(filename, proj_dir)


*********afr_phenotype.tsv.gz uploaded!!*********
*********amr_phenotype.tsv.gz uploaded!!*********
*********eas_phenotype.tsv.gz uploaded!!*********
*********eur_phenotype.tsv.gz uploaded!!*********
*********sas_phenotype.tsv.gz uploaded!!*********
*********mid_phenotype.tsv.gz uploaded!!*********
