In [124]:
import pandas as pd
import numpy as np
from tqdm import tqdm

print("reading files...")
indications = pd.read_csv("../merge_lists/indicationList.tsv", sep='\t')
contraindications = pd.read_excel("../contraindications/contraindications_to_diseases/diseaseList_to_ids/contraindication_list_filled.xlsx")

print("removing unneeded columns and dropping duplicate entries")
contraindications.drop('Unnamed: 0.2', axis=1, inplace=True)
contraindications.drop('Unnamed: 0.1', axis=1, inplace=True)
contraindications.drop('Unnamed: 0', axis=1, inplace=True)
contraindications.drop_duplicates(subset=['active ingredients', 'drug ID', 'disease curie'], keep='first')

print("tagging contraindications and indications")
contraindications['contraindication'] = True
indications['indication'] = True
contraindications = contraindications.rename(columns={'disease list': 'disease name', 'disease curie':'disease ID'})
indications = indications.rename(columns={'disease list':'disease name', 
                                          'disease curie':'disease ID', 
                                          'active ingredients in therapy':'active ingredients',
                                          'disease ID labels':'disease label',
                                          'drug ID Label': 'drug label', 
                                          'disease IDs': 'disease ID',
                                          'list of diseases': 'disease name',
                                          })
contraindications['drug|disease'] = list(f"{row['drug ID']}|{row['disease ID']}" for idx,row in contraindications.iterrows())
indications = indications[['active ingredients', 
                           'drug ID', 
                           'drug label', 
                           'disease name', 
                           'disease ID', 
                           'disease label',  
                           'indication', 
                           'drug|disease',
                          ]]
print("combining lists...")
ground_truths_list = pd.concat([indications, contraindications], axis=1)
result = pd.concat([indications, contraindications], axis=0).reset_index(drop=True)
result.drop('source list', axis=1, inplace=True)

print("adding indication / contraindication tags to drugs...")
for idx, row in tqdm(result.iterrows(), total = len(result)):
    if np.isnan((row['indication'])):
        result.loc[idx, "indication"] = False
        #result['indication'][idx]=False
    if np.isnan((row['contraindication'])):
        #result['contraindication'][idx]=False
        result.loc[idx, "contraindication"] = False
print("saving file...")
result.to_csv("ground-truths-list.tsv", sep='\t')
print("completed indication and contraindication merging")

reading files...
removing unneeded columns and dropping duplicate entries
tagging contraindications and indications
combining lists...
adding indication / contraindication tags to drugs...


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 118712/118712 [00:03<00:00, 36219.05it/s]


saving file...
completed indication and contraindication merging


In [121]:
print(result.loc[10, "contraindication"])

10    False
10    False
Name: contraindication, dtype: object
