In [10]:
import pandas as pd
from io import StringIO
import requests

def getCurie_Disease(name):
    """Look up candidate identifiers for a disease or phenotype name.

    Sends ``name`` to the name-resolution service's ``/lookup`` endpoint,
    restricted to the ``DiseaseOrPhenotypicFeature`` Biolink type and at most
    10 results, and parses the JSON reply with pandas.

    Parameters
    ----------
    name : str
        Free-text name to resolve. It is passed as a query parameter, so
        characters such as ``&``, ``#`` or ``+`` are percent-encoded instead
        of corrupting the query string.

    Returns
    -------
    tuple
        ``(curies, labels)`` -- the ``curie`` and ``label`` columns of the
        parsed response, in the order the service returned them.

    Raises
    ------
    TypeError
        If ``name`` is not a string (e.g. a NaN read from an empty cell).
    requests.RequestException
        On network failure, timeout, or an HTTP error status.
    AttributeError
        If the parsed response lacks a ``curie`` or ``label`` column.
    """
    # The original string concatenation rejected non-strings with TypeError;
    # keep that contract explicit so a NaN is never looked up as "nan".
    if not isinstance(name, str):
        raise TypeError("name must be a str, got " + type(name).__name__)

    response = requests.get(
        'https://name-resolution-sri.renci.org/lookup',
        params={
            'string': name,
            'autocomplete': 'false',
            'offset': 0,
            'limit': 10,
            'biolink_type': 'DiseaseOrPhenotypicFeature',
        },
        timeout=30,  # never hang the whole batch on one stalled request
    )
    # Fail loudly on 4xx/5xx instead of parsing an error payload as results.
    response.raise_for_status()

    returned = pd.read_json(StringIO(response.text))
    return returned.curie, returned.label

def getCurie_Drug(name):
    """Look up candidate identifiers for a drug, chemical or treatment name.

    Sends ``name`` to the name-resolution service's ``/lookup`` endpoint,
    restricted to the ``ChemicalOrDrugOrTreatment`` Biolink type and at most
    10 results, and parses the JSON reply with pandas.

    Parameters
    ----------
    name : str
        Free-text name to resolve. It is passed as a query parameter, so
        characters such as ``&``, ``#`` or ``+`` are percent-encoded instead
        of corrupting the query string.

    Returns
    -------
    tuple
        ``(curies, labels)`` -- the ``curie`` and ``label`` columns of the
        parsed response, in the order the service returned them.

    Raises
    ------
    TypeError
        If ``name`` is not a string (e.g. a NaN read from an empty cell).
    requests.RequestException
        On network failure, timeout, or an HTTP error status.
    AttributeError
        If the parsed response lacks a ``curie`` or ``label`` column.
    """
    # The original string concatenation rejected non-strings with TypeError;
    # keep that contract explicit so a NaN is never looked up as "nan".
    if not isinstance(name, str):
        raise TypeError("name must be a str, got " + type(name).__name__)

    response = requests.get(
        'https://name-resolution-sri.renci.org/lookup',
        params={
            'string': name,
            'autocomplete': 'false',
            'offset': 0,
            'limit': 10,
            'biolink_type': 'ChemicalOrDrugOrTreatment',
        },
        timeout=30,  # never hang the whole batch on one stalled request
    )
    # Fail loudly on 4xx/5xx instead of parsing an error payload as results.
    response.raise_for_status()

    returned = pd.read_json(StringIO(response.text))
    return returned.curie, returned.label

def build_string_from_list(list):
    """Render an iterable of strings as ``"[a, b, c]"``.

    Parameters
    ----------
    list : iterable of str
        Items to join. The parameter name shadows the built-in ``list`` and is
        kept only so existing keyword callers continue to work.

    Returns
    -------
    str
        The items separated by ``", "`` and wrapped in square brackets. An
        empty input yields ``"[]"`` (the previous slicing-based version
        returned ``"]"`` in that case).

    Raises
    ------
    TypeError
        If any item is not a string.
    """
    return "[" + ", ".join(list) + "]"


# Load the drug/disease pair sheet. The loop below reads the 'diseases' and
# 'drug active ingredients' columns of every row.
diseaseData = pd.read_excel('../drug-disease-pairs-ema.xlsx')

# Parallel output lists: position i across all six lists describes one
# successfully resolved (disease, drug) pair.
diseaseLabelList = []
diseaseIDList = []
diseaseList = []
drugList = []
drugIDList = []
drugLabelList = []

for index, row in diseaseData.iterrows():
    curr_row_diseasesTreated = row['diseases']
    # Only text cells can be parsed; anything else (notably the float NaN that
    # pandas uses for an empty cell) is skipped.
    if not isinstance(curr_row_diseasesTreated, str):
        continue
    print(index)

    curr_row_drugsInTherapy = row['drug active ingredients']
    # These two lists are reset per row but never filled in this cell; they
    # are kept so the names defined by this cell stay unchanged.
    curr_row_disease_ids = []
    curr_row_disease_id_labels = []
    # The cell holds a stringified list such as "['A', 'B']": strip brackets
    # and quotes, then split on commas.
    # NOTE(review): a disease name that itself contains a comma is split into
    # several entries here.
    curr_row_diseaseList = (
        curr_row_diseasesTreated
        .replace("[", "")
        .replace("]", "")
        .replace('\'', '')
        .split(',')
    )
    print("disease list: ", curr_row_diseaseList)

    # Resolve the drug once per row; on failure keep a sentinel so the row's
    # diseases are still recorded.
    try:
        drugCurie, drugLabel = getCurie_Drug(curr_row_drugsInTherapy)
        drugID = drugCurie[0]
        drugIDLabel = drugLabel[0]
    except Exception:
        # `Exception`, not a bare `except`, so Ctrl-C / kernel interrupt still
        # stops a long run.
        print("could not identify drug: ", curr_row_drugsInTherapy)
        drugID = "NameRes Failed"
        drugIDLabel = "NameRes Failed"

    for idx2, item in enumerate(curr_row_diseaseList):
        item = item.strip().replace(" \n", "").replace(" (PREVENTATIVE)", "")
        curr_row_diseaseList[idx2] = item
        print(item)
        try:
            print(item)
            diseaseCurie, diseaseLabel = getCurie_Disease(item)
            # Take both values before touching any output list, so a failure
            # half-way can never leave the parallel lists misaligned.
            diseaseID = diseaseCurie[0]
            diseaseIDLabel = diseaseLabel[0]
        except Exception:
            print("error during name resolving")
            continue

        print(diseaseID)
        diseaseIDList.append(diseaseID)
        diseaseLabelList.append(diseaseIDLabel)
        diseaseList.append(item)
        drugList.append(curr_row_drugsInTherapy)
        drugIDList.append(drugID)
        drugLabelList.append(drugIDLabel)
    
    

0
disease list:  ['UTERINE FIBROIDS', ' ENDOMETRIOSIS (PREVENTATIVE) \n']
UTERINE FIBROIDS
UTERINE FIBROIDS
UMLS:C0151995
ENDOMETRIOSIS
ENDOMETRIOSIS
MONDO:0005133
1
disease list:  ['NEOVASCULAR (WET) AGE-RELATED MACULAR DEGENERATION (AMD)', ' VISUAL IMPAIRMENT DUE TO MACULAR OEDEMA SECONDARY TO RETINAL VEIN OCCLUSION (BRANCH RVO OR CENTRAL RVO)', ' VISUAL IMPAIRMENT DUE TO DIABETIC MACULAR OEDEMA (DME)', ' VISUAL IMPAIRMENT DUE TO MYOPIC CHOROIDAL NEOVASCULARISATION (MYOPIC CNV) \n']
NEOVASCULAR (WET) AGE-RELATED MACULAR DEGENERATION (AMD)
NEOVASCULAR (WET) AGE-RELATED MACULAR DEGENERATION (AMD)
MONDO:0005417
VISUAL IMPAIRMENT DUE TO MACULAR OEDEMA SECONDARY TO RETINAL VEIN OCCLUSION (BRANCH RVO OR CENTRAL RVO)
VISUAL IMPAIRMENT DUE TO MACULAR OEDEMA SECONDARY TO RETINAL VEIN OCCLUSION (BRANCH RVO OR CENTRAL RVO)
MONDO:0002303
VISUAL IMPAIRMENT DUE TO DIABETIC MACULAR OEDEMA (DME)
VISUAL IMPAIRMENT DUE TO DIABETIC MACULAR OEDEMA (DME)
MONDO:0004728
VISUAL IMPAIRMENT DUE TO MYOPIC CHOR

In [11]:
# Output column header -> the parallel list that fills it. Insertion order is
# the column order of the exported sheet.
outputColumns = {
    'disease IDs': diseaseIDList,
    'disease ID labels': diseaseLabelList,
    'list of diseases': diseaseList,
    'active ingredients in therapy': drugList,
    'drug ID': drugIDList,
    'drug ID Label': drugLabelList,
}

# Each list is first laid out as one row; transposing turns the lists into
# columns, with one row per resolved (disease, drug) pair.
sheetData = pd.DataFrame(data=list(outputColumns.values())).transpose()
sheetData.columns = list(outputColumns.keys())

print(sheetData)
sheetData.to_excel("indication-list-ema-v1.xlsx")

        disease IDs                disease ID labels  \
0     UMLS:C0151995  Degeneration of uterine fibroid   
1     MONDO:0005133                    endometriosis   
2     MONDO:0005417         wet macular degeneration   
3     MONDO:0002303   central retinal vein occlusion   
4     MONDO:0004728           diabetic macular edema   
...             ...                              ...   
4028  MONDO:0004781      acute myocardial infarction   
4029  MONDO:0005344      hepatitis B virus infection   
4030  MONDO:0005504                       diphtheria   
4031  MONDO:0005526                          tetanus   
4032     HP:0005202    Helicobacter pylori infection   

                                       list of diseases  \
0                                      UTERINE FIBROIDS   
1                                         ENDOMETRIOSIS   
2     NEOVASCULAR (WET) AGE-RELATED MACULAR DEGENERA...   
3     VISUAL IMPAIRMENT DUE TO MACULAR OEDEMA SECOND...   
4     VISUAL IMPAIRMENT DUE TO D