In [39]:
import pandas as pd
from io import StringIO
pd.options.mode.copy_on_write = True
import requests
import re

def getCurie(name):
    """Resolve a name to its top-ranked CURIE using the SRI name-resolution service.

    Sends a ``/lookup`` request limited to ``biolink_type=ChemicalEntity`` and
    returns the first hit from the response.

    Args:
        name: Free-text name to look up (e.g. a drug or ingredient name).

    Returns:
        A ``(curie, label)`` tuple taken from the first result.

    Raises:
        requests.HTTPError: If the service answers with an error status.
        LookupError: If the service returns no results for ``name``.
    """
    # Let requests build the query string so that spaces, '&', '+', '/' and
    # other reserved characters inside ``name`` are percent-encoded instead of
    # corrupting the URL.
    response = requests.get(
        'https://name-resolution-sri.renci.org/lookup',
        params={
            'string': name,
            'autocomplete': 'false',
            'offset': 0,
            'limit': 10,
            'biolink_type': 'ChemicalEntity',
        },
        timeout=30,  # never hang forever on an unresponsive service
    )
    # Fail with a clear HTTP error rather than trying to parse an error page.
    response.raise_for_status()

    returned = pd.read_json(StringIO(response.text))
    if returned.empty:
        raise LookupError(f"No name-resolution results for {name!r}")

    resolvedName = returned.curie[0]
    resolvedLabel = returned.label[0]
    return resolvedName, resolvedLabel

def fetch_all_curies(drugList):
    """Look up a CURIE and label for every non-float entry of ``drugList``.

    Float entries (how pandas represents missing cells, i.e. NaN) are skipped.
    ``isinstance`` is used rather than an exact type comparison so that float
    subclasses such as ``numpy.float64`` are skipped as well instead of being
    passed on to ``getCurie``.

    Args:
        drugList: Iterable of drug names; may contain float/NaN placeholders.

    Returns:
        A tuple ``(curieList, labelList, drugsListOut)`` of three parallel
        lists: the resolved CURIEs, the upper-cased resolved labels, and the
        input names that were actually looked up.
    """
    curieList = []
    labelList = []
    drugsListOut = []
    for idx, item in enumerate(drugList):
        # Progress indicator: one lookup per item can take a while.
        print(idx)
        if isinstance(item, float):
            continue
        curie, label = getCurie(item)
        curieList.append(curie)
        labelList.append(label.upper())
        drugsListOut.append(item)
    return curieList, labelList, drugsListOut

def getCombinationTherapiesAndSingleTherapiesPMDA(pmdaList, exclusions):
    """Partition PMDA ingredient entries into combination and single therapies.

    An entry counts as a combination therapy when it is not a float, contains
    one of the delimiters ``","``, ``"/"`` or ``" AND "``, and is not listed in
    ``exclusions``. Combination entries are rewritten so that their components
    are separated by ``"; "``; each original/rewritten pair is printed.
    Everything else, including float (missing) entries and excluded names, is
    returned unchanged in the single-therapy list.

    Args:
        pmdaList: Iterable of ingredient strings; may contain float/NaN values.
        exclusions: Container of names that must not be treated as
            combinations even though they contain a delimiter.

    Returns:
        A tuple ``(pmdaCombinationTherapies, pmdaSingleTherapies)`` of lists.
    """
    pmdaCombinationTherapies = []
    pmdaSingleTherapies = []
    for item in pmdaList:
        # isinstance (not an exact type comparison) so float subclasses such as
        # numpy.float64 are also recognised as missing values instead of
        # raising TypeError on the substring checks below.
        is_text = not isinstance(item, float)
        if is_text and (("," in item) or ("/" in item) or (" AND " in item)) and item not in exclusions:
            # Turn every delimiter into "; " and then collapse the stray
            # spaces/semicolons that the surrounding whitespace leaves behind.
            newItem = (
                item.replace(",", "; ")
                .replace(" AND ", "; ")
                .replace("/", "; ")
                .replace(";;", ";")
                .replace(";  ", "; ")
                .replace("  ;", ";")
                .replace(" ;", ";")
                .strip()
            )
            print(item, "\n", newItem, "\n")
            pmdaCombinationTherapies.append(newItem)
        else:
            pmdaSingleTherapies.append(item)
    return pmdaCombinationTherapies, pmdaSingleTherapies

def makeUppercase(list):
    """Upper-case every non-float entry of ``list`` in place and return it.

    For each non-float entry, newlines are replaced with spaces and
    leading/trailing whitespace is stripped after upper-casing. Float entries
    (missing values / NaN) are left untouched; ``isinstance`` is used so that
    float subclasses such as ``numpy.float64`` are skipped too instead of
    raising ``AttributeError``.

    Args:
        list: Mutable, integer-addressable sequence of names. The parameter
            name shadows the builtin but is kept for backward compatibility.
            NOTE(review): entries are written back using the enumeration
            position as the key, so a pandas Series is presumably expected to
            have a default 0..n-1 index - confirm against callers.

    Returns:
        The same object that was passed in, after modification.
    """
    for index, item in enumerate(list):
        if isinstance(item, float):
            continue
        list[index] = item.upper().replace('\n', ' ').strip()
    return list


# --- Build the PMDA drug list -------------------------------------------------
# Reads the PMDA approvals, normalises the ingredient names, separates
# combination therapies from single therapies, splits the combinations into
# their components, removes excluded/duplicate names and writes the result to
# "pmda_list.xlsx".
print("Ingesting PMDA list")
pmda_approvals_df = pd.read_csv("pmda_approvals.csv")
drugList = pmda_approvals_df['Active Ingredient (underlined: new active ingredient)']

drugList = makeUppercase(drugList)
# Names that contain a delimiter but must NOT be treated as combinations.
splitExclusions = set(pd.read_excel("pmda_split_exclusions.xlsx")['name'])
pmdaCombinationTherapies, pmdaSingleTherapies = getCombinationTherapiesAndSingleTherapiesPMDA(drugList, splitExclusions)
pmdaSingleSet = set(pmdaSingleTherapies)
print("Splitting PMDA combination therapies (currently ", len(pmdaSingleSet), "unique items in list)")

# The combination strings were already rewritten to use "; " between their
# components, so they have to be split on ';'. Splitting on the original
# delimiters (',', '/', ' AND ') can never match at this point and would add
# each whole combination string to the single-therapy list unsplit.
for item in pmdaCombinationTherapies:
    for ingredient in item.split(';'):
        ingredient = ingredient.strip()
        # Skip empty fragments (e.g. from a trailing delimiter) and names that
        # are already known.
        if ingredient and ingredient not in pmdaSingleSet:
            pmdaSingleTherapies.append(ingredient)
            pmdaSingleSet.add(ingredient)

print(len(set(pmdaSingleTherapies)), "single-component therapies after splitting")
print(len(set(pmdaSingleTherapies+pmdaCombinationTherapies)), " total therapies after splitting")
exclusions = pd.read_excel('exclusions_pmda.xlsx')['name']
print("removing excluded therapies")
pmdaDrugSet = set(pmdaSingleTherapies+pmdaCombinationTherapies).difference(exclusions)
pmdaDrugSet = pmdaDrugSet.difference(pd.read_excel("pmda_deduplication.xlsx")['To Remove'])

print(len(pmdaDrugSet), " therapies after exclusions")
# Sort for a reproducible output file (set iteration order varies between
# runs). key=str keeps the sort well-defined if a float NaN placeholder is
# still present among the string names.
drugData = pd.DataFrame({'Drug Name': sorted(pmdaDrugSet, key=str)})
drugData.to_excel("pmda_list.xlsx")

# get curies
#curies, labels, newDrugsList = fetch_all_curies(pmdaDrugSet)

Ingesting PMDA list
APADAMTASE ALFA / CINAXADAMTASE ALFA 
 APADAMTASE ALFA; CINAXADAMTASE ALFA 

EFGARTIGIMOD ALFA  AND VORHYALURONIDASE ALFA 
 EFGARTIGIMOD ALFA; VORHYALURONIDASE ALFA 

DAUNORUBICIN HYDROCHLORIDE, CYTARABINE 
 DAUNORUBICIN HYDROCHLORIDE; CYTARABINE 

FOSLEVODOPA, FOSCARBIDOPA HYDRATE 
 FOSLEVODOPA; FOSCARBIDOPA HYDRATE 

RIPASUDIL HYDROCHLORIDE HYDRATE, BRIMONIDINE TARTRATE 
 RIPASUDIL HYDROCHLORIDE HYDRATE; BRIMONIDINE TARTRATE 

TIXAGEVIMAB , CILGAVIMAB 
 TIXAGEVIMAB; CILGAVIMAB 

RELEBACTAM HYDRATE/IMIPENEM HYDRATE/CILASTATIN SODIUM 
 RELEBACTAM HYDRATE; IMIPENEM HYDRATE; CILASTATIN SODIUM 

NIRMATRELVIR/RITONAVIR 
 NIRMATRELVIR; RITONAVIR 

L-LYSINE HYDROCHLORIDE, L-ARGININE HYDROCHLORIDE 
 L-LYSINE HYDROCHLORIDE; L-ARGININE HYDROCHLORIDE 

ANHYDROUS SODIUM SULFATE, POTASSIUM SULFATE, MAGNESIUM SULFATE HYDRATE 
 ANHYDROUS SODIUM SULFATE; POTASSIUM SULFATE; MAGNESIUM SULFATE HYDRATE 

DARATUMUMAB / VORHYALURONIDASE ALFA 
 DARATUMUMAB; VORHYALURONIDASE ALFA 

INSULI