In [4]:
from RxNorm_API import RxNorm
import pandas as pd
from IPython.display import clear_output
import numpy as np

In [5]:
df = pd.read_csv(r'C:\Users\houstonan\Documents\Projects\RxNorm_Mapping\data\raw\pseudo_medications.csv')
df.head()

Unnamed: 0,ORDER_MNEM_TXT
0,paracetamol
1,midazolam
2,ondansetron
3,sodium chloride 0.9% intravenous solution 500 ...
4,glyceryl trinitrate


**Preprocessing**

In [6]:
# De-capitalise

df['ORDER_MNEM_TXT'] = df['ORDER_MNEM_TXT'].str.lower()

# Removing unwanted terms
replace = '|'.join(['\d+',
    'ml',
    'mg' ,
    'intravenous',
    'solution',
    '[\(\[].*?[\)\]]',
    'syringe driver -',
    'injection',
    '\.',
    '\%',
    'infusion',
    'topical'])

df['ORDER_MNEM_TXT'] = df['ORDER_MNEM_TXT'].str.replace(replace,'', regex=True)
df = df.drop_duplicates(subset='ORDER_MNEM_TXT', keep="first")
df.head(5)

Unnamed: 0,ORDER_MNEM_TXT
0,paracetamol
1,midazolam
2,ondansetron
3,sodium chloride
4,glyceryl trinitrate


In [7]:
df.to_csv('Processed_Medications.csv', index=None)

**Mapping**

In [7]:
medications_df = pd.DataFrame(columns = ['input_term','Name','rxcui','SNOMEDCT','MMSL'])
for value in df['ORDER_MNEM_TXT']:
    rxnorm = RxNorm()

    names_ = []
    rxcui_ = []
    snomed_ = []
    mmsl_ = []

    for concept in value.split('+'):

        term = concept
        
        print('Mapping: ' + term)
        
        rxcui = rxnorm.approximate_term(term = term, timeout=5)

        if isinstance(rxcui,list):
            i=0
            while i < len(rxcui):
                snomed, mmsl = rxnorm.get_codes(rxcui = rxcui[i], timeout=5)

                if (snomed == 'NULL') & (mmsl == 'NULL'):
                    selected_rxcui = 'NULL'
                    i+=1
                else:
                    selected_rxcui = rxcui[i]
                    a=i
                    i=len(rxcui)
        else:
            selected_rxcui = rxcui

        if selected_rxcui == 'NULL':
            names_.append('NULL')
            rxcui_.append(selected_rxcui)
            snomed_.append('NULL')
            mmsl_.append('NULL')
        else:
            if a != 0:
                ingredient_rxcui = rxnorm.primary_ingredient(rxcui = selected_rxcui, timeout=5)
                if ingredient_rxcui == 'NULL':
                    names = rxnorm.get_names(rxcui = selected_rxcui, timeout=5)
                    rxcui_.append(selected_rxcui)
                else:
                    names_.append(rxnorm.get_names(rxcui = ingredient_rxcui, timeout=5))
                    rxcui_.append(ingredient_rxcui)
                    snomed, mmsl = rxnorm.get_codes(rxcui = ingredient_rxcui, timeout=5)
                    
            else:
                names_.append(rxnorm.get_names(rxcui = selected_rxcui, timeout=5))
                rxcui_.append(selected_rxcui)
            
            snomed_.append(snomed)
            mmsl_.append(mmsl)


    codes = pd.DataFrame(data = np.array([value, ', '.join(names_), ', '.join(rxcui_),', '.join(snomed_), ', '.join(mmsl_)]).reshape(1,-1), 
        columns = ['input_term','Name','rxcui','SNOMEDCT','MMSL'])
    medications_df = medications_df.append(codes, ignore_index=True)

    clear_output()

In [45]:
print('Missing (%): ', (medications_df["input_term"][medications_df["rxcui"]=='NULL'].count()/medications_df["input_term"].count()).round(3)*100)

Missing (%):  12.7


In [46]:
medications_df.to_csv('Medication_Post_Mapping.csv', index = False)

**Deep Dive using Spacy**

In [47]:
import spacy

In [None]:
nlp = spacy.load("en_core_web_md")
medications_df['input_term_parsed'] = medications_df['input_term'].str.replace('+','').apply(nlp)
medications_df['Name_parsed'] = medications_df['Name'].str.replace(',','').apply(nlp)

In [None]:
medications_df['similarity'] = medications_df.apply(lambda x: x['input_term_parsed'].similarity(x['Name_parsed']),axis=1)

In [64]:
medications_df.sort_values(by='similarity',ascending=1)[medications_df['Name']!='NULL'].head(60)

  medications_df.sort_values(by='similarity',ascending=1)[medications_df['Name']!='NULL'].head(60)


Unnamed: 0,input_term,Name,rxcui,SNOMEDCT,MMSL,input_term_parsed,Name_parsed,similarity
657,eptifibatide + neat,"eptifibatide, neatsfoot oil","75635, 1358967","116065005, 386998009, NULL","BN213655, GNd04316, IN4670, MSNOCODE","(eptifibatide, , neat, )","(eptifibatide, neatsfoot, oil)",-0.056819
347,metaraminol + neat,"metaraminol, neatsfoot oil","6805, 1358967","372728001, 41015006, NULL","GNd00702, MSNOCODE","(metaraminol, , neat, )","(metaraminol, neatsfoot, oil)",-0.056819
485,atracurium + neat,"atracurium, neatsfoot oil","1218, 1358967","372835000, 74074008, NULL","GNd00173, MSNOCODE","(atracurium, , neat, )","(atracurium, neatsfoot, oil)",-0.056819
25,salbutamol,albuterol,435,"372897005, 91143003","BN46702, GNd00749, IN4143",(salbutamol),(albuterol),-0.021776
54,senna,"sennosides, USP",36387,"30125007, 387162007, 412302003, 420709001","BN5149, BN84266, GNd01013, IN5452, IN5455",(senna),"(sennosides, USP)",-0.009191
870,calcipotriol,calcipotriene,29365,"126232005, 395766004","BN136766, IN4322",(calcipotriol),(calcipotriene),0.0
874,levobupivicaine,levobupivacaine,259453,"116103003, 387011006","GNd04449, IN8289",(levobupivicaine),(levobupivacaine),0.0
443,epoprostenol,epoprostenol,8814,"108593000, 372513003",GNd03842,"(epoprostenol, )",(epoprostenol),0.0
112,cefalexin,cephalexin,2231,"387304003, 54887004",GNd00096,(cefalexin),(cephalexin),0.0
425,levomepromazine,methotrimeprazine,6852,"387509007, 89029005","GNd00814, IN5078",(levomepromazine),(methotrimeprazine),0.0
