In [221]:
import pandas as pd
import numpy as np
import plotly.express as px
from mylib import flatten, EurekaUtil
from IPython.display import display, HTML
import time

%load_ext autoreload
%autoreload 2

# Input file: handed to EurekaUtil when the helper object is constructed.
# NOTE(review): both paths below are absolute and machine-specific
# (/Users/bubbles/...); they must be edited before running elsewhere.
_rxnorm_ingredients_txt_path = "/Users/bubbles/src/python/OpenEpic/libraries/RxTerms202203/RxTermsIngredients202203.txt"
# Output file: destination of the aggregated class-membership table (agg_df.to_csv).
_class_output_csv_path = "/Users/bubbles/src/python/OpenEpic/output/rxclass.csv"


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:

# Build the helper object that later cells use for ingredient search
# (searchIngredientByName) and class loading (loadAllClasses / rxclasses),
# from the ingredients file path configured in the first cell.
eureka_util = EurekaUtil(_rxnorm_ingredients_txt_path)

In [15]:
## Query RxClass API
# Search the ingredient table for a (partial) name, then print the RxClass
# classes of the first matching ingredient, filtered by class type.
rxcui = 5487  # initial value; replaced below as soon as the search finds a match
search_str = 'codein'
result = eureka_util.searchIngredientByName(search_str)
print(result)
if len(result) == 0:
    print('rxcui not found')
else:
    # Pick the first match by position, so the lookup does not depend on the
    # result's index happening to contain the label 0.
    rxcui = result['ING_RXCUI'].iloc[0]
    print(f'Using {rxcui}')

    # Filter settings:
    #   removeClassTypes -> drop every class whose type is in classTypes_ignore
    #   onlyClassTypes   -> keep every class whose type is in classType
    # A class is printed when either enabled rule accepts it.
    classTypes_ignore = ['DISEASE', 'PE', 'MOA', 'CHEM', 'STRUCT', 'DISPOS', 'EPC', 'PK']
    removeClassTypes = True
    classType = ['ATC1-4']
    onlyClassTypes = False

    url_s = f'https://rxnav.nlm.nih.gov/REST/rxclass/class/byRxcui.json?rxcui={str(rxcui)}'
    # pd.read_json turns the nested payload into a frame whose column is the
    # outer key and whose row label is the inner key; the cell holds the list.
    response = pd.read_json(url_s)

    if ('rxclassDrugInfoList' not in response.columns
            or 'rxclassDrugInfo' not in response.index):
        # An empty payload would otherwise surface as a KeyError below.
        print(f'No classes returned for rxcui {rxcui}')
    else:
        for item in response['rxclassDrugInfoList']['rxclassDrugInfo']:
            # Only keep relations attached to the ingredient itself.
            if item['minConcept']['rxcui'] != str(rxcui):
                continue
            class_type = item['rxclassMinConceptItem']['classType']
            passes_ignore_rule = removeClassTypes and class_type not in classTypes_ignore
            passes_only_rule = onlyClassTypes and class_type in classType
            if passes_ignore_rule or passes_only_rule:
                print(item['rxclassMinConceptItem'])
        

   ING_RXCUI      INGREDIENT
0       2670         codeine
1      23088  dihydrocodeine
Using 2670
{'classId': 'D000701', 'className': 'Analgesics, Opioid', 'classType': 'MESHPA'}
{'classId': 'D000996', 'className': 'Antitussive Agents', 'classType': 'MESHPA'}
{'classId': 'D009294', 'className': 'Narcotics', 'classType': 'MESHPA'}
{'classId': 'R05DA', 'className': 'Opium alkaloids and derivatives', 'classType': 'ATC1-4'}


In [218]:
# Download every RxClass class of one class type and tabulate it.
rxcui1, rxcui2 = 2670, 10582
classType = 'ATC1-4'
trans = 0  # DIRECT relations only

print("Reading classes from RxClass API")
url_s = f'https://rxnav.nlm.nih.gov/REST/rxclass/allClasses.json?classTypes={classType}&trans={trans}'
json = pd.read_json(url_s)

# The class records are nested as rxclassMinConceptList -> rxclassMinConcept.
concept_list = json['rxclassMinConceptList']
classes = pd.DataFrame(concept_list['rxclassMinConcept'])

print(f'Successfully read {len(classes)} classes from RxClass API!')
classes.head()

Reading classes from RxClass API
Successfully read 1297 classes from RxClass API!


Unnamed: 0,classId,className,classType
0,0,Anatomical Therapeutic Chemical (ATC1-4),ATC1-4
1,A,ALIMENTARY TRACT AND METABOLISM,ATC1-4
2,A01,STOMATOLOGICAL PREPARATIONS,ATC1-4
3,A01A,STOMATOLOGICAL PREPARATIONS,ATC1-4
4,A01AA,Caries prophylactic agents,ATC1-4


In [33]:
# Ask the helper to load the 'ATC1-4' classes, then preview the first rows of
# the table it exposes as eureka_util.rxclasses.
# NOTE(review): presumably loadAllClasses is what fills rxclasses — confirm in mylib.
eureka_util.loadAllClasses(['ATC1-4'])
eureka_util.rxclasses.head()

Reading classes from RxClass API
Successfully read 1297 classes from RxClass API!


Unnamed: 0,classId,className,classType
0,0,Anatomical Therapeutic Chemical (ATC1-4),ATC1-4
1,A,ALIMENTARY TRACT AND METABOLISM,ATC1-4
2,A01,STOMATOLOGICAL PREPARATIONS,ATC1-4
3,A01A,STOMATOLOGICAL PREPARATIONS,ATC1-4
4,A01AA,Caries prophylactic agents,ATC1-4


In [191]:
# Accumulator for class-member records; the member-fetching loop extends it.
# NOTE(review): re-running this cell discards everything collected so far,
# while re-running the loop without it appends duplicates.
agg = []
# Empty frame; not referenced anywhere else in the visible part of the notebook.
final_df = pd.DataFrame()

In [214]:
# Paging parameters used to walk the class table one window at a time.
_batch = 100  # rows per window
_step = 0     # window number, starts with 0
_wait = 0.5   # seconds to pause between API requests

# Window derived from the paging parameters ...
_start, _stop = _batch * _step, _batch * (_step + 1)

# ... NOTE: immediately replaced by a hand-picked window, so _batch and _step
# currently have no effect on which rows are selected.
_start, _stop = 1290, 1299

subset = eureka_util.rxclasses.iloc[_start:_stop]
subset

Unnamed: 0,classId,className,classType
1290,V10AX,Other antiinflammatory therapeutic radiopharma...,ATC1-4
1291,V10B,PAIN PALLIATION (BONE SEEKING AGENTS),ATC1-4
1292,V10BX,Various pain palliation radiopharmaceuticals,ATC1-4
1293,V10X,OTHER THERAPEUTIC RADIOPHARMACEUTICALS,ATC1-4
1294,V10XA,Iodine (131I) compounds,ATC1-4
1295,V10XX,Various therapeutic radiopharmaceuticals,ATC1-4
1296,V20,SURGICAL DRESSINGS,ATC1-4


In [211]:
# For every class in `subset`, fetch its members from the RxClass API and
# append one record per member (member fields + class fields) to `agg`.
relaSource = 'ATC'
for i, c in subset.iterrows():
    start_time = time.time()
    classId = c['classId']
    url_s = f'https://rxnav.nlm.nih.gov/REST/rxclass/classMembers.json?classId={classId}&relaSource={relaSource}'
    response = pd.read_json(url_s)

    # pd.read_json yields a frame whose column is the outer JSON key and whose
    # row label is the inner key; a class without members has neither.
    has_members = (
        len(response) > 0
        and 'drugMemberGroup' in response.columns
        and 'drugMember' in response.index
    )

    if not has_members:
        print(f'RxClass # : {i}, found: 0 ............. SKIPPING')
    else:
        members = response['drugMemberGroup']['drugMember']
        class_fields = c.to_dict()
        # Read 'minConcept' without pop() so the response is left unmodified;
        # class fields are merged last and win on any key collision.
        records = [{**member['minConcept'], **class_fields} for member in members]
        agg.extend(records)
        time_taken = round(time.time() - start_time, 2)
        number = len(members)
        print(f'RxClass #: {i}, found: {number}, time taken: {time_taken}s')

    # Throttle after every request, including skipped classes, so that a run of
    # empty classes does not hit the API back-to-back.
    time.sleep(_wait)
    

RxClass #: 1000, found: 3, time taken: 0.72s
RxClass #: 1001, found: 5, time taken: 0.45s
RxClass #: 1002, found: 5, time taken: 0.49s
RxClass #: 1003, found: 3, time taken: 0.44s
RxClass #: 1004, found: 3, time taken: 0.49s
RxClass #: 1005, found: 13, time taken: 0.48s
RxClass # : 1006, found: 0 ............. SKIPPING
RxClass #: 1007, found: 13, time taken: 0.43s
RxClass #: 1008, found: 81, time taken: 0.52s
RxClass #: 1009, found: 44, time taken: 0.5s
RxClass #: 1010, found: 20, time taken: 0.48s
RxClass #: 1011, found: 3, time taken: 0.45s
RxClass #: 1012, found: 6, time taken: 0.45s
RxClass #: 1013, found: 2, time taken: 0.45s
RxClass #: 1014, found: 1, time taken: 0.48s
RxClass #: 1015, found: 8, time taken: 0.48s
RxClass #: 1016, found: 15, time taken: 0.49s
RxClass #: 1017, found: 5, time taken: 0.45s
RxClass #: 1018, found: 2, time taken: 0.49s
RxClass #: 1019, found: 2, time taken: 0.49s
RxClass #: 1020, found: 1, time taken: 0.45s
RxClass #: 1021, found: 3, time taken: 0.48s


RxClass # : 1179, found: 0 ............. SKIPPING
RxClass # : 1180, found: 0 ............. SKIPPING
RxClass #: 1181, found: 55, time taken: 0.49s
RxClass #: 1182, found: 55, time taken: 0.46s
RxClass #: 1183, found: 26, time taken: 0.45s
RxClass #: 1184, found: 3, time taken: 0.49s
RxClass #: 1185, found: 8, time taken: 0.45s
RxClass #: 1186, found: 8, time taken: 0.48s
RxClass #: 1187, found: 2, time taken: 0.45s
RxClass #: 1188, found: 1, time taken: 0.45s
RxClass # : 1189, found: 0 ............. SKIPPING
RxClass # : 1190, found: 0 ............. SKIPPING
RxClass #: 1191, found: 5, time taken: 0.48s
RxClass #: 1192, found: 2, time taken: 0.44s
RxClass #: 1193, found: 1, time taken: 0.43s
RxClass #: 1194, found: 34, time taken: 0.44s
RxClass # : 1195, found: 0 ............. SKIPPING
RxClass #: 1196, found: 34, time taken: 0.48s
RxClass #: 1197, found: 2, time taken: 0.44s
RxClass # : 1198, found: 0 ............. SKIPPING
RxClass #: 1199, found: 3, time taken: 0.47s
RxClass #: 1200, fou

In [226]:
# Turn the collected member records into a table, one row per record,
# show its column names, then display the table itself.
agg_df = pd.DataFrame(agg)
print(agg_df.columns.tolist())
agg_df

['rxcui', 'name', 'tty', 'classId', 'className', 'classType']


Unnamed: 0,rxcui,name,tty,classId,className,classType
0,1000082,alcaftadine,IN,0,Anatomical Therapeutic Chemical (ATC1-4),ATC1-4
1,1000581,trichlorfon,IN,0,Anatomical Therapeutic Chemical (ATC1-4),ATC1-4
2,1001,antipyrine,IN,0,Anatomical Therapeutic Chemical (ATC1-4),ATC1-4
3,1001472,dutasteride / tamsulosin,MIN,0,Anatomical Therapeutic Chemical (ATC1-4),ATC1-4
4,1002293,formoterol / mometasone,MIN,0,Anatomical Therapeutic Chemical (ATC1-4),ATC1-4
...,...,...,...,...,...,...
16446,2053281,iobenguane I-131,PIN,V10XA,Iodine (131I) compounds,ATC1-4
16447,234449,iodine-131-tositumomab,PIN,V10XA,Iodine (131I) compounds,ATC1-4
16448,91535,sodium iodide I131,PIN,V10XA,Iodine (131I) compounds,ATC1-4
16449,1999335,lutetium Lu 177 dotatate,PIN,V10XX,Various therapeutic radiopharmaceuticals,ATC1-4


In [222]:
# Persist the aggregated table to the CSV path configured in the first cell.
# to_csv is called with its defaults, so the DataFrame index is written as the
# first (unnamed) column of the file.
agg_df.to_csv(_class_output_csv_path)

In [241]:
# Count how many member records each class name has (most frequent first)
# and show the top 20 as a two-column table.
class_name_counts = agg_df['className'].value_counts()
out = class_name_counts.reset_index(name="counts")
out.head(20)

Unnamed: 0,index,counts
0,Anatomical Therapeutic Chemical (ATC1-4),2864
1,NERVOUS SYSTEM,449
2,ALIMENTARY TRACT AND METABOLISM,408
3,ANTINEOPLASTIC AND IMMUNOMODULATING AGENTS,347
4,ANTIINFECTIVES FOR SYSTEMIC USE,338
5,CARDIOVASCULAR SYSTEM,333
6,DERMATOLOGICALS,250
7,ANTINEOPLASTIC AGENTS,222
8,RESPIRATORY SYSTEM,217
9,ANTIBACTERIALS FOR SYSTEMIC USE,189
