## [UMLS API Doc](https://documentation.uts.nlm.nih.gov/rest/home.html)

### 0. Include your UMLS API key in the .env file as `UMLS_API_KEY`.

In [None]:
import os
from dotenv import load_dotenv

# Load variables from the .env file into the process environment, then read
# the UMLS key that later cells append to request URLs.
load_dotenv()
api_key = os.getenv('UMLS_API_KEY')
if not api_key:
    # Fail here instead of letting later cells build URLs ending in
    # "apiKey=None" and receive confusing authentication errors.
    raise RuntimeError(
        "UMLS_API_KEY is not set; add it to your .env file before running "
        "the rest of this notebook."
    )

### 1. Get example CUIs from Biolama-UMLS

In [1]:
from datasets import load_dataset
import pandas as pd


# Dataset card: https://huggingface.co/datasets/CDT-BMAI-GP/Biolama-UMLS
dataset = load_dataset("CDT-BMAI-GP/Biolama-UMLS")

# NOTE(review): despite its name, train_df is built from the 'validation'
# split -- confirm this is intended.
train_df = pd.DataFrame(dataset['validation'])

print(train_df.columns)

# Pick one row; its subject CUI is the running example for the cells below.
example_idx = 3
example_cui = train_df.at[example_idx, 'sub_uri']

print(f"example CUI: {example_cui}, label: {train_df.at[example_idx, 'sub_label']}")



  from .autonotebook import tqdm as notebook_tqdm


Index(['uuid', 'predicate_id', 'sub_uri', 'sub_label', 'sub_type',
       'sub_aliases', 'sub_alias_scores', 'obj_uris', 'obj_labels',
       'obj_types', 'obj_aliases'],
      dtype='object')
example CUI: C0118168, label: fosinopril


### 2. Get source-specific identifiers associated with the CUI

(NLM does not assert parent or child relationships between concepts.)

View source vocabularies below.

- [UMLS Source Vocabularies](https://www.nlm.nih.gov/research/umls/new_users/online_learning/Meta_002.html)

- [UMLS Metathesaurus Vocabulary Documentation](https://www.nlm.nih.gov/research/umls/sourcereleasedocs/index.html)

In [2]:
from umls_api_wrappers import get_all_relations, get_related_entities, simple_url_request # , crosswalk

# Keys looked up on a UMLS result record to collect its relation links.
relation_types = [
    'parents',
    'children',
    'ancestors',
    'descendants',
    'relations',
]

In [3]:

def _node_from_relation(res):
    """Return (source, identifier, label) for one relation record.

    'relatedFromId' is a URL; its last two path segments are taken as the
    source and the identifier. 'relatedFromIdName' supplies the label.
    """
    source, source_id = res['relatedFromId'].split('/')[-2:]
    return source, source_id, res['relatedFromIdName']


relations = get_all_relations(example_cui)
results = relations['result']

# NOTE(review): the recorded run lists exactly 25 relations, which equals the
# pageSize reported by other responses in this notebook -- confirm that
# get_all_relations follows pagination, otherwise only page 1 is seen here.
for res in results:
    print(f"Root Source: {res['rootSource']}")

# Several relations can point at the same node, so collect into a set.
# NOTE(review): for atom URLs the first tuple element comes out as the literal
# 'AUI' rather than a source vocabulary abbreviation.
source_specific_nodes = {_node_from_relation(res) for res in results}

# Set iteration order is arbitrary; sort so reruns print the same listing.
# Sorting on repr keeps the ordering total even if a field is not a string.
for node in sorted(source_specific_nodes, key=repr):
    print(node)
    

https://uts-ws.nlm.nih.gov/rest/content/current/CUI/C0118168/relations?apiKey=REDACTED
Root Source: RXNORM
Root Source: SNOMEDCT_US
Root Source: RXNORM
Root Source: SNOMEDCT_US
Root Source: SCTSPA
Root Source: SNOMEDCT_US
Root Source: SCTSPA
Root Source: SCTSPA
Root Source: RXNORM
Root Source: RXNORM
Root Source: ATC
Root Source: LNC
Root Source: LNC
Root Source: SNOMEDCT_US
Root Source: SCTSPA
Root Source: MED-RT
Root Source: MED-RT
Root Source: MED-RT
Root Source: MSH
Root Source: MED-RT
Root Source: MED-RT
Root Source: MED-RT
Root Source: MED-RT
Root Source: RXNORM
Root Source: RXNORM
('SNOMEDCT_US', '372510000', 'Fosinopril')
('SCTSPA', '372510000', 'fosinopril')
('MSH', 'D017328', 'Fosinopril')
('LNC', 'LP171400-7', 'Fosinopril')
('SNOMEDCT_US', '108569005', 'Fosinopril-containing product')
('RXNORM', '50166', 'fosinopril')
('AUI', 'A22731561', 'fosinopril')
('SCTSPA', '108569005', 'producto con fosinopril')


### 3. Retrieve all {'parents', 'children', 'ancestors', 'descendants' or 'relations'} of a source-asserted identifier. 

(NLM does not assert parent or child relationships between concepts.)

In [4]:
# query_example = ('RXNORM', '50166', 'fosinopril') -> doesn't provide parent/child relations

# Ask for the parents of MSH identifier D017328. Each entry in 'result' is a
# parent record, so the links gathered below belong to the first parent
# returned, not to D017328 itself.
relation_query_res = get_related_entities('D017328', source='MSH', relation='parents')

# Guard against an empty result list instead of failing with IndexError.
query_results = relation_query_res['result']
first_result = query_results[0] if query_results else {}

# Raw link per relation type. The API reports absent links as the literal
# string 'NONE' (as seen for other fields of the response), which is truthy,
# so it has to be filtered out explicitly.
raw_links = {}
for rel_type in relation_types:
    link = first_result.get(rel_type)
    raw_links[rel_type] = link if link and link != 'NONE' else None

# Request-ready URLs: the API key is appended as a query parameter.
rel_types = {
    rel_type: f'{link}?apiKey={api_key}' if link else None
    for rel_type, link in raw_links.items()
}

# Print with the key masked so the secret does not end up in saved notebook
# output; rel_types itself still holds the usable URLs.
print({
    rel_type: f'{link}?apiKey=REDACTED' if link else None
    for rel_type, link in raw_links.items()
})

https://uts-ws.nlm.nih.gov/rest/content/current/source/MSH/D017328/parents?apiKey=REDACTED
{'parents': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/MSH/D010721/parents?apiKey=REDACTED', 'children': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/MSH/D010721/children?apiKey=REDACTED', 'ancestors': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/MSH/D010721/ancestors?apiKey=REDACTED', 'descendants': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/MSH/D010721/descendants?apiKey=REDACTED', 'relations': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/MSH/D010721/relations?apiKey=REDACTED'}


In [5]:
# Show the raw response of the 'parents' query above for inspection.
relation_query_res

{'pageSize': 25,
 'pageNumber': 1,
 'pageCount': 1,
 'result': [{'classType': 'SourceAtomCluster',
   'ui': 'D010721',
   'suppressible': False,
   'obsolete': False,
   'rootSource': 'MSH',
   'atomCount': 6,
   'cVMemberCount': 0,
   'attributes': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/MSH/D010721/attributes',
   'atoms': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/MSH/D010721/atoms',
   'ancestors': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/MSH/D010721/ancestors',
   'parents': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/MSH/D010721/parents',
   'children': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/MSH/D010721/children',
   'descendants': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/MSH/D010721/descendants',
   'relations': 'https://uts-ws.nlm.nih.gov/rest/content/2023AB/source/MSH/D010721/relations',
   'definitions': 'NONE',
   'concepts': 'https://uts-ws.nlm.nih.gov/rest/search/2023AB?string=D010721&sabs=MS