# UMLS API Call # 
This notebook searches the UMLS API for each concept listed in the parent_code variable and writes every SNOMED-CT, ICD10, CPT, and LOINC code associated with those concept(s) to an Excel file.

Before being able to use this script, you must create an account with UMLS.  The website can be found here: https://uts.nlm.nih.gov/uts/profile . After your account has been approved, navigate to your profile and find your APIKey. This is required for this script to run.

This script is very thorough and will frequently return more codes than required.  Some codes returned may not be directly relevant to the diagnosis.  It is recommended that the analyst reviews the data returned for accuracy concerning their specific diagnosis. 

## Variable Explanations ##
- parent_code
    - This must be inputted as a list of strings.  Below is an example of proper structure: 
    - ["apple","orange","cat","puppy"] 
    
- apiKey 
    - This must be inputted as a string.
    - example: apiKey = '123a4b56-7c8d-9d12-e3fg-4h5i67j89k0d'
    - You can find it under "My Profile" on the UMLS website: https://uts.nlm.nih.gov/uts/profile

- Excel_Sheet_Name
    - This must be inputted as a string.  A string is required to have quotations around the text: "Apple", "Orange", and "Fruit" are examples of strings. 
    - This variable is responsible for naming the final exported Excel Sheet.
    - Note that the Excel file will be exported to wherever this Python file is saved.  If you do not know where that is on your computer, use your system's search bar to locate the file by the name that you assigned to the variable Excel_Sheet_Name.

## Notes and Limitations  
- If you do not change the Excel_Sheet_Name variable and run the code again, the code will overwrite your existing file with the results for the current parent_code values.
- This will return more codes than required.  Some codes returned may not be directly relevant to the diagnosis.  It is recommended that the analyst thoroughly reviews the data returned. 

## Common Errors
- No current common errors have been reported.  If you run into an error with this script, please contact me at alyssa.warnock@amida.com and I will do my best to help troubleshoot any issues with the code. 

In [1]:
## DO NOT CHANGE THIS BOX ##
import pandas as pd
import numpy as np
import json
import requests
# Lowercase aliases for Python's boolean constants. Nothing in the visible
# cells reads them; presumably they let JSON-style `true`/`false` literals
# pasted into the notebook evaluate -- TODO confirm before removing.
false = False
true = True
## DO NOT CHANGE ABOVE ## 

In [2]:
# EDIT THE SEARCH TERMS HERE (a list of strings, one entry per concept):
parent_code = [
    "Fibrosis of lung",
    "Interstitial lung disease",
    "Pneumonia",
    "Pneumonitis",
    'Proteinosis',
    "Eosinophilic granuloma",
    "Right ventricular failure due to disorder of pulmonary circulation",
    "Pulmonary hypertension",
]

# Your personal UMLS API key, as a string.
apiKey = 'YOUR API CODE HERE'

## NAME OF THE EXPORTED EXCEL FILE (the ".xlsx" extension is added later) ##
Excel_Sheet_Name = "SNOMED_ICD10_CPT_LOINC"

In [3]:
#### DO NOT CHANGE BELOW THIS LINE ####
## Fetch the SNOMED-CT codes for every search term in parent_code.
# Reads:  parent_code (list of search strings) and apiKey (UMLS API key).
# Builds: new_new_row  - one row per search hit returned for SNOMEDCT_US.
#         empty_pd_new - the same rows with exact duplicates removed and a
#                        fresh 0..n-1 index; the ICD10 cell builds on it.
empty_pd_formatted = pd.DataFrame()
test_codes = []
test_values = []
root_source_snomed = []

URL = "https://uts-ws.nlm.nih.gov/rest/search/current"

for each in parent_code:
    # Passing the term through `params` lets requests URL-encode it, so a
    # term containing characters such as "&", "#" or "+" cannot corrupt the
    # query string the way raw f-string interpolation could.
    response = requests.get(
        URL,
        params={
            "apiKey": apiKey,
            "string": each,
            "returnIdType": "code",
            "sabs": "SNOMEDCT_US",
            "pageSize": 2000,
        },
        timeout=60,  # do not hang the notebook forever on a stalled connection
    )
    # Surface HTTP errors (for example a rejected API key) here, instead of
    # as an obscure JSON/KeyError a few lines further down.
    response.raise_for_status()
    variable = response.json()

    # A single pass per hit keeps the three column lists aligned.
    for result in variable["result"]["results"]:
        test_codes.append(result["ui"])
        test_values.append(result["name"])
        root_source_snomed.append(result["rootSource"])

# The scalar values are broadcast to every row by the DataFrame constructor.
new_new_row = pd.DataFrame(
    {
        "Data Concept": "Diagnosis Code",
        "Data Sub-Concept": "N/A",
        "Coding Standard": root_source_snomed,
        "Code Value": test_codes,
        "Code Description": test_values,
    }
)
# This is the first table in the chain, so there is nothing to merge with yet
# (empty_pd_formatted is always empty); just de-duplicate and renumber.
empty_pd_new = new_new_row.drop_duplicates().reset_index(drop=True)

In [4]:
# Fetch the ICD10 codes for every search term in parent_code.
# Reads:  parent_code, apiKey, and empty_pd_new (the SNOMED-CT rows).
# Builds: new_new_row_icd10  - one row per search hit returned for ICD10.
#         empty_pd_new_icd10 - ICD10 rows followed by the SNOMED-CT rows,
#                              exact duplicates removed, index renumbered.

test_codes_icd10 = []
test_values_icd10 = []
root_source_icd10 = []

URL = "https://uts-ws.nlm.nih.gov/rest/search/current"

for each in parent_code:
    # Passing the term through `params` lets requests URL-encode it, so a
    # term containing characters such as "&", "#" or "+" cannot corrupt the
    # query string the way raw f-string interpolation could.
    response = requests.get(
        URL,
        params={
            "string": each,
            "sabs": "ICD10",
            "returnIdType": "code",
            "apiKey": apiKey,
            "pageSize": 2000,
        },
        timeout=60,  # do not hang the notebook forever on a stalled connection
    )
    # Surface HTTP errors (for example a rejected API key) here, instead of
    # as an obscure JSON/KeyError a few lines further down.
    response.raise_for_status()
    variable = response.json()

    # A single pass per hit keeps the three column lists aligned.
    for result in variable["result"]["results"]:
        test_codes_icd10.append(result["ui"])
        test_values_icd10.append(result["name"])
        root_source_icd10.append(result["rootSource"])

# The scalar values are broadcast to every row by the DataFrame constructor.
new_new_row_icd10 = pd.DataFrame(
    {
        "Data Concept": "Diagnosis Code",
        "Data Sub-Concept": "N/A",
        "Coding Standard": root_source_icd10,
        "Code Value": test_codes_icd10,
        "Code Description": test_values_icd10,
    }
)
# Stack the new ICD10 rows on top of the rows collected so far.
empty_pd_new_icd10 = (
    pd.concat([new_new_row_icd10, empty_pd_new])
    .drop_duplicates()
    .reset_index(drop=True)
)

In [5]:
# Fetch the CPT codes for every search term in parent_code.
# Reads:  parent_code, apiKey, and empty_pd_new_icd10 (rows collected so far).
# Builds: new_new_row_cpt  - one row per search hit returned for CPT.
#         empty_pd_new_cpt - CPT rows followed by the earlier rows,
#                            exact duplicates removed, index renumbered.

test_codes_CPT = []
test_values_CPT = []
root_source_CPT = []

URL = "https://uts-ws.nlm.nih.gov/rest/search/current"

for each in parent_code:
    # Passing the term through `params` lets requests URL-encode it, so a
    # term containing characters such as "&", "#" or "+" cannot corrupt the
    # query string the way raw f-string interpolation could.
    response = requests.get(
        URL,
        params={
            "string": each,
            "sabs": "CPT",
            "returnIdType": "code",
            "apiKey": apiKey,
            "pageSize": 2000,
        },
        timeout=60,  # do not hang the notebook forever on a stalled connection
    )
    # Surface HTTP errors (for example a rejected API key) here, instead of
    # as an obscure JSON/KeyError a few lines further down.
    response.raise_for_status()
    variable = response.json()

    # A single pass per hit keeps the three column lists aligned.
    for result in variable["result"]["results"]:
        test_codes_CPT.append(result["ui"])
        test_values_CPT.append(result["name"])
        root_source_CPT.append(result["rootSource"])

# The scalar values are broadcast to every row by the DataFrame constructor.
new_new_row_cpt = pd.DataFrame(
    {
        "Data Concept": "Procedure Code",
        "Data Sub-Concept": "N/A",
        "Coding Standard": root_source_CPT,
        "Code Value": test_codes_CPT,
        "Code Description": test_values_CPT,
    }
)
# Stack the new CPT rows on top of the rows collected so far.
empty_pd_new_cpt = (
    pd.concat([new_new_row_cpt, empty_pd_new_icd10])
    .drop_duplicates()
    .reset_index(drop=True)
)

In [6]:
# Fetch the LOINC codes for every search term in parent_code.
# Reads:  parent_code, apiKey, and empty_pd_new_cpt (rows collected so far).
# Builds: new_new_row_LOINC  - one row per search hit returned for LNC.
#         empty_pd_new_LOINC - the earlier rows followed by the LOINC rows,
#                              exact duplicates removed, index renumbered.
#                              This is the table the export cell writes out.

test_codes_LOINC = []
test_values_LOINC = []
root_source_LOINC = []

URL = "https://uts-ws.nlm.nih.gov/rest/search/current"

for each in parent_code:
    # Passing the term through `params` lets requests URL-encode it, so a
    # term containing characters such as "&", "#" or "+" cannot corrupt the
    # query string the way raw f-string interpolation could.
    response = requests.get(
        URL,
        params={
            "string": each,
            "sabs": "LNC",
            "returnIdType": "code",
            "apiKey": apiKey,
            "pageSize": 2000,
        },
        timeout=60,  # do not hang the notebook forever on a stalled connection
    )
    # Surface HTTP errors (for example a rejected API key) here, instead of
    # as an obscure JSON/KeyError a few lines further down.
    response.raise_for_status()
    variable = response.json()

    # A single pass per hit keeps the three column lists aligned.
    for result in variable["result"]["results"]:
        test_codes_LOINC.append(result["ui"])
        test_values_LOINC.append(result["name"])
        root_source_LOINC.append(result["rootSource"])

# The scalar values are broadcast to every row by the DataFrame constructor.
new_new_row_LOINC = pd.DataFrame(
    {
        "Data Concept": "Observation Code",
        "Data Sub-Concept": "N/A",
        "Coding Standard": root_source_LOINC,
        "Code Value": test_codes_LOINC,
        "Code Description": test_values_LOINC,
    }
)
# Row order is kept as in the original notebook: the rows collected so far
# come first and the LOINC rows are appended after them.
empty_pd_new_LOINC = (
    pd.concat([empty_pd_new_cpt, new_new_row_LOINC])
    .drop_duplicates()
    .reset_index(drop=True)
)

In [7]:
#### DO NOT CHANGE BELOW THIS LINE ####

# Name the workbook after Excel_Sheet_Name and write the accumulated code
# table (empty_pd_new_LOINC) to it. An existing file of that name is replaced.
excel_name = f"{Excel_Sheet_Name}.xlsx"

empty_pd_new_LOINC.to_excel(excel_name)

#### DO NOT CHANGE ABOVE THIS LINE ####