In [1]:
from __future__ import print_function

import json
import os

import requests
import simplejson

from Authentication import *

In [2]:
def FindCui(apikey, version, string):
    """Search UMLS for ``string`` and return the identifiers of the matches.

    Authenticates with ``apikey``, requests one service ticket, and queries the
    ``/rest/search/<version>`` endpoint with it.

    Args:
        apikey: UMLS API key handed to ``Authentication``.
        version: UMLS release to query, e.g. ``'2020AB'``.
        string: Word or phrase to look up.

    Returns:
        list: The ``"ui"`` value of every entry under ``result -> results``,
        in response order. Entries that carry no ``"ui"`` key are skipped.

    Raises:
        KeyError: If the decoded response has no ``"result"`` or ``"results"`` key.
        ValueError: If the response body is not valid JSON.
    """
    uri = "https://uts-ws.nlm.nih.gov"
    content_endpoint = "/rest/search/" + version
    # Get a ticket-granting ticket for the session, then a service ticket for this request.
    AuthClient = Authentication(apikey)
    tgt = AuthClient.gettgt()
    ticket = AuthClient.getst(tgt)
    query = {'string': string, 'ticket': ticket}
    r = requests.get(uri + content_endpoint, params=query)
    r.encoding = 'utf-8'
    items = json.loads(r.text)
    jsonData = items["result"]
    # Skip malformed entries explicitly rather than hiding every possible error
    # (including KeyboardInterrupt) behind a bare `except`.
    return [
        result["ui"]
        for result in jsonData["results"]
        if isinstance(result, dict) and "ui" in result
    ]

In [3]:
def search(apikey, version, cui):
    """Collect the English atom names recorded for one UMLS concept.

    Pages through ``/rest/content/<version>/CUI/<cui>/atoms`` starting at page 1
    and stops at the first response that has no ``"result"`` key, an empty (or
    non-list) result, or whose first entry is named ``"NONE"``.

    Args:
        apikey: UMLS API key handed to ``Authentication``.
        version: UMLS release to query, e.g. ``'2020AB'``.
        cui: Concept identifier whose atoms are requested.

    Returns:
        set: Stripped, lower-cased atom names; empty when nothing was found.

    Raises:
        ValueError: If a response body is not valid JSON.
    """
    uri = "https://uts-ws.nlm.nih.gov"
    path = "/rest/content/" + version + "/CUI/" + cui + "/atoms"
    AuthClient = Authentication(apikey)
    tgt = AuthClient.gettgt()
    pageNumber = 0
    word = []
    while True:
        pageNumber += 1
        # Request exactly one service ticket per page (previously two were
        # fetched and the first one was thrown away).
        ticket = AuthClient.getst(tgt)
        query = {"ticket": ticket, "language": 'ENG', 'pageNumber': pageNumber}
        r = requests.get(uri + path, params=query)
        r.encoding = 'utf-8'
        items = json.loads(r.text)

        if not isinstance(items, dict) or "result" not in items:
            # there will be words/phrases that are not in the library
            print("empty")
            break
        jsonData = items["result"]

        # An empty or non-list page means there is nothing (more) to read.
        if not isinstance(jsonData, list) or not jsonData:
            break

        first = jsonData[0]
        first_has_name = isinstance(first, dict) and "name" in first
        # Check the "NONE" sentinel BEFORE collecting names so that it never
        # ends up in the returned synonym set.
        if first_has_name and first["name"] == "NONE":
            print(pageNumber)
            break

        word.extend(
            result["name"]
            for result in jsonData
            if isinstance(result, dict) and "name" in result
        )

        # A page whose first entry has no name also ends the paging.
        if not first_has_name:
            break

    # clean the text
    return {s.strip().lower() for s in word}

In [4]:
def extractSynonym(phrases, version, apikey):
    """Expand each phrase with the UMLS terms of the first concept found for it.

    Args:
        phrases: A list of words/phrases to look up.
        version: The released version of the UMLS library to query
            (https://www.nlm.nih.gov/research/umls/licensedcontent/umlsknowledgesources.html).
        apikey: Your API key (available after registering on the UMLS website).

    Returns:
        tuple: ``(finalList, synonymdic)`` where

        * ``finalList`` is a list of words/phrases containing every input
          phrase followed by the synonyms found for it, and
        * ``synonymdic`` is a dictionary with each input word/phrase that has
          synonyms as key and its set of medical terminology as the value.
    """
    uidic = {}
    for p in phrases:
        uilst = FindCui(apikey, version, p)
        # Only the first identifier is used. A phrase without any hit is stored
        # as None instead of raising IndexError on the empty list.
        uidic[p] = uilst[0] if uilst else None

    synonymdic = {}
    finalList = []
    for word, cui in uidic.items():
        finalList.append(word)
        if cui is None:
            # nothing to look up; the phrase itself is still part of the output
            continue
        synonymSet = search(apikey, version, cui)
        if synonymSet:
            synonymdic[word] = synonymSet
            finalList.extend(synonymSet)
    return finalList, synonymdic

In [5]:
# Test with a small bag of keywords.
# The UMLS API key is read from the UMLS_API_KEY environment variable so that the
# cell runs on a fresh kernel and the key is never saved inside the notebook.
apikey = os.environ["UMLS_API_KEY"]
phrases = ['memory impairment', 'fatigue']
version = '2020AB'
extractSynonym(phrases, version, apikey)

(['memory impairment',
  'memory deficit',
  'impairment memory',
  'memory problems',
  'impaired memory',
  'difficulty;remembering',
  'disturbance of memory',
  'memory; disturbance',
  'poor memory',
  'memory impairment',
  'disturbance memory/concentration',
  'impairments memory',
  'memory poor',
  'rndx memory impairment',
  'deficit, memory',
  'memory impaired',
  'impair memory',
  'memory deficits',
  'memory disturbance',
  'bad memory',
  'memory problem',
  'disturbance of memory, nos',
  'deficits, memory',
  'rndx memory impairment (diagnosis)',
  'impairment;memory',
  'memory impairment (finding)',
  'disturbance; memory',
  'memory retention disorder',
  'fatigue',
  'energy loss',
  'tatt',
  'fatigue - symptom',
  'fatigue nos',
  'fatigue',
  'lack of energy (finding)',
  'tired time',
  'feeling of total lack of energy',
  'fatigue symptoms',
  'lack (of);energy',
  'decreased energy',
  'lack of energy',
  'fatigue (finding)',
  'fatigue (lassitude)',
  'decr

In [None]:
# For a larger set of keywords stored in a text file (comma-separated, any number of lines).
phrases = []
# The file is only read, so open it read-only instead of 'r+'.
with open('word/phrase.txt', 'r') as file:
    for line in file:
        for p in line.split(','):
            p = p.strip().lower()
            # Skip blank fields (e.g. trailing commas or empty lines); the old
            # comparison against "\n" could never be false after strip().
            if p:
                phrases.append(p)

# De-duplicate once, after reading, keeping the order of first appearance.
phrases = list(dict.fromkeys(phrases))

# The UMLS API key is read from the UMLS_API_KEY environment variable.
apikey = os.environ["UMLS_API_KEY"]
version = '2020AB'
# extractSynonym returns (list, dict); unpack a single call instead of querying
# the service twice and binding the whole tuple to both names.
finallist, outdic = extractSynonym(phrases, version, apikey)

In [None]:
# Write every entry of `finallist` to a text file, one entry per line.
with open('new dictionary_with UMLS.txt', 'w', encoding="utf-8") as out_file:
    out_file.writelines("%s\n" % entry for entry in finallist)