[API Explanation](https://id.nlm.nih.gov/mesh/swagger/ui)

In [8]:
# importing the requests library
import requests
from pymongo import MongoClient
from collections import defaultdict
import time

In [None]:
import os
import environ

env = environ.Env()
env.read_env(env.str('ENV_PATH', '.env'))

In [1]:
mongo_cli_username = os.environ.get('MONGO_CLI_USERNAME')
mongo_cli_password = os.environ.get('MONGO_CLI_PASSWORD')

NameError: name 'os' is not defined

In [11]:
client = MongoClient("mongodb+srv://{}:{}@cluster0.plop5.mongodb.net/myFirstDatabase?retryWrites=true&w=majority".format(mongo_cli_username, mongo_cli_password))
db = client['healdash']

In [12]:
# input location
input_location = '../data/input/diseases-english.txt'

In [13]:
class Mesh:
    def __init__(self, input_location, db):
        self.query_term = ""
        self.query = ""
        self.keyword_dict = defaultdict(list)
        self.keywords_to_update = []
        self.api_url = "https://id.nlm.nih.gov/mesh/sparql"
        self.query_terms = set()
        self.db = db

        with open(input_location) as my_file:
            for line in my_file:
                self.query_terms.add(line.replace("\n", ""))

    def update_query(self, query_term):
        self.query_term = query_term
        self.query = """
            PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
            PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
            PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
            PREFIX owl: <http://www.w3.org/2002/07/owl#>
            PREFIX meshv: <http://id.nlm.nih.gov/mesh/vocab#>
            PREFIX mesh: <http://id.nlm.nih.gov/mesh/>

            SELECT ?d ?dName ?c ?cName ?t ?v
            WHERE { 
              ?d a meshv:Descriptor .
              ?d meshv:concept ?c .
              ?d ?t ?c .
              ?t rdfs:label ?v .
              ?d rdfs:label ?dName .
              ?c rdfs:label ?cName
              FILTER(REGEX(?dName,"%s",'i') || REGEX(?cName,"%s",'i')) 
            }
            """ % (query_term, query_term)

    def send_request(self, format="JSON", limit=1000, inference="true", offset=0):

        # defining a params dict for the parameters to be sent to the API
        PARAMS = {'query': self.query, 'format': format,
                  'limit': limit, 'offset': offset, 'inference': inference}

        # sending get request and saving the response as response object
        result = requests.get(url=self.api_url, params=PARAMS)

        # extracting data in json format
        values_dict = result.json()['results']['bindings']

        existing_words = []
        temp_arr = []

        for sub in values_dict:
            temp_dict = defaultdict(list)
            temp_word = sub['cName']['value']

            if temp_word not in existing_words:
                temp_dict['resource'] = sub['c']['value']
                temp_dict['word'] = temp_word
                temp_dict['label'] = sub['dName']['value']
                temp_arr.append(temp_dict)
                existing_words.append(temp_word)

        self.keyword_dict[self.query_term] = temp_arr

    def get_keyword_synonyms(self) -> dict:

        existing_mesh_keywords = set()
        for document in self.db["mesh_synonyms"].find():
            existing_mesh_keywords.add(document['keyword'])  

        # don't use existing mesh synonyms
        self.query_terms = self.query_terms.difference(existing_mesh_keywords)

        for keyword in self.query_terms:
            self.update_query(keyword)
            start_time = time.time()
            self.send_request()

            if len(self.keyword_dict[keyword]) != 0:
                # add data to mongodb
                print("{} - {:.0f} seconds".format(keyword,
                                                   time.time() - start_time))
                self.db.mesh_synonyms.update_many({"keyword": keyword}, {
                                             "$set": {"synonyms": self.keyword_dict[keyword]}}, upsert=True)
            else:
                self.keywords_to_update.append(keyword)
                print("{} - No synonymns".format(keyword))

        return self.keyword_dict

In [14]:
mesh = Mesh(input_location, db)

In [15]:
keyword_dict = mesh.get_keyword_synonyms()

cleft lip - 10 seconds
cleft palate - 10 seconds
herpes - 11 seconds
mumps - 10 seconds
sialadenitis - 10 seconds
tracheoesophageal fistula - 10 seconds
esophageal - 26 seconds
zenker diverticulum - 30 seconds
mallory-weiss syndrome - 33 seconds
esophageal varices - 32 seconds
achalasia - 26 seconds
reflux - 27 seconds
barrett esophagus - 27 seconds
gastroschisis - 10 seconds
omphalocele - 10 seconds
pyloric stenosis - 10 seconds
gastritis - 10 seconds
ulcer - 11 seconds
gastric - 10 seconds
atresia - 10 seconds
meckel diverticulum - 10 seconds
volvulus - 10 seconds
intussusception - 10 seconds
lactose intolerance - 10 seconds
celiac - 10 seconds
sprue - 10 seconds
whipple disease - 10 seconds
abetalipoproteinemia - 10 seconds
carcinoid tumor - 10 seconds
carcinoid syndrome - 26 seconds
carcinoid heart disease - 26 seconds
appendicitis - 10 seconds
hirschsprung disease - 10 seconds
diverticula - 10 seconds
angiodysplasia - 10 seconds
telangiectasia - 27 seconds
colitis - 27 seconds
irr

KeyboardInterrupt: 