## MedlinePlus API (NIH)

In [8]:
import requests
import urllib.parse

def sentence_to_query_params(sentence):
    # Convert sentence to URL query parameters
    query_params = urllib.parse.urlencode({'q': sentence})
    return query_params


In [9]:
#https://wsearch.nlm.nih.gov/ws/query?db=healthTopics&term=asthma

sentence_to_query_params('what is diabetes')

'q=what+is+diabetes'

In [16]:
def get_medlineplus_data(query):
    query_params = sentence_to_query_params(query)
    query = query_params.replace('q=', '')
    url = f'https://wsearch.nlm.nih.gov/ws/query?db=healthTopics&term={query}'
    response = requests.get(url)
    return response

response = get_medlineplus_data('what is diabetes?')
print(response.headers['Content-Type'])


text/xml; charset=UTF-8


In [17]:
print(len(response.text))

52762


In [18]:
import xml.etree.ElementTree as ET

def extract_first_document(xml_string):
    """
    Extract the first document from NLM search results XML.
    
    Args:
        xml_string (str): The XML string containing the search results
        
    Returns:
        dict: A dictionary containing the first document's information
    """
    # Parse the XML string
    root = ET.fromstring(xml_string)
    
    # Find the first document element
    first_doc = root.find('.//document[@rank="1"]')
    
    if first_doc is None:
        return None
    
    # Extract document information
    doc_info = {
        'url': first_doc.get('url'),
        'title': first_doc.find('.//content[@name="title"]').text,
        'organization': first_doc.find('.//content[@name="organizationName"]').text,
        'summary': first_doc.find('.//content[@name="FullSummary"]').text
    }
    
    # Try to get alternative titles if they exist
    alt_titles = first_doc.findall('.//content[@name="altTitle"]')
    if alt_titles:
        doc_info['alternative_titles'] = [title.text for title in alt_titles]
        
    return doc_info

In [21]:
extract_first_document(response.text)

{'url': 'https://medlineplus.gov/diabeticdiet.html',
 'title': 'Diabetic Diet',
 'organization': 'National Library of Medicine',
 'summary': '<span class="qt0">What</span> is <span class="qt3">diabetes</span>?<p>If you have <span class="qt3">diabetes</span>, your blood glucose, or blood sugar, levels are too high. Glucose comes from foods you eat. The cells of your body need glucose for energy. A hormone called insulin helps the glucose get into your cells.</p><p>With type 1 <span class="qt3">diabetes</span>, your body doesn\'t make insulin. With type 2 <span class="qt3">diabetes</span>, your body doesn\'t make or use insulin well. Without enough insulin, glucose builds up in your blood and causes high blood glucose levels.</p><p>Prediabetes means that your blood glucose levels are higher than normal but not high enough to be called <span class="qt3">diabetes</span>. If you have prediabetes, you are more likely to develop type 2 <span class="qt3">diabetes</span>.</p>How do the foods I 