In [4]:
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup as bs

In [38]:
MC_URL = 'https://www.mayoclinic.org'
SC_URL = '/symptom-checker/select-symptom/itt-20009075'

def extract_mayo_clinic_symptoms():
    req = Request(MC_URL + SC_URL, headers={'User-Agent': 'Mozilla/5.0'})
    webpage = urlopen(req).read()
    soup = bs(webpage)
    adult_symptom_list = soup.find('div', {'class': 'adult'})
        
    return [{
        'name': a.text, 
        'url': a['href'], 
        'factors': extract_factors(a['href'])
    } for a in adult_symptom_list.find_all('a')]

def extract_factors(symptom_url):
    req = Request(MC_URL + symptom_url, headers={'User-Agent': 'Mozilla/5.0'})
    webpage = urlopen(req).read()
    soup = bs(webpage)
    fieldsets = soup.find_all('fieldset')
    return [[fieldset.find('legend').text, [label.text for label in fieldset.find_all('label')]] for fieldset in fieldsets]

In [57]:
symptoms = extract_mayo_clinic_symptoms()

In [62]:
import json
with open('mayo_clinic_symptoms.json', 'w') as f:
    json.dump(symptoms, f)

In [64]:
import json
with open('mayo_clinic_symptoms.json') as f:
    symptoms = json.load(f)

In [205]:
import spacy
from spacy import displacy

nlp = spacy.load('en_core_web_sm')

printed = set()
for symptom in symptoms:
    for prompt, factors in symptom['factors']:
        doc = nlp(prompt)
        pos = [token.pos_ for token in doc]
        tag = [token.tag_ for token in doc]
        if prompt not in printed:
            printed.add(prompt)
            print(prompt)
            print(' '.join(pos))
            print(factors)
            
            START = "IS"
            SYMPTOM = {symptom['name']}
            POSESSIVE = "YOUR"
            modified_doc = doc
            modified_doc_texts = [tok.text for tok in doc]
            
            PN_index = pos.index('PROPN') if 'PROPN' in pos else -1
            N_index = PN_index if PN_index > -1 else (pos.index('NOUN') if 'NOUN' in pos else -1)

            # IS/DOES NOUN...?
            if N_index > -1 and doc[N_index].text != 'use':
                SYMPTOM = ""
                
                if tag[N_index] == 'NNS':
                    START = "ARE"
                if pos[-1] == "VERB" and modified_doc_texts[-1] != 'located':
                    modified_doc_texts[-1] = modified_doc[-1].lemma_

            V_index = pos.index('VERB') if 'VERB' in pos else -1
            # IS VERB-ing...?
            if V_index > -1 and not doc[V_index].text.endswith("ed"):
                START = "DOES"
                if modified_doc_texts[0].lower() == 'you' or (N_index > -1 and tag[N_index] == 'NNS'):
                    START = 'DO'
                    POSESSIVE = ''
                    SYMPTOM = ''
                    
            if pos[-1] == 'AUX':
                modified_doc_texts = modified_doc_texts[:-1]
            
            # Singular cases
            if pos[0] == 'VERB' and len(pos) == 1 and doc[0].text != "Located":
                POSESSIVE = ''
                SYMPTOM = ''
            
            if prompt == 'Pain best described as':
                START = "IS"
                SYMPTOM = ''
            
            print(f"{START} {POSESSIVE} {SYMPTOM} {' '.join(modified_doc_texts)}...")
            print()
            print('_____')
            print()

Pain is
NOUN AUX
['Burning', 'Crampy', 'Dull', 'Gnawing', 'Intense', 'Intermittent or episodic', 'Ongoing (chronic)', 'Sharp', 'Steady', 'Sudden (acute)', 'Worsening or progressing']
IS YOUR  Pain...

_____

Pain located in
NOUN VERB ADP
['Abdomen but radiates to other parts of the body', 'Lower abdomen', 'Middle abdomen', 'One or both sides', 'Upper abdomen']
IS YOUR  Pain located in...

_____

Triggered or worsened by
VERB CCONJ VERB ADP
['Coughing or other jarring movements', 'Drinking alcohol', 'Eating certain foods', 'Menstrual cycle', 'Stress']
IS YOUR {'Abdominal pain in adults'} Triggered or worsened by...

_____

Relieved by
VERB ADP
['Antacids', 'Avoiding certain foods', 'Changing position', 'Drinking more water', 'Eating certain foods', 'Eating more fiber']
IS YOUR {'Abdominal pain in adults'} Relieved by...

_____

Accompanied by
VERB ADP
['Abdominal swelling', 'Black or bloody stools', 'Constipation', 'Diarrhea', 'Fever', 'Inability to move bowels in spite of urge', 'Loose