In [34]:
import time
import os
import csv
from bs4 import BeautifulSoup
import requests
from tqdm import tqdm


In [26]:

def data_extractor(base_url, retries=3, delay=5):
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
    }

    diagnosis_treatment_link = ""
    doctors_departments_link = ""

    for attempt in range(retries):
        try:
            response = requests.get(base_url, headers=headers, timeout=20)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")

            # Extract Diagnosis & Treatment link
            content1 = soup.find('a', id="et_genericNavigation_diagnosis-treatment")
            if not content1:
                # fallback: search by link text containing both words
                for a in soup.find_all('a'):
                    link_text = a.get_text(separator=' ').strip().lower()
                    if "diagnosis" in link_text and "treatment" in link_text:
                        content1 = a
                        break
            if content1:
                href1 = content1.get('href')
                diagnosis_treatment_link = f"https://www.mayoclinic.org{href1}" if href1 and href1.startswith("/") else href1

            # Extract Doctors & Departments link
            content2 = soup.find('a', id="et_genericNavigation_doctors-departments")
            if not content2:
                # fallback: search by link text containing both words
                for a in soup.find_all('a'):
                    link_text = a.get_text(separator=' ').strip().lower()
                    if "doctors" in link_text and "departments" in link_text:
                        content2 = a
                        break
            if content2:
                href2 = content2.get('href')
                doctors_departments_link = f"https://www.mayoclinic.org{href2}" if href2 and href2.startswith("/") else href2

            break  # success, exit retry loop

        except requests.exceptions.RequestException as e:
            print(f"[Attempt {attempt + 1}] Error fetching {base_url}: {e}")
            if attempt < retries - 1:
                time.sleep(delay)

    return diagnosis_treatment_link, doctors_departments_link

def web_scraping(base_url):
    # Define the expected headers in order
    expected_headers = ["disease", "main_link", "Diagnosis_treatment_link", "Doctors_departments_link"]
    
    # Check if file exists and read existing headers if it does
    file_exists = os.path.isfile("mayo_diseases.csv")
    existing_headers = []
    
    if file_exists:
        with open("mayo_diseases.csv", "r", encoding="utf-8") as file:
            reader = csv.reader(file)
            existing_headers = next(reader, [])
    
    # Determine if we need to write headers
    write_headers = not file_exists or existing_headers != expected_headers
    
    # Get the webpage content
    response = requests.get(base_url)
    if response.status_code != 200:
        print("Failed to retrieve page")
        exit()

    soup = BeautifulSoup(response.text, "html.parser")
    items = soup.select(".cmp-results-with-primary-name__see-link, .cmp-results-with-primary-name a")

    with open("mayo_diseases.csv", "a", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        
        # Write headers if needed
        if write_headers:
            writer.writerow(expected_headers)
        
        for item in tqdm(items, desc="Scraping Diseases"):
            disease_name = item.text.strip()
            main_link = f"https://www.mayoclinic.org{item['href']}" if item['href'].startswith("/") else item['href']

            link1, link2 = data_extractor(main_link)
            
            # Create a row with all expected columns
            row_data = {
                "disease": disease_name,
                "main_link": main_link,
                "Diagnosis_treatment_link": link1,
                "Doctors_departments_link": link2
            }
            
            # If appending to existing file with different headers, align data with existing headers
            if file_exists and existing_headers:
                row = [row_data.get(header, "") for header in existing_headers]
            else:
                row = [row_data[header] for header in expected_headers]
            
            writer.writerow(row)

    print("Scraping Completed! Data Saved")

# Example usage:
# web_scraping("https://www.mayoclinic.org/diseases-conditions")

In [27]:
base_url = f"https://www.mayoclinic.org/diseases-conditions/index?letter=A"
web_scraping(base_url) 

Scraping Diseases: 100%|██████████| 132/132 [00:45<00:00,  2.92it/s]

Scraping Completed! Data Saved





In [35]:
from bs4 import BeautifulSoup


base_url="https://www.mayoclinic.org/diseases-conditions/hyperhidrosis/symptoms-causes/syc-20367152"
headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
    }
response = requests.get(base_url, headers=headers, timeout=20)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
extraction_list = ["overview","symptoms","when-to-see-a-doctor","causes","complications","prevention","risk-factors"]

for i in extraction_list:
# Find the overview section by its aria-labelledby attribute
   overview_section = soup.find('section', {'aria-labelledby': i})

# Extract all paragraph text from the overview content
   overview_content = overview_section.find('div', class_='cmp-text__rich-content')
   overview_paragraphs = [p.get_text() for p in overview_content.find_all('p')]

# Join paragraphs into a single string
   overview_text = '\n\n'.join(overview_paragraphs)

   print(overview_text)

AttributeError: 'NoneType' object has no attribute 'find'

In [37]:
from bs4 import BeautifulSoup

def extract_overview(html_content):
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
    }
    response = requests.get(base_url, headers=headers, timeout=20)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Find the Overview section - looking for h2 with text "Overview"
    overview_header = soup.find(lambda tag: tag.name == 'h2' and 'when-to-see-a-doctor"' in tag.text)
    
    if not overview_header:
        return "Overview section not found"
    
    # Initialize overview paragraphs
    overview_paragraphs = []
    
    # Get all siblings after the h2 until the next h2
    for sibling in overview_header.find_next_siblings():
        if sibling.name == 'h2':
            break  # Stop when we reach the next section
        if sibling.name == 'p':
            overview_paragraphs.append(sibling.get_text(strip=True))
    
    if not overview_paragraphs:
        return "No overview content found"
    
    return '\n\n'.join(overview_paragraphs)

# Example usage:
base_url="https://www.mayoclinic.org/diseases-conditions/hyperhidrosis/symptoms-causes/syc-20367152"

html_content = """ (your HTML content here) """
overview_text = extract_overview(base_url)
print(overview_text)

Overview section not found


In [43]:
from bs4 import BeautifulSoup
import requests

def extract_mayo_clinic_sections(url):
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
    }
    
    try:
        response = requests.get(url, headers=headers, timeout=20)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        
        main_content = soup.find('div', class_='content')
        if not main_content:
            return {"error": "Main content not found"}
        
        sections = {
            "Overview": None,
            "Symptoms": None,
            "When to see a doctor": None,
            "Causes": None,
            "Risk factors": None,
            "Complications": None,
            "Prevention": None
        }
        
        # Find all headings (h2 and h3)
        headings = main_content.find_all(['h2', 'h3'])
        
        for heading in headings:
            heading_text = heading.get_text(strip=True)
            
            # Handle the typo "dotor" in the HTML
            if "When to see a dotor" in heading_text:
                heading_text = "When to see a doctor"
                
            if heading_text in sections:
                content = []
                next_node = heading.next_sibling
                
                # Collect content until next heading
                while next_node and next_node.name not in ['h2', 'h3']:
                    if next_node.name == 'p':
                        content.append(next_node.get_text(strip=True))
                    elif next_node.name in ['ul', 'ol']:
                        items = [li.get_text(strip=True) for li in next_node.find_all('li')]
                        content.extend(items)
                    next_node = next_node.next_sibling
                
                sections[heading_text] = '\n'.join(content) if content else "No content found"
        
        return {k: v for k, v in sections.items() if v is not None}
    
    except requests.exceptions.RequestException as e:
        return {"error": f"Failed to fetch page: {str(e)}"}

# Example usage:
url = "https://www.mayoclinic.org/diseases-conditions/hyperhidrosis/symptoms-causes/syc-20367152"
sections = extract_mayo_clinic_sections(url)

for section_name, content in sections.items():
    print(f"=== {section_name.upper()} ===")
    print(content)
    print("\n" + "="*50 + "\n")

=== OVERVIEW ===
Hyperhidrosis (hi-pur-hi-DROE-sis) is excessive sweating that's not always related to heat or exercise. You may sweat so much that it soaks through your clothes or drips off your hands. Heavy sweating can disrupt your day and cause social anxiety and embarrassment.
Hyperhidrosis treatment usually helps. It often begins with antiperspirants. If these don't help, you may need to try different medications and therapies. In severe cases, your health care provider may suggest surgery to remove the sweat glands or to disconnect the nerves related to producing too much sweat.
Sometimes an underlying condition may be found and treated.


=== SYMPTOMS ===
The main symptom of hyperhidrosis is heavy sweating. This goes beyond the sweating from being in a hot environment, exercising, or feeling anxious or stressed. The type of hyperhidrosis that usually affects the hands, feet, underarms or face causes at least one episode a week when you're awake. And the sweating usually happens

In [44]:
import csv
from bs4 import BeautifulSoup
import requests
import os

def extract_sections(url):
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
    }
    
    try:
        response = requests.get(url, headers=headers, timeout=20)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        
        main_content = soup.find('div', class_='content') or soup.find('article', id='main-content')
        if not main_content:
            return {"error": "Main content not found"}
        
        sections = {
            "Overview": None,
            "Symptoms": None,
            "When to see a doctor": None,
            "Causes": None,
            "Risk factors": None,
            "Complications": None,
            "Prevention": None
        }
        
        headings = main_content.find_all(['h2', 'h3'])
        
        for heading in headings:
            heading_text = heading.get_text(strip=True)
            if "When to see a dotor" in heading_text:
                heading_text = "When to see a doctor"
                
            if heading_text in sections:
                content = []
                next_node = heading.next_sibling
                while next_node and next_node.name not in ['h2', 'h3']:
                    if next_node.name == 'p':
                        content.append(next_node.get_text(strip=True))
                    elif next_node.name in ['ul', 'ol']:
                        items = [li.get_text(strip=True) for li in next_node.find_all('li')]
                        content.extend(items)
                    next_node = next_node.next_sibling
                sections[heading_text] = '\n'.join(content) if content else None
        
        return {k: v for k, v in sections.items() if v is not None}
    
    except Exception as e:
        return {"error": f"Error processing {url}: {str(e)}"}

def update_csv_with_sections(csv_file):
    # Read existing data
    rows = []
    existing_headers = []
    
    if os.path.exists(csv_file):
        with open(csv_file, 'r', newline='', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            existing_headers = reader.fieldnames
            rows = list(reader)
    
    # Define all possible headers
    all_headers = existing_headers.copy() if existing_headers else ['disease', 'main_link']
    section_headers = [
        'Overview', 'Symptoms', 'When to see a doctor', 
        'Causes', 'Risk factors', 'Complications', 'Prevention'
    ]
    
    # Add section headers if not already present
    for header in section_headers:
        if header not in all_headers:
            all_headers.append(header)
    
    # Process each row
    for row in tqdm(rows):
        if 'main_link' in row and row['main_link']:
            print(f"Processing: {row.get('disease', 'Unknown')}")
            sections = extract_sections(row['main_link'])
            
            # Update row with new sections
            for section, content in sections.items():
                if section in all_headers:
                    row[section] = content
                else:
                    print(f"Warning: Unexpected section '{section}' found")
    
    # Write back to CSV
    with open(csv_file, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=all_headers)
        writer.writeheader()
        writer.writerows(rows)
    
    print(f"CSV file updated successfully with {len(rows)} rows")

# Example usage
csv_file = "mayo_diseases.csv"
update_csv_with_sections(csv_file)

  0%|          | 0/132 [00:00<?, ?it/s]

Processing: Atrial fibrillation


  1%|          | 1/132 [00:00<00:49,  2.66it/s]

Processing: Hyperhidrosis


  2%|▏         | 2/132 [00:00<00:48,  2.66it/s]

Processing: Bartholin's cyst


  2%|▏         | 3/132 [00:01<00:48,  2.64it/s]

Processing: Infant reflux


  3%|▎         | 4/132 [00:01<00:46,  2.74it/s]

Processing: Hidradenitis suppurativa


  4%|▍         | 5/132 [00:01<00:44,  2.87it/s]

Processing: HIV/AIDS


  5%|▍         | 6/132 [00:02<00:44,  2.82it/s]

Processing: Acute myelogenous leukemia


  5%|▌         | 7/132 [00:02<00:41,  3.02it/s]

Processing: Guillain-Barre syndrome


  6%|▌         | 8/132 [00:02<00:39,  3.16it/s]

Processing: Acute kidney injury


  7%|▋         | 9/132 [00:03<00:38,  3.21it/s]

Processing: Acute lymphocytic leukemia


  8%|▊         | 10/132 [00:03<00:36,  3.31it/s]

Processing: Acute lymphocytic leukemia


  8%|▊         | 11/132 [00:03<00:40,  2.96it/s]

Processing: Acute myelogenous leukemia


  9%|▉         | 12/132 [00:04<00:39,  3.06it/s]

Processing: Acute myelogenous leukemia


 10%|▉         | 13/132 [00:04<00:37,  3.16it/s]

Processing: Acute myelogenous leukemia


 11%|█         | 14/132 [00:04<00:37,  3.17it/s]

Processing: Radiation sickness


 11%|█▏        | 15/132 [00:05<00:38,  3.03it/s]

Processing: Radiation sickness


 12%|█▏        | 16/132 [00:05<00:38,  3.00it/s]

Processing: Acute kidney injury


 13%|█▎        | 17/132 [00:05<00:35,  3.23it/s]

Processing: ARDS


 14%|█▎        | 18/132 [00:05<00:34,  3.29it/s]

Processing: Acute sinusitis


 14%|█▍        | 19/132 [00:06<00:33,  3.33it/s]

Processing: Autoimmune epilepsy


 15%|█▌        | 20/132 [00:06<00:36,  3.04it/s]

Processing: Alcohol use disorder


 16%|█▌        | 21/132 [00:06<00:34,  3.25it/s]

Processing: Compulsive gambling


 17%|█▋        | 22/132 [00:07<00:34,  3.18it/s]

Processing: Nicotine dependence


 17%|█▋        | 23/132 [00:07<00:33,  3.29it/s]

Processing: Mesenteric lymphadenitis


 18%|█▊        | 24/132 [00:07<00:35,  3.01it/s]

Processing: Attention-deficit/hyperactivity disorder (ADHD) in children


 19%|█▉        | 25/132 [00:08<00:34,  3.08it/s]

Processing: Adult attention-deficit/hyperactivity disorder (ADHD)


 20%|█▉        | 26/132 [00:08<00:32,  3.24it/s]

Processing: Frozen shoulder


 20%|██        | 27/132 [00:08<00:31,  3.35it/s]

Processing: Childhood schizophrenia


 21%|██        | 28/132 [00:08<00:30,  3.41it/s]

Processing: Benign adrenal tumors


 22%|██▏       | 29/132 [00:09<00:34,  2.95it/s]

Processing: Autoimmune encephalitis


 23%|██▎       | 30/132 [00:09<00:34,  2.96it/s]

Processing: Acute flaccid myelitis (AFM)


 23%|██▎       | 31/132 [00:10<00:34,  2.93it/s]

Processing: Dry macular degeneration


 24%|██▍       | 32/132 [00:10<00:32,  3.06it/s]

Processing: Macular degeneration, wet


 25%|██▌       | 33/132 [00:10<00:31,  3.12it/s]

Processing: Myelofibrosis


 26%|██▌       | 34/132 [00:11<00:32,  3.02it/s]

Processing: Guillain-Barre syndrome


 27%|██▋       | 35/132 [00:11<00:30,  3.16it/s]

Processing: HIV/AIDS


 27%|██▋       | 36/132 [00:11<00:30,  3.19it/s]

Processing: Alcohol use disorder


 28%|██▊       | 37/132 [00:11<00:30,  3.14it/s]

Processing: Alcoholic hepatitis


 29%|██▉       | 38/132 [00:12<00:31,  2.98it/s]

Processing: Churg-Strauss syndrome


 30%|██▉       | 39/132 [00:12<00:32,  2.90it/s]

Processing: Churg-Strauss syndrome


 30%|███       | 40/132 [00:13<00:31,  2.91it/s]

Processing: Hay fever


 31%|███       | 41/132 [00:13<00:29,  3.04it/s]

Processing: Dust mite allergy


 32%|███▏      | 42/132 [00:13<00:32,  2.81it/s]

Processing: Egg allergy


 33%|███▎      | 43/132 [00:14<00:31,  2.80it/s]

Processing: Food allergy


 33%|███▎      | 44/132 [00:14<00:29,  2.99it/s]

Processing: Latex allergy


 34%|███▍      | 45/132 [00:14<00:27,  3.14it/s]

Processing: Milk allergy


 35%|███▍      | 46/132 [00:15<00:28,  3.01it/s]

Processing: Mold allergy


 36%|███▌      | 47/132 [00:15<00:30,  2.79it/s]

Processing: Nickel allergy


 36%|███▋      | 48/132 [00:15<00:30,  2.79it/s]

Processing: Peanut allergy


 37%|███▋      | 49/132 [00:16<00:28,  2.88it/s]

Processing: Penicillin allergy


 38%|███▊      | 50/132 [00:16<00:27,  3.01it/s]

Processing: Pet allergy


 39%|███▊      | 51/132 [00:16<00:28,  2.85it/s]

Processing: Shellfish allergy


 39%|███▉      | 52/132 [00:17<00:25,  3.08it/s]

Processing: Wheat allergy


 40%|████      | 53/132 [00:17<00:25,  3.06it/s]

Processing: Hair loss


 41%|████      | 54/132 [00:17<00:24,  3.13it/s]

Processing: Amyotrophic lateral sclerosis (ALS)


 42%|████▏     | 55/132 [00:18<00:26,  2.96it/s]

Processing: Dry socket


 42%|████▏     | 56/132 [00:18<00:25,  2.97it/s]

Processing: Atypical genitalia


 43%|████▎     | 57/132 [00:18<00:24,  3.12it/s]

Processing: Lazy eye (amblyopia)


 44%|████▍     | 58/132 [00:19<00:23,  3.16it/s]

Processing: Chagas disease


 45%|████▍     | 59/132 [00:19<00:23,  3.11it/s]

Processing: Transient global amnesia


 45%|████▌     | 60/132 [00:19<00:25,  2.78it/s]

Processing: Amnesia


 46%|████▌     | 61/132 [00:20<00:24,  2.90it/s]

Processing: Amyloidosis


 47%|████▋     | 62/132 [00:20<00:24,  2.88it/s]

Processing: Aplastic anemia


 48%|████▊     | 63/132 [00:20<00:23,  2.90it/s]

Processing: Thalassemia


 48%|████▊     | 64/132 [00:21<00:24,  2.77it/s]

Processing: Iron deficiency anemia


 49%|████▉     | 65/132 [00:21<00:23,  2.82it/s]

Processing: Thalassemia


 50%|█████     | 66/132 [00:21<00:22,  2.93it/s]

Processing: Sickle cell anemia


 51%|█████     | 67/132 [00:22<00:22,  2.92it/s]

Processing: Vitamin deficiency anemia


 52%|█████▏    | 68/132 [00:22<00:25,  2.54it/s]

Processing: Abdominal aortic aneurysm


 52%|█████▏    | 69/132 [00:23<00:22,  2.76it/s]

Processing: Aortic aneurysm


 53%|█████▎    | 70/132 [00:23<00:21,  2.88it/s]

Processing: Brain aneurysm


 54%|█████▍    | 71/132 [00:23<00:20,  3.03it/s]

Processing: Popliteal artery aneurysm


 55%|█████▍    | 72/132 [00:23<00:18,  3.29it/s]

Processing: Thoracic aortic aneurysm


 55%|█████▌    | 73/132 [00:24<00:20,  2.94it/s]

Processing: Vasculitis


 56%|█████▌    | 74/132 [00:24<00:19,  2.93it/s]

Processing: Angina


 57%|█████▋    | 75/132 [00:24<00:18,  3.07it/s]

Processing: Hives and angioedema


 58%|█████▊    | 76/132 [00:25<00:17,  3.17it/s]

Processing: Castleman disease


 58%|█████▊    | 77/132 [00:25<00:18,  2.98it/s]

Processing: Broken ankle


 59%|█████▉    | 78/132 [00:25<00:18,  2.96it/s]

Processing: Sprained ankle


 60%|█████▉    | 79/132 [00:26<00:17,  3.07it/s]

Processing: Tongue-tie (ankyloglossia)


 61%|██████    | 80/132 [00:26<00:16,  3.10it/s]

Processing: Anorexia nervosa


 61%|██████▏   | 81/132 [00:26<00:15,  3.27it/s]

Processing: ACL injury


 62%|██████▏   | 82/132 [00:27<00:16,  2.95it/s]

Processing: Anterior vaginal prolapse (cystocele)


 63%|██████▎   | 83/132 [00:27<00:15,  3.11it/s]

Processing: Pseudomembranous colitis


 64%|██████▎   | 84/132 [00:27<00:17,  2.77it/s]

Processing: Generalized anxiety disorder


 64%|██████▍   | 85/132 [00:28<00:16,  2.92it/s]

Processing: Social anxiety disorder (social phobia)


 65%|██████▌   | 86/132 [00:28<00:16,  2.79it/s]

Processing: Abdominal aortic aneurysm


 66%|██████▌   | 87/132 [00:29<00:16,  2.77it/s]

Processing: Coarctation of the aorta


 67%|██████▋   | 88/132 [00:29<00:15,  2.87it/s]

Processing: Aortic valve regurgitation


 67%|██████▋   | 89/132 [00:29<00:14,  2.99it/s]

Processing: Aortic valve regurgitation


 68%|██████▊   | 90/132 [00:29<00:13,  3.10it/s]

Processing: Auditory processing disorder


 69%|██████▉   | 91/132 [00:30<00:14,  2.74it/s]

Processing: Primary progressive aphasia


 70%|██████▉   | 92/132 [00:30<00:13,  2.89it/s]

Processing: Canker sore


 70%|███████   | 93/132 [00:30<00:12,  3.14it/s]

Processing: Canker sore


 71%|███████   | 94/132 [00:31<00:11,  3.21it/s]

Processing: Broken heart syndrome


 72%|███████▏  | 95/132 [00:31<00:12,  2.87it/s]

Processing: Antiphospholipid syndrome


 73%|███████▎  | 96/132 [00:32<00:12,  2.85it/s]

Processing: Viral hemorrhagic fevers


 73%|███████▎  | 97/132 [00:32<00:11,  3.03it/s]

Processing: Viral hemorrhagic fevers


 74%|███████▍  | 98/132 [00:32<00:11,  3.09it/s]

Processing: Broken arm


 75%|███████▌  | 99/132 [00:32<00:10,  3.04it/s]

Processing: Chiari malformation


 76%|███████▌  | 100/132 [00:33<00:11,  2.89it/s]

Processing: Heart arrhythmia


 77%|███████▋  | 101/132 [00:33<00:10,  3.01it/s]

Processing: Giant cell arteritis


 77%|███████▋  | 102/132 [00:33<00:09,  3.16it/s]

Processing: Takayasu's arteritis


 78%|███████▊  | 103/132 [00:34<00:09,  3.09it/s]

Processing: Thumb arthritis


 79%|███████▉  | 104/132 [00:34<00:09,  2.88it/s]

Processing: Osteoarthritis


 80%|███████▉  | 105/132 [00:35<00:10,  2.56it/s]

Processing: Gout


 80%|████████  | 106/132 [00:35<00:09,  2.78it/s]

Processing: Septic arthritis


 81%|████████  | 107/132 [00:35<00:08,  2.82it/s]

Processing: Juvenile idiopathic arthritis


 82%|████████▏ | 108/132 [00:36<00:07,  3.02it/s]

Processing: Osteoarthritis


 83%|████████▎ | 109/132 [00:36<00:08,  2.79it/s]

Processing: Psoriatic arthritis


 83%|████████▎ | 110/132 [00:36<00:08,  2.69it/s]

Processing: Reactive arthritis


 84%|████████▍ | 111/132 [00:37<00:07,  2.74it/s]

Processing: Rheumatoid arthritis


 85%|████████▍ | 112/132 [00:37<00:06,  2.89it/s]

Processing: Septic arthritis


 86%|████████▌ | 113/132 [00:38<00:07,  2.66it/s]

Processing: Thumb arthritis


 86%|████████▋ | 114/132 [00:38<00:06,  2.84it/s]

Processing: Atrial septal defect (ASD)


 87%|████████▋ | 115/132 [00:38<00:05,  3.05it/s]

Processing: Autism spectrum disorder


 88%|████████▊ | 116/132 [00:38<00:05,  3.05it/s]

Processing: Avascular necrosis (osteonecrosis)


 89%|████████▊ | 117/132 [00:39<00:05,  2.75it/s]

Processing: Childhood asthma


 89%|████████▉ | 118/132 [00:39<00:04,  2.80it/s]

Processing: Exercise-induced asthma


 90%|█████████ | 119/132 [00:40<00:04,  2.89it/s]

Processing: Occupational asthma


 91%|█████████ | 120/132 [00:40<00:04,  2.97it/s]

Processing: Atrial tachycardia


 92%|█████████▏| 121/132 [00:40<00:03,  3.04it/s]

Processing: Arteriosclerosis / atherosclerosis


 92%|█████████▏| 122/132 [00:41<00:03,  2.86it/s]

Processing: Atrioventricular canal defect


 93%|█████████▎| 123/132 [00:41<00:03,  2.69it/s]

Processing: Vaginal atrophy


 94%|█████████▍| 124/132 [00:41<00:02,  2.85it/s]

Processing: Reactive attachment disorder


 95%|█████████▍| 125/132 [00:42<00:02,  2.96it/s]

Processing: Adult attention-deficit/hyperactivity disorder (ADHD)


 95%|█████████▌| 126/132 [00:42<00:02,  2.95it/s]

Processing: Auditory processing disorder


 96%|█████████▌| 127/132 [00:42<00:01,  3.05it/s]

Processing: Autoimmune epilepsy


 97%|█████████▋| 128/132 [00:43<00:01,  3.19it/s]

Processing: Autoimmune encephalitis


 98%|█████████▊| 129/132 [00:43<00:00,  3.07it/s]

Processing: Bird flu (avian influenza)


 98%|█████████▊| 130/132 [00:43<00:00,  3.18it/s]

Processing: Avascular necrosis (osteonecrosis)


 99%|█████████▉| 131/132 [00:44<00:00,  2.86it/s]

Processing: Atrioventricular nodal reentry tachycardia (AVNRT)


100%|██████████| 132/132 [00:44<00:00,  2.97it/s]

CSV file updated successfully with 132 rows





In [31]:
extraction_list = ["overview","symptoms","when-to-see-a-doctor","causes","complications","prevention","risk-factors"]