In [1]:
import os
import time
import pandas as pd
from tqdm import tqdm

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options

from webdriver_manager.chrome import ChromeDriverManager

In [2]:
options = Options()
# options.add_argument("--headless")
options.add_argument("--disable-gpu")
options.add_argument("--no-sandbox")
options.add_argument("enable-automation")
options.add_argument("--disable-infobars")
options.add_argument("--disable-dev-shm-usage")
options.binary_location = os.environ.get("GOOGLE_CHROME_BIN")

In [3]:
dis = pd.read_csv('mayo disease links.csv')
mayo_links = dis.link.values
dis.head()

Unnamed: 0,name,link
0,"A fib, also known as\nAtrial fibrillation",https://www.mayoclinic.org/diseases-conditions...
1,Abdominal aortic aneurysm,https://www.mayoclinic.org/diseases-conditions...
2,"Abnormally excessive sweating, also known as\n...",https://www.mayoclinic.org/diseases-conditions...
3,"Abscess, Bartholin's, also known as\nBartholin...",https://www.mayoclinic.org/diseases-conditions...
4,Absence seizure,https://www.mayoclinic.org/diseases-conditions...


In [4]:
df = pd.DataFrame(
    dis['name'].apply(lambda x: x.replace('\n', '').split(', also known as')).to_list(),
    columns=['name', 'name2']
)
df

Unnamed: 0,name,name2
0,A fib,Atrial fibrillation
1,Abdominal aortic aneurysm,
2,Abnormally excessive sweating,Hyperhidrosis
3,"Abscess, Bartholin's",Bartholin's cyst
4,Absence seizure,
...,...,...
2188,Yersinia pestis,Plague
2189,Yips,
2190,Zika virus,
2191,Zollinger-Ellison syndrome,


## Scrape full page of disease

In [7]:
def scrape_diseases(links):
    diseases = {
        'link': [],
        'overview': [],
        'treatment': []
    }

    driver = webdriver.Chrome(
        ChromeDriverManager().install(),
        options=options
    )

    viable_tags = ['h2', 'p', 'ul', 'li']
    for path in tqdm(links):
        driver.get(path)
        diseases['link'].append(path)

        # overview and symptoms
        try:
            try:
                content = driver.find_element(By.XPATH, '/html/body/form/div[5]/article/div[1]/div[1]/div[2]')
                content_childs = content.find_elements(By.XPATH, './/*')
            except:
                try:
                    content = driver.find_element(By.XPATH, '/html/body/form/div[5]/article/div[1]/div[1]/div[3]')
                    content_childs = content.find_elements(By.XPATH, './/*')
                except:
                    continue

            temp = []
            for i in content_childs:
                if i.tag_name in viable_tags and i.get_attribute('class') != 'listGroup' and i.get_attribute('class') != 'acces-listitems':
                    i = i.text.strip()
                    if i != '' and i != ' ':
                        temp.append(i)

            diseases['overview'].append(".\n\n".join(temp).replace('..', '.').replace(':.', ':'))

        except:
            diseases['overview'].append('')

        # diagnosis and treatment
        try:
            driver.find_element(By.ID, 'et_genericNavigation_diagnosis-treatment').click()
            try:
                content = driver.find_element(By.XPATH, '/html/body/form/div[5]/article/div[1]/div[1]/div[2]')
                content_childs = content.find_elements(By.XPATH, './/*')
            except:
                try:
                    content = driver.find_element(By.XPATH, '/html/body/form/div[5]/article/div[1]/div[1]/div[3]')
                    content_childs = content.find_elements(By.XPATH, './/*')
                except:
                    continue

            temp = []
            for i in content_childs:
                if i.tag_name in viable_tags and i.get_attribute('class') != 'listGroup' and i.get_attribute('class') != 'acces-listitems':
                    i = i.text.strip()
                    if i != '' and i != ' ':
                        temp.append(i)

            diseases['treatment'].append(".\n\n".join(temp).replace('..', '.').replace(':.', ':'))

        except:
            diseases['treatment'].append('')

    return diseases

In [8]:
x = scrape_diseases(mayo_links)




  driver = webdriver.Chrome(
100%|████████████████████████████████████| 2193/2193 [5:33:48<00:00,  9.13s/it]


In [12]:
details = pd.DataFrame.from_dict(x, 'index').T
details

Unnamed: 0,link,overview,treatment
0,https://www.mayoclinic.org/diseases-conditions...,,Diagnosis.\n\nSome people are unaware that the...
1,https://www.mayoclinic.org/diseases-conditions...,Overview.\n\nAn abdominal aortic aneurysm is a...,Diagnosis.\n\nAbdominal aortic aneurysms are o...
2,https://www.mayoclinic.org/diseases-conditions...,,Diagnosis.\n\nDiagnosing hyperhidrosis may sta...
3,https://www.mayoclinic.org/diseases-conditions...,Overview.\n\nThe Bartholin's (BAHR-toe-linz) g...,"Diagnosis.\n\nTo diagnose a Bartholin's cyst, ..."
4,https://www.mayoclinic.org/diseases-conditions...,,Diagnosis.\n\nYour doctor will ask for a detai...
...,...,...,...
2188,https://www.mayoclinic.org/diseases-conditions...,,
2189,https://www.mayoclinic.org/diseases-conditions...,,
2190,https://www.mayoclinic.org/diseases-conditions...,,
2191,https://www.mayoclinic.org/diseases-conditions...,,


In [13]:
pd.concat([df, details], axis=1).to_excel('mayooooooooooooooooooo.xlsx')

In [None]:
stop

In [29]:
# print(x['overview'][0].replace('..', '.').replace(':.', ':'))

In [34]:
details = pd.DataFrame(x)
details

Unnamed: 0,link,overview,treatment
0,https://www.mayoclinic.org/diseases-conditions...,Overview.\n\nGastroesophageal reflux disease (...,Diagnosis.\n\nYour health care provider might ...
1,https://www.mayoclinic.org/diseases-conditions...,Overview.\n\nAcne is a skin condition that occ...,Treatment.\n\nIf you've tried over-the-counter...
2,https://www.mayoclinic.org/diseases-conditions...,Overview.\n\nAcute sinusitis causes the spaces...,Diagnosis.\n\nYour doctor may ask about your s...
3,https://www.mayoclinic.org/diseases-conditions...,Overview.\n\nAllergies occur when your immune ...,Diagnosis.\n\nTo evaluate whether you have an ...
4,https://www.mayoclinic.org/diseases-conditions...,Overview.\n\nHair loss (alopecia) can affect j...,"Diagnosis.\n\nBefore making a diagnosis, your ..."
5,https://www.mayoclinic.org/diseases-conditions...,"Overview.\n\nBad breath, also called halitosis...",Diagnosis.\n\nYour dentist will likely smell b...
6,https://www.mayoclinic.org/diseases-conditions...,Overview.\n\nBags under eyes are mild swelling...,Treatment.\n\nBags under eyes are usually a co...
7,https://www.mayoclinic.org/diseases-conditions...,Overview.\n\nHair loss (alopecia) can affect j...,"Diagnosis.\n\nBefore making a diagnosis, your ..."
8,https://www.mayoclinic.org/diseases-conditions...,Overview.\n\nBee stings are a common outdoor n...,Diagnosis.\n\nIf you've had a reaction to bee ...
9,https://www.mayoclinic.org/diseases-conditions...,"Overview.\n\nBody lice are tiny insects, about...",Diagnosis.\n\nYou or your doctor can usually c...


In [None]:
# details.overview[0].replace('. . . ', '')

In [46]:
final = pd.concat([dis.iloc[:, :5], details.iloc[:, 1:]], axis=1)
final.head()
final.to_excel('../datasets/updated details 2.xlsx')

In [13]:
def scrape_remedies(links):
    driver = webdriver.Chrome(
        ChromeDriverManager().install(),
        options=options
    )

    disease_info = {
        'link': [],
        'alternatives':[],
        'remedies': []
    }

    for path in tqdm(links):
        disease_info['link'].append(path)
        driver.get(path)

        # going to diagnosis-treatment
        driver.find_element(By.ID, 'et_genericNavigation_diagnosis-treatment').click()

        try:
            # alternatives
            alts = driver.find_element(By.XPATH, "//h2[contains(text(), 'Alternative')]/following-sibling::ul")
            alts = alts.find_elements(By.TAG_NAME, 'li')

            temp = []
            for a in alts:
                temp.append(a.text)
            disease_info['alternatives'].append("\n".join(temp))

        except:
            disease_info['alternatives'].append('')

        try:
            # home remedies
            remedies = driver.find_element(By.XPATH, "//h2[contains(text(), 'Lifestyle')]/following-sibling::")
            disease_info['remedies'].append(remedies.text)
            # remedies = remedies.find_elements(By.TAG_NAME, 'li')

            # temp = []
            # for r in remedies:
            #     temp.append(r.text)
            # disease_info['remedies'].append("\n".join(temp))

        except:
            disease_info['remedies'].append('')

    return disease_info

In [14]:
l = ['https://www.mayoclinic.org/diseases-conditions/head-lice/symptoms-causes/syc-20356180']
x = scrape_remedies(l)




  driver = webdriver.Chrome(
100%|████████████████████████████████████████████| 1/1 [00:01<00:00,  1.12s/it]


In [15]:
x

{'link': ['https://www.mayoclinic.org/diseases-conditions/head-lice/symptoms-causes/syc-20356180'],
 'alternatives': [''],
 'remedies': ['']}

In [None]:
# details = pd.DataFrame(x)
# details

## Scrape links for each disease page

In [3]:
alphabet = [
    'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
    'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
    'U', 'V', 'W', 'X', 'Y', 'Z'
]

In [4]:
def scrape_diseases():
    diseases = {
        'name': [],
        'link': []
    }

    driver = webdriver.Chrome(
        ChromeDriverManager().install(),
        options=options
    )

    for letter in tqdm(alphabet):
        path = f"https://www.mayoclinic.org/diseases-conditions/index?letter={letter}"
        driver.get(path)

        total = driver.find_element(By.ID, 'index').find_element(By.TAG_NAME, 'ol')
        all_eles = total.find_elements(By.TAG_NAME, 'li')

        for ele in all_eles:
            temp = ele.find_element(By.TAG_NAME, 'a')
            diseases['name'].append(temp.text)
            diseases['link'].append(temp.get_attribute('href'))

    return diseases

In [5]:
# all_diseases = scrape_diseases()

In [6]:
# df = pd.DataFrame(all_diseases)
# df.to_csv('mayo disease links.csv', index=False)
# df

## Scrape only remedies

In [5]:
def scrape_remedies(name, path):
    driver = webdriver.Chrome(
        ChromeDriverManager().install(),
        options=options
    )

    disease_info['disease'].append(name)
    disease_info['link'].append(path)

    driver.get(path)

    # finding overview
    try:
        overview = driver.find_element(By.XPATH, "//h2[contains(text(), 'Overview')]/following-sibling::p")
        disease_info['overview'].append(overview.text)
    except:
        disease_info['overview'].append('')

    # finding symptoms
    try:
        symptoms = driver.find_element(By.XPATH, "//h2[contains(text(), 'Symptoms')]/following-sibling::ul")
        symptoms = symptoms.find_elements(By.TAG_NAME, "li")

        temp = []
        for s in symptoms:
            temp.append(s.text)

        disease_info['symptoms'].append(temp)

    except:
        disease_info['symptoms'].append([])

    # going to diagnosis-treatment
    try:
        driver.find_element(By.ID, 'et_genericNavigation_diagnosis-treatment').click()

        # finding remedy list
        remedies = driver.find_element(By.XPATH, "//h2[contains(text(), 'Lifestyle and home remedies')]/following-sibling::ul")
        remedies = remedies.find_elements(By.TAG_NAME, 'li')

        temp = []
        for r in remedies:
            temp.append(r.text)

        disease_info['remedies'].append(temp)
    except:
        disease_info['remedies'].append([])

In [6]:
df = pd.read_csv('mayo disease links.csv')
df

Unnamed: 0,name,link
0,"A fib, also known as\nAtrial fibrillation",https://www.mayoclinic.org/diseases-conditions...
1,Abdominal aortic aneurysm,https://www.mayoclinic.org/diseases-conditions...
2,"Abnormally excessive sweating, also known as\n...",https://www.mayoclinic.org/diseases-conditions...
3,"Abscess, Bartholin's, also known as\nBartholin...",https://www.mayoclinic.org/diseases-conditions...
4,Absence seizure,https://www.mayoclinic.org/diseases-conditions...
...,...,...
2188,"Yersinia pestis, also known as\nPlague",https://www.mayoclinic.org/diseases-conditions...
2189,Yips,https://www.mayoclinic.org/diseases-conditions...
2190,Zika virus,https://www.mayoclinic.org/diseases-conditions...
2191,Zollinger-Ellison syndrome,https://www.mayoclinic.org/diseases-conditions...


In [7]:
# x = 1288+207+494
# print(x)
# df.to_numpy()[x:]

In [12]:
disease_info = {
    'disease': [],
    'link': [],
    'overview': [],
    'symptoms': [],
    'remedies': [],
}

for name, link in tqdm(df.to_numpy()):
    scrape_remedies(name, link)

  0%|                                                  | 0/203 [00:00<?, ?it/s]




  driver = webdriver.Chrome(
  0%|▏                                         | 1/203 [00:09<30:38,  9.10s/it]




  1%|▍                                         | 2/203 [00:22<39:26, 11.77s/it]




  1%|▌                                         | 3/203 [00:29<31:29,  9.45s/it]




  2%|▊                                         | 4/203 [00:39<32:39,  9.85s/it]




  2%|█                                         | 5/203 [00:50<32:56,  9.98s/it]




  3%|█▏                                        | 6/203 [00:59<31:35,  9.62s/it]




  3%|█▍                                        | 7/203 [01:05<28:09,  8.62s/it]




  4%|█▋                                        | 8/203 [01:13<27:38,  8.51s/it]




  4%|█▊                                        | 9/203 [01:21<26:37,  8.23s/it]




  5%|██                                       | 10/203 [01:31<28:13,  8.77s/it]




  5%|██▏                                      | 11/203 [01:40<28:37,  8.95s/it]




  6%|██▍                                      | 12/203 [01:47<26:07,  8.21s/it]




  6%|██▋                                      | 13/203 [01:58<29:09,  9.21s/it]




  7%|██▊                                      | 14/203 [02:06<27:44,  8.80s/it]




  7%|███                                      | 15/203 [02:13<25:30,  8.14s/it]




  8%|███▏                                     | 16/203 [02:20<24:13,  7.77s/it]




  8%|███▍                                     | 17/203 [02:28<24:06,  7.78s/it]




  9%|███▋                                     | 18/203 [02:41<29:18,  9.51s/it]




  9%|███▊                                     | 19/203 [02:49<27:46,  9.06s/it]




 10%|████                                     | 20/203 [02:57<26:43,  8.76s/it]




 10%|████▏                                    | 21/203 [03:05<25:34,  8.43s/it]




 11%|████▍                                    | 22/203 [03:12<24:44,  8.20s/it]




 11%|████▋                                    | 23/203 [03:26<29:09,  9.72s/it]




 12%|████▊                                    | 24/203 [03:36<29:17,  9.82s/it]




 12%|█████                                    | 25/203 [03:50<33:13, 11.20s/it]




 13%|█████▎                                   | 26/203 [04:01<32:35, 11.05s/it]




 13%|█████▍                                   | 27/203 [04:11<31:39, 10.79s/it]




 14%|█████▋                                   | 28/203 [04:18<28:00,  9.60s/it]




 14%|█████▊                                   | 29/203 [04:25<25:18,  8.73s/it]




 15%|██████                                   | 30/203 [04:35<26:22,  9.15s/it]




 15%|██████▎                                  | 31/203 [04:41<23:53,  8.33s/it]




 16%|██████▍                                  | 32/203 [04:49<23:02,  8.09s/it]




 16%|██████▋                                  | 33/203 [04:57<22:46,  8.04s/it]




 17%|██████▊                                  | 34/203 [05:06<23:29,  8.34s/it]




 17%|███████                                  | 35/203 [05:14<23:29,  8.39s/it]




 18%|███████▎                                 | 36/203 [05:24<24:13,  8.70s/it]




 18%|███████▍                                 | 37/203 [05:30<22:03,  7.97s/it]




 19%|███████▋                                 | 38/203 [05:42<25:15,  9.19s/it]




 19%|███████▉                                 | 39/203 [05:49<23:14,  8.50s/it]




 20%|████████                                 | 40/203 [05:55<21:29,  7.91s/it]




 20%|████████▎                                | 41/203 [06:04<21:40,  8.03s/it]




 21%|████████▍                                | 42/203 [06:13<22:24,  8.35s/it]




 21%|████████▋                                | 43/203 [06:19<20:43,  7.77s/it]




 22%|████████▉                                | 44/203 [06:28<21:13,  8.01s/it]




 22%|█████████                                | 45/203 [06:34<19:46,  7.51s/it]




 23%|█████████▎                               | 46/203 [06:43<20:46,  7.94s/it]




 23%|█████████▍                               | 47/203 [06:56<24:14,  9.33s/it]




 24%|█████████▋                               | 48/203 [07:06<24:45,  9.58s/it]




 24%|█████████▉                               | 49/203 [07:18<26:59, 10.52s/it]




 25%|██████████                               | 50/203 [07:25<24:04,  9.44s/it]




 25%|██████████▎                              | 51/203 [07:34<23:12,  9.16s/it]




 26%|██████████▌                              | 52/203 [07:40<21:03,  8.37s/it]




 26%|██████████▋                              | 53/203 [07:50<21:36,  8.65s/it]




 27%|██████████▉                              | 54/203 [08:02<24:15,  9.77s/it]




 27%|███████████                              | 55/203 [08:09<22:20,  9.06s/it]




 28%|███████████▎                             | 56/203 [08:17<21:15,  8.68s/it]




 28%|███████████▌                             | 57/203 [08:28<22:53,  9.41s/it]




 29%|███████████▋                             | 58/203 [08:43<26:19, 10.89s/it]




 29%|███████████▉                             | 59/203 [08:55<27:13, 11.34s/it]




 30%|████████████                             | 60/203 [09:03<24:31, 10.29s/it]




 30%|████████████▎                            | 61/203 [09:12<23:50, 10.07s/it]




 31%|████████████▌                            | 62/203 [09:23<24:02, 10.23s/it]




 31%|████████████▋                            | 63/203 [09:30<21:33,  9.24s/it]




 32%|████████████▉                            | 64/203 [09:36<19:23,  8.37s/it]




 32%|█████████████▏                           | 65/203 [09:56<27:18, 11.87s/it]




 33%|█████████████▎                           | 66/203 [10:08<27:09, 11.89s/it]




 33%|█████████████▌                           | 67/203 [10:23<28:41, 12.66s/it]




 33%|█████████████▋                           | 68/203 [10:29<24:26, 10.86s/it]




 34%|█████████████▉                           | 69/203 [10:37<22:13,  9.95s/it]




 34%|██████████████▏                          | 70/203 [10:47<21:37,  9.75s/it]




 35%|██████████████▎                          | 71/203 [10:58<22:41, 10.32s/it]




 35%|██████████████▌                          | 72/203 [11:07<21:40,  9.93s/it]




 36%|██████████████▋                          | 73/203 [11:20<23:37, 10.90s/it]




 36%|██████████████▉                          | 74/203 [11:32<24:06, 11.21s/it]




 37%|███████████████▏                         | 75/203 [11:44<24:30, 11.49s/it]




 37%|███████████████▎                         | 76/203 [12:05<29:51, 14.11s/it]




 38%|███████████████▌                         | 77/203 [12:17<28:10, 13.42s/it]




 38%|███████████████▊                         | 78/203 [12:27<25:53, 12.43s/it]




 39%|███████████████▉                         | 79/203 [12:36<23:37, 11.43s/it]




 39%|████████████████▏                        | 80/203 [12:44<21:26, 10.46s/it]




 40%|████████████████▎                        | 81/203 [12:50<18:46,  9.23s/it]




 40%|████████████████▌                        | 82/203 [12:59<18:34,  9.21s/it]




 41%|████████████████▊                        | 83/203 [13:08<18:06,  9.05s/it]




 41%|████████████████▉                        | 84/203 [13:17<17:53,  9.02s/it]




 42%|█████████████████▏                       | 85/203 [13:30<20:02, 10.19s/it]




 42%|█████████████████▎                       | 86/203 [13:43<21:21, 10.95s/it]




 43%|█████████████████▌                       | 87/203 [13:59<24:21, 12.60s/it]




 43%|█████████████████▊                       | 88/203 [14:08<21:57, 11.45s/it]




 44%|█████████████████▉                       | 89/203 [14:29<27:07, 14.27s/it]




 44%|██████████████████▏                      | 90/203 [14:44<27:09, 14.42s/it]




 45%|██████████████████▍                      | 91/203 [14:55<25:01, 13.41s/it]




 45%|██████████████████▌                      | 92/203 [15:08<24:55, 13.48s/it]




 46%|██████████████████▊                      | 93/203 [15:36<32:38, 17.80s/it]




 46%|██████████████████▉                      | 94/203 [15:49<29:46, 16.39s/it]




 47%|███████████████████▏                     | 95/203 [16:03<27:57, 15.53s/it]




 47%|███████████████████▍                     | 96/203 [16:09<22:54, 12.84s/it]




 48%|███████████████████▌                     | 97/203 [16:22<22:30, 12.74s/it]




 48%|███████████████████▊                     | 98/203 [16:28<18:59, 10.85s/it]




 49%|███████████████████▉                     | 99/203 [16:35<16:44,  9.66s/it]




 49%|███████████████████▋                    | 100/203 [16:41<14:43,  8.58s/it]




 50%|███████████████████▉                    | 101/203 [16:53<16:03,  9.45s/it]




 50%|████████████████████                    | 102/203 [17:00<15:02,  8.93s/it]




 51%|████████████████████▎                   | 103/203 [17:07<13:44,  8.24s/it]




 51%|████████████████████▍                   | 104/203 [17:20<16:06,  9.77s/it]




 52%|████████████████████▋                   | 105/203 [17:30<15:46,  9.65s/it]




 52%|████████████████████▉                   | 106/203 [17:43<17:11, 10.64s/it]




 53%|█████████████████████                   | 107/203 [17:53<16:42, 10.44s/it]




 53%|█████████████████████▎                  | 108/203 [18:02<16:05, 10.17s/it]




 54%|█████████████████████▍                  | 109/203 [18:10<14:38,  9.35s/it]




 54%|█████████████████████▋                  | 110/203 [18:17<13:21,  8.62s/it]




 55%|█████████████████████▊                  | 111/203 [18:31<15:52, 10.36s/it]




 55%|██████████████████████                  | 112/203 [18:37<13:52,  9.15s/it]




 56%|██████████████████████▎                 | 113/203 [18:44<12:26,  8.29s/it]




 56%|██████████████████████▍                 | 114/203 [18:56<13:58,  9.42s/it]




 57%|██████████████████████▋                 | 115/203 [19:02<12:34,  8.57s/it]




 57%|██████████████████████▊                 | 116/203 [19:14<13:35,  9.38s/it]




 58%|███████████████████████                 | 117/203 [19:26<14:37, 10.20s/it]




 58%|███████████████████████▎                | 118/203 [19:35<14:07,  9.97s/it]




 59%|███████████████████████▍                | 119/203 [19:45<14:02, 10.03s/it]




 59%|███████████████████████▋                | 120/203 [19:52<12:20,  8.92s/it]




 60%|███████████████████████▊                | 121/203 [19:58<11:08,  8.15s/it]




 60%|████████████████████████                | 122/203 [20:06<10:50,  8.04s/it]




 61%|████████████████████████▏               | 123/203 [20:24<14:47, 11.09s/it]




 61%|████████████████████████▍               | 124/203 [20:33<13:39, 10.37s/it]




 62%|████████████████████████▋               | 125/203 [20:40<12:29,  9.61s/it]




 62%|████████████████████████▊               | 126/203 [20:51<12:41,  9.89s/it]




 63%|█████████████████████████               | 127/203 [20:58<11:37,  9.17s/it]




 63%|█████████████████████████▏              | 128/203 [21:07<11:15,  9.00s/it]




 64%|█████████████████████████▍              | 129/203 [21:18<11:55,  9.66s/it]




 64%|█████████████████████████▌              | 130/203 [21:26<10:52,  8.94s/it]




 65%|█████████████████████████▊              | 131/203 [21:36<11:09,  9.30s/it]




 65%|██████████████████████████              | 132/203 [21:44<10:34,  8.94s/it]




 66%|██████████████████████████▏             | 133/203 [21:53<10:27,  8.96s/it]




 66%|██████████████████████████▍             | 134/203 [22:05<11:26,  9.94s/it]




 67%|██████████████████████████▌             | 135/203 [22:15<11:25, 10.09s/it]




 67%|██████████████████████████▊             | 136/203 [22:24<10:44,  9.63s/it]




 67%|██████████████████████████▉             | 137/203 [22:36<11:15, 10.24s/it]




 68%|███████████████████████████▏            | 138/203 [22:47<11:27, 10.57s/it]




 68%|███████████████████████████▍            | 139/203 [23:10<15:20, 14.39s/it]




 69%|███████████████████████████▌            | 140/203 [23:18<12:58, 12.35s/it]




 69%|███████████████████████████▊            | 141/203 [23:32<13:22, 12.94s/it]




 70%|███████████████████████████▉            | 142/203 [23:48<13:52, 13.64s/it]




 70%|████████████████████████████▏           | 143/203 [23:54<11:24, 11.41s/it]




 71%|████████████████████████████▎           | 144/203 [24:07<11:50, 12.04s/it]




 71%|████████████████████████████▌           | 145/203 [24:24<12:57, 13.41s/it]




 72%|████████████████████████████▊           | 146/203 [24:30<10:45, 11.32s/it]




 72%|████████████████████████████▉           | 147/203 [24:44<11:16, 12.09s/it]




 73%|█████████████████████████████▏          | 148/203 [24:52<09:50, 10.73s/it]




 73%|█████████████████████████████▎          | 149/203 [24:59<08:51,  9.83s/it]




 74%|█████████████████████████████▌          | 150/203 [25:07<08:05,  9.17s/it]




 74%|█████████████████████████████▊          | 151/203 [25:14<07:16,  8.39s/it]




 75%|█████████████████████████████▉          | 152/203 [25:26<08:07,  9.56s/it]




 75%|██████████████████████████████▏         | 153/203 [25:48<11:02, 13.25s/it]




 76%|██████████████████████████████▎         | 154/203 [25:57<09:46, 11.96s/it]




 76%|██████████████████████████████▌         | 155/203 [26:06<08:48, 11.00s/it]




 77%|██████████████████████████████▋         | 156/203 [26:12<07:31,  9.60s/it]




 77%|██████████████████████████████▉         | 157/203 [26:18<06:36,  8.62s/it]




 78%|███████████████████████████████▏        | 158/203 [26:29<06:54,  9.22s/it]




 78%|███████████████████████████████▎        | 159/203 [26:35<06:12,  8.46s/it]




 79%|███████████████████████████████▌        | 160/203 [26:46<06:26,  8.98s/it]




 79%|███████████████████████████████▋        | 161/203 [26:55<06:21,  9.08s/it]




 80%|███████████████████████████████▉        | 162/203 [27:02<05:41,  8.33s/it]




 80%|████████████████████████████████        | 163/203 [27:10<05:28,  8.22s/it]




 81%|████████████████████████████████▎       | 164/203 [27:19<05:32,  8.53s/it]




 81%|████████████████████████████████▌       | 165/203 [27:25<05:00,  7.92s/it]




 82%|████████████████████████████████▋       | 166/203 [27:32<04:36,  7.47s/it]




 82%|████████████████████████████████▉       | 167/203 [27:46<05:40,  9.45s/it]




 83%|█████████████████████████████████       | 168/203 [28:02<06:43, 11.54s/it]




 83%|█████████████████████████████████▎      | 169/203 [28:18<07:13, 12.75s/it]




 84%|█████████████████████████████████▍      | 170/203 [28:24<05:57, 10.83s/it]




 84%|█████████████████████████████████▋      | 171/203 [28:39<06:28, 12.13s/it]




 85%|█████████████████████████████████▉      | 172/203 [28:45<05:20, 10.32s/it]




 85%|██████████████████████████████████      | 173/203 [28:52<04:33,  9.12s/it]




 86%|██████████████████████████████████▎     | 174/203 [29:01<04:25,  9.15s/it]




 86%|██████████████████████████████████▍     | 175/203 [29:11<04:26,  9.51s/it]




 87%|██████████████████████████████████▋     | 176/203 [29:17<03:49,  8.51s/it]




 87%|██████████████████████████████████▉     | 177/203 [29:31<04:19,  9.96s/it]




 88%|███████████████████████████████████     | 178/203 [29:40<04:00,  9.60s/it]




 88%|███████████████████████████████████▎    | 179/203 [29:51<04:06, 10.25s/it]




 89%|███████████████████████████████████▍    | 180/203 [30:01<03:51, 10.08s/it]




 89%|███████████████████████████████████▋    | 181/203 [30:15<04:04, 11.11s/it]




 90%|███████████████████████████████████▊    | 182/203 [30:30<04:20, 12.40s/it]




 90%|████████████████████████████████████    | 183/203 [30:40<03:56, 11.83s/it]




 91%|████████████████████████████████████▎   | 184/203 [30:47<03:12, 10.14s/it]




 91%|████████████████████████████████████▍   | 185/203 [30:59<03:12, 10.70s/it]




 92%|████████████████████████████████████▋   | 186/203 [31:21<03:59, 14.10s/it]




 92%|████████████████████████████████████▊   | 187/203 [31:27<03:07, 11.71s/it]




 93%|█████████████████████████████████████   | 188/203 [31:39<02:58, 11.89s/it]




 93%|█████████████████████████████████████▏  | 189/203 [31:46<02:23, 10.26s/it]




 94%|█████████████████████████████████████▍  | 190/203 [31:53<02:02,  9.41s/it]




 94%|█████████████████████████████████████▋  | 191/203 [32:11<02:23, 11.96s/it]




 95%|█████████████████████████████████████▊  | 192/203 [32:22<02:09, 11.75s/it]




 95%|██████████████████████████████████████  | 193/203 [32:51<02:49, 16.91s/it]




 96%|██████████████████████████████████████▏ | 194/203 [33:08<02:32, 16.91s/it]




 96%|██████████████████████████████████████▍ | 195/203 [33:34<02:37, 19.73s/it]




 97%|██████████████████████████████████████▌ | 196/203 [33:41<01:49, 15.67s/it]




 97%|██████████████████████████████████████▊ | 197/203 [33:47<01:17, 12.91s/it]




 98%|███████████████████████████████████████ | 198/203 [34:23<01:39, 19.96s/it]




 98%|███████████████████████████████████████▏| 199/203 [35:15<01:57, 29.43s/it]




 99%|███████████████████████████████████████▍| 200/203 [35:36<01:20, 26.84s/it]




 99%|███████████████████████████████████████▌| 201/203 [36:16<01:01, 30.84s/it]




100%|███████████████████████████████████████▊| 202/203 [37:09<00:37, 37.41s/it]




100%|████████████████████████████████████████| 203/203 [37:43<00:00, 11.15s/it]


In [13]:
pd.DataFrame.from_dict(disease_info, orient='index').T

Unnamed: 0,disease,link,overview,symptoms,remedies
0,Tennis elbow,https://www.mayoclinic.org/diseases-conditions...,Tennis elbow (lateral epicondylitis) is a pain...,"[Shake hands or grip an object, Turn a doorkno...",[Rest. Avoid activities that aggravate your el...
1,"Tenosynovitis, de Quervain's, also known as\nD...",https://www.mayoclinic.org/diseases-conditions...,De Quervain tenosynovitis (dih-kwer-VAIN ten-o...,"[Pain near the base of the thumb, Swelling nea...",[Avoid moving your wrists the same way repeate...
2,Tension headache,https://www.mayoclinic.org/diseases-conditions...,A tension-type headache (TTH) is generally a m...,"[Dull, aching head pain, Sensation of tightnes...",[Manage your stress level. One way to help red...
3,"Testicle, retractile, also known as\nRetractil...",https://www.mayoclinic.org/diseases-conditions...,A retractile testicle is a testicle that may m...,[The testicle may be moved by hand from the gr...,[]
4,"Testicle, undescended, also known as\nUndescen...",https://www.mayoclinic.org/diseases-conditions...,An undescended testicle (cryptorchidism) is a ...,"[A retractile testicle, which moves back and f...",[Teach your son the right words to use when ta...
...,...,...,...,...,...
198,"Yersinia pestis, also known as\nPlague",https://www.mayoclinic.org/diseases-conditions...,Plague is a serious bacterial infection that's...,"[Situated in the groin, armpit or neck, About ...",[]
199,Yips,https://www.mayoclinic.org/diseases-conditions...,The yips are involuntary wrist spasms that occ...,"[Older age, More experience playing golf, Tour...",[]
200,Zika virus,https://www.mayoclinic.org/diseases-conditions...,The Zika (ZEE-kuh) virus is most often spread ...,"[Mild fever, Rash, Joint pain, particularly in...",[]
201,Zollinger-Ellison syndrome,https://www.mayoclinic.org/diseases-conditions...,Zollinger-Ellison syndrome is a rare condition...,"[Abdominal pain, Diarrhea, Burning, aching, gn...",[]


In [14]:
pd.DataFrame.from_dict(disease_info, orient='index').T.to_csv('mayo disease remedies 4.csv')

In [5]:
df = pd.concat([
    pd.read_csv('mayo disease remedies 1.csv', index_col=0),
    pd.read_csv('mayo disease remedies 2.csv', index_col=0),
    pd.read_csv('mayo disease remedies 3.csv', index_col=0),
    pd.read_csv('mayo disease remedies 4.csv', index_col=0)
])

In [6]:
df.to_csv('mayo data.csv')