In [2]:

import requests
from bs4 import BeautifulSoup
import string
import os
from tqdm import tqdm
import csv
import pandas as pd
import logging 


In [None]:
def web_Scrapping(base_url):
    response = requests.get(base_url)
    if response.status_code != 200:
        print("Failed to retrieve page")
        exit()
        
    soup = BeautifulSoup(response.text, "html.parser")
    
    items = soup.select(".cmp-results-with-primary-name__see-link, .cmp-results-with-primary-name a")
    
    
    disease_data = []
    for item in tqdm(items, desc="Scrapping Diseases"):
        disease_name = item.text.strip()
        disease_link = f"https://www.mayoclinic.org{item['href']}" if item["href"].startswith("/") else item['href']
        disease_data.append([disease_name, disease_link])
        
    file_exists = os.path.isfile("mayo_diseases.csv")
        
    with open("mayo_diseases.csv", "a", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        if not file_exists:
            writer.writerow(["Disease", "Link"])
        writer.writerows(disease_data)
        
    print("Scraping Completed! Data Saved")      
    

In [26]:
for letter in string.ascii_uppercase:
    base_url = f"https://www.mayoclinic.org/diseases-conditions/index?letter={letter}"
    web_Scrapping(base_url)
print("done")

Scrapping Diseases: 100%|██████████| 133/133 [00:00<?, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 38/38 [00:00<?, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 163/163 [00:00<?, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 51/51 [00:00<?, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 30/30 [00:00<?, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 49/49 [00:00<?, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 22/22 [00:00<?, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 75/75 [00:00<?, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 37/37 [00:00<?, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 3/3 [00:00<?, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 9/9 [00:00<00:00, 8994.22it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 42/42 [00:00<?, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 43/43 [00:00<?, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 33/33 [00:00<00:00, 33312.16it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 25/25 [00:00<?, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 100/100 [00:00<?, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 0it [00:00, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 34/34 [00:00<?, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 99/99 [00:00<?, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 47/47 [00:00<?, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 16/16 [00:00<?, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 20/20 [00:00<?, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 13/13 [00:00<?, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 3/3 [00:00<?, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 1/1 [00:00<?, ?it/s]


Scraping Completed! Data Saved


Scrapping Diseases: 100%|██████████| 1/1 [00:00<?, ?it/s]

Scraping Completed! Data Saved
done





In [None]:

# Set up logging once
logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger(__name__)

def data_extractor(base_url):
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
    }
    
    try:
        response = requests.get(base_url, headers=headers, timeout=10)
        response.raise_for_status()
        
        soup = BeautifulSoup(response.text, "html.parser")
        
        link_tags = soup.find_all('a', class_="cmp-tab-navigation-tab__button")
        
        if len(link_tags) >= 2:
            href_1 = link_tags[1].get('href')
            href_2 = link_tags[2].get('href')

        else:
            print("Second link not found.")

    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to fetch URL: {e}")
        return None


In [38]:
url = "https://www.mayoclinic.org/diseases-conditions/bartholin-cyst/symptoms-causes/syc-20369976"
second_link = data_extractor(url)

if second_link:
    print("Link extracted successfully:", second_link)
else:
    print("Link extraction failed.")


Extracted: /diseases-conditions/bartholin-cyst/diagnosis-treatment/drc-20369981
Link extracted successfully: /diseases-conditions/bartholin-cyst/diagnosis-treatment/drc-20369981


In [7]:
import requests
from bs4 import BeautifulSoup
from typing import Dict
import logging

def scrape_medical_info(url: str) -> Dict[str, str]:
    """
    Scrapes medical information from Mayo Clinic website.
    
    Args:
        url (str): Mayo Clinic URL to scrape
        
    Returns:
        Dict[str, str]: Dictionary containing medical information by section
    """
    # Setup logging
    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    # Headers to mimic browser request
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
    }

    sections = [
        "Overview", "Symptoms", "When to see a doctor", 
        "Causes", "Risk factors", "Complications", "Prevention"
    ]
    
    content = {}

    try:
        # Fetch webpage
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
        
        # Parse HTML
        soup = BeautifulSoup(response.text, 'html.parser')
        
        # Extract content for each section
        for section in sections:
            section_content = []
            
            # Find section header
            header = soup.find(['h2', 'h3'], string=section)
            if header:
                # Get all text elements after header until next section
                for elem in header.find_next_siblings(['p', 'ul', 'li']):
                    if elem.name in ['h2', 'h3']:
                        break
                    section_content.append(elem.get_text(strip=True))
                    
            content[section] = " ".join(section_content) if section_content else "No data found"
            
        return content

    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to fetch URL: {e}")
        return {section: "Error fetching data" for section in sections}
    
    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        return {section: "Error processing data" for section in sections}

# Example usage
if __name__ == "__main__":
    url = df["Link"][1]
    data = scrape_medical_info(url)
    
    # Print results
    for section, text in data.items():
        print(f"\n{section}:")
        print(f"{text}\n")
        print("-" * 80)


Overview:
Hyperhidrosis (hi-pur-hi-DROE-sis) is excessive sweating that's not always related to heat or exercise. You may sweat so much that it soaks through your clothes or drips off your hands. Heavy sweating can disrupt your day and cause social anxiety and embarrassment. Hyperhidrosis treatment usually helps. It often begins with antiperspirants. If these don't help, you may need to try different medications and therapies. In severe cases, your health care provider may suggest surgery to remove the sweat glands or to disconnect the nerves related to producing too much sweat. Sometimes an underlying condition may be found and treated. The main symptom of hyperhidrosis is heavy sweating. This goes beyond the sweating from being in a hot environment, exercising, or feeling anxious or stressed. The type of hyperhidrosis that usually affects the hands, feet, underarms or face causes at least one episode a week when you're awake. And the sweating usually happens on both sides of the bod

In [9]:
df["Link"][2]

'https://www.mayoclinic.org/diseases-conditions/bartholin-cyst/symptoms-causes/syc-20369976'

In [None]:
import os
import csv
import requests
import logging
from bs4 import BeautifulSoup
from tqdm import tqdm

# Logging setup
logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger(__name__)


def data_extractor(base_url):
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
    }

    try:
        response = requests.get(base_url, headers=headers, timeout=20)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "html.parser")
        link_tags = soup.find_all('a', class_="cmp-tab-navigation-tab__button")

        link1 = ""
        link2 = ""

        if len(link_tags) > 1:
            href1 = link_tags[1].get('href')
            if href1:
                link1 = f"https://www.mayoclinic.org{href1}" if href1.startswith("/") else href1

        if len(link_tags) > 2:
            href2 = link_tags[2].get('href')
            if href2:
                link2 = f"https://www.mayoclinic.org{href2}" if href2.startswith("/") else href2

        return link1, link2

    except requests.exceptions.RequestException as e:
        logger.error(f"Failed to fetch additional links for {base_url}: {e}")
        return "", ""


def web_Scrapping(base_url):
    response = requests.get(base_url)
    if response.status_code != 200:
        print("Failed to retrieve page")
        exit()

    soup = BeautifulSoup(response.text, "html.parser")
    items = soup.select(".cmp-results-with-primary-name__see-link, .cmp-results-with-primary-name a")

    file_exists = os.path.isfile("mayo_diseases.csv")
    with open("mayo_diseases.csv", "a", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        if not file_exists:
            writer.writerow(["Disease", "Main Link", "Link 1", "Link 2"])

        for item in tqdm(items, desc="Scraping Diseases"):
            disease_name = item.text.strip()
            main_link = f"https://www.mayoclinic.org{item['href']}" if item['href'].startswith("/") else item['href']

            link1, link2 = data_extractor(main_link)
            writer.writerow([disease_name, main_link, link1, link2])

    print("Scraping Completed! Data Saved")


# Example usage
# web_Scrapping("https://www.mayoclinic.org/diseases-conditions/index")


In [42]:
base_url = f"https://www.mayoclinic.org/diseases-conditions/index?letter=A"
web_Scrapping(base_url)

Scraping Diseases:   1%|          | 1/133 [00:00<00:31,  4.25it/s]ERROR:__main__:Failed to fetch additional links for https://www.mayoclinic.org/diseases-conditions/hyperhidrosis/symptoms-causes/syc-20367152: HTTPSConnectionPool(host='www.mayoclinic.org', port=443): Read timed out. (read timeout=10)
Scraping Diseases:  10%|▉         | 13/133 [00:21<01:40,  1.20it/s]ERROR:__main__:Failed to fetch additional links for https://www.mayoclinic.org/diseases-conditions/acute-myelogenous-leukemia/symptoms-causes/syc-20369109: HTTPSConnectionPool(host='www.mayoclinic.org', port=443): Read timed out. (read timeout=10)
Scraping Diseases: 100%|██████████| 133/133 [01:09<00:00,  1.93it/s]

Scraping Completed! Data Saved





In [None]:
import time

def data_extractor(base_url, retries=3, delay=5):
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"
    }

    diagnosis_treatment_link = ""
    doctors_departments_link = ""

    for attempt in range(retries):
        try:
            response = requests.get(base_url, headers=headers, timeout=20)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, "html.parser")   

            content1 = soup.find('a', id="et_genericNavigation_diagnosis-treatment")
            if content1:
                href1 = content1.get('href')
                diagnosis_treatment_link = f"https://www.mayoclinic.org{href1}" if href1.startswith("/") else href1

            content2 = soup.find('a', id="et_genericNavigation_doctors-departments")
            if content2:
                href2 = content2.get('href')
                doctors_departments_link = f"https://www.mayoclinic.org{href2}" if href2.startswith("/") else href2

            break  # success, exit loop

        except requests.exceptions.RequestException as e:
            print(f"[Attempt {attempt + 1}] Error fetching {base_url}: {e}")
            time.sleep(delay)

    return diagnosis_treatment_link, doctors_departments_link

    
def web_Scrapping(base_url):
    response = requests.get(base_url)
    if response.status_code != 200:
        print("Failed to retrieve page")
        exit()

    soup = BeautifulSoup(response.text, "html.parser")
    items = soup.select(".cmp-results-with-primary-name__see-link, .cmp-results-with-primary-name a")

    file_exists = os.path.isfile("mayo_diseases.csv")
    with open("mayo_diseases.csv", "a", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        if not file_exists:
            writer.writerow(["disease", "main_link", "Diagnosis_treatment_link", ""])

        for item in tqdm(items, desc="Scraping Diseases"):
            disease_name = item.text.strip()
            main_link = f"https://www.mayoclinic.org{item['href']}" if item['href'].startswith("/") else item['href']

            link1, link2 = data_extractor(main_link)
            writer.writerow([disease_name, main_link, link1, link2])

    print("Scraping Completed! Data Saved")

In [None]:
base_url = f"https://www.mayoclinic.org/diseases-conditions/index?letter=A"
web_Scrapping(base_url) 

Scraping Diseases: 100%|██████████| 133/133 [01:02<00:00,  2.14it/s]

Scraping Completed! Data Saved





In [2]:
import torch

print(torch.cuda.is_available())  # True if GPU is available
print(torch.cuda.current_device())  # ID of current GPU
print(torch.cuda.get_device_name(0))  # GPU name


True
0
NVIDIA GeForce RTX 3050 Laptop GPU


In [13]:
import torch
import kornia.augmentation as K
import kornia.filters as KF
import torchvision.transforms as T
from tqdm import tqdm

# Check GPU availability
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Create a dummy image batch (e.g., 32 images of 3x224x224)
batch_size = 32
image_batch = torch.rand(batch_size, 3, 224, 224).to(device)

# Heavy Kornia transformations
transform = torch.nn.Sequential(
    K.RandomPerspective(p=1.0, distortion_scale=0.6),
    K.ColorJitter(brightness=0.8, contrast=0.8, saturation=0.8, hue=0.2, p=1.0),
    K.RandomGaussianBlur(kernel_size=(9, 9), sigma=(0.1, 2.0), p=1.0),
    K.RandomAffine(degrees=60, translate=(0.2, 0.2), scale=(0.5, 1.5), shear=30, p=1.0),
    K.RandomSolarize(thresholds=0.4, additions=0.2, p=1.0),
).to(device)

# Loop with tqdm to apply transformations
print("Applying heavy transformations on GPU...")
for i in tqdm(range(100)):  # Simulate 100 batches
    images = image_batch.clone()  # Simulate a new batch
    transformed = transform(images)
    assert transformed.device.type == 'cuda', "Transform not applied on GPU"


Using device: cuda
Applying heavy transformations on GPU...


100%|██████████| 100/100 [00:05<00:00, 19.22it/s]


In [14]:
import torch
import kornia.augmentation as K
import kornia.filters as KF
import torchvision.transforms as T
from tqdm import tqdm

# Force CPU usage
device = torch.device("cpu")
print(f"Using device: {device}")

# Create a dummy image batch
batch_size = 100
image_batch = torch.rand(batch_size, 3, 224, 224).to(device)

# Heavy Kornia transformations on CPU
transform = torch.nn.Sequential(
    K.RandomPerspective(p=1.0, distortion_scale=0.6),
    K.ColorJitter(brightness=0.8, contrast=0.8, saturation=0.8, hue=0.2, p=1.0),
    K.RandomGaussianBlur(kernel_size=(9, 9), sigma=(0.1, 2.0), p=1.0),
    K.RandomAffine(degrees=60, translate=(0.2, 0.2), scale=(0.5, 1.5), shear=30, p=1.0),
    K.RandomSolarize(thresholds=0.4, additions=0.2, p=1.0),
).to(device)

# Loop with tqdm to apply transformations
print("Applying heavy transformations on CPU...")
for i in tqdm(range(100)):
    images = image_batch.clone()
    transformed = transform(images)
    assert transformed.device.type == 'cpu', "Transform not applied on CPU"


Using device: cpu
Applying heavy transformations on CPU...


100%|██████████| 100/100 [01:14<00:00,  1.35it/s]


In [9]:
print(image.device)                            # Should say: cuda:0
print(next(transform.parameters()).device)     # Should also say: cuda:0


cuda:0


StopIteration: 

In [22]:
import os

file_path = "skin-disease-datasaet/test_set/BA- cellulitis"



file_names = os.listdir(file_path)

In [27]:
file_names.sort()

In [9]:
import torch
import torch.nn as nn
import kornia.augmentation as K
from kornia.geometry.transform import Resize
from kornia.constants import Resample
from PIL import Image
import torchvision.transforms.functional as TF


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)


image_path = "skin-disease-datasaet/train_set/BA- cellulitis/BA- cellulitis (1).jpeg"

resnet_transform = nn.Sequential(
    Resize(size=(224, 224), interpolation=Resample.BILINEAR),
    K.Normalize(mean=torch.tensor([0.485, 0.456, 0.406]), 
                std=torch.tensor([0.229, 0.224, 0.225]))
).to(device)

image = Image.open(image_path).convert("RGB")

image_tensor = TF.to_tensor(image).unsqueeze(0).to(device)  # [1, C, H, W]


image =resnet_transform(image)

print(image.shape)


cuda
torch.Size([1, 3, 224, 224])
