In [None]:
import os
import requests
from bs4 import BeautifulSoup
from fpdf import FPDF


In [2]:
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Base URL where the table with case IDs is located
BASE_URL = "https://medical-info.dghs.gov.bd/medical-cases/report?submit=Run&report_name=Report+For+Profile+Pic&rows_per_page=100"

# Folder to store PDFs
PDF_FOLDER = "id_cards"
os.makedirs(PDF_FOLDER, exist_ok=True)

# Function to get all case links from the report page
def get_case_links():
    response = requests.get(BASE_URL)
    soup = BeautifulSoup(response.content, "html.parser")

    case_links = []
    
    # Extract all <a> tags within <tr><td> that contain case IDs
    for a_tag in soup.select("tr td a"):
        link = a_tag.get("href")
        if link and "/medical-cases/" in link:
            full_link = urljoin(BASE_URL, link)  # Convert to absolute URL
            case_links.append(full_link)

    return case_links

# Function to download an ID card from a case page
def download_id_card(case_url):
    response = requests.get(case_url)
    soup = BeautifulSoup(response.content, "html.parser")

    # Find the ID card download button
    download_button = soup.find("a", class_="btn btn-primary", href=True)
    
    if download_button and "card" in download_button["href"]:
        pdf_url = urljoin(case_url, download_button["href"])  # Full PDF URL
        case_id = pdf_url.split("/")[-2]  # Extract case number
        pdf_file_path = os.path.join(PDF_FOLDER, f"id_card_{case_id}.pdf")

        # Download the PDF
        pdf_response = requests.get(pdf_url)
        with open(pdf_file_path, "wb") as pdf_file:
            pdf_file.write(pdf_response.content)
        
        print(f"✅ Downloaded: {pdf_file_path}")
    else:
        print(f"❌ No ID card found for {case_url}")

# Main execution
if __name__ == "__main__":
    print("🔍 Fetching case links...")
    case_pages = get_case_links()
    
    for case_url in case_pages:
        download_id_card(case_url)

    print("✅ All ID cards downloaded successfully!")


🔍 Fetching case links...
✅ All ID cards downloaded successfully!


In [3]:
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Base URL where the table with case IDs is located
BASE_URL = "https://medical-info.dghs.gov.bd/medical-cases/report?submit=Run&report_name=Report+For+Profile+Pic&rows_per_page=100"

# Folder to store PDFs
PDF_FOLDER = "id_cards"
os.makedirs(PDF_FOLDER, exist_ok=True)

# Headers to mimic a real browser request
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:120.0) Gecko/20100101 Firefox/120.0"
}

# Function to get all case links from the report page
def get_case_links():
    print("🔍 Fetching case links...")
    response = requests.get(BASE_URL, headers=HEADERS)
    
    if response.status_code != 200:
        print(f"❌ Failed to load page. Status: {response.status_code}")
        return []

    soup = BeautifulSoup(response.content, "html.parser")
    case_links = []

    # Extract all <a> tags within <tr><td> that contain case IDs
    for a_tag in soup.select("tr td a"):
        link = a_tag.get("href")
        if link and "/medical-cases/" in link:
            full_link = urljoin(BASE_URL, link)  # Convert to absolute URL
            case_links.append(full_link)

    print(f"✅ Found {len(case_links)} case links")
    return case_links

# Function to download an ID card from a case page
def download_id_card(case_url):
    print(f"🔍 Checking case page: {case_url}")
    response = requests.get(case_url, headers=HEADERS)
    
    if response.status_code != 200:
        print(f"❌ Failed to load case page: {case_url} (Status {response.status_code})")
        return

    soup = BeautifulSoup(response.content, "html.parser")

    # Find the ID card download button
    download_button = soup.find("a", class_="btn btn-primary", href=True)
    
    if download_button and "card" in download_button["href"]:
        pdf_url = urljoin(case_url, download_button["href"])  # Full PDF URL
        case_id = pdf_url.split("/")[-2]  # Extract case number
        pdf_file_path = os.path.join(PDF_FOLDER, f"id_card_{case_id}.pdf")

        # Download the PDF
        print(f"📥 Downloading ID card for case {case_id}...")
        pdf_response = requests.get(pdf_url, headers=HEADERS)

        if pdf_response.status_code == 200:
            with open(pdf_file_path, "wb") as pdf_file:
                pdf_file.write(pdf_response.content)
            print(f"✅ Saved: {pdf_file_path}")
        else:
            print(f"⚠️ Failed to download PDF: {pdf_url} (Status {pdf_response.status_code})")
    else:
        print(f"❌ No ID card download link found for {case_url}")

# Main execution
if __name__ == "__main__":
    case_pages = get_case_links()
    
    for case_url in case_pages:
        download_id_card(case_url)

    print("✅ All ID cards downloaded successfully!")


🔍 Fetching case links...
✅ Found 0 case links
✅ All ID cards downloaded successfully!
