Test for one course link

In [26]:
import time
import os
import re
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# -------------------- DRIVER SETUP --------------------
def get_driver(headless=False):
    options = webdriver.ChromeOptions()
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("--window-size=1280,720")
    options.add_argument("--log-level=3")
    if headless:
        options.add_argument("--headless=new")  # modern headless mode
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    return driver


# -------------------- TEXT CLEANING --------------------
def clean_text(text):
    """Remove specified words from text"""
    words_to_remove = [
        "Objective:", "Objective :",
        "Eligibility:", "Eligibility :",
        "Duration:", "Duration :",
        "Professional Skills"
    ]
    for word in words_to_remove:
        text = text.replace(word, "")
    return text.strip()


# -------------------- OLD SYLLABUS EXTRACTION --------------------
def extract_old_syllabus(soup):
    syllabus_content = []
    stop_markers = [
        "After completing", "Graduates",
        "After successful", "After ",
        "Job Opportunities", "job opportunities"
    ]

    syllabus_section = soup.find(string=re.compile("Syllabus", re.IGNORECASE))
    if syllabus_section:
        next_elem = syllabus_section.find_next()
        while next_elem and next_elem.name in ["p", "ul", "ol"]:
            text_content = next_elem.get_text(strip=True)

            if any(marker.lower() in text_content.lower() for marker in stop_markers):
                break

            if next_elem.name in ["ul", "ol"]:
                for item in next_elem.find_all("li"):
                    item_text = clean_text(item.get_text(strip=True))
                    if not any(marker.lower() in item_text.lower() for marker in stop_markers):
                        syllabus_content.append(f"- {item_text}")
                    else:
                        break
            else:
                if text_content:
                    syllabus_content.append(f"- {clean_text(text_content)}")

            next_elem = next_elem.find_next_sibling()

    return "\n".join(syllabus_content) if syllabus_content else "Not available"


# -------------------- NEW SYLLABUS EXTRACTION --------------------
def extract_syllabus(soup):
    """Extract syllabus content up to Job Opportunities or Career"""
    headings = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
    
    for heading in headings:
        heading_text = heading.get_text(strip=True).lower()
        if 'syllabus' in heading_text:
            syllabus_content = []
            elements_to_process = []
            current_elem = heading
            while current_elem:
                current_elem = current_elem.find_next_sibling()
                if not current_elem:
                    break
                if current_elem.name in ['h1','h2','h3','h4','h5','h6']:
                    heading_text = current_elem.get_text(strip=True).lower()
                    if 'job opportunit' in heading_text or 'career' in heading_text:
                        break
                elements_to_process.append(current_elem)

            for elem in elements_to_process:
                elem_text = elem.get_text(strip=True).lower()
                if 'job opportunities' in elem_text and len(elem_text) < 100:
                    break
                if elem.name in ['p', 'ul', 'ol', 'div']:
                    if elem.name in ['ul','ol']:
                        for item in elem.find_all('li'):
                            item_text = clean_text(item.get_text(strip=True))
                            if item_text and 'job opportunit' not in item_text.lower():
                                syllabus_content.append(f"- {item_text}")
                    else:
                        paragraph_text = clean_text(elem.get_text(strip=True))
                        if paragraph_text and 'job opportunit' not in paragraph_text.lower():
                            syllabus_content.append(paragraph_text)
            if syllabus_content:
                return "\n".join(syllabus_content)
    return "Not available"


# -------------------- SYLLABUS TRANSFORMER --------------------
def transform_syllabus(text: str) -> str:
    """Ensure each syllabus item (starting with '-') has a blank line after it."""
    if pd.isna(text) or not isinstance(text, str):
        return text

    text = text.strip()
    # Add blank line between syllabus items
    text = re.sub(r'\.\s*-\s*', r'.\n\n- ', text.strip())
    # Add line break after '- Module X: Title'
    text = re.sub(r"(-\s*Module\s*\d+\s*:\s*[^-\n]+)", r"\1\n", text, flags=re.IGNORECASE)

    return text.strip()


# -------------------- SCRAPER --------------------
def scrape_course_data(url):
    driver = get_driver()
    try:
        print(f"🌐 Accessing URL: {url}")
        driver.get(url)

        # Wait for page to load
        WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.TAG_NAME, "h1")))
        time.sleep(3)

        soup = BeautifulSoup(driver.page_source, "html.parser")

        # 1. Course Name
        course_name_tag = soup.find("h1", class_="product_title entry-title")
        course_name = course_name_tag.get_text(strip=True) if course_name_tag else "Course name not found"
        print(f"📛 Course Name: {course_name}")

        # 2. About Course
        about_course = "About course not found"
        about_sections = soup.find_all("p")
        for section in about_sections:
            strong_tag = section.find("strong")
            if strong_tag and "objective" in strong_tag.get_text(strip=True).lower():
                about_course = clean_text(section.get_text(" ", strip=True))
                break
        print(f"📝 About Course: {about_course[:80]}...")

        # 3. Eligibility
        eligibility = "Not available"
        for section in about_sections:
            strong_tag = section.find("strong")
            if strong_tag and "eligibility" in strong_tag.get_text(strip=True).lower():
                eligibility = clean_text(section.get_text(strip=True))
                break
        print(f"✅ Eligibility: {eligibility}")

        # 4. Duration
        duration = "Not available"
        for section in about_sections:
            strong_tag = section.find("strong")
            if strong_tag and "duration" in strong_tag.get_text(strip=True).lower():
                duration = clean_text(section.get_text(strip=True))
                break
        print(f"⏱️ Duration: {duration}")

        # 5. Price
        price = "Not available"
        price_tag = soup.find("bdi")
        if price_tag:
            price = price_tag.get_text(strip=True)
        print(f"💰 Price: {price}")

        # Fee Structure
        fee_structure = (
            f"{price} \n- All other fees remain unchanged\n"
            "- Education loans are available through leading banks and NBFCs."
        ) if price and price != "Not available" else (
            "- All other fees remain unchanged\n"
            "- Education loans are available through leading banks and NBFCs."
        )
        print(f"💳 Fee Structure: {fee_structure}")

        # 6. Who Should Take It
        who_content = []
        who_strong = soup.find("strong", string=re.compile(r"Who Should Enroll|Who Should Take It", re.IGNORECASE))
        if who_strong:
            next_ul = who_strong.find_next("ul")
            if next_ul:
                for li in next_ul.find_all("li"):
                    who_content.append(clean_text(li.get_text(strip=True)))
        who_should_take = "\n".join([f"- {item}" for item in who_content]) if who_content else "Not available"
        print(f"👥 Who Should Take It: {who_should_take}")

        # 7. Syllabus (Old + New)
        syllabus_old = transform_syllabus(extract_old_syllabus(soup))
        syllabus_new = transform_syllabus(extract_syllabus(soup))

        print(f"📚 Syllabus (Old): {syllabus_old[:120]}...")
        print(f"📚 Syllabus (New): {syllabus_new[:120]}...")

        # 8. Certificate
        cert_link = "Certificate not available"
        cert_tag = soup.find("a", href=re.compile(r"\.pdf$"))
        if cert_tag and cert_tag.has_attr("href"):
            cert_link = cert_tag["href"]
        else:
            cert_img = soup.find("img", src=re.compile(r"certificate", re.IGNORECASE))
            if cert_img and cert_img.has_attr("src"):
                cert_link = cert_img["src"]
        print(f"📜 Certificate: {cert_link}")

        return course_name, about_course, eligibility, duration, price, who_should_take, syllabus_old, syllabus_new, cert_link, fee_structure

    except Exception as e:
        print(f"🔥 Scraping failed for {url}: {str(e)}")
        import traceback
        traceback.print_exc()
        return ["Error"] * 10
    finally:
        driver.quit()
        print("🚪 Browser closed")


# -------------------- SAVE TO EXCEL --------------------
def save_to_excel(data, file_path, url):
    columns = [
        "Course Name", "About Course", "Eligibility", "Duration", "Price",
        "Who Should Take It", "Syllabus (Old)", "Syllabus (New)",
        "Certificate", "Fee Structure", "Course URL"
    ]
    (course_name, about_course, eligibility, duration, price,
     who_should_take, syllabus_old, syllabus_new, cert_link, fee_structure) = data
    try:
        if os.path.exists(file_path):
            df = pd.read_excel(file_path)
        else:
            df = pd.DataFrame(columns=columns)

        if url not in df["Course URL"].values:
            row = {
                "Course Name": course_name,
                "About Course": about_course,
                "Eligibility": eligibility,
                "Duration": duration,
                "Price": price,
                "Who Should Take It": who_should_take,
                "Syllabus (Old)": syllabus_old,
                "Syllabus (New)": syllabus_new,
                "Certificate": cert_link,
                "Fee Structure": fee_structure,
                "Course URL": url
            }
            df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
            df.to_excel(file_path, index=False)
            print(f"💾 Saved data for: {course_name}")
        else:
            print(f"⚠️ Skipped duplicate: {url}")

    except Exception as e:
        print(f"❌ Excel save error: {e}")


# -------------------- MAIN --------------------
if __name__ == "__main__":
    course_urls = [
        "https://iisdt.in/product/certificate-in-python-programming/"
    ]

    print("🚀 Starting scraping process...")
    file_path = r"C:\Users\taslim.siddiqui\Downloads\course_new.xlsx"

    for course_url in course_urls:
        print(f"\n🔍 Processing: {course_url}")
        course_data = scrape_course_data(course_url)
        if all(item != "Error" for item in course_data):
            save_to_excel(course_data, file_path, course_url)
        else:
            print(f"❌ Failed to scrape complete data for {course_url}")

    print("\n✅ Process completed")


🚀 Starting scraping process...

🔍 Processing: https://iisdt.in/product/certificate-in-python-programming/
🌐 Accessing URL: https://iisdt.in/product/certificate-in-python-programming/
📛 Course Name: Certificate in Python Programming
📝 About Course: The Certificate in Python Programming is designed to introduce learners to the f...
✅ Eligibility: Completion of10+2 (higher Secondary)or equivalent.
⏱️ Duration: Two Months.
💰 Price: Rs.6,000
💳 Fee Structure: Rs.6,000 
- All other fees remain unchanged
- Education loans are available through leading banks and NBFCs.
👥 Who Should Take It: Not available
📚 Syllabus (Old): - Introduction to Python Programming:History and Evolution of Python, Features and Applications of Python, Installing Py...
📚 Syllabus (New): Not available...
📜 Certificate: https://iisdt.in/wp-content/uploads/2025/06/Sample-Diploma.pdf
🚪 Browser closed
💾 Saved data for: Certificate in Python Programming

✅ Process completed


Input is excel file 

In [None]:
import time
import os
import re
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

# -------------------- DRIVER SETUP --------------------
def get_driver(headless=False):
    options = webdriver.ChromeOptions()
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument("--window-size=1280,720")
    options.add_argument("--log-level=3")
    if headless:
        options.add_argument("--headless=new")  # modern headless mode
    driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
    return driver


# -------------------- TEXT CLEANING --------------------
def clean_text(text):
    """Remove specified words from text"""
    words_to_remove = [
        "Objective:", "Objective :",
        "Eligibility:", "Eligibility :",
        "Duration:", "Duration :",
        "Professional Skills"
    ]
    for word in words_to_remove:
        text = text.replace(word, "")
    return text.strip()


# -------------------- OLD SYLLABUS EXTRACTION --------------------
def extract_old_syllabus(soup):
    syllabus_content = []
    stop_markers = [
        "After completing", "Graduates",
        "After successful", "After ",
        "Job Opportunities", "job opportunities"
    ]

    syllabus_section = soup.find(string=re.compile("Syllabus", re.IGNORECASE))
    if syllabus_section:
        next_elem = syllabus_section.find_next()
        while next_elem and next_elem.name in ["p", "ul", "ol"]:
            text_content = next_elem.get_text(strip=True)

            if any(marker.lower() in text_content.lower() for marker in stop_markers):
                break

            if next_elem.name in ["ul", "ol"]:
                for item in next_elem.find_all("li"):
                    item_text = clean_text(item.get_text(strip=True))
                    if not any(marker.lower() in item_text.lower() for marker in stop_markers):
                        syllabus_content.append(f"- {item_text}")
                    else:
                        break
            else:
                if text_content:
                    syllabus_content.append(f"- {clean_text(text_content)}")

            next_elem = next_elem.find_next_sibling()

    return "\n".join(syllabus_content) if syllabus_content else "Not available"


# -------------------- NEW SYLLABUS EXTRACTION --------------------
def extract_syllabus(soup):
    """Extract syllabus content up to Job Opportunities or Career"""
    headings = soup.find_all(['h1', 'h2', 'h3', 'h4', 'h5', 'h6'])
    
    for heading in headings:
        heading_text = heading.get_text(strip=True).lower()
        if 'syllabus' in heading_text:
            syllabus_content = []
            elements_to_process = []
            current_elem = heading
            while current_elem:
                current_elem = current_elem.find_next_sibling()
                if not current_elem:
                    break
                if current_elem.name in ['h1','h2','h3','h4','h5','h6']:
                    heading_text = current_elem.get_text(strip=True).lower()
                    if 'job opportunit' in heading_text or 'career' in heading_text:
                        break
                elements_to_process.append(current_elem)

            for elem in elements_to_process:
                elem_text = elem.get_text(strip=True).lower()
                if 'job opportunities' in elem_text and len(elem_text) < 100:
                    break
                if elem.name in ['p', 'ul', 'ol', 'div']:
                    if elem.name in ['ul','ol']:
                        for item in elem.find_all('li'):
                            item_text = clean_text(item.get_text(strip=True))
                            if item_text and 'job opportunit' not in item_text.lower():
                                syllabus_content.append(f"- {item_text}")
                    else:
                        paragraph_text = clean_text(elem.get_text(strip=True))
                        if paragraph_text and 'job opportunit' not in paragraph_text.lower():
                            syllabus_content.append(paragraph_text)
            if syllabus_content:
                return "\n".join(syllabus_content)
    return "Not available"


# -------------------- SYLLABUS TRANSFORMER --------------------
def transform_syllabus(text: str) -> str:
    """Ensure each syllabus item (starting with '-') has a blank line after it."""
    if pd.isna(text) or not isinstance(text, str):
        return text

    text = text.strip()
    # Add blank line between syllabus items
    text = re.sub(r'\.\s*-\s*', r'.\n\n- ', text.strip())
    # Add line break after '- Module X: Title'
    text = re.sub(r"(-\s*Module\s*\d+\s*:\s*[^-\n]+)", r"\1\n", text, flags=re.IGNORECASE)

    return text.strip()


# -------------------- SCRAPER --------------------
def scrape_course_data(url):
    driver = get_driver()
    try:
        print(f"🌐 Accessing URL: {url}")
        driver.get(url)

        # Wait for page to load
        WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.TAG_NAME, "h1")))
        time.sleep(3)

        soup = BeautifulSoup(driver.page_source, "html.parser")

        # 1. Course Name
        course_name_tag = soup.find("h1", class_="product_title entry-title")
        course_name = course_name_tag.get_text(strip=True) if course_name_tag else "Course name not found"
        print(f"📛 Course Name: {course_name}")

        # 2. About Course
        about_course = "About course not found"
        about_sections = soup.find_all("p")
        for section in about_sections:
            strong_tag = section.find("strong")
            if strong_tag and "objective" in strong_tag.get_text(strip=True).lower():
                about_course = clean_text(section.get_text(" ", strip=True))
                break
        print(f"📝 About Course: {about_course[:80]}...")

        # 3. Eligibility
        eligibility = "Not available"
        for section in about_sections:
            strong_tag = section.find("strong")
            if strong_tag and "eligibility" in strong_tag.get_text(strip=True).lower():
                eligibility = clean_text(section.get_text(strip=True))
                break
        print(f"✅ Eligibility: {eligibility}")

        # 4. Duration
        duration = "Not available"
        for section in about_sections:
            strong_tag = section.find("strong")
            if strong_tag and "duration" in strong_tag.get_text(strip=True).lower():
                duration = clean_text(section.get_text(strip=True))
                break
        print(f"⏱️ Duration: {duration}")

        # 5. Price
        price = "Not available"
        price_tag = soup.find("bdi")
        if price_tag:
            price = price_tag.get_text(strip=True)
        print(f"💰 Price: {price}")

        # Fee Structure
        fee_structure = (
            f"{price} \n- All other fees remain unchanged\n"
            "- Education loans are available through leading banks and NBFCs."
        ) if price and price != "Not available" else (
            "- All other fees remain unchanged\n"
            "- Education loans are available through leading banks and NBFCs."
        )
        print(f"💳 Fee Structure: {fee_structure}")

        # 6. Who Should Take It
        who_content = []
        who_strong = soup.find("strong", string=re.compile(r"Who Should Enroll|Who Should Take It", re.IGNORECASE))
        if who_strong:
            next_ul = who_strong.find_next("ul")
            if next_ul:
                for li in next_ul.find_all("li"):
                    who_content.append(clean_text(li.get_text(strip=True)))
        who_should_take = "\n".join([f"- {item}" for item in who_content]) if who_content else "Not available"
        print(f"👥 Who Should Take It: {who_should_take}")

        # 7. Syllabus (Old + New)
        syllabus_old = transform_syllabus(extract_old_syllabus(soup))
        syllabus_new = transform_syllabus(extract_syllabus(soup))

        print(f"📚 Syllabus (Old): {syllabus_old[:120]}...")
        print(f"📚 Syllabus (New): {syllabus_new[:120]}...")

        # 8. Certificate
        cert_link = "Certificate not available"
        cert_tag = soup.find("a", href=re.compile(r"\.pdf$"))
        if cert_tag and cert_tag.has_attr("href"):
            cert_link = cert_tag["href"]
        else:
            cert_img = soup.find("img", src=re.compile(r"certificate", re.IGNORECASE))
            if cert_img and cert_img.has_attr("src"):
                cert_link = cert_img["src"]
        print(f"📜 Certificate: {cert_link}")

        return course_name, about_course, eligibility, duration, price, who_should_take, syllabus_old, syllabus_new, cert_link, fee_structure

    except Exception as e:
        print(f"🔥 Scraping failed for {url}: {str(e)}")
        import traceback
        traceback.print_exc()
        return ["Error"] * 10
    finally:
        driver.quit()
        print("🚪 Browser closed")


# -------------------- SAVE TO EXCEL --------------------
def save_to_excel(data, file_path, url):
    columns = [
        "Course Name", "About Course", "Eligibility", "Duration", "Price",
        "Who Should Take It", "Syllabus (Old)", "Syllabus (New)",
        "Certificate", "Fee Structure", "Course URL"
    ]
    (course_name, about_course, eligibility, duration, price,
     who_should_take, syllabus_old, syllabus_new, cert_link, fee_structure) = data
    try:
        if os.path.exists(file_path):
            df = pd.read_excel(file_path)
        else:
            df = pd.DataFrame(columns=columns)

        if url not in df["Course URL"].values:
            row = {
                "Course Name": course_name,
                "About Course": about_course,
                "Eligibility": eligibility,
                "Duration": duration,
                "Price": price,
                "Who Should Take It": who_should_take,
                "Syllabus (Old)": syllabus_old,
                "Syllabus (New)": syllabus_new,
                "Certificate": cert_link,
                "Fee Structure": fee_structure,
                "Course URL": url
            }
            df = pd.concat([df, pd.DataFrame([row])], ignore_index=True)
            df.to_excel(file_path, index=False)
            print(f"💾 Saved data for: {course_name}")
        else:
            print(f"⚠️ Skipped duplicate: {url}")

    except Exception as e:
        print(f"❌ Excel save error: {e}")

# -------------------- MAIN --------------------
if __name__ == "__main__":
    input_excel = r"C:\Users\taslim.siddiqui\Downloads\Course link last endinf iisdt.xlsx"
    df_urls = pd.read_excel(input_excel)
    
    if "Course URL" not in df_urls.columns:
        raise ValueError("Excel file must contain a column named 'Course URL'")

    course_urls = df_urls["Course URL"].dropna().tolist()
    output_file = r"C:\Users\taslim.siddiqui\Downloads\IISDT pending course extraction1 .xlsx"

    print("🚀 Starting scraping process...")

    for course_url in course_urls:
        print(f"\n🔍 Processing: {course_url}")
        course_data = scrape_course_data(course_url)
        if all(item != "Error" for item in course_data):
            save_to_excel(course_data, output_file, course_url)
        else:
            print(f"❌ Failed to scrape complete data for {course_url}")

    print("\n✅ Process completed")


🚀 Starting scraping process...

🔍 Processing: https://iisdt.in/product/diploma-in-app-and-web-design/

🌐 Accessing URL: https://iisdt.in/product/diploma-in-app-and-web-design/
📘 Course Name: Diploma in App and Web Design
📝 About Course: This course provides comprehensive knowledge of designing user-friendly and visually appealing mobile applications and websites. It covers principles of user interface (UI) and user experience (UX) design, responsive layouts, graphic design, and basic coding languages. The program prepares learners to create functional and engaging digital products that meet user needs.
✅ Eligibility: Completion of10th Grade (high School)or equivalent.
⏱️ Duration: Six Months.
💰 Price: Rs.8,000
💳 Fee Structure:
Rs.8,000 
- All other fees remain unchanged
- Education loans are available through leading banks and NBFCs.
👥 Who Should Take It:
Not available
📚 Syllabus (Old):
- Basics of Computer And Information Technology:Introduction, objectives, evolution of computers, ad

Traceback (most recent call last):
  File "C:\Users\taslim.siddiqui\AppData\Local\Temp\ipykernel_13048\3708329488.py", line 79, in scrape_course_data
    driver.get(url)
  File "c:\Python311\Lib\site-packages\selenium\webdriver\remote\webdriver.py", line 479, in get
    self.execute(Command.GET, {"url": url})
  File "c:\Python311\Lib\site-packages\selenium\webdriver\remote\webdriver.py", line 454, in execute
    self.error_handler.check_response(response)
  File "c:\Python311\Lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 232, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: unknown error: net::ERR_CONNECTION_TIMED_OUT
  (Session info: chrome=140.0.7339.186)
Stacktrace:
	GetHandleVerifier [0x0x106c333+65459]
	GetHandleVerifier [0x0x106c374+65524]
	(No symbol) [0x0xe8d973]
	(No symbol) [0x0xe8afe4]
	(No symbol) [0x0xe7e3e2]
	(No symbol) [0x0xe7fe55]
	(No symbol) [0x0xe7e678]
	(No symbol) 

❌ Failed to scrape complete data for https://iisdt.in/product/certificate-in-employment-law

🔍 Processing: https://iisdt.in/product/certificate-in-energy-healing

🌐 Accessing URL: https://iisdt.in/product/certificate-in-energy-healing
🔥 Scraping failed for https://iisdt.in/product/certificate-in-energy-healing: Message: unknown error: net::ERR_CONNECTION_TIMED_OUT
  (Session info: chrome=140.0.7339.186)
Stacktrace:
	GetHandleVerifier [0x0x106c333+65459]
	GetHandleVerifier [0x0x106c374+65524]
	(No symbol) [0x0xe8d973]
	(No symbol) [0x0xe8afe4]
	(No symbol) [0x0xe7e3e2]
	(No symbol) [0x0xe7fe55]
	(No symbol) [0x0xe7e678]
	(No symbol) [0x0xe7e1ac]
	(No symbol) [0x0xe7deba]
	(No symbol) [0x0xe7bdc0]
	(No symbol) [0x0xe7c76b]
	(No symbol) [0x0xe914ee]
	(No symbol) [0x0xf1c207]
	(No symbol) [0x0xef9dfc]
	(No symbol) [0x0xf1b606]
	(No symbol) [0x0xef9bf6]
	(No symbol) [0x0xecb38e]
	(No symbol) [0x0xecc274]
	GetHandleVerifier [0x0x12eeda3+2697763]
	GetHandleVerifier [0x0x12e9ec7+2677575]
	GetH

Traceback (most recent call last):
  File "C:\Users\taslim.siddiqui\AppData\Local\Temp\ipykernel_13048\3708329488.py", line 79, in scrape_course_data
    driver.get(url)
  File "c:\Python311\Lib\site-packages\selenium\webdriver\remote\webdriver.py", line 479, in get
    self.execute(Command.GET, {"url": url})
  File "c:\Python311\Lib\site-packages\selenium\webdriver\remote\webdriver.py", line 454, in execute
    self.error_handler.check_response(response)
  File "c:\Python311\Lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 232, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: unknown error: net::ERR_CONNECTION_TIMED_OUT
  (Session info: chrome=140.0.7339.186)
Stacktrace:
	GetHandleVerifier [0x0x106c333+65459]
	GetHandleVerifier [0x0x106c374+65524]
	(No symbol) [0x0xe8d973]
	(No symbol) [0x0xe8afe4]
	(No symbol) [0x0xe7e3e2]
	(No symbol) [0x0xe7fe55]
	(No symbol) [0x0xe7e678]
	(No symbol) 

❌ Failed to scrape complete data for https://iisdt.in/product/certificate-in-energy-healing

🔍 Processing: https://iisdt.in/product/certificate-in-fruit-vegetable-processing-technology

🌐 Accessing URL: https://iisdt.in/product/certificate-in-fruit-vegetable-processing-technology
🔥 Scraping failed for https://iisdt.in/product/certificate-in-fruit-vegetable-processing-technology: Message: unknown error: net::ERR_CONNECTION_TIMED_OUT
  (Session info: chrome=140.0.7339.186)
Stacktrace:
	GetHandleVerifier [0x0x106c333+65459]
	GetHandleVerifier [0x0x106c374+65524]
	(No symbol) [0x0xe8d973]
	(No symbol) [0x0xe8afe4]
	(No symbol) [0x0xe7e3e2]
	(No symbol) [0x0xe7fe55]
	(No symbol) [0x0xe7e678]
	(No symbol) [0x0xe7e1ac]
	(No symbol) [0x0xe7deba]
	(No symbol) [0x0xe7bdc0]
	(No symbol) [0x0xe7c76b]
	(No symbol) [0x0xe914ee]
	(No symbol) [0x0xf1c207]
	(No symbol) [0x0xef9dfc]
	(No symbol) [0x0xf1b606]
	(No symbol) [0x0xef9bf6]
	(No symbol) [0x0xecb38e]
	(No symbol) [0x0xecc274]
	GetHandleVerifier

Traceback (most recent call last):
  File "C:\Users\taslim.siddiqui\AppData\Local\Temp\ipykernel_13048\3708329488.py", line 79, in scrape_course_data
    driver.get(url)
  File "c:\Python311\Lib\site-packages\selenium\webdriver\remote\webdriver.py", line 479, in get
    self.execute(Command.GET, {"url": url})
  File "c:\Python311\Lib\site-packages\selenium\webdriver\remote\webdriver.py", line 454, in execute
    self.error_handler.check_response(response)
  File "c:\Python311\Lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 232, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: unknown error: net::ERR_CONNECTION_TIMED_OUT
  (Session info: chrome=140.0.7339.186)
Stacktrace:
	GetHandleVerifier [0x0x106c333+65459]
	GetHandleVerifier [0x0x106c374+65524]
	(No symbol) [0x0xe8d973]
	(No symbol) [0x0xe8afe4]
	(No symbol) [0x0xe7e3e2]
	(No symbol) [0x0xe7fe55]
	(No symbol) [0x0xe7e678]
	(No symbol) 

❌ Failed to scrape complete data for https://iisdt.in/product/certificate-in-fruit-vegetable-processing-technology

🔍 Processing: https://iisdt.in/product/certificate-in-hand-sanitizer-manufacturing-technology

🌐 Accessing URL: https://iisdt.in/product/certificate-in-hand-sanitizer-manufacturing-technology
📘 Course Name: Certificate in Hand Sanitizer Manufacturing Technology
📝 About Course: The objective of this course is to equip learners with the technical knowledge, production skills, and quality control measures required to manufacture safe and effective hand sanitizers. The course covers formulation, raw material selection, production processes, packaging, regulatory compliance, and business aspects to help participants establish or manage a hand sanitizer manufacturing unit.
✅ Eligibility: Course Id: CHSMT/Q1001.10th Grade(High School) or Equivalent.
⏱️ Duration: One Month.
💰 Price: Rs.6,000
💳 Fee Structure:
Rs.6,000 
- All other fees remain unchanged
- Education loans are availa

Traceback (most recent call last):
  File "c:\Python311\Lib\site-packages\urllib3\connectionpool.py", line 534, in _make_request
    response = conn.getresponse()
               ^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\site-packages\urllib3\connection.py", line 516, in getresponse
    httplib_response = super().getresponse()
                       ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\http\client.py", line 1395, in getresponse
    response.begin()
  File "c:\Python311\Lib\http\client.py", line 325, in begin
    version, status, reason = self._read_status()
                              ^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\http\client.py", line 286, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\socket.py", line 706, in readinto
    return self._sock.recv_into(b)
           ^^^^^^^^^^^^^^^^^^^^^^^
TimeoutError: timed out

The above exception was the direct cause of th

❌ Failed to scrape complete data for https://iisdt.in/product/certificate-in-horticulture

🔍 Processing: https://iisdt.in/product/certificate-in-horticulture-and-landscaping-management

🌐 Accessing URL: https://iisdt.in/product/certificate-in-horticulture-and-landscaping-management
📘 Course Name: Course name not found
📝 About Course: About course not found
✅ Eligibility: Not available
⏱️ Duration: Not available
💰 Price: Not available
💳 Fee Structure:
- All other fees remain unchanged
- Education loans are available through leading banks and NBFCs.
👥 Who Should Take It:
Not available
📚 Syllabus (Old):
Not available
📚 Syllabus (New):
Not available
📜 Certificate: Certificate not available
💾 Saved data for: Course name not found

🔍 Processing: https://iisdt.in/product/certificate-in-human-behaviour-emotional-intelligence

🌐 Accessing URL: https://iisdt.in/product/certificate-in-human-behaviour-emotional-intelligence
📘 Course Name: Course name not found
📝 About Course: About course not foun

Traceback (most recent call last):
  File "c:\Python311\Lib\site-packages\urllib3\connectionpool.py", line 534, in _make_request
    response = conn.getresponse()
               ^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\site-packages\urllib3\connection.py", line 516, in getresponse
    httplib_response = super().getresponse()
                       ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\http\client.py", line 1395, in getresponse
    response.begin()
  File "c:\Python311\Lib\http\client.py", line 325, in begin
    version, status, reason = self._read_status()
                              ^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\http\client.py", line 286, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\socket.py", line 706, in readinto
    return self._sock.recv_into(b)
           ^^^^^^^^^^^^^^^^^^^^^^^
TimeoutError: timed out

The above exception was the direct cause of th

❌ Failed to scrape complete data for https://iisdt.in/product/certificate-in-ms-office-internet

🔍 Processing: https://iisdt.in/product/certificate-in-medical-records-and-documentation-management

🌐 Accessing URL: https://iisdt.in/product/certificate-in-medical-records-and-documentation-management
🔥 Scraping failed for https://iisdt.in/product/certificate-in-medical-records-and-documentation-management: HTTPConnectionPool(host='localhost', port=62888): Read timed out. (read timeout=120)


Traceback (most recent call last):
  File "c:\Python311\Lib\site-packages\urllib3\connectionpool.py", line 534, in _make_request
    response = conn.getresponse()
               ^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\site-packages\urllib3\connection.py", line 516, in getresponse
    httplib_response = super().getresponse()
                       ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\http\client.py", line 1395, in getresponse
    response.begin()
  File "c:\Python311\Lib\http\client.py", line 325, in begin
    version, status, reason = self._read_status()
                              ^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\http\client.py", line 286, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\socket.py", line 706, in readinto
    return self._sock.recv_into(b)
           ^^^^^^^^^^^^^^^^^^^^^^^
TimeoutError: timed out

The above exception was the direct cause of th

❌ Failed to scrape complete data for https://iisdt.in/product/certificate-in-medical-records-and-documentation-management

🔍 Processing: https://iisdt.in/product/certificate-in-multigrain-flour-processing-technology

🌐 Accessing URL: https://iisdt.in/product/certificate-in-multigrain-flour-processing-technology
📘 Course Name: Course name not found
📝 About Course: About course not found
✅ Eligibility: Not available
⏱️ Duration: Not available
💰 Price: Not available
💳 Fee Structure:
- All other fees remain unchanged
- Education loans are available through leading banks and NBFCs.
👥 Who Should Take It:
Not available
📚 Syllabus (Old):
Not available
📚 Syllabus (New):
Not available
📜 Certificate: Certificate not available
💾 Saved data for: Course name not found

🔍 Processing: https://iisdt.in/product/certificate-in-office-management-and-secretarial-practice

🌐 Accessing URL: https://iisdt.in/product/certificate-in-office-management-and-secretarial-practice
🔥 Scraping failed for https://iisdt.

Traceback (most recent call last):
  File "c:\Python311\Lib\site-packages\urllib3\connectionpool.py", line 534, in _make_request
    response = conn.getresponse()
               ^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\site-packages\urllib3\connection.py", line 516, in getresponse
    httplib_response = super().getresponse()
                       ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\http\client.py", line 1395, in getresponse
    response.begin()
  File "c:\Python311\Lib\http\client.py", line 325, in begin
    version, status, reason = self._read_status()
                              ^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\http\client.py", line 286, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Python311\Lib\socket.py", line 706, in readinto
    return self._sock.recv_into(b)
           ^^^^^^^^^^^^^^^^^^^^^^^
TimeoutError: timed out

The above exception was the direct cause of th

❌ Failed to scrape complete data for https://iisdt.in/product/certificate-in-office-management-and-secretarial-practice

🔍 Processing: https://iisdt.in/product/certificate-in-plastic-bottles-barrels-molding-business

🌐 Accessing URL: https://iisdt.in/product/certificate-in-plastic-bottles-barrels-molding-business
📘 Course Name: Course name not found
📝 About Course: About course not found
✅ Eligibility: Not available
⏱️ Duration: Not available
💰 Price: Not available
💳 Fee Structure:
- All other fees remain unchanged
- Education loans are available through leading banks and NBFCs.
👥 Who Should Take It:
Not available
📚 Syllabus (Old):
Not available
📚 Syllabus (New):
Not available
📜 Certificate: Certificate not available
💾 Saved data for: Course name not found

🔍 Processing: https://iisdt.in/product/certificate-in-printing-processes-and-press-management

🌐 Accessing URL: https://iisdt.in/product/certificate-in-printing-processes-and-press-management
📘 Course Name: Course name not found
📝 A

Traceback (most recent call last):
  File "C:\Users\taslim.siddiqui\AppData\Local\Temp\ipykernel_13048\3708329488.py", line 79, in scrape_course_data
    driver.get(url)
  File "c:\Python311\Lib\site-packages\selenium\webdriver\remote\webdriver.py", line 479, in get
    self.execute(Command.GET, {"url": url})
  File "c:\Python311\Lib\site-packages\selenium\webdriver\remote\webdriver.py", line 454, in execute
    self.error_handler.check_response(response)
  File "c:\Python311\Lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 232, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.WebDriverException: Message: unknown error: net::ERR_CONNECTION_TIMED_OUT
  (Session info: chrome=140.0.7339.186)
Stacktrace:
	GetHandleVerifier [0x0x106c333+65459]
	GetHandleVerifier [0x0x106c374+65524]
	(No symbol) [0x0xe8d973]
	(No symbol) [0x0xe8afe4]
	(No symbol) [0x0xe7e3e2]
	(No symbol) [0x0xe7fe55]
	(No symbol) [0x0xe7e678]
	(No symbol) 

❌ Failed to scrape complete data for https://iisdt.in/product/certificate-in-reiki

🔍 Processing: https://iisdt.in/product/diploma-in-management-retail-management

🌐 Accessing URL: https://iisdt.in/product/diploma-in-management-retail-management
📘 Course Name: Certificate in Retail Business Operations & Sales Management
📝 About Course: The Certificate in Retail Business Operations & Sales Management is a comprehensive program that equips students with the knowledge and skills needed to excel in the retail industry. By focusing on areas such as store operations, sales strategies, customer service, inventory management, and marketing, the diploma provides students with a solid foundation to succeed in a variety of roles in the retail sector.
✅ Eligibility: Completion of10+2 (higher Secondary)or equivalent.
⏱️ Duration: Three Months.
💰 Price: Rs.8,000
💳 Fee Structure:
Rs.8,000 
- All other fees remain unchanged
- Education loans are available through leading banks and NBFCs.
👥 Who Should 