# All course links for IISDT CPP

In [None]:
import time
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

def get_driver(headless=True):
    """Create the Chrome WebDriver used by the scraper.

    Args:
        headless: When truthy, ``--headless`` is appended to the Chrome flags.

    Returns:
        The ``webdriver.Chrome`` instance built from these options.
    """
    chrome_flags = [
        "--disable-blink-features=AutomationControlled",
        "--window-size=1280,720",
        "--log-level=3",
    ]
    if headless:
        chrome_flags.append("--headless")

    chrome_options = webdriver.ChromeOptions()
    for flag in chrome_flags:
        chrome_options.add_argument(flag)

    # ChromeDriverManager().install() supplies the driver location for Service.
    chrome_service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=chrome_service, options=chrome_options)

def get_all_course_links(base_url="https://iisdt.in/courses/"):
    """Scrape course names and links from an IISDT listing page.

    The page is opened in a visible Chrome window, scrolled once to the
    bottom, and its rendered HTML is also written to ``iisdt_page.html`` in
    the current working directory for debugging.

    Args:
        base_url: Listing page to load.

    Returns:
        A list of ``{"Course Name": str, "Course Link": str}`` dicts with one
        entry per distinct link, in the order the links appear on the page.
        Scraping errors are printed and yield whatever was collected so far.
    """
    # Substrings that mark an href as irrelevant (assets, admin, anchors, ...).
    skip_tokens = (
        "wp-content", "wp-admin", "javascript:", "#", "tel:", "mailto:",
        "legal-documents",
    )
    # Known category/program listing pages; a tuple so it can feed str.endswith.
    category_endings = (
        "/courses/", "-courses/", "-programs/", "/latest-courses-1/", "/top-courses/",
        "/advance-diploma-courses/", "/computer-education-courses/", "/management-courses/",
        "/office-management-courses/", "/professional-courses/", "/self-employment-courses/",
        "/skill-development-courses/", "/technology-courses/", "/job-oriented-courses/",
    )

    driver = get_driver(headless=False)
    all_courses = []

    try:
        print(f"🔎 Scraping: {base_url}")

        driver.get(base_url)
        time.sleep(5)
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3)

        soup = BeautifulSoup(driver.page_source, "html.parser")

        # Save the HTML for debugging
        with open("iisdt_page.html", "w", encoding="utf-8") as f:
            f.write(soup.prettify())
        print("💾 Saved page HTML to iisdt_page.html for inspection")

        all_links = soup.find_all("a", href=True)
        print(f"🔗 Found {len(all_links)} total links on the page")

        for link in all_links:
            href = link["href"]
            text = link.get_text(strip=True)

            # Skip empty or irrelevant links
            if not text or not href or any(token in href for token in skip_tokens):
                continue

            href_lower = href.lower()
            # Either an explicit /course/ URL, or an on-site link with a
            # non-trivial label that is not one of the category pages.
            is_course = (
                "/course/" in href_lower
                or (
                    href.startswith("https://iisdt.in/")
                    and len(text) > 5
                    and not href_lower.endswith(category_endings)
                )
            )

            if is_course:
                all_courses.append({
                    "Course Name": text,
                    "Course Link": href
                })
                print(f"📝 Found course: {text} -> {href}")

    except Exception as e:
        print(f"❌ Error scraping: {str(e)}")
    finally:
        # Always release the browser, including on KeyboardInterrupt.
        driver.quit()

    # Remove duplicates, keeping the first occurrence of each link
    unique_courses = []
    seen_links = set()
    for course in all_courses:
        if course["Course Link"] not in seen_links:
            unique_courses.append(course)
            seen_links.add(course["Course Link"])

    return unique_courses

def main():
    """Scrape the course list and export it to an Excel workbook.

    Prints a summary and the full table on success, or a failure notice when
    nothing was extracted (in which case no file is written).
    """
    print("🚀 Starting IISDT course extraction...")
    extracted = get_all_course_links()

    # Guard clause: nothing to export.
    if not extracted:
        print("❌ No courses were extracted")
        return

    output_path = r"C:\Users\taslim.siddiqui\Downloads\IISDT_All_Courses.xlsx"
    table = pd.DataFrame(extracted)
    table.to_excel(output_path, index=False)

    print(f"\n✅ Successfully extracted {len(table)} courses")
    print(f"📂 Saved to: {output_path}")
    print("\n📋 Extracted courses:")
    print(table.to_string(index=False))


if __name__ == "__main__":
    main()


In [4]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse
import pandas as pd
import re
from collections import deque
import time

# Entry page for the crawl; also the base used when resolving hrefs.
base_url = "https://iisdt.in/courses/"
# URLs the crawl loop has already taken off the queue.
visited = set()
# FIFO of URLs waiting to be crawled, seeded with the entry page.
queue = deque([base_url])
# Browser-like request headers sent by delayed_request() and the sitemap probe.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
}
# Accumulates {"Course Name": ..., "Course Link": ...} dicts across all passes.
courses = []

# Substrings that disqualify a URL. should_crawl() tests them against the
# lowercased URL path; is_likely_course_page() tests them against the whole
# lowercased URL.
# NOTE(review): matching is by substring, so short entries are broad, e.g.
# "job" also rejects "/job-oriented-courses/" and "admin" rejects any path
# containing "administration". Confirm this is intended.
excluded_patterns = [
    "/login", "/signup", "/cart", "/checkout", "/dashboard", "/profile",
    "/terms", "/privacy", "/contact", "/about", "/faq", 
    ".jpg", ".jpeg", ".png", ".gif", ".svg", ".pdf", ".doc", ".docx", ".zip", ".rar",
    "tel:", "mailto:", "javascript:", "#",
    "wp-admin", "wp-json", "feed", "testimonial", "gallery", 
    "event", "news", "blog", "career", "job", "faculty", "staff", 
    "alumni", "placement", "result", "contact-us", "about-us", 
    "privacy-policy", "terms-conditions", "refund-policy",
    "my-account", "wp-content", "cdn-cgi", "admin"
]

# Substrings looked for in lowercased link/title text by
# is_likely_course_page(); any single hit marks the text as course-like.
course_keywords = [
    'course', 'program', 'diploma', 'certificate', 'certification', 'training', 
    'workshop', 'seminar', 'learning', 'education', 'skill', 'module', 
    'curriculum', 'syllabus', 'batch', 'admission', 'enroll', 'register', 
    'apply', 'duration', 'fee', 'online', 'offline', 'class', 'lecture', 
    'tutorial', 'study', 'session', 'webinar', 'bootcamp', 'degree',
    'master', 'professional', 'advanced', 'basic', 'fundamental',
    'iisdt', 'institute', 'academy', 'school'
]

def should_crawl(url):
    """Return True when *url* is worth adding to the crawl queue.

    A URL is rejected if it was already visited, is off-site, has an
    excluded substring in its path, points at a downloadable file, or its
    path splits into more than four ``/``-separated pieces.
    """
    if url in visited or not url.startswith('https://iisdt.in/'):
        return False

    lowered_path = urlparse(url).path.lower()

    # Substring match against the shared exclusion list.
    if any(pattern in lowered_path for pattern in excluded_patterns):
        return False

    # Direct file downloads are never crawled.
    file_suffixes = ('.jpg', '.png', '.pdf', '.doc', '.zip', '.jpeg', '.gif')
    if lowered_path.endswith(file_suffixes):
        return False

    # "/a/b/".split("/") yields 4 pieces; anything deeper is skipped.
    return len(lowered_path.split('/')) <= 4

def is_likely_course_page(url, text, soup=None):
    """Heuristically decide whether a link or page represents a course.

    Args:
        url: Absolute URL of the link or page.
        text: Link text or candidate page title.
        soup: Optional parsed page. When given, course-specific phrases or CSS
            classes anywhere in the page are enough to accept it.

    Returns:
        True if the URL is on-site, not excluded, and either the page content
        or the text looks course-like; False otherwise.
    """
    stripped = text.strip() if text else ""
    if len(stripped) < 2:
        return False

    if not url.startswith('https://iisdt.in/'):
        return False

    # Skip obviously non-course pages (substring match on the whole URL)
    if any(exclude in url.lower() for exclude in excluded_patterns):
        return False

    # Check for course indicators in the page content if available
    if soup:
        page_text = soup.get_text().lower()

        # Phrases that typically appear on a course detail page
        course_indicators = [
            'enroll now', 'register now', 'apply now', 'admission', 'duration',
            'course fee', 'batch', 'curriculum', 'syllabus', 'module',
            'learning objectives', 'what you will learn', 'certification'
        ]
        if any(indicator in page_text for indicator in course_indicators):
            return True

        # Course-related HTML structures
        course_selectors = [
            '.course-content', '.course-details', '.program-details',
            '.curriculum', '.syllabus', '.module-list', '.batch-details',
            '.fee-structure', '.enrollment-form'
        ]
        if any(soup.select(selector) for selector in course_selectors):
            return True

    # Fall back to judging the text alone
    text_lower = text.lower()
    has_course_keyword = any(keyword in text_lower for keyword in course_keywords)

    # A plausible title: short, not all-caps, and free of navigation words
    nav_words = (
        'home', 'back', 'next', 'previous', 'menu', 'skip',
        'search', 'login', 'signup', 'cart'
    )
    is_reasonable_title = (
        len(stripped) >= 3 and
        not stripped.isupper() and
        len(stripped.split()) <= 10 and
        not any(nav_word in text_lower for nav_word in nav_words)
    )

    return has_course_keyword or is_reasonable_title

def extract_course_info(url, soup):
    """Pick the best available course name for a fetched page.

    Candidates are tried in order: heading/title-like elements accepted by
    ``is_likely_course_page``, the part of ``<title>`` before a ``|``, the
    first ``<h1>``, and finally a title derived from the URL path.
    """
    # 1) Heading-like elements, most specific selectors first in list order.
    title_selectors = (
        'h1', 'h2',
        '.entry-title', '.course-title', '.program-title', '.product-title',
        '.title', '.page-title', '.post-title', '.headline',
        '[class*="title"]', '[class*="heading"]',
        '.elementor-heading-title', '.wp-block-heading',
    )
    for selector in title_selectors:
        for node in soup.select(selector):
            candidate = node.get_text(strip=True)
            if candidate and len(candidate) > 3 and is_likely_course_page(url, candidate, soup):
                return candidate

    # 2) <title> tag, commonly "Course Name | Website Name".
    title_tag = soup.find('title')
    if title_tag:
        full_title = title_tag.get_text(strip=True)
        if full_title and '|' in full_title:
            leading_part = full_title.partition('|')[0].strip()
            if leading_part and len(leading_part) > 3:
                return leading_part

    # 3) First <h1>, without the course-likeness check.
    first_h1 = soup.find('h1')
    if first_h1:
        heading = first_h1.get_text(strip=True)
        if heading and len(heading) > 3:
            return heading

    # 4) Last resort: turn the URL slug into a readable title.
    url_path = urlparse(url).path
    slug_title = url_path.strip('/').replace('-', ' ').title()
    if url_path and url_path != '/':
        return slug_title

    return "Course - " + slug_title

# Announce the run and the entry URL before any request is made.
print("🚀 Starting comprehensive IISDT course extraction...")
print(f"🔍 Starting from: {base_url}")

# Add delay to be respectful to the server
def delayed_request(url, delay=1):
    """Sleep for *delay* seconds, then GET *url* with the shared headers.

    Args:
        url: Address to fetch.
        delay: Seconds to wait before sending the request.

    Returns:
        The ``requests.Response`` for a successful (non-4xx/5xx) reply.

    Raises:
        requests.RequestException: On connection problems, timeout (15 s), or
            an HTTP error status. Callers in this script wrap the call in
            ``try/except`` and skip the URL.
    """
    time.sleep(delay)
    response = requests.get(url, headers=headers, timeout=15)
    # Without this, 404/500 error pages would be parsed as if they were courses.
    response.raise_for_status()
    return response

# Scan the entry page with three link-discovery strategies. Results are
# appended to `courses`; crawlable URLs are pushed onto `queue`.
try:
    # Get the main courses page
    r = delayed_request(base_url)
    soup = BeautifulSoup(r.content, 'html.parser')
    
    print("📋 Strategy 1: Comprehensive link scanning...")
    
    # Get ALL links first, then filter
    all_links = soup.find_all('a', href=True)
    print(f"🔗 Found {len(all_links)} total links on the main page")
    
    for link in all_links:
        href = urljoin(base_url, link.get('href', '').strip())
        text = link.get_text(strip=True)
        
        # should_crawl() already rejects URLs present in `visited`.
        if should_crawl(href):
            queue.append(href)
        
        if is_likely_course_page(href, text):
            courses.append({
                "Course Name": text,
                "Course Link": href
            })
            print(f"✅ Found: {text}")
    
    # Links recorded so far, as a set so the duplicate checks below are O(1)
    # instead of rebuilding a list for every anchor.
    known_links = {c['Course Link'] for c in courses}
    
    print("🏗️ Strategy 2: Container-based scanning...")
    
    # Look for course containers more aggressively
    container_patterns = [
        '*[class*="course"]', '*[class*="program"]', '*[class*="product"]',
        '*[class*="card"]', '*[class*="item"]', '*[class*="grid"]',
        '.elementor-widget', '.wp-block-group', '.vc_column_container',
        '.listing-item', '.portfolio-item', '.service-item'
    ]
    
    for pattern in container_patterns:
        try:
            for container in soup.select(pattern):
                for link in container.find_all('a', href=True):
                    href = urljoin(base_url, link.get('href', '').strip())
                    text = link.get_text(strip=True)
                    
                    if href not in known_links and is_likely_course_page(href, text):
                        courses.append({
                            "Course Name": text,
                            "Course Link": href
                        })
                        known_links.add(href)
                        print(f"✅ Container found: {text}")
        except Exception:
            continue  # Skip if selector fails
    
    print("🌐 Strategy 3: Navigation menu scanning...")
    
    # Look for navigation menus that might contain courses
    nav_selectors = [
        'nav', '.nav', '.navigation', '.menu', '.main-menu',
        '.header-nav', '.site-nav', '#menu', '#navigation'
    ]
    
    for selector in nav_selectors:
        try:
            for nav in soup.select(selector):
                for link in nav.find_all('a', href=True):
                    href = urljoin(base_url, link.get('href', '').strip())
                    text = link.get_text(strip=True)
                    
                    # Grouped explicitly: `and` binds tighter than `or`, so the
                    # ungrouped form accepted every link mentioning "program",
                    # bypassing the duplicate and course-page checks.
                    text_lower = text.lower()
                    mentions_course = 'course' in text_lower or 'program' in text_lower
                    
                    if (href not in known_links and
                        mentions_course and
                        is_likely_course_page(href, text)):
                        courses.append({
                            "Course Name": text,
                            "Course Link": href
                        })
                        known_links.add(href)
                        print(f"✅ Navigation found: {text}")
        except Exception:
            continue  # Skip if selector fails

except Exception as e:
    print(f"❌ Error scanning main page: {e}")

# Enhanced crawling with better content analysis.
# Breadth-first walk over `queue`, capped at `max_pages` fetches. Each fetched
# page is tested as a course page itself and then mined for further links.
print("\n🌐 Starting comprehensive crawling...")
max_pages = 30
pages_crawled = 0

# Set mirrors of the shared state so membership tests are O(1) rather than a
# scan of the deque or a rebuilt list per anchor.
known_links = {c['Course Link'] for c in courses}
ever_queued = set(queue)

while queue and pages_crawled < max_pages:
    current_url = queue.popleft()
    
    if current_url in visited:
        continue
        
    visited.add(current_url)
    pages_crawled += 1
    
    try:
        print(f"🔍 Scanning ({pages_crawled}/{max_pages}): {current_url}")
        r = delayed_request(current_url, delay=0.5)
        soup = BeautifulSoup(r.content, 'html.parser')
        
        # First, check if this page itself is a course page
        page_title = extract_course_info(current_url, soup)
        if (current_url not in known_links and 
            is_likely_course_page(current_url, page_title, soup)):
            
            courses.append({
                "Course Name": page_title,
                "Course Link": current_url
            })
            known_links.add(current_url)
            print(f"✅ Page is course: {page_title}")
        
        # Extract links from this page
        for link in soup.find_all('a', href=True):
            # Relative hrefs must be resolved against the page they appear on,
            # not against the crawl's entry URL.
            href = urljoin(current_url, link.get('href', '').strip())
            text = link.get_text(strip=True)
            
            # Add to queue for crawling (should_crawl() also checks `visited`)
            if href not in ever_queued and should_crawl(href):
                queue.append(href)
                ever_queued.add(href)
            
            # Check if it's a course page
            if href not in known_links and is_likely_course_page(href, text):
                courses.append({
                    "Course Name": text,
                    "Course Link": href
                })
                known_links.add(href)
                print(f"✅ Found: {text}")
                
    except Exception as e:
        print(f"⚠️ Error crawling {current_url}: {e}")
        continue

# Final pass: fetch every collected link and replace its name with the one
# extract_course_info() derives from the page. Entries whose fetch or parse
# fails are carried over unchanged.
print("\n🎯 Enhancing course information...")
enhanced_courses = []
total_courses = len(courses)

for position, entry in enumerate(courses, start=1):
    course_link = entry['Course Link']
    try:
        print(f"🔍 Enhancing {position}/{total_courses}: {course_link}")
        response = delayed_request(course_link, delay=0.3)
        page = BeautifulSoup(response.content, 'html.parser')
        improved = {
            "Course Name": extract_course_info(course_link, page),
            "Course Link": course_link
        }
    except Exception as e:
        # Keep original if enhancement fails
        improved = entry
        print(f"⚠️ Could not enhance {course_link}: {e}")
    enhanced_courses.append(improved)

# Remove duplicates and clean data.
# Two links count as the same course when they differ only by letter case or
# a trailing slash; the first occurrence wins.
print("\n🧹 Cleaning and deduplicating data...")
unique_courses = []
seen_links = set()

for course in enhanced_courses:
    course_link = course["Course Link"]
    # Comparison key only. The link written out stays exactly as scraped,
    # because lowercasing a URL path can point it at a different resource.
    dedupe_key = course_link.lower().rstrip('/')
    if dedupe_key in seen_links:
        continue
    
    # Collapse runs of whitespace in the course name
    course_name = re.sub(r'\s+', ' ', course["Course Name"]).strip()
    
    if course_name and len(course_name) > 2:
        unique_courses.append({
            "Course Name": course_name,
            "Course Link": course_link
        })
        seen_links.add(dedupe_key)

# Sort by course name
unique_courses.sort(key=lambda x: x["Course Name"])

# Create DataFrame
df = pd.DataFrame(unique_courses)

# Save to Excel (the file is written even when no courses were found)
output_path = r"C:\Users\taslim.siddiqui\Downloads\IISDT_Course_Links_Comprehensive.xlsx"
df.to_excel(output_path, index=False)

print("\n✅ Extraction Complete!")
print(f"📊 Total courses found: {len(df)}")
print(f"📁 Saved to: {output_path}")

if len(df) > 0:
    print("\n📋 Sample of extracted courses:")
    print(df.head(15).to_string(index=False))
else:
    print("\n❌ No courses found.")
    print("💡 Trying alternative approach...")
    
    # Fallback: only probes which sitemap URLs exist; their contents are not
    # parsed yet.
    print("Attempting to find sitemap...")
    sitemap_urls = [
        "https://iisdt.in/sitemap.xml",
        "https://iisdt.in/sitemap_index.xml",
        "https://iisdt.in/wp-sitemap.xml"
    ]
    
    for sitemap_url in sitemap_urls:
        try:
            r = requests.get(sitemap_url, headers=headers, timeout=10)
        except requests.RequestException:
            # Network failure for this candidate; try the next one. A bare
            # `except:` here would also swallow KeyboardInterrupt.
            continue
        if r.status_code == 200:
            print(f"✅ Found sitemap: {sitemap_url}")
            # Parse sitemap logic can be added here

print("\n🎯 Extraction completed!")

🚀 Starting comprehensive IISDT course extraction...
🔍 Starting from: https://iisdt.in/courses/
📋 Strategy 1: Comprehensive link scanning...
🔗 Found 365 total links on the main page
✅ Found: Trending Courses
✅ Found: Top Courses
✅ Found: Latest Courses
✅ Found: All Courses
✅ Found: Advance Diploma courses
✅ Found: Computer Education Courses
✅ Found: Management Skills
✅ Found: Office Management Courses
✅ Found: Professional Skills
✅ Found: Industry-Specific Training
✅ Found: Skill Development Programs
✅ Found: Innovative Courses
✅ Found: Teacher Training Courses
✅ Found: Safety Management Courses
✅ Found: Skill Courses
✅ Found: Beauty and Wellness Courses
✅ Found: Yoga & Spiritual Skills
✅ Found: Certificate Courses
✅ Found: Legal Documents
✅ Found: Diploma Verification
✅ Found: Course combo
✅ Found: Courses
✅ Found: Awards
✅ Found: Verification
✅ Found: Our Centers
✅ Found: TP Registration
✅ Found: SaleProduct on saleAdvanced Professional Diploma in Computer Applications (APDCA)
✅ Found

In [None]:
import time
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager

# -------------------- DRIVER SETUP --------------------
def get_driver(headless=False):
    """Create the Chrome WebDriver used by this cell's scrapers.

    Args:
        headless: When truthy, ``--headless=new`` is appended to the flags.

    Returns:
        The ``webdriver.Chrome`` instance built from these options.
    """
    chrome_flags = [
        "--window-size=1280,720",
        "--disable-blink-features=AutomationControlled",
        "--disable-dev-shm-usage",
        "--no-sandbox",
    ]
    if headless:
        chrome_flags.append("--headless=new")

    chrome_options = webdriver.ChromeOptions()
    for flag in chrome_flags:
        chrome_options.add_argument(flag)

    # ChromeDriverManager().install() supplies the driver location for Service.
    chrome_service = Service(ChromeDriverManager().install())
    return webdriver.Chrome(service=chrome_service, options=chrome_options)

# -------------------- SCROLLING FUNCTION --------------------
def scroll_to_bottom(driver, pause=2):
    """Keep scrolling to the page bottom until its height stops changing.

    Args:
        driver: Object exposing ``execute_script`` (a Selenium WebDriver).
        pause: Seconds to wait after each scroll before re-measuring.
    """
    height_query = "return document.body.scrollHeight"
    previous_height = driver.execute_script(height_query)
    settled = False
    while not settled:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause)
        current_height = driver.execute_script(height_query)
        # An unchanged height after a scroll means nothing new was loaded.
        settled = current_height == previous_height
        previous_height = current_height

# -------------------- EXTRACT COURSE LINKS --------------------
def extract_course_links(html):
    """Collect product links of the form ``https://iisdt.in/product/<slug>/``.

    Args:
        html: Page markup to parse.

    Returns:
        A list of ``{"Course Name": str, "Course Link": str}`` dicts, one per
        matching anchor in document order (duplicates are not removed). The
        link has any query string or fragment removed. The name is the anchor
        text, or a title built from the URL slug when the text is shorter
        than three characters.
    """
    product_prefix = 'https://iisdt.in/product/'
    found = []

    for anchor in BeautifulSoup(html, "html.parser").find_all('a', href=True):
        target = anchor['href']
        if not target.startswith(product_prefix):
            continue

        # Drop query parameters first, then any fragment.
        bare_url = target.split('?')[0].split('#')[0]

        label = anchor.get_text(strip=True)
        if len(label) < 3:
            # No usable anchor text (e.g. an image link): derive it from the slug.
            slug = bare_url.rpartition('/product/')[2].strip('/')
            label = slug.replace('-', ' ').title()

        found.append({"Course Name": label, "Course Link": bare_url})

    return found

# -------------------- GET ALL COURSE PAGES --------------------
def get_all_course_pages():
    """Return the fixed list of listing/category URLs to scrape.

    A fresh list is returned on every call, so callers may modify it.
    """
    listing_pages = (
        "https://iisdt.in/courses/",
        "https://iisdt.in/trending-courses/",
        "https://iisdt.in/top-courses/",
        "https://iisdt.in/latest-courses-1/",
        "https://iisdt.in/advance-diploma-courses/",
        "https://iisdt.in/computer-education-courses/",
        "https://iisdt.in/management-courses/",
        "https://iisdt.in/office-management-courses/",
        "https://iisdt.in/professional-courses/",
        "https://iisdt.in/self-employment-courses/",
        "https://iisdt.in/skill-development-courses/",
        "https://iisdt.in/technology-courses/",
        "https://iisdt.in/job-oriented-courses/",
        "https://iisdt.in/teacher-training-courses/",
        "https://iisdt.in/fire-and-safety-courses/",
        "https://iisdt.in/skill-development-course/",
        "https://iisdt.in/beauty-and-wellness-courses/",
        "https://iisdt.in/yoga-courses/",
        "https://iisdt.in/certificate-courses/",
        "https://iisdt.in/course-combo/",
    )
    return list(listing_pages)

# -------------------- MAIN SCRAPING FUNCTION --------------------
def scrape_all_courses():
    """Visit every listing page and gather the product links found on each.

    A failure on one page is printed and the remaining pages are still
    processed. The browser is always closed before returning.

    Returns:
        A list of ``{"Course Name", "Course Link"}`` dicts, possibly with
        duplicates across pages.
    """
    browser = get_driver(headless=False)
    collected = []

    try:
        for page_url in get_all_course_pages():
            print(f"🌐 Scraping: {page_url}")

            try:
                browser.get(page_url)
                time.sleep(4)

                # Scroll to load all content
                scroll_to_bottom(browser)
                time.sleep(2)

                # Keep only well-formed product URLs (basic validation).
                accepted = [
                    item
                    for item in extract_course_links(browser.page_source)
                    if item['Course Link'].startswith('https://iisdt.in/product/')
                    and item['Course Link'].endswith('/')
                    and len(item['Course Link']) > 30
                ]

                print(f"✅ Found {len(accepted)} course links")
                collected.extend(accepted)

                # Preview the first three names, then summarise the rest.
                for item in accepted[:3]:
                    print(f"   📚 {item['Course Name']}")
                extra = len(accepted) - 3
                if extra > 0:
                    print(f"   ... and {extra} more")

            except Exception as e:
                print(f"❌ Error scraping {page_url}: {e}")

    except Exception as e:
        print(f"❌ Major error: {e}")

    finally:
        browser.quit()

    return collected

# -------------------- DIRECT SITEMAP APPROACH --------------------
def scrape_via_sitemap():
    """Fallback scraper: collect product links from shop-style listing pages.

    Despite its name, this function does not read a sitemap. It loads each
    candidate listing URL in turn and stops at the first one that yields any
    product links.

    Returns:
        A list of ``{"Course Name", "Course Link"}`` dicts (may be empty).
        Links have any query string or fragment removed.
    """
    product_prefix = 'https://iisdt.in/product/'
    driver = get_driver(headless=False)
    courses = []
    
    try:
        # Try the main shop page which might list all products
        shop_urls = [
            "https://iisdt.in/shop/",
            "https://iisdt.in/products/",
            "https://iisdt.in/courses/"
        ]
        
        for url in shop_urls:
            print(f"🛍️ Trying shop page: {url}")
            driver.get(url)
            time.sleep(4)
            scroll_to_bottom(driver)
            
            # Use more specific selectors for WooCommerce products
            product_elements = driver.find_elements(By.CSS_SELECTOR, 'a[href*="/product/"]')
            print(f"Found {len(product_elements)} product elements")
            
            for element in product_elements:
                try:
                    link = element.get_attribute('href')
                    name = element.text.strip()
                except Exception:
                    # Reading the element failed (presumably it went stale);
                    # skip just this anchor. Not a bare `except`, so Ctrl-C
                    # still interrupts the run.
                    continue
                
                if not link or not link.startswith(product_prefix):
                    continue
                
                clean_url = link.split('?')[0].split('#')[0]
                if len(name) < 3:
                    # Build the name from the cleaned URL so it carries no
                    # trailing "/" or query string.
                    slug = clean_url.split('/product/')[-1].strip('/')
                    name = slug.replace('-', ' ').title()
                
                courses.append({
                    "Course Name": name,
                    "Course Link": clean_url
                })
            
            if courses:
                break
                
    finally:
        driver.quit()
    
    return courses

# -------------------- DEDUPLICATE AND CLEAN --------------------
def clean_courses(courses):
    """Build a de-duplicated, tidied DataFrame from scraped course dicts.

    Args:
        courses: Iterable of ``{"Course Name", "Course Link"}`` dicts.

    Returns:
        A DataFrame with one row per distinct ``Course Link`` (first
        occurrence kept, original index labels preserved). String names have
        whitespace collapsed and are title-cased. An empty input yields an
        empty DataFrame.
    """
    frame = pd.DataFrame(courses)
    if frame.shape[0] == 0:
        return frame

    def tidy(value):
        # Non-string names (e.g. missing values) pass through untouched.
        if not isinstance(value, str):
            return value
        return ' '.join(value.split()).title()

    unique_rows = frame.drop_duplicates(subset='Course Link')
    return unique_rows.assign(**{'Course Name': unique_rows['Course Name'].apply(tidy)})

# -------------------- MAIN FUNCTION --------------------
def main():
    """Collect IISDT product links and export them to an Excel workbook.

    Category pages are scraped first. If that finds nothing, the shop-page
    fallback is tried. Results are de-duplicated, saved, and previewed on
    stdout. Nothing is written when no links are found.
    """
    print("🚀 Starting IISDT course extraction...")
    print("📚 Looking for course links like: https://iisdt.in/product/course-name/")
    
    # Method 1: Scrape all category pages
    print("\n🔍 Method 1: Scraping category pages...")
    courses = scrape_all_courses()
    
    # Method 2: If first method fails, try the shop pages
    if not courses:
        print("\n🔍 Method 2: Trying shop pages...")
        courses = scrape_via_sitemap()
    
    # Clean and deduplicate
    df = clean_courses(courses)
    
    if len(df) > 0:
        output_path = r"C:\Users\taslim.siddiqui\Downloads\IISDT_Course_Links.xlsx"
        df.to_excel(output_path, index=False)
        
        print(f"\n🎉 SUCCESS! Found {len(df)} unique course links!")
        print(f"📂 Saved to: {output_path}")
        
        print("\n📋 Sample of courses found:")
        print(df.head(20).to_string(index=False))
        
        # Show some example links. Number them by position: the DataFrame's
        # index labels have gaps after duplicate rows are dropped.
        print("\n🔗 Example links found:")
        for position, link in enumerate(df['Course Link'].head(10), start=1):
            print(f"   {position}. {link}")
            
    else:
        print("❌ No course links were found.")
        print("💡 The website structure might have changed or require different approach.")

# -------------------- RUN SCRIPT --------------------
if __name__ == "__main__":
    main()