In [None]:
import os
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

Set up Chrome options

In [None]:
# Browser configuration shared by every WebDriver instance created in this notebook.
chrome_options = Options()
#chrome_options.add_argument("--headless")  # Uncomment if you want to run in headless mode
for _chrome_flag in ("--no-sandbox", "--disable-gpu"):
    chrome_options.add_argument(_chrome_flag)
del _chrome_flag  # keep the notebook namespace free of the loop helper

In [None]:
def get_courses_link(college_name, timeout=10):
    """Find the "Course & Fees Details" page URL for a college on collegedunia.com.

    The function opens the site's home page, types the college name into the
    search bar, follows the first suggestion and then scans the college page's
    navigation menu for the entry whose ``title`` contains
    "- Course & Fees Details".

    Args:
        college_name: Name of the college to search for. Surrounding
            whitespace is ignored.
        timeout: Maximum number of seconds to wait for each page element
            (search box, suggestion list, navigation menu) to become ready.

    Returns:
        The ``href`` of the course-details link as a string, or ``None`` when
        the name is empty/not a string, no matching link exists, or any
        browser error occurs (the error is printed, not raised).
    """
    # Reject None, non-strings and blank names before launching a browser.
    if not isinstance(college_name, str) or not college_name.strip():
        print("College name cannot be empty")
        return None
    query = college_name.strip()

    # Initialize the WebDriver (e.g., ChromeDriver)
    driver = webdriver.Chrome(options=chrome_options)

    try:
        wait = WebDriverWait(driver, timeout)

        # Open the web page
        driver.get("https://collegedunia.com/")

        # Open the search bar. Explicit waits replace fixed sleeps: they return
        # as soon as the element is usable and fail loudly when it never is.
        search_button_locator = (By.CSS_SELECTOR, 'button.search-bar')
        wait.until(EC.element_to_be_clickable(search_button_locator)).click()

        # Enter the college name in the search input
        search_input_locator = (By.CSS_SELECTOR, 'input.search-input')
        search_input = wait.until(EC.element_to_be_clickable(search_input_locator))
        search_input.send_keys(query)

        # Wait for the suggestion dropdown and pick its first entry, which is
        # treated as the best match for the typed name.
        dropdown_locator = (By.CSS_SELECTOR, "div.jsx-2090295176.desktop-search-list")
        wait.until(EC.visibility_of_element_located(dropdown_locator))
        first_item_locator = (By.CSS_SELECTOR, "div.jsx-2090295176.desktop-search-list a")
        first_item = wait.until(EC.element_to_be_clickable(first_item_locator))
        print("First item name:", first_item.text)
        first_item.click()

        # Wait for the college page's navigation menu to be visible
        nav_menu_locator = (By.CSS_SELECTOR, "nav.jsx-3207461571")
        wait.until(EC.visibility_of_element_located(nav_menu_locator))

        # Scan every <li> of the navigation menu for the course-details entry.
        for li in driver.find_elements(By.CSS_SELECTOR, "nav.jsx-3207461571 li"):
            # get_attribute returns None when the <li> has no title; treat that
            # as "no match" instead of letting a TypeError abort the whole scan.
            title = li.get_attribute("title") or ""
            if "- Course & Fees Details" not in title:
                continue
            anchors = li.find_elements(By.TAG_NAME, "a")
            href_value = anchors[0].get_attribute("href") if anchors else None
            if href_value:
                print("Extracted href:", href_value)
                return href_value

        # If no matching <li> found
        print("College name is not valid or no course details link found")
        return None
    except Exception as e:
        # Best-effort scraper: report the problem and signal failure with None.
        print(f"An error occurred: {e}")
        return None
    finally:
        # Always close the browser, even on failure.
        driver.quit()

In [None]:
# Marker strings stored in place of course names when a degree has none.
_NO_ADDITIONAL_COURSES = "No additional courses found"
_NO_COURSES_AVAILABLE = "No courses available"
_COURSE_PLACEHOLDERS = frozenset({_NO_ADDITIONAL_COURSES, _NO_COURSES_AVAILABLE})


def _card_degree_title(card):
    """Return the first non-empty ``data-csm-title`` among the card's links, else None."""
    for link in card.find_elements(By.CSS_SELECTOR, 'a[data-csm-title]'):
        title = link.get_attribute('data-csm-title')
        if title:
            return title
    return None


def _collect_expanded_courses(driver, card, button):
    """Click the card's "View X Courses" button and return the listed course names as a set."""
    WebDriverWait(driver, 10).until(EC.element_to_be_clickable(button)).click()
    print(f'Clicked Button: {button.get_attribute("data-csm-title")}')
    # Fixed pause kept from the original flow. NOTE(review): presumably the
    # expanded table is filled in asynchronously after the click - confirm.
    time.sleep(5)

    details_locator = (By.CSS_SELECTOR, 'div.course-other-detail')
    # Wait until some details section is visible on the page ...
    section = WebDriverWait(driver, 15).until(
        EC.visibility_of_element_located(details_locator)
    )
    # ... but prefer the section that belongs to THIS card. A page-wide lookup
    # returns the first visible section, which may be one left open by an
    # earlier card and would attach the wrong courses to this degree.
    own_sections = [
        candidate
        for candidate in card.find_elements(*details_locator)
        if candidate.is_displayed()
    ]
    if own_sections:
        section = own_sections[0]

    table = section.find_element(By.CSS_SELECTOR, 'table')
    course_names = set()
    for row in table.find_elements(By.TAG_NAME, 'tr'):
        cells = row.find_elements(By.TAG_NAME, 'td')
        if not cells:
            continue  # rows without <td> cells carry no course link
        # The course name is the text of the first titled link in the first cell.
        name_links = cells[0].find_elements(By.CSS_SELECTOR, 'a[data-csm-title]')
        if name_links:
            name = (name_links[0].text or "").strip()
            if name:
                course_names.add(name)
    return course_names


def get_course(url):
    """Scrape degrees and their courses from a college's course-details page.

    Each ``div.course-card`` on the page is treated as one degree. When the
    card has a "View X Courses" button it is clicked and the course names are
    read from the table that appears; otherwise the degree is recorded with a
    placeholder.

    Args:
        url: Address of the "Course & Fees Details" page.

    Returns:
        A dict mapping each degree title to a set of course names. A degree
        with no courses maps to ``{"No additional courses found"}`` (button
        present but table empty) or ``{"No courses available"}`` (no button).
        An empty dict is returned for an empty/invalid URL or when the page
        cannot be loaded; errors are printed, not raised.
    """
    results = {}

    # Do not launch a browser for an unusable URL.
    if not isinstance(url, str) or not url.strip():
        print("Course page URL cannot be empty")
        return results

    # Initialize the WebDriver (e.g., ChromeDriver)
    driver = webdriver.Chrome(options=chrome_options)
    try:
        # Access the provided URL
        driver.get(url)

        # Locate all course cards; each one holds the details of one degree.
        course_card_divs = WebDriverWait(driver, 15).until(
            EC.presence_of_all_elements_located(
                (By.CSS_SELECTOR, 'div.jsx-3378387051.course-card')
            )
        )
        for card in course_card_divs:
            try:
                degree = _card_degree_title(card)
                if not degree:
                    # Without a title there is no key to file the courses under.
                    print('Skipping course card without a degree title')
                    continue
                degree_courses = results.setdefault(degree, set())

                # The "View X Courses" button, when present, expands the course list.
                view_courses_buttons = card.find_elements(
                    By.CSS_SELECTOR, 'div.show-spec-btn button'
                )
                if view_courses_buttons:
                    found = _collect_expanded_courses(driver, card, view_courses_buttons[0])
                    placeholder = _NO_ADDITIONAL_COURSES
                else:
                    found = set()
                    placeholder = _NO_COURSES_AVAILABLE

                if found:
                    # Real course names supersede any earlier placeholder.
                    degree_courses.difference_update(_COURSE_PLACEHOLDERS)
                    degree_courses.update(found)
                elif not degree_courses:
                    # Only mark the degree as empty when nothing is known about it yet.
                    degree_courses.add(placeholder)
            except Exception as e:
                # One broken card must not stop the remaining cards.
                print(f'Error handling course card: {e}')
    except Exception as e:
        print(f'Error accessing the URL: {e}')
    finally:
        # Close the WebDriver
        driver.quit()
    # The result is returned as a dictionary: degree -> set of course names.
    return results

Helper function that additionally writes the scraped results to a text file

In [None]:
def write_to_file(results, college_name):
    """Write the scraped degrees and courses to ``<college_name>_courses.txt``.

    The file is created in the current working directory; spaces in
    ``college_name`` are replaced with underscores to build the file name.
    ``results`` is only read, never modified.

    Args:
        results: Dict mapping a degree title to a set of course names, as
            returned by ``get_course``. Values that are not non-empty sets
            produce only the "Degree:" line.
        college_name: College name used to derive the output file name.
    """
    # Create file path based on college name
    file_name = f"{college_name.replace(' ', '_')}_courses.txt"

    # Marker strings that stand in for "this degree has no course list".
    placeholders = {"No additional courses found", "No courses available"}

    # Explicit UTF-8 so non-ASCII degree/course names do not depend on the
    # platform's default encoding.
    with open(file_name, 'w', encoding='utf-8') as file:
        for degree, courses in results.items():
            file.write(f"Degree: {degree}\n")
            if isinstance(courses, set) and courses:
                if len(courses) == 1 and courses <= placeholders:
                    # Read the single marker without pop(), which would empty
                    # the caller's set as a side effect.
                    file.write(f"  - {next(iter(courses))}\n")
                else:
                    file.write("Courses:\n")
                    # Sets are unordered; sort so the file is reproducible.
                    for course in sorted(courses, key=str):
                        file.write(f"  - {course}\n")
            file.write("\n" + "-"*50 + "\n")  # Separator line

Example usage: replace `college_name` below with the name of the desired college.

In [None]:
if __name__ == "__main__":
    # Demo run: look up the course-details page, scrape it, then save the
    # scraped degrees and courses to a text file named after the college.
    college_name = "Anjuman college  of engineering  "
    link = get_courses_link(college_name)
    if not link:
        # The lookup returned nothing, so there is no page to scrape.
        print("Could not retrieve courses link.")
    else:
        results = get_course(link)
        write_to_file(results, college_name)