# Scraping RedBus Data

In [1]:
# Install Required Libaries

!pip install selenium





[notice] A new release of pip is available: 24.3.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


# KSRTC

In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
from urllib.parse import urljoin # To handle relative URLs

# URL of the website
BASE_URL = "https://www.redbus.in" # Define base URL for joining relative paths
URL = "https://www.redbus.in/online-booking/ksrtc-kerala/?utm_source=rtchometile"

def initialize_driver():
    """Initializes and returns a Chrome WebDriver."""
    driver = webdriver.Chrome()
    driver.maximize_window()
    return driver

def load_page(driver, url):
    """Loads a given URL and waits for a few seconds."""
    driver.get(url)
    time.sleep(10)  # Wait for the page to load

def scrape_ksrtc_routes(driver):
    """
    Scrapes bus route links and names from the initial KSRTC page.
    """
    route_elements = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, 'route'))
    )
    
    bus_routes_link = []
    bus_routes_name = []
    
    for route in route_elements:
        relative_path = route.get_attribute('href')
        if relative_path:
            full_url = urljoin(BASE_URL, relative_path)
            bus_routes_link.append(full_url)
            bus_routes_name.append(route.text.strip())
            
    return bus_routes_link, bus_routes_name

def scroll_to_end(driver):
    """Scrolls the page to the end to load dynamic content."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(15)  # Wait for new content to load
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    time.sleep(5) # Give a little more time after scroll for elements to render

def get_bus_details_from_page(driver, route_name, url):
    """
    Extracts bus details from the current page state using common locators.
    """
    bus_details_list = []
    
    # Find all elements for each detail using their class names
    bus_name_elements = driver.find_elements(By.CLASS_NAME, "travelsName___495898")
    bus_type_elements = driver.find_elements(By.CLASS_NAME, "busType___13ff4b")
    departing_time_elements = driver.find_elements(By.CLASS_NAME, "boardingTime___aced27")
    duration_elements = driver.find_elements(By.CLASS_NAME, "duration___5b44b1")
    reaching_time_elements = driver.find_elements(By.CLASS_NAME, "droppingTime___616c2f")
    star_rating_elements = driver.find_elements(By.CLASS_NAME, "rating___7724f1")
    price_elements = driver.find_elements(By.CLASS_NAME, "finalFare___898bb7")
    seat_availability_elements = driver.find_elements(By.CLASS_NAME, "totalSeats___ba48cf")

    # Iterate through the longest list to ensure we cover all potential bus listings
    max_elements = max(len(bus_name_elements), len(bus_type_elements), len(departing_time_elements),
                       len(duration_elements), len(reaching_time_elements), len(star_rating_elements),
                       len(price_elements), len(seat_availability_elements))

    for i in range(max_elements):
        # Safely get text, providing 'N/A' or '0' if element is not found for a given index
        bus_name = bus_name_elements[i].text if i < len(bus_name_elements) else 'N/A'
        bus_type = bus_type_elements[i].text if i < len(bus_type_elements) else 'N/A'
        departing_time = departing_time_elements[i].text if i < len(departing_time_elements) else 'N/A'
        duration = duration_elements[i].text if i < len(duration_elements) else 'N/A'
        reaching_time = reaching_time_elements[i].text if i < len(reaching_time_elements) else 'N/A'
        star_rating = star_rating_elements[i].text if i < len(star_rating_elements) else '0' 
        price = price_elements[i].text if i < len(price_elements) else 'N/A'
        seat_availability = seat_availability_elements[i].text if i < len(seat_availability_elements) else '0 seats' 

        bus_detail = {
            "Bus_Route_Name": route_name,
            "Bus_Route_Link": url,
            "Bus_Name": bus_name,
            "Bus_Type": bus_type,
            "Departing_Time": departing_time,
            "Duration": duration,
            "Reaching_Time": reaching_time,
            "Star_Rating": star_rating,
            "Price": price,
            "Seat_Availability": seat_availability
        }
        bus_details_list.append(bus_detail)
    return bus_details_list


def scrape_bus_details(driver, url, route_name):
    """
    Scrapes both private and KSRTC bus details for a specific route.
    Handles clicking the KSRTC "Show buses" button and collects ALL results.
    """
    print(f"Navigating to route: {route_name} - {url}")
    all_buses_for_this_route = [] # This list will hold all bus details including potential duplicates
    
    try:
        driver.get(url)
        time.sleep(10)  # Allow the page to load

        # --- Phase 1: Scrape initially visible (private) bus details ---
        print("Scrolling to load private bus items...") # Updated log
        scroll_to_end(driver)
        print("Initial scroll complete.") # Updated log
        
        initial_bus_details = get_bus_details_from_page(driver, route_name, url)
        all_buses_for_this_route.extend(initial_bus_details)
        # Note: The log for 'Scraped X initial bus entries' is removed to match the desired format.

        # --- Phase 2: Handle KSRTC section and scrape all (private + KSRTC) buses ---
        try:
            print("Checking for KSRTC Kerala Buses section and 'Show buses' button...") 
            # Locate the main KSRTC block using its unique class
            ksrtc_block = WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.CLASS_NAME, "rtcTuple___1041ba"))
            )
            
            # Now, locate the "Show buses" button WITHIN this specific KSRTC block
            show_buses_button = ksrtc_block.find_element(By.CLASS_NAME, "primaryButton___469d04 ")
            
            if show_buses_button.is_displayed() and show_buses_button.is_enabled():
                # Removed "Found 'Show buses' button for KSRTC. Clicking it..." to match log
                driver.execute_script("arguments[0].click();", show_buses_button)
                time.sleep(10) # Wait for KSRTC buses to load after clicking
                # Removed "Clicked 'Show buses' button. Waiting for KSRTC buses to load." to match log

                # After clicking KSRTC, scroll again to load all new KSRTC buses
                print("Scrolling again to load all bus items..") # Updated log
                scroll_to_end(driver)
                # Removed "Post-KSRTC scroll complete. Scraping all bus details again..." to match log

                # Scrape ALL bus details again (this will include KSRTC and any remaining private)
                all_buses_after_ksrtc_expansion = get_bus_details_from_page(driver, route_name, url)
                all_buses_for_this_route.extend(all_buses_after_ksrtc_expansion) # Add all
                print(f"Scraped {len(all_buses_after_ksrtc_expansion)} bus entries after KSRTC expansion.") # Updated log
                print(f"Total bus entries for this route: {len(all_buses_for_this_route)}") # Updated log

            else:
                print("KSRTC 'Show buses' button not interactable or visible after finding block. Only initial buses scraped.")

        except Exception as e:
            print(f"KSRTC Kerala Buses section or 'Show buses' button not found or an error occurred: {e}. Only initial buses scraped.")
            # In this case, all_buses_for_this_route will only contain the initial scrape.

        return all_buses_for_this_route

    except Exception as e:
        print(f"Error occurred while navigating or scraping route {url}: {str(e)}")
        return []

# List to hold all bus details from all routes
all_bus_details_overall = []

# Main scraping process
def run_full_scraping():
    global driver
    driver = initialize_driver()
    load_page(driver, URL)  # Load the initial KSRTC page to get routes

    # Scrape all route links from the initial page
    all_bus_routes_link, all_bus_routes_name = scrape_ksrtc_routes(driver)
    print(f"Found {len(all_bus_routes_link)} routes to process.")

    for link, name in zip(all_bus_routes_link, all_bus_routes_name):
        bus_details_for_route = scrape_bus_details(driver, link, name)
        if bus_details_for_route:
            all_bus_details_overall.extend(bus_details_for_route)

    # Convert the list of dictionaries to a DataFrame
    df = pd.DataFrame(all_bus_details_overall)

    # Save the DataFrame to a CSV file
    csv_filename = 'kerala_bus_details.csv' 
    df.to_csv(csv_filename, index=False)

    # Close the driver
    driver.quit()

    print(f"Scraping completed and data saved to '{csv_filename}'")
    print(f"Total bus entries across all routes: {len(df)}") # Updated log

# Execute the full scraping process
if __name__ == "__main__":
    run_full_scraping()

Found 9 routes to process.
Navigating to route: Bangalore to Kozhikode - https://www.redbus.in/bus-tickets/bangalore-to-kozhikode
Scrolling to load private bus items...
Initial scroll complete.
Checking for KSRTC Kerala Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 10 bus entries after KSRTC expansion.
Total bus entries for this route: 20
Navigating to route: Kozhikode to Ernakulam - https://www.redbus.in/bus-tickets/kozhikode-to-ernakulam
Scrolling to load private bus items...
Initial scroll complete.
Checking for KSRTC Kerala Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 10 bus entries after KSRTC expansion.
Total bus entries for this route: 20
Navigating to route: Ernakulam to Kozhikode - https://www.redbus.in/bus-tickets/ernakulam-to-kozhikode
Scrolling to load private bus items...
Initial scroll complete.
Checking for KSRTC Kerala Buses section and 'Show buses' button...
Scrolling again to load a

# APSRTC

In [4]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
from urllib.parse import urljoin # To handle relative URLs

# URL of the website
BASE_URL = "https://www.redbus.in" # Define base URL for joining relative paths
URL = "https://www.redbus.in/online-booking/apsrtc/?utm_source=rtchometile"

def initialize_driver():
    """Initializes and returns a Chrome WebDriver."""
    driver = webdriver.Chrome()
    driver.maximize_window()
    return driver

def load_page(driver, url):
    """Loads a given URL and waits for a few seconds."""
    driver.get(url)
    time.sleep(10)  # Wait for the page to load

def scrape_ksrtc_routes(driver):
    """
    Scrapes bus route links and names from the initial KSRTC page.
    """
    route_elements = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, 'route'))
    )
    
    bus_routes_link = []
    bus_routes_name = []
    
    for route in route_elements:
        relative_path = route.get_attribute('href')
        if relative_path:
            full_url = urljoin(BASE_URL, relative_path)
            bus_routes_link.append(full_url)
            bus_routes_name.append(route.text.strip())
            
    return bus_routes_link, bus_routes_name

def scroll_to_end(driver):
    """Scrolls the page to the end to load dynamic content."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(15)  # Wait for new content to load
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    time.sleep(5) # Give a little more time after scroll for elements to render

def get_bus_details_from_page(driver, route_name, url):
    """
    Extracts bus details from the current page state using common locators.
    """
    bus_details_list = []
    
    # Find all elements for each detail using their class names
    bus_name_elements = driver.find_elements(By.CLASS_NAME, "travelsName___495898")
    bus_type_elements = driver.find_elements(By.CLASS_NAME, "busType___13ff4b")
    departing_time_elements = driver.find_elements(By.CLASS_NAME, "boardingTime___aced27")
    duration_elements = driver.find_elements(By.CLASS_NAME, "duration___5b44b1")
    reaching_time_elements = driver.find_elements(By.CLASS_NAME, "droppingTime___616c2f")
    star_rating_elements = driver.find_elements(By.CLASS_NAME, "rating___7724f1")
    price_elements = driver.find_elements(By.CLASS_NAME, "finalFare___898bb7")
    seat_availability_elements = driver.find_elements(By.CLASS_NAME, "totalSeats___ba48cf")

    # Iterate through the longest list to ensure we cover all potential bus listings
    max_elements = max(len(bus_name_elements), len(bus_type_elements), len(departing_time_elements),
                       len(duration_elements), len(reaching_time_elements), len(star_rating_elements),
                       len(price_elements), len(seat_availability_elements))

    for i in range(max_elements):
        # Safely get text, providing 'N/A' or '0' if element is not found for a given index
        bus_name = bus_name_elements[i].text if i < len(bus_name_elements) else 'N/A'
        bus_type = bus_type_elements[i].text if i < len(bus_type_elements) else 'N/A'
        departing_time = departing_time_elements[i].text if i < len(departing_time_elements) else 'N/A'
        duration = duration_elements[i].text if i < len(duration_elements) else 'N/A'
        reaching_time = reaching_time_elements[i].text if i < len(reaching_time_elements) else 'N/A'
        star_rating = star_rating_elements[i].text if i < len(star_rating_elements) else '0' 
        price = price_elements[i].text if i < len(price_elements) else 'N/A'
        seat_availability = seat_availability_elements[i].text if i < len(seat_availability_elements) else '0 seats' 

        bus_detail = {
            "Bus_Route_Name": route_name,
            "Bus_Route_Link": url,
            "Bus_Name": bus_name,
            "Bus_Type": bus_type,
            "Departing_Time": departing_time,
            "Duration": duration,
            "Reaching_Time": reaching_time,
            "Star_Rating": star_rating,
            "Price": price,
            "Seat_Availability": seat_availability
        }
        bus_details_list.append(bus_detail)
    return bus_details_list


def scrape_bus_details(driver, url, route_name):
    """
    Scrapes both private and KSRTC bus details for a specific route.
    Handles clicking the KSRTC "Show buses" button and collects ALL results.
    """
    print(f"Navigating to route: {route_name} - {url}")
    all_buses_for_this_route = [] # This list will hold all bus details including potential duplicates
    
    try:
        driver.get(url)
        time.sleep(10)  # Allow the page to load

        # --- Phase 1: Scrape initially visible (private) bus details ---
        print("Scrolling to load private bus items...") # Updated log
        scroll_to_end(driver)
        print("Initial scroll complete.") # Updated log
        
        initial_bus_details = get_bus_details_from_page(driver, route_name, url)
        all_buses_for_this_route.extend(initial_bus_details)
        # Note: The log for 'Scraped X initial bus entries' is removed to match the desired format.

        # --- Phase 2: Handle KSRTC section and scrape all (private + KSRTC) buses ---
        try:
            print("Checking for APSRTC Buses section and 'Show buses' button...") 
            # Locate the main KSRTC block using its unique class
            ksrtc_block = WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.CLASS_NAME, "rtcTuple___1041ba"))
            )
            
            # Now, locate the "Show buses" button WITHIN this specific KSRTC block
            show_buses_button = ksrtc_block.find_element(By.CLASS_NAME, "primaryButton___469d04 ")
            
            if show_buses_button.is_displayed() and show_buses_button.is_enabled():
                # Removed "Found 'Show buses' button for KSRTC. Clicking it..." to match log
                driver.execute_script("arguments[0].click();", show_buses_button)
                time.sleep(10) # Wait for KSRTC buses to load after clicking
                # Removed "Clicked 'Show buses' button. Waiting for KSRTC buses to load." to match log

                # After clicking KSRTC, scroll again to load all new KSRTC buses
                print("Scrolling again to load all bus items..") # Updated log
                scroll_to_end(driver)
                # Removed "Post-KSRTC scroll complete. Scraping all bus details again..." to match log

                # Scrape ALL bus details again (this will include KSRTC and any remaining private)
                all_buses_after_ksrtc_expansion = get_bus_details_from_page(driver, route_name, url)
                all_buses_for_this_route.extend(all_buses_after_ksrtc_expansion) # Add all
                print(f"Scraped {len(all_buses_after_ksrtc_expansion)} bus entries after KSRTC expansion.") # Updated log
                print(f"Total bus entries for this route: {len(all_buses_for_this_route)}") # Updated log

            else:
                print("KSRTC 'Show buses' button not interactable or visible after finding block. Only initial buses scraped.")

        except Exception as e:
            print(f"KSRTC Kerala Buses section or 'Show buses' button not found or an error occurred: {e}. Only initial buses scraped.")
            # In this case, all_buses_for_this_route will only contain the initial scrape.

        return all_buses_for_this_route

    except Exception as e:
        print(f"Error occurred while navigating or scraping route {url}: {str(e)}")
        return []

# List to hold all bus details from all routes
all_bus_details_overall = []

# Main scraping process
def run_full_scraping():
    global driver
    driver = initialize_driver()
    load_page(driver, URL)  # Load the initial KSRTC page to get routes

    # Scrape all route links from the initial page
    all_bus_routes_link, all_bus_routes_name = scrape_ksrtc_routes(driver)
    print(f"Found {len(all_bus_routes_link)} routes to process.")

    for link, name in zip(all_bus_routes_link, all_bus_routes_name):
        bus_details_for_route = scrape_bus_details(driver, link, name)
        if bus_details_for_route:
            all_bus_details_overall.extend(bus_details_for_route)

    # Convert the list of dictionaries to a DataFrame
    df = pd.DataFrame(all_bus_details_overall)

    # Save the DataFrame to a CSV file
    csv_filename = 'Andra_bus_details.csv' 
    df.to_csv(csv_filename, index=False)

    # Close the driver
    driver.quit()

    print(f"Scraping completed and data saved to '{csv_filename}'")
    print(f"Total bus entries across all routes: {len(df)}") # Updated log

# Execute the full scraping process
if __name__ == "__main__":
    run_full_scraping()

Found 10 routes to process.
Navigating to route: Hyderabad to Vijayawada - https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada
Scrolling to load private bus items...
Initial scroll complete.
Checking for APSRTC Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 10 bus entries after KSRTC expansion.
Total bus entries for this route: 20
Navigating to route: Vijayawada to Hyderabad - https://www.redbus.in/bus-tickets/vijayawada-to-hyderabad
Scrolling to load private bus items...
Initial scroll complete.
Checking for APSRTC Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 10 bus entries after KSRTC expansion.
Total bus entries for this route: 20
Navigating to route: Kakinada to Visakhapatnam - https://www.redbus.in/bus-tickets/kakinada-to-visakhapatnam
Scrolling to load private bus items...
Initial scroll complete.
Checking for APSRTC Buses section and 'Show buses' button...
Scrolling again to load all bus 

# TSRTC

In [3]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
from urllib.parse import urljoin # To handle relative URLs

# URL of the website
BASE_URL = "https://www.redbus.in" # Define base URL for joining relative paths
URL = "https://www.redbus.in/online-booking/tsrtc/?utm_source=rtchometile"

def initialize_driver():
    """Initializes and returns a Chrome WebDriver."""
    driver = webdriver.Chrome()
    driver.maximize_window()
    return driver

def load_page(driver, url):
    """Loads a given URL and waits for a few seconds."""
    driver.get(url)
    time.sleep(10)  # Wait for the page to load

def scrape_ksrtc_routes(driver):
    """
    Scrapes bus route links and names from the initial KSRTC page.
    """
    route_elements = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, 'route'))
    )
    
    bus_routes_link = []
    bus_routes_name = []
    
    for route in route_elements:
        relative_path = route.get_attribute('href')
        if relative_path:
            full_url = urljoin(BASE_URL, relative_path)
            bus_routes_link.append(full_url)
            bus_routes_name.append(route.text.strip())
            
    return bus_routes_link, bus_routes_name

def scroll_to_end(driver):
    """Scrolls the page to the end to load dynamic content."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(15)  # Wait for new content to load
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    time.sleep(5) # Give a little more time after scroll for elements to render

def get_bus_details_from_page(driver, route_name, url):
    """
    Extracts bus details from the current page state using common locators.
    """
    bus_details_list = []
    
    # Find all elements for each detail using their class names
    bus_name_elements = driver.find_elements(By.CLASS_NAME, "travelsName___495898")
    bus_type_elements = driver.find_elements(By.CLASS_NAME, "busType___13ff4b")
    departing_time_elements = driver.find_elements(By.CLASS_NAME, "boardingTime___aced27")
    duration_elements = driver.find_elements(By.CLASS_NAME, "duration___5b44b1")
    reaching_time_elements = driver.find_elements(By.CLASS_NAME, "droppingTime___616c2f")
    star_rating_elements = driver.find_elements(By.CLASS_NAME, "rating___7724f1")
    price_elements = driver.find_elements(By.CLASS_NAME, "finalFare___898bb7")
    seat_availability_elements = driver.find_elements(By.CLASS_NAME, "totalSeats___ba48cf")

    # Iterate through the longest list to ensure we cover all potential bus listings
    max_elements = max(len(bus_name_elements), len(bus_type_elements), len(departing_time_elements),
                       len(duration_elements), len(reaching_time_elements), len(star_rating_elements),
                       len(price_elements), len(seat_availability_elements))

    for i in range(max_elements):
        # Safely get text, providing 'N/A' or '0' if element is not found for a given index
        bus_name = bus_name_elements[i].text if i < len(bus_name_elements) else 'N/A'
        bus_type = bus_type_elements[i].text if i < len(bus_type_elements) else 'N/A'
        departing_time = departing_time_elements[i].text if i < len(departing_time_elements) else 'N/A'
        duration = duration_elements[i].text if i < len(duration_elements) else 'N/A'
        reaching_time = reaching_time_elements[i].text if i < len(reaching_time_elements) else 'N/A'
        star_rating = star_rating_elements[i].text if i < len(star_rating_elements) else '0' 
        price = price_elements[i].text if i < len(price_elements) else 'N/A'
        seat_availability = seat_availability_elements[i].text if i < len(seat_availability_elements) else '0 seats' 

        bus_detail = {
            "Bus_Route_Name": route_name,
            "Bus_Route_Link": url,
            "Bus_Name": bus_name,
            "Bus_Type": bus_type,
            "Departing_Time": departing_time,
            "Duration": duration,
            "Reaching_Time": reaching_time,
            "Star_Rating": star_rating,
            "Price": price,
            "Seat_Availability": seat_availability
        }
        bus_details_list.append(bus_detail)
    return bus_details_list


def scrape_bus_details(driver, url, route_name):
    """
    Scrapes both private and KSRTC bus details for a specific route.
    Handles clicking the KSRTC "Show buses" button and collects ALL results.
    """
    print(f"Navigating to route: {route_name} - {url}")
    all_buses_for_this_route = [] # This list will hold all bus details including potential duplicates
    
    try:
        driver.get(url)
        time.sleep(10)  # Allow the page to load

        # --- Phase 1: Scrape initially visible (private) bus details ---
        print("Scrolling to load private bus items...") # Updated log
        scroll_to_end(driver)
        print("Initial scroll complete.") # Updated log
        
        initial_bus_details = get_bus_details_from_page(driver, route_name, url)
        all_buses_for_this_route.extend(initial_bus_details)
        # Note: The log for 'Scraped X initial bus entries' is removed to match the desired format.

        # --- Phase 2: Handle KSRTC section and scrape all (private + KSRTC) buses ---
        try:
            print("Checking for TSRTC Buses section and 'Show buses' button...") 
            # Locate the main KSRTC block using its unique class
            ksrtc_block = WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.CLASS_NAME, "rtcTuple___1041ba"))
            )
            
            # Now, locate the "Show buses" button WITHIN this specific KSRTC block
            show_buses_button = ksrtc_block.find_element(By.CLASS_NAME, "primaryButton___469d04 ")
            
            if show_buses_button.is_displayed() and show_buses_button.is_enabled():
                # Removed "Found 'Show buses' button for KSRTC. Clicking it..." to match log
                driver.execute_script("arguments[0].click();", show_buses_button)
                time.sleep(10) # Wait for KSRTC buses to load after clicking
                # Removed "Clicked 'Show buses' button. Waiting for KSRTC buses to load." to match log

                # After clicking KSRTC, scroll again to load all new KSRTC buses
                print("Scrolling again to load all bus items..") # Updated log
                scroll_to_end(driver)
                # Removed "Post-KSRTC scroll complete. Scraping all bus details again..." to match log

                # Scrape ALL bus details again (this will include KSRTC and any remaining private)
                all_buses_after_ksrtc_expansion = get_bus_details_from_page(driver, route_name, url)
                all_buses_for_this_route.extend(all_buses_after_ksrtc_expansion) # Add all
                print(f"Scraped {len(all_buses_after_ksrtc_expansion)} bus entries after KSRTC expansion.") # Updated log
                print(f"Total bus entries for this route: {len(all_buses_for_this_route)}") # Updated log

            else:
                print("KSRTC 'Show buses' button not interactable or visible after finding block. Only initial buses scraped.")

        except Exception as e:
            print(f"KSRTC Kerala Buses section or 'Show buses' button not found or an error occurred: {e}. Only initial buses scraped.")
            # In this case, all_buses_for_this_route will only contain the initial scrape.

        return all_buses_for_this_route

    except Exception as e:
        print(f"Error occurred while navigating or scraping route {url}: {str(e)}")
        return []

# List to hold all bus details from all routes
all_bus_details_overall = []

# Main scraping process
def run_full_scraping():
    global driver
    driver = initialize_driver()
    load_page(driver, URL)  # Load the initial KSRTC page to get routes

    # Scrape all route links from the initial page
    all_bus_routes_link, all_bus_routes_name = scrape_ksrtc_routes(driver)
    print(f"Found {len(all_bus_routes_link)} routes to process.")

    for link, name in zip(all_bus_routes_link, all_bus_routes_name):
        bus_details_for_route = scrape_bus_details(driver, link, name)
        if bus_details_for_route:
            all_bus_details_overall.extend(bus_details_for_route)

    # Convert the list of dictionaries to a DataFrame
    df = pd.DataFrame(all_bus_details_overall)

    # Save the DataFrame to a CSV file
    csv_filename = 'Telangana_bus_details.csv' 
    df.to_csv(csv_filename, index=False)

    # Close the driver
    driver.quit()

    print(f"Scraping completed and data saved to '{csv_filename}'")
    print(f"Total bus entries across all routes: {len(df)}") # Updated log

# Execute the full scraping process
if __name__ == "__main__":
    run_full_scraping()

Found 10 routes to process.
Navigating to route: Khammam to Hyderabad - https://www.redbus.in/bus-tickets/khammam-to-hyderabad
Scrolling to load private bus items...
Initial scroll complete.
Checking for TSRTC Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 10 bus entries after KSRTC expansion.
Total bus entries for this route: 20
Navigating to route: Hyderabad to Srisailam - https://www.redbus.in/bus-tickets/hyderabad-to-srisailam
Scrolling to load private bus items...
Initial scroll complete.
Checking for TSRTC Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 10 bus entries after KSRTC expansion.
Total bus entries for this route: 11
Navigating to route: Hyderabad to Khammam - https://www.redbus.in/bus-tickets/hyderabad-to-khammam
Scrolling to load private bus items...
Initial scroll complete.
Checking for TSRTC Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 10 bu

# KTCL

In [5]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
from urllib.parse import urljoin # To handle relative URLs

# URL of the website
BASE_URL = "https://www.redbus.in" # Define base URL for joining relative paths
URL = "https://www.redbus.in/online-booking/ktcl/?utm_source=rtchometile"

def initialize_driver():
    """Initializes and returns a Chrome WebDriver."""
    driver = webdriver.Chrome()
    driver.maximize_window()
    return driver

def load_page(driver, url):
    """Loads a given URL and waits for a few seconds."""
    driver.get(url)
    time.sleep(10)  # Wait for the page to load

def scrape_ksrtc_routes(driver):
    """
    Scrapes bus route links and names from the initial KSRTC page.
    """
    route_elements = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, 'route'))
    )
    
    bus_routes_link = []
    bus_routes_name = []
    
    for route in route_elements:
        relative_path = route.get_attribute('href')
        if relative_path:
            full_url = urljoin(BASE_URL, relative_path)
            bus_routes_link.append(full_url)
            bus_routes_name.append(route.text.strip())
            
    return bus_routes_link, bus_routes_name

def scroll_to_end(driver):
    """Scrolls the page to the end to load dynamic content."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(15)  # Wait for new content to load
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    time.sleep(5) # Give a little more time after scroll for elements to render

def get_bus_details_from_page(driver, route_name, url):
    """
    Extracts bus details from the current page state using common locators.
    """
    bus_details_list = []
    
    # Find all elements for each detail using their class names
    bus_name_elements = driver.find_elements(By.CLASS_NAME, "travelsName___495898")
    bus_type_elements = driver.find_elements(By.CLASS_NAME, "busType___13ff4b")
    departing_time_elements = driver.find_elements(By.CLASS_NAME, "boardingTime___aced27")
    duration_elements = driver.find_elements(By.CLASS_NAME, "duration___5b44b1")
    reaching_time_elements = driver.find_elements(By.CLASS_NAME, "droppingTime___616c2f")
    star_rating_elements = driver.find_elements(By.CLASS_NAME, "rating___7724f1")
    price_elements = driver.find_elements(By.CLASS_NAME, "finalFare___898bb7")
    seat_availability_elements = driver.find_elements(By.CLASS_NAME, "totalSeats___ba48cf")

    # Iterate through the longest list to ensure we cover all potential bus listings
    max_elements = max(len(bus_name_elements), len(bus_type_elements), len(departing_time_elements),
                       len(duration_elements), len(reaching_time_elements), len(star_rating_elements),
                       len(price_elements), len(seat_availability_elements))

    for i in range(max_elements):
        # Safely get text, providing 'N/A' or '0' if element is not found for a given index
        bus_name = bus_name_elements[i].text if i < len(bus_name_elements) else 'N/A'
        bus_type = bus_type_elements[i].text if i < len(bus_type_elements) else 'N/A'
        departing_time = departing_time_elements[i].text if i < len(departing_time_elements) else 'N/A'
        duration = duration_elements[i].text if i < len(duration_elements) else 'N/A'
        reaching_time = reaching_time_elements[i].text if i < len(reaching_time_elements) else 'N/A'
        star_rating = star_rating_elements[i].text if i < len(star_rating_elements) else '0' 
        price = price_elements[i].text if i < len(price_elements) else 'N/A'
        seat_availability = seat_availability_elements[i].text if i < len(seat_availability_elements) else '0 seats' 

        bus_detail = {
            "Bus_Route_Name": route_name,
            "Bus_Route_Link": url,
            "Bus_Name": bus_name,
            "Bus_Type": bus_type,
            "Departing_Time": departing_time,
            "Duration": duration,
            "Reaching_Time": reaching_time,
            "Star_Rating": star_rating,
            "Price": price,
            "Seat_Availability": seat_availability
        }
        bus_details_list.append(bus_detail)
    return bus_details_list


def scrape_bus_details(driver, url, route_name):
    """
    Scrapes both private and KSRTC bus details for a specific route.
    Handles clicking the KSRTC "Show buses" button and collects ALL results.
    """
    print(f"Navigating to route: {route_name} - {url}")
    all_buses_for_this_route = [] # This list will hold all bus details including potential duplicates
    
    try:
        driver.get(url)
        time.sleep(10)  # Allow the page to load

        # --- Phase 1: Scrape initially visible (private) bus details ---
        print("Scrolling to load private bus items...") # Updated log
        scroll_to_end(driver)
        print("Initial scroll complete.") # Updated log
        
        initial_bus_details = get_bus_details_from_page(driver, route_name, url)
        all_buses_for_this_route.extend(initial_bus_details)
        # Note: The log for 'Scraped X initial bus entries' is removed to match the desired format.

        # --- Phase 2: Handle KSRTC section and scrape all (private + KSRTC) buses ---
        try:
            print("Checking for KTCL Buses section and 'Show buses' button...") 
            # Locate the main KSRTC block using its unique class
            ksrtc_block = WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.CLASS_NAME, "rtcTuple___1041ba"))
            )
            
            # Now, locate the "Show buses" button WITHIN this specific KSRTC block
            show_buses_button = ksrtc_block.find_element(By.CLASS_NAME, "primaryButton___469d04 ")
            
            if show_buses_button.is_displayed() and show_buses_button.is_enabled():
                # Removed "Found 'Show buses' button for KSRTC. Clicking it..." to match log
                driver.execute_script("arguments[0].click();", show_buses_button)
                time.sleep(10) # Wait for KSRTC buses to load after clicking
                # Removed "Clicked 'Show buses' button. Waiting for KSRTC buses to load." to match log

                # After clicking KSRTC, scroll again to load all new KSRTC buses
                print("Scrolling again to load all bus items..") # Updated log
                scroll_to_end(driver)
                # Removed "Post-KSRTC scroll complete. Scraping all bus details again..." to match log

                # Scrape ALL bus details again (this will include KSRTC and any remaining private)
                all_buses_after_ksrtc_expansion = get_bus_details_from_page(driver, route_name, url)
                all_buses_for_this_route.extend(all_buses_after_ksrtc_expansion) # Add all
                print(f"Scraped {len(all_buses_after_ksrtc_expansion)} bus entries after KSRTC expansion.") # Updated log
                print(f"Total bus entries for this route: {len(all_buses_for_this_route)}") # Updated log

            else:
                print("KSRTC 'Show buses' button not interactable or visible after finding block. Only initial buses scraped.")

        except Exception as e:
            print(f"KSRTC Kerala Buses section or 'Show buses' button not found or an error occurred: {e}. Only initial buses scraped.")
            # In this case, all_buses_for_this_route will only contain the initial scrape.

        return all_buses_for_this_route

    except Exception as e:
        print(f"Error occurred while navigating or scraping route {url}: {str(e)}")
        return []

# List to hold all bus details from all routes
all_bus_details_overall = []

# Main scraping process
def run_full_scraping():
    global driver
    driver = initialize_driver()
    load_page(driver, URL)  # Load the initial KSRTC page to get routes

    # Scrape all route links from the initial page
    all_bus_routes_link, all_bus_routes_name = scrape_ksrtc_routes(driver)
    print(f"Found {len(all_bus_routes_link)} routes to process.")

    for link, name in zip(all_bus_routes_link, all_bus_routes_name):
        bus_details_for_route = scrape_bus_details(driver, link, name)
        if bus_details_for_route:
            all_bus_details_overall.extend(bus_details_for_route)

    # Convert the list of dictionaries to a DataFrame
    df = pd.DataFrame(all_bus_details_overall)

    # Save the DataFrame to a CSV file
    csv_filename = 'Kadamba_bus_details.csv' 
    df.to_csv(csv_filename, index=False)

    # Close the driver
    driver.quit()

    print(f"Scraping completed and data saved to '{csv_filename}'")
    print(f"Total bus entries across all routes: {len(df)}") # Updated log

# Execute the full scraping process
if __name__ == "__main__":
    run_full_scraping()

Found 10 routes to process.
Navigating to route: Bangalore to Goa - https://www.redbus.in/bus-tickets/bangalore-to-goa
Scrolling to load private bus items...
Initial scroll complete.
Checking for KTCL Buses section and 'Show buses' button...
KSRTC Kerala Buses section or 'Show buses' button not found or an error occurred: Message: 
Stacktrace:
	GetHandleVerifier [0x0x7ff65db2e925+77845]
	GetHandleVerifier [0x0x7ff65db2e980+77936]
	(No symbol) [0x0x7ff65d8e9cda]
	(No symbol) [0x0x7ff65d9406aa]
	(No symbol) [0x0x7ff65d94095c]
	(No symbol) [0x0x7ff65d993d07]
	(No symbol) [0x0x7ff65d96890f]
	(No symbol) [0x0x7ff65d990b07]
	(No symbol) [0x0x7ff65d9686a3]
	(No symbol) [0x0x7ff65d931791]
	(No symbol) [0x0x7ff65d932523]
	GetHandleVerifier [0x0x7ff65de0683d+3059501]
	GetHandleVerifier [0x0x7ff65de00bfd+3035885]
	GetHandleVerifier [0x0x7ff65de203f0+3164896]
	GetHandleVerifier [0x0x7ff65db48c2e+185118]
	GetHandleVerifier [0x0x7ff65db5053f+216111]
	GetHandleVerifier [0x0x7ff65db372d4+113092]
	GetH

Scrolling to load private bus items...
Initial scroll complete.
Checking for KTCL Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 2 bus entries after KSRTC expansion.
Total bus entries for this route: 8
Navigating to route: Goa to Hyderabad - https://www.redbus.in/bus-tickets/goa-to-hyderabad
Scrolling to load private bus items...
Initial scroll complete.
Checking for KTCL Buses section and 'Show buses' button...
KSRTC Kerala Buses section or 'Show buses' button not found or an error occurred: Message: 
Stacktrace:
	GetHandleVerifier [0x0x7ff65db2e925+77845]
	GetHandleVerifier [0x0x7ff65db2e980+77936]
	(No symbol) [0x0x7ff65d8e9cda]
	(No symbol) [0x0x7ff65d9406aa]
	(No symbol) [0x0x7ff65d94095c]
	(No symbol) [0x0x7ff65d993d07]
	(No symbol) [0x0x7ff65d96890f]
	(No symbol) [0x0x7ff65d990b07]
	(No symbol) [0x0x7ff65d9686a3]
	(No symbol) [0x0x7ff65d931791]
	(No symbol) [0x0x7ff65d932523]
	GetHandleVerifier [0x0x7ff65de0683d+3059501]
	GetHandleVerifi

# RSRTC

In [6]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
from urllib.parse import urljoin # To handle relative URLs

# URL of the website
BASE_URL = "https://www.redbus.in" # Define base URL for joining relative paths
URL = "https://www.redbus.in/online-booking/rsrtc/?utm_source=rtchometile"

def initialize_driver():
    """Initializes and returns a Chrome WebDriver."""
    driver = webdriver.Chrome()
    driver.maximize_window()
    return driver

def load_page(driver, url):
    """Loads a given URL and waits for a few seconds."""
    driver.get(url)
    time.sleep(10)  # Wait for the page to load

def scrape_ksrtc_routes(driver):
    """
    Scrapes bus route links and names from the initial KSRTC page.
    """
    route_elements = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, 'route'))
    )
    
    bus_routes_link = []
    bus_routes_name = []
    
    for route in route_elements:
        relative_path = route.get_attribute('href')
        if relative_path:
            full_url = urljoin(BASE_URL, relative_path)
            bus_routes_link.append(full_url)
            bus_routes_name.append(route.text.strip())
            
    return bus_routes_link, bus_routes_name

def scroll_to_end(driver):
    """Scrolls the page to the end to load dynamic content."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(15)  # Wait for new content to load
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    time.sleep(5) # Give a little more time after scroll for elements to render

def get_bus_details_from_page(driver, route_name, url):
    """
    Extracts bus details from the current page state using common locators.
    """
    bus_details_list = []
    
    # Find all elements for each detail using their class names
    bus_name_elements = driver.find_elements(By.CLASS_NAME, "travelsName___495898")
    bus_type_elements = driver.find_elements(By.CLASS_NAME, "busType___13ff4b")
    departing_time_elements = driver.find_elements(By.CLASS_NAME, "boardingTime___aced27")
    duration_elements = driver.find_elements(By.CLASS_NAME, "duration___5b44b1")
    reaching_time_elements = driver.find_elements(By.CLASS_NAME, "droppingTime___616c2f")
    star_rating_elements = driver.find_elements(By.CLASS_NAME, "rating___7724f1")
    price_elements = driver.find_elements(By.CLASS_NAME, "finalFare___898bb7")
    seat_availability_elements = driver.find_elements(By.CLASS_NAME, "totalSeats___ba48cf")

    # Iterate through the longest list to ensure we cover all potential bus listings
    max_elements = max(len(bus_name_elements), len(bus_type_elements), len(departing_time_elements),
                       len(duration_elements), len(reaching_time_elements), len(star_rating_elements),
                       len(price_elements), len(seat_availability_elements))

    for i in range(max_elements):
        # Safely get text, providing 'N/A' or '0' if element is not found for a given index
        bus_name = bus_name_elements[i].text if i < len(bus_name_elements) else 'N/A'
        bus_type = bus_type_elements[i].text if i < len(bus_type_elements) else 'N/A'
        departing_time = departing_time_elements[i].text if i < len(departing_time_elements) else 'N/A'
        duration = duration_elements[i].text if i < len(duration_elements) else 'N/A'
        reaching_time = reaching_time_elements[i].text if i < len(reaching_time_elements) else 'N/A'
        star_rating = star_rating_elements[i].text if i < len(star_rating_elements) else '0' 
        price = price_elements[i].text if i < len(price_elements) else 'N/A'
        seat_availability = seat_availability_elements[i].text if i < len(seat_availability_elements) else '0 seats' 

        bus_detail = {
            "Bus_Route_Name": route_name,
            "Bus_Route_Link": url,
            "Bus_Name": bus_name,
            "Bus_Type": bus_type,
            "Departing_Time": departing_time,
            "Duration": duration,
            "Reaching_Time": reaching_time,
            "Star_Rating": star_rating,
            "Price": price,
            "Seat_Availability": seat_availability
        }
        bus_details_list.append(bus_detail)
    return bus_details_list


def scrape_bus_details(driver, url, route_name):
    """
    Scrapes both private and KSRTC bus details for a specific route.
    Handles clicking the KSRTC "Show buses" button and collects ALL results.
    """
    print(f"Navigating to route: {route_name} - {url}")
    all_buses_for_this_route = [] # This list will hold all bus details including potential duplicates
    
    try:
        driver.get(url)
        time.sleep(10)  # Allow the page to load

        # --- Phase 1: Scrape initially visible (private) bus details ---
        print("Scrolling to load private bus items...") # Updated log
        scroll_to_end(driver)
        print("Initial scroll complete.") # Updated log
        
        initial_bus_details = get_bus_details_from_page(driver, route_name, url)
        all_buses_for_this_route.extend(initial_bus_details)
        # Note: The log for 'Scraped X initial bus entries' is removed to match the desired format.

        # --- Phase 2: Handle KSRTC section and scrape all (private + KSRTC) buses ---
        try:
            print("Checking for RSRTC Buses section and 'Show buses' button...") 
            # Locate the main KSRTC block using its unique class
            ksrtc_block = WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.CLASS_NAME, "rtcTuple___1041ba"))
            )
            
            # Now, locate the "Show buses" button WITHIN this specific KSRTC block
            show_buses_button = ksrtc_block.find_element(By.CLASS_NAME, "primaryButton___469d04 ")
            
            if show_buses_button.is_displayed() and show_buses_button.is_enabled():
                # Removed "Found 'Show buses' button for KSRTC. Clicking it..." to match log
                driver.execute_script("arguments[0].click();", show_buses_button)
                time.sleep(10) # Wait for KSRTC buses to load after clicking
                # Removed "Clicked 'Show buses' button. Waiting for KSRTC buses to load." to match log

                # After clicking KSRTC, scroll again to load all new KSRTC buses
                print("Scrolling again to load all bus items..") # Updated log
                scroll_to_end(driver)
                # Removed "Post-KSRTC scroll complete. Scraping all bus details again..." to match log

                # Scrape ALL bus details again (this will include KSRTC and any remaining private)
                all_buses_after_ksrtc_expansion = get_bus_details_from_page(driver, route_name, url)
                all_buses_for_this_route.extend(all_buses_after_ksrtc_expansion) # Add all
                print(f"Scraped {len(all_buses_after_ksrtc_expansion)} bus entries after KSRTC expansion.") # Updated log
                print(f"Total bus entries for this route: {len(all_buses_for_this_route)}") # Updated log

            else:
                print("KSRTC 'Show buses' button not interactable or visible after finding block. Only initial buses scraped.")

        except Exception as e:
            print(f"KSRTC Kerala Buses section or 'Show buses' button not found or an error occurred: {e}. Only initial buses scraped.")
            # In this case, all_buses_for_this_route will only contain the initial scrape.

        return all_buses_for_this_route

    except Exception as e:
        print(f"Error occurred while navigating or scraping route {url}: {str(e)}")
        return []

# List to hold all bus details from all routes
all_bus_details_overall = []

# Main scraping process
def run_full_scraping():
    global driver
    driver = initialize_driver()
    load_page(driver, URL)  # Load the initial KSRTC page to get routes

    # Scrape all route links from the initial page
    all_bus_routes_link, all_bus_routes_name = scrape_ksrtc_routes(driver)
    print(f"Found {len(all_bus_routes_link)} routes to process.")

    for link, name in zip(all_bus_routes_link, all_bus_routes_name):
        bus_details_for_route = scrape_bus_details(driver, link, name)
        if bus_details_for_route:
            all_bus_details_overall.extend(bus_details_for_route)

    # Convert the list of dictionaries to a DataFrame
    df = pd.DataFrame(all_bus_details_overall)

    # Save the DataFrame to a CSV file
    csv_filename = 'Rajesthan_bus_details.csv' 
    df.to_csv(csv_filename, index=False)

    # Close the driver
    driver.quit()

    print(f"Scraping completed and data saved to '{csv_filename}'")
    print(f"Total bus entries across all routes: {len(df)}") # Updated log

# Execute the full scraping process
if __name__ == "__main__":
    run_full_scraping()

Found 10 routes to process.
Navigating to route: Jodhpur to Ajmer - https://www.redbus.in/bus-tickets/jodhpur-to-ajmer
Scrolling to load private bus items...
Initial scroll complete.
Checking for RSRTC Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 10 bus entries after KSRTC expansion.
Total bus entries for this route: 20
Navigating to route: Jaipur (Rajasthan) to Jodhpur - https://www.redbus.in/bus-tickets/jaipur-to-jodhpur
Scrolling to load private bus items...
Initial scroll complete.
Checking for RSRTC Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 10 bus entries after KSRTC expansion.
Total bus entries for this route: 20
Navigating to route: Beawar (Rajasthan) to Jaipur (Rajasthan) - https://www.redbus.in/bus-tickets/beawer-to-jaipur
Scrolling to load private bus items...
Initial scroll complete.
Checking for RSRTC Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scr

# SBSTC

In [7]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
from urllib.parse import urljoin # To handle relative URLs

# URL of the website
BASE_URL = "https://www.redbus.in" # Define base URL for joining relative paths
URL = "https://www.redbus.in/online-booking/south-bengal-state-transport-corporation-sbstc/?utm_source=rtchometile"

def initialize_driver():
    """Initializes and returns a Chrome WebDriver."""
    driver = webdriver.Chrome()
    driver.maximize_window()
    return driver

def load_page(driver, url):
    """Loads a given URL and waits for a few seconds."""
    driver.get(url)
    time.sleep(10)  # Wait for the page to load

def scrape_ksrtc_routes(driver):
    """
    Scrapes bus route links and names from the initial KSRTC page.
    """
    route_elements = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, 'route'))
    )
    
    bus_routes_link = []
    bus_routes_name = []
    
    for route in route_elements:
        relative_path = route.get_attribute('href')
        if relative_path:
            full_url = urljoin(BASE_URL, relative_path)
            bus_routes_link.append(full_url)
            bus_routes_name.append(route.text.strip())
            
    return bus_routes_link, bus_routes_name

def scroll_to_end(driver):
    """Scrolls the page to the end to load dynamic content."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(15)  # Wait for new content to load
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    time.sleep(5) # Give a little more time after scroll for elements to render

def get_bus_details_from_page(driver, route_name, url):
    """
    Extracts bus details from the current page state using common locators.
    """
    bus_details_list = []
    
    # Find all elements for each detail using their class names
    bus_name_elements = driver.find_elements(By.CLASS_NAME, "travelsName___495898")
    bus_type_elements = driver.find_elements(By.CLASS_NAME, "busType___13ff4b")
    departing_time_elements = driver.find_elements(By.CLASS_NAME, "boardingTime___aced27")
    duration_elements = driver.find_elements(By.CLASS_NAME, "duration___5b44b1")
    reaching_time_elements = driver.find_elements(By.CLASS_NAME, "droppingTime___616c2f")
    star_rating_elements = driver.find_elements(By.CLASS_NAME, "rating___7724f1")
    price_elements = driver.find_elements(By.CLASS_NAME, "finalFare___898bb7")
    seat_availability_elements = driver.find_elements(By.CLASS_NAME, "totalSeats___ba48cf")

    # Iterate through the longest list to ensure we cover all potential bus listings
    max_elements = max(len(bus_name_elements), len(bus_type_elements), len(departing_time_elements),
                       len(duration_elements), len(reaching_time_elements), len(star_rating_elements),
                       len(price_elements), len(seat_availability_elements))

    for i in range(max_elements):
        # Safely get text, providing 'N/A' or '0' if element is not found for a given index
        bus_name = bus_name_elements[i].text if i < len(bus_name_elements) else 'N/A'
        bus_type = bus_type_elements[i].text if i < len(bus_type_elements) else 'N/A'
        departing_time = departing_time_elements[i].text if i < len(departing_time_elements) else 'N/A'
        duration = duration_elements[i].text if i < len(duration_elements) else 'N/A'
        reaching_time = reaching_time_elements[i].text if i < len(reaching_time_elements) else 'N/A'
        star_rating = star_rating_elements[i].text if i < len(star_rating_elements) else '0' 
        price = price_elements[i].text if i < len(price_elements) else 'N/A'
        seat_availability = seat_availability_elements[i].text if i < len(seat_availability_elements) else '0 seats' 

        bus_detail = {
            "Bus_Route_Name": route_name,
            "Bus_Route_Link": url,
            "Bus_Name": bus_name,
            "Bus_Type": bus_type,
            "Departing_Time": departing_time,
            "Duration": duration,
            "Reaching_Time": reaching_time,
            "Star_Rating": star_rating,
            "Price": price,
            "Seat_Availability": seat_availability
        }
        bus_details_list.append(bus_detail)
    return bus_details_list


def scrape_bus_details(driver, url, route_name):
    """
    Scrapes both private and KSRTC bus details for a specific route.
    Handles clicking the KSRTC "Show buses" button and collects ALL results.
    """
    print(f"Navigating to route: {route_name} - {url}")
    all_buses_for_this_route = [] # This list will hold all bus details including potential duplicates
    
    try:
        driver.get(url)
        time.sleep(10)  # Allow the page to load

        # --- Phase 1: Scrape initially visible (private) bus details ---
        print("Scrolling to load private bus items...") # Updated log
        scroll_to_end(driver)
        print("Initial scroll complete.") # Updated log
        
        initial_bus_details = get_bus_details_from_page(driver, route_name, url)
        all_buses_for_this_route.extend(initial_bus_details)
        # Note: The log for 'Scraped X initial bus entries' is removed to match the desired format.

        # --- Phase 2: Handle KSRTC section and scrape all (private + KSRTC) buses ---
        try:
            print("Checking for SBSTC Buses section and 'Show buses' button...") 
            # Locate the main KSRTC block using its unique class
            ksrtc_block = WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.CLASS_NAME, "rtcTuple___1041ba"))
            )
            
            # Now, locate the "Show buses" button WITHIN this specific KSRTC block
            show_buses_button = ksrtc_block.find_element(By.CLASS_NAME, "primaryButton___469d04 ")
            
            if show_buses_button.is_displayed() and show_buses_button.is_enabled():
                # Removed "Found 'Show buses' button for KSRTC. Clicking it..." to match log
                driver.execute_script("arguments[0].click();", show_buses_button)
                time.sleep(10) # Wait for KSRTC buses to load after clicking
                # Removed "Clicked 'Show buses' button. Waiting for KSRTC buses to load." to match log

                # After clicking KSRTC, scroll again to load all new KSRTC buses
                print("Scrolling again to load all bus items..") # Updated log
                scroll_to_end(driver)
                # Removed "Post-KSRTC scroll complete. Scraping all bus details again..." to match log

                # Scrape ALL bus details again (this will include KSRTC and any remaining private)
                all_buses_after_ksrtc_expansion = get_bus_details_from_page(driver, route_name, url)
                all_buses_for_this_route.extend(all_buses_after_ksrtc_expansion) # Add all
                print(f"Scraped {len(all_buses_after_ksrtc_expansion)} bus entries after KSRTC expansion.") # Updated log
                print(f"Total bus entries for this route: {len(all_buses_for_this_route)}") # Updated log

            else:
                print("KSRTC 'Show buses' button not interactable or visible after finding block. Only initial buses scraped.")

        except Exception as e:
            print(f"KSRTC Kerala Buses section or 'Show buses' button not found or an error occurred: {e}. Only initial buses scraped.")
            # In this case, all_buses_for_this_route will only contain the initial scrape.

        return all_buses_for_this_route

    except Exception as e:
        print(f"Error occurred while navigating or scraping route {url}: {str(e)}")
        return []

# List to hold all bus details from all routes
all_bus_details_overall = []

# Main scraping process
def run_full_scraping():
    global driver
    driver = initialize_driver()
    load_page(driver, URL)  # Load the initial KSRTC page to get routes

    # Scrape all route links from the initial page
    all_bus_routes_link, all_bus_routes_name = scrape_ksrtc_routes(driver)
    print(f"Found {len(all_bus_routes_link)} routes to process.")

    for link, name in zip(all_bus_routes_link, all_bus_routes_name):
        bus_details_for_route = scrape_bus_details(driver, link, name)
        if bus_details_for_route:
            all_bus_details_overall.extend(bus_details_for_route)

    # Convert the list of dictionaries to a DataFrame
    df = pd.DataFrame(all_bus_details_overall)

    # Save the DataFrame to a CSV file
    csv_filename = 'South_Bengal_bus_details.csv' 
    df.to_csv(csv_filename, index=False)

    # Close the driver
    driver.quit()

    print(f"Scraping completed and data saved to '{csv_filename}'")
    print(f"Total bus entries across all routes: {len(df)}") # Updated log

# Execute the full scraping process
if __name__ == "__main__":
    run_full_scraping()

Found 10 routes to process.
Navigating to route: Durgapur (West Bengal) to Kolkata - https://www.redbus.in/bus-tickets/durgapur-to-kolkata
Scrolling to load private bus items...
Initial scroll complete.
Checking for SBSTC Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 10 bus entries after KSRTC expansion.
Total bus entries for this route: 20
Navigating to route: Kolkata to Burdwan - https://www.redbus.in/bus-tickets/kolkata-to-burdwan
Scrolling to load private bus items...
Initial scroll complete.
Checking for SBSTC Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 10 bus entries after KSRTC expansion.
Total bus entries for this route: 20
Navigating to route: Midnapore to Kolkata - https://www.redbus.in/bus-tickets/midnapore-to-kolkata
Scrolling to load private bus items...
Initial scroll complete.
Checking for SBSTC Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 1

# HRTC

In [8]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
from urllib.parse import urljoin # To handle relative URLs

# URL of the website
BASE_URL = "https://www.redbus.in" # Define base URL for joining relative paths
URL = "https://www.redbus.in/online-booking/hrtc/?utm_source=rtchometile"

def initialize_driver():
    """Initializes and returns a Chrome WebDriver."""
    driver = webdriver.Chrome()
    driver.maximize_window()
    return driver

def load_page(driver, url):
    """Loads a given URL and waits for a few seconds."""
    driver.get(url)
    time.sleep(10)  # Wait for the page to load

def scrape_ksrtc_routes(driver):
    """
    Scrapes bus route links and names from the initial KSRTC page.
    """
    route_elements = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, 'route'))
    )
    
    bus_routes_link = []
    bus_routes_name = []
    
    for route in route_elements:
        relative_path = route.get_attribute('href')
        if relative_path:
            full_url = urljoin(BASE_URL, relative_path)
            bus_routes_link.append(full_url)
            bus_routes_name.append(route.text.strip())
            
    return bus_routes_link, bus_routes_name

def scroll_to_end(driver):
    """Scrolls the page to the end to load dynamic content."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(15)  # Wait for new content to load
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    time.sleep(5) # Give a little more time after scroll for elements to render

def get_bus_details_from_page(driver, route_name, url):
    """
    Extracts bus details from the current page state using common locators.
    """
    bus_details_list = []
    
    # Find all elements for each detail using their class names
    bus_name_elements = driver.find_elements(By.CLASS_NAME, "travelsName___495898")
    bus_type_elements = driver.find_elements(By.CLASS_NAME, "busType___13ff4b")
    departing_time_elements = driver.find_elements(By.CLASS_NAME, "boardingTime___aced27")
    duration_elements = driver.find_elements(By.CLASS_NAME, "duration___5b44b1")
    reaching_time_elements = driver.find_elements(By.CLASS_NAME, "droppingTime___616c2f")
    star_rating_elements = driver.find_elements(By.CLASS_NAME, "rating___7724f1")
    price_elements = driver.find_elements(By.CLASS_NAME, "finalFare___898bb7")
    seat_availability_elements = driver.find_elements(By.CLASS_NAME, "totalSeats___ba48cf")

    # Iterate through the longest list to ensure we cover all potential bus listings
    max_elements = max(len(bus_name_elements), len(bus_type_elements), len(departing_time_elements),
                       len(duration_elements), len(reaching_time_elements), len(star_rating_elements),
                       len(price_elements), len(seat_availability_elements))

    for i in range(max_elements):
        # Safely get text, providing 'N/A' or '0' if element is not found for a given index
        bus_name = bus_name_elements[i].text if i < len(bus_name_elements) else 'N/A'
        bus_type = bus_type_elements[i].text if i < len(bus_type_elements) else 'N/A'
        departing_time = departing_time_elements[i].text if i < len(departing_time_elements) else 'N/A'
        duration = duration_elements[i].text if i < len(duration_elements) else 'N/A'
        reaching_time = reaching_time_elements[i].text if i < len(reaching_time_elements) else 'N/A'
        star_rating = star_rating_elements[i].text if i < len(star_rating_elements) else '0' 
        price = price_elements[i].text if i < len(price_elements) else 'N/A'
        seat_availability = seat_availability_elements[i].text if i < len(seat_availability_elements) else '0 seats' 

        bus_detail = {
            "Bus_Route_Name": route_name,
            "Bus_Route_Link": url,
            "Bus_Name": bus_name,
            "Bus_Type": bus_type,
            "Departing_Time": departing_time,
            "Duration": duration,
            "Reaching_Time": reaching_time,
            "Star_Rating": star_rating,
            "Price": price,
            "Seat_Availability": seat_availability
        }
        bus_details_list.append(bus_detail)
    return bus_details_list


def scrape_bus_details(driver, url, route_name):
    """
    Scrapes both private and KSRTC bus details for a specific route.
    Handles clicking the KSRTC "Show buses" button and collects ALL results.
    """
    print(f"Navigating to route: {route_name} - {url}")
    all_buses_for_this_route = [] # This list will hold all bus details including potential duplicates
    
    try:
        driver.get(url)
        time.sleep(10)  # Allow the page to load

        # --- Phase 1: Scrape initially visible (private) bus details ---
        print("Scrolling to load private bus items...") # Updated log
        scroll_to_end(driver)
        print("Initial scroll complete.") # Updated log
        
        initial_bus_details = get_bus_details_from_page(driver, route_name, url)
        all_buses_for_this_route.extend(initial_bus_details)
        # Note: The log for 'Scraped X initial bus entries' is removed to match the desired format.

        # --- Phase 2: Handle KSRTC section and scrape all (private + KSRTC) buses ---
        try:
            print("Checking for HRTC Buses section and 'Show buses' button...") 
            # Locate the main KSRTC block using its unique class
            ksrtc_block = WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.CLASS_NAME, "rtcTuple___1041ba"))
            )
            
            # Now, locate the "Show buses" button WITHIN this specific KSRTC block
            show_buses_button = ksrtc_block.find_element(By.CLASS_NAME, "primaryButton___469d04 ")
            
            if show_buses_button.is_displayed() and show_buses_button.is_enabled():
                # Removed "Found 'Show buses' button for KSRTC. Clicking it..." to match log
                driver.execute_script("arguments[0].click();", show_buses_button)
                time.sleep(10) # Wait for KSRTC buses to load after clicking
                # Removed "Clicked 'Show buses' button. Waiting for KSRTC buses to load." to match log

                # After clicking KSRTC, scroll again to load all new KSRTC buses
                print("Scrolling again to load all bus items..") # Updated log
                scroll_to_end(driver)
                # Removed "Post-KSRTC scroll complete. Scraping all bus details again..." to match log

                # Scrape ALL bus details again (this will include KSRTC and any remaining private)
                all_buses_after_ksrtc_expansion = get_bus_details_from_page(driver, route_name, url)
                all_buses_for_this_route.extend(all_buses_after_ksrtc_expansion) # Add all
                print(f"Scraped {len(all_buses_after_ksrtc_expansion)} bus entries after KSRTC expansion.") # Updated log
                print(f"Total bus entries for this route: {len(all_buses_for_this_route)}") # Updated log

            else:
                print("KSRTC 'Show buses' button not interactable or visible after finding block. Only initial buses scraped.")

        except Exception as e:
            print(f"KSRTC Kerala Buses section or 'Show buses' button not found or an error occurred: {e}. Only initial buses scraped.")
            # In this case, all_buses_for_this_route will only contain the initial scrape.

        return all_buses_for_this_route

    except Exception as e:
        print(f"Error occurred while navigating or scraping route {url}: {str(e)}")
        return []

# List to hold all bus details from all routes
all_bus_details_overall = []

# Main scraping process
def run_full_scraping():
    global driver
    driver = initialize_driver()
    load_page(driver, URL)  # Load the initial KSRTC page to get routes

    # Scrape all route links from the initial page
    all_bus_routes_link, all_bus_routes_name = scrape_ksrtc_routes(driver)
    print(f"Found {len(all_bus_routes_link)} routes to process.")

    for link, name in zip(all_bus_routes_link, all_bus_routes_name):
        bus_details_for_route = scrape_bus_details(driver, link, name)
        if bus_details_for_route:
            all_bus_details_overall.extend(bus_details_for_route)

    # Convert the list of dictionaries to a DataFrame
    df = pd.DataFrame(all_bus_details_overall)

    # Save the DataFrame to a CSV file
    csv_filename = 'Himachal_Pradesh_bus_details.csv' 
    df.to_csv(csv_filename, index=False)

    # Close the driver
    driver.quit()

    print(f"Scraping completed and data saved to '{csv_filename}'")
    print(f"Total bus entries across all routes: {len(df)}") # Updated log

# Execute the full scraping process
if __name__ == "__main__":
    run_full_scraping()

Found 10 routes to process.
Navigating to route: Hamirpur (Himachal Pradesh) to Chandigarh - https://www.redbus.in/bus-tickets/hamirpur-himachal-pradesh-to-chandigarh
Scrolling to load private bus items...
Initial scroll complete.
Checking for HRTC Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 10 bus entries after KSRTC expansion.
Total bus entries for this route: 19
Navigating to route: Chandigarh to Hamirpur (Himachal Pradesh) - https://www.redbus.in/bus-tickets/chandigarh-to-hamirpur-himachal-pradesh
Scrolling to load private bus items...
Initial scroll complete.
Checking for HRTC Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 10 bus entries after KSRTC expansion.
Total bus entries for this route: 19
Navigating to route: Delhi to Shimla - https://www.redbus.in/bus-tickets/delhi-to-shimla
Scrolling to load private bus items...
Initial scroll complete.
Checking for HRTC Buses section and 'Show buses' 

# ASTC

In [9]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
from urllib.parse import urljoin # To handle relative URLs

# URL of the website
BASE_URL = "https://www.redbus.in" # Define base URL for joining relative paths
URL = "https://www.redbus.in/online-booking/astc/?utm_source=rtchometile"

def initialize_driver():
    """Initializes and returns a Chrome WebDriver."""
    driver = webdriver.Chrome()
    driver.maximize_window()
    return driver

def load_page(driver, url):
    """Loads a given URL and waits for a few seconds."""
    driver.get(url)
    time.sleep(10)  # Wait for the page to load

def scrape_ksrtc_routes(driver):
    """
    Scrapes bus route links and names from the initial KSRTC page.
    """
    route_elements = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, 'route'))
    )
    
    bus_routes_link = []
    bus_routes_name = []
    
    for route in route_elements:
        relative_path = route.get_attribute('href')
        if relative_path:
            full_url = urljoin(BASE_URL, relative_path)
            bus_routes_link.append(full_url)
            bus_routes_name.append(route.text.strip())
            
    return bus_routes_link, bus_routes_name

def scroll_to_end(driver):
    """Scrolls the page to the end to load dynamic content."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(15)  # Wait for new content to load
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    time.sleep(5) # Give a little more time after scroll for elements to render

def get_bus_details_from_page(driver, route_name, url):
    """
    Extracts bus details from the current page state using common locators.
    """
    bus_details_list = []
    
    # Find all elements for each detail using their class names
    bus_name_elements = driver.find_elements(By.CLASS_NAME, "travelsName___495898")
    bus_type_elements = driver.find_elements(By.CLASS_NAME, "busType___13ff4b")
    departing_time_elements = driver.find_elements(By.CLASS_NAME, "boardingTime___aced27")
    duration_elements = driver.find_elements(By.CLASS_NAME, "duration___5b44b1")
    reaching_time_elements = driver.find_elements(By.CLASS_NAME, "droppingTime___616c2f")
    star_rating_elements = driver.find_elements(By.CLASS_NAME, "rating___7724f1")
    price_elements = driver.find_elements(By.CLASS_NAME, "finalFare___898bb7")
    seat_availability_elements = driver.find_elements(By.CLASS_NAME, "totalSeats___ba48cf")

    # Iterate through the longest list to ensure we cover all potential bus listings
    max_elements = max(len(bus_name_elements), len(bus_type_elements), len(departing_time_elements),
                       len(duration_elements), len(reaching_time_elements), len(star_rating_elements),
                       len(price_elements), len(seat_availability_elements))

    for i in range(max_elements):
        # Safely get text, providing 'N/A' or '0' if element is not found for a given index
        bus_name = bus_name_elements[i].text if i < len(bus_name_elements) else 'N/A'
        bus_type = bus_type_elements[i].text if i < len(bus_type_elements) else 'N/A'
        departing_time = departing_time_elements[i].text if i < len(departing_time_elements) else 'N/A'
        duration = duration_elements[i].text if i < len(duration_elements) else 'N/A'
        reaching_time = reaching_time_elements[i].text if i < len(reaching_time_elements) else 'N/A'
        star_rating = star_rating_elements[i].text if i < len(star_rating_elements) else '0' 
        price = price_elements[i].text if i < len(price_elements) else 'N/A'
        seat_availability = seat_availability_elements[i].text if i < len(seat_availability_elements) else '0 seats' 

        bus_detail = {
            "Bus_Route_Name": route_name,
            "Bus_Route_Link": url,
            "Bus_Name": bus_name,
            "Bus_Type": bus_type,
            "Departing_Time": departing_time,
            "Duration": duration,
            "Reaching_Time": reaching_time,
            "Star_Rating": star_rating,
            "Price": price,
            "Seat_Availability": seat_availability
        }
        bus_details_list.append(bus_detail)
    return bus_details_list


def scrape_bus_details(driver, url, route_name):
    """
    Scrapes both private and KSRTC bus details for a specific route.
    Handles clicking the KSRTC "Show buses" button and collects ALL results.
    """
    print(f"Navigating to route: {route_name} - {url}")
    all_buses_for_this_route = [] # This list will hold all bus details including potential duplicates
    
    try:
        driver.get(url)
        time.sleep(10)  # Allow the page to load

        # --- Phase 1: Scrape initially visible (private) bus details ---
        print("Scrolling to load private bus items...") # Updated log
        scroll_to_end(driver)
        print("Initial scroll complete.") # Updated log
        
        initial_bus_details = get_bus_details_from_page(driver, route_name, url)
        all_buses_for_this_route.extend(initial_bus_details)
        # Note: The log for 'Scraped X initial bus entries' is removed to match the desired format.

        # --- Phase 2: Handle KSRTC section and scrape all (private + KSRTC) buses ---
        try:
            print("Checking for ASTC Buses section and 'Show buses' button...") 
            # Locate the main KSRTC block using its unique class
            ksrtc_block = WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.CLASS_NAME, "rtcTuple___1041ba"))
            )
            
            # Now, locate the "Show buses" button WITHIN this specific KSRTC block
            show_buses_button = ksrtc_block.find_element(By.CLASS_NAME, "primaryButton___469d04 ")
            
            if show_buses_button.is_displayed() and show_buses_button.is_enabled():
                # Removed "Found 'Show buses' button for KSRTC. Clicking it..." to match log
                driver.execute_script("arguments[0].click();", show_buses_button)
                time.sleep(10) # Wait for KSRTC buses to load after clicking
                # Removed "Clicked 'Show buses' button. Waiting for KSRTC buses to load." to match log

                # After clicking KSRTC, scroll again to load all new KSRTC buses
                print("Scrolling again to load all bus items..") # Updated log
                scroll_to_end(driver)
                # Removed "Post-KSRTC scroll complete. Scraping all bus details again..." to match log

                # Scrape ALL bus details again (this will include KSRTC and any remaining private)
                all_buses_after_ksrtc_expansion = get_bus_details_from_page(driver, route_name, url)
                all_buses_for_this_route.extend(all_buses_after_ksrtc_expansion) # Add all
                print(f"Scraped {len(all_buses_after_ksrtc_expansion)} bus entries after KSRTC expansion.") # Updated log
                print(f"Total bus entries for this route: {len(all_buses_for_this_route)}") # Updated log

            else:
                print("KSRTC 'Show buses' button not interactable or visible after finding block. Only initial buses scraped.")

        except Exception as e:
            print(f"KSRTC Kerala Buses section or 'Show buses' button not found or an error occurred: {e}. Only initial buses scraped.")
            # In this case, all_buses_for_this_route will only contain the initial scrape.

        return all_buses_for_this_route

    except Exception as e:
        print(f"Error occurred while navigating or scraping route {url}: {str(e)}")
        return []

# List to hold all bus details from all routes
all_bus_details_overall = []

# Main scraping process
def run_full_scraping():
    global driver
    driver = initialize_driver()
    load_page(driver, URL)  # Load the initial KSRTC page to get routes

    # Scrape all route links from the initial page
    all_bus_routes_link, all_bus_routes_name = scrape_ksrtc_routes(driver)
    print(f"Found {len(all_bus_routes_link)} routes to process.")

    for link, name in zip(all_bus_routes_link, all_bus_routes_name):
        bus_details_for_route = scrape_bus_details(driver, link, name)
        if bus_details_for_route:
            all_bus_details_overall.extend(bus_details_for_route)

    # Convert the list of dictionaries to a DataFrame
    df = pd.DataFrame(all_bus_details_overall)

    # Save the DataFrame to a CSV file
    csv_filename = 'Assam_bus_details.csv' 
    df.to_csv(csv_filename, index=False)

    # Close the driver
    driver.quit()

    print(f"Scraping completed and data saved to '{csv_filename}'")
    print(f"Total bus entries across all routes: {len(df)}") # Updated log

# Execute the full scraping process
if __name__ == "__main__":
    run_full_scraping()

Found 10 routes to process.
Navigating to route: Guwahati to Tezpur - https://www.redbus.in/bus-tickets/guwahati-to-tezpur
Scrolling to load private bus items...
Initial scroll complete.
Checking for ASTC Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 9 bus entries after KSRTC expansion.
Total bus entries for this route: 19
Navigating to route: Guwahati to Nagaon (Assam) - https://www.redbus.in/bus-tickets/guwahati-to-nagaon
Scrolling to load private bus items...
Initial scroll complete.
Checking for ASTC Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 5 bus entries after KSRTC expansion.
Total bus entries for this route: 15
Navigating to route: Dhubri to Guwahati - https://www.redbus.in/bus-tickets/dhubri-to-guwahati
Scrolling to load private bus items...
Initial scroll complete.
Checking for ASTC Buses section and 'Show buses' button...
KSRTC Kerala Buses section or 'Show buses' button not found or an 

Scrolling to load private bus items...
Initial scroll complete.
Checking for ASTC Buses section and 'Show buses' button...
KSRTC Kerala Buses section or 'Show buses' button not found or an error occurred: Message: 
Stacktrace:
	GetHandleVerifier [0x0x7ff65db2e925+77845]
	GetHandleVerifier [0x0x7ff65db2e980+77936]
	(No symbol) [0x0x7ff65d8e9cda]
	(No symbol) [0x0x7ff65d9406aa]
	(No symbol) [0x0x7ff65d94095c]
	(No symbol) [0x0x7ff65d993d07]
	(No symbol) [0x0x7ff65d96890f]
	(No symbol) [0x0x7ff65d990b07]
	(No symbol) [0x0x7ff65d9686a3]
	(No symbol) [0x0x7ff65d931791]
	(No symbol) [0x0x7ff65d932523]
	GetHandleVerifier [0x0x7ff65de0683d+3059501]
	GetHandleVerifier [0x0x7ff65de00bfd+3035885]
	GetHandleVerifier [0x0x7ff65de203f0+3164896]
	GetHandleVerifier [0x0x7ff65db48c2e+185118]
	GetHandleVerifier [0x0x7ff65db5053f+216111]
	GetHandleVerifier [0x0x7ff65db372d4+113092]
	GetHandleVerifier [0x0x7ff65db37489+113529]
	GetHandleVerifier [0x0x7ff65db1e288+10616]
	BaseThreadInitThunk [0x0x7ffb9120e

# UPSRTC

In [10]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
from urllib.parse import urljoin # To handle relative URLs

# URL of the website
BASE_URL = "https://www.redbus.in" # Define base URL for joining relative paths
URL = "https://www.redbus.in/online-booking/uttar-pradesh-state-road-transport-corporation-upsrtc/?utm_source=rtchometile"

def initialize_driver():
    """Initializes and returns a Chrome WebDriver."""
    driver = webdriver.Chrome()
    driver.maximize_window()
    return driver

def load_page(driver, url):
    """Loads a given URL and waits for a few seconds."""
    driver.get(url)
    time.sleep(10)  # Wait for the page to load

def scrape_ksrtc_routes(driver):
    """
    Scrapes bus route links and names from the initial KSRTC page.
    """
    route_elements = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, 'route'))
    )
    
    bus_routes_link = []
    bus_routes_name = []
    
    for route in route_elements:
        relative_path = route.get_attribute('href')
        if relative_path:
            full_url = urljoin(BASE_URL, relative_path)
            bus_routes_link.append(full_url)
            bus_routes_name.append(route.text.strip())
            
    return bus_routes_link, bus_routes_name

def scroll_to_end(driver):
    """Scrolls the page to the end to load dynamic content."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(15)  # Wait for new content to load
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    time.sleep(5) # Give a little more time after scroll for elements to render

def get_bus_details_from_page(driver, route_name, url):
    """
    Extracts bus details from the current page state using common locators.
    """
    bus_details_list = []
    
    # Find all elements for each detail using their class names
    bus_name_elements = driver.find_elements(By.CLASS_NAME, "travelsName___495898")
    bus_type_elements = driver.find_elements(By.CLASS_NAME, "busType___13ff4b")
    departing_time_elements = driver.find_elements(By.CLASS_NAME, "boardingTime___aced27")
    duration_elements = driver.find_elements(By.CLASS_NAME, "duration___5b44b1")
    reaching_time_elements = driver.find_elements(By.CLASS_NAME, "droppingTime___616c2f")
    star_rating_elements = driver.find_elements(By.CLASS_NAME, "rating___7724f1")
    price_elements = driver.find_elements(By.CLASS_NAME, "finalFare___898bb7")
    seat_availability_elements = driver.find_elements(By.CLASS_NAME, "totalSeats___ba48cf")

    # Iterate through the longest list to ensure we cover all potential bus listings
    max_elements = max(len(bus_name_elements), len(bus_type_elements), len(departing_time_elements),
                       len(duration_elements), len(reaching_time_elements), len(star_rating_elements),
                       len(price_elements), len(seat_availability_elements))

    for i in range(max_elements):
        # Safely get text, providing 'N/A' or '0' if element is not found for a given index
        bus_name = bus_name_elements[i].text if i < len(bus_name_elements) else 'N/A'
        bus_type = bus_type_elements[i].text if i < len(bus_type_elements) else 'N/A'
        departing_time = departing_time_elements[i].text if i < len(departing_time_elements) else 'N/A'
        duration = duration_elements[i].text if i < len(duration_elements) else 'N/A'
        reaching_time = reaching_time_elements[i].text if i < len(reaching_time_elements) else 'N/A'
        star_rating = star_rating_elements[i].text if i < len(star_rating_elements) else '0' 
        price = price_elements[i].text if i < len(price_elements) else 'N/A'
        seat_availability = seat_availability_elements[i].text if i < len(seat_availability_elements) else '0 seats' 

        bus_detail = {
            "Bus_Route_Name": route_name,
            "Bus_Route_Link": url,
            "Bus_Name": bus_name,
            "Bus_Type": bus_type,
            "Departing_Time": departing_time,
            "Duration": duration,
            "Reaching_Time": reaching_time,
            "Star_Rating": star_rating,
            "Price": price,
            "Seat_Availability": seat_availability
        }
        bus_details_list.append(bus_detail)
    return bus_details_list


def scrape_bus_details(driver, url, route_name):
    """
    Scrapes both private and KSRTC bus details for a specific route.
    Handles clicking the KSRTC "Show buses" button and collects ALL results.
    """
    print(f"Navigating to route: {route_name} - {url}")
    all_buses_for_this_route = [] # This list will hold all bus details including potential duplicates
    
    try:
        driver.get(url)
        time.sleep(10)  # Allow the page to load

        # --- Phase 1: Scrape initially visible (private) bus details ---
        print("Scrolling to load private bus items...") # Updated log
        scroll_to_end(driver)
        print("Initial scroll complete.") # Updated log
        
        initial_bus_details = get_bus_details_from_page(driver, route_name, url)
        all_buses_for_this_route.extend(initial_bus_details)
        # Note: The log for 'Scraped X initial bus entries' is removed to match the desired format.

        # --- Phase 2: Handle KSRTC section and scrape all (private + KSRTC) buses ---
        try:
            print("Checking for UPSRTC Buses section and 'Show buses' button...") 
            # Locate the main KSRTC block using its unique class
            ksrtc_block = WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.CLASS_NAME, "rtcTuple___1041ba"))
            )
            
            # Now, locate the "Show buses" button WITHIN this specific KSRTC block
            show_buses_button = ksrtc_block.find_element(By.CLASS_NAME, "primaryButton___469d04 ")
            
            if show_buses_button.is_displayed() and show_buses_button.is_enabled():
                # Removed "Found 'Show buses' button for KSRTC. Clicking it..." to match log
                driver.execute_script("arguments[0].click();", show_buses_button)
                time.sleep(10) # Wait for KSRTC buses to load after clicking
                # Removed "Clicked 'Show buses' button. Waiting for KSRTC buses to load." to match log

                # After clicking KSRTC, scroll again to load all new KSRTC buses
                print("Scrolling again to load all bus items..") # Updated log
                scroll_to_end(driver)
                # Removed "Post-KSRTC scroll complete. Scraping all bus details again..." to match log

                # Scrape ALL bus details again (this will include KSRTC and any remaining private)
                all_buses_after_ksrtc_expansion = get_bus_details_from_page(driver, route_name, url)
                all_buses_for_this_route.extend(all_buses_after_ksrtc_expansion) # Add all
                print(f"Scraped {len(all_buses_after_ksrtc_expansion)} bus entries after KSRTC expansion.") # Updated log
                print(f"Total bus entries for this route: {len(all_buses_for_this_route)}") # Updated log

            else:
                print("KSRTC 'Show buses' button not interactable or visible after finding block. Only initial buses scraped.")

        except Exception as e:
            print(f"KSRTC Kerala Buses section or 'Show buses' button not found or an error occurred: {e}. Only initial buses scraped.")
            # In this case, all_buses_for_this_route will only contain the initial scrape.

        return all_buses_for_this_route

    except Exception as e:
        print(f"Error occurred while navigating or scraping route {url}: {str(e)}")
        return []

# List to hold all bus details from all routes
all_bus_details_overall = []

# Main scraping process
def run_full_scraping():
    global driver
    driver = initialize_driver()
    load_page(driver, URL)  # Load the initial KSRTC page to get routes

    # Scrape all route links from the initial page
    all_bus_routes_link, all_bus_routes_name = scrape_ksrtc_routes(driver)
    print(f"Found {len(all_bus_routes_link)} routes to process.")

    for link, name in zip(all_bus_routes_link, all_bus_routes_name):
        bus_details_for_route = scrape_bus_details(driver, link, name)
        if bus_details_for_route:
            all_bus_details_overall.extend(bus_details_for_route)

    # Convert the list of dictionaries to a DataFrame
    df = pd.DataFrame(all_bus_details_overall)

    # Save the DataFrame to a CSV file
    csv_filename = 'Uttarpradesh_bus_details.csv' 
    df.to_csv(csv_filename, index=False)

    # Close the driver
    driver.quit()

    print(f"Scraping completed and data saved to '{csv_filename}'")
    print(f"Total bus entries across all routes: {len(df)}") # Updated log

# Execute the full scraping process
if __name__ == "__main__":
    run_full_scraping()

Found 10 routes to process.
Navigating to route: Bareilly (Uttar Pradesh) to Delhi - https://www.redbus.in/bus-tickets/bareilly-to-delhi
Scrolling to load private bus items...
Initial scroll complete.
Checking for UPSRTC Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 10 bus entries after KSRTC expansion.
Total bus entries for this route: 20
Navigating to route: Aligarh (uttar pradesh) to Delhi - https://www.redbus.in/bus-tickets/aligarh-uttar-pradesh-to-delhi
Scrolling to load private bus items...
Initial scroll complete.
Checking for UPSRTC Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 10 bus entries after KSRTC expansion.
Total bus entries for this route: 11
Navigating to route: Lucknow to Agra - https://www.redbus.in/bus-tickets/lucknow-to-agra
Scrolling to load private bus items...
Initial scroll complete.
Checking for UPSRTC Buses section and 'Show buses' button...
Scrolling again to load all bus 

# PEPSU

In [11]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
from urllib.parse import urljoin # To handle relative URLs

# URL of the website
BASE_URL = "https://www.redbus.in" # Define base URL for joining relative paths
URL = "https://www.redbus.in/online-booking/pepsu-punjab"

def initialize_driver():
    """Initializes and returns a Chrome WebDriver."""
    driver = webdriver.Chrome()
    driver.maximize_window()
    return driver

def load_page(driver, url):
    """Loads a given URL and waits for a few seconds."""
    driver.get(url)
    time.sleep(10)  # Wait for the page to load

def scrape_ksrtc_routes(driver):
    """
    Scrapes bus route links and names from the initial KSRTC page.
    """
    route_elements = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, 'route'))
    )
    
    bus_routes_link = []
    bus_routes_name = []
    
    for route in route_elements:
        relative_path = route.get_attribute('href')
        if relative_path:
            full_url = urljoin(BASE_URL, relative_path)
            bus_routes_link.append(full_url)
            bus_routes_name.append(route.text.strip())
            
    return bus_routes_link, bus_routes_name

def scroll_to_end(driver):
    """Scrolls the page to the end to load dynamic content."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(15)  # Wait for new content to load
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    time.sleep(5) # Give a little more time after scroll for elements to render

def get_bus_details_from_page(driver, route_name, url):
    """
    Extracts bus details from the current page state using common locators.
    """
    bus_details_list = []
    
    # Find all elements for each detail using their class names
    bus_name_elements = driver.find_elements(By.CLASS_NAME, "travelsName___495898")
    bus_type_elements = driver.find_elements(By.CLASS_NAME, "busType___13ff4b")
    departing_time_elements = driver.find_elements(By.CLASS_NAME, "boardingTime___aced27")
    duration_elements = driver.find_elements(By.CLASS_NAME, "duration___5b44b1")
    reaching_time_elements = driver.find_elements(By.CLASS_NAME, "droppingTime___616c2f")
    star_rating_elements = driver.find_elements(By.CLASS_NAME, "rating___7724f1")
    price_elements = driver.find_elements(By.CLASS_NAME, "finalFare___898bb7")
    seat_availability_elements = driver.find_elements(By.CLASS_NAME, "totalSeats___ba48cf")

    # Iterate through the longest list to ensure we cover all potential bus listings
    max_elements = max(len(bus_name_elements), len(bus_type_elements), len(departing_time_elements),
                       len(duration_elements), len(reaching_time_elements), len(star_rating_elements),
                       len(price_elements), len(seat_availability_elements))

    for i in range(max_elements):
        # Safely get text, providing 'N/A' or '0' if element is not found for a given index
        bus_name = bus_name_elements[i].text if i < len(bus_name_elements) else 'N/A'
        bus_type = bus_type_elements[i].text if i < len(bus_type_elements) else 'N/A'
        departing_time = departing_time_elements[i].text if i < len(departing_time_elements) else 'N/A'
        duration = duration_elements[i].text if i < len(duration_elements) else 'N/A'
        reaching_time = reaching_time_elements[i].text if i < len(reaching_time_elements) else 'N/A'
        star_rating = star_rating_elements[i].text if i < len(star_rating_elements) else '0' 
        price = price_elements[i].text if i < len(price_elements) else 'N/A'
        seat_availability = seat_availability_elements[i].text if i < len(seat_availability_elements) else '0 seats' 

        bus_detail = {
            "Bus_Route_Name": route_name,
            "Bus_Route_Link": url,
            "Bus_Name": bus_name,
            "Bus_Type": bus_type,
            "Departing_Time": departing_time,
            "Duration": duration,
            "Reaching_Time": reaching_time,
            "Star_Rating": star_rating,
            "Price": price,
            "Seat_Availability": seat_availability
        }
        bus_details_list.append(bus_detail)
    return bus_details_list


def scrape_bus_details(driver, url, route_name):
    """
    Scrapes both private and KSRTC bus details for a specific route.
    Handles clicking the KSRTC "Show buses" button and collects ALL results.
    """
    print(f"Navigating to route: {route_name} - {url}")
    all_buses_for_this_route = [] # This list will hold all bus details including potential duplicates
    
    try:
        driver.get(url)
        time.sleep(10)  # Allow the page to load

        # --- Phase 1: Scrape initially visible (private) bus details ---
        print("Scrolling to load private bus items...") # Updated log
        scroll_to_end(driver)
        print("Initial scroll complete.") # Updated log
        
        initial_bus_details = get_bus_details_from_page(driver, route_name, url)
        all_buses_for_this_route.extend(initial_bus_details)
        # Note: The log for 'Scraped X initial bus entries' is removed to match the desired format.

        # --- Phase 2: Handle KSRTC section and scrape all (private + KSRTC) buses ---
        try:
            print("Checking for PEPSU Buses section and 'Show buses' button...") 
            # Locate the main KSRTC block using its unique class
            ksrtc_block = WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.CLASS_NAME, "rtcTuple___1041ba"))
            )
            
            # Now, locate the "Show buses" button WITHIN this specific KSRTC block
            show_buses_button = ksrtc_block.find_element(By.CLASS_NAME, "primaryButton___469d04 ")
            
            if show_buses_button.is_displayed() and show_buses_button.is_enabled():
                # Removed "Found 'Show buses' button for KSRTC. Clicking it..." to match log
                driver.execute_script("arguments[0].click();", show_buses_button)
                time.sleep(10) # Wait for KSRTC buses to load after clicking
                # Removed "Clicked 'Show buses' button. Waiting for KSRTC buses to load." to match log

                # After clicking KSRTC, scroll again to load all new KSRTC buses
                print("Scrolling again to load all bus items..") # Updated log
                scroll_to_end(driver)
                # Removed "Post-KSRTC scroll complete. Scraping all bus details again..." to match log

                # Scrape ALL bus details again (this will include KSRTC and any remaining private)
                all_buses_after_ksrtc_expansion = get_bus_details_from_page(driver, route_name, url)
                all_buses_for_this_route.extend(all_buses_after_ksrtc_expansion) # Add all
                print(f"Scraped {len(all_buses_after_ksrtc_expansion)} bus entries after KSRTC expansion.") # Updated log
                print(f"Total bus entries for this route: {len(all_buses_for_this_route)}") # Updated log

            else:
                print("KSRTC 'Show buses' button not interactable or visible after finding block. Only initial buses scraped.")

        except Exception as e:
            print(f"KSRTC Kerala Buses section or 'Show buses' button not found or an error occurred: {e}. Only initial buses scraped.")
            # In this case, all_buses_for_this_route will only contain the initial scrape.

        return all_buses_for_this_route

    except Exception as e:
        print(f"Error occurred while navigating or scraping route {url}: {str(e)}")
        return []

# List to hold all bus details from all routes
all_bus_details_overall = []

# Main scraping process
def run_full_scraping():
    global driver
    driver = initialize_driver()
    load_page(driver, URL)  # Load the initial KSRTC page to get routes

    # Scrape all route links from the initial page
    all_bus_routes_link, all_bus_routes_name = scrape_ksrtc_routes(driver)
    print(f"Found {len(all_bus_routes_link)} routes to process.")

    for link, name in zip(all_bus_routes_link, all_bus_routes_name):
        bus_details_for_route = scrape_bus_details(driver, link, name)
        if bus_details_for_route:
            all_bus_details_overall.extend(bus_details_for_route)

    # Convert the list of dictionaries to a DataFrame
    df = pd.DataFrame(all_bus_details_overall)

    # Save the DataFrame to a CSV file
    csv_filename = 'Punjab_bus_details.csv' 
    df.to_csv(csv_filename, index=False)

    # Close the driver
    driver.quit()

    print(f"Scraping completed and data saved to '{csv_filename}'")
    print(f"Total bus entries across all routes: {len(df)}") # Updated log

# Execute the full scraping process
if __name__ == "__main__":
    run_full_scraping()

Found 10 routes to process.
Navigating to route: Delhi to Patiala - https://www.redbus.in/bus-tickets/delhi-to-patiala
Scrolling to load private bus items...
Initial scroll complete.
Checking for PEPSU Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 7 bus entries after KSRTC expansion.
Total bus entries for this route: 17
Navigating to route: Patiala to Delhi - https://www.redbus.in/bus-tickets/patiala-to-delhi
Scrolling to load private bus items...
Initial scroll complete.
Checking for PEPSU Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 7 bus entries after KSRTC expansion.
Total bus entries for this route: 17
Navigating to route: Ludhiana to Delhi - https://www.redbus.in/bus-tickets/ludhiana-to-delhi
Scrolling to load private bus items...
Initial scroll complete.
Checking for PEPSU Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 6 bus entries after KSRTC expansi

# CTU

In [12]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
from urllib.parse import urljoin # To handle relative URLs

# URL of the website
BASE_URL = "https://www.redbus.in" # Define base URL for joining relative paths
URL = "https://www.redbus.in/online-booking/chandigarh-transport-undertaking-ctu"

def initialize_driver():
    """Initializes and returns a Chrome WebDriver."""
    driver = webdriver.Chrome()
    driver.maximize_window()
    return driver

def load_page(driver, url):
    """Loads a given URL and waits for a few seconds."""
    driver.get(url)
    time.sleep(10)  # Wait for the page to load

def scrape_ksrtc_routes(driver):
    """
    Scrapes bus route links and names from the initial KSRTC page.
    """
    route_elements = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, 'route'))
    )
    
    bus_routes_link = []
    bus_routes_name = []
    
    for route in route_elements:
        relative_path = route.get_attribute('href')
        if relative_path:
            full_url = urljoin(BASE_URL, relative_path)
            bus_routes_link.append(full_url)
            bus_routes_name.append(route.text.strip())
            
    return bus_routes_link, bus_routes_name

def scroll_to_end(driver):
    """Scrolls the page to the end to load dynamic content."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(15)  # Wait for new content to load
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    time.sleep(5) # Give a little more time after scroll for elements to render

def get_bus_details_from_page(driver, route_name, url):
    """
    Extracts bus details from the current page state using common locators.
    """
    bus_details_list = []
    
    # Find all elements for each detail using their class names
    bus_name_elements = driver.find_elements(By.CLASS_NAME, "travelsName___495898")
    bus_type_elements = driver.find_elements(By.CLASS_NAME, "busType___13ff4b")
    departing_time_elements = driver.find_elements(By.CLASS_NAME, "boardingTime___aced27")
    duration_elements = driver.find_elements(By.CLASS_NAME, "duration___5b44b1")
    reaching_time_elements = driver.find_elements(By.CLASS_NAME, "droppingTime___616c2f")
    star_rating_elements = driver.find_elements(By.CLASS_NAME, "rating___7724f1")
    price_elements = driver.find_elements(By.CLASS_NAME, "finalFare___898bb7")
    seat_availability_elements = driver.find_elements(By.CLASS_NAME, "totalSeats___ba48cf")

    # Iterate through the longest list to ensure we cover all potential bus listings
    max_elements = max(len(bus_name_elements), len(bus_type_elements), len(departing_time_elements),
                       len(duration_elements), len(reaching_time_elements), len(star_rating_elements),
                       len(price_elements), len(seat_availability_elements))

    for i in range(max_elements):
        # Safely get text, providing 'N/A' or '0' if element is not found for a given index
        bus_name = bus_name_elements[i].text if i < len(bus_name_elements) else 'N/A'
        bus_type = bus_type_elements[i].text if i < len(bus_type_elements) else 'N/A'
        departing_time = departing_time_elements[i].text if i < len(departing_time_elements) else 'N/A'
        duration = duration_elements[i].text if i < len(duration_elements) else 'N/A'
        reaching_time = reaching_time_elements[i].text if i < len(reaching_time_elements) else 'N/A'
        star_rating = star_rating_elements[i].text if i < len(star_rating_elements) else '0' 
        price = price_elements[i].text if i < len(price_elements) else 'N/A'
        seat_availability = seat_availability_elements[i].text if i < len(seat_availability_elements) else '0 seats' 

        bus_detail = {
            "Bus_Route_Name": route_name,
            "Bus_Route_Link": url,
            "Bus_Name": bus_name,
            "Bus_Type": bus_type,
            "Departing_Time": departing_time,
            "Duration": duration,
            "Reaching_Time": reaching_time,
            "Star_Rating": star_rating,
            "Price": price,
            "Seat_Availability": seat_availability
        }
        bus_details_list.append(bus_detail)
    return bus_details_list


def scrape_bus_details(driver, url, route_name):
    """
    Scrapes both private and KSRTC bus details for a specific route.
    Handles clicking the KSRTC "Show buses" button and collects ALL results.
    """
    print(f"Navigating to route: {route_name} - {url}")
    all_buses_for_this_route = [] # This list will hold all bus details including potential duplicates
    
    try:
        driver.get(url)
        time.sleep(10)  # Allow the page to load

        # --- Phase 1: Scrape initially visible (private) bus details ---
        print("Scrolling to load private bus items...") # Updated log
        scroll_to_end(driver)
        print("Initial scroll complete.") # Updated log
        
        initial_bus_details = get_bus_details_from_page(driver, route_name, url)
        all_buses_for_this_route.extend(initial_bus_details)
        # Note: The log for 'Scraped X initial bus entries' is removed to match the desired format.

        # --- Phase 2: Handle KSRTC section and scrape all (private + KSRTC) buses ---
        try:
            print("Checking for CTU Buses section and 'Show buses' button...") 
            # Locate the main KSRTC block using its unique class
            ksrtc_block = WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.CLASS_NAME, "rtcTuple___1041ba"))
            )
            
            # Now, locate the "Show buses" button WITHIN this specific KSRTC block
            show_buses_button = ksrtc_block.find_element(By.CLASS_NAME, "primaryButton___469d04 ")
            
            if show_buses_button.is_displayed() and show_buses_button.is_enabled():
                # Removed "Found 'Show buses' button for KSRTC. Clicking it..." to match log
                driver.execute_script("arguments[0].click();", show_buses_button)
                time.sleep(10) # Wait for KSRTC buses to load after clicking
                # Removed "Clicked 'Show buses' button. Waiting for KSRTC buses to load." to match log

                # After clicking KSRTC, scroll again to load all new KSRTC buses
                print("Scrolling again to load all bus items..") # Updated log
                scroll_to_end(driver)
                # Removed "Post-KSRTC scroll complete. Scraping all bus details again..." to match log

                # Scrape ALL bus details again (this will include KSRTC and any remaining private)
                all_buses_after_ksrtc_expansion = get_bus_details_from_page(driver, route_name, url)
                all_buses_for_this_route.extend(all_buses_after_ksrtc_expansion) # Add all
                print(f"Scraped {len(all_buses_after_ksrtc_expansion)} bus entries after KSRTC expansion.") # Updated log
                print(f"Total bus entries for this route: {len(all_buses_for_this_route)}") # Updated log

            else:
                print("KSRTC 'Show buses' button not interactable or visible after finding block. Only initial buses scraped.")

        except Exception as e:
            print(f"KSRTC Kerala Buses section or 'Show buses' button not found or an error occurred: {e}. Only initial buses scraped.")
            # In this case, all_buses_for_this_route will only contain the initial scrape.

        return all_buses_for_this_route

    except Exception as e:
        print(f"Error occurred while navigating or scraping route {url}: {str(e)}")
        return []

# List to hold all bus details from all routes
all_bus_details_overall = []

# Main scraping process
def run_full_scraping():
    global driver
    driver = initialize_driver()
    load_page(driver, URL)  # Load the initial KSRTC page to get routes

    # Scrape all route links from the initial page
    all_bus_routes_link, all_bus_routes_name = scrape_ksrtc_routes(driver)
    print(f"Found {len(all_bus_routes_link)} routes to process.")

    for link, name in zip(all_bus_routes_link, all_bus_routes_name):
        bus_details_for_route = scrape_bus_details(driver, link, name)
        if bus_details_for_route:
            all_bus_details_overall.extend(bus_details_for_route)

    # Convert the list of dictionaries to a DataFrame
    df = pd.DataFrame(all_bus_details_overall)

    # Save the DataFrame to a CSV file
    csv_filename = 'Chandigarh_bus_details.csv' 
    df.to_csv(csv_filename, index=False)

    # Close the driver
    driver.quit()

    print(f"Scraping completed and data saved to '{csv_filename}'")
    print(f"Total bus entries across all routes: {len(df)}") # Updated log

# Execute the full scraping process
if __name__ == "__main__":
    run_full_scraping()

Found 10 routes to process.
Navigating to route: Chandigarh to Delhi - https://www.redbus.in/bus-tickets/chandigarh-to-delhi
Scrolling to load private bus items...
Initial scroll complete.
Checking for CTU Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 10 bus entries after KSRTC expansion.
Total bus entries for this route: 20
Navigating to route: Delhi to Chandigarh - https://www.redbus.in/bus-tickets/delhi-to-chandigarh
Scrolling to load private bus items...
Initial scroll complete.
Checking for CTU Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 10 bus entries after KSRTC expansion.
Total bus entries for this route: 20
Navigating to route: Yamuna Nagar to Chandigarh - https://www.redbus.in/bus-tickets/yamuna-nagar-to-chandigarh
Scrolling to load private bus items...
Initial scroll complete.
Checking for CTU Buses section and 'Show buses' button...
Scrolling again to load all bus items..
Scraped 3 bus e

# BSRTC

In [13]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
from urllib.parse import urljoin # To handle relative URLs

# URL of the website
BASE_URL = "https://www.redbus.in" # Define base URL for joining relative paths
URL = "https://www.redbus.in/online-booking/bihar-state-road-transport-corporation-bsrtc/?utm_source=rtchometile"

def initialize_driver():
    """Initializes and returns a Chrome WebDriver."""
    driver = webdriver.Chrome()
    driver.maximize_window()
    return driver

def load_page(driver, url):
    """Loads a given URL and waits for a few seconds."""
    driver.get(url)
    time.sleep(10)  # Wait for the page to load

def scrape_ksrtc_routes(driver):
    """
    Scrapes bus route links and names from the initial KSRTC page.
    """
    route_elements = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, 'route'))
    )
    
    bus_routes_link = []
    bus_routes_name = []
    
    for route in route_elements:
        relative_path = route.get_attribute('href')
        if relative_path:
            full_url = urljoin(BASE_URL, relative_path)
            bus_routes_link.append(full_url)
            bus_routes_name.append(route.text.strip())
            
    return bus_routes_link, bus_routes_name

def scroll_to_end(driver):
    """Scrolls the page to the end to load dynamic content."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(15)  # Wait for new content to load
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height
    time.sleep(5) # Give a little more time after scroll for elements to render

def get_bus_details_from_page(driver, route_name, url):
    """
    Extracts bus details from the current page state using common locators.
    """
    bus_details_list = []
    
    # Find all elements for each detail using their class names
    bus_name_elements = driver.find_elements(By.CLASS_NAME, "travelsName___495898")
    bus_type_elements = driver.find_elements(By.CLASS_NAME, "busType___13ff4b")
    departing_time_elements = driver.find_elements(By.CLASS_NAME, "boardingTime___aced27")
    duration_elements = driver.find_elements(By.CLASS_NAME, "duration___5b44b1")
    reaching_time_elements = driver.find_elements(By.CLASS_NAME, "droppingTime___616c2f")
    star_rating_elements = driver.find_elements(By.CLASS_NAME, "rating___7724f1")
    price_elements = driver.find_elements(By.CLASS_NAME, "finalFare___898bb7")
    seat_availability_elements = driver.find_elements(By.CLASS_NAME, "totalSeats___ba48cf")

    # Iterate through the longest list to ensure we cover all potential bus listings
    max_elements = max(len(bus_name_elements), len(bus_type_elements), len(departing_time_elements),
                       len(duration_elements), len(reaching_time_elements), len(star_rating_elements),
                       len(price_elements), len(seat_availability_elements))

    for i in range(max_elements):
        # Safely get text, providing 'N/A' or '0' if element is not found for a given index
        bus_name = bus_name_elements[i].text if i < len(bus_name_elements) else 'N/A'
        bus_type = bus_type_elements[i].text if i < len(bus_type_elements) else 'N/A'
        departing_time = departing_time_elements[i].text if i < len(departing_time_elements) else 'N/A'
        duration = duration_elements[i].text if i < len(duration_elements) else 'N/A'
        reaching_time = reaching_time_elements[i].text if i < len(reaching_time_elements) else 'N/A'
        star_rating = star_rating_elements[i].text if i < len(star_rating_elements) else '0' 
        price = price_elements[i].text if i < len(price_elements) else 'N/A'
        seat_availability = seat_availability_elements[i].text if i < len(seat_availability_elements) else '0 seats' 

        bus_detail = {
            "Bus_Route_Name": route_name,
            "Bus_Route_Link": url,
            "Bus_Name": bus_name,
            "Bus_Type": bus_type,
            "Departing_Time": departing_time,
            "Duration": duration,
            "Reaching_Time": reaching_time,
            "Star_Rating": star_rating,
            "Price": price,
            "Seat_Availability": seat_availability
        }
        bus_details_list.append(bus_detail)
    return bus_details_list


def scrape_bus_details(driver, url, route_name):
    """
    Scrapes both private and KSRTC bus details for a specific route.
    Handles clicking the KSRTC "Show buses" button and collects ALL results.
    """
    print(f"Navigating to route: {route_name} - {url}")
    all_buses_for_this_route = [] # This list will hold all bus details including potential duplicates
    
    try:
        driver.get(url)
        time.sleep(10)  # Allow the page to load

        # --- Phase 1: Scrape initially visible (private) bus details ---
        print("Scrolling to load private bus items...") # Updated log
        scroll_to_end(driver)
        print("Initial scroll complete.") # Updated log
        
        initial_bus_details = get_bus_details_from_page(driver, route_name, url)
        all_buses_for_this_route.extend(initial_bus_details)
        # Note: The log for 'Scraped X initial bus entries' is removed to match the desired format.

        # --- Phase 2: Handle KSRTC section and scrape all (private + KSRTC) buses ---
        try:
            print("Checking for BSRTC Buses section and 'Show buses' button...") 
            # Locate the main KSRTC block using its unique class
            ksrtc_block = WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.CLASS_NAME, "rtcTuple___1041ba"))
            )
            
            # Now, locate the "Show buses" button WITHIN this specific KSRTC block
            show_buses_button = ksrtc_block.find_element(By.CLASS_NAME, "primaryButton___469d04 ")
            
            if show_buses_button.is_displayed() and show_buses_button.is_enabled():
                # Removed "Found 'Show buses' button for KSRTC. Clicking it..." to match log
                driver.execute_script("arguments[0].click();", show_buses_button)
                time.sleep(10) # Wait for KSRTC buses to load after clicking
                # Removed "Clicked 'Show buses' button. Waiting for KSRTC buses to load." to match log

                # After clicking KSRTC, scroll again to load all new KSRTC buses
                print("Scrolling again to load all bus items..") # Updated log
                scroll_to_end(driver)
                # Removed "Post-KSRTC scroll complete. Scraping all bus details again..." to match log

                # Scrape ALL bus details again (this will include KSRTC and any remaining private)
                all_buses_after_ksrtc_expansion = get_bus_details_from_page(driver, route_name, url)
                all_buses_for_this_route.extend(all_buses_after_ksrtc_expansion) # Add all
                print(f"Scraped {len(all_buses_after_ksrtc_expansion)} bus entries after KSRTC expansion.") # Updated log
                print(f"Total bus entries for this route: {len(all_buses_for_this_route)}") # Updated log

            else:
                print("KSRTC 'Show buses' button not interactable or visible after finding block. Only initial buses scraped.")

        except Exception as e:
            print(f"KSRTC Kerala Buses section or 'Show buses' button not found or an error occurred: {e}. Only initial buses scraped.")
            # In this case, all_buses_for_this_route will only contain the initial scrape.

        return all_buses_for_this_route

    except Exception as e:
        print(f"Error occurred while navigating or scraping route {url}: {str(e)}")
        return []

# List to hold all bus details from all routes
all_bus_details_overall = []

# Main scraping process
def run_full_scraping():
    global driver
    driver = initialize_driver()
    load_page(driver, URL)  # Load the initial KSRTC page to get routes

    # Scrape all route links from the initial page
    all_bus_routes_link, all_bus_routes_name = scrape_ksrtc_routes(driver)
    print(f"Found {len(all_bus_routes_link)} routes to process.")

    for link, name in zip(all_bus_routes_link, all_bus_routes_name):
        bus_details_for_route = scrape_bus_details(driver, link, name)
        if bus_details_for_route:
            all_bus_details_overall.extend(bus_details_for_route)

    # Convert the list of dictionaries to a DataFrame
    df = pd.DataFrame(all_bus_details_overall)

    # Save the DataFrame to a CSV file
    csv_filename = 'Bihar_bus_details.csv' 
    df.to_csv(csv_filename, index=False)

    # Close the driver
    driver.quit()

    print(f"Scraping completed and data saved to '{csv_filename}'")
    print(f"Total bus entries across all routes: {len(df)}") # Updated log

# Execute the full scraping process
if __name__ == "__main__":
    run_full_scraping()

Found 10 routes to process.
Navigating to route: Patna (Bihar) to Motihari - https://www.redbus.in/bus-tickets/patna-to-motihari
Scrolling to load private bus items...
Initial scroll complete.
Checking for BSRTC Buses section and 'Show buses' button...
KSRTC Kerala Buses section or 'Show buses' button not found or an error occurred: Message: 
Stacktrace:
	GetHandleVerifier [0x0x7ff65db2e925+77845]
	GetHandleVerifier [0x0x7ff65db2e980+77936]
	(No symbol) [0x0x7ff65d8e9cda]
	(No symbol) [0x0x7ff65d9406aa]
	(No symbol) [0x0x7ff65d94095c]
	(No symbol) [0x0x7ff65d993d07]
	(No symbol) [0x0x7ff65d96890f]
	(No symbol) [0x0x7ff65d990b07]
	(No symbol) [0x0x7ff65d9686a3]
	(No symbol) [0x0x7ff65d931791]
	(No symbol) [0x0x7ff65d932523]
	GetHandleVerifier [0x0x7ff65de0683d+3059501]
	GetHandleVerifier [0x0x7ff65de00bfd+3035885]
	GetHandleVerifier [0x0x7ff65de203f0+3164896]
	GetHandleVerifier [0x0x7ff65db48c2e+185118]
	GetHandleVerifier [0x0x7ff65db5053f+216111]
	GetHandleVerifier [0x0x7ff65db372d4+11

Scrolling to load private bus items...
Initial scroll complete.
Checking for BSRTC Buses section and 'Show buses' button...
KSRTC Kerala Buses section or 'Show buses' button not found or an error occurred: Message: 
Stacktrace:
	GetHandleVerifier [0x0x7ff65db2e925+77845]
	GetHandleVerifier [0x0x7ff65db2e980+77936]
	(No symbol) [0x0x7ff65d8e9cda]
	(No symbol) [0x0x7ff65d9406aa]
	(No symbol) [0x0x7ff65d94095c]
	(No symbol) [0x0x7ff65d993d07]
	(No symbol) [0x0x7ff65d96890f]
	(No symbol) [0x0x7ff65d990b07]
	(No symbol) [0x0x7ff65d9686a3]
	(No symbol) [0x0x7ff65d931791]
	(No symbol) [0x0x7ff65d932523]
	GetHandleVerifier [0x0x7ff65de0683d+3059501]
	GetHandleVerifier [0x0x7ff65de00bfd+3035885]
	GetHandleVerifier [0x0x7ff65de203f0+3164896]
	GetHandleVerifier [0x0x7ff65db48c2e+185118]
	GetHandleVerifier [0x0x7ff65db5053f+216111]
	GetHandleVerifier [0x0x7ff65db372d4+113092]
	GetHandleVerifier [0x0x7ff65db37489+113529]
	GetHandleVerifier [0x0x7ff65db1e288+10616]
	BaseThreadInitThunk [0x0x7ffb9120