In [9]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import time
import pandas as pd


# URL = [
#     {'key':'andhra','link':"https://www.redbus.in/online-booking/apsrtc"},
#     {'key':'telangana','link':"https://www.redbus.in/online-booking/tsrtc"}
# ]

# State transport operators to scrape. Each entry's 'key' is used as the
# output CSV file name by the driver loop at the bottom of this cell, and
# 'link' is the operator's redBus landing page.
URL = [
    {'key': 'himachal', 'link': "https://www.redbus.in/online-booking/hrtc"},
    {'key': 'rajasthan', 'link': "https://www.redbus.in/online-booking/rsrtc"},
    {'key': 'punjab', 'link': "https://www.redbus.in/online-booking/pepsu"},
    {'key': 'assam', 'link': "https://www.redbus.in/online-booking/astc"},
    {'key': 'kadamba', 'link': "https://www.redbus.in/online-booking/ktcl"},
    # Key was previously misspelled 'uttaroradesh', which produced a
    # misspelled CSV file name.
    {'key': 'uttarpradesh', 'link': "https://www.redbus.in/online-booking/uttar-pradesh-state-road-transport-corporation-upsrtc"},
    {'key': 'jammukashmir', 'link': "https://www.redbus.in/online-booking/jksrtc"},
]


def initialize_driver():
    """Start a Chrome WebDriver session and maximize its window.

    Returns:
        The newly created ``webdriver.Chrome`` instance.
    """
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser


def load_page(driver, url, wait_seconds=5):
    """Navigate ``driver`` to ``url`` and pause so the page can settle.

    Args:
        driver: Object exposing ``get(url)`` (a Selenium WebDriver here).
        url: Address to open.
        wait_seconds: Length of the fixed pause after navigation, in
            seconds. Defaults to 5, the value that used to be hard-coded.
    """
    driver.get(url)
    # Unconditional pause: no readiness condition is checked here, callers
    # that need one use WebDriverWait afterwards.
    time.sleep(wait_seconds)


def scroll_to_bottom(driver, pause_seconds=2, max_scrolls=None):
    """Scroll the page down repeatedly until its height stops growing.

    Args:
        driver: Object exposing ``execute_script(script)`` (a Selenium
            WebDriver here).
        pause_seconds: Pause after each scroll before the height is
            re-measured, in seconds. Defaults to 2, the value that used to
            be hard-coded.
        max_scrolls: Optional upper bound on the number of scrolls. ``None``
            (the default) keeps the original behaviour of scrolling until
            the height is unchanged, which never ends on a page that keeps
            growing.
    """
    height_script = "return document.body.scrollHeight"
    last_height = driver.execute_script(height_script)
    scrolls_done = 0
    while max_scrolls is None or scrolls_done < max_scrolls:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        scrolls_done += 1
        time.sleep(pause_seconds)
        new_height = driver.execute_script(height_script)
        if new_height == last_height:
            # Nothing new was appended by the last scroll: bottom reached.
            break
        last_height = new_height


def scrape_bus_routes(driver):
    """Collect the link and display name of every route on the current page.

    Elements are looked up by the class name ``route``. An element is kept
    only when it has both a non-empty ``href`` and non-empty text, so the
    two returned lists always have the same length and entry ``k`` of one
    belongs to entry ``k`` of the other. (Filtering the links and the names
    independently could shift one list against the other and pair a route
    name with the wrong link.)

    Args:
        driver: Selenium WebDriver positioned on an operator's route page.

    Returns:
        A ``(links, names)`` tuple of equally long lists. Both are empty if
        any error occurs while scraping; the error is printed, not raised.
    """
    try:
        bus_routes_link = []
        bus_routes_name = []
        for route in driver.find_elements(By.CLASS_NAME, 'route'):
            href = route.get_attribute('href')
            name = route.text.strip()
            if href and name:
                bus_routes_link.append(href)
                bus_routes_name.append(name)
        return bus_routes_link, bus_routes_name
    except Exception as e:
        # Best effort: a failure on one page must not abort the whole run.
        print(f"Error while scraping bus routes: {str(e)}")
        return [], []


def _click_view_buses_buttons(driver, route_name):
    """Click every element of class ``button`` whose text is "VIEW BUSES"."""
    try:
        all_buttons = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.CLASS_NAME, "button"))
        )
        # Read all labels before clicking anything, so the selection is made
        # against the page as it was when the buttons were located.
        view_buses_buttons = [button for button in all_buttons if button.text.strip() == "VIEW BUSES"]
        for button in view_buses_buttons:
            try:
                # Scroll and click through JavaScript rather than native
                # WebDriver calls.
                driver.execute_script("arguments[0].scrollIntoView();", button)
                driver.execute_script("arguments[0].click();", button)
                time.sleep(3)
            except Exception as e:
                # One failing button must not stop the remaining ones.
                print(f"Error clicking a 'View Buses' button: {str(e)}")
    except TimeoutException:
        print(f'No "View Buses" buttons found on {route_name}')


def _text_at(elements, index, default):
    """Return ``elements[index].text``, or ``default`` if the list is too short."""
    return elements[index].text if index < len(elements) else default


def scrape_bus_details(driver, url, route_name):
    """Scrape every bus listed on a single route page.

    Opens ``url``, clicks any "VIEW BUSES" buttons, scrolls to the bottom of
    the page, then reads one list of elements per field and combines them
    by position.

    Args:
        driver: Selenium WebDriver to navigate with.
        url: Route page to open; also stored as ``Route_Link``.
        route_name: Human-readable route name; stored as ``Route_Name`` and
            used in log messages.

    Returns:
        A list with one dict per bus-name element found, with the keys
        ``Route_Name``, ``Route_Link``, ``Bus_Name``, ``Bus_Type``,
        ``Departing_Time``, ``Duration``, ``Reaching_Time``, ``Star_Rating``,
        ``Price`` and ``Seat_Availability``. Returns an empty list if any
        error occurs; the error is printed, not raised.
    """
    try:
        driver.get(url)
        time.sleep(5)

        _click_view_buses_buttons(driver, route_name)

        scroll_to_bottom(driver)

        # NOTE(review): the dotted values are compound class lists passed
        # through By.CLASS_NAME; this presumably relies on the installed
        # Selenium version treating them as a CSS class chain - confirm.
        bus_name_elements = driver.find_elements(By.CLASS_NAME, "travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CLASS_NAME, "bus-type.f-12.m-top-16.l-color.evBus")
        departing_time_elements = driver.find_elements(By.CLASS_NAME, "dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CLASS_NAME, "dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CLASS_NAME, "bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.XPATH, "//div[@class='rating-sec lh-24']")
        price_elements = driver.find_elements(By.CLASS_NAME, "fare.d-block")
        seat_availability_elements = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        # NOTE(review): fields are matched to buses purely by list position.
        # If some bus has no element for a field (e.g. no rating), later
        # values in that list shift onto the wrong bus - verify on real pages.
        bus_details = []
        for i, bus_name_element in enumerate(bus_name_elements):
            bus_details.append({
                "Route_Name": route_name,
                "Route_Link": url,
                "Bus_Name": bus_name_element.text,
                "Bus_Type": _text_at(bus_type_elements, i, "N/A"),
                "Departing_Time": _text_at(departing_time_elements, i, "N/A"),
                "Duration": _text_at(duration_elements, i, "N/A"),
                "Reaching_Time": _text_at(reaching_time_elements, i, "N/A"),
                "Star_Rating": _text_at(star_rating_elements, i, "0"),
                "Price": _text_at(price_elements, i, "N/A"),
                "Seat_Availability": _text_at(seat_availability_elements, i, "0"),
            })
        return bus_details
    except Exception as e:
        # Best effort: a broken route page yields no rows instead of
        # aborting the whole scrape.
        print(f"Error occurred while scraping bus details for {url}: {str(e)}")
        return []


def scrape_all_pages(URL):
    """Scrape every route on every pagination page of one operator.

    A fresh browser is started for the call and always closed at the end.
    The number of pages is taken from the count of pagination tab elements;
    if none appear within 10 seconds a single page is assumed. For each page
    the route links are collected and every route is scraped for bus details.

    Args:
        URL: Landing page of the operator on redBus.

    Returns:
        A list of bus-detail dicts, as produced by ``scrape_bus_details``,
        for all routes that could be reached.
    """
    collected = []
    browser = initialize_driver()
    try:
        load_page(browser, URL)

        tabs_locator = (By.XPATH, "//div[contains(@class, 'DC_117_pageTabs')]")
        try:
            tabs = WebDriverWait(browser, 10).until(
                EC.presence_of_all_elements_located(tabs_locator)
            )
        except TimeoutException:
            print("Pagination elements not found. Assuming a single page.")
            total_pages = 1
        else:
            total_pages = len(tabs)

        print(f"Total pages found: {total_pages}")

        for page in range(1, total_pages + 1):
            if page != 1:
                # Scraping the previous page's routes navigated away, so
                # reload the landing page before selecting the next tab.
                tab_xpath = f"//div[contains(@class, 'DC_117_pageTabs ')][text()='{page}']"
                try:
                    browser.get(URL)
                    tab = WebDriverWait(browser, 10).until(
                        EC.element_to_be_clickable((By.XPATH, tab_xpath))
                    )
                    browser.execute_script("arguments[0].scrollIntoView();", tab)
                    browser.execute_script("arguments[0].click();", tab)
                    time.sleep(5)
                except TimeoutException:
                    print(f"Failed to navigate to page {page}. Skipping.")
                    continue

            links, names = scrape_bus_routes(browser)
            for link, name in zip(links, names):
                collected.extend(scrape_bus_details(browser, link, name))

    finally:
        browser.quit()
    return collected


# Scrape each configured operator and write its buses to '<key>.csv'.
# The loop uses its own names for the current entry and link: rebinding
# URL here would replace the module-level list with a string once the
# loop has run.
for state in URL:
    state_link = state['link']
    KeyName = state['key']
    all_bus_details = scrape_all_pages(state_link)
    df = pd.DataFrame(all_bus_details)
    df.to_csv(f'{KeyName}.csv', index=False)
    print(f'Scraping completed. Data saved to {KeyName}.csv.')
    



Total pages found: 4
No "View Buses" buttons found on Delhi to Nalagarh
No "View Buses" buttons found on Chandigarh to Manali
No "View Buses" buttons found on Kullu to Shimla
No "View Buses" buttons found on Jassur to Chandigarh
Scraping completed. Data saved to himachal.csv.
Total pages found: 2
Scraping completed. Data saved to rajasthan.csv.
Total pages found: 3
No "View Buses" buttons found on Chandigarh to Bathinda
No "View Buses" buttons found on Delhi Airport to Phagwara
No "View Buses" buttons found on Delhi Airport to Kapurthala
No "View Buses" buttons found on Amritsar to Patiala
Scraping completed. Data saved to punjab.csv.
Total pages found: 5
No "View Buses" buttons found on Guwahati to Tezpur
No "View Buses" buttons found on Goalpara to Guwahati
No "View Buses" buttons found on Sibsagar (Assam) to North Lakhimpur
No "View Buses" buttons found on Jorhat to North Lakhimpur
No "View Buses" buttons found on Dhekiajuli to Guwahati
No "View Buses" buttons found on Jorhat to Dib