In [1]:
import pandas as pd

In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import time

driver = webdriver.Chrome()
wait = WebDriverWait(driver, 30)  # Increased timeout

driver.get("https://www.redbus.in/online-booking/uttar-pradesh-state-road-transport-corporation-upsrtc/?utm_source=rtchometile")

UPSRTC = []

def scrape_page():
    # Locate elements  (container)
    routes = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "route_link")))

    # Loop through each route to extract details
    for route in routes:
        try:
            # Extract route name and link from the route element
            route_name_element = route.find_element(By.XPATH, ".//div[@class='route_details']")
            route_link_element = route.find_element(By.XPATH, ".//a")

            route_name = route_name_element.text
            route_link = route_link_element.get_attribute('href')

            # Append extracted data to list
            UPSRTC.append({
                'route_name': route_name,
                'route_link': route_link
            })

        except Exception as e:
            print(f"An error occurred: {e}")
            continue

# Scrape data from the first 5 pages
for page_number in range(1, 6):
    scrape_page()
    if page_number < 5:  # Don't try to click next on the last page
        try:
            # Locate the pagination container
            pagination_container = wait.until(EC.presence_of_element_located(
                (By.XPATH, '//*[@id="root"]/div/div[4]/div[12]')
            ))

            # Locate the next page button within the container
            next_page_button = pagination_container.find_element(
                By.XPATH, f'.//div[contains(@class, "DC_117_pageTabs") and text()="{page_number + 1}"]'
            )

            # Ensure the next page button is in view
            actions = ActionChains(driver)
            actions.move_to_element(next_page_button).perform()
            time.sleep(1)  # Wait for a bit after scrolling

            # Log the action
            print(f"Clicking on page {page_number + 1}")

            # Click the next page button
            next_page_button.click()

            # Wait for the page number to update to the next page
            wait.until(EC.text_to_be_present_in_element(
                (By.XPATH, '//div[contains(@class, "DC_117_pageTabs DC_117_pageActive")]'), str(page_number + 1)))

            # Log the successful page navigation
            print(f"Successfully navigated to page {page_number + 1}")

            # Wait for a short duration to ensure the next page loads completely
            time.sleep(3)
        except Exception as e:
            print(f"An error occurred while navigating to page {page_number + 1}: {e}")
            break

# Print the scraped data
for entry in UPSRTC:
    print(entry)

# Convert to DataFrame and save to CSV
df = pd.DataFrame(UPSRTC)
df.to_csv("upsrtc_details.csv", index=False)

# Close the driver
driver.quit()

df.head()  # Display the first few rows of the DataFrame

Clicking on page 2
Successfully navigated to page 2
Clicking on page 3
Successfully navigated to page 3
Clicking on page 4
Successfully navigated to page 4
Clicking on page 5
Successfully navigated to page 5
{'route_name': 'Bareilly to Delhi\nFrom INR 413', 'route_link': 'https://www.redbus.in/bus-tickets/bareilly-to-delhi'}
{'route_name': 'Aligarh (uttar pradesh) to Delhi\nFrom INR 203', 'route_link': 'https://www.redbus.in/bus-tickets/aligarh-uttar-pradesh-to-delhi'}
{'route_name': 'Delhi to Bareilly\nFrom INR 411', 'route_link': 'https://www.redbus.in/bus-tickets/delhi-to-bareilly'}
{'route_name': 'Delhi to Aligarh (uttar pradesh)\nFrom INR 223', 'route_link': 'https://www.redbus.in/bus-tickets/delhi-to-aligarh-uttar-pradesh'}
{'route_name': 'Farrukhabad (Uttar Pradesh) to Delhi\nFrom INR 479', 'route_link': 'https://www.redbus.in/bus-tickets/farrukhabad-up-to-delhi'}
{'route_name': 'Badaun to Delhi\nFrom INR 334', 'route_link': 'https://www.redbus.in/bus-tickets/badaun-to-delhi'}
{

Unnamed: 0,route_name,route_link
0,Bareilly to Delhi\nFrom INR 413,https://www.redbus.in/bus-tickets/bareilly-to-...
1,Aligarh (uttar pradesh) to Delhi\nFrom INR 203,https://www.redbus.in/bus-tickets/aligarh-utta...
2,Delhi to Bareilly\nFrom INR 411,https://www.redbus.in/bus-tickets/delhi-to-bar...
3,Delhi to Aligarh (uttar pradesh)\nFrom INR 223,https://www.redbus.in/bus-tickets/delhi-to-ali...
4,Farrukhabad (Uttar Pradesh) to Delhi\nFrom INR...,https://www.redbus.in/bus-tickets/farrukhabad-...
