In [None]:
import pymysql

# Register pymysql under the module name "MySQLdb" so that code which does
# `import MySQLdb` transparently gets pymysql instead.
pymysql.install_as_MySQLdb()

def fetch_redbus_data():
    """
    Fetch every record from the 'redbus1' table and print the result.

    Connects to the 'guvi' database on a MySQL server at localhost as
    user 'root' (password 'root'), runs ``SELECT * FROM redbus1`` and prints
    the rows returned by ``fetchall()``.

    Any ``pymysql.MySQLError`` (connection or query failure) is reported on
    stdout instead of being raised. The connection is always closed if it
    was opened.

    Returns:
        None
    """
    # Bind the name before the try block: if connect() itself fails, the
    # finally clause must still be able to test it without raising
    # UnboundLocalError (which would hide the original MySQL error).
    conn = None
    try:
        # Establish a connection to the MySQL database
        conn = pymysql.connect(host='localhost', database='guvi', user='root', password='root')

        # The cursor is a context manager; it is closed as soon as the rows are read
        with conn.cursor() as cursor:
            cursor.execute("SELECT * FROM redbus1")
            rows = cursor.fetchall()

        # Print the fetched rows
        print(rows)

    except pymysql.MySQLError as e:
        print(f"Error connecting to MySQL: {e}")

    finally:
        # Close the connection only if it was actually opened
        if conn is not None:
            conn.close()


# Fetch and print the table contents only when this code runs as the main
# program (not when it is imported as a module).
if __name__ == "__main__":
    fetch_redbus_data()


In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import ElementClickInterceptedException, TimeoutException, WebDriverException
import time
import re

def setup_driver(url="https://www.redbus.in/"):
    """
    Start a Chrome WebDriver session and load the start page.

    Parameters:
        url (str): Page to open once the browser has started. Defaults to
            the redBus home page.

    Returns:
        webdriver.Chrome: A Selenium WebDriver instance with ``url`` loaded.

    Raises:
        Exception: Whatever ``driver.get`` raised; the browser is shut down
            before the error is re-raised.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)
    except Exception:
        # The caller never receives the handle when navigation fails, so the
        # browser must be shut down here or the Chrome process is leaked.
        driver.quit()
        raise
    return driver


def click_rtc_name(rtc_element, rtc_index):
    """
    Clicks on the RTC name element via JavaScript, retrying on interception.

    The click is issued through the WebDriver that located the element
    (``rtc_element.parent``), so the function does not depend on a
    module-level ``driver`` variable being defined.

    Parameters:
        rtc_element (WebElement): The RTC name element.
        rtc_index (int): The zero-based index of the RTC name in the list
            (printed one-based).

    Returns:
        None. A message is printed if every attempt was intercepted.
    """
    # WebElement.parent is the WebDriver instance the element was found from
    element_driver = rtc_element.parent
    max_attempts = 3

    for attempt in range(1, max_attempts + 1):
        try:
            print(f"\nClicking RTC name {rtc_index + 1}: {rtc_element.text}")
            element_driver.execute_script("arguments[0].click();", rtc_element)
            break
        except ElementClickInterceptedException:
            print(f"Click intercepted. Retrying... (Attempt {attempt} of {max_attempts})")
            time.sleep(1)
    else:
        # Reached only when no attempt hit `break`, i.e. every click failed
        print(f"Failed to click RTC name {rtc_index + 1} after {max_attempts} attempts.")


def _first_number(text, default=0.0):
    """Return the first number found in ``text`` as a float, or ``default``.

    Thousands separators are dropped and a decimal part is kept, so
    "INR 1,299.50" yields 1299.5 rather than 1.0.
    """
    match = re.search(r'\d[\d,]*(?:\.\d+)?', text)
    if not match:
        return default
    return float(match.group().replace(',', ''))


def extract_bus_details(ticket_link):
    """
    Extracts bus details from the ticket page for a given route and stores them.

    The ticket page is opened in a new browser tab, every bus listed there is
    printed and inserted into the ``redbus`` table (one commit per row), and
    the tab is then closed and focus returned to the window that was active
    on entry -- also when scraping or the insert fails.

    This function reads the module-level names ``driver``, ``cursor``,
    ``conn``, ``rtcName`` and ``route_name``; they must be defined before it
    is called.

    Parameters:
        ticket_link (str): The URL link to the bus ticket page.

    Returns:
        None. ``TimeoutException`` / ``WebDriverException`` are reported on
        stdout; any other exception propagates after the tab is closed.
    """
    # Remember where to return to before a second window exists
    original_handle = driver.current_window_handle

    driver.execute_script("window.open(arguments[0]);", ticket_link)
    time.sleep(2)  # give the browser time to register the new tab
    driver.switch_to.window(driver.window_handles[-1])

    try:
        # Wait for bus details to load on the ticket page
        WebDriverWait(driver, 20).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color"))
        )

        bus_name_elements = driver.find_elements(By.CSS_SELECTOR, ".travels.lh-24.f-bold.d-color")
        bus_type_elements = driver.find_elements(By.CSS_SELECTOR, ".bus-type.f-12.m-top-16.l-color.evBus")
        departing_time_elements = driver.find_elements(By.CSS_SELECTOR, ".dp-time.f-19.d-color.f-bold")
        duration_elements = driver.find_elements(By.CSS_SELECTOR, ".dur.l-color.lh-24")
        reaching_time_elements = driver.find_elements(By.CSS_SELECTOR, ".bp-time.f-19.d-color.disp-Inline")
        star_rating_elements = driver.find_elements(By.CSS_SELECTOR, ".icon.icon-ic-star.d-block")
        price_elements = driver.find_elements(By.CSS_SELECTOR, ".fare.d-block")
        seats_available_elements = driver.find_elements(By.CSS_SELECTOR, ".seat-left.m-top-30")

        print(f"\nScraping Bus Details on the route {ticket_link}:")

        insert_query = """
                INSERT INTO redbus (state, route_name, route_link, bus_name, bus_type, departing_time, duration, reaching_time, star_rating, price, seats_available)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            """

        # NOTE(review): the lists are matched purely by position and zip()
        # stops at the shortest one. If any bus on the page lacks one of these
        # elements, later rows combine values from different buses -- confirm
        # against the page structure.
        for bus_name, bus_type, departing_time, duration, reaching_time, star_rating, price, seats_available in zip(
            bus_name_elements, bus_type_elements, departing_time_elements, duration_elements, reaching_time_elements,
            star_rating_elements, price_elements, seats_available_elements):

            price_value = _first_number(price.text)

            seats_match = re.search(r'\d+', seats_available.text)
            seats_available_value = int(seats_match.group()) if seats_match else 0

            # A WebElement is always truthy, so test its text: an empty
            # rating is shown as 'N/A' and stored as 0.0.
            star_rating_text = star_rating.text
            star_rating_value = _first_number(star_rating_text)

            print(f"Bus Name: {bus_name.text}, Bus Type: {bus_type.text}, Departing Time: {departing_time.text}, "
                  f"Duration: {duration.text}, Reaching Time: {reaching_time.text}, "
                  f"Star Rating: {star_rating_text or 'N/A'}, Price: {price_value}, "
                  f"Seats Available: {seats_available_value}")

            # Parameterized insert; committed per row so rows already scraped
            # survive a later failure.
            cursor.execute(insert_query, (rtcName, route_name, ticket_link, bus_name.text, bus_type.text,
                                          departing_time.text, duration.text, reaching_time.text,
                                          star_rating_value, price_value, seats_available_value))
            conn.commit()
            print("Data inserted successfully!")

    except (TimeoutException, WebDriverException) as e:
        print(f"Failed to find bus details on {ticket_link}. Error: {e}")

    finally:
        # Always close the ticket tab and return to the route page, even when
        # an unexpected error (e.g. a database error) is propagating;
        # otherwise the driver stays focused on a leaked tab.
        driver.close()
        driver.switch_to.window(original_handle)


def handle_pagination():
    """
    Advance the route list to its next page, if there is one.

    Looks up the pagination table, searches its page tabs for the one whose
    text is "next" (case-insensitive) and clicks it via JavaScript, then
    waits for the route elements to be present again.

    Uses the module-level ``driver``.

    Returns:
        bool: True when the 'Next' tab was clicked and routes were found,
        False when there is no 'Next' tab or a wait timed out.
    """
    try:
        table = WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.CLASS_NAME, "DC_117_paginationTable"))
        )

        # First tab labelled "next", or None when the list has no further page
        tabs = table.find_elements(By.CLASS_NAME, "DC_117_pageTabs")
        next_tab = next((tab for tab in tabs if tab.text.lower() == "next"), None)
        if not next_tab:
            return False  # No more pages to navigate

        # Bring the tab into view, then click through JavaScript
        for script in ("arguments[0].scrollIntoView(true);", "arguments[0].click();"):
            driver.execute_script(script, next_tab)

        WebDriverWait(driver, 30).until(
            EC.presence_of_all_elements_located((By.CLASS_NAME, "route"))
        )
        time.sleep(1)
        return True
    except TimeoutException:
        print("Pagination not found or no more pages to navigate.")
        return False


# Main function to execute the scraping process
def main():
    """
    Scrape every RTC's routes and buses from redBus and store them in MySQL.

    For each RTC name on the start page: click it, walk through all pages of
    its route list, open each route's ticket page via
    ``extract_bus_details`` and finally navigate back to the start page.

    The helper functions read ``driver``, ``conn``, ``cursor``, ``rtcName``
    and ``route_name`` as module-level names, so this function publishes them
    as globals. The database connection uses the same settings as
    ``fetch_redbus_data`` (localhost, database 'guvi', user 'root').

    The browser and the database connection are released even when scraping
    fails part-way.
    """
    # Local import keeps this cell runnable on its own
    import pymysql

    global driver, conn, cursor, rtcName, route_name

    driver = None
    conn = pymysql.connect(host='localhost', database='guvi', user='root', password='root')
    try:
        cursor = conn.cursor()
        driver = setup_driver()

        rtc_index = 0
        while True:
            # Re-locate the RTC elements on every pass: after driver.back()
            # the previously found elements are stale and can no longer be
            # clicked or read.
            rtc_name_elements = WebDriverWait(driver, 30).until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, "rtcName"))
            )
            if rtc_index >= len(rtc_name_elements):
                break

            rtc_element = rtc_name_elements[rtc_index]
            rtcName = rtc_element.text  # stored as the `state` column by extract_bus_details
            click_rtc_name(rtc_element, rtc_index)

            # Wait for route elements to load on the second page
            WebDriverWait(driver, 30).until(
                EC.presence_of_all_elements_located((By.CLASS_NAME, "route"))
            )

            while True:
                route_elements = driver.find_elements(By.CLASS_NAME, "route")
                print(f"Routes found for RTC name {rtc_index + 1}:")

                # Read all route names up front so the loop does not depend on
                # element references staying valid while other tabs are opened.
                for route_text in [route.text for route in route_elements]:
                    route_name = route_text
                    print("Bus Route Name:", route_name)
                    ticket_link = f"https://redbus.in/bus-tickets/{route_name.replace(' ', '-').lower()}"
                    print(f"Bus Route Link: {ticket_link}")

                    # Open the ticket page and extract bus details
                    extract_bus_details(ticket_link)

                # Handle pagination
                if not handle_pagination():
                    break

            # Return to the main RTC page for the next RTC name
            driver.back()
            rtc_index += 1
    finally:
        # Release the browser and the database connection on every exit path
        if driver is not None:
            driver.quit()
        conn.close()

# Start the scraping run only when this code runs as the main program
# (not when it is imported as a module).
if __name__ == "__main__":
    main()
