In [6]:
# Scrape the Redbus APSRTC PRIVATE BUS from website,  route : 'Hyderabad to Vijayawada

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

# Initialize the Chrome driver
driver = webdriver.Chrome()

# Navigate to the RedBus website
driver.get('https://www.redbus.in/')
driver.maximize_window()
time.sleep(5)

try:
    # Hover over and click the APSRTC option
    hover_element = driver.find_element(By.CLASS_NAME, "rtcName")
    action = ActionChains(driver)
    action.move_to_element(hover_element).perform()
    time.sleep(3)

    apsrtc_option = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'rtcName') and text()='APSRTC']"))
    )
    apsrtc_option.click()
    time.sleep(5)

    # Select a route
    routes = driver.find_elements(By.XPATH, "//a[@class='route']")
    if not routes:
        print("No routes found.")
        driver.quit()
        exit()

    # Use the first route
    route_link = routes[0].get_attribute("href")
    route_name = routes[0].text
    driver.get(route_link)
    time.sleep(5)

    # Check for buses
    bus_elements = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
    if not bus_elements:
        print("No buses found. Attempting alternative method...")
        fallback_element = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="fixer"]/div/div/div[1]/span[3]/i'))
        )
        action.move_to_element(fallback_element).pause(1).click().perform()
        time.sleep(5)

    # Scrolling and extracting buses
    scroll_count = 0
    bus_data = []

    while scroll_count < 5:  # Scroll up to 5 times
        driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)
        time.sleep(7)
        scroll_count += 1

        # Fetch bus details
        bus_names = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
        bus_types = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_times = driver.find_elements(By.XPATH, "//div[@class='dp-time f-19 d-color f-bold']")
        durations = driver.find_elements(By.XPATH, "//div[@class='dur l-color lh-24']")
        reaching_times = driver.find_elements(By.XPATH, "//div[@class='bp-time f-19 d-color disp-Inline']")
        star_ratings = driver.find_elements(By.XPATH, "//span[contains(text(), '.') and string-length(text()) > 2]")
        prices = driver.find_elements(By.XPATH, "//div[contains(@class, 'fare')]/span[contains(@class, 'f-19')]")
        seats_available = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        # Find the minimum length to avoid mismatches
        min_length = min(len(bus_names), len(bus_types), len(departing_times), len(durations),
                         len(reaching_times), len(star_ratings), len(prices), len(seats_available))

        for i in range(min_length):
            bus_info = {
                "route_name": route_name,
                "route_link": route_link,
                "bus_name": bus_names[i].text,
                "bus_type": bus_types[i].text,
                "departing_time": departing_times[i].text,
                "duration": durations[i].text,
                "reaching_time": reaching_times[i].text,
                "star_rating": star_ratings[i].text,
                "price": prices[i].text.replace('₹', '').strip(),
                "seats_available": seats_available[i].text.replace('Seats available', '').strip(),
            }
            if bus_info not in bus_data:  # Avoid duplicates
                bus_data.append(bus_info)

        print(f"Scroll {scroll_count}: Found {len(bus_data)} unique buses.")

        if len(bus_data) >= 10:  # Stop once 10 buses are collected
            break

    # Print the collected data
    print("\nCollected Bus Data:")
    for bus in bus_data[:10]:  # Limit output to 10 buses
        print(bus)

except Exception as e:
    print(f"An error occurred: {e}")

finally:
    driver.quit()


Scroll 1: Found 5 unique buses.
Scroll 2: Found 10 unique buses.

Collected Bus Data:
{'route_name': 'Hyderabad to Vijayawada', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada', 'bus_name': 'Sri Sanvi Tours and Travels', 'bus_type': 'A/C Sleeper (2+1)', 'departing_time': '22:15', 'duration': '06h 38m', 'reaching_time': '04:53', 'star_rating': '4.5', 'price': '949', 'seats_available': '15'}
{'route_name': 'Hyderabad to Vijayawada', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada', 'bus_name': 'FRESHBUS', 'bus_type': 'Electric A/C Seater (2+2)', 'departing_time': '22:00', 'duration': '07h 45m', 'reaching_time': '05:45', 'star_rating': '4.6', 'price': '779', 'seats_available': '10'}
{'route_name': 'Hyderabad to Vijayawada', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada', 'bus_name': 'IntrCity SmartBus', 'bus_type': 'Scania AC Multi Axle Sleeper (2+1)', 'departing_time': '22:10', 'duration': '07h 55m', 'reaching_t

In [7]:

# push the pvt apsrtc bus data ,route : 'Hyderabad to Vijayawada
# Import necessary modules
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT

# SQL Database Interaction
try:
    # Connect to the PostgreSQL database
    connection = psycopg2.connect(
        host="localhost",
        port="5432",
        database="red_bus",
        user="postgres",
        password="sample12"
    )
    connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cursor = connection.cursor()

    # Create the bus_routes table if it doesn't already exist
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS bus_routes (
        id SERIAL PRIMARY KEY,
        route_name TEXT NOT NULL,
        route_link TEXT NOT NULL,
        bus_name TEXT DEFAULT NULL,
        bus_type TEXT DEFAULT NULL,
        departing_time TIME DEFAULT NULL,
        duration TEXT DEFAULT NULL,
        reaching_time TIME DEFAULT NULL,
        star_rating FLOAT DEFAULT NULL,
        price DECIMAL(10, 2) DEFAULT NULL,
        seats_available INT DEFAULT NULL
    )
    """)

    # Insert the data into the table
    for _, row in bus_data.iterrows():
        cursor.execute("""
        INSERT INTO bus_routes (route_name, route_link, bus_name, bus_type, departing_time, duration, 
        reaching_time, star_rating, price, seats_available) 
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """, (
            row['route_name'], row['route_link'], row.get('bus_name'), row.get('bus_type'), 
            row.get('departing_time'), row.get('duration'), row.get('reaching_time'), 
            row.get('star_rating'), row.get('price'), row.get('seats_available')
        ))

    # Commit the transaction
    connection.commit()
    print("Data inserted successfully.")

except Exception as e:
    print(f"An error occurred while interacting with the database: {e}")

finally:
    # Ensure resources are closed properly
    if 'cursor' in locals() and cursor:
        cursor.close()
    if 'connection' in locals() and connection:
        connection.close()


An error occurred while interacting with the database: 'list' object has no attribute 'iterrows'


In [8]:
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
import pandas as pd

# Insert clean data into the database
try:
    # Connect to PostgreSQL
    connection = psycopg2.connect(
        host="localhost",
        port="5432",
        database="red_bus",
        user="postgres",
        password="sample12"
    )
    connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cursor = connection.cursor()

    # Create table if not exists
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS bus_routes (
        id SERIAL PRIMARY KEY,
        route_name TEXT NOT NULL,
        route_link TEXT NOT NULL,
        bus_name TEXT DEFAULT NULL,
        bus_type TEXT DEFAULT NULL,
        departing_time TIME DEFAULT NULL,
        duration TEXT DEFAULT NULL,
        reaching_time TIME DEFAULT NULL,
        star_rating FLOAT DEFAULT NULL,
        price DECIMAL(10, 2) DEFAULT NULL,
        seats_available INT DEFAULT NULL
    )
    """)

    # Insert data into the table
    for row in bus_data:  # Assuming `bus_data` is a list of dictionaries
        cursor.execute("""
        INSERT INTO bus_routes (route_name, route_link, bus_name, bus_type, departing_time, duration, 
        reaching_time, star_rating, price, seats_available) 
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """, (
            row['route_name'], row['route_link'], row.get('bus_name'), row.get('bus_type'), 
            row.get('departing_time'), row.get('duration'), row.get('reaching_time'), 
            row.get('star_rating'), row.get('price'), row.get('seats_available')
        ))

    connection.commit()
    print("Data inserted successfully.")

except Exception as e:
    print(f"An error occurred: {e}")

finally:
    if 'cursor' in locals() and cursor:
        cursor.close()
    if 'connection' in locals() and connection:
        connection.close()


Data inserted successfully.


In [9]:
# Scrape the Redbus APSRTC GOVT BUS from website,  route : 'Hyderabad to Vijayawada

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

# Initialize the Chrome driver
driver = webdriver.Chrome()

# Navigate to the RedBus website
driver.get('https://www.redbus.in/')
driver.maximize_window()
time.sleep(5)

try:
    # Hover over and click the APSRTC option
    hover_element = driver.find_element(By.CLASS_NAME, "rtcName")
    action = ActionChains(driver)
    action.move_to_element(hover_element).perform()
    time.sleep(3)

    apsrtc_option = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'rtcName') and text()='APSRTC']"))
    )
    apsrtc_option.click()
    time.sleep(5)

    # Select a route
    routes = driver.find_elements(By.XPATH, "//a[@class='route']")
    if not routes:
        print("No routes found.")
        driver.quit()
        exit()

    # Use the first route
    route_link = routes[0].get_attribute("href")
    route_name = routes[0].text
    driver.get(route_link)
    time.sleep(5)

    # Check for buses
    bus_elements = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
    if not bus_elements:
        print("No buses found. Attempting alternative method...")
        fallback_element = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="fixer"]/div/div/div[1]/span[3]/i'))
        )
        action.move_to_element(fallback_element).pause(1).click().perform()
        time.sleep(5)
        
    # Locate and hover over the 'rtcName' element
    hover_element = driver.find_element(By.CLASS_NAME, "button")
    action = ActionChains(driver)
    action.move_to_element(hover_element).perform()
    time.sleep(3)

    # Click on the 'APSRTC govt buses' option
    apsrtc_option = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'button') and text()='View Buses']"))
    )
    apsrtc_option.click()
    time.sleep(10)

    # Scrolling and extracting buses
    scroll_count = 0
    bus_data = []

    while scroll_count < 5:  # Scroll up to 5 times
        driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)
        time.sleep(7)
        scroll_count += 1

        # Fetch bus details
        #bus_names = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
        #Use . Instead of text(): If the text content is nested or mixed, . can capture all text within the element:
        bus_names = driver.find_elements(By.XPATH, "//div[contains(@class, 'travels lh-24 f-bold d-color') and contains(., 'APSRTC')]")

        bus_types = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_times = driver.find_elements(By.XPATH, "//div[@class='dp-time f-19 d-color f-bold']")
        durations = driver.find_elements(By.XPATH, "//div[@class='dur l-color lh-24']")
        reaching_times = driver.find_elements(By.XPATH, "//div[@class='bp-time f-19 d-color disp-Inline']")
        star_ratings = driver.find_elements(By.XPATH, "//span[contains(text(), '.') and string-length(text()) > 2]")
        prices = driver.find_elements(By.XPATH, "//div[contains(@class, 'fare')]/span[contains(@class, 'f-19')]")
        seats_available = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        # Find the minimum length to avoid mismatches
        min_length = min(len(bus_names), len(bus_types), len(departing_times), len(durations),
                         len(reaching_times), len(star_ratings), len(prices), len(seats_available))

        for i in range(min_length):
            bus_info = {
                "route_name": route_name,
                "route_link": route_link,
                "bus_name": bus_names[i].text,
                "bus_type": bus_types[i].text,
                "departing_time": departing_times[i].text,
                "duration": durations[i].text,
                "reaching_time": reaching_times[i].text,
                "star_rating": star_ratings[i].text,
                "price": prices[i].text.replace('₹', '').strip(),
                "seats_available": seats_available[i].text.replace('Seats available', '').strip(),
            }
            if bus_info not in bus_data:  # Avoid duplicates
                bus_data.append(bus_info)

        print(f"Scroll {scroll_count}: Found {len(bus_data)} unique buses.")

        if len(bus_data) >= 10:  # Stop once 10 buses are collected
            break

    # Print the collected data
    print("\nCollected Bus Data:")
    for bus in bus_data[:10]:  # Limit output to 10 buses
        print(bus)

except Exception as e:
    print(f"An error occurred: {e}")

finally:
    driver.quit()


Scroll 1: Found 5 unique buses.
Scroll 2: Found 10 unique buses.

Collected Bus Data:
{'route_name': 'Hyderabad to Vijayawada', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada', 'bus_name': 'APSRTC - 4920', 'bus_type': 'INDRA(A.C. Seater)', 'departing_time': '21:00', 'duration': '07h 00m', 'reaching_time': '04:00', 'star_rating': '3.8', 'price': '567', 'seats_available': '6'}
{'route_name': 'Hyderabad to Vijayawada', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada', 'bus_name': 'APSRTC - 4823', 'bus_type': 'SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)', 'departing_time': '21:00', 'duration': '06h 45m', 'reaching_time': '03:45', 'star_rating': '2.7', 'price': '469', 'seats_available': '7'}
{'route_name': 'Hyderabad to Vijayawada', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada', 'bus_name': 'APSRTC - 3612', 'bus_type': 'SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)', 'departing_time': '21:00', 'duration': '07h 50m', 'reachin

In [10]:
# Extract bus details
bus_names = driver.find_elements(By.XPATH, "//div[contains(@class, 'travels lh-24 f-bold d-color') and contains(., 'APSRTC')]")
bus_types = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
departing_times = driver.find_elements(By.XPATH, "//div[@class='dp-time f-19 d-color f-bold']")
durations = driver.find_elements(By.XPATH, "//div[@class='dur l-color lh-24']")
reaching_times = driver.find_elements(By.XPATH, "//div[@class='bp-time f-19 d-color disp-Inline']")
star_ratings = driver.find_elements(By.XPATH, "//span[normalize-space(.) != '' and contains(text(), '.') and number(text())]")
prices = driver.find_elements(By.XPATH, "//div[contains(@class, 'fare')]/span[contains(@class, 'f-19')]")
seats_available = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

# Find the minimum length to avoid mismatches
min_length = min(len(bus_names), len(bus_types), len(departing_times), len(durations),
                 len(reaching_times), len(star_ratings), len(prices), len(seats_available))

# Extract and format data
for i in range(min_length):
    # Extract and process the bus_name
    raw_bus_name = bus_names[i].text
    bus_name_parts = raw_bus_name.split(' - ')  # Split 'APSRTC - 9363' into ['APSRTC', '9363']
    bus_company = bus_name_parts[0] if len(bus_name_parts) > 0 else None  # Extract 'APSRTC'
    bus_number = int(bus_name_parts[1]) if len(bus_name_parts) > 1 and bus_name_parts[1].isdigit() else None  # Extract and convert '9363' to number

    # Create the dictionary
    bus_info = {
        "route_name": route_name,
        "route_link": route_link,
        "bus_name": raw_bus_name,  # Keep full string for display
        "bus_company": bus_company,  # Separate company name
        "bus_number": bus_number,  # Numeric part of bus_name
        "bus_type": bus_types[i].text,
        "departing_time": departing_times[i].text,
        "duration": durations[i].text,
        "reaching_time": reaching_times[i].text,
        "star_rating": star_ratings[i].text,
        "price": prices[i].text.replace('₹', '').strip(),
        "seats_available": seats_available[i].text.replace('Seats available', '').strip(),
    }

    if bus_info not in bus_data:  # Avoid duplicates
        bus_data.append(bus_info)

print(f"Scroll {scroll_count}: Found {len(bus_data)} unique buses.")

if len(bus_data) >= 10:  # Stop once 10 buses are collected
    break


MaxRetryError: HTTPConnectionPool(host='localhost', port=7978): Max retries exceeded with url: /session/ab2c2650b5718c3c04096c4fea352622/elements (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x000001ED7CCF8950>: Failed to establish a new connection: [WinError 10061] No connection could be made because the target machine actively refused it'))

In [None]:
# Scrape the Redbus APSRTC PRIVATE BUS from website,  route : 'Hyderabad to Vijayawada & add to the Database
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT

# Initialize the Chrome driver
driver = webdriver.Chrome()

# Navigate to the RedBus website
driver.get('https://www.redbus.in/')
driver.maximize_window()
time.sleep(5)

try:
    # Hover over and click the APSRTC option
    hover_element = driver.find_element(By.CLASS_NAME, "rtcName")
    action = ActionChains(driver)
    action.move_to_element(hover_element).perform()
    time.sleep(3)

    apsrtc_option = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'rtcName') and text()='APSRTC']"))
    )
    apsrtc_option.click()
    time.sleep(5)

    # Select a route
    routes = driver.find_elements(By.XPATH, "//a[@class='route']")
    if not routes:
        print("No routes found.")
        driver.quit()
        exit()

    # Use the first route
    route_link = routes[0].get_attribute("href")
    route_name = routes[0].text
    driver.get(route_link)
    time.sleep(5)

    # Check for buses
    bus_elements = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
    if not bus_elements:
        print("No buses found. Attempting alternative method...")
        fallback_element = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="fixer"]/div/div/div[1]/span[3]/i'))
        )
        action.move_to_element(fallback_element).pause(1).click().perform()
        time.sleep(5)

    # Scrolling and extracting buses
    scroll_count = 0
    bus_data = []

    while scroll_count < 5:  # Scroll up to 5 times
        driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)
        time.sleep(7)
        scroll_count += 1

        # Fetch bus details
        bus_names = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
        bus_types = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_times = driver.find_elements(By.XPATH, "//div[@class='dp-time f-19 d-color f-bold']")
        durations = driver.find_elements(By.XPATH, "//div[@class='dur l-color lh-24']")
        reaching_times = driver.find_elements(By.XPATH, "//div[@class='bp-time f-19 d-color disp-Inline']")
        #star_ratings = driver.find_elements(By.XPATH, "//span[contains(text(), '.') and string-length(text()) > 2]")
        star_ratings = driver.find_elements(By.XPATH, "//span[normalize-space(.) != '' and contains(text(), '.') and number(text())]")#[:10]
        prices = driver.find_elements(By.XPATH, "//div[contains(@class, 'fare')]/span[contains(@class, 'f-19')]")
        seats_available = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        # Find the minimum length to avoid mismatches
        min_length = min(len(bus_names), len(bus_types), len(departing_times), len(durations),
                         len(reaching_times), len(star_ratings), len(prices), len(seats_available))

        for i in range(min_length):
            bus_info = {
                "route_name": route_name,
                "route_link": route_link,
                "bus_name": bus_names[i].text,
                "bus_type": bus_types[i].text,
                "departing_time": departing_times[i].text,
                "duration": durations[i].text,
                "reaching_time": reaching_times[i].text,
                "star_rating": star_ratings[i].text,
                "price": prices[i].text.replace('₹', '').strip(),
                "seats_available": seats_available[i].text.replace('Seats available', '').strip(),
            }
            if bus_info not in bus_data:  # Avoid duplicates
                bus_data.append(bus_info)

        print(f"Scroll {scroll_count}: Found {len(bus_data)} unique buses.")

        if len(bus_data) >= 10:  # Stop once 10 buses are collected
            break

    # Convert data to a DataFrame
    df = pd.DataFrame(bus_data)

except Exception as e:
    print(f"An error occurred: {e}")

finally:
    driver.quit()

# SQL Database Interaction
try:
    # Connect to the PostgreSQL database
    connection = psycopg2.connect(
        host="localhost",
        port="5432",
        database="red_bus",
        user="postgres",
        password="sample12"
    )
    connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cursor = connection.cursor()

    # Create the bus_routes table if it doesn't already exist
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS bus_routes (
        id SERIAL PRIMARY KEY,
        route_name TEXT,
        route_link TEXT,
        bus_name TEXT DEFAULT NULL,
        bus_type TEXT DEFAULT NULL,
        departing_time TIME DEFAULT NULL,
        duration TEXT DEFAULT NULL,
        reaching_time TIME DEFAULT NULL,
        star_rating FLOAT DEFAULT NULL,
        price DECIMAL DEFAULT NULL,
        seats_available INT DEFAULT NULL
    )
    """)

    # Insert the data into the table
    for _, row in df.iterrows():
        cursor.execute("""
        INSERT INTO bus_routes (route_name, route_link, bus_name, bus_type, departing_time, duration, 
        reaching_time, star_rating, price, seats_available) 
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """, (
            row['route_name'], row['route_link'], row['bus_name'], row['bus_type'], 
            row['departing_time'], row['duration'], row['reaching_time'], row['star_rating'], 
            row['price'], row['seats_available']
        ))

    # Commit the transaction
    connection.commit()
    print("Data inserted successfully.")

except Exception as e:
    print(f"An error occurred while interacting with the database: {e}")

finally:
    if cursor:
        cursor.close()
    if connection:
        connection.close()


In [None]:
# Scrape the Redbus APSRTC GOVT BUS from website,  route : 'Hyderabad to Vijayawada ,  route : 'Hyderabad to Vijayawada & add to the Database

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

# Initialize the Chrome driver
driver = webdriver.Chrome()

# Navigate to the RedBus website
driver.get('https://www.redbus.in/')
driver.maximize_window()
time.sleep(5)

try:
    # Hover over and click the APSRTC option
    hover_element = driver.find_element(By.CLASS_NAME, "rtcName")
    action = ActionChains(driver)
    action.move_to_element(hover_element).perform()
    time.sleep(3)

    apsrtc_option = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'rtcName') and text()='APSRTC']"))
    )
    apsrtc_option.click()
    time.sleep(5)

    # Select a route
    routes = driver.find_elements(By.XPATH, "//a[@class='route']")
    if not routes:
        print("No routes found.")
        driver.quit()
        exit()

    # Use the first route
    route_link = routes[0].get_attribute("href")
    route_name = routes[0].text
    driver.get(route_link)
    time.sleep(5)

    # Check for buses
    bus_elements = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
    if not bus_elements:
        print("No buses found. Attempting alternative method...")
        fallback_element = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="fixer"]/div/div/div[1]/span[3]/i'))
        )
        action.move_to_element(fallback_element).pause(1).click().perform()
        time.sleep(5)
        
    # Locate and hover over the 'rtcName' element
    hover_element = driver.find_element(By.CLASS_NAME, "button")
    action = ActionChains(driver)
    action.move_to_element(hover_element).perform()
    time.sleep(3)

    # Click on the 'APSRTC govt buses' option
    apsrtc_option = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'button') and text()='View Buses']"))
    )
    apsrtc_option.click()
    time.sleep(10)

    # Scrolling and extracting buses
    scroll_count = 0
    APSRTC_GOV_bus_data = []

    while scroll_count < 5:  # Scroll up to 5 times
        driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)
        time.sleep(7)
        scroll_count += 1

        # Fetch bus details
        #bus_names = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
        #Use . Instead of text(): If the text content is nested or mixed, . can capture all text within the element:
        bus_names = driver.find_elements(By.XPATH, "//div[contains(@class, 'travels lh-24 f-bold d-color') and contains(., 'APSRTC')]")

        bus_types = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_times = driver.find_elements(By.XPATH, "//div[@class='dp-time f-19 d-color f-bold']")
        durations = driver.find_elements(By.XPATH, "//div[@class='dur l-color lh-24']")
        reaching_times = driver.find_elements(By.XPATH, "//div[@class='bp-time f-19 d-color disp-Inline']")
        #star_ratings = driver.find_elements(By.XPATH, "//span[contains(text(), '.') and string-length(text()) > 2]")
        star_ratings = driver.find_elements(By.XPATH, "//span[normalize-space(.) != '' and contains(text(), '.') and number(text())]")#[:10]
        prices = driver.find_elements(By.XPATH, "//div[contains(@class, 'fare')]/span[contains(@class, 'f-19')]")
        seats_available = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        # Find the minimum length to avoid mismatches
        min_length = min(len(bus_names), len(bus_types), len(departing_times), len(durations),
                         len(reaching_times), len(star_ratings), len(prices), len(seats_available))
        
                # Extract and format data
        for i in range(min_length):
            # Extract and process the bus_name
            raw_bus_name = bus_names[i].text
            bus_name_parts = raw_bus_name.split(' - ')  # Split 'APSRTC - 9363' into ['APSRTC', '9363']
            bus_company = bus_name_parts[0] if len(bus_name_parts) > 0 else None  # Extract 'APSRTC'
            bus_number = int(bus_name_parts[1]) if len(bus_name_parts) > 1 and bus_name_parts[1].isdigit() else None  # Extract and convert '9363' to number

            # Create the dictionary

            for i in range(min_length):
                bus_info = {
                    "route_name": route_name,
                    "route_link": route_link,
                    "bus_name": bus_names[i].text,
                    "bus_type": bus_types[i].text,
                    "departing_time": departing_times[i].text,
                    "duration": durations[i].text,
                    "reaching_time": reaching_times[i].text,
                    "star_rating": star_ratings[i].text,
                    "price": prices[i].text.replace('₹', '').strip(),
                    "seats_available": seats_available[i].text.replace('Seats available', '').strip(),
                }
                if bus_info not in APSRTC_GOV_bus_data:  # Avoid duplicates
                    APSRTC_GOV_bus_data.append(bus_info)

        print(f"Scroll {scroll_count}: Found {len(APSRTC_GOV_bus_data)} unique buses.")

        if len(APSRTC_GOV_bus_data) >= 10:  # Stop once 10 buses are collected
            break

    # Print the collected data
    print("\nCollected Bus Data:")
    for bus in APSRTC_GOV_bus_data[:10]:  # Limit output to 10 buses
        print(bus)

except Exception as e:
    print(f"An error occurred: {e}")

finally:
    driver.quit()
    
# SQL Database Interaction
try:
    # Connect to the PostgreSQL database
    connection = psycopg2.connect(
        host="localhost",
        port="5432",
        database="red_bus",
        user="postgres",
        password="sample12"
    )
    connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cursor = connection.cursor()

    # Create the bus_routes table if it doesn't already exist
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS bus_routes (
        id SERIAL PRIMARY KEY,
        route_name TEXT,
        route_link TEXT,
        bus_name TEXT DEFAULT NULL,
        bus_type TEXT DEFAULT NULL,
        departing_time TIME DEFAULT NULL,
        duration TEXT DEFAULT NULL,
        reaching_time TIME DEFAULT NULL,
        star_rating FLOAT DEFAULT NULL,
        price DECIMAL DEFAULT NULL,
        seats_available INT DEFAULT NULL
    )
    """)

    # Insert the data into the table
    for _, row in df.iterrows():
        cursor.execute("""
        INSERT INTO bus_routes (route_name, route_link, bus_name, bus_type, departing_time, duration, 
        reaching_time, star_rating, price, seats_available) 
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """, (
            row['route_name'], row['route_link'], row['bus_name'], row['bus_type'], 
            row['departing_time'], row['duration'], row['reaching_time'], row['star_rating'], 
            row['price'], row['seats_available']
        ))

    # Commit the transaction
    connection.commit()
    print("Data inserted successfully.")

except Exception as e:
    print(f"An error occurred while interacting with the database: {e}")

finally:
    if cursor:
        cursor.close()
    if connection:
        connection.close()


In [None]:
# Extract bus details
bus_names = driver.find_elements(By.XPATH, "//div[contains(@class, 'travels lh-24 f-bold d-color') and contains(., 'APSRTC')]")
bus_types = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
departing_times = driver.find_elements(By.XPATH, "//div[@class='dp-time f-19 d-color f-bold']")
durations = driver.find_elements(By.XPATH, "//div[@class='dur l-color lh-24']")
reaching_times = driver.find_elements(By.XPATH, "//div[@class='bp-time f-19 d-color disp-Inline']")
star_ratings = driver.find_elements(By.XPATH, "//span[normalize-space(.) != '' and contains(text(), '.') and number(text())]")
prices = driver.find_elements(By.XPATH, "//div[contains(@class, 'fare')]/span[contains(@class, 'f-19')]")
seats_available = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

# Find the minimum length to avoid mismatches
min_length = min(len(bus_names), len(bus_types), len(departing_times), len(durations),
                 len(reaching_times), len(star_ratings), len(prices), len(seats_available))

# Extract and format data
for i in range(min_length):
    # Extract and process the bus_name
    raw_bus_name = bus_names[i].text
    bus_name_parts = raw_bus_name.split(' - ')  # Split 'APSRTC - 9363' into ['APSRTC', '9363']
    bus_company = bus_name_parts[0] if len(bus_name_parts) > 0 else None  # Extract 'APSRTC'
    bus_number = int(bus_name_parts[1]) if len(bus_name_parts) > 1 and bus_name_parts[1].isdigit() else None  # Extract and convert '9363' to number

    # Create the dictionary
    bus_info = {
        "route_name": route_name,
        "route_link": route_link,
        "bus_name": raw_bus_name,  # Keep full string for display
        "bus_company": bus_company,  # Separate company name
        "bus_number": bus_number,  # Numeric part of bus_name
        "bus_type": bus_types[i].text,
        "departing_time": departing_times[i].text,
        "duration": durations[i].text,
        "reaching_time": reaching_times[i].text,
        "star_rating": star_ratings[i].text,
        "price": prices[i].text.replace('₹', '').strip(),
        "seats_available": seats_available[i].text.replace('Seats available', '').strip(),
    }

    if bus_info not in bus_data:  # Avoid duplicates
        bus_data.append(bus_info)

print(f"Scroll {scroll_count}: Found {len(bus_data)} unique buses.")

if len(bus_data) >= 10:  # Stop once 10 buses are collected
    break


06-02-2025

In [None]:
# Scrape the Redbus APSRTC GOVT BUS from website,  route : 'Hyderabad to Vijayawada.

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

# Initialize the Chrome driver
driver = webdriver.Chrome()

# Navigate to the RedBus website
driver.get('https://www.redbus.in/')
driver.maximize_window()
time.sleep(5)

try:
    # Hover over and click the APSRTC option
    hover_element = driver.find_element(By.CLASS_NAME, "rtcName")
    action = ActionChains(driver)
    action.move_to_element(hover_element).perform()
    time.sleep(3)

    apsrtc_option = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'rtcName') and text()='APSRTC']"))
    )
    apsrtc_option.click()
    time.sleep(5)

    # Select a route
    routes = driver.find_elements(By.XPATH, "//a[@class='route']")
    if not routes:
        print("No routes found.")
        driver.quit()
        exit()

    # Use the first route
    route_link = routes[0].get_attribute("href")
    route_name = routes[0].text
    driver.get(route_link)
    time.sleep(5)

    # Check for buses
    bus_elements = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
    if not bus_elements:
        print("No buses found. Attempting alternative method...")
        fallback_element = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="fixer"]/div/div/div[1]/span[3]/i'))
        )
        action.move_to_element(fallback_element).pause(1).click().perform()
        time.sleep(5)
        
    # Locate and hover over the 'rtcName' element
    hover_element = driver.find_element(By.CLASS_NAME, "button")
    action = ActionChains(driver)
    action.move_to_element(hover_element).perform()
    time.sleep(3)

    # Click on the 'APSRTC govt buses' option
    apsrtc_option = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'button') and text()='View Buses']"))
    )
    apsrtc_option.click()
    time.sleep(10)

    # Scrolling and extracting buses
    scroll_count = 0
    APSRTC_GOV_bus_data = []

    while scroll_count < 5:  # Scroll up to 5 times
        driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)
        time.sleep(7)
        scroll_count += 1

        # Fetch bus details
        #bus_names = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
        #Use . Instead of text(): If the text content is nested or mixed, . can capture all text within the element:
        bus_names = driver.find_elements(By.XPATH, "//div[contains(@class, 'travels lh-24 f-bold d-color') and contains(., 'APSRTC')]")

        bus_types = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_times = driver.find_elements(By.XPATH, "//div[@class='dp-time f-19 d-color f-bold']")
        durations = driver.find_elements(By.XPATH, "//div[@class='dur l-color lh-24']")
        reaching_times = driver.find_elements(By.XPATH, "//div[@class='bp-time f-19 d-color disp-Inline']")
        #star_ratings = driver.find_elements(By.XPATH, "//span[contains(text(), '.') and string-length(text()) > 2]")
        star_ratings = driver.find_elements(By.XPATH, "//span[normalize-space(.) != '' and contains(text(), '.') and number(text())]")#[:10]
        prices = driver.find_elements(By.XPATH, "//div[contains(@class, 'fare')]/span[contains(@class, 'f-19')]")
        seats_available = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        # Find the minimum length to avoid mismatches
        min_length = min(len(bus_names), len(bus_types), len(departing_times), len(durations),
                         len(reaching_times), len(star_ratings), len(prices), len(seats_available))
        
                # Extract and format data
        for i in range(min_length):
            # Extract and process the bus_name
            raw_bus_name = bus_names[i].text
            bus_name_parts = raw_bus_name.split(' - ')  # Split 'APSRTC - 9363' into ['APSRTC', '9363']
            bus_company = bus_name_parts[0] if len(bus_name_parts) > 0 else None  # Extract 'APSRTC'
            bus_number = int(bus_name_parts[1]) if len(bus_name_parts) > 1 and bus_name_parts[1].isdigit() else None  # Extract and convert '9363' to number

            # Create the dictionary

            for i in range(min_length):
                bus_info = {
                    "route_name": route_name,
                    "route_link": route_link,
                    "bus_name": bus_names[i].text,
                    "bus_type": bus_types[i].text,
                    "departing_time": departing_times[i].text,
                    "duration": durations[i].text,
                    "reaching_time": reaching_times[i].text,
                    "star_rating": star_ratings[i].text,
                    "price": prices[i].text.replace('₹', '').strip(),
                    "seats_available": seats_available[i].text.replace('Seats available', '').strip(),
                }
                if bus_info not in APSRTC_GOV_bus_data:  # Avoid duplicates
                    APSRTC_GOV_bus_data.append(bus_info)

        print(f"Scroll {scroll_count}: Found {len(APSRTC_GOV_bus_data)} unique buses.")

        if len(APSRTC_GOV_bus_data) >= 10:  # Stop once 10 buses are collected
            break

    # Print the collected data
    print("\nCollected Bus Data:")
    for bus in APSRTC_GOV_bus_data[:10]:  # Limit output to 10 buses
        print(bus)

except Exception as e:
    print(f"An error occurred: {e}")

finally:
    driver.quit()
    


Scroll 1: Found 5 unique buses.
Scroll 2: Found 10 unique buses.

Collected Bus Data:
{'route_name': 'Hyderabad to Vijayawada', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada', 'bus_name': 'APSRTC - 35188', 'bus_type': 'VENNELA (A.C. SLEEPER)', 'departing_time': '00:40', 'duration': '06h 05m', 'reaching_time': '06:45', 'star_rating': '2.2', 'price': '737', 'seats_available': '10'}
{'route_name': 'Hyderabad to Vijayawada', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada', 'bus_name': 'APSRTC - 3563', 'bus_type': 'SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)', 'departing_time': '01:00', 'duration': '06h 15m', 'reaching_time': '07:15', 'star_rating': '3.8', 'price': '412', 'seats_available': '26'}
{'route_name': 'Hyderabad to Vijayawada', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada', 'bus_name': 'APSRTC - 35266', 'bus_type': 'VENNELA (A.C. SLEEPER)', 'departing_time': '01:25', 'duration': '05h 45m', 'reaching_time':

In [None]:
APSRTC_GOV_bus_data

[{'route_name': 'Hyderabad to Vijayawada',
  'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada',
  'bus_name': 'APSRTC - 35188',
  'bus_type': 'VENNELA (A.C. SLEEPER)',
  'departing_time': '00:40',
  'duration': '06h 05m',
  'reaching_time': '06:45',
  'star_rating': '2.2',
  'price': '737',
  'seats_available': '10'},
 {'route_name': 'Hyderabad to Vijayawada',
  'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada',
  'bus_name': 'APSRTC - 3563',
  'bus_type': 'SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)',
  'departing_time': '01:00',
  'duration': '06h 15m',
  'reaching_time': '07:15',
  'star_rating': '3.8',
  'price': '412',
  'seats_available': '26'},
 {'route_name': 'Hyderabad to Vijayawada',
  'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada',
  'bus_name': 'APSRTC - 35266',
  'bus_type': 'VENNELA (A.C. SLEEPER)',
  'departing_time': '01:25',
  'duration': '05h 45m',
  'reaching_time': '07:10',
  'star_rating': '3.6',

In [None]:
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from datetime import datetime

try:
    # Connect to PostgreSQL database
    connection = psycopg2.connect(
        host="localhost",
        port="5432",
        database="red_bus",
        user="postgres",
        password="sample12"
    )
    connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cursor = connection.cursor()

    # Create the bus_routes table if it doesn't exist
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS bus_routes (
        id SERIAL PRIMARY KEY,
        route_name TEXT,
        route_link TEXT,
        bus_name TEXT DEFAULT NULL,
        bus_type TEXT DEFAULT NULL,
        departing_time TIME DEFAULT NULL,
        duration TEXT DEFAULT NULL,
        reaching_time TIME DEFAULT NULL,
        star_rating FLOAT DEFAULT NULL,
        price DECIMAL DEFAULT NULL,
        seats_available INT DEFAULT NULL
    )
    """)

    # Insert data into the table
    for row in APSRTC_GOV_bus_data:
        cursor.execute("""
        INSERT INTO bus_routes (route_name, route_link, bus_name, bus_type, departing_time, duration, 
        reaching_time, star_rating, price, seats_available) 
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """, (
            row['route_name'], row['route_link'], row['bus_name'], row['bus_type'],
            datetime.strptime(row['departing_time'], "%H:%M").time() if row['departing_time'] else None,
            row['duration'],
            datetime.strptime(row['reaching_time'], "%H:%M").time() if row['reaching_time'] else None,
            float(row['star_rating']) if row['star_rating'] else None,
            float(row['price']) if row['price'] else None,
            int(row['seats_available']) if row['seats_available'] else None
        ))

    # Commit the transaction
    connection.commit()
    print("Data inserted successfully.")

except Exception as e:
    print(f"An error occurred while interacting with the database: {e}")

finally:
    if cursor:
        cursor.close()
    if connection:
        connection.close()


Data inserted successfully.


In [None]:
 # Scrape the Redbus APSRTC PRIVATE BUS from website,  route : 'Hyderabad to Vijayawada

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

# Initialize the Chrome driver
driver = webdriver.Chrome()

# Navigate to the RedBus website
driver.get('https://www.redbus.in/')
driver.maximize_window()
time.sleep(5)

try:
    # Hover over and click the APSRTC option
    hover_element = driver.find_element(By.CLASS_NAME, "rtcName")
    action = ActionChains(driver)
    action.move_to_element(hover_element).perform()
    time.sleep(3)

    apsrtc_option = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'rtcName') and text()='APSRTC']"))
    )
    apsrtc_option.click()
    time.sleep(5)

    # Select a route
    routes = driver.find_elements(By.XPATH, "//a[@class='route']")
    if not routes:
        print("No routes found.")
        driver.quit()
        exit()

    # Use the first route
    route_link = routes[0].get_attribute("href")
    route_name = routes[0].text
    driver.get(route_link)
    time.sleep(5)

    # Check for buses
    bus_elements = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
    if not bus_elements:
        print("No buses found. Attempting alternative method...")
        fallback_element = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="fixer"]/div/div/div[1]/span[3]/i'))
        )
        action.move_to_element(fallback_element).pause(1).click().perform()
        time.sleep(5)

    # Scrolling and extracting buses
    scroll_count = 0
    Apsrtc_pvt_bus_data = []

    while scroll_count < 5:  # Scroll up to 5 times
        driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)
        time.sleep(7)
        scroll_count += 1

        # Fetch bus details
        bus_names = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
        bus_types = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_times = driver.find_elements(By.XPATH, "//div[@class='dp-time f-19 d-color f-bold']")
        durations = driver.find_elements(By.XPATH, "//div[@class='dur l-color lh-24']")
        reaching_times = driver.find_elements(By.XPATH, "//div[@class='bp-time f-19 d-color disp-Inline']")
        star_ratings = driver.find_elements(By.XPATH, "//span[contains(text(), '.') and string-length(text()) > 2]")
        prices = driver.find_elements(By.XPATH, "//div[contains(@class, 'fare')]/span[contains(@class, 'f-19')]")
        seats_available = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        # Find the minimum length to avoid mismatches
        min_length = min(len(bus_names), len(bus_types), len(departing_times), len(durations),
                         len(reaching_times), len(star_ratings), len(prices), len(seats_available))

        for i in range(min_length):
            bus_info = {
                "route_name": route_name,
                "route_link": route_link,
                "bus_name": bus_names[i].text,
                "bus_type": bus_types[i].text,
                "departing_time": departing_times[i].text,
                "duration": durations[i].text,
                "reaching_time": reaching_times[i].text,
                "star_rating": star_ratings[i].text,
                "price": prices[i].text.replace('₹', '').strip(),
                "seats_available": seats_available[i].text.replace('Seats available', '').strip(),
            }
            if bus_info not in Apsrtc_pvt_bus_data :  # Avoid duplicates
                Apsrtc_pvt_bus_data .append(bus_info)

        print(f"Scroll {scroll_count}: Found {len(Apsrtc_pvt_bus_data )} unique buses.")

        if len(Apsrtc_pvt_bus_data ) >= 10:  # Stop once 10 buses are collected
            break

    # Print the collected data
    print("\nCollected Bus Data:")
    for bus in Apsrtc_pvt_bus_data[:10]:  # Limit output to 10 buses
        print(bus)

except Exception as e:
    print(f"An error occurred: {e}")

finally:
    driver.quit()


Scroll 1: Found 5 unique buses.
Scroll 2: Found 10 unique buses.

Collected Bus Data:
{'route_name': 'Hyderabad to Vijayawada', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada', 'bus_name': 'FRESHBUS', 'bus_type': 'Electric A/C Seater (2+2)', 'departing_time': '22:35', 'duration': '07h 05m', 'reaching_time': '05:40', 'star_rating': '4.7', 'price': '898', 'seats_available': '21'}
{'route_name': 'Hyderabad to Vijayawada', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada', 'bus_name': 'IntrCity SmartBus', 'bus_type': 'A/C Seater / Sleeper (2+1)', 'departing_time': '23:05', 'duration': '06h 55m', 'reaching_time': '06:00', 'star_rating': '4.6', 'price': '759', 'seats_available': '25'}
{'route_name': 'Hyderabad to Vijayawada', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada', 'bus_name': 'IntrCity SmartBus', 'bus_type': 'AC Sleeper (2+1)', 'departing_time': '23:50', 'duration': '06h 10m', 'reaching_time': '06:00', 'sta

In [None]:
#  insert the data Scrape the Redbus APSRTC PRIVATE BUS from website,  route : 'Hyderabad to Vijayawada
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from datetime import datetime

try:
    # Connect to PostgreSQL database
    connection = psycopg2.connect(
        host="localhost",
        port="5432",
        database="red_bus",
        user="postgres",
        password="sample12"
    )
    connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cursor = connection.cursor()

    # Create the bus_routes table if it doesn't exist
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS bus_routes (
        id SERIAL PRIMARY KEY,
        route_name TEXT,
        route_link TEXT,
        bus_name TEXT DEFAULT NULL,
        bus_type TEXT DEFAULT NULL,
        departing_time TIME DEFAULT NULL,
        duration TEXT DEFAULT NULL,
        reaching_time TIME DEFAULT NULL,
        star_rating FLOAT DEFAULT NULL,
        price DECIMAL DEFAULT NULL,
        seats_available INT DEFAULT NULL
    )
    """)

    # Insert data into the table
    for row in Apsrtc_pvt_bus_data:
        cursor.execute("""
        INSERT INTO bus_routes (route_name, route_link, bus_name, bus_type, departing_time, duration, 
        reaching_time, star_rating, price, seats_available) 
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """, (
            row['route_name'], row['route_link'], row['bus_name'], row['bus_type'],
            datetime.strptime(row['departing_time'], "%H:%M").time() if row['departing_time'] else None,
            row['duration'],
            datetime.strptime(row['reaching_time'], "%H:%M").time() if row['reaching_time'] else None,
            float(row['star_rating']) if row['star_rating'] else None,
            float(row['price']) if row['price'] else None,
            int(row['seats_available']) if row['seats_available'] else None
        ))

    # Commit the transaction
    connection.commit()
    print("Data inserted successfully.")

except Exception as e:
    print(f"An error occurred while interacting with the database: {e}")

finally:
    if cursor:
        cursor.close()
    if connection:
        connection.close()


Data inserted successfully.


In [None]:
 # Scrape the Redbus APSRTC PRIVATE BUS from website,  route : Vijayawada to Hyderabad

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

# Initialize the Chrome driver
driver = webdriver.Chrome()

# Navigate to the RedBus website
driver.get('https://www.redbus.in/')
driver.maximize_window()
time.sleep(5)

try:
    # Hover over and click the APSRTC option
    hover_element = driver.find_element(By.CLASS_NAME, "rtcName")
    action = ActionChains(driver)
    action.move_to_element(hover_element).perform()
    time.sleep(3)

    apsrtc_option = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'rtcName') and text()='APSRTC']"))
    )
    apsrtc_option.click()
    time.sleep(5)

    # Select a route
    driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)
    
    routes = driver.find_elements(By.XPATH, "//a[contains(@class, 'route') and text()='Vijayawada to Hyderabad']")

    
    if not routes:
        print("No routes found.")
        driver.quit()
        exit()

    # Use the first route
    route_link = routes[0].get_attribute("href")
    route_name = routes[0].text
    driver.get(route_link)
    time.sleep(5)

    # Check for buses
    bus_elements = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
    if not bus_elements:
        print("No buses found. Attempting alternative method...")
        fallback_element = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="fixer"]/div/div/div[1]/span[3]/i'))
        )
        action.move_to_element(fallback_element).pause(1).click().perform()
        time.sleep(5)

    # Scrolling and extracting buses
    scroll_count = 0
    Apsrtc_pvt_bus_data = []

    while scroll_count < 5:  # Scroll up to 5 times
        driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)
        time.sleep(7)
        scroll_count += 1

        # Fetch bus details
        bus_names = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
        bus_types = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_times = driver.find_elements(By.XPATH, "//div[@class='dp-time f-19 d-color f-bold']")
        durations = driver.find_elements(By.XPATH, "//div[@class='dur l-color lh-24']")
        reaching_times = driver.find_elements(By.XPATH, "//div[@class='bp-time f-19 d-color disp-Inline']")
        star_ratings = driver.find_elements(By.XPATH, "//span[contains(text(), '.') and string-length(text()) > 2]")
        prices = driver.find_elements(By.XPATH, "//div[contains(@class, 'fare')]/span[contains(@class, 'f-19')]")
        seats_available = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        # Find the minimum length to avoid mismatches
        min_length = min(len(bus_names), len(bus_types), len(departing_times), len(durations),
                         len(reaching_times), len(star_ratings), len(prices), len(seats_available))

        for i in range(min_length):
            bus_info = {
                "route_name": route_name,
                "route_link": route_link,
                "bus_name": bus_names[i].text,
                "bus_type": bus_types[i].text,
                "departing_time": departing_times[i].text,
                "duration": durations[i].text,
                "reaching_time": reaching_times[i].text,
                "star_rating": star_ratings[i].text,
                "price": prices[i].text.replace('₹', '').strip(),
                "seats_available": seats_available[i].text.replace('Seats available', '').strip(),
            }
            if bus_info not in Apsrtc_pvt_bus_data :  # Avoid duplicates
                Apsrtc_pvt_bus_data .append(bus_info)

        print(f"Scroll {scroll_count}: Found {len(Apsrtc_pvt_bus_data )} unique buses.")

        if len(Apsrtc_pvt_bus_data ) >= 10:  # Stop once 10 buses are collected
            break

    # Print the collected data
    print("\nCollected Bus Data:")
    for bus in Apsrtc_pvt_bus_data[:10]:  # Limit output to 10 buses
        print(bus)

except Exception as e:
    print(f"An error occurred: {e}")

finally:
    driver.quit()


Scroll 1: Found 5 unique buses.
Scroll 2: Found 10 unique buses.

Collected Bus Data:
{'route_name': 'Vijayawada to Hyderabad', 'route_link': 'https://www.redbus.in/bus-tickets/vijayawada-to-hyderabad', 'bus_name': 'IntrCity SmartBus', 'bus_type': 'A/C Seater / Sleeper (2+1)', 'departing_time': '22:45', 'duration': '06h 55m', 'reaching_time': '05:40', 'star_rating': '4.6', 'price': '639', 'seats_available': '27'}
{'route_name': 'Vijayawada to Hyderabad', 'route_link': 'https://www.redbus.in/bus-tickets/vijayawada-to-hyderabad', 'bus_name': 'FRESHBUS', 'bus_type': 'Electric A/C Seater (2+2)', 'departing_time': '21:50', 'duration': '05h 40m', 'reaching_time': '03:30', 'star_rating': '4.7', 'price': '398', 'seats_available': '23'}
{'route_name': 'Vijayawada to Hyderabad', 'route_link': 'https://www.redbus.in/bus-tickets/vijayawada-to-hyderabad', 'bus_name': 'FRESHBUS', 'bus_type': 'Electric A/C Seater (2+2)', 'departing_time': '23:25', 'duration': '05h 40m', 'reaching_time': '05:05', 'sta

In [None]:
#  insert the data   Scrape the Redbus APSRTC PRIVATE BUS from website,  route : Vijayawada to Hyderabad
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from datetime import datetime

try:
    # Connect to PostgreSQL database
    connection = psycopg2.connect(
        host="localhost",
        port="5432",
        database="red_bus",
        user="postgres",
        password="sample12"
    )
    connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cursor = connection.cursor()

    # Create the bus_routes table if it doesn't exist
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS bus_routes (
        id SERIAL PRIMARY KEY,
        route_name TEXT,
        route_link TEXT,
        bus_name TEXT DEFAULT NULL,
        bus_type TEXT DEFAULT NULL,
        departing_time TIME DEFAULT NULL,
        duration TEXT DEFAULT NULL,
        reaching_time TIME DEFAULT NULL,
        star_rating FLOAT DEFAULT NULL,
        price DECIMAL DEFAULT NULL,
        seats_available INT DEFAULT NULL
    )
    """)

    # Insert data into the table
    for row in Apsrtc_pvt_bus_data:
        cursor.execute("""
        INSERT INTO bus_routes (route_name, route_link, bus_name, bus_type, departing_time, duration, 
        reaching_time, star_rating, price, seats_available) 
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """, (
            row['route_name'], row['route_link'], row['bus_name'], row['bus_type'],
            datetime.strptime(row['departing_time'], "%H:%M").time() if row['departing_time'] else None,
            row['duration'],
            datetime.strptime(row['reaching_time'], "%H:%M").time() if row['reaching_time'] else None,
            float(row['star_rating']) if row['star_rating'] else None,
            float(row['price']) if row['price'] else None,
            int(row['seats_available']) if row['seats_available'] else None
        ))

    # Commit the transaction
    connection.commit()
    print("Data inserted successfully.")

except Exception as e:
    print(f"An error occurred while interacting with the database: {e}")

finally:
    if cursor:
        cursor.close()
    if connection:
        connection.close()


Data inserted successfully.


24/02/2025

In [None]:
# Scrape the Redbus APSRTC GOVT BUS from website,  route : Vijayawada to Hyderabad

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

# Initialize the Chrome driver
driver = webdriver.Chrome()

# Navigate to the RedBus website
driver.get('https://www.redbus.in/')
driver.maximize_window()
time.sleep(5)

try:
    # Hover over and click the APSRTC option
    hover_element = driver.find_element(By.CLASS_NAME, "rtcName")
    action = ActionChains(driver)
    action.move_to_element(hover_element).perform()
    time.sleep(3)

    apsrtc_option = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'rtcName') and text()='APSRTC']"))
    )
    apsrtc_option.click()
    time.sleep(5)

    # Select a route

    driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)
    
    routes = driver.find_elements(By.XPATH, "//a[contains(@class, 'route') and text()='Vijayawada to Hyderabad']")
   
    if not routes:
        print("No routes found.")
        driver.quit()
        exit()

    # Use the first route
    route_link = routes[0].get_attribute("href")
    route_name = routes[0].text
    driver.get(route_link)
    time.sleep(5)

    # Check for buses
    bus_elements = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
    if not bus_elements:
        print("No buses found. Attempting alternative method...")
        fallback_element = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="fixer"]/div/div/div[1]/span[3]/i'))
        )
        action.move_to_element(fallback_element).pause(1).click().perform()
        time.sleep(5)
        
    # Locate and hover over the 'rtcName' element
    hover_element = driver.find_element(By.XPATH, "//div[contains(@class, 'f-bold grop-name m-top-20') and text()='APSRTC Buses']")
    button_element = driver.find_element(By.CLASS_NAME, "button")

    action = ActionChains(driver)
    action.move_to_element(hover_element).pause(1).move_to_element(button_element).click().perform()

    #action.move_to_element(hover_element).perform()
    time.sleep(3)

    # Click on the 'APSRTC govt buses' option
    button_element = driver.find_element(By.CLASS_NAME, "button")
driver.execute_script("arguments[0].scrollIntoView();", button_element)
time.sleep(2)  # Allow time for scrolling
button_element.click()


    # Use JavaScript to click
    driver.execute_script("arguments[0].click();", button_element)

    #apsrtc_option = WebDriverWait(driver, 10).until(
    #    EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'button') and text()='View Buses']"))
    #)
    #apsrtc_option.click()
    #time.sleep(10)

    # Scrolling and extracting buses
    scroll_count = 0
    APSRTC_GOV_bus_data = []

    while scroll_count < 5:  # Scroll up to 5 times
        driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)
        time.sleep(7)
        scroll_count += 1

        # Fetch bus details
        #bus_names = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
        #Use . Instead of text(): If the text content is nested or mixed, . can capture all text within the element:
        bus_names = driver.find_elements(By.XPATH, "//div[contains(@class, 'travels lh-24 f-bold d-color') and contains(., 'APSRTC')]")

        bus_types = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_times = driver.find_elements(By.XPATH, "//div[@class='dp-time f-19 d-color f-bold']")
        durations = driver.find_elements(By.XPATH, "//div[@class='dur l-color lh-24']")
        reaching_times = driver.find_elements(By.XPATH, "//div[@class='bp-time f-19 d-color disp-Inline']")
        #star_ratings = driver.find_elements(By.XPATH, "//span[contains(text(), '.') and string-length(text()) > 2]")
        star_ratings = driver.find_elements(By.XPATH, "//span[normalize-space(.) != '' and contains(text(), '.') and number(text())]")#[:10]
        prices = driver.find_elements(By.XPATH, "//div[contains(@class, 'fare')]/span[contains(@class, 'f-19')]")
        seats_available = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        # Find the minimum length to avoid mismatches
        min_length = min(len(bus_names), len(bus_types), len(departing_times), len(durations),
                         len(reaching_times), len(star_ratings), len(prices), len(seats_available))
        
                # Extract and format data
        for i in range(min_length):
            # Extract and process the bus_name
            raw_bus_name = bus_names[i].text
            bus_name_parts = raw_bus_name.split(' - ')  # Split 'APSRTC - 9363' into ['APSRTC', '9363']
            bus_company = bus_name_parts[0] if len(bus_name_parts) > 0 else None  # Extract 'APSRTC'
            bus_number = int(bus_name_parts[1]) if len(bus_name_parts) > 1 and bus_name_parts[1].isdigit() else None  # Extract and convert '9363' to number

            # Create the dictionary

            for i in range(min_length):
                bus_info = {
                    "route_name": route_name,
                    "route_link": route_link,
                    "bus_name": bus_names[i].text,
                    "bus_type": bus_types[i].text,
                    "departing_time": departing_times[i].text,
                    "duration": durations[i].text,
                    "reaching_time": reaching_times[i].text,
                    "star_rating": star_ratings[i].text,
                    "price": prices[i].text.replace('₹', '').strip(),
                    "seats_available": seats_available[i].text.replace('Seats available', '').strip(),
                }
                if bus_info not in APSRTC_GOV_bus_data:  # Avoid duplicates
                    APSRTC_GOV_bus_data.append(bus_info)

        print(f"Scroll {scroll_count}: Found {len(APSRTC_GOV_bus_data)} unique buses.")

        if len(APSRTC_GOV_bus_data) >= 10:  # Stop once 10 buses are collected
            break

    # Print the collected data
    print("\nCollected Bus Data:")
    for bus in APSRTC_GOV_bus_data[:10]:  # Limit output to 10 buses
        print(bus)

except Exception as e:
    print(f"An error occurred: {e}")

finally:
    driver.quit()
    


An error occurred: Message: element click intercepted: Element <div class="button">...</div> is not clickable at point (1434, 15). Other element would receive the click: <div class="modify-sec-onward clearfix modify-sec d-color f-bold">...</div>
  (Session info: chrome=133.0.6943.142)
Stacktrace:
	GetHandleVerifier [0x00007FF64A0EC6A5+28789]
	(No symbol) [0x00007FF64A055B20]
	(No symbol) [0x00007FF649EE8F9A]
	(No symbol) [0x00007FF649F471E9]
	(No symbol) [0x00007FF649F44BA2]
	(No symbol) [0x00007FF649F41C51]
	(No symbol) [0x00007FF649F40B51]
	(No symbol) [0x00007FF649F32314]
	(No symbol) [0x00007FF649F6732A]
	(No symbol) [0x00007FF649F31BC6]
	(No symbol) [0x00007FF649F67540]
	(No symbol) [0x00007FF649F8F7E3]
	(No symbol) [0x00007FF649F67103]
	(No symbol) [0x00007FF649F2FFC0]
	(No symbol) [0x00007FF649F31273]
	GetHandleVerifier [0x00007FF64A431AED+3458237]
	GetHandleVerifier [0x00007FF64A44829C+3550316]
	GetHandleVerifier [0x00007FF64A43DB9D+3507565]
	GetHandleVerifier [0x00007FF64A1B2C

In [None]:
# Govt bus APSRTC_GOV_bus_data Hydrebad Scrape the Redbus APSRTC GOVT BUS from website,  route : Vijayawada to Hyderabad
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from datetime import datetime

try:
    # Connect to PostgreSQL database
    connection = psycopg2.connect(
        host="localhost",
        port="5432",
        database="red_bus",
        user="postgres",
        password="sample12"
    )
    connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cursor = connection.cursor()

    # Create the bus_routes table if it doesn't exist
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS bus_routes (
        id SERIAL PRIMARY KEY,
        route_name TEXT,
        route_link TEXT,
        bus_name TEXT DEFAULT NULL,
        bus_type TEXT DEFAULT NULL,
        departing_time TIME DEFAULT NULL,
        duration TEXT DEFAULT NULL,
        reaching_time TIME DEFAULT NULL,
        star_rating FLOAT DEFAULT NULL,
        price DECIMAL DEFAULT NULL,
        seats_available INT DEFAULT NULL
    )
    """)

    # Insert data into the table
    for row in APSRTC_GOV_bus_data:
        cursor.execute("""
        INSERT INTO bus_routes (route_name, route_link, bus_name, bus_type, departing_time, duration, 
        reaching_time, star_rating, price, seats_available) 
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """, (
            row['route_name'], row['route_link'], row['bus_name'], row['bus_type'],
            datetime.strptime(row['departing_time'], "%H:%M").time() if row['departing_time'] else None,
            row['duration'],
            datetime.strptime(row['reaching_time'], "%H:%M").time() if row['reaching_time'] else None,
            float(row['star_rating']) if row['star_rating'] else None,
            float(row['price']) if row['price'] else None,
            int(row['seats_available']) if row['seats_available'] else None
        ))

    # Commit the transaction
    connection.commit()
    print("Data inserted successfully.")

except Exception as e:
    print(f"An error occurred while interacting with the database: {e}")

finally:
    if cursor:
        cursor.close()
    if connection:
        connection.close()


Data inserted successfully.


In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

# Initialize the Chrome driver
driver = webdriver.Chrome()

# Navigate to the RedBus website
driver.get('https://www.redbus.in/')
driver.maximize_window()
time.sleep(5)

try:
    # Hover over and click the APSRTC option
    hover_element = driver.find_element(By.CLASS_NAME, "rtcName")
    action = ActionChains(driver)
    action.move_to_element(hover_element).perform()
    time.sleep(3)

    apsrtc_option = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'rtcName') and text()='APSRTC']"))
    )
    apsrtc_option.click()
    time.sleep(5)

    # Select a route
    driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)
    
    routes = driver.find_elements(By.XPATH, "//a[contains(@class, 'route') and text()='Vijayawada to Hyderabad']")
    
    if not routes:
        print("No routes found.")
        driver.quit()
        exit()

    # Use the first route
    route_link = routes[0].get_attribute("href")
    route_name = routes[0].text
    driver.get(route_link)
    time.sleep(5)

    # Check for buses
    bus_elements = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
    if not bus_elements:
        print("No buses found. Attempting alternative method...")
        fallback_element = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="fixer"]/div/div/div[1]/span[3]/i'))
        )
        action.move_to_element(fallback_element).pause(1).click().perform()
        time.sleep(5)
    
    # Locate and hover over the 'rtcName' element
    hover_element = driver.find_element(By.XPATH, "//div[contains(@class, 'f-bold grop-name m-top-20') and text()='APSRTC Buses']")
    button_element = driver.find_element(By.CLASS_NAME, "button")

    # Scroll into view before clicking
    driver.execute_script("arguments[0].scrollIntoView();", button_element)
    time.sleep(2)
    button_element.click()
    
    time.sleep(10)

    # Scrolling and extracting buses
    scroll_count = 0
    APSRTC_GOV_bus_data = []

    while scroll_count < 5:  # Scroll up to 5 times
        driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)
        time.sleep(7)
        scroll_count += 1

        # Fetch bus details
        bus_names = driver.find_elements(By.XPATH, "//div[contains(@class, 'travels lh-24 f-bold d-color') and contains(., 'APSRTC')]")
        bus_types = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_times = driver.find_elements(By.XPATH, "//div[@class='dp-time f-19 d-color f-bold']")
        durations = driver.find_elements(By.XPATH, "//div[@class='dur l-color lh-24']")
        reaching_times = driver.find_elements(By.XPATH, "//div[@class='bp-time f-19 d-color disp-Inline']")
        star_ratings = driver.find_elements(By.XPATH, "//span[normalize-space(.) != '' and contains(text(), '.') and number(text())]")
        prices = driver.find_elements(By.XPATH, "//div[contains(@class, 'fare')]/span[contains(@class, 'f-19')]")
        seats_available = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        # Find the minimum length to avoid mismatches
        min_length = min(len(bus_names), len(bus_types), len(departing_times), len(durations),
                         len(reaching_times), len(star_ratings), len(prices), len(seats_available))
        
        # Extract and format data
        for i in range(min_length):
            bus_info = {
                "route_name": route_name,
                "route_link": route_link,
                "bus_name": bus_names[i].text,
                "bus_type": bus_types[i].text,
                "departing_time": departing_times[i].text,
                "duration": durations[i].text,
                "reaching_time": reaching_times[i].text,
                "star_rating": star_ratings[i].text,
                "price": prices[i].text.replace('₹', '').strip(),
                "seats_available": seats_available[i].text.replace('Seats available', '').strip(),
            }
            if bus_info not in APSRTC_GOV_bus_data:  # Avoid duplicates
                APSRTC_GOV_bus_data.append(bus_info)

        print(f"Scroll {scroll_count}: Found {len(APSRTC_GOV_bus_data)} unique buses.")

        if len(APSRTC_GOV_bus_data) >= 10:  # Stop once 10 buses are collected
            break

    # Print the collected data
    print("\nCollected Bus Data:")
    for bus in APSRTC_GOV_bus_data[:10]:  # Limit output to 10 buses
        print(bus)

except Exception as e:
    print(f"An error occurred: {e}")

finally:
    driver.quit()

Scroll 1: Found 10 unique buses.

Collected Bus Data:
{'route_name': 'Vijayawada to Hyderabad', 'route_link': 'https://www.redbus.in/bus-tickets/vijayawada-to-hyderabad', 'bus_name': 'APSRTC - 4046', 'bus_type': 'SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)', 'departing_time': '21:00', 'duration': '06h 40m', 'reaching_time': '03:40', 'star_rating': '4.4', 'price': '480', 'seats_available': '5'}
{'route_name': 'Vijayawada to Hyderabad', 'route_link': 'https://www.redbus.in/bus-tickets/vijayawada-to-hyderabad', 'bus_name': 'APSRTC - 9651', 'bus_type': 'SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)', 'departing_time': '21:00', 'duration': '07h 41m', 'reaching_time': '04:41', 'star_rating': '4.2', 'price': '469', 'seats_available': '23'}
{'route_name': 'Vijayawada to Hyderabad', 'route_link': 'https://www.redbus.in/bus-tickets/vijayawada-to-hyderabad', 'bus_name': 'APSRTC - 9654', 'bus_type': 'SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)', 'departing_time': '21:40', 'duration': '07h 40m', 'reaching_time': '0

In [None]:
# Govt bus APSRTC_GOV_bus_data Hydrebad Scrape the Redbus APSRTC GOVT BUS from website,  route : Vijayawada to Hyderabad
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from datetime import datetime

try:
    # Connect to PostgreSQL database
    connection = psycopg2.connect(
        host="localhost",
        port="5432",
        database="red_bus",
        user="postgres",
        password="sample12"
    )
    connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cursor = connection.cursor()

    # Create the bus_routes table if it doesn't exist
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS bus_routes (
        id SERIAL PRIMARY KEY,
        route_name TEXT,
        route_link TEXT,
        bus_name TEXT DEFAULT NULL,
        bus_type TEXT DEFAULT NULL,
        departing_time TIME DEFAULT NULL,
        duration TEXT DEFAULT NULL,
        reaching_time TIME DEFAULT NULL,
        star_rating FLOAT DEFAULT NULL,
        price DECIMAL DEFAULT NULL,
        seats_available INT DEFAULT NULL
    )
    """)

    # Insert data into the table
    for row in APSRTC_GOV_bus_data:
        cursor.execute("""
        INSERT INTO bus_routes (route_name, route_link, bus_name, bus_type, departing_time, duration, 
        reaching_time, star_rating, price, seats_available) 
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """, (
            row['route_name'], row['route_link'], row['bus_name'], row['bus_type'],
            datetime.strptime(row['departing_time'], "%H:%M").time() if row['departing_time'] else None,
            row['duration'],
            datetime.strptime(row['reaching_time'], "%H:%M").time() if row['reaching_time'] else None,
            float(row['star_rating']) if row['star_rating'] else None,
            float(row['price']) if row['price'] else None,
            int(row['seats_available']) if row['seats_available'] else None
        ))

    # Commit the transaction
    connection.commit()
    print("Data inserted successfully.")

except Exception as e:
    print(f"An error occurred while interacting with the database: {e}")

finally:
    if cursor:
        cursor.close()
    if connection:
        connection.close()


Data inserted successfully.


In [12]:
# Govt bus APSRTC_GOV_bus_data Hydrebad Scrape the Redbus APSRTC GOVT BUS from website,  route : Vijayawada to Hyderabad
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from datetime import datetime

try:
    # Connect to PostgreSQL database
    connection = psycopg2.connect(
        host="localhost",
        port="5432",
        database="red_bus",
        user="postgres",
        password="sample12"
    )
    connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cursor = connection.cursor()

    # Reset the sequence before inserting new rows
    cursor.execute("SELECT setval('bus_routes_id_seq', COALESCE((SELECT MAX(id) FROM bus_routes), 1), true);")


    # Insert data into the table
    for row in APSRTC_GOV_bus_data:
        cursor.execute("""
        INSERT INTO bus_routes (route_name, route_link, bus_name, bus_type, departing_time, duration, 
        reaching_time, star_rating, price, seats_available) 
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """, (
            row['route_name'], row['route_link'], row['bus_name'], row['bus_type'],
            datetime.strptime(row['departing_time'], "%H:%M").time() if row['departing_time'] else None,
            row['duration'],
            datetime.strptime(row['reaching_time'], "%H:%M").time() if row['reaching_time'] else None,
            float(row['star_rating']) if row['star_rating'] else None,
            float(row['price']) if row['price'] else None,
            int(row['seats_available']) if row['seats_available'] else None
        ))

    # Commit the transaction
    connection.commit()
    print("Data inserted successfully.")

except Exception as e:
    print(f"An error occurred while interacting with the database: {e}")

finally:
    if cursor:
        cursor.close()
    if connection:
        connection.close()


Data inserted successfully.


07-03-2025

In [11]:
# Scrape  the data  TGSRTC route :TGSRTC Gvt BUS : Hyderabad to Vijayawada.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

driver = webdriver.Chrome()
driver.get('https://www.redbus.in/')
driver.maximize_window()
time.sleep(5)

try:
    hover_element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, "//div[contains(@class, 'rtcName') and text()='TGSRTC']"))
    )
    action = ActionChains(driver)
    action.move_to_element(hover_element).perform()

    TGSRTC_option = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'rtcName') and text()='TGSRTC']"))
    )
    TGSRTC_option.click()
    time.sleep(5)

    driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)

    routes = driver.find_elements(By.XPATH, "//a[contains(@class, 'route') and text()='Hyderabad to Vijayawada']")

    if not routes:
        print("No routes found.")
        driver.quit()
        exit()

    route_link = routes[0].get_attribute("href")
    route_name = routes[0].text
    driver.get(route_link)
    time.sleep(5)

    fallback_element = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="fixer"]/div/div/div[1]/span[3]/i'))
    )
    action.move_to_element(fallback_element).pause(1).click().perform()
    time.sleep(5)

    #driver.execute_script("window.scrollTo(0, document.body.scrollHeight / 2);") #scrolls to the middle of the page.

    tgsrtc_view_buses_button = WebDriverWait(driver, 20).until(
    EC.element_to_be_clickable((By.XPATH, "//div[contains(text(), 'TGSRTC Buses')]/parent::div/following-sibling::div//div[contains(@class, 'button')]"))
    )
    driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", tgsrtc_view_buses_button)
    time.sleep(2)
    tgsrtc_view_buses_button.click()


    #hover_element = WebDriverWait(driver, 20).until(
    #EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'f-bold grop-name m-top-20') and (text()='TGSRTC Buses' or text()='View Buses')]"))
    #)

    #hover_element = WebDriverWait(driver, 20).until(
      # EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'f-bold grop-name m-top-20') and text()='TGSRTC Buses' and text()='View Buses']"))
    #)
    #action = ActionChains(driver)
    


    #hover_element = WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, 
    #"//div[contains(@class, 'f-bold grop-name m-top-20') and text()='View Buses']")))

    #action = ActionChains(driver)
    #action.move_to_element(hover_element).perform()

    #button_element = WebDriverWait(driver, 20).until(
    #    EC.element_to_be_clickable((By.CLASS_NAME, "button"))
    #)

    #driver.execute_script("arguments[0].scrollIntoView();", button_element)
    #time.sleep(2)
    #button_element.click()

    #time.sleep(10)

    scroll_count = 0
    TGSRTC_GOV_bus_data = [] 

    while scroll_count < 5:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3) #reduced time.

        # Fetch bus details with explicit wait.
        bus_names = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//div[contains(@class, 'travels lh-24 f-bold d-color') and contains(., 'TGSRTC')]")))

        bus_types = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_times = driver.find_elements(By.XPATH, "//div[@class='dp-time f-19 d-color f-bold']")
        durations = driver.find_elements(By.XPATH, "//div[@class='dur l-color lh-24']")
        reaching_times = driver.find_elements(By.XPATH, "//div[@class='bp-time f-19 d-color disp-Inline']")
        star_ratings = driver.find_elements(By.XPATH, "//span[normalize-space(.) != '' and contains(text(), '.') and number(text())]")
        prices = driver.find_elements(By.XPATH, "//div[contains(@class, 'fare')]/span[contains(@class, 'f-19')]")
        seats_available = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        min_length = min(len(bus_names), len(bus_types), len(departing_times), len(durations),
                         len(reaching_times), len(star_ratings), len(prices), len(seats_available))

        for i in range(min_length):
            bus_info = {
                "route_name": route_name,
                "route_link": route_link,
                "bus_name": bus_names[i].text,
                "bus_type": bus_types[i].text,
                "departing_time": departing_times[i].text,
                "duration": durations[i].text,
                "reaching_time": reaching_times[i].text,
                "star_rating": star_ratings[i].text,
                "price": prices[i].text.replace('₹', '').strip(),
                "seats_available": seats_available[i].text.replace('Seats available', '').strip(),
            }
            if bus_info not in TGSRTC_GOV_bus_data:
                TGSRTC_GOV_bus_data.append(bus_info)
                if len(TGSRTC_GOV_bus_data) >= 10: #check inside the for loop.
                    break #break inner loop.

        print(f"Scroll {scroll_count}: Found {len(TGSRTC_GOV_bus_data)} unique buses.")

        if len(TGSRTC_GOV_bus_data) >= 10:
            break

        scroll_count += 1

    print("\nCollected Bus Data:")
    for bus in TGSRTC_GOV_bus_data[:10]:
        print(bus)

except Exception as e:
    print(f"An error occurred: {e}")

finally:
    driver.quit()

Scroll 0: Found 10 unique buses.

Collected Bus Data:
{'route_name': 'Hyderabad to Vijayawada', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada', 'bus_name': 'TGSRTC - 40010', 'bus_type': 'Super Luxury (Non AC Seater 2+2 Push Back)', 'departing_time': '03:00', 'duration': '06h 55m', 'reaching_time': '09:55', 'star_rating': '3.5', 'price': '396', 'seats_available': '17'}
{'route_name': 'Hyderabad to Vijayawada', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada', 'bus_name': 'TGSRTC - 4771', 'bus_type': 'RAJDHANI (A.C. Semi Sleeper)', 'departing_time': '03:05', 'duration': '06h 20m', 'reaching_time': '09:25', 'star_rating': '3.3', 'price': '491', 'seats_available': '31'}
{'route_name': 'Hyderabad to Vijayawada', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada', 'bus_name': 'TGSRTC - 1463', 'bus_type': 'RAJDHANI (A.C. Semi Sleeper)', 'departing_time': '03:15', 'duration': '05h 15m', 'reaching_time': '08:30', 'star_r

In [47]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

driver = webdriver.Chrome()
driver.get('https://www.redbus.in/')
driver.maximize_window()
time.sleep(5)

try:
    hover_element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, "//div[contains(@class, 'rtcName') and text()='TGSRTC']"))
    )
    action = ActionChains(driver)
    action.move_to_element(hover_element).perform()

    TGSRTC_option = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'rtcName') and text()='TGSRTC']"))
    )
    TGSRTC_option.click()
    time.sleep(5)

    driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)

    routes = driver.find_elements(By.XPATH, "//a[contains(@class, 'route') and text()='Hyderabad to Vijayawada']")

    if not routes:
        print("No routes found.")
        driver.quit()
        exit()

    route_link = routes[0].get_attribute("href")
    route_name = routes[0].text
    driver.get(route_link)
    time.sleep(5)

    fallback_element = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="fixer"]/div/div/div[1]/span[3]/i'))
    )
    action.move_to_element(fallback_element).pause(1).click().perform()
    time.sleep(5)

    # Wait for the hover element to be present
    hover_element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, "//div[contains(@class, 'f-bold grop-name m-top-20') and text()='TGSRTC Buses']"))
        )

        # Move to the hover element
    action = ActionChains(driver)
    action.move_to_element(hover_element).perform()

        # Wait for the button element to be clickable
    button_element = WebDriverWait(driver, 20).until(
            EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'f-bold grop-name m-top-20') and text()='TGSRTC Buses']/following-sibling::div[@class='button']"))
        )


    driver.execute_script("arguments[0].scrollIntoView();", button_element)
    time.sleep(2)

    # Use JavaScript click as a fallback
    try:
        button_element.click()
    except:
        driver.execute_script("arguments[0].click();", button_element)

    time.sleep(10)
    scroll_count = 0
    TGSRTC_GOV_bus_data = [] #Corrected list name.

    while scroll_count < 5:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3) #reduced time.

        # Fetch bus details with explicit wait.
        bus_names = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//div[contains(@class, 'travels lh-24 f-bold d-color') and contains(., 'TGSRTC')]")))

        bus_types = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_times = driver.find_elements(By.XPATH, "//div[@class='dp-time f-19 d-color f-bold']")
        durations = driver.find_elements(By.XPATH, "//div[@class='dur l-color lh-24']")
        reaching_times = driver.find_elements(By.XPATH, "//div[@class='bp-time f-19 d-color disp-Inline']")
        star_ratings = driver.find_elements(By.XPATH, "//span[normalize-space(.) != '' and contains(text(), '.') and number(text())]")
        prices = driver.find_elements(By.XPATH, "//div[contains(@class, 'fare')]/span[contains(@class, 'f-19')]")
        seats_available = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        min_length = min(len(bus_names), len(bus_types), len(departing_times), len(durations),
                         len(reaching_times), len(star_ratings), len(prices), len(seats_available))

        for i in range(min_length):
            bus_info = {
                "route_name": route_name,
                "route_link": route_link,
                "bus_name": bus_names[i].text,
                "bus_type": bus_types[i].text,
                "departing_time": departing_times[i].text,
                "duration": durations[i].text,
                "reaching_time": reaching_times[i].text,
                "star_rating": star_ratings[i].text,
                "price": prices[i].text.replace('₹', '').strip(),
                "seats_available": seats_available[i].text.replace('Seats available', '').strip(),
            }
            if bus_info not in TGSRTC_GOV_bus_data:
                TGSRTC_GOV_bus_data.append(bus_info)
                if len(TGSRTC_GOV_bus_data) >= 10: #check inside the for loop.
                    break #break inner loop.

        print(f"Scroll {scroll_count}: Found {len(TGSRTC_GOV_bus_data)} unique buses.")

        if len(TGSRTC_GOV_bus_data) >= 10:
            break

        scroll_count += 1

    print("\nCollected Bus Data:")
    for bus in TGSRTC_GOV_bus_data[:10]:
        print(bus)

except Exception as e:
    print(f"An error occurred: {e}")

finally:
    driver.quit()



An error occurred: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF60CAEC6A5+28789]
	(No symbol) [0x00007FF60CA55B20]
	(No symbol) [0x00007FF60C8E8F9A]
	(No symbol) [0x00007FF60C93F346]
	(No symbol) [0x00007FF60C93F57C]
	(No symbol) [0x00007FF60C992B17]
	(No symbol) [0x00007FF60C96736F]
	(No symbol) [0x00007FF60C98F7E3]
	(No symbol) [0x00007FF60C967103]
	(No symbol) [0x00007FF60C92FFC0]
	(No symbol) [0x00007FF60C931273]
	GetHandleVerifier [0x00007FF60CE31AED+3458237]
	GetHandleVerifier [0x00007FF60CE4829C+3550316]
	GetHandleVerifier [0x00007FF60CE3DB9D+3507565]
	GetHandleVerifier [0x00007FF60CBB2C6A+841274]
	(No symbol) [0x00007FF60CA609EF]
	(No symbol) [0x00007FF60CA5CB34]
	(No symbol) [0x00007FF60CA5CCD6]
	(No symbol) [0x00007FF60CA4C119]
	BaseThreadInitThunk [0x00007FFE89F6259D+29]
	RtlUserThreadStart [0x00007FFE8B7CAF38+40]



In [11]:
# push  the data  TGSRTC route :TGSRTC GOVT BUS : Hyderabad to Vijayawada.
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from datetime import datetime

try:
    # Connect to PostgreSQL database
    connection = psycopg2.connect(
        host="localhost",
        port="5432",
        database="red_bus",
        user="postgres",
        password="sample12"
    )
    connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cursor = connection.cursor()

    # Create the bus_routes table if it doesn't exist
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS bus_routes (
        id SERIAL PRIMARY KEY,
        route_name TEXT,
        route_link TEXT,
        bus_name TEXT DEFAULT NULL,
        bus_type TEXT DEFAULT NULL,
        departing_time TIME DEFAULT NULL,
        duration TEXT DEFAULT NULL,
        reaching_time TIME DEFAULT NULL,
        star_rating FLOAT DEFAULT NULL,
        price DECIMAL DEFAULT NULL,
        seats_available INT DEFAULT NULL
    )
    """)

    # Insert data into the table
    for row in TGSRTC_GOV_bus_data:
        cursor.execute("""
        INSERT INTO bus_routes (route_name, route_link, bus_name, bus_type, departing_time, duration, 
        reaching_time, star_rating, price, seats_available) 
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """, (
            row['route_name'], row['route_link'], row['bus_name'], row['bus_type'],
            datetime.strptime(row['departing_time'], "%H:%M").time() if row['departing_time'] else None,
            row['duration'],
            datetime.strptime(row['reaching_time'], "%H:%M").time() if row['reaching_time'] else None,
            float(row['star_rating']) if row['star_rating'] else None,
            float(row['price']) if row['price'] else None,
            int(row['seats_available']) if row['seats_available'] else None
        ))

    # Commit the transaction
    connection.commit()
    print("Data inserted successfully.")

except Exception as e:
    print(f"An error occurred while interacting with the database: {e}")

finally:
    if cursor:
        cursor.close()
    if connection:
        connection.close()


Data inserted successfully.


In [13]:

# scarpe the TGSRTC route :TGSRTC pvt BUS : .

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

# Initialize the Chrome driver
driver = webdriver.Chrome()

# Navigate to the RedBus website
driver.get('https://www.redbus.in/')
driver.maximize_window()
time.sleep(5)

try:
    # Hover over and click the TGSRTC option

    hover_element = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.XPATH, "//div[contains(@class, 'rtcName') and text()='TGSRTC']"))
    )
    action = ActionChains(driver)
    action.move_to_element(hover_element).perform()

    TGSRTC_option = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'rtcName') and text()='TGSRTC']"))
    )
    TGSRTC_option.click()
    time.sleep(5)

    # Select a route
    driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)
    
    routes = driver.find_elements(By.XPATH, "//a[contains(@class, 'route') and text()='Hyderabad to Vijayawada']")

    
    if not routes:
        print("No routes found.")
        driver.quit()
        exit()

    # Use the first route
    route_link = routes[0].get_attribute("href")
    route_name = routes[0].text
    driver.get(route_link)
    time.sleep(5)

    # Check for buses
    bus_elements = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
    if not bus_elements:
        print("No buses found. Attempting alternative method...")
        fallback_element = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="fixer"]/div/div/div[1]/span[3]/i'))
        )
        action.move_to_element(fallback_element).pause(1).click().perform()
        time.sleep(5)

    # Scrolling and extracting buses
    scroll_count = 0
    Tgsrtc_pvt_bus_data = []

    while scroll_count < 5:  # Scroll up to 5 times
        driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)
        time.sleep(7)
        scroll_count += 1

        # Fetch bus details
        bus_names = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
        bus_types = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_times = driver.find_elements(By.XPATH, "//div[@class='dp-time f-19 d-color f-bold']")
        durations = driver.find_elements(By.XPATH, "//div[@class='dur l-color lh-24']")
        reaching_times = driver.find_elements(By.XPATH, "//div[@class='bp-time f-19 d-color disp-Inline']")
        star_ratings = driver.find_elements(By.XPATH, "//span[contains(text(), '.') and string-length(text()) > 2]")
        prices = driver.find_elements(By.XPATH, "//div[contains(@class, 'fare')]/span[contains(@class, 'f-19')]")
        seats_available = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        # Find the minimum length to avoid mismatches
        min_length = min(len(bus_names), len(bus_types), len(departing_times), len(durations),
                         len(reaching_times), len(star_ratings), len(prices), len(seats_available))

        for i in range(min_length):
            bus_info = {
                "route_name": route_name,
                "route_link": route_link,
                "bus_name": bus_names[i].text,
                "bus_type": bus_types[i].text,
                "departing_time": departing_times[i].text,
                "duration": durations[i].text,
                "reaching_time": reaching_times[i].text,
                "star_rating": star_ratings[i].text,
                "price": prices[i].text.replace('₹', '').strip(),
                "seats_available": seats_available[i].text.replace('Seats available', '').strip(),
            }
            if bus_info not in Tgsrtc_pvt_bus_data :  # Avoid duplicates
                Tgsrtc_pvt_bus_data .append(bus_info)

        print(f"Scroll {scroll_count}: Found {len(Tgsrtc_pvt_bus_data )} unique buses.")

        if len(Tgsrtc_pvt_bus_data ) >= 10:  # Stop once 10 buses are collected
            break

    # Print the collected data
    print("\nCollected Bus Data:")
    for bus in Tgsrtc_pvt_bus_data[:10]:  # Limit output to 10 buses
        print(bus)

except Exception as e:
    print(f"An error occurred: {e}")

finally:
    driver.quit()


Scroll 1: Found 5 unique buses.
Scroll 2: Found 10 unique buses.

Collected Bus Data:
{'route_name': 'Hyderabad to Vijayawada', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada', 'bus_name': 'IntrCity SmartBus', 'bus_type': 'A/C Seater / Sleeper (2+1)', 'departing_time': '23:05', 'duration': '06h 55m', 'reaching_time': '06:00', 'star_rating': '4.7', 'price': '579', 'seats_available': '28'}
{'route_name': 'Hyderabad to Vijayawada', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada', 'bus_name': 'FRESHBUS', 'bus_type': 'Electric A/C Seater (2+2)', 'departing_time': '12:50', 'duration': '06h 50m', 'reaching_time': '19:40', 'star_rating': '4.7', 'price': '363', 'seats_available': '27'}
{'route_name': 'Hyderabad to Vijayawada', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada', 'bus_name': 'FRESHBUS', 'bus_type': 'Electric A/C Seater (2+2)', 'departing_time': '05:50', 'duration': '06h 10m', 'reaching_time': '12:00', 'sta

In [14]:
# push  the data  TGSRTC route :TGSRTC pvt BUS : Hyderabad to Vijayawada.
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from datetime import datetime

try:
    # Connect to PostgreSQL database
    connection = psycopg2.connect(
        host="localhost",
        port="5432",
        database="red_bus",
        user="postgres",
        password="sample12"
    )
    connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cursor = connection.cursor()

    # Create the bus_routes table if it doesn't exist
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS bus_routes (
        id SERIAL PRIMARY KEY,
        route_name TEXT,
        route_link TEXT,
        bus_name TEXT DEFAULT NULL,
        bus_type TEXT DEFAULT NULL,
        departing_time TIME DEFAULT NULL,
        duration TEXT DEFAULT NULL,
        reaching_time TIME DEFAULT NULL,
        star_rating FLOAT DEFAULT NULL,
        price DECIMAL DEFAULT NULL,
        seats_available INT DEFAULT NULL
    )
    """)

    # Insert data into the table
    for row in Tgsrtc_pvt_bus_data:
        cursor.execute("""
        INSERT INTO bus_routes (route_name, route_link, bus_name, bus_type, departing_time, duration, 
        reaching_time, star_rating, price, seats_available) 
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """, (
            row['route_name'], row['route_link'], row['bus_name'], row['bus_type'],
            datetime.strptime(row['departing_time'], "%H:%M").time() if row['departing_time'] else None,
            row['duration'],
            datetime.strptime(row['reaching_time'], "%H:%M").time() if row['reaching_time'] else None,
            float(row['star_rating']) if row['star_rating'] else None,
            float(row['price']) if row['price'] else None,
            int(row['seats_available']) if row['seats_available'] else None
        ))

    # Commit the transaction
    connection.commit()
    print("Data inserted successfully.")

except Exception as e:
    print(f"An error occurred while interacting with the database: {e}")

finally:
    if cursor:
        cursor.close()
    if connection:
        connection.close()


Data inserted successfully.


In [36]:
# Scrape  the data  TGSRTC route :TGSRTC Gvt BUS : Hyderabad to Khammam.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

driver = webdriver.Chrome()
driver.get('https://www.redbus.in/')
driver.maximize_window()
time.sleep(5)

try:
    hover_element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, "//div[contains(@class, 'rtcName') and text()='TGSRTC']"))
    )
    action = ActionChains(driver)
    action.move_to_element(hover_element).perform()

    TGSRTC_option = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'rtcName') and text()='TGSRTC']"))
    )
    TGSRTC_option.click()
    time.sleep(5)

    driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)

    routes = driver.find_elements(By.XPATH, "//a[contains(@class, 'route') and text()='Hyderabad to Khammam']")

    if not routes:
        print("No routes found.")
        driver.quit()
        exit()

    route_link = routes[0].get_attribute("href")
    route_name = routes[0].text
    driver.get(route_link)
    time.sleep(5)

    fallback_element = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="fixer"]/div/div/div[1]/span[3]/i'))
    )
    action.move_to_element(fallback_element).pause(1).click().perform()
    time.sleep(5)

    hover_element = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'f-bold grop-name m-top-20') and text()='TGSRTC Buses']"))
    )

    button_element = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.CLASS_NAME, "button"))
    )

    driver.execute_script("arguments[0].scrollIntoView();", button_element)
    time.sleep(2)
    button_element.click()

    time.sleep(10)

    scroll_count = 0
    TGSRTC_GOV_bus_data = [] #Corrected list name.

    while scroll_count < 5:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3) #reduced time.

        # Fetch bus details with explicit wait.
        bus_names = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//div[contains(@class, 'travels lh-24 f-bold d-color') and contains(., 'TGSRTC')]")))

        bus_types = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_times = driver.find_elements(By.XPATH, "//div[@class='dp-time f-19 d-color f-bold']")
        durations = driver.find_elements(By.XPATH, "//div[@class='dur l-color lh-24']")
        reaching_times = driver.find_elements(By.XPATH, "//div[@class='bp-time f-19 d-color disp-Inline']")
        star_ratings = driver.find_elements(By.XPATH, "//span[normalize-space(.) != '' and contains(text(), '.') and number(text())]")
        prices = driver.find_elements(By.XPATH, "//div[contains(@class, 'fare')]/span[contains(@class, 'f-19')]")
        seats_available = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        min_length = min(len(bus_names), len(bus_types), len(departing_times), len(durations),
                         len(reaching_times), len(star_ratings), len(prices), len(seats_available))

        for i in range(min_length):
            bus_info = {
                "route_name": route_name,
                "route_link": route_link,
                "bus_name": bus_names[i].text,
                "bus_type": bus_types[i].text,
                "departing_time": departing_times[i].text,
                "duration": durations[i].text,
                "reaching_time": reaching_times[i].text,
                "star_rating": star_ratings[i].text,
                "price": prices[i].text.replace('₹', '').strip(),
                "seats_available": seats_available[i].text.replace('Seats available', '').strip(),
            }
            if bus_info not in TGSRTC_GOV_bus_data:
                TGSRTC_GOV_bus_data.append(bus_info)
                if len(TGSRTC_GOV_bus_data) >= 10: #check inside the for loop.
                    break #break inner loop.

        print(f"Scroll {scroll_count}: Found {len(TGSRTC_GOV_bus_data)} unique buses.")

        if len(TGSRTC_GOV_bus_data) >= 10:
            break

        scroll_count += 1

    print("\nCollected Bus Data:")
    for bus in TGSRTC_GOV_bus_data[:10]:
        print(bus)

except Exception as e:
    print(f"An error occurred: {e}")

finally:
    driver.quit()

Scroll 0: Found 10 unique buses.

Collected Bus Data:
{'route_name': 'Hyderabad to Khammam', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-khammam', 'bus_name': 'TGSRTC - 8678', 'bus_type': 'Super Luxury (Non AC Seater 2+2 Push Back)', 'departing_time': '01:00', 'duration': '04h 20m', 'reaching_time': '05:20', 'star_rating': '4.3', 'price': '320', 'seats_available': '31'}
{'route_name': 'Hyderabad to Khammam', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-khammam', 'bus_name': 'TGSRTC - 4167', 'bus_type': 'Rajdhani (AC Semi Sleeper 2+2)', 'departing_time': '04:15', 'duration': '03h 30m', 'reaching_time': '07:45', 'star_rating': '4.1', 'price': '389', 'seats_available': '34'}
{'route_name': 'Hyderabad to Khammam', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-khammam', 'bus_name': 'TGSRTC - 1318', 'bus_type': 'LAHARI A/C SLEEPER CUM SEATER', 'departing_time': '04:40', 'duration': '05h 00m', 'reaching_time': '09:40', 'star_rating': '4.1', '

Data inserted successfully.


In [None]:
# push  the data  TGSRTC route :TGSRTC gvt BUS :Hyderabad to Khammam.
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from datetime import datetime

try:
    # Connect to PostgreSQL database
    connection = psycopg2.connect(
        host="localhost",
        port="5432",
        database="red_bus",
        user="postgres",
        password="sample12"
    )
    connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cursor = connection.cursor()

    # Create the bus_routes table if it doesn't exist
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS bus_routes (
        id SERIAL PRIMARY KEY,
        route_name TEXT,
        route_link TEXT,
        bus_name TEXT DEFAULT NULL,
        bus_type TEXT DEFAULT NULL,
        departing_time TIME DEFAULT NULL,
        duration TEXT DEFAULT NULL,
        reaching_time TIME DEFAULT NULL,
        star_rating FLOAT DEFAULT NULL,
        price DECIMAL DEFAULT NULL,
        seats_available INT DEFAULT NULL
    )
    """)

    # Insert data into the table
    for row in TGSRTC_GOV_bus_data:
        cursor.execute("""
        INSERT INTO bus_routes (route_name, route_link, bus_name, bus_type, departing_time, duration, 
        reaching_time, star_rating, price, seats_available) 
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """, (
            row['route_name'], row['route_link'], row['bus_name'], row['bus_type'],
            datetime.strptime(row['departing_time'], "%H:%M").time() if row['departing_time'] else None,
            row['duration'],
            datetime.strptime(row['reaching_time'], "%H:%M").time() if row['reaching_time'] else None,
            float(row['star_rating']) if row['star_rating'] else None,
            float(row['price']) if row['price'] else None,
            int(row['seats_available']) if row['seats_available'] else None
        ))

    # Commit the transaction
    connection.commit()
    print("Data inserted successfully.")

except Exception as e:
    print(f"An error occurred while interacting with the database: {e}")

finally:
    if cursor:
        cursor.close()
    if connection:
        connection.close()


Data inserted successfully.


13-03-2025

In [12]:

# scarpe the TGSRTC route :TGSRTC pvt BUS : Hyderabad to Khammam

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd

# Initialize the Chrome driver
driver = webdriver.Chrome()

# Navigate to the RedBus website
driver.get('https://www.redbus.in/')
driver.maximize_window()
time.sleep(5)

try:
    # Hover over and click the TGSRTC option

    hover_element = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.XPATH, "//div[contains(@class, 'rtcName') and text()='TGSRTC']"))
    )
    action = ActionChains(driver)
    action.move_to_element(hover_element).perform()

    TGSRTC_option = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'rtcName') and text()='TGSRTC']"))
    )
    TGSRTC_option.click()
    time.sleep(5)

    # Select a route
    driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)
    
    routes = driver.find_elements(By.XPATH, "//a[contains(@class, 'route') and text()='Hyderabad to Khammam']")

    
    if not routes:
        print("No routes found.")
        driver.quit()
        exit()

    # Use the first route
    route_link = routes[0].get_attribute("href")
    route_name = routes[0].text
    driver.get(route_link)
    time.sleep(5)

    # Check for buses
    bus_elements = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
    if not bus_elements:
        print("No buses found. Attempting alternative method...")
        fallback_element = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, '//*[@id="fixer"]/div/div/div[1]/span[3]/i'))
        )
        action.move_to_element(fallback_element).pause(1).click().perform()
        time.sleep(5)

    # Scrolling and extracting buses
    scroll_count = 0
    Tgsrtc_pvt_bus_data = []

    while scroll_count < 5:  # Scroll up to 5 times
        driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)
        time.sleep(7)
        scroll_count += 1

        # Fetch bus details
        bus_names = driver.find_elements(By.XPATH, "//div[@class='travels lh-24 f-bold d-color']")
        bus_types = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_times = driver.find_elements(By.XPATH, "//div[@class='dp-time f-19 d-color f-bold']")
        durations = driver.find_elements(By.XPATH, "//div[@class='dur l-color lh-24']")
        reaching_times = driver.find_elements(By.XPATH, "//div[@class='bp-time f-19 d-color disp-Inline']")
        star_ratings = driver.find_elements(By.XPATH, "//span[contains(text(), '.') and string-length(text()) > 2]")
        prices = driver.find_elements(By.XPATH, "//div[contains(@class, 'fare')]/span[contains(@class, 'f-19')]")
        seats_available = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        # Find the minimum length to avoid mismatches
        min_length = min(len(bus_names), len(bus_types), len(departing_times), len(durations),
                         len(reaching_times), len(star_ratings), len(prices), len(seats_available))

        for i in range(min_length):
            bus_info = {
                "route_name": route_name,
                "route_link": route_link,
                "bus_name": bus_names[i].text,
                "bus_type": bus_types[i].text,
                "departing_time": departing_times[i].text,
                "duration": durations[i].text,
                "reaching_time": reaching_times[i].text,
                "star_rating": star_ratings[i].text,
                "price": prices[i].text.replace('₹', '').strip(),
                "seats_available": seats_available[i].text.replace('Seats available', '').strip(),
            }
            if bus_info not in Tgsrtc_pvt_bus_data :  # Avoid duplicates
                Tgsrtc_pvt_bus_data .append(bus_info)

        print(f"Scroll {scroll_count}: Found {len(Tgsrtc_pvt_bus_data )} unique buses.")

        if len(Tgsrtc_pvt_bus_data ) >= 10:  # Stop once 10 buses are collected
            break

    # Print the collected data
    print("\nCollected Bus Data:")
    for bus in Tgsrtc_pvt_bus_data[:10]:  # Limit output to 10 buses
        print(bus)

except Exception as e:
    print(f"An error occurred: {e}")

finally:
    driver.quit()


Scroll 1: Found 10 unique buses.

Collected Bus Data:
{'route_name': 'Hyderabad to Khammam', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-khammam', 'bus_name': 'SBS Tours and Travels', 'bus_type': 'Non A/C Seater / Sleeper (2+1)', 'departing_time': '20:30', 'duration': '05h 54m', 'reaching_time': '02:24', 'star_rating': '4.3', 'price': '924', 'seats_available': '1 Seat available'}
{'route_name': 'Hyderabad to Khammam', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-khammam', 'bus_name': 'RRR Travels', 'bus_type': 'A/C Sleeper (2+1)', 'departing_time': '22:00', 'duration': '06h 00m', 'reaching_time': '04:00', 'star_rating': '4.1', 'price': '900', 'seats_available': '11'}
{'route_name': 'Hyderabad to Khammam', 'route_link': 'https://www.redbus.in/bus-tickets/hyderabad-to-khammam', 'bus_name': 'Sri KVR Travels', 'bus_type': 'Non A/C Seater / Sleeper (2+1)', 'departing_time': '22:00', 'duration': '06h 30m', 'reaching_time': '04:30', 'star_rating': '4.3', 'pr

In [None]:
# push  the data  TGSRTC route :TGSRTC pvt BUS : Hyderabad to Khammam.
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from datetime import datetime

try:
    # Connect to PostgreSQL database
    connection = psycopg2.connect(
        host="localhost",
        port="5432",
        database="red_bus",
        user="postgres",
        password="sample12"
    )
    connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cursor = connection.cursor()

    # Create the bus_routes table if it doesn't exist
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS bus_routes (
        id SERIAL PRIMARY KEY,
        route_name TEXT,
        route_link TEXT,
        bus_name TEXT DEFAULT NULL,
        bus_type TEXT DEFAULT NULL,
        departing_time TIME DEFAULT NULL,
        duration TEXT DEFAULT NULL,
        reaching_time TIME DEFAULT NULL,
        star_rating FLOAT DEFAULT NULL,
        price DECIMAL DEFAULT NULL,
        seats_available INT DEFAULT NULL
    )
    """)

    # Insert data into the table
    for row in Tgsrtc_pvt_bus_data:
        cursor.execute("""
        INSERT INTO bus_routes (route_name, route_link, bus_name, bus_type, departing_time, duration, 
        reaching_time, star_rating, price, seats_available) 
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """, (
            row['route_name'], row['route_link'], row['bus_name'], row['bus_type'],
            datetime.strptime(row['departing_time'], "%H:%M").time() if row['departing_time'] else None,
            row['duration'],
            datetime.strptime(row['reaching_time'], "%H:%M").time() if row['reaching_time'] else None,
            float(row['star_rating']) if row['star_rating'] else None,
            float(row['price']) if row['price'] else None,
            int(row['seats_available']) if row['seats_available'] else None
        ))

    # Commit the transaction
    connection.commit()
    print("Data inserted successfully.")

except Exception as e:
    print(f"An error occurred while interacting with the database: {e}")

finally:
    if cursor:
        cursor.close()
    if connection:
        connection.close()


In [26]:
# Scrape  the data Kerala KSRTC route : Gvt BUS : Kozhikode to Ernakulam.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

driver = webdriver.Chrome()
driver.get('https://www.redbus.in/')
driver.maximize_window()
time.sleep(5)

try:
    # Hover over the KERALA RTC element

    hover_element = WebDriverWait(driver, 20).until(
        EC.presence_of_element_located((By.XPATH, "//div[contains(@class, 'rtcName') and text()='KERALA RTC']"))
    )
    action = ActionChains(driver)
    action.move_to_element(hover_element).perform()

    # Click the KERALA RTC element using JavaScript
    KERALA_RTC_option = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'rtcName') and text()='KERALA RTC']"))
    )
    #KERALA_RTC_option.click()
    driver.execute_script("arguments[0].click();", KERALA_RTC_option) #use javascript click.

    time.sleep(5)

    driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)

    routes = driver.find_elements(By.XPATH, "//a[contains(@class, 'route') and text()='Kozhikode to Ernakulam']")

    if not routes:
        print("No routes found.")
        driver.quit()
        exit()

    route_link = routes[0].get_attribute("href")
    route_name = routes[0].text
    driver.get(route_link)
    time.sleep(5)

    fallback_element = WebDriverWait(driver, 20).until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="fixer"]/div/div/div[1]/span[3]/i'))
    )
    action.move_to_element(fallback_element).pause(1).click().perform()
    time.sleep(5)



    # Click on the 'KSRTC govt buses' option
    KSRTC_view_buses_button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, "//div[contains(@class, 'button') and text()='View Buses']"))
    )
    KSRTC_view_buses_button.click()
    time.sleep(10)

    scroll_count = 0
    KSRTC_Kozhi_to_Erna__GOV_bus_data = [] 

    while scroll_count < 5:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3) #reduced time.

        # Fetch bus details with explicit wait.
        bus_names = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//div[contains(@class, 'travels lh-24 f-bold d-color') and contains(., 'KSRTC')]")))

        bus_types = driver.find_elements(By.XPATH, "//div[@class='bus-type f-12 m-top-16 l-color evBus']")
        departing_times = driver.find_elements(By.XPATH, "//div[@class='dp-time f-19 d-color f-bold']")
        durations = driver.find_elements(By.XPATH, "//div[@class='dur l-color lh-24']")
        reaching_times = driver.find_elements(By.XPATH, "//div[@class='bp-time f-19 d-color disp-Inline']")
        star_ratings = driver.find_elements(By.XPATH, "//span[normalize-space(.) != '' and contains(text(), '.') and number(text())]")
        prices = driver.find_elements(By.XPATH, "//div[contains(@class, 'fare')]/span[contains(@class, 'f-19')]")
        seats_available = driver.find_elements(By.XPATH, "//div[contains(@class, 'seat-left')]")

        min_length = min(len(bus_names), len(bus_types), len(departing_times), len(durations),
                         len(reaching_times), len(star_ratings), len(prices), len(seats_available))

        for i in range(min_length):
            bus_info = {
                "route_name": route_name,
                "route_link": route_link,
                "bus_name": bus_names[i].text,
                "bus_type": bus_types[i].text,
                "departing_time": departing_times[i].text,
                "duration": durations[i].text,
                "reaching_time": reaching_times[i].text,
                "star_rating": star_ratings[i].text,
                "price": prices[i].text.replace('₹', '').strip(),
                "seats_available": seats_available[i].text.replace('Seats available', '').strip(),
            }
            if bus_info not in KSRTC_Kozhi_to_Erna__GOV_bus_data :
                KSRTC_Kozhi_to_Erna__GOV_bus_data .append(bus_info)
                if len(KSRTC_Kozhi_to_Erna__GOV_bus_data ) >= 10: #check inside the for loop.
                    break #break inner loop.

        print(f"Scroll {scroll_count}: Found {len(KSRTC_Kozhi_to_Erna__GOV_bus_data )} unique buses.")

        if len(KSRTC_Kozhi_to_Erna__GOV_bus_data ) >= 10:
            break

        scroll_count += 1

    print("\nCollected Bus Data:")
    for bus in KSRTC_Kozhi_to_Erna__GOV_bus_data[:10]:
        print(bus)

except Exception as e:
    print(f"An error occurred: {e}")

finally:
    driver.quit()

Scroll 0: Found 10 unique buses.

Collected Bus Data:
{'route_name': 'Kozhikode to Ernakulam', 'route_link': 'https://www.redbus.in/bus-tickets/kozhikode-to-ernakulam', 'bus_name': 'KSRTC (Kerala) - 1029', 'bus_type': 'Super Fast Non AC Seater (2+3)', 'departing_time': '00:01', 'duration': '05h 34m', 'reaching_time': '05:35', 'star_rating': '2.5', 'price': '269', 'seats_available': '45'}
{'route_name': 'Kozhikode to Ernakulam', 'route_link': 'https://www.redbus.in/bus-tickets/kozhikode-to-ernakulam', 'bus_name': 'KSRTC (Kerala) - 2518', 'bus_type': 'Swift Deluxe Non AC Air Bus (2+2)', 'departing_time': '00:11', 'duration': '04h 43m', 'reaching_time': '04:54', 'star_rating': '3.8', 'price': '309', 'seats_available': '30'}
{'route_name': 'Kozhikode to Ernakulam', 'route_link': 'https://www.redbus.in/bus-tickets/kozhikode-to-ernakulam', 'bus_name': 'KSRTC (Kerala) - 3546', 'bus_type': 'Super Express Non AC Seater Air Bus (2+2)', 'departing_time': '00:25', 'duration': '04h 40m', 'reaching_

In [None]:
# push  the data  Kerala route :govt BUS :'Kozhikode to Ernakulam'
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from datetime import datetime

try:
    # Connect to PostgreSQL database
    connection = psycopg2.connect(
        host="localhost",
        port="5432",
        database="red_bus",
        user="postgres",
        password="sample12"
    )
    connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cursor = connection.cursor()

    # Create the bus_routes table if it doesn't exist
    cursor.execute("""
    CREATE TABLE IF NOT EXISTS bus_routes (
        id SERIAL PRIMARY KEY,
        route_name TEXT,
        route_link TEXT,
        bus_name TEXT DEFAULT NULL,
        bus_type TEXT DEFAULT NULL,
        departing_time TIME DEFAULT NULL,
        duration TEXT DEFAULT NULL,
        reaching_time TIME DEFAULT NULL,
        star_rating FLOAT DEFAULT NULL,
        price DECIMAL DEFAULT NULL,
        seats_available INT DEFAULT NULL
    )
    """)

    # Insert data into the table
    for row in KSRTC_Kozhi_to_Erna__GOV_bus_data:
        cursor.execute("""
        INSERT INTO bus_routes (route_name, route_link, bus_name, bus_type, departing_time, duration, 
        reaching_time, star_rating, price, seats_available) 
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """, (
            row['route_name'], row['route_link'], row['bus_name'], row['bus_type'],
            datetime.strptime(row['departing_time'], "%H:%M").time() if row['departing_time'] else None,
            row['duration'],
            datetime.strptime(row['reaching_time'], "%H:%M").time() if row['reaching_time'] else None,
            float(row['star_rating']) if row['star_rating'] else None,
            float(row['price']) if row['price'] else None,
            int(row['seats_available']) if row['seats_available'] else None
        ))

    # Commit the transaction
    connection.commit()
    print("Data inserted successfully.")

except Exception as e:
    print(f"An error occurred while interacting with the database: {e}")

finally:
    if cursor:
        cursor.close()
    if connection:
        connection.close()


Data inserted successfully.


Use this below code method for all push data pgsql

In [None]:
# push  the data  Kerala route :govt BUS :'Kozhikode to Ernakulam'
import psycopg2
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
from datetime import datetime

try:
    # Connect to PostgreSQL database
    connection = psycopg2.connect(
        host="localhost",
        port="5432",
        database="red_bus",
        user="postgres",
        password="sample12"
    )
    connection.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
    cursor = connection.cursor()

    # Reset the sequence before inserting new rows
    cursor.execute("SELECT setval('bus_routes_id_seq', COALESCE((SELECT MAX(id) FROM bus_routes), 1), true);")


    # Insert data into the table
    for row in KSRTC_Kozhi_to_Erna__GOV_bus_data:
        cursor.execute("""
        INSERT INTO bus_routes (route_name, route_link, bus_name, bus_type, departing_time, duration, 
        reaching_time, star_rating, price, seats_available) 
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
        """, (
            row['route_name'], row['route_link'], row['bus_name'], row['bus_type'],
            datetime.strptime(row['departing_time'], "%H:%M").time() if row['departing_time'] else None,
            row['duration'],
            datetime.strptime(row['reaching_time'], "%H:%M").time() if row['reaching_time'] else None,
            float(row['star_rating']) if row['star_rating'] else None,
            float(row['price']) if row['price'] else None,
            int(row['seats_available']) if row['seats_available'] else None
        ))

    # Commit the transaction
    connection.commit()
    print("Data inserted successfully.")

except Exception as e:
    print(f"An error occurred while interacting with the database: {e}")

finally:
    if cursor:
        cursor.close()
    if connection:
        connection.close()
