In [None]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import (
    NoSuchElementException,
    TimeoutException,
    StaleElementReferenceException,
)

# Where the route list is read from and where the scraped details are written
INPUT_CSV_PATH = 'C:/Users/Dine24/RedBus/df_KL.csv'
OUTPUT_CSV_PATH = 'C:/Users/Dine24/RedBus/df_KL_bus_details.csv'

# Route table; read before the browser starts so a bad path fails early
route_df = pd.read_csv(INPUT_CSV_PATH)

# Start Chrome presenting a fixed desktop user-agent string
options = webdriver.ChromeOptions()
options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/91.0.4472.124 Safari/537.36"
)
driver = webdriver.Chrome(options=options)

# Accumulates one DataFrame per route that yielded at least one bus
consolidated_bus_data = []


def extract_bus_details(driver, url):
    """
    Extract the bus listings shown on one route page.

    Loads ``url``, scrolls to the bottom of the page up to 10 times so that
    additional listings get a chance to load, waits up to 30 seconds for at
    least one bus container to be present, and then reads the text of each
    field from every container.

    Each bus is collected as a complete row before it is stored, so a stale
    element part-way through a bus can never leave the columns with
    different lengths. A stale bus is re-read a few times before it is
    skipped.

    Args:
        driver (webdriver.Chrome): The Selenium WebDriver instance.
        url (str): The route page URL to extract bus details from.

    Returns:
        pd.DataFrame: One row per bus with the columns Bus_names, Bus_types,
        Start_Time, End_Time, Star_Ratings, Total_Duration, Prices,
        Seats_Available and Route_URL. A field missing from a listing is
        "N/A". On a timeout or any other error the rows collected so far
        (possibly none) are returned.
    """
    bus_xpath = "//div[contains(@class, 'clearfix bus-item')]"
    # Output column -> XPath (relative to a bus container) of the element
    # whose text fills that column.
    field_xpaths = {
        "Bus_names": ".//div[contains(@class, 'travels')]",
        "Bus_types": ".//div[contains(@class, 'bus-type')]",
        "Start_Time": ".//div[contains(@class, 'dp-time')]",
        "End_Time": ".//div[contains(@class, 'bp-time')]",
        "Star_Ratings": ".//div[contains(@class, 'rating')]/span",
        "Total_Duration": ".//div[contains(@class, 'dur')]",
        "Prices": ".//div[contains(@class, 'fare')]/span",
        "Seats_Available": ".//div[contains(@class, 'seat-left')]",
    }
    max_attempts = 3  # reads per bus before a stale container is skipped
    rows = []

    def read_row(bus):
        """Return one complete row for ``bus``; absent fields become "N/A"."""
        row = {}
        for column, xpath in field_xpaths.items():
            try:
                row[column] = bus.find_element(By.XPATH, xpath).text
            except NoSuchElementException:
                row[column] = "N/A"
        row["Route_URL"] = url
        return row

    try:
        # Navigate to the given URL
        driver.get(url)
        time.sleep(2)  # Wait for the page to load

        # Scroll until the page height stops growing (at most 10 times)
        previous_height = driver.execute_script("return document.body.scrollHeight")
        for _ in range(10):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)  # Allow time for content to load
            current_height = driver.execute_script("return document.body.scrollHeight")
            if current_height == previous_height:
                break
            previous_height = current_height

        # Wait for bus details to appear on the page
        WebDriverWait(driver, 30).until(
            EC.presence_of_all_elements_located((By.XPATH, bus_xpath))
        )

        buses = driver.find_elements(By.XPATH, bus_xpath)
        print(f"Found {len(buses)} buses for URL: {url}")

        for i in range(len(buses)):
            for _attempt in range(max_attempts):
                try:
                    # Re-fetch the containers so the reference is fresh
                    bus = driver.find_elements(By.XPATH, bus_xpath)[i]
                    # Only a fully read row is stored, keeping columns aligned
                    rows.append(read_row(bus))
                    break
                except StaleElementReferenceException:
                    print(f"Stale element reference at bus {i + 1}, retrying...")
            else:
                print(f"Skipping bus {i + 1} after {max_attempts} stale reads")

    except TimeoutException:
        print(f"Timeout occurred for URL: {url}")
    except Exception as e:
        print(f"Error processing URL {url}: {e}")

    # Explicit columns keep the schema stable even when no rows were read
    return pd.DataFrame(rows, columns=[*field_xpaths, "Route_URL"])


# Scrape every route listed in the input CSV, then save the combined result.
# The try/finally guarantees the browser is shut down even if the run is
# interrupted or combining/saving the data fails.
try:
    for index, row in route_df.iterrows():
        route_url = row.get("Route_link")  # Ensure column name matches the CSV
        print(f"Processing URL {index + 1}/{len(route_df)}: {route_url}")

        try:
            bus_details_df = extract_bus_details(driver, route_url)
            # Routes that produced no rows are left out of the output
            if not bus_details_df.empty:
                consolidated_bus_data.append(bus_details_df)
        except Exception as e:
            print(f"Error processing URL {route_url}: {e}")

    # Consolidate and save the extracted data
    if consolidated_bus_data:
        consolidated_df = pd.concat(consolidated_bus_data, ignore_index=True)
        consolidated_df.to_csv(OUTPUT_CSV_PATH, index=False)
        print(f"Consolidated bus details saved to {OUTPUT_CSV_PATH}")
    else:
        print("No bus details extracted.")
finally:
    # Close the WebDriver
    driver.quit()


In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, StaleElementReferenceException
import pandas as pd
import time

# Where the route list is read from and where the scraped details are written
input_csv_path = 'C:/Users/Dine24/RedBus/df_AA.csv'
output_csv_path = 'C:/Users/Dine24/RedBus/df_AA_bus_details.csv'

# Route table; read before the browser starts so a bad path fails early
route_df = pd.read_csv(input_csv_path)

# Start Chrome presenting a fixed desktop user-agent string
options = webdriver.ChromeOptions()
options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/91.0.4472.124 Safari/537.36"
)
driver = webdriver.Chrome(options=options)

# Accumulates one DataFrame per route that yielded at least one bus
consolidated_bus_data = []

def extract_bus_details(driver, url):
    """
    Extract the bus listings shown on one route page.

    Loads ``url``, scrolls to the bottom of the page up to 10 times so that
    additional listings get a chance to load, waits up to 30 seconds for at
    least one bus container to be present, and then reads the text of each
    field from every container.

    Each bus is collected as a complete row before it is stored, so a stale
    element part-way through a bus can never leave the columns with
    different lengths. A stale bus is re-read a few times before it is
    skipped.

    Args:
        driver (webdriver.Chrome): The Selenium WebDriver instance.
        url (str): The route page URL to extract bus details from.

    Returns:
        pd.DataFrame: One row per bus with the columns Bus_names, Bus_types,
        Start_Time, End_Time, Star_Ratings, Total_Duration, Prices,
        Seats_Available and Route_URL. A field missing from a listing is
        "N/A". On a timeout or any other error the rows collected so far
        (possibly none) are returned.
    """
    bus_xpath = "//div[contains(@class, 'clearfix bus-item')]"
    # Output column -> XPath (relative to a bus container) of the element
    # whose text fills that column.
    field_xpaths = {
        "Bus_names": ".//div[contains(@class, 'travels')]",
        "Bus_types": ".//div[contains(@class, 'bus-type')]",
        "Start_Time": ".//div[contains(@class, 'dp-time')]",
        "End_Time": ".//div[contains(@class, 'bp-time')]",
        "Star_Ratings": ".//div[contains(@class, 'rating')]/span",
        "Total_Duration": ".//div[contains(@class, 'dur')]",
        "Prices": ".//div[contains(@class, 'fare')]/span",
        "Seats_Available": ".//div[contains(@class, 'seat-left')]",
    }
    max_attempts = 3  # reads per bus before a stale container is skipped
    rows = []

    def read_row(bus):
        """Return one complete row for ``bus``; absent fields become "N/A"."""
        row = {}
        for column, xpath in field_xpaths.items():
            try:
                row[column] = bus.find_element(By.XPATH, xpath).text
            except NoSuchElementException:
                row[column] = "N/A"
        row["Route_URL"] = url  # Add the URL for tracking
        return row

    try:
        driver.get(url)
        time.sleep(2)  # Adjust based on page load speed

        # Scroll until the page height stops growing (at most 10 times)
        previous_height = driver.execute_script("return document.body.scrollHeight")
        for _ in range(10):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)  # Adjust based on page loading speed
            current_height = driver.execute_script("return document.body.scrollHeight")
            if current_height == previous_height:
                break
            previous_height = current_height

        # Wait for bus details to load
        WebDriverWait(driver, 30).until(
            EC.presence_of_all_elements_located((By.XPATH, bus_xpath))
        )

        buses = driver.find_elements(By.XPATH, bus_xpath)
        print(f"Found {len(buses)} buses for URL: {url}")

        for i in range(len(buses)):
            for _attempt in range(max_attempts):
                try:
                    # Re-fetch the containers so the reference is fresh
                    bus = driver.find_elements(By.XPATH, bus_xpath)[i]
                    # Only a fully read row is stored, keeping columns aligned
                    rows.append(read_row(bus))
                    break
                except StaleElementReferenceException:
                    print(f"Stale element reference at bus {i + 1}, retrying...")
            else:
                print(f"Skipping bus {i + 1} after {max_attempts} stale reads")

    except TimeoutException:
        print(f"Timeout occurred for URL: {url}")
    except Exception as e:
        print(f"Error processing URL {url}: {e}")

    # Explicit columns keep the schema stable even when no rows were read
    return pd.DataFrame(rows, columns=[*field_xpaths, "Route_URL"])

# Scrape every route listed in the input CSV, then save the combined result.
# The try/finally guarantees the browser is shut down even if the run is
# interrupted or combining/saving the data fails.
try:
    for index, row in route_df.iterrows():
        route_url = row.get("Route_link")  # Adjust column name as per your CSV
        print(f"Processing URL {index + 1}/{len(route_df)}: {route_url}")
        try:
            bus_details_df = extract_bus_details(driver, route_url)
            # Routes that produced no rows are left out of the output
            if not bus_details_df.empty:
                consolidated_bus_data.append(bus_details_df)
        except Exception as e:
            print(f"Error processing URL {route_url}: {e}")

    # Combine all data into a single DataFrame
    if consolidated_bus_data:
        consolidated_df = pd.concat(consolidated_bus_data, ignore_index=True)
        consolidated_df.to_csv(output_csv_path, index=False)
        print(f"Consolidated bus details saved to {output_csv_path}")
    else:
        print("No bus details extracted.")
finally:
    # Close the browser
    driver.quit()


In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, StaleElementReferenceException
import pandas as pd
import time

# Where the route list is read from and where the scraped details are written
input_csv_path = 'C:/Users/Dine24/RedBus/df_AS.csv'
output_csv_path = 'C:/Users/Dine24/RedBus/df_AS_bus_details.csv'

# Route table; read before the browser starts so a bad path fails early
route_df = pd.read_csv(input_csv_path)

# Start Chrome presenting a fixed desktop user-agent string
options = webdriver.ChromeOptions()
options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/91.0.4472.124 Safari/537.36"
)
driver = webdriver.Chrome(options=options)

# Accumulates one DataFrame per route that yielded at least one bus
consolidated_bus_data = []

def extract_bus_details(driver, url):
    """
    Extract the bus listings shown on one route page.

    Loads ``url``, scrolls to the bottom of the page up to 10 times so that
    additional listings get a chance to load, waits up to 30 seconds for at
    least one bus container to be present, and then reads the text of each
    field from every container.

    Each bus is collected as a complete row before it is stored, so a stale
    element part-way through a bus can never leave the columns with
    different lengths. A stale bus is re-read a few times before it is
    skipped.

    Args:
        driver (webdriver.Chrome): The Selenium WebDriver instance.
        url (str): The route page URL to extract bus details from.

    Returns:
        pd.DataFrame: One row per bus with the columns Bus_names, Bus_types,
        Start_Time, End_Time, Star_Ratings, Total_Duration, Prices,
        Seats_Available and Route_URL. A field missing from a listing is
        "N/A". On a timeout or any other error the rows collected so far
        (possibly none) are returned.
    """
    bus_xpath = "//div[contains(@class, 'clearfix bus-item')]"
    # Output column -> XPath (relative to a bus container) of the element
    # whose text fills that column.
    field_xpaths = {
        "Bus_names": ".//div[contains(@class, 'travels')]",
        "Bus_types": ".//div[contains(@class, 'bus-type')]",
        "Start_Time": ".//div[contains(@class, 'dp-time')]",
        "End_Time": ".//div[contains(@class, 'bp-time')]",
        "Star_Ratings": ".//div[contains(@class, 'rating')]/span",
        "Total_Duration": ".//div[contains(@class, 'dur')]",
        "Prices": ".//div[contains(@class, 'fare')]/span",
        "Seats_Available": ".//div[contains(@class, 'seat-left')]",
    }
    max_attempts = 3  # reads per bus before a stale container is skipped
    rows = []

    def read_row(bus):
        """Return one complete row for ``bus``; absent fields become "N/A"."""
        row = {}
        for column, xpath in field_xpaths.items():
            try:
                row[column] = bus.find_element(By.XPATH, xpath).text
            except NoSuchElementException:
                row[column] = "N/A"
        row["Route_URL"] = url  # Add the URL for tracking
        return row

    try:
        driver.get(url)
        time.sleep(2)  # Adjust based on page load speed

        # Scroll until the page height stops growing (at most 10 times)
        previous_height = driver.execute_script("return document.body.scrollHeight")
        for _ in range(10):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)  # Adjust based on page loading speed
            current_height = driver.execute_script("return document.body.scrollHeight")
            if current_height == previous_height:
                break
            previous_height = current_height

        # Wait for bus details to load
        WebDriverWait(driver, 30).until(
            EC.presence_of_all_elements_located((By.XPATH, bus_xpath))
        )

        buses = driver.find_elements(By.XPATH, bus_xpath)
        print(f"Found {len(buses)} buses for URL: {url}")

        for i in range(len(buses)):
            for _attempt in range(max_attempts):
                try:
                    # Re-fetch the containers so the reference is fresh
                    bus = driver.find_elements(By.XPATH, bus_xpath)[i]
                    # Only a fully read row is stored, keeping columns aligned
                    rows.append(read_row(bus))
                    break
                except StaleElementReferenceException:
                    print(f"Stale element reference at bus {i + 1}, retrying...")
            else:
                print(f"Skipping bus {i + 1} after {max_attempts} stale reads")

    except TimeoutException:
        print(f"Timeout occurred for URL: {url}")
    except Exception as e:
        print(f"Error processing URL {url}: {e}")

    # Explicit columns keep the schema stable even when no rows were read
    return pd.DataFrame(rows, columns=[*field_xpaths, "Route_URL"])

# Scrape every route listed in the input CSV, then save the combined result.
# The try/finally guarantees the browser is shut down even if the run is
# interrupted or combining/saving the data fails.
try:
    for index, row in route_df.iterrows():
        route_url = row.get("Route_link")  # Adjust column name as per your CSV
        print(f"Processing URL {index + 1}/{len(route_df)}: {route_url}")
        try:
            bus_details_df = extract_bus_details(driver, route_url)
            # Routes that produced no rows are left out of the output
            if not bus_details_df.empty:
                consolidated_bus_data.append(bus_details_df)
        except Exception as e:
            print(f"Error processing URL {route_url}: {e}")

    # Combine all data into a single DataFrame
    if consolidated_bus_data:
        consolidated_df = pd.concat(consolidated_bus_data, ignore_index=True)
        consolidated_df.to_csv(output_csv_path, index=False)
        print(f"Consolidated bus details saved to {output_csv_path}")
    else:
        print("No bus details extracted.")
finally:
    # Close the browser
    driver.quit()


Processing URL 1/47: https://www.redbus.in/bus-tickets/tezpur-to-guwahati
Found 10 buses for URL: https://www.redbus.in/bus-tickets/tezpur-to-guwahati
Processing URL 2/47: https://www.redbus.in/bus-tickets/guwahati-to-tezpur
Found 40 buses for URL: https://www.redbus.in/bus-tickets/guwahati-to-tezpur
Processing URL 3/47: https://www.redbus.in/bus-tickets/nagaon-to-guwahati
Found 2 buses for URL: https://www.redbus.in/bus-tickets/nagaon-to-guwahati
Processing URL 4/47: https://www.redbus.in/bus-tickets/guwahati-to-nagaon
Found 6 buses for URL: https://www.redbus.in/bus-tickets/guwahati-to-nagaon
Processing URL 5/47: https://www.redbus.in/bus-tickets/goalpara-to-guwahati
Timeout occurred for URL: https://www.redbus.in/bus-tickets/goalpara-to-guwahati
Processing URL 6/47: https://www.redbus.in/bus-tickets/dhubri-to-guwahati
Found 4 buses for URL: https://www.redbus.in/bus-tickets/dhubri-to-guwahati
Processing URL 7/47: https://www.redbus.in/bus-tickets/sibsagar-to-north-lakhimpur
Timeout 

In [2]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, StaleElementReferenceException
import pandas as pd
import time

# Where the route list is read from and where the scraped details are written
input_csv_path = 'C:/Users/Dine24/RedBus/df_CH.csv'
output_csv_path = 'C:/Users/Dine24/RedBus/df_CH_bus_details.csv'

# Route table; read before the browser starts so a bad path fails early
route_df = pd.read_csv(input_csv_path)

# Start Chrome presenting a fixed desktop user-agent string
options = webdriver.ChromeOptions()
options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/91.0.4472.124 Safari/537.36"
)
driver = webdriver.Chrome(options=options)

# Accumulates one DataFrame per route that yielded at least one bus
consolidated_bus_data = []

def extract_bus_details(driver, url):
    """
    Extract the bus listings shown on one route page.

    Loads ``url``, scrolls to the bottom of the page up to 10 times so that
    additional listings get a chance to load, waits up to 30 seconds for at
    least one bus container to be present, and then reads the text of each
    field from every container.

    Each bus is collected as a complete row before it is stored, so a stale
    element part-way through a bus can never leave the columns with
    different lengths. A stale bus is re-read a few times before it is
    skipped.

    Args:
        driver (webdriver.Chrome): The Selenium WebDriver instance.
        url (str): The route page URL to extract bus details from.

    Returns:
        pd.DataFrame: One row per bus with the columns Bus_names, Bus_types,
        Start_Time, End_Time, Star_Ratings, Total_Duration, Prices,
        Seats_Available and Route_URL. A field missing from a listing is
        "N/A". On a timeout or any other error the rows collected so far
        (possibly none) are returned.
    """
    bus_xpath = "//div[contains(@class, 'clearfix bus-item')]"
    # Output column -> XPath (relative to a bus container) of the element
    # whose text fills that column.
    field_xpaths = {
        "Bus_names": ".//div[contains(@class, 'travels')]",
        "Bus_types": ".//div[contains(@class, 'bus-type')]",
        "Start_Time": ".//div[contains(@class, 'dp-time')]",
        "End_Time": ".//div[contains(@class, 'bp-time')]",
        "Star_Ratings": ".//div[contains(@class, 'rating')]/span",
        "Total_Duration": ".//div[contains(@class, 'dur')]",
        "Prices": ".//div[contains(@class, 'fare')]/span",
        "Seats_Available": ".//div[contains(@class, 'seat-left')]",
    }
    max_attempts = 3  # reads per bus before a stale container is skipped
    rows = []

    def read_row(bus):
        """Return one complete row for ``bus``; absent fields become "N/A"."""
        row = {}
        for column, xpath in field_xpaths.items():
            try:
                row[column] = bus.find_element(By.XPATH, xpath).text
            except NoSuchElementException:
                row[column] = "N/A"
        row["Route_URL"] = url  # Add the URL for tracking
        return row

    try:
        driver.get(url)
        time.sleep(2)  # Adjust based on page load speed

        # Scroll until the page height stops growing (at most 10 times)
        previous_height = driver.execute_script("return document.body.scrollHeight")
        for _ in range(10):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)  # Adjust based on page loading speed
            current_height = driver.execute_script("return document.body.scrollHeight")
            if current_height == previous_height:
                break
            previous_height = current_height

        # Wait for bus details to load
        WebDriverWait(driver, 30).until(
            EC.presence_of_all_elements_located((By.XPATH, bus_xpath))
        )

        buses = driver.find_elements(By.XPATH, bus_xpath)
        print(f"Found {len(buses)} buses for URL: {url}")

        for i in range(len(buses)):
            for _attempt in range(max_attempts):
                try:
                    # Re-fetch the containers so the reference is fresh
                    bus = driver.find_elements(By.XPATH, bus_xpath)[i]
                    # Only a fully read row is stored, keeping columns aligned
                    rows.append(read_row(bus))
                    break
                except StaleElementReferenceException:
                    print(f"Stale element reference at bus {i + 1}, retrying...")
            else:
                print(f"Skipping bus {i + 1} after {max_attempts} stale reads")

    except TimeoutException:
        print(f"Timeout occurred for URL: {url}")
    except Exception as e:
        print(f"Error processing URL {url}: {e}")

    # Explicit columns keep the schema stable even when no rows were read
    return pd.DataFrame(rows, columns=[*field_xpaths, "Route_URL"])

# Scrape every route listed in the input CSV, then save the combined result.
# The try/finally guarantees the browser is shut down even if the run is
# interrupted or combining/saving the data fails.
try:
    for index, row in route_df.iterrows():
        route_url = row.get("Route_link")  # Adjust column name as per your CSV
        print(f"Processing URL {index + 1}/{len(route_df)}: {route_url}")
        try:
            bus_details_df = extract_bus_details(driver, route_url)
            # Routes that produced no rows are left out of the output
            if not bus_details_df.empty:
                consolidated_bus_data.append(bus_details_df)
        except Exception as e:
            print(f"Error processing URL {route_url}: {e}")

    # Combine all data into a single DataFrame
    if consolidated_bus_data:
        consolidated_df = pd.concat(consolidated_bus_data, ignore_index=True)
        consolidated_df.to_csv(output_csv_path, index=False)
        print(f"Consolidated bus details saved to {output_csv_path}")
    else:
        print("No bus details extracted.")
finally:
    # Close the browser
    driver.quit()


Processing URL 1/29: https://www.redbus.in/bus-tickets/delhi-to-chandigarh
Found 110 buses for URL: https://www.redbus.in/bus-tickets/delhi-to-chandigarh
Processing URL 2/29: https://www.redbus.in/bus-tickets/chandigarh-to-delhi
Found 94 buses for URL: https://www.redbus.in/bus-tickets/chandigarh-to-delhi
Processing URL 3/29: https://www.redbus.in/bus-tickets/yamuna-nagar-to-chandigarh
Found 16 buses for URL: https://www.redbus.in/bus-tickets/yamuna-nagar-to-chandigarh
Processing URL 4/29: https://www.redbus.in/bus-tickets/ludhiana-to-chandigarh
Found 40 buses for URL: https://www.redbus.in/bus-tickets/ludhiana-to-chandigarh
Processing URL 5/29: https://www.redbus.in/bus-tickets/chandigarh-to-yamuna-nagar
Found 16 buses for URL: https://www.redbus.in/bus-tickets/chandigarh-to-yamuna-nagar
Processing URL 6/29: https://www.redbus.in/bus-tickets/chandigarh-to-hamirpur-himachal-pradesh
Found 4 buses for URL: https://www.redbus.in/bus-tickets/chandigarh-to-hamirpur-himachal-pradesh
Processi

In [3]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, StaleElementReferenceException
import pandas as pd
import time

# Where the route list is read from and where the scraped details are written
input_csv_path = 'C:/Users/Dine24/RedBus/df_HP.csv'
output_csv_path = 'C:/Users/Dine24/RedBus/df_HP_bus_details.csv'

# Route table; read before the browser starts so a bad path fails early
route_df = pd.read_csv(input_csv_path)

# Start Chrome presenting a fixed desktop user-agent string
options = webdriver.ChromeOptions()
options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/91.0.4472.124 Safari/537.36"
)
driver = webdriver.Chrome(options=options)

# Accumulates one DataFrame per route that yielded at least one bus
consolidated_bus_data = []

def extract_bus_details(driver, url):
    """
    Extract the bus listings shown on one route page.

    Loads ``url``, scrolls to the bottom of the page up to 10 times so that
    additional listings get a chance to load, waits up to 30 seconds for at
    least one bus container to be present, and then reads the text of each
    field from every container.

    Each bus is collected as a complete row before it is stored, so a stale
    element part-way through a bus can never leave the columns with
    different lengths. A stale bus is re-read a few times before it is
    skipped.

    Args:
        driver (webdriver.Chrome): The Selenium WebDriver instance.
        url (str): The route page URL to extract bus details from.

    Returns:
        pd.DataFrame: One row per bus with the columns Bus_names, Bus_types,
        Start_Time, End_Time, Star_Ratings, Total_Duration, Prices,
        Seats_Available and Route_URL. A field missing from a listing is
        "N/A". On a timeout or any other error the rows collected so far
        (possibly none) are returned.
    """
    bus_xpath = "//div[contains(@class, 'clearfix bus-item')]"
    # Output column -> XPath (relative to a bus container) of the element
    # whose text fills that column.
    field_xpaths = {
        "Bus_names": ".//div[contains(@class, 'travels')]",
        "Bus_types": ".//div[contains(@class, 'bus-type')]",
        "Start_Time": ".//div[contains(@class, 'dp-time')]",
        "End_Time": ".//div[contains(@class, 'bp-time')]",
        "Star_Ratings": ".//div[contains(@class, 'rating')]/span",
        "Total_Duration": ".//div[contains(@class, 'dur')]",
        "Prices": ".//div[contains(@class, 'fare')]/span",
        "Seats_Available": ".//div[contains(@class, 'seat-left')]",
    }
    max_attempts = 3  # reads per bus before a stale container is skipped
    rows = []

    def read_row(bus):
        """Return one complete row for ``bus``; absent fields become "N/A"."""
        row = {}
        for column, xpath in field_xpaths.items():
            try:
                row[column] = bus.find_element(By.XPATH, xpath).text
            except NoSuchElementException:
                row[column] = "N/A"
        row["Route_URL"] = url  # Add the URL for tracking
        return row

    try:
        driver.get(url)
        time.sleep(2)  # Adjust based on page load speed

        # Scroll until the page height stops growing (at most 10 times)
        previous_height = driver.execute_script("return document.body.scrollHeight")
        for _ in range(10):
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)  # Adjust based on page loading speed
            current_height = driver.execute_script("return document.body.scrollHeight")
            if current_height == previous_height:
                break
            previous_height = current_height

        # Wait for bus details to load
        WebDriverWait(driver, 30).until(
            EC.presence_of_all_elements_located((By.XPATH, bus_xpath))
        )

        buses = driver.find_elements(By.XPATH, bus_xpath)
        print(f"Found {len(buses)} buses for URL: {url}")

        for i in range(len(buses)):
            for _attempt in range(max_attempts):
                try:
                    # Re-fetch the containers so the reference is fresh
                    bus = driver.find_elements(By.XPATH, bus_xpath)[i]
                    # Only a fully read row is stored, keeping columns aligned
                    rows.append(read_row(bus))
                    break
                except StaleElementReferenceException:
                    print(f"Stale element reference at bus {i + 1}, retrying...")
            else:
                print(f"Skipping bus {i + 1} after {max_attempts} stale reads")

    except TimeoutException:
        print(f"Timeout occurred for URL: {url}")
    except Exception as e:
        print(f"Error processing URL {url}: {e}")

    # Explicit columns keep the schema stable even when no rows were read
    return pd.DataFrame(rows, columns=[*field_xpaths, "Route_URL"])

# Scrape every route listed in the input CSV, then save the combined result.
# The try/finally guarantees the browser is shut down even if the run is
# interrupted or combining/saving the data fails.
try:
    for index, row in route_df.iterrows():
        route_url = row.get("Route_link")  # Adjust column name as per your CSV
        print(f"Processing URL {index + 1}/{len(route_df)}: {route_url}")
        try:
            bus_details_df = extract_bus_details(driver, route_url)
            # Routes that produced no rows are left out of the output
            if not bus_details_df.empty:
                consolidated_bus_data.append(bus_details_df)
        except Exception as e:
            print(f"Error processing URL {route_url}: {e}")

    # Combine all data into a single DataFrame
    if consolidated_bus_data:
        consolidated_df = pd.concat(consolidated_bus_data, ignore_index=True)
        consolidated_df.to_csv(output_csv_path, index=False)
        print(f"Consolidated bus details saved to {output_csv_path}")
    else:
        print("No bus details extracted.")
finally:
    # Close the browser
    driver.quit()


Processing URL 1/37: https://www.redbus.in/bus-tickets/delhi-to-shimla
Found 22 buses for URL: https://www.redbus.in/bus-tickets/delhi-to-shimla
Processing URL 2/37: https://www.redbus.in/bus-tickets/hamirpur-himachal-pradesh-to-chandigarh
Found 14 buses for URL: https://www.redbus.in/bus-tickets/hamirpur-himachal-pradesh-to-chandigarh
Processing URL 3/37: https://www.redbus.in/bus-tickets/chandigarh-to-hamirpur-himachal-pradesh
Found 4 buses for URL: https://www.redbus.in/bus-tickets/chandigarh-to-hamirpur-himachal-pradesh
Processing URL 4/37: https://www.redbus.in/bus-tickets/shimla-to-delhi
Found 20 buses for URL: https://www.redbus.in/bus-tickets/shimla-to-delhi
Processing URL 5/37: https://www.redbus.in/bus-tickets/hamirpur-himachal-pradesh-to-delhi
Found 14 buses for URL: https://www.redbus.in/bus-tickets/hamirpur-himachal-pradesh-to-delhi
Processing URL 6/37: https://www.redbus.in/bus-tickets/delhi-to-hamirpur-himachal-pradesh
Found 14 buses for URL: https://www.redbus.in/bus-ti

In [4]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, StaleElementReferenceException
import pandas as pd
import time

# Source CSV of route links and destination CSV for the scraped bus details
input_csv_path = "C:/Users/Dine24/RedBus/df_HY.csv"
output_csv_path = "C:/Users/Dine24/RedBus/df_HY_bus_details.csv"
route_df = pd.read_csv(input_csv_path)

# Start a Chrome session that sends a fixed desktop user-agent string
options = webdriver.ChromeOptions()
options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
)
driver = webdriver.Chrome(options=options)

# Per-route DataFrames are collected here
consolidated_bus_data = []

def extract_bus_details(driver, url):
    """
    Scrape every bus listing shown on one route page.

    Loads the page, scrolls until the document height stops growing (at most
    10 scrolls), waits up to 30 seconds for bus containers to be present and
    then reads each container's fields. A field whose element is missing is
    recorded as "N/A".

    Each bus is assembled as a complete row before it is stored, so a
    StaleElementReferenceException raised part-way through a bus drops only
    that bus and can never leave the columns with different lengths (which
    would make the DataFrame construction fail and lose the whole route).

    Args:
        driver (webdriver.Chrome): The Selenium WebDriver instance.
        url (str): The route page URL to scrape.

    Returns:
        pd.DataFrame: One row per bus with the columns Bus_names, Bus_types,
        Start_Time, End_Time, Star_Ratings, Total_Duration, Prices,
        Seats_Available and Route_URL. Timeouts and other errors are printed,
        not raised; whatever rows were collected so far are returned.
    """
    container_xpath = "//div[contains(@class, 'clearfix bus-item')]"
    # Output column -> XPath of the element, relative to one bus container.
    field_xpaths = {
        "Bus_names": ".//div[contains(@class, 'travels')]",
        "Bus_types": ".//div[contains(@class, 'bus-type')]",
        "Start_Time": ".//div[contains(@class, 'dp-time')]",
        "End_Time": ".//div[contains(@class, 'bp-time')]",
        "Star_Ratings": ".//div[contains(@class, 'rating')]/span",
        "Total_Duration": ".//div[contains(@class, 'dur')]",
        "Prices": ".//div[contains(@class, 'fare')]/span",
        "Seats_Available": ".//div[contains(@class, 'seat-left')]",
    }
    columns = [*field_xpaths, "Route_URL"]
    rows = []

    try:
        driver.get(url)
        time.sleep(2)  # Adjust based on page load speed

        # Scroll and load content incrementally
        previous_height = driver.execute_script("return document.body.scrollHeight")
        for _ in range(10):  # Limit the scrolls to avoid infinite loops
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)  # Adjust based on page loading speed
            current_height = driver.execute_script("return document.body.scrollHeight")
            if current_height == previous_height:
                break
            previous_height = current_height

        # Wait for bus details to load
        WebDriverWait(driver, 30).until(
            EC.presence_of_all_elements_located((By.XPATH, container_xpath))
        )

        bus_count = len(driver.find_elements(By.XPATH, container_xpath))
        print(f"Found {bus_count} buses for URL: {url}")

        for i in range(bus_count):
            try:
                # Re-fetch the bus container to avoid stale element reference
                bus = driver.find_elements(By.XPATH, container_xpath)[i]

                row = {}
                for column, xpath in field_xpaths.items():
                    try:
                        row[column] = bus.find_element(By.XPATH, xpath).text
                    except NoSuchElementException:
                        row[column] = "N/A"
                row["Route_URL"] = url  # Add the URL for tracking
            except StaleElementReferenceException:
                # The half-built row is discarded; nothing was stored for it.
                print(f"Stale element reference at bus {i + 1}, skipping this bus.")
                continue

            # Only complete rows are stored, keeping all columns aligned.
            rows.append(row)

    except TimeoutException:
        print(f"Timeout occurred for URL: {url}")
    except Exception as e:
        print(f"Error processing URL {url}: {e}")

    return pd.DataFrame(rows, columns=columns)

# Scrape every route in route_df and write the combined result to CSV.
# The whole run sits inside try/finally so the Chrome session is always
# closed, even if the scrape is interrupted or saving the CSV raises.
try:
    total_routes = len(route_df)
    # enumerate() yields a true 1-based position; the DataFrame index label is
    # only 0..n-1 when the frame still carries the default RangeIndex.
    for position, (_, row) in enumerate(route_df.iterrows(), start=1):
        route_url = row.get("Route_link")  # Adjust column name as per your CSV
        print(f"Processing URL {position}/{total_routes}: {route_url}")

        # row.get() gives None when the column is absent and NaN for a blank
        # cell; neither can be loaded, so skip instead of driving the browser.
        if not isinstance(route_url, str) or not route_url.strip():
            print(f"Skipping row {position}: no usable route URL.")
            continue

        try:
            bus_details_df = extract_bus_details(driver, route_url)
        except Exception as e:
            # One failing route must not abort the remaining routes.
            print(f"Error processing URL {route_url}: {e}")
            continue

        if not bus_details_df.empty:
            consolidated_bus_data.append(bus_details_df)

    # Combine all per-route frames into a single DataFrame and save it
    if consolidated_bus_data:
        consolidated_df = pd.concat(consolidated_bus_data, ignore_index=True)
        consolidated_df.to_csv(output_csv_path, index=False)
        print(f"Consolidated bus details saved to {output_csv_path}")
    else:
        print("No bus details extracted.")
finally:
    # Close the browser
    driver.quit()


Processing URL 1/26: https://www.redbus.in/bus-tickets/khammam-to-hyderabad
Found 48 buses for URL: https://www.redbus.in/bus-tickets/khammam-to-hyderabad
Processing URL 2/26: https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada
Found 120 buses for URL: https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada
Processing URL 3/26: https://www.redbus.in/bus-tickets/hyderabad-to-khammam
Found 96 buses for URL: https://www.redbus.in/bus-tickets/hyderabad-to-khammam
Processing URL 4/26: https://www.redbus.in/bus-tickets/hyderabad-to-srisailam
Found 26 buses for URL: https://www.redbus.in/bus-tickets/hyderabad-to-srisailam
Processing URL 5/26: https://www.redbus.in/bus-tickets/karimnagar-to-hyderabad
Found 4 buses for URL: https://www.redbus.in/bus-tickets/karimnagar-to-hyderabad
Processing URL 6/26: https://www.redbus.in/bus-tickets/hyderabad-to-adilabad
Found 84 buses for URL: https://www.redbus.in/bus-tickets/hyderabad-to-adilabad
Processing URL 7/26: https://www.redbus.in/bus-ticke

In [5]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, StaleElementReferenceException
import pandas as pd
import time

# Source CSV of route links and destination CSV for the scraped bus details
input_csv_path = "C:/Users/Dine24/RedBus/df_KA.csv"
output_csv_path = "C:/Users/Dine24/RedBus/df_KA_bus_details.csv"
route_df = pd.read_csv(input_csv_path)

# Start a Chrome session that sends a fixed desktop user-agent string
options = webdriver.ChromeOptions()
options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
)
driver = webdriver.Chrome(options=options)

# Per-route DataFrames are collected here
consolidated_bus_data = []

def extract_bus_details(driver, url):
    """
    Scrape every bus listing shown on one route page.

    Loads the page, scrolls until the document height stops growing (at most
    10 scrolls), waits up to 30 seconds for bus containers to be present and
    then reads each container's fields. A field whose element is missing is
    recorded as "N/A".

    Each bus is assembled as a complete row before it is stored, so a
    StaleElementReferenceException raised part-way through a bus drops only
    that bus and can never leave the columns with different lengths (which
    would make the DataFrame construction fail and lose the whole route).

    Args:
        driver (webdriver.Chrome): The Selenium WebDriver instance.
        url (str): The route page URL to scrape.

    Returns:
        pd.DataFrame: One row per bus with the columns Bus_names, Bus_types,
        Start_Time, End_Time, Star_Ratings, Total_Duration, Prices,
        Seats_Available and Route_URL. Timeouts and other errors are printed,
        not raised; whatever rows were collected so far are returned.
    """
    container_xpath = "//div[contains(@class, 'clearfix bus-item')]"
    # Output column -> XPath of the element, relative to one bus container.
    field_xpaths = {
        "Bus_names": ".//div[contains(@class, 'travels')]",
        "Bus_types": ".//div[contains(@class, 'bus-type')]",
        "Start_Time": ".//div[contains(@class, 'dp-time')]",
        "End_Time": ".//div[contains(@class, 'bp-time')]",
        "Star_Ratings": ".//div[contains(@class, 'rating')]/span",
        "Total_Duration": ".//div[contains(@class, 'dur')]",
        "Prices": ".//div[contains(@class, 'fare')]/span",
        "Seats_Available": ".//div[contains(@class, 'seat-left')]",
    }
    columns = [*field_xpaths, "Route_URL"]
    rows = []

    try:
        driver.get(url)
        time.sleep(2)  # Adjust based on page load speed

        # Scroll and load content incrementally
        previous_height = driver.execute_script("return document.body.scrollHeight")
        for _ in range(10):  # Limit the scrolls to avoid infinite loops
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)  # Adjust based on page loading speed
            current_height = driver.execute_script("return document.body.scrollHeight")
            if current_height == previous_height:
                break
            previous_height = current_height

        # Wait for bus details to load
        WebDriverWait(driver, 30).until(
            EC.presence_of_all_elements_located((By.XPATH, container_xpath))
        )

        bus_count = len(driver.find_elements(By.XPATH, container_xpath))
        print(f"Found {bus_count} buses for URL: {url}")

        for i in range(bus_count):
            try:
                # Re-fetch the bus container to avoid stale element reference
                bus = driver.find_elements(By.XPATH, container_xpath)[i]

                row = {}
                for column, xpath in field_xpaths.items():
                    try:
                        row[column] = bus.find_element(By.XPATH, xpath).text
                    except NoSuchElementException:
                        row[column] = "N/A"
                row["Route_URL"] = url  # Add the URL for tracking
            except StaleElementReferenceException:
                # The half-built row is discarded; nothing was stored for it.
                print(f"Stale element reference at bus {i + 1}, skipping this bus.")
                continue

            # Only complete rows are stored, keeping all columns aligned.
            rows.append(row)

    except TimeoutException:
        print(f"Timeout occurred for URL: {url}")
    except Exception as e:
        print(f"Error processing URL {url}: {e}")

    return pd.DataFrame(rows, columns=columns)

# Scrape every route in route_df and write the combined result to CSV.
# The whole run sits inside try/finally so the Chrome session is always
# closed, even if the scrape is interrupted or saving the CSV raises.
try:
    total_routes = len(route_df)
    # enumerate() yields a true 1-based position; the DataFrame index label is
    # only 0..n-1 when the frame still carries the default RangeIndex.
    for position, (_, row) in enumerate(route_df.iterrows(), start=1):
        route_url = row.get("Route_link")  # Adjust column name as per your CSV
        print(f"Processing URL {position}/{total_routes}: {route_url}")

        # row.get() gives None when the column is absent and NaN for a blank
        # cell; neither can be loaded, so skip instead of driving the browser.
        if not isinstance(route_url, str) or not route_url.strip():
            print(f"Skipping row {position}: no usable route URL.")
            continue

        try:
            bus_details_df = extract_bus_details(driver, route_url)
        except Exception as e:
            # One failing route must not abort the remaining routes.
            print(f"Error processing URL {route_url}: {e}")
            continue

        if not bus_details_df.empty:
            consolidated_bus_data.append(bus_details_df)

    # Combine all per-route frames into a single DataFrame and save it
    if consolidated_bus_data:
        consolidated_df = pd.concat(consolidated_bus_data, ignore_index=True)
        consolidated_df.to_csv(output_csv_path, index=False)
        print(f"Consolidated bus details saved to {output_csv_path}")
    else:
        print("No bus details extracted.")
finally:
    # Close the browser
    driver.quit()


Processing URL 1/45: https://www.redbus.in/bus-tickets/pune-to-goa
Found 110 buses for URL: https://www.redbus.in/bus-tickets/pune-to-goa
Processing URL 2/45: https://www.redbus.in/bus-tickets/goa-to-pune
Found 30 buses for URL: https://www.redbus.in/bus-tickets/goa-to-pune
Processing URL 3/45: https://www.redbus.in/bus-tickets/mumbai-to-goa
Found 88 buses for URL: https://www.redbus.in/bus-tickets/mumbai-to-goa
Processing URL 4/45: https://www.redbus.in/bus-tickets/goa-to-mumbai
Found 80 buses for URL: https://www.redbus.in/bus-tickets/goa-to-mumbai
Processing URL 5/45: https://www.redbus.in/bus-tickets/belagavi-to-goa
Timeout occurred for URL: https://www.redbus.in/bus-tickets/belagavi-to-goa
Processing URL 6/45: https://www.redbus.in/bus-tickets/bangalore-to-goa
Found 74 buses for URL: https://www.redbus.in/bus-tickets/bangalore-to-goa
Processing URL 7/45: https://www.redbus.in/bus-tickets/goa-to-bangalore
Found 62 buses for URL: https://www.redbus.in/bus-tickets/goa-to-bangalore
Pr

In [6]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, StaleElementReferenceException
import pandas as pd
import time

# Source CSV of route links and destination CSV for the scraped bus details
input_csv_path = "C:/Users/Dine24/RedBus/df_PU.csv"
output_csv_path = "C:/Users/Dine24/RedBus/df_PU_bus_details.csv"
route_df = pd.read_csv(input_csv_path)

# Start a Chrome session that sends a fixed desktop user-agent string
options = webdriver.ChromeOptions()
options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
)
driver = webdriver.Chrome(options=options)

# Per-route DataFrames are collected here
consolidated_bus_data = []

def extract_bus_details(driver, url):
    """
    Scrape every bus listing shown on one route page.

    Loads the page, scrolls until the document height stops growing (at most
    10 scrolls), waits up to 30 seconds for bus containers to be present and
    then reads each container's fields. A field whose element is missing is
    recorded as "N/A".

    Each bus is assembled as a complete row before it is stored, so a
    StaleElementReferenceException raised part-way through a bus drops only
    that bus and can never leave the columns with different lengths (which
    would make the DataFrame construction fail and lose the whole route).

    Args:
        driver (webdriver.Chrome): The Selenium WebDriver instance.
        url (str): The route page URL to scrape.

    Returns:
        pd.DataFrame: One row per bus with the columns Bus_names, Bus_types,
        Start_Time, End_Time, Star_Ratings, Total_Duration, Prices,
        Seats_Available and Route_URL. Timeouts and other errors are printed,
        not raised; whatever rows were collected so far are returned.
    """
    container_xpath = "//div[contains(@class, 'clearfix bus-item')]"
    # Output column -> XPath of the element, relative to one bus container.
    field_xpaths = {
        "Bus_names": ".//div[contains(@class, 'travels')]",
        "Bus_types": ".//div[contains(@class, 'bus-type')]",
        "Start_Time": ".//div[contains(@class, 'dp-time')]",
        "End_Time": ".//div[contains(@class, 'bp-time')]",
        "Star_Ratings": ".//div[contains(@class, 'rating')]/span",
        "Total_Duration": ".//div[contains(@class, 'dur')]",
        "Prices": ".//div[contains(@class, 'fare')]/span",
        "Seats_Available": ".//div[contains(@class, 'seat-left')]",
    }
    columns = [*field_xpaths, "Route_URL"]
    rows = []

    try:
        driver.get(url)
        time.sleep(2)  # Adjust based on page load speed

        # Scroll and load content incrementally
        previous_height = driver.execute_script("return document.body.scrollHeight")
        for _ in range(10):  # Limit the scrolls to avoid infinite loops
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)  # Adjust based on page loading speed
            current_height = driver.execute_script("return document.body.scrollHeight")
            if current_height == previous_height:
                break
            previous_height = current_height

        # Wait for bus details to load
        WebDriverWait(driver, 30).until(
            EC.presence_of_all_elements_located((By.XPATH, container_xpath))
        )

        bus_count = len(driver.find_elements(By.XPATH, container_xpath))
        print(f"Found {bus_count} buses for URL: {url}")

        for i in range(bus_count):
            try:
                # Re-fetch the bus container to avoid stale element reference
                bus = driver.find_elements(By.XPATH, container_xpath)[i]

                row = {}
                for column, xpath in field_xpaths.items():
                    try:
                        row[column] = bus.find_element(By.XPATH, xpath).text
                    except NoSuchElementException:
                        row[column] = "N/A"
                row["Route_URL"] = url  # Add the URL for tracking
            except StaleElementReferenceException:
                # The half-built row is discarded; nothing was stored for it.
                print(f"Stale element reference at bus {i + 1}, skipping this bus.")
                continue

            # Only complete rows are stored, keeping all columns aligned.
            rows.append(row)

    except TimeoutException:
        print(f"Timeout occurred for URL: {url}")
    except Exception as e:
        print(f"Error processing URL {url}: {e}")

    return pd.DataFrame(rows, columns=columns)

# Scrape every route in route_df and write the combined result to CSV.
# The whole run sits inside try/finally so the Chrome session is always
# closed, even if the scrape is interrupted or saving the CSV raises.
try:
    total_routes = len(route_df)
    # enumerate() yields a true 1-based position; the DataFrame index label is
    # only 0..n-1 when the frame still carries the default RangeIndex.
    for position, (_, row) in enumerate(route_df.iterrows(), start=1):
        route_url = row.get("Route_link")  # Adjust column name as per your CSV
        print(f"Processing URL {position}/{total_routes}: {route_url}")

        # row.get() gives None when the column is absent and NaN for a blank
        # cell; neither can be loaded, so skip instead of driving the browser.
        if not isinstance(route_url, str) or not route_url.strip():
            print(f"Skipping row {position}: no usable route URL.")
            continue

        try:
            bus_details_df = extract_bus_details(driver, route_url)
        except Exception as e:
            # One failing route must not abort the remaining routes.
            print(f"Error processing URL {route_url}: {e}")
            continue

        if not bus_details_df.empty:
            consolidated_bus_data.append(bus_details_df)

    # Combine all per-route frames into a single DataFrame and save it
    if consolidated_bus_data:
        consolidated_df = pd.concat(consolidated_bus_data, ignore_index=True)
        consolidated_df.to_csv(output_csv_path, index=False)
        print(f"Consolidated bus details saved to {output_csv_path}")
    else:
        print("No bus details extracted.")
finally:
    # Close the browser
    driver.quit()


Processing URL 1/23: https://www.redbus.in/bus-tickets/delhi-to-patiala
Found 6 buses for URL: https://www.redbus.in/bus-tickets/delhi-to-patiala
Processing URL 2/23: https://www.redbus.in/bus-tickets/patiala-to-delhi
Found 4 buses for URL: https://www.redbus.in/bus-tickets/patiala-to-delhi
Processing URL 3/23: https://www.redbus.in/bus-tickets/ludhiana-to-delhi
Found 60 buses for URL: https://www.redbus.in/bus-tickets/ludhiana-to-delhi
Processing URL 4/23: https://www.redbus.in/bus-tickets/delhi-to-ludhiana
Found 90 buses for URL: https://www.redbus.in/bus-tickets/delhi-to-ludhiana
Processing URL 5/23: https://www.redbus.in/bus-tickets/ludhiana-to-delhi-airport
Found 60 buses for URL: https://www.redbus.in/bus-tickets/ludhiana-to-delhi-airport
Processing URL 6/23: https://www.redbus.in/bus-tickets/delhi-to-jalandhar
Found 110 buses for URL: https://www.redbus.in/bus-tickets/delhi-to-jalandhar
Processing URL 7/23: https://www.redbus.in/bus-tickets/delhi-airport-to-patiala
Found 6 buses

In [7]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, StaleElementReferenceException
import pandas as pd
import time

# Source CSV of route links and destination CSV for the scraped bus details
input_csv_path = "C:/Users/Dine24/RedBus/df_RA.csv"
output_csv_path = "C:/Users/Dine24/RedBus/df_RA_bus_details.csv"
route_df = pd.read_csv(input_csv_path)

# Start a Chrome session that sends a fixed desktop user-agent string
options = webdriver.ChromeOptions()
options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
)
driver = webdriver.Chrome(options=options)

# Per-route DataFrames are collected here
consolidated_bus_data = []

def extract_bus_details(driver, url):
    """
    Scrape every bus listing shown on one route page.

    Loads the page, scrolls until the document height stops growing (at most
    10 scrolls), waits up to 30 seconds for bus containers to be present and
    then reads each container's fields. A field whose element is missing is
    recorded as "N/A".

    Each bus is assembled as a complete row before it is stored, so a
    StaleElementReferenceException raised part-way through a bus drops only
    that bus and can never leave the columns with different lengths (which
    would make the DataFrame construction fail and lose the whole route).

    Args:
        driver (webdriver.Chrome): The Selenium WebDriver instance.
        url (str): The route page URL to scrape.

    Returns:
        pd.DataFrame: One row per bus with the columns Bus_names, Bus_types,
        Start_Time, End_Time, Star_Ratings, Total_Duration, Prices,
        Seats_Available and Route_URL. Timeouts and other errors are printed,
        not raised; whatever rows were collected so far are returned.
    """
    container_xpath = "//div[contains(@class, 'clearfix bus-item')]"
    # Output column -> XPath of the element, relative to one bus container.
    field_xpaths = {
        "Bus_names": ".//div[contains(@class, 'travels')]",
        "Bus_types": ".//div[contains(@class, 'bus-type')]",
        "Start_Time": ".//div[contains(@class, 'dp-time')]",
        "End_Time": ".//div[contains(@class, 'bp-time')]",
        "Star_Ratings": ".//div[contains(@class, 'rating')]/span",
        "Total_Duration": ".//div[contains(@class, 'dur')]",
        "Prices": ".//div[contains(@class, 'fare')]/span",
        "Seats_Available": ".//div[contains(@class, 'seat-left')]",
    }
    columns = [*field_xpaths, "Route_URL"]
    rows = []

    try:
        driver.get(url)
        time.sleep(2)  # Adjust based on page load speed

        # Scroll and load content incrementally
        previous_height = driver.execute_script("return document.body.scrollHeight")
        for _ in range(10):  # Limit the scrolls to avoid infinite loops
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)  # Adjust based on page loading speed
            current_height = driver.execute_script("return document.body.scrollHeight")
            if current_height == previous_height:
                break
            previous_height = current_height

        # Wait for bus details to load
        WebDriverWait(driver, 30).until(
            EC.presence_of_all_elements_located((By.XPATH, container_xpath))
        )

        bus_count = len(driver.find_elements(By.XPATH, container_xpath))
        print(f"Found {bus_count} buses for URL: {url}")

        for i in range(bus_count):
            try:
                # Re-fetch the bus container to avoid stale element reference
                bus = driver.find_elements(By.XPATH, container_xpath)[i]

                row = {}
                for column, xpath in field_xpaths.items():
                    try:
                        row[column] = bus.find_element(By.XPATH, xpath).text
                    except NoSuchElementException:
                        row[column] = "N/A"
                row["Route_URL"] = url  # Add the URL for tracking
            except StaleElementReferenceException:
                # The half-built row is discarded; nothing was stored for it.
                print(f"Stale element reference at bus {i + 1}, skipping this bus.")
                continue

            # Only complete rows are stored, keeping all columns aligned.
            rows.append(row)

    except TimeoutException:
        print(f"Timeout occurred for URL: {url}")
    except Exception as e:
        print(f"Error processing URL {url}: {e}")

    return pd.DataFrame(rows, columns=columns)

# Scrape every route in route_df and write the combined result to CSV.
# The whole run sits inside try/finally so the Chrome session is always
# closed, even if the scrape is interrupted or saving the CSV raises.
try:
    total_routes = len(route_df)
    # enumerate() yields a true 1-based position; the DataFrame index label is
    # only 0..n-1 when the frame still carries the default RangeIndex.
    for position, (_, row) in enumerate(route_df.iterrows(), start=1):
        route_url = row.get("Route_link")  # Adjust column name as per your CSV
        print(f"Processing URL {position}/{total_routes}: {route_url}")

        # row.get() gives None when the column is absent and NaN for a blank
        # cell; neither can be loaded, so skip instead of driving the browser.
        if not isinstance(route_url, str) or not route_url.strip():
            print(f"Skipping row {position}: no usable route URL.")
            continue

        try:
            bus_details_df = extract_bus_details(driver, route_url)
        except Exception as e:
            # One failing route must not abort the remaining routes.
            print(f"Error processing URL {route_url}: {e}")
            continue

        if not bus_details_df.empty:
            consolidated_bus_data.append(bus_details_df)

    # Combine all per-route frames into a single DataFrame and save it
    if consolidated_bus_data:
        consolidated_df = pd.concat(consolidated_bus_data, ignore_index=True)
        consolidated_df.to_csv(output_csv_path, index=False)
        print(f"Consolidated bus details saved to {output_csv_path}")
    else:
        print("No bus details extracted.")
finally:
    # Close the browser
    driver.quit()


Processing URL 1/20: https://www.redbus.in/bus-tickets/jodhpur-to-ajmer
Found 84 buses for URL: https://www.redbus.in/bus-tickets/jodhpur-to-ajmer
Processing URL 2/20: https://www.redbus.in/bus-tickets/beawer-to-jaipur
Found 94 buses for URL: https://www.redbus.in/bus-tickets/beawer-to-jaipur
Processing URL 3/20: https://www.redbus.in/bus-tickets/udaipur-to-jodhpur
Found 42 buses for URL: https://www.redbus.in/bus-tickets/udaipur-to-jodhpur
Processing URL 4/20: https://www.redbus.in/bus-tickets/jaipur-to-jodhpur
Found 52 buses for URL: https://www.redbus.in/bus-tickets/jaipur-to-jodhpur
Processing URL 5/20: https://www.redbus.in/bus-tickets/sikar-to-jaipur
Found 74 buses for URL: https://www.redbus.in/bus-tickets/sikar-to-jaipur
Processing URL 6/20: https://www.redbus.in/bus-tickets/aligarh-uttar-pradesh-to-jaipur
Found 10 buses for URL: https://www.redbus.in/bus-tickets/aligarh-uttar-pradesh-to-jaipur
Processing URL 7/20: https://www.redbus.in/bus-tickets/kota-rajasthan-to-jaipur
Foun

In [8]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, StaleElementReferenceException
import pandas as pd
import time

# Source CSV of route links and destination CSV for the scraped bus details
input_csv_path = "C:/Users/Dine24/RedBus/df_SB.csv"
output_csv_path = "C:/Users/Dine24/RedBus/df_SB_bus_details.csv"
route_df = pd.read_csv(input_csv_path)

# Start a Chrome session that sends a fixed desktop user-agent string
options = webdriver.ChromeOptions()
options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
)
driver = webdriver.Chrome(options=options)

# Per-route DataFrames are collected here
consolidated_bus_data = []

def extract_bus_details(driver, url):
    """
    Scrape every bus listing shown on one route page.

    Loads the page, scrolls until the document height stops growing (at most
    10 scrolls), waits up to 30 seconds for bus containers to be present and
    then reads each container's fields. A field whose element is missing is
    recorded as "N/A".

    Each bus is assembled as a complete row before it is stored, so a
    StaleElementReferenceException raised part-way through a bus drops only
    that bus and can never leave the columns with different lengths (which
    would make the DataFrame construction fail and lose the whole route).

    Args:
        driver (webdriver.Chrome): The Selenium WebDriver instance.
        url (str): The route page URL to scrape.

    Returns:
        pd.DataFrame: One row per bus with the columns Bus_names, Bus_types,
        Start_Time, End_Time, Star_Ratings, Total_Duration, Prices,
        Seats_Available and Route_URL. Timeouts and other errors are printed,
        not raised; whatever rows were collected so far are returned.
    """
    container_xpath = "//div[contains(@class, 'clearfix bus-item')]"
    # Output column -> XPath of the element, relative to one bus container.
    field_xpaths = {
        "Bus_names": ".//div[contains(@class, 'travels')]",
        "Bus_types": ".//div[contains(@class, 'bus-type')]",
        "Start_Time": ".//div[contains(@class, 'dp-time')]",
        "End_Time": ".//div[contains(@class, 'bp-time')]",
        "Star_Ratings": ".//div[contains(@class, 'rating')]/span",
        "Total_Duration": ".//div[contains(@class, 'dur')]",
        "Prices": ".//div[contains(@class, 'fare')]/span",
        "Seats_Available": ".//div[contains(@class, 'seat-left')]",
    }
    columns = [*field_xpaths, "Route_URL"]
    rows = []

    try:
        driver.get(url)
        time.sleep(2)  # Adjust based on page load speed

        # Scroll and load content incrementally
        previous_height = driver.execute_script("return document.body.scrollHeight")
        for _ in range(10):  # Limit the scrolls to avoid infinite loops
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)  # Adjust based on page loading speed
            current_height = driver.execute_script("return document.body.scrollHeight")
            if current_height == previous_height:
                break
            previous_height = current_height

        # Wait for bus details to load
        WebDriverWait(driver, 30).until(
            EC.presence_of_all_elements_located((By.XPATH, container_xpath))
        )

        bus_count = len(driver.find_elements(By.XPATH, container_xpath))
        print(f"Found {bus_count} buses for URL: {url}")

        for i in range(bus_count):
            try:
                # Re-fetch the bus container to avoid stale element reference
                bus = driver.find_elements(By.XPATH, container_xpath)[i]

                row = {}
                for column, xpath in field_xpaths.items():
                    try:
                        row[column] = bus.find_element(By.XPATH, xpath).text
                    except NoSuchElementException:
                        row[column] = "N/A"
                row["Route_URL"] = url  # Add the URL for tracking
            except StaleElementReferenceException:
                # The half-built row is discarded; nothing was stored for it.
                print(f"Stale element reference at bus {i + 1}, skipping this bus.")
                continue

            # Only complete rows are stored, keeping all columns aligned.
            rows.append(row)

    except TimeoutException:
        print(f"Timeout occurred for URL: {url}")
    except Exception as e:
        print(f"Error processing URL {url}: {e}")

    return pd.DataFrame(rows, columns=columns)

# Scrape every route in route_df and write the combined result to CSV.
# The whole run sits inside try/finally so the Chrome session is always
# closed, even if the scrape is interrupted or saving the CSV raises.
try:
    total_routes = len(route_df)
    # enumerate() yields a true 1-based position; the DataFrame index label is
    # only 0..n-1 when the frame still carries the default RangeIndex.
    for position, (_, row) in enumerate(route_df.iterrows(), start=1):
        route_url = row.get("Route_link")  # Adjust column name as per your CSV
        print(f"Processing URL {position}/{total_routes}: {route_url}")

        # row.get() gives None when the column is absent and NaN for a blank
        # cell; neither can be loaded, so skip instead of driving the browser.
        if not isinstance(route_url, str) or not route_url.strip():
            print(f"Skipping row {position}: no usable route URL.")
            continue

        try:
            bus_details_df = extract_bus_details(driver, route_url)
        except Exception as e:
            # One failing route must not abort the remaining routes.
            print(f"Error processing URL {route_url}: {e}")
            continue

        if not bus_details_df.empty:
            consolidated_bus_data.append(bus_details_df)

    # Combine all per-route frames into a single DataFrame and save it
    if consolidated_bus_data:
        consolidated_df = pd.concat(consolidated_bus_data, ignore_index=True)
        consolidated_df.to_csv(output_csv_path, index=False)
        print(f"Consolidated bus details saved to {output_csv_path}")
    else:
        print("No bus details extracted.")
finally:
    # Close the browser
    driver.quit()


Processing URL 1/43: https://www.redbus.in/bus-tickets/durgapur-to-kolkata
Found 6 buses for URL: https://www.redbus.in/bus-tickets/durgapur-to-kolkata
Processing URL 2/43: https://www.redbus.in/bus-tickets/kolkata-to-burdwan
Found 24 buses for URL: https://www.redbus.in/bus-tickets/kolkata-to-burdwan
Processing URL 3/43: https://www.redbus.in/bus-tickets/kolkata-to-durgapur
Found 16 buses for URL: https://www.redbus.in/bus-tickets/kolkata-to-durgapur
Processing URL 4/43: https://www.redbus.in/bus-tickets/haldia-to-kolkata
Timeout occurred for URL: https://www.redbus.in/bus-tickets/haldia-to-kolkata
Processing URL 5/43: https://www.redbus.in/bus-tickets/kolkata-to-haldia
Timeout occurred for URL: https://www.redbus.in/bus-tickets/kolkata-to-haldia
Processing URL 6/43: https://www.redbus.in/bus-tickets/midnapore-to-kolkata
Timeout occurred for URL: https://www.redbus.in/bus-tickets/midnapore-to-kolkata
Processing URL 7/43: https://www.redbus.in/bus-tickets/kolkata-to-arambagh-west-benga

In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, StaleElementReferenceException
import pandas as pd
import time

# Route list to read (input) and destination for the scraped bus details (output).
input_csv_path = 'C:/Users/Dine24/RedBus/df_UP.csv'
output_csv_path = 'C:/Users/Dine24/RedBus/df_UP_bus_details.csv'
route_df = pd.read_csv(input_csv_path)

# Start Chrome with a fixed desktop user-agent string.
options = webdriver.ChromeOptions()
options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/91.0.4472.124 Safari/537.36"
)
driver = webdriver.Chrome(options=options)

# Accumulator for the DataFrames produced while scraping.
consolidated_bus_data = []

# XPath for one bus listing container.  The class test is a substring match, so
# a nested div whose class also contains "clearfix bus-item" would match as
# well; the ancestor filter keeps only the outermost match per listing.
# NOTE(review): the consolidated output of this notebook lists every bus twice
# in a row, which is consistent with such a nested match -- confirm against the
# live page markup.
_BUS_ITEM_XPATH = (
    "//div[contains(@class, 'clearfix bus-item')"
    " and not(ancestor::div[contains(@class, 'clearfix bus-item')])]"
)

# Output column -> XPath (relative to one bus container) of the element whose
# text fills that column.
_BUS_FIELD_XPATHS = {
    "Bus_names": ".//div[contains(@class, 'travels')]",
    "Bus_types": ".//div[contains(@class, 'bus-type')]",
    "Start_Time": ".//div[contains(@class, 'dp-time')]",
    "End_Time": ".//div[contains(@class, 'bp-time')]",
    "Star_Ratings": ".//div[contains(@class, 'rating')]/span",
    "Total_Duration": ".//div[contains(@class, 'dur')]",
    "Prices": ".//div[contains(@class, 'fare')]/span",
    "Seats_Available": ".//div[contains(@class, 'seat-left')]",
}

# How many times a single bus is re-read after a stale-element error.
_STALE_RETRY_LIMIT = 3


def _read_bus_row(bus, url):
    """Return one complete row dict for a bus container; absent fields become "N/A"."""
    row = {}
    for column, xpath in _BUS_FIELD_XPATHS.items():
        try:
            row[column] = bus.find_element(By.XPATH, xpath).text
        except NoSuchElementException:
            row[column] = "N/A"
    row["Route_URL"] = url  # Add the URL for tracking
    return row


def extract_bus_details(driver, url):
    """Scrape every bus listing found on one route page.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Route page URL; it is also stored in the ``Route_URL`` column.

    Returns:
        pd.DataFrame: One row per bus with the columns Bus_names, Bus_types,
        Start_Time, End_Time, Star_Ratings, Total_Duration, Prices,
        Seats_Available and Route_URL.  The frame has no rows when the page
        times out or fails before any bus is read; timeouts and other errors
        are printed, not raised.
    """
    columns = [*_BUS_FIELD_XPATHS, "Route_URL"]
    # Rows are collected as whole dicts so a failure in the middle of one bus
    # can never leave the columns with different lengths.
    rows = []
    try:
        driver.get(url)
        time.sleep(2)  # Adjust based on page load speed

        # Scroll to the bottom until the page height stops growing.
        previous_height = driver.execute_script("return document.body.scrollHeight")
        for _ in range(10):  # Limit the scrolls to avoid infinite loops
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)  # Adjust based on page loading speed
            current_height = driver.execute_script("return document.body.scrollHeight")
            if current_height == previous_height:
                break
            previous_height = current_height

        # Wait for bus details to load
        WebDriverWait(driver, 30).until(
            EC.presence_of_all_elements_located((By.XPATH, _BUS_ITEM_XPATH))
        )

        # Locate all bus containers
        buses = driver.find_elements(By.XPATH, _BUS_ITEM_XPATH)
        print(f"Found {len(buses)} buses for URL: {url}")

        for position in range(len(buses)):
            for _attempt in range(_STALE_RETRY_LIMIT):
                # Re-fetch the containers so the element handle is fresh.
                buses = driver.find_elements(By.XPATH, _BUS_ITEM_XPATH)
                if position >= len(buses):
                    # The list shrank since it was counted; nothing left to read here.
                    break
                try:
                    rows.append(_read_bus_row(buses[position], url))
                    break
                except StaleElementReferenceException:
                    print(f"Stale element reference at bus {position + 1}, retrying...")
            else:
                print(
                    f"Giving up on bus {position + 1} after "
                    f"{_STALE_RETRY_LIMIT} stale attempts"
                )

    except TimeoutException:
        print(f"Timeout occurred for URL: {url}")
    except Exception as e:
        print(f"Error processing URL {url}: {e}")

    return pd.DataFrame(rows, columns=columns)

# Visit every route URL and collect one DataFrame per route.  The browser is
# released in a `finally` so an interrupted or failing run does not leave a
# Chrome process behind.
total_routes = len(route_df)
try:
    # enumerate() numbers the routes by position, so the progress counter stays
    # correct even when route_df does not carry a default 0..n-1 index.
    for position, (_, row) in enumerate(route_df.iterrows(), start=1):
        route_url = row.get("Route_link")  # Adjust column name as per your CSV
        print(f"Processing URL {position}/{total_routes}: {route_url}")
        if pd.isna(route_url):
            # Missing column or empty cell: there is nothing to open in the browser.
            print(f"Skipping row {position}: no route URL")
            continue
        try:
            bus_details_df = extract_bus_details(driver, route_url)
            if not bus_details_df.empty:
                consolidated_bus_data.append(bus_details_df)
        except Exception as e:
            # Best effort: one bad route must not stop the remaining ones.
            print(f"Error processing URL {route_url}: {e}")
finally:
    # Close the browser
    driver.quit()

# Combine all data into a single DataFrame
if consolidated_bus_data:
    consolidated_df = pd.concat(consolidated_bus_data, ignore_index=True)
    consolidated_df.to_csv(output_csv_path, index=False)
    print(f"Consolidated bus details saved to {output_csv_path}")
else:
    print("No bus details extracted.")


In [9]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, StaleElementReferenceException
import pandas as pd
import time

# Route list to read (input) and destination for the scraped bus details (output).
input_csv_path = 'C:/Users/Dine24/RedBus/df_WB.csv'
output_csv_path = 'C:/Users/Dine24/RedBus/df_WB_bus_details.csv'
route_df = pd.read_csv(input_csv_path)

# Start Chrome with a fixed desktop user-agent string.
options = webdriver.ChromeOptions()
options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/91.0.4472.124 Safari/537.36"
)
driver = webdriver.Chrome(options=options)

# Accumulator for the DataFrames produced while scraping.
consolidated_bus_data = []

# XPath for one bus listing container.  The class test is a substring match, so
# a nested div whose class also contains "clearfix bus-item" would match as
# well; the ancestor filter keeps only the outermost match per listing.
# NOTE(review): the consolidated output of this notebook lists every bus twice
# in a row, which is consistent with such a nested match -- confirm against the
# live page markup.
_BUS_ITEM_XPATH = (
    "//div[contains(@class, 'clearfix bus-item')"
    " and not(ancestor::div[contains(@class, 'clearfix bus-item')])]"
)

# Output column -> XPath (relative to one bus container) of the element whose
# text fills that column.
_BUS_FIELD_XPATHS = {
    "Bus_names": ".//div[contains(@class, 'travels')]",
    "Bus_types": ".//div[contains(@class, 'bus-type')]",
    "Start_Time": ".//div[contains(@class, 'dp-time')]",
    "End_Time": ".//div[contains(@class, 'bp-time')]",
    "Star_Ratings": ".//div[contains(@class, 'rating')]/span",
    "Total_Duration": ".//div[contains(@class, 'dur')]",
    "Prices": ".//div[contains(@class, 'fare')]/span",
    "Seats_Available": ".//div[contains(@class, 'seat-left')]",
}

# How many times a single bus is re-read after a stale-element error.
_STALE_RETRY_LIMIT = 3


def _read_bus_row(bus, url):
    """Return one complete row dict for a bus container; absent fields become "N/A"."""
    row = {}
    for column, xpath in _BUS_FIELD_XPATHS.items():
        try:
            row[column] = bus.find_element(By.XPATH, xpath).text
        except NoSuchElementException:
            row[column] = "N/A"
    row["Route_URL"] = url  # Add the URL for tracking
    return row


def extract_bus_details(driver, url):
    """Scrape every bus listing found on one route page.

    Args:
        driver: Selenium WebDriver used to load and query the page.
        url: Route page URL; it is also stored in the ``Route_URL`` column.

    Returns:
        pd.DataFrame: One row per bus with the columns Bus_names, Bus_types,
        Start_Time, End_Time, Star_Ratings, Total_Duration, Prices,
        Seats_Available and Route_URL.  The frame has no rows when the page
        times out or fails before any bus is read; timeouts and other errors
        are printed, not raised.
    """
    columns = [*_BUS_FIELD_XPATHS, "Route_URL"]
    # Rows are collected as whole dicts so a failure in the middle of one bus
    # can never leave the columns with different lengths.
    rows = []
    try:
        driver.get(url)
        time.sleep(2)  # Adjust based on page load speed

        # Scroll to the bottom until the page height stops growing.
        previous_height = driver.execute_script("return document.body.scrollHeight")
        for _ in range(10):  # Limit the scrolls to avoid infinite loops
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)  # Adjust based on page loading speed
            current_height = driver.execute_script("return document.body.scrollHeight")
            if current_height == previous_height:
                break
            previous_height = current_height

        # Wait for bus details to load
        WebDriverWait(driver, 30).until(
            EC.presence_of_all_elements_located((By.XPATH, _BUS_ITEM_XPATH))
        )

        # Locate all bus containers
        buses = driver.find_elements(By.XPATH, _BUS_ITEM_XPATH)
        print(f"Found {len(buses)} buses for URL: {url}")

        for position in range(len(buses)):
            for _attempt in range(_STALE_RETRY_LIMIT):
                # Re-fetch the containers so the element handle is fresh.
                buses = driver.find_elements(By.XPATH, _BUS_ITEM_XPATH)
                if position >= len(buses):
                    # The list shrank since it was counted; nothing left to read here.
                    break
                try:
                    rows.append(_read_bus_row(buses[position], url))
                    break
                except StaleElementReferenceException:
                    print(f"Stale element reference at bus {position + 1}, retrying...")
            else:
                print(
                    f"Giving up on bus {position + 1} after "
                    f"{_STALE_RETRY_LIMIT} stale attempts"
                )

    except TimeoutException:
        print(f"Timeout occurred for URL: {url}")
    except Exception as e:
        print(f"Error processing URL {url}: {e}")

    return pd.DataFrame(rows, columns=columns)

# Visit every route URL and collect one DataFrame per route.  The browser is
# released in a `finally` so an interrupted or failing run does not leave a
# Chrome process behind.
total_routes = len(route_df)
try:
    # enumerate() numbers the routes by position, so the progress counter stays
    # correct even when route_df does not carry a default 0..n-1 index.
    for position, (_, row) in enumerate(route_df.iterrows(), start=1):
        route_url = row.get("Route_link")  # Adjust column name as per your CSV
        print(f"Processing URL {position}/{total_routes}: {route_url}")
        if pd.isna(route_url):
            # Missing column or empty cell: there is nothing to open in the browser.
            print(f"Skipping row {position}: no route URL")
            continue
        try:
            bus_details_df = extract_bus_details(driver, route_url)
            if not bus_details_df.empty:
                consolidated_bus_data.append(bus_details_df)
        except Exception as e:
            # Best effort: one bad route must not stop the remaining ones.
            print(f"Error processing URL {route_url}: {e}")
finally:
    # Close the browser
    driver.quit()

# Combine all data into a single DataFrame
if consolidated_bus_data:
    consolidated_df = pd.concat(consolidated_bus_data, ignore_index=True)
    consolidated_df.to_csv(output_csv_path, index=False)
    print(f"Consolidated bus details saved to {output_csv_path}")
else:
    print("No bus details extracted.")


Processing URL 1/38: https://www.redbus.in/bus-tickets/digha-to-barasat-west-bengal
Found 6 buses for URL: https://www.redbus.in/bus-tickets/digha-to-barasat-west-bengal
Processing URL 2/38: https://www.redbus.in/bus-tickets/durgapur-to-kolkata
Timeout occurred for URL: https://www.redbus.in/bus-tickets/durgapur-to-kolkata
Processing URL 3/38: https://www.redbus.in/bus-tickets/digha-to-kolkata
Found 28 buses for URL: https://www.redbus.in/bus-tickets/digha-to-kolkata
Processing URL 4/38: https://www.redbus.in/bus-tickets/barasat-west-bengal-to-digha
Timeout occurred for URL: https://www.redbus.in/bus-tickets/barasat-west-bengal-to-digha
Processing URL 5/38: https://www.redbus.in/bus-tickets/kolkata-to-durgapur
Found 12 buses for URL: https://www.redbus.in/bus-tickets/kolkata-to-durgapur
Processing URL 6/38: https://www.redbus.in/bus-tickets/kolkata-to-digha
Found 78 buses for URL: https://www.redbus.in/bus-tickets/kolkata-to-digha
Processing URL 7/38: https://www.redbus.in/bus-tickets/

In [17]:
# Stack the per-state bus-detail frames into one table with a fresh 0..n-1 index.
# The individual frames are not created in this cell; they must already exist
# in the notebook session.
state_frames = [
    df_KL_bus_details,
    df_AA_bus_details,
    df_HY_bus_details,
    df_KA_bus_details,
    df_RA_bus_details,
    df_HP_bus_details,
    df_SB_bus_details,
    df_AS_bus_details,
    df_UP_bus_details,
    df_CH_bus_details,
    df_PU_bus_details,
    df_WB_bus_details,
]
df_combined = pd.concat(state_frames, ignore_index=True)

# Show the stacked table.
print(df_combined)

# Write the stacked table to a single CSV, without the index column.
output_csv_path = "C:/Users/Dine24/RedBus/df_redbus_details.csv"
df_combined.to_csv(output_csv_path, index=False)
print(f"Consolidated CSV saved at: {output_csv_path}")


                   Bus_names                       Bus_types Start_Time  \
0      KSRTC (Kerala) - 3435  Super Fast Non AC Seater (2+3)      23:44   
1      KSRTC (Kerala) - 3435  Super Fast Non AC Seater (2+3)      23:44   
2                MMK Travels      A/C Seater / Sleeper (2+1)      22:30   
3                MMK Travels      A/C Seater / Sleeper (2+1)      22:30   
4        SKS Tours & Travels               A/C Sleeper (2+1)      21:15   
...                      ...                             ...        ...   
11852          Pammi Travels               A/C Sleeper (2+1)      21:00   
11853          Pammi Travels  Non A/C Seater / Sleeper (2+1)      21:00   
11854          Pammi Travels  Non A/C Seater / Sleeper (2+1)      21:00   
11855          Pammi Travels           NON A/C Sleeper (2+1)      21:15   
11856          Pammi Travels           NON A/C Sleeper (2+1)      21:15   

      End_Time  Star_Ratings Total_Duration  Prices     Seats_Available  \
0        08:59          

In [16]:
import pandas as pd

# Load the per-state bus-detail DataFrames from their CSV files.
# Each file is read exactly once.
df_KL_bus_details = pd.read_csv("C:/Users/Dine24/RedBus/df_KL_bus_details.csv")
df_AA_bus_details = pd.read_csv("C:/Users/Dine24/RedBus/df_AA_bus_details.csv")
df_HY_bus_details = pd.read_csv("C:/Users/Dine24/RedBus/df_HY_bus_details.csv")
df_KA_bus_details = pd.read_csv("C:/Users/Dine24/RedBus/df_KA_bus_details.csv")
df_RA_bus_details = pd.read_csv("C:/Users/Dine24/RedBus/df_RA_bus_details.csv")
df_HP_bus_details = pd.read_csv("C:/Users/Dine24/RedBus/df_HP_bus_details.csv")
df_SB_bus_details = pd.read_csv("C:/Users/Dine24/RedBus/df_SB_bus_details.csv")
df_AS_bus_details = pd.read_csv("C:/Users/Dine24/RedBus/df_AS_bus_details.csv")
df_UP_bus_details = pd.read_csv("C:/Users/Dine24/RedBus/df_UP_bus_details.csv")
df_CH_bus_details = pd.read_csv("C:/Users/Dine24/RedBus/df_CH_bus_details.csv")
df_PU_bus_details = pd.read_csv("C:/Users/Dine24/RedBus/df_PU_bus_details.csv")
df_WB_bus_details = pd.read_csv("C:/Users/Dine24/RedBus/df_WB_bus_details.csv")


In [5]:
import pandas as pd

# File paths
df_combined_path = 'C:/Users/Dine24/RedBus/df_combined.csv'
df_redbus_details_path = 'C:/Users/Dine24/RedBus/df_redbus_details.csv'

try:
    # Load the DataFrames
    df_combined = pd.read_csv(df_combined_path)
    df_redbus_details = pd.read_csv(df_redbus_details_path)

    # Standardize column names so both frames share the join key 'route_link'
    df_combined = df_combined.rename(columns={'Route_link': 'route_link'})
    df_redbus_details = df_redbus_details.rename(columns={'Route_URL': 'route_link'})

    # Debug: Check if required columns exist
    if 'route_link' in df_combined.columns and 'route_link' in df_redbus_details.columns:
        print("Both DataFrames contain the 'route_link' column.")

        # Keep a single name per link.  A link that appears more than once in
        # the lookup would otherwise make the left merge emit every matching
        # bus row once per occurrence.
        # NOTE(review): the scraper logs in this notebook show the same URL in
        # more than one route list, so repeated links are expected here --
        # confirm against df_combined.csv.
        route_names = df_combined[['route_link', 'Route_name']].drop_duplicates(
            subset='route_link'
        )

        # Merge the two DataFrames; `validate` asserts the lookup stays unique
        # per link, so the bus-row count cannot grow.
        df_redbus_details = pd.merge(
            df_redbus_details,
            route_names,
            on='route_link',
            how='left',
            validate='many_to_one'
        )

        # Save the updated DataFrame
        updated_file_path = 'C:/Users/Dine24/RedBus/updated_df_redbus_details.csv'
        df_redbus_details.to_csv(updated_file_path, index=False)
        print(f"Updated file saved at {updated_file_path}")
    else:
        print("The required 'route_link' column is missing even after renaming.")
        print(f"Columns in df_combined: {df_combined.columns}")
        print(f"Columns in df_redbus_details: {df_redbus_details.columns}")

except FileNotFoundError as e:
    print(f"File not found: {e}")
except Exception as e:
    # Also reached when 'Route_name' is absent from df_combined (KeyError).
    print(f"An error occurred: {e}")


Both DataFrames contain the 'route_link' column.
Updated file saved at C:/Users/Dine24/RedBus/updated_df_redbus_details.csv
