In [None]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import time
import pandas as pd

# Create the browser session used by the scraper
def initialize_driver():
    """Start a Chrome WebDriver, maximize its window, and return it."""
    browser = webdriver.Chrome()
    browser.maximize_window()
    return browser

# Open RedBus and navigate to APSRTC Hyderabad to Vijayawada route
def open_redbus_and_navigate(driver):
    """Open redbus.in and click through to the APSRTC Hyderabad-Vijayawada page.

    Loads the home page, tries to dismiss a popup, then clicks in order:
    the "APSRTC" element, the "Hyderabad to Vijayawada" link, and the
    "APSRTC Buses" element. Each target is waited on (up to 20 seconds) until
    it is clickable, and every click is followed by a fixed 5-second pause.

    Args:
        driver: Selenium WebDriver instance to drive.
    """
    driver.get("https://www.redbus.in")
    wait = WebDriverWait(driver, 20)

    # Close popup if it appears. This is best effort: a missing or
    # unclickable popup must not abort navigation, but unlike a bare
    # ``except`` this no longer swallows KeyboardInterrupt/SystemExit.
    time.sleep(2)
    try:
        driver.find_element(By.CLASS_NAME, 'icon-close').click()
    except Exception as exc:
        print(f"Popup not closed ({type(exc).__name__})")
    else:
        print("Popup closed")

    # Click APSRTC filter
    apsrtc = wait.until(EC.element_to_be_clickable((By.XPATH, "//span[text()='APSRTC']")))
    apsrtc.click()
    time.sleep(5)

    # Open the Hyderabad to Vijayawada route link
    hyd_vij = wait.until(EC.element_to_be_clickable((By.XPATH, "//a[@title='Hyderabad to Vijayawada']")))
    hyd_vij.click()
    time.sleep(5)

    # Click the "APSRTC Buses" element on the route page
    apsrtc_buses = wait.until(EC.element_to_be_clickable((By.XPATH, "//span[text()='APSRTC Buses']")))
    apsrtc_buses.click()
    time.sleep(5)

# Scroll down until the page source stops changing
def scroll_until_end_with_page_source(driver, pause_seconds=3, max_scrolls=None):
    """Send PAGE_DOWN repeatedly until a scroll leaves ``page_source`` unchanged.

    After each PAGE_DOWN the function sleeps ``pause_seconds`` and compares
    the page source with the snapshot taken before the key press. The first
    scroll that produces no change is treated as the end of the list.

    Args:
        driver: Selenium WebDriver instance showing the page to scroll.
        pause_seconds: Seconds to wait after each PAGE_DOWN before comparing
            (adjust to the page's load speed). Defaults to 3.
        max_scrolls: Optional upper bound on the number of PAGE_DOWN presses.
            ``None`` (the default) scrolls until the page stops changing,
            which never terminates on a page whose source keeps changing.
    """
    scrolls_done = 0
    while max_scrolls is None or scrolls_done < max_scrolls:
        old_page_source = driver.page_source

        # Perform PAGE_DOWN using ActionChains
        ActionChains(driver).send_keys(Keys.PAGE_DOWN).perform()
        time.sleep(pause_seconds)
        scrolls_done += 1

        if driver.page_source == old_page_source:
            print("Reached end of list.")
            return

    # Only reachable when a cap was given and hit before the page settled.
    print(f"Stopped after {max_scrolls} scrolls without reaching end of list.")

# Scrape bus details from current page
def scrape_bus_details(driver):
    """Collect one dict per bus card found on the current page.

    Scrolls to the end of the page first, then reads five text fields from
    every ``<li>`` whose class contains ``tupleWrapper``. A card for which any
    lookup fails is reported with a printed message and left out of the result.

    Args:
        driver: Selenium WebDriver instance positioned on the bus listing.

    Returns:
        list[dict]: Dicts with the keys "Bus Name", "Operator Name",
        "Duration", "Review" and "Price", all holding element text.
    """
    # Call scrolling before scraping (presumably so every card is rendered
    # before they are collected -- confirm against the page's behavior).
    scroll_until_end_with_page_source(driver)

    bus_cards = driver.find_elements(By.XPATH, "//li[contains(@class,'tupleWrapper')]")
    print(f"{len(bus_cards)} buses found.")

    bus_details = []
    for bus in bus_cards:
        try:
            name = bus.find_element(By.XPATH, ".//div[contains(@class,'travelsName')]").text
            bus_type = bus.find_element(By.XPATH, ".//p[contains(@class,'busType')]").text
            rating = bus.find_element(By.XPATH, ".//div[contains(@class,'rating')]").text
            price = bus.find_element(By.XPATH, ".//p[contains(@class,'finalFare')]").text
            duration = bus.find_element(By.XPATH, ".//p[contains(@class,'duration')]").text
        except Exception as e:
            # Skip this card but keep going with the remaining ones.
            print(f"Error occurred: {e}")
            continue

        # NOTE(review): "Operator Name" is filled from the busType element and
        # "Bus Name" from travelsName -- confirm this labelling is intended.
        bus_details.append({
            "Bus Name": name,
            "Operator Name": bus_type,
            "Duration": duration,
            "Review": rating,
            "Price": price,
        })

    return bus_details

# Main execution: scrape the route listing and save it as CSV
driver = initialize_driver()
try:
    open_redbus_and_navigate(driver)

    all_bus_details = scrape_bus_details(driver)

    # Convert to DataFrame and save
    df = pd.DataFrame(all_bus_details)
    df.to_csv('test_scrapped.csv', index=False)
    print("Data saved ")
finally:
    # Always close the browser, even when navigation, scraping or saving
    # raised; otherwise the Chrome session would be left running.
    driver.quit()


In [None]:
import pymysql

In [None]:
import os

# Connect to the MySQL server. Host, user and password are read from the
# MYSQL_HOST / MYSQL_USER / MYSQL_PASSWORD environment variables when set, so
# real credentials do not have to live in the notebook; the previous literal
# values remain as defaults. autocommit=True is passed so statements take
# effect without an explicit commit.
con = pymysql.connect(
    host=os.environ.get("MYSQL_HOST", "localhost"),
    user=os.environ.get("MYSQL_USER", "root"),
    password=os.environ.get("MYSQL_PASSWORD", "12345"),
    autocommit=True,
)

In [None]:
# Open a cursor on the connection; the SQL statements in the cells below are
# all executed through it.
mycursor=con.cursor()

In [None]:
# Create the database for the scraped data. IF NOT EXISTS lets this cell be
# re-run without raising once the database has already been created.
mycursor.execute("CREATE DATABASE IF NOT EXISTS Bus_Details")

In [None]:
# Run SHOW DATABASES and print every row the cursor yields, one per line.
mycursor.execute("SHOW DATABASES")
for x in mycursor:
    print(x)

In [None]:
# Select the database for the statements that follow. The name is spelled
# exactly as in the CREATE DATABASE statement because MySQL database names are
# case-sensitive on servers with a case-sensitive file system.
mycursor.execute("USE Bus_Details")

In [None]:
# Create the AP_bus table. IF NOT EXISTS lets this cell be re-run without
# raising once the table exists; the column definitions are unchanged.
mycursor.execute(
    """
    CREATE TABLE IF NOT EXISTS AP_bus (
        id INT AUTO_INCREMENT PRIMARY KEY,
        bus_name VARCHAR(255),
        operator_name VARCHAR(255),
        duration VARCHAR(100),
        price VARCHAR(50),
        Rating FLOAT,
        No_Of_Ratings FLOAT,
        Review_Status VARCHAR(50)
    )
    """
)
