In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import time
import pandas as pd
import traceback

# BUS ROOTS & LINKS

In [274]:
driver = webdriver.Chrome()

wait = WebDriverWait(driver, 30)

driver.get("https://www.redbus.in/online-booking/astc")
time.sleep(3)
routedata = [] 

def scrape_page():
    
    routescontainer = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, "route_link")))
    for route in routescontainer:
        
        routename = route.find_element(By.CSS_SELECTOR,"a[class='route']").text
        routelink = route.find_element(By.CSS_SELECTOR,"a[class='route']").get_attribute('href')
        
        routedata.append({
            'State' : "ASTC",
            'routename': routename, 
            'routelink': routelink})
driver.implicitly_wait(3)

for page_number in range(1, 6):
    print(f"Scraping page {page_number}")
    scrape_page()
    if page_number < 5:
        try:
            pagination_container = wait.until(EC.presence_of_element_located(
                (By.XPATH, '//*[@class="DC_117_paginationTable"]')
            ))
            next_page_button = pagination_container.find_element(
                By.XPATH, f'.//div[contains(@class, "DC_117_pageTabs") and text()="{page_number + 1}"]'
            )
            actions = ActionChains(driver)
            actions.move_to_element(next_page_button).perform()
            time.sleep(3) 

            print(f"Clicking on page {page_number + 1}")
            next_page_button.click()
            wait.until(EC.text_to_be_present_in_element(
                (By.XPATH, '//div[contains(@class, "DC_117_pageTabs DC_117_pageActive")]'), str(page_number + 1)))
            print(f"Successfully navigated to page {page_number + 1}")
            time.sleep(3)
        except Exception as e:
            print(f"An error occurred while navigating to page {page_number + 1}: {e}")
            break

driver.quit()

Scraping page 1
Clicking on page 2
Successfully navigated to page 2
Scraping page 2
Clicking on page 3
Successfully navigated to page 3
Scraping page 3
Clicking on page 4
Successfully navigated to page 4
Scraping page 4
Clicking on page 5
Successfully navigated to page 5
Scraping page 5


In [276]:
routedata

[{'State': 'ASTC',
  'routename': 'Tezpur to Guwahati',
  'routelink': 'https://www.redbus.in/bus-tickets/tezpur-to-guwahati'},
 {'State': 'ASTC',
  'routename': 'Guwahati to Tezpur',
  'routelink': 'https://www.redbus.in/bus-tickets/guwahati-to-tezpur'},
 {'State': 'ASTC',
  'routename': 'Nagaon (Assam) to Guwahati',
  'routelink': 'https://www.redbus.in/bus-tickets/nagaon-to-guwahati'},
 {'State': 'ASTC',
  'routename': 'Guwahati to Nagaon (Assam)',
  'routelink': 'https://www.redbus.in/bus-tickets/guwahati-to-nagaon'},
 {'State': 'ASTC',
  'routename': 'Goalpara to Guwahati',
  'routelink': 'https://www.redbus.in/bus-tickets/goalpara-to-guwahati'},
 {'State': 'ASTC',
  'routename': 'Jorhat to North Lakhimpur',
  'routelink': 'https://www.redbus.in/bus-tickets/jorhat-to-north-lakhimpur'},
 {'State': 'ASTC',
  'routename': 'Dhubri to Guwahati',
  'routelink': 'https://www.redbus.in/bus-tickets/dhubri-to-guwahati'},
 {'State': 'ASTC',
  'routename': 'Guwahati to Dhubri',
  'routelink':

#  BUS DETAILS

In [278]:

driver = webdriver.Chrome()

wait = WebDriverWait(driver, 30)
busdata = []

for item in routedata:
    link = item['routelink']
    route =item['routename']
    driver.get(link)

    time.sleep(15)

    # clickable = driver.find_element(By.CSS_SELECTOR,""div[class='button']"").click()


    def scrape_page(current_bus_route):
        container = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR,"li[class='row-sec clearfix']")))

        for route in container:
            busname = route.find_element(By.CSS_SELECTOR,"div[class='travels lh-24 f-bold d-color']").text
            bustype = route.find_element(By.CSS_SELECTOR,"div[class='bus-type f-12 m-top-16 l-color evBus']").text
            departingtime = route.find_element(By.CSS_SELECTOR,"div[class='dp-time f-19 d-color f-bold']").text
            duration = route.find_element(By.CSS_SELECTOR,"div[class='dur l-color lh-24']").text
            reachingtime = route.find_element(By.CSS_SELECTOR,"div[class='bp-time f-19 d-color disp-Inline']").text
            price = route.find_element(By.CSS_SELECTOR,".fare span.f-19").text
            seatavailability = route.find_element(By.CSS_SELECTOR,"div[class='column-eight w-15 fl']").text  #seat-left m-top-30
            starrating = route.find_elements(By.XPATH, ".//div[@class='rating-sec lh-24']//span")
            if starrating:
                for rating in starrating:
                    starrating = rating.text
            else:
                starrating = 0.0

            busdata.append({
            "States" :  'ASTC',
            "Bus_route" : current_bus_route,
            "Bus_name" : busname,
            "Bus_type" : bustype,
            "Departing_time" : departingtime,
            "Duration" : duration,
            "Reaching_time" : reachingtime,
            "star_rating" : starrating,
            "Price" : price,
            "Seat_availability" : seatavailability
            })

    def scroll_and_collect_data():
        
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(1)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height
            
    scroll_and_collect_data()
    scrape_page(route)
        
driver.quit()

In [280]:
busdata

[{'States': 'ASTC',
  'Bus_route': 'Tezpur to Guwahati',
  'Bus_name': 'Assam State Transport Corporation (ASTC) - 147480',
  'Bus_type': 'Bharat Benz A/C Seater (2+2)',
  'Departing_time': '05:00',
  'Duration': '04h 00m',
  'Reaching_time': '09:00',
  'star_rating': '3.9',
  'Price': '270',
  'Seat_availability': '22 Seats available\n8 Window'},
 {'States': 'ASTC',
  'Bus_route': 'Tezpur to Guwahati',
  'Bus_name': 'Assam State Transport Corporation (ASTC) - 135927',
  'Bus_type': 'Volvo AC Seater 2+2',
  'Departing_time': '07:10',
  'Duration': '04h 30m',
  'Reaching_time': '11:40',
  'star_rating': '4.3',
  'Price': '298',
  'Seat_availability': '29 Seats available\n11 Window'},
 {'States': 'ASTC',
  'Bus_route': 'Tezpur to Guwahati',
  'Bus_name': 'Assam State Transport Corporation (ASTC) - 156885',
  'Bus_type': 'Bharat Benz A/C Seater (2+2)',
  'Departing_time': '08:00',
  'Duration': '04h 30m',
  'Reaching_time': '12:30',
  'star_rating': '4.3',
  'Price': '298',
  'Seat_availa

In [282]:
df = pd.DataFrame(busdata)
print(df)

    States              Bus_route  \
0     ASTC     Tezpur to Guwahati   
1     ASTC     Tezpur to Guwahati   
2     ASTC     Tezpur to Guwahati   
3     ASTC     Tezpur to Guwahati   
4     ASTC     Tezpur to Guwahati   
..     ...                    ...   
470   ASTC  Dibrugarh to Bokakhat   
471   ASTC  Dibrugarh to Bokakhat   
472   ASTC  Dibrugarh to Bokakhat   
473   ASTC  Dibrugarh to Bokakhat   
474   ASTC  Dibrugarh to Bokakhat   

                                              Bus_name  \
0    Assam State Transport Corporation (ASTC) - 147480   
1    Assam State Transport Corporation (ASTC) - 135927   
2    Assam State Transport Corporation (ASTC) - 156885   
3    Assam State Transport Corporation (ASTC) - 170878   
4    Assam State Transport Corporation (ASTC) - 157959   
..                                                 ...   
470                               Purple Wings Coaches   
471                     Shree Ram Travels (Under ASTC)   
472                              

In [284]:
df.to_csv('ASTC.csv', index=False)

In [81]:
routedata

[{'State': 'KERALA RTC',
  'routename': 'Bangalore to Kozhikode',
  'routelink': 'https://www.redbus.in/bus-tickets/bangalore-to-kozhikode'},
 {'State': 'KERALA RTC',
  'routename': 'Kozhikode to Ernakulam',
  'routelink': 'https://www.redbus.in/bus-tickets/kozhikode-to-ernakulam'},
 {'State': 'KERALA RTC',
  'routename': 'Kozhikode to Bangalore',
  'routelink': 'https://www.redbus.in/bus-tickets/kozhikode-to-bangalore'},
 {'State': 'KERALA RTC',
  'routename': 'Ernakulam to Kozhikode',
  'routelink': 'https://www.redbus.in/bus-tickets/ernakulam-to-kozhikode'},
 {'State': 'KERALA RTC',
  'routename': 'Kozhikode to Mysore',
  'routelink': 'https://www.redbus.in/bus-tickets/kozhikode-to-mysore'},
 {'State': 'KERALA RTC',
  'routename': 'Kozhikode to Thiruvananthapuram',
  'routelink': 'https://www.redbus.in/bus-tickets/kozhikode-to-thiruvananthapuram'},
 {'State': 'KERALA RTC',
  'routename': 'Bangalore to Kalpetta (kerala)',
  'routelink': 'https://www.redbus.in/bus-tickets/bangalore-to

In [286]:
import glob
 
# list all csv files only
csv_files = glob.glob('*.{}'.format('csv'))
csv_files

['APSRTC.csv',
 'ASTC.csv',
 'BSRTC.csv',
 'HRTC.csv',
 'KRSTC_RTC.csv',
 'PEPSU_(Punjab).csv',
 'RSRTC.csv',
 'SBSTC.csv',
 'UPSRTC.csv',
 'WBTC(CTC).csv']

In [288]:
l = []
 
for f in csv_files:
    l.append(pd.read_csv(f))
     
bus = pd.concat(l, ignore_index=True)


In [290]:
bus

Unnamed: 0,States,Bus_route,Bus_name,Bus_type,Departing_time,Duration,Reaching_time,star_rating,Price,Seat_availability
0,APSRTC,Vijayawada to Hyderabad,APSRTC - 3657,AMARAVATHI (VOLVO / SCANIA A.C Multi Axle),10:30,05h 55m,16:25,3.9,720.0,25 Seats available\n10 Window
1,APSRTC,Vijayawada to Hyderabad,APSRTC - 3513,"SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)",10:39,09h 06m,19:45,2.1,469.0,33 Seats available
2,APSRTC,Vijayawada to Hyderabad,APSRTC - 9400,"SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)",10:45,06h 45m,17:30,3.3,469.0,33 Seats available
3,APSRTC,Vijayawada to Hyderabad,APSRTC - 3858,DOLPHIN CRUISE (VOLVO / SCANIA A.C Multi Axle),11:30,05h 00m,16:30,3.9,670.0,46 Seats available\n21 Window
4,APSRTC,Vijayawada to Hyderabad,APSRTC - 9356,"SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)",11:30,06h 35m,18:05,4.0,469.0,33 Seats available
...,...,...,...,...,...,...,...,...,...,...
13158,WBTC_(CTC),Habra to Contai (Kanthi),WBTC (CTC) HABRA-DIGHA via Bally - 26|12:45,Non AC Seater (2+3),12:45,04h 25m,17:10,3.5,149.0,39 Seats available\n9 Window
13159,WBTC_(CTC),Habra to Contai (Kanthi),Santosh Bus Service,A/C Seater Push Back (2+3),05:10,04h 20m,09:30,4.6,416.0,3 Seats available\n1 Window
13160,WBTC_(CTC),Habra to Midnapore,WBTC (CTC) Habra-Midnapur - 194|05:25,Non AC Seater (2+3),05:25,04h 10m,09:35,4.5,138.0,46 Seats available\n1 Single
13161,WBTC_(CTC),Habra to Midnapore,WBTC (CTC) Habra-Midnapur - 194|05:55,Non AC Seater (2+3),05:55,04h 20m,10:15,4.3,138.0,39 Seats available\n16 Window


In [292]:
bus.to_csv('redbus.csv', index=False)

# sql_connecter

In [295]:
import mysql.connector as db

In [297]:
db_connection = db.connect(
    
    host = "localhost",
    user = "root",
    password = "Yuva@4435",
    database = "redbus"

    
)

In [299]:
db_connection

<mysql.connector.connection_cext.CMySQLConnection at 0x13147c50ce0>

In [321]:

curr = db_connection.cursor()

In [323]:
curr.execute(""" select * from detail """)

In [325]:
data = curr.fetchall()

In [327]:
data

[(1,
  'APSRTC',
  'Vijayawada to Hyderabad',
  'APSRTC - 3657',
  'AMARAVATHI (VOLVO / SCANIA A.C Multi Axle)',
  datetime.timedelta(seconds=37800),
  '05h 55m',
  datetime.timedelta(seconds=59100),
  3.9,
  720.0,
  '25 Seats available\n10 Window'),
 (2,
  'APSRTC',
  'Vijayawada to Hyderabad',
  'APSRTC - 3513',
  'SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)',
  datetime.timedelta(seconds=38340),
  '09h 06m',
  datetime.timedelta(seconds=71100),
  2.1,
  469.0,
  '33 Seats available'),
 (3,
  'APSRTC',
  'Vijayawada to Hyderabad',
  'APSRTC - 9400',
  'SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)',
  datetime.timedelta(seconds=38700),
  '06h 45m',
  datetime.timedelta(seconds=63000),
  3.3,
  469.0,
  '33 Seats available'),
 (4,
  'APSRTC',
  'Vijayawada to Hyderabad',
  'APSRTC - 3858',
  'DOLPHIN CRUISE (VOLVO / SCANIA A.C Multi Axle)',
  datetime.timedelta(seconds=41400),
  '05h 00m',
  datetime.timedelta(seconds=59400),
  3.9,
  670.0,
  '46 Seats available\n21 Window'),
 (5,
  'APSRTC',
 

In [329]:
col = [i[0] for i in curr.description]

In [331]:
db_table = pd.DataFrame(data,columns=col)

In [333]:
db_table

Unnamed: 0,ID,States,Bus_route,Bus_name,Bus_type,Departing_time,Duration,Reaching_time,star_rating,Price,Seat_availability
0,1,APSRTC,Vijayawada to Hyderabad,APSRTC - 3657,AMARAVATHI (VOLVO / SCANIA A.C Multi Axle),0 days 10:30:00,05h 55m,0 days 16:25:00,3.9,720.0,25 Seats available\n10 Window
1,2,APSRTC,Vijayawada to Hyderabad,APSRTC - 3513,"SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)",0 days 10:39:00,09h 06m,0 days 19:45:00,2.1,469.0,33 Seats available
2,3,APSRTC,Vijayawada to Hyderabad,APSRTC - 9400,"SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)",0 days 10:45:00,06h 45m,0 days 17:30:00,3.3,469.0,33 Seats available
3,4,APSRTC,Vijayawada to Hyderabad,APSRTC - 3858,DOLPHIN CRUISE (VOLVO / SCANIA A.C Multi Axle),0 days 11:30:00,05h 00m,0 days 16:30:00,3.9,670.0,46 Seats available\n21 Window
4,5,APSRTC,Vijayawada to Hyderabad,APSRTC - 9356,"SUPER LUXURY (NON-AC, 2 + 2 PUSH BACK)",0 days 11:30:00,06h 35m,0 days 18:05:00,4.0,469.0,33 Seats available
...,...,...,...,...,...,...,...,...,...,...,...
13157,13158,WBTC_(CTC),Habra to Contai (Kanthi),WBTC (CTC) HABRA-DIGHA via Bally - 26|12:45,Non AC Seater (2+3),0 days 12:45:00,04h 25m,0 days 17:10:00,3.5,149.0,39 Seats available\n9 Window
13158,13159,WBTC_(CTC),Habra to Contai (Kanthi),Santosh Bus Service,A/C Seater Push Back (2+3),0 days 05:10:00,04h 20m,0 days 09:30:00,4.6,416.0,3 Seats available\n1 Window
13159,13160,WBTC_(CTC),Habra to Midnapore,WBTC (CTC) Habra-Midnapur - 194|05:25,Non AC Seater (2+3),0 days 05:25:00,04h 10m,0 days 09:35:00,4.5,138.0,46 Seats available\n1 Single
13160,13161,WBTC_(CTC),Habra to Midnapore,WBTC (CTC) Habra-Midnapur - 194|05:55,Non AC Seater (2+3),0 days 05:55:00,04h 20m,0 days 10:15:00,4.3,138.0,39 Seats available\n16 Window
