In [49]:
import re

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Accumulators filled by extract_data(): one entry per scraped row, kept in
# lockstep so index i of each list describes the same row.
std_codes, cities, states = [], [], []

# Chrome runs headless so no browser window is opened while scraping
options = webdriver.ChromeOptions()
options.add_argument('--headless')

# Point Selenium at the local chromedriver executable
webdriver_service = Service('C:\\chromedriver.exe')

# Start the browser session used by the rest of the script
driver = webdriver.Chrome(options=options, service=webdriver_service)

# Function to extract data using BeautifulSoup
def extract_data(html_content, state):
    """Parse one results page and append its rows to the module-level lists.

    Every ``<div class="floatLEFT fullwidth">`` is treated as one row: the
    text of the first link inside it is taken as the STD code and the text
    of the next link after that one as the city name.

    Args:
        html_content: HTML source of the page to parse.
        state: State label stored alongside every row found on this page.

    Returns:
        None. Results are appended to ``std_codes``, ``cities`` and
        ``states``, one entry per row in each list.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    for div in soup.find_all('div', class_='floatLEFT fullwidth'):
        code_link = div.find('a', href=True)
        if code_link is None:
            # A matching div without links (e.g. a layout wrapper) is not a
            # data row; skip it instead of failing on ``None.text``.
            continue

        city_link = code_link.find_next('a', href=True)
        if city_link is None:
            # No second link follows, so there is no city for this row.
            continue

        # Append to all three lists together so they stay the same length.
        std_codes.append(code_link.text.strip())
        cities.append(city_link.text.strip())
        states.append(state)

# Website URLs: one listing page per state, built from the state's URL slug
# and the numeric id the site uses for it. The order below is the crawl order.
urls = [
    f'https://www.stdcodesin.com/{slug}-std-code/{state_id}/s1'
    for slug, state_id in (
        ('Andaman-And-Nicobar', 1),
        ('Andhra-Pradesh', 2),
        ('Arunachal-Pradesh', 3),
        ('Assam', 4),
        ('Bihar', 5),
        ('Chandigarh', 6),
        ('Chhattisgarh', 7),
        ('Dadra-And-Nagar-Haveli', 8),
        ('Daman-And-Diu', 9),
        ('Goa', 10),
        ('Gujarat', 11),
        ('Haryana', 12),
        ('Himachal-Pradesh', 13),
        ('Jammu-And-Kashmir', 14),
        ('Jharkhand', 15),
        ('Karnataka', 16),
        ('Kerala', 17),
        ('Lakshadweep', 18),
        ('Madhya-Pradesh', 19),
        ('Maharashtra', 20),
        ('Manipur', 21),
        ('Meghalaya', 22),
        ('Mizoram', 23),
        ('Nagaland', 24),
        ('New-Delhi', 25),
        ('North-East', 36),
        ('Orissa', 26),
        ('Pondicherry', 27),
        ('Punjab', 28),
        ('Rajasthan', 29),
        ('Sikkim', 30),
        ('Tamil-Nadu', 31),
        ('Tripura', 32),
        ('Uttar-Pradesh', 33),
        ('Uttrakhand', 34),
    )
]

# Loop through the URLs and collect data for each state.
# The try/finally guarantees the browser session is shut down even if a page
# fails to load or parse part-way through the crawl.
try:
    for url in urls:
        # The state label is the URL slug preceding "-std-code"
        state = re.search(r'\/([A-Za-z\-]+)-std-code\/', url).group(1)

        print(f"Processing {state}...")

        # Navigate to the website and extract data from the initial page
        driver.get(url)
        extract_data(driver.page_source, state)

        # Find the number of pages. A missing pager label is treated as
        # "only one page"; a label whose text is not a number is handled the
        # same way instead of aborting the whole crawl.
        try:
            number_of_pages = int(
                driver.find_element(By.ID, 'BC_GV_GridViewPager1_LabelNumberOfPages').text
            )
        except (NoSuchElementException, ValueError):
            continue

        # Loop through the remaining pages and collect the data
        for page in range(2, number_of_pages + 1):
            try:
                next_page_button = driver.find_element(
                    By.XPATH, '//*[@id="BC_GV_GridViewPager1_ImageButtonNext"]'
                )
            except NoSuchElementException:
                # No "next" control on this page: nothing more to fetch
                break

            # Scroll to the next page button, then click it
            driver.execute_script("arguments[0].scrollIntoView();", next_page_button)
            next_page_button.click()
            print("Navigating to Next Page")

            # implicitly_wait() only configures element-lookup timeouts; it
            # does not pause. Block until the clicked button is detached from
            # the DOM, which signals that the page content was replaced, so
            # page_source below is the new page rather than the old one.
            # NOTE(review): assumes the pager is re-rendered on every page
            # change - confirm against the live site.
            try:
                WebDriverWait(driver, 10).until(EC.staleness_of(next_page_button))
            except TimeoutException:
                # Page never refreshed; stop here rather than re-scrape the
                # same page and record duplicate rows.
                print(f"Timed out waiting for page {page} of {state}; skipping remaining pages")
                break

            # Extract data from the current page
            extract_data(driver.page_source, state)
finally:
    # Close the driver
    driver.quit()

# Create a DataFrame with the extracted data
df = pd.DataFrame({'code': std_codes, 'name': cities, 'state': states})

# Print the DataFrame without truncating rows or columns
with pd.option_context('display.max_columns', None, 'display.max_rows', None):
    display(df)


Processing Andaman-And-Nicobar...
Processing Andhra-Pradesh...
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
Navigating to Next Page
N

Unnamed: 0,code,name,state
0,31928,Bidhgunj,Andaman-And-Nicobar
1,31926,Campbollbay,Andaman-And-Nicobar
2,3193,Car Nicobar,Andaman-And-Nicobar
3,3192,Chouldari,Andaman-And-Nicobar
4,31927,Diglipur,Andaman-And-Nicobar
5,31928,Ferrangannj,Andaman-And-Nicobar
6,31928,Garcachazma,Andaman-And-Nicobar
7,31928,Guptapara,Andaman-And-Nicobar
8,31928,Havclock,Andaman-And-Nicobar
9,31928,Hutbay,Andaman-And-Nicobar
