In [1]:
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException

def scrape_fir_data(url, date_from, date_to, city):
    """Scrape the FIR search results and save them to ``fir_data.csv``.

    Opens ``url`` in a Chrome session, types the search criteria into the
    form, submits it and walks the result grid page by page.  The first
    eight cells of every result row are collected and written, below a
    header row, to ``fir_data.csv`` in the current working directory.

    Args:
        url: Address of the search page to open.
        date_from: Text typed into the "date of registration from" field.
        date_to: Text typed into the "date of registration to" field.
        city: Text typed into the district field.

    Returns:
        None.  The result is the CSV file plus a confirmation line on stdout.

    Raises:
        Any error raised while driving the page (for example a timeout from
        one of the explicit waits).  The browser is closed before the error
        propagates, and no CSV file is written in that case.
    """
    table_id = "ContentPlaceHolder1_gdvDeadBody"
    # NOTE(review): the notebook's sample output shows values such as
    # "0341/2023" under "Sections" — confirm the eighth cell really holds
    # the sections and not a second FIR-number column.
    headers = ["Sr. No.", "State", "District", "Police Station", "Year", "FIR No.", "Registration Date", "Sections"]
    data = []

    driver = webdriver.Chrome()
    try:
        # The page is requested twice, exactly as before this rewrite.
        # NOTE(review): presumably the first request only sets up the
        # session — confirm whether the second one is needed.
        driver.get(url)
        driver.get(url)

        # Wait (up to 10 seconds each) for the search-form fields to exist.
        wait = WebDriverWait(driver, 10)
        date_input_from = wait.until(EC.presence_of_element_located((By.ID, "ContentPlaceHolder1_txtDateOfRegistrationFrom")))
        date_input_to = wait.until(EC.presence_of_element_located((By.ID, "ContentPlaceHolder1_txtDateOfRegistrationTo")))
        city_input = wait.until(EC.presence_of_element_located((By.ID, "ContentPlaceHolder1_ddlDistrict")))
        date_input_from.send_keys(date_from)
        date_input_to.send_keys(date_to)
        city_input.send_keys(city)
        driver.find_element(By.ID, "ContentPlaceHolder1_btnSearch").click()
        wait.until(EC.presence_of_element_located((By.ID, table_id)))

        # Scrape data on each page
        while True:
            fir_table = driver.find_element(By.ID, table_id)
            # The first <tr> is skipped (treated as the header row).
            for row in fir_table.find_elements(By.TAG_NAME, "tr")[1:]:
                cells = row.find_elements(By.TAG_NAME, "td")
                if len(cells) < len(headers):
                    # Not a full result row.  Skip just this row: the old
                    # blanket ``except: pass`` silently dropped every row
                    # that followed it on the page as well.
                    continue
                data.append([cell.text.strip() for cell in cells[:len(headers)]])

            # NOTE(review): if the pager has no element with this class the
            # loop ends after the first page — confirm the locator against
            # the live page.
            try:
                next_button = driver.find_element(By.XPATH, "//a[contains(@class, 'rgPageNext')]")
            except NoSuchElementException:
                break
            if "disabled" in next_button.get_attribute("class"):
                break
            next_button.click()
            # The old grid going stale signals that the next page has loaded.
            wait.until(EC.staleness_of(fir_table))
    finally:
        # Always close the browser, even when a step above fails.
        driver.quit()

    # Write data to a CSV file.  UTF-8 is set explicitly so the file does not
    # depend on the platform's default encoding and matches what
    # pandas.read_csv expects by default.
    filename = "fir_data.csv"
    with open(filename, "w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(headers)
        writer.writerows(data)

    print(f"Data extracted and stored in {filename}")


# Example usage: run one search with the parameters below
url = "https://citizen.mahapolice.gov.in/Citizen/MH/PublishedFIRs.aspx"
date_from, date_to = "01/01/2023", "01/02/2023"
city = "RAILWAY MUMBAI"
scrape_fir_data(url=url, date_from=date_from, date_to=date_to, city=city)


Data extracted and stored in fir_data.csv


In [2]:
import pandas as pd

In [3]:
# Load fir_data.csv (the file name scrape_fir_data writes to) into a DataFrame.
df = pd.read_csv("fir_data.csv")

In [4]:
# Preview the first five rows of the loaded data.
df.head(5)

Unnamed: 0,Sr. No.,State,District,Police Station,Year,FIR No.,Registration Date,Sections
0,1,MAHARASHTRA,RAILWAY MUMBAI,ANDHERI,2023,341,02/06/2023 23:52:00,0341/2023
1,2,MAHARASHTRA,RAILWAY MUMBAI,ANDHERI,2023,340,01/06/2023 15:26:00,0340/2023
2,3,MAHARASHTRA,RAILWAY MUMBAI,ANDHERI,2023,339,01/06/2023 12:41:00,0339/2023
3,4,MAHARASHTRA,RAILWAY MUMBAI,BANDRA,2023,378,02/06/2023 00:54:24,0378/2023
4,5,MAHARASHTRA,RAILWAY MUMBAI,BANDRA,2023,377,01/06/2023 22:48:53,0377/2023


### Implementation of the code

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [None]:
# Address of the published-FIR search page.  Nothing is fetched in this cell;
# the browser opens the address in a later cell.
url = "https://citizen.mahapolice.gov.in/Citizen/MH/PublishedFIRs.aspx"

In [None]:
# Start a Chrome session; the cells below all drive this `driver` object.
driver = webdriver.Chrome()

In [None]:
# Navigate to the website.
# NOTE(review): the page is requested twice in a row, and every copy of this
# step in the notebook does the same — confirm whether the second request is
# actually needed.
driver.get(url)
driver.get(url)

In [None]:
# Wait up to 10 seconds for each search-form field to be present in the DOM:
# the two registration-date inputs and the district field.
wait = WebDriverWait(driver, 10)
date_input_from = wait.until(EC.presence_of_element_located((By.ID, "ContentPlaceHolder1_txtDateOfRegistrationFrom")))
date_input_to = wait.until(EC.presence_of_element_located((By.ID, "ContentPlaceHolder1_txtDateOfRegistrationTo")))
city_input = wait.until(EC.presence_of_element_located((By.ID, "ContentPlaceHolder1_ddlDistrict")))

In [None]:
# Type the search criteria into the form: the same date for both ends of the
# registration-date range, plus the district.
date_input_from.send_keys("01/01/2023")
date_input_to.send_keys("01/01/2023")
city_input.send_keys("RAILWAY MUMBAI")

In [None]:
# Locate the search button and click it to submit the form.
search_button = driver.find_element(By.ID, "ContentPlaceHolder1_btnSearch")
search_button.click()

In [None]:
# Block until the results grid is present in the DOM (the wait created above
# gives up after 10 seconds).
wait.until(EC.presence_of_element_located((By.ID, "ContentPlaceHolder1_gdvDeadBody")))

In [None]:
# Grab the results grid and all of its <tr> elements; [1:] drops the first
# one (presumably the header row — confirm against the page).
fir_table = driver.find_element(By.ID, "ContentPlaceHolder1_gdvDeadBody")
rows = fir_table.find_elements(By.TAG_NAME, "tr")[1:]

In [None]:
# Print the key fields of every result row collected in `rows`.
for row in rows:
    cells = row.find_elements(By.TAG_NAME, "td")
    if len(cells) < 7:
        # Too few cells to be a result row.  Skip only this row instead of
        # swallowing every exception (which also hid all the rows after it).
        continue

    # Cell positions follow the column order recorded by the CSV header in
    # this notebook: Sr. No., State, District, Police Station, Year, FIR No.,
    # Registration Date, Sections.  Cell 0 is the serial number, not the FIR
    # number, and district comes before police station.
    fir_number = cells[5].text.strip()
    fir_date = cells[6].text.strip()  # the grid's "Registration Date" column
    police_station = cells[3].text.strip()
    district = cells[2].text.strip()

    # Print or process the extracted data as needed
    print("FIR Number:", fir_number)
    print("FIR Date:", fir_date)
    print("Police Station:", police_station)
    print("District:", district)
    print("-" * 50)

In [None]:
# Take the first link whose href contains 'Page$'.
# NOTE(review): find_element returns the first match in the document, which
# is not necessarily the link to the *next* page — confirm.
next_button = driver.find_element(By.XPATH, "//a[contains(@href, 'Page$')]")

In [None]:
# Follow the pager link located in the previous cell.
next_button.click()

In [None]:
import time

In [None]:
# Scrape data on each page
import re  # only this cell needs it

fir_records = []  # one dict per result row, accumulated across pages
current_page = 1  # highest page number followed so far

try:
    while True:
        # Find and extract the data on the current page
        fir_table = driver.find_element(By.ID, "ContentPlaceHolder1_gdvDeadBody")
        # [1:] drops the first <tr> once; slicing a second time in the loop
        # header used to skip the first data row of every page.
        rows = fir_table.find_elements(By.TAG_NAME, "tr")[1:]
        for row in rows:
            cells = row.find_elements(By.TAG_NAME, "td")
            if len(cells) < 7:
                continue  # not a result row
            # Column order as recorded by the CSV header in this notebook:
            # Sr. No., State, District, Police Station, Year, FIR No.,
            # Registration Date, Sections.
            fir_records.append({
                "fir_number": cells[5].text.strip(),
                "fir_date": cells[6].text.strip(),
                "police_station": cells[3].text.strip(),
                "district": cells[2].text.strip(),
            })

        # Pick the pager link with the lowest page number above the current
        # one.  find_elements returns an empty list when nothing matches,
        # whereas find_element raises, so the old `if not next_button` exit
        # could never fire.
        # Assumes pager hrefs embed the target as Page$<number> — TODO confirm
        # against the live page; links without a number are ignored.
        next_page, next_link = None, None
        for link in driver.find_elements(By.XPATH, "//a[contains(@href, 'Page$')]"):
            match = re.search(r"Page\$(\d+)", link.get_attribute("href") or "")
            if match is None:
                continue
            page = int(match.group(1))
            if page > current_page and (next_page is None or page < next_page):
                next_page, next_link = page, link

        if next_link is None:
            break  # Exit the loop if there is no link to a later page

        # Click the link to the next page
        next_link.click()
        current_page = next_page

        # Wait for the next page to load
        time.sleep(2)  # Add a short delay to allow the page to load
finally:
    # Close the browser, even if a step above failed
    driver.quit()

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException

def scrape_fir_data(url, date_from, date_to, city):
    """Scrape the FIR search results and print every result row.

    Opens ``url`` in a Chrome session, types the search criteria into the
    form, submits it and walks the result grid page by page, printing the
    fields of each result row to stdout.

    Args:
        url: Address of the search page to open.
        date_from: Text typed into the "date of registration from" field.
        date_to: Text typed into the "date of registration to" field.
        city: Text typed into the district field.

    Returns:
        None.  The rows are only printed.

    Raises:
        Any error raised while driving the page (for example a timeout from
        one of the explicit waits).  The browser is closed before the error
        propagates.
    """
    table_id = "ContentPlaceHolder1_gdvDeadBody"
    required_cells = 8  # highest cell index read below is 7

    driver = webdriver.Chrome()
    try:
        # The page is requested twice, exactly as before this rewrite.
        # NOTE(review): presumably the first request only sets up the
        # session — confirm whether the second one is needed.
        driver.get(url)
        driver.get(url)

        # Wait (up to 10 seconds each) for the search-form fields to exist.
        wait = WebDriverWait(driver, 10)
        date_input_from = wait.until(EC.presence_of_element_located((By.ID, "ContentPlaceHolder1_txtDateOfRegistrationFrom")))
        date_input_to = wait.until(EC.presence_of_element_located((By.ID, "ContentPlaceHolder1_txtDateOfRegistrationTo")))
        city_input = wait.until(EC.presence_of_element_located((By.ID, "ContentPlaceHolder1_ddlDistrict")))
        date_input_from.send_keys(date_from)
        date_input_to.send_keys(date_to)
        city_input.send_keys(city)
        driver.find_element(By.ID, "ContentPlaceHolder1_btnSearch").click()
        wait.until(EC.presence_of_element_located((By.ID, table_id)))

        # Scrape data on each page
        while True:
            fir_table = driver.find_element(By.ID, table_id)
            # The first <tr> is skipped (treated as the header row).
            for row in fir_table.find_elements(By.TAG_NAME, "tr")[1:]:
                cells = row.find_elements(By.TAG_NAME, "td")
                if len(cells) < required_cells:
                    # Not a full result row.  Skip just this row: the old
                    # blanket ``except: pass`` silently dropped every row
                    # that followed it on the page as well.
                    continue

                # Column order as recorded by the CSV header in this
                # notebook: Sr. No., State, District, Police Station, Year,
                # FIR No., Registration Date, Sections.  The labels below
                # now match those positions; cell 1 (the state) used to be
                # printed as "FIR Date".
                state = cells[1].text.strip()
                district = cells[2].text.strip()
                police_station = cells[3].text.strip()
                year = cells[4].text.strip()
                fir_number = cells[5].text.strip()
                registration_date = cells[6].text.strip()
                sections = cells[7].text.strip()

                # Print or process the extracted data as needed
                print("FIR Number:", fir_number)
                print("State:", state)
                print("Police Station:", police_station)
                print("District:", district)
                print("year:", year)
                print("registration_date:",registration_date)
                print("sections:",sections)
                print("")
                print("-" * 50)

            # NOTE(review): if the pager has no element with this class the
            # loop ends after the first page — confirm the locator against
            # the live page.
            try:
                next_button = driver.find_element(By.XPATH, "//a[contains(@class, 'rgPageNext')]")
            except NoSuchElementException:
                break
            if "disabled" in next_button.get_attribute("class"):
                break
            next_button.click()
            # The old grid going stale signals that the next page has loaded.
            wait.until(EC.staleness_of(fir_table))
    finally:
        # Always close the browser, even when a step above fails.
        driver.quit()


# Example usage: run one search with the parameters below
url = "https://citizen.mahapolice.gov.in/Citizen/MH/PublishedFIRs.aspx"
date_from, date_to = "01/06/2023", "01/06/2023"
city = "AKOLA"
scrape_fir_data(url=url, date_from=date_from, date_to=date_to, city=city)


In [7]:
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException

def scrape_fir_data(url, date_from, date_to, city):
    """Scrape the FIR search results and save them to ``fir_data.csv``.

    Opens ``url`` in a Chrome session, types the search criteria into the
    form, submits it and walks the result grid page by page.  The first
    eight cells of every result row are collected and written, below a
    header row, to ``fir_data.csv`` in the current working directory.

    Args:
        url: Address of the search page to open.
        date_from: Text typed into the "date of registration from" field.
        date_to: Text typed into the "date of registration to" field.
        city: Text typed into the district field.

    Returns:
        None.  The result is the CSV file plus a confirmation line on stdout.

    Raises:
        Any error raised while driving the page (for example a timeout from
        one of the explicit waits).  The browser is closed before the error
        propagates, and no CSV file is written in that case.
    """
    table_id = "ContentPlaceHolder1_gdvDeadBody"
    # NOTE(review): the notebook's sample output shows values such as
    # "0341/2023" under "Sections" — confirm the eighth cell really holds
    # the sections and not a second FIR-number column.
    headers = ["Sr. No.", "State", "District", "Police Station", "Year", "FIR No.", "Registration Date", "Sections"]
    data = []

    driver = webdriver.Chrome()
    try:
        # The page is requested twice, exactly as before this rewrite.
        # NOTE(review): presumably the first request only sets up the
        # session — confirm whether the second one is needed.
        driver.get(url)
        driver.get(url)

        # Wait (up to 10 seconds each) for the search-form fields to exist.
        wait = WebDriverWait(driver, 10)
        date_input_from = wait.until(EC.presence_of_element_located((By.ID, "ContentPlaceHolder1_txtDateOfRegistrationFrom")))
        date_input_to = wait.until(EC.presence_of_element_located((By.ID, "ContentPlaceHolder1_txtDateOfRegistrationTo")))
        city_input = wait.until(EC.presence_of_element_located((By.ID, "ContentPlaceHolder1_ddlDistrict")))
        date_input_from.send_keys(date_from)
        date_input_to.send_keys(date_to)
        city_input.send_keys(city)
        driver.find_element(By.ID, "ContentPlaceHolder1_btnSearch").click()
        wait.until(EC.presence_of_element_located((By.ID, table_id)))

        # Scrape data on each page
        while True:
            fir_table = driver.find_element(By.ID, table_id)
            # The first <tr> is skipped (treated as the header row).
            for row in fir_table.find_elements(By.TAG_NAME, "tr")[1:]:
                cells = row.find_elements(By.TAG_NAME, "td")
                if len(cells) < len(headers):
                    # Not a full result row.  Skip just this row: the old
                    # blanket ``except: pass`` silently dropped every row
                    # that followed it on the page as well.
                    continue
                data.append([cell.text.strip() for cell in cells[:len(headers)]])

            # NOTE(review): if the pager has no element with this class the
            # loop ends after the first page — confirm the locator against
            # the live page.
            try:
                next_button = driver.find_element(By.XPATH, "//a[contains(@class, 'rgPageNext')]")
            except NoSuchElementException:
                break
            if "disabled" in next_button.get_attribute("class"):
                break
            next_button.click()
            # The old grid going stale signals that the next page has loaded.
            wait.until(EC.staleness_of(fir_table))
    finally:
        # Always close the browser, even when a step above fails.
        driver.quit()

    # Write data to a CSV file.  UTF-8 is set explicitly so the file does not
    # depend on the platform's default encoding and matches what
    # pandas.read_csv expects by default.
    filename = "fir_data.csv"
    with open(filename, "w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(headers)
        writer.writerows(data)

    print(f"Data extracted and stored in {filename}")


# Example usage: run one search with the parameters below
url = "https://citizen.mahapolice.gov.in/Citizen/MH/PublishedFIRs.aspx"
date_from, date_to = "01/06/2023", "01/06/2023"
city = "AKOLA"
scrape_fir_data(url=url, date_from=date_from, date_to=date_to, city=city)


Data extracted and stored in fir_data.csv
