In [22]:
# %% Part 1: Imports
import os
import csv
import datetime
import base64
import time
import requests
from io import BytesIO

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager

from PIL import Image
import pytesseract
import cv2
import numpy as np


In [23]:
# %% Part 2: Date Range
# Search window: the 10 days up to and including today.
today = datetime.date.today()
to_date = today
from_date = to_date - datetime.timedelta(days=10)

# Echo both bounds in dd/mm/YYYY form.
for label, bound in (("From Date:", from_date), ("To Date:", to_date)):
    print(label, bound.strftime("%d/%m/%Y"))


From Date: 04/09/2025
To Date: 14/09/2025


In [24]:
# Part 3: Selenium setup (headless)
url = "https://hcraj.nic.in/cishcraj-jdp/JudgementFilters/"

# Chrome flags: run without a visible window, without the sandbox, and
# without relying on /dev/shm.
chrome_options = Options()
for chrome_flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
    chrome_options.add_argument(chrome_flag)

# webdriver_manager resolves the chromedriver binary path for the Service.
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service, options=chrome_options)

# Open the judgment search form.
driver.get(url)


In [25]:
# %%
# Part 4: Wait for form elements and fill the form
# Every lookup below waits up to 10 seconds before raising a timeout.
wait = WebDriverWait(driver, 10)

# 1) Wait for From Date and To Date input fields
from_date_elem = wait.until(EC.presence_of_element_located((By.ID, "partyFromDate")))
to_date_elem = wait.until(EC.presence_of_element_located((By.ID, "partyToDate")))

# Fill dates (dd/mm/YYYY); clear first so any pre-filled value is replaced.
from_date_elem.clear()
from_date_elem.send_keys(from_date.strftime("%d/%m/%Y"))
to_date_elem.clear()
to_date_elem.send_keys(to_date.strftime("%d/%m/%Y"))

# 2) Select Reportable Judgment = Yes
reportable_yes = wait.until(EC.element_to_be_clickable((By.ID, "rpjudgeY")))
reportable_yes.click()

# 3) Wait for captcha image to appear
captcha_img_elem = wait.until(EC.presence_of_element_located((By.ID, "captcha")))
# get_attribute returns None when the attribute is missing; normalise to ""
# so the startswith check below cannot raise.
captcha_src = captcha_img_elem.get_attribute("src") or ""

if captcha_src.startswith("data:image"):
    # Inline data URI: the base64 payload is everything after the first comma.
    img_data = base64.b64decode(captcha_src.split(",", 1)[1])
    with open("captcha.png", "wb") as f:
        f.write(img_data)
else:
    # The image is not inline, so capture the element as rendered in this
    # browser session instead of prompting against a stale or missing file.
    if not captcha_img_elem.screenshot("captcha.png"):
        raise RuntimeError("Could not save the captcha image to captcha.png")
print("Captcha saved as captcha.png → open and read it.")

# 4) Ask user to input captcha manually (surrounding whitespace is dropped)
captcha_code = input("Enter captcha code from captcha.png: ").strip()

# 5) Fill captcha input field
captcha_input_elem = wait.until(EC.presence_of_element_located((By.ID, "txtCaptcha")))
captcha_input_elem.clear()
captcha_input_elem.send_keys(captcha_code)


Captcha saved as captcha.png → open and read it.


In [26]:
# %%
# Part 5: Submit the form and wait for table

# Locators for the visible SEARCH button and the results table (ID = sample_1).
search_button_locator = (By.ID, "btncasedetail1_1")
results_table_locator = (By.ID, "sample_1")

# Click SEARCH once it is clickable.
search_button = wait.until(EC.element_to_be_clickable(search_button_locator))
search_button.click()

print("Form submitted, waiting for results table...")

# Block until the results table is present in the DOM.
results_table = wait.until(EC.presence_of_element_located(results_table_locator))
print("Results table loaded successfully!")


Form submitted, waiting for results table...
Results table loaded successfully!


In [28]:
# Collect the body rows of the results table (the header is not under <tbody>).
rows = results_table.find_elements(By.XPATH, ".//tbody/tr")

print(f"Found {len(rows)} rows")

# Show each row as a list holding the stripped text of its <td> cells.
for row in rows:
    data = [cell.text.strip() for cell in row.find_elements(By.TAG_NAME, "td")]
    print(data)


Found 10 rows
['1', 'CRLR / 433 / 2020\nXXXXX Vs. XXXXX\n(2025:RJ-JD:39461)', 'SANDEEP SHAH', '04-Sep-2025', '']
['2', 'CRLR / 48 / 2024\nJAISHANKAR SHARMA Vs. STATE OF RAJASTHAN', 'SANDEEP SHAH', '04-Sep-2025', '']
['3', 'CRLR / 509 / 2024\nXXXXX Vs. XXXXX', 'SANDEEP SHAH', '04-Sep-2025', '']
['4', 'CRLA / 670 / 2016\nSANJAY KUMAR Vs. STATE', 'MANOJ KUMAR GARG,\nRAVI CHIRANIA', '08-Sep-2025', '']
['5', 'CW / 2932 / 2023\nSHREE ARIHANT OIL AND GENERAL MILLS Vs. UNION OF INDIA\n(2025:RJ-JD:39647-DB)', 'DINESH MEHTA,\nSANGEETA SHARMA', '08-Sep-2025', '']
['6', 'CRLA / 135 / 1993\nSURESH KUMAR KOLI Vs. STATE', 'FARJAND ALI', '09-Sep-2025', '']
['7', 'CW / 15870 / 2025\nSHAITAN RAM Vs. STATE OF RAJASTHAN\n(2025:RJ-JD:40058)', 'REKHA BORANA', '09-Sep-2025', '']
['8', 'SAW / 1105 / 2024\nTHE STATE OF RAJASTHAN Vs. DR. AHATSHAM ALI', 'PUSHPENDRA SINGH BHATI,\nSANDEEP TANEJA', '09-Sep-2025', '']
['9', 'SAW / 1109 / 2024\nTHE STATE OF RAJASTHAN Vs. DR. MOHD YUNUS', 'PUSHPENDRA SINGH BHATI,\nSAN

In [29]:
import csv

# Dump the results table to cases_summary.csv (overwritten on every run).
summary_header = ["Sr.No.", "Case Details", "Justice", "Date", "Action"]

with open("cases_summary.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.writer(f)
    writer.writerow(summary_header)
    # One record per table row: the stripped text of each <td> cell.
    writer.writerows(
        [cell.text.strip() for cell in row.find_elements(By.TAG_NAME, "td")]
        for row in rows
    )

print("Cases saved to cases_summary.csv")


Cases saved to cases_summary.csv


In [None]:
# %%
# === Part 1: Imports ===
import os
import csv
import datetime
import base64
import time
import requests

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager

# === Part 2: Date Range ===
# Search window: the 10 days up to and including today.
today = datetime.date.today()
from_date, to_date = today - datetime.timedelta(days=10), today

# Echo both bounds in dd/mm/YYYY form.
print("From Date:", f"{from_date:%d/%m/%Y}")
print("To Date:", f"{to_date:%d/%m/%Y}")

# === Part 3: Selenium Setup ===
# Chrome flags: no visible window, no sandbox, no reliance on /dev/shm.
chrome_options = Options()
for chrome_flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
    chrome_options.add_argument(chrome_flag)

# webdriver_manager resolves the chromedriver binary path for the Service.
chrome_service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=chrome_service, options=chrome_options)

# Open the judgment search form.
driver.get("https://hcraj.nic.in/cishcraj-jdp/JudgementFilters/")

# Shared explicit wait: each lookup gets up to 10 seconds.
wait = WebDriverWait(driver, 10)

# === Part 4: Fill Form ===
# Dates (dd/mm/YYYY); clear first so any pre-filled value is replaced.
from_date_elem = wait.until(EC.presence_of_element_located((By.ID, "partyFromDate")))
to_date_elem = wait.until(EC.presence_of_element_located((By.ID, "partyToDate")))
from_date_elem.clear()
from_date_elem.send_keys(from_date.strftime("%d/%m/%Y"))
to_date_elem.clear()
to_date_elem.send_keys(to_date.strftime("%d/%m/%Y"))

# Reportable = Yes
reportable_yes = wait.until(EC.element_to_be_clickable((By.ID, "rpjudgeY")))
reportable_yes.click()

# Captcha
captcha_elem = wait.until(EC.presence_of_element_located((By.ID, "captcha")))
# get_attribute returns None when the attribute is missing; normalise to ""
# so the startswith check below cannot raise.
captcha_src = captcha_elem.get_attribute("src") or ""
if captcha_src.startswith("data:image"):
    # Inline data URI: the base64 payload is everything after the first comma.
    img_data = base64.b64decode(captcha_src.split(",", 1)[1])
    with open("captcha.png", "wb") as f:
        f.write(img_data)
else:
    # The image is not inline, so capture the element as rendered in this
    # browser session instead of prompting against a stale or missing file.
    if not captcha_elem.screenshot("captcha.png"):
        raise RuntimeError("Could not save the captcha image to captcha.png")
print("Captcha saved as captcha.png → open and read it.")

# The captcha is solved by a human; surrounding whitespace is dropped.
captcha_code = input("Enter captcha code from captcha.png: ").strip()
captcha_input = wait.until(EC.presence_of_element_located((By.ID, "txtCaptcha")))
captcha_input.clear()
captcha_input.send_keys(captcha_code)

# === Part 5: Submit Form and Wait for Results ===
# Locators for the SEARCH button and the results table.
search_button_locator = (By.ID, "btncasedetail1_1")
results_table_locator = (By.ID, "sample_1")

# Click SEARCH once it is clickable.
search_button = wait.until(EC.element_to_be_clickable(search_button_locator))
search_button.click()
print("Form submitted, waiting for results table...")

# Block until the results table is present in the DOM.
results_table = wait.until(EC.presence_of_element_located(results_table_locator))
print("Results table loaded successfully!")

# === Part 6: Load existing CSV for incremental download ===
# existing_cases holds the "Case Details" text of every case already recorded
# in the master CSV, so later steps can skip them.
master_file = "cases_master.csv"
existing_cases = set()
if os.path.exists(master_file):
    # newline="" hands line-ending handling to the csv module, as its
    # documentation requires; "Case Details" values contain embedded newlines.
    with open(master_file, "r", newline="", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            existing_cases.add(row["Case Details"])

# === Part 7: Parse Table Rows ===
# Imported here so this section brings its own dependency into scope;
# selenium is already used throughout this notebook.
from selenium.common.exceptions import NoSuchElementException

rows = results_table.find_elements(By.XPATH, ".//tbody/tr")
print(f"Found {len(rows)} rows")

master_data = []
pdf_folder = "pdfs"
# Upper bound for a single download so a stalled server cannot hang the run.
pdf_timeout_seconds = 30
os.makedirs(pdf_folder, exist_ok=True)

for row in rows:
    cols = row.find_elements(By.TAG_NAME, "td")
    data = [col.text.strip() for col in cols]

    # A row needs at least the "Case Details" cell (index 1) to be identified.
    if len(data) < 2:
        print(f"Skipping row with unexpected layout: {data}")
        continue
    case_details = data[1]

    # Skip if already downloaded
    if case_details in existing_cases:
        continue

    # pdf_name stays empty unless a PDF is actually written to disk.
    pdf_name = ""

    # Look for a link in the Action column (index 4). Only "no such column"
    # and "no <a> inside it" mean "no PDF"; other Selenium errors propagate.
    try:
        pdf_link_elem = cols[4].find_element(By.TAG_NAME, "a")
    except (IndexError, NoSuchElementException):
        pdf_url = None
    else:
        pdf_url = pdf_link_elem.get_attribute("href")

    if not pdf_url:
        print(f"No PDF found for case {case_details}")
    else:
        # File name = last path segment of the link.
        file_name = pdf_url.split("/")[-1]
        pdf_path = os.path.join(pdf_folder, file_name)
        try:
            r = requests.get(pdf_url, timeout=pdf_timeout_seconds)
            # Do not save an HTTP error page as if it were the PDF.
            r.raise_for_status()
            with open(pdf_path, "wb") as f:
                f.write(r.content)
        except (requests.RequestException, OSError) as exc:
            # Best-effort download: report the real cause and keep going.
            print(f"Failed to download PDF for case {case_details}: {exc}")
        else:
            pdf_name = file_name
            print(f"Downloaded PDF: {pdf_name}")

    data.append(pdf_name)
    master_data.append(data)

# === Part 8: Save/Append to CSV ===
try:
    file_exists = os.path.exists(master_file)
    # An existing but empty file still needs the header row; otherwise the
    # next run's DictReader would take the first data row as the header.
    needs_header = (not file_exists) or os.path.getsize(master_file) == 0
    with open(master_file, "a", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        if needs_header:
            writer.writerow(["Sr.No.", "Case Details", "Justice", "Date", "Action", "PDF Name"])
        writer.writerows(master_data)

    print(f" Saved {len(master_data)} new cases to {master_file}")
finally:
    # === Part 9: Cleanup ===
    # Close the browser even if writing the CSV failed.
    driver.quit()


From Date: 04/09/2025
To Date: 14/09/2025
Captcha saved as captcha.png → open and read it.
Form submitted, waiting for results table...
Results table loaded successfully!
Found 10 rows
No PDF found for case CRLR / 433 / 2020
XXXXX Vs. XXXXX
(2025:RJ-JD:39461)
No PDF found for case CRLR / 48 / 2024
JAISHANKAR SHARMA Vs. STATE OF RAJASTHAN
No PDF found for case CRLR / 509 / 2024
XXXXX Vs. XXXXX
No PDF found for case CRLA / 670 / 2016
SANJAY KUMAR Vs. STATE
No PDF found for case CW / 2932 / 2023
SHREE ARIHANT OIL AND GENERAL MILLS Vs. UNION OF INDIA
(2025:RJ-JD:39647-DB)
No PDF found for case CRLA / 135 / 1993
SURESH KUMAR KOLI Vs. STATE
No PDF found for case CW / 15870 / 2025
SHAITAN RAM Vs. STATE OF RAJASTHAN
(2025:RJ-JD:40058)
No PDF found for case SAW / 1105 / 2024
THE STATE OF RAJASTHAN Vs. DR. AHATSHAM ALI
No PDF found for case SAW / 1109 / 2024
THE STATE OF RAJASTHAN Vs. DR. MOHD YUNUS
No PDF found for case SAW / 1123 / 2024
THE STATE OF RAJASTHAN Vs. DR. ALI TAQI
✅ Saved 10 new ca