In [1]:
import argparse
import json
import time
from datetime import datetime, timedelta

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException, WebDriverException
import requests

# --- Configuration ---
ECOURTS_URL = "https://services.ecourts.gov.in/ecourtindia_v6/"
OUTPUT_DIR = "scraper_results/"
# Replace with the actual path to your WebDriver if it's not in PATH
# E.g., DRIVER_PATH = "/path/to/chromedriver"
DRIVER_PATH = "chromedriver" 
# This example uses Case Status search (Menu > Case Status)
CASE_STATUS_SEARCH_URL = ECOURTS_URL + "caseStatus.php" 

# --- Helper Functions ---

def setup_driver():
    """Initializes and returns a Selenium WebDriver."""
    print("Setting up WebDriver...")
    try:
        service = Service(DRIVER_PATH)
        options = webdriver.ChromeOptions()
        # Optional: Run in headless mode (without opening a browser GUI)
        # options.add_argument('--headless') 
        options.add_argument('--no-sandbox')
        options.add_argument('--disable-dev-shm-usage')
        driver = webdriver.Chrome(service=service, options=options)
        return driver
    except WebDriverException as e:
        print(f"Error initializing WebDriver. Make sure '{DRIVER_PATH}' is correct and compatible.")
        print("Details:", e)
        return None

def save_output(data, filename_suffix="result", format="json"):
    """Saves data to a JSON or text file."""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"{OUTPUT_DIR}{filename_suffix}_{timestamp}.{format}"
    
    with open(filename, 'w', encoding='utf-8') as f:
        if format == "json":
            json.dump(data, f, ensure_ascii=False, indent=4)
        else:
            f.write(data)
    print(f"\n✅ Results saved to {filename}")

def download_file(url, filename_prefix="case_document"):
    """Downloads a file from a URL using requests."""
    try:
        print(f"Attempting to download file from: {url}")
        response = requests.get(url, stream=True, timeout=15)
        response.raise_for_status() # Raise an HTTPError for bad responses (4xx or 5xx)
        
        filename = f"{OUTPUT_DIR}{filename_prefix}_{datetime.now().strftime('%Y%m%d')}.pdf"
        with open(filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
        print(f"✅ PDF downloaded successfully as {filename}")
        return filename
    except requests.exceptions.RequestException as e:
        print(f"❌ Error downloading file: {e}")
        return None

# --- Core Logic Functions ---

def get_target_dates(today=False, tomorrow=False):
    """Calculates today's and/or tomorrow's date for listing check."""
    target_dates = []
    if today:
        target_dates.append(datetime.now().strftime("%d-%m-%Y"))
    if tomorrow:
        target_dates.append((datetime.now() + timedelta(days=1)).strftime("%d-%m-%Y"))
    return target_dates

def check_case_listing(driver, case_details, target_dates):
    """
    1. Navigates to the Case Status page.
    2. Inputs CNR or Case Type, Number, Year.
    3. Searches for the case.
    4. Checks listing for today/tomorrow (target_dates).
    5. Downloads PDF if available.
    """
    print(f"\n--- Checking Case Listing: {case_details} ---")
    
    # *** 1. Go to Case Status page ***
    try:
        driver.get(CASE_STATUS_SEARCH_URL)
        wait = WebDriverWait(driver, 10)
        # Wait for the main search form to be present
        wait.until(EC.presence_of_element_located((By.ID, 'caseStatus')))
    except (TimeoutException, WebDriverException) as e:
        print(f"❌ Failed to load eCourts Case Status page: {e}")
        return {"error": "Page loading failed."}
    
    # NOTE: eCourts pages have complex elements like CAPTCHAs and dropdowns.
    # The actual implementation needs to handle selecting 'Court Complex', 
    # 'Case Type', entering data, solving CAPTCHA, and submitting the form.
    
    # *** Placeholder for Form Interaction (Highly dependent on actual site structure) ***
    
    # *** 2. Input case details (Using CNR for simplicity, but need to adapt) ***
    # This is a general example. You must find the actual element IDs/names.
    if 'cnr' in case_details:
        print(f"Searching by CNR: {case_details['cnr']} (Needs manual element finding)")
        # Example interaction:
        # driver.find_element(By.ID, "cnrNo").send_keys(case_details['cnr'])
        # (Submit form logic goes here)
    elif 'type' in case_details and 'number' in case_details and 'year' in case_details:
        print(f"Searching by Case No: {case_details['type']}/{case_details['number']}/{case_details['year']} (Needs manual element finding)")
        # Example interaction:
        # Select dropdowns for Court Complex, Case Type, enter number/year, etc.
        # (Submit form logic goes here)
        
    # *** 3. Post-Search Logic (Assuming search brings up a case details table) ***
    print("--- Simulating successful search and data extraction... ---")
    
    results = {
        "case_details": case_details,
        "is_listed": False,
        "listing_info": [],
        "pdf_downloaded": None
    }
    
    # Placeholder: Simulate finding case details on the resulting page
    # In a real scenario, you'd find the table/section with 'Hearing Dates'.
    
    # Simulate data found on the page after search submission
    # This is dummy data, the real logic uses Selenium to find and parse tables
    case_hearing_data = [
        {"date": datetime.now().strftime("%d-%m-%Y"), "court": "Court Hall No. 5", "sno": "15"},
        {"date": (datetime.now() + timedelta(days=2)).strftime("%d-%m-%Y"), "court": "Court Hall No. 3", "sno": "4"}
    ]
    
    pdf_link_found = "https://example.com/dummy_case_file.pdf" # Replace with actual logic
    
    for item in case_hearing_data:
        if item['date'] in target_dates:
            results['is_listed'] = True
            info = {
                "date": item['date'],
                "court_name": item['court'],
                "serial_number": item['sno']
            }
            results['listing_info'].append(info)
            print(f"🎉 Case LISTED on {info['date']}! Court: {info['court_name']}, Sr. No.: {info['serial_number']}")
            
    if not results['is_listed']:
        print("😞 Case NOT listed today or tomorrow.")
        
    # *** 4. Optional: Download the case PDF ***
    if pdf_link_found:
        print("\nFound a potential PDF link.")
        # A real implementation would find the actual 'View Document' or 'Download Order' link
        # and click it or extract the direct URL.
        # This example uses the helper function to download a file.
        # In many cases, you must click a link/button with Selenium to trigger the download.
        results['pdf_downloaded'] = download_file(pdf_link_found, f"case_{case_details.get('cnr', 'no')}")

    save_output(results, filename_suffix="case_listing_check")
    return results

def download_causelist(driver, date_mode):
    """
    Downloads the entire cause list for today or tomorrow.
    This requires navigating to the Cause List page and submitting a form.
    """
    print(f"\n--- Downloading Cause List for {date_mode} ---")
    target_date = datetime.now()
    if date_mode == 'tomorrow':
        target_date += timedelta(days=1)

    target_date_str = target_date.strftime("%d-%m-%Y")
    
    # *** 1. Navigate to Cause List Page (URL may differ) ***
    # This is a guess, adjust the URL if necessary
    CAUSELIST_URL = ECOURTS_URL + "causelist.php" 
    try:
        driver.get(CAUSELIST_URL)
        wait = WebDriverWait(driver, 10)
        wait.until(EC.presence_of_element_located((By.ID, 'causelistForm')))
        
        # NOTE: Like Case Status, this requires selecting Court Complex, Court Establishment,
        # selecting the date (or ensuring today's date is default), and submitting a CAPTCHA.
        
        # *** 2. Interaction Placeholder ***
        # Example: Input the date (if not default)
        # driver.find_element(By.ID, "listDate").send_keys(target_date_str)
        # (Select dropdowns for Court Complex, Establishment, then submit form)
        
        # *** 3. Data Extraction Placeholder ***
        # The result page usually has links/buttons to download the Cause List PDF or HTML.
        # Find the download link/button.
        
        causelist_pdf_link = "https://example.com/dummy_causelist.pdf" # Replace with actual logic
        
        if causelist_pdf_link:
            print(f"Found Cause List download link for {target_date_str}.")
            download_file(causelist_pdf_link, f"causelist_{date_mode}_{target_date.strftime('%Y%m%d')}")
            
            # Optionally, you could try to scrape the HTML table if there's no direct PDF link
            # cause_list_html = driver.find_element(By.ID, 'causeListTable').text 
            # save_output(cause_list_html, f"causelist_{date_mode}", "txt")

            return {"status": "success", "date": target_date_str}
        else:
            print(f"❌ Could not find the Cause List download link for {target_date_str}.")
            return {"status": "failed", "reason": "Download link not found"}

    except (TimeoutException, WebDriverException) as e:
        print(f"❌ Failed to load eCourts Cause List page or interact: {e}")
        return {"error": "Page interaction failed."}


# --- Main Execution ---

def main():
    parser = argparse.ArgumentParser(
        description="eCourts Scraper for case listing and cause list download.",
        formatter_class=argparse.RawTextHelpFormatter
    )
    
    # Group for Case Search (mutually exclusive with causelist)
    group_case = parser.add_argument_group('Case Search Options')
    group_case.add_argument('--cnr', type=str, help="CNR Number (e.g., KABA012345672023)")
    group_case.add_argument('--case', type=str, nargs=3, metavar=('TYPE', 'NUMBER', 'YEAR'),
                            help="Case Type, Number, and Year (e.g., 'WP' '123' '2023')")
    group_case.add_argument('--today', action='store_true', help="Check if case is listed today.")
    group_case.add_argument('--tomorrow', action='store_true', help="Check if case is listed tomorrow.")
    
    # Group for Cause List Download
    group_causelist = parser.add_argument_group('Cause List Download Options')
    group_causelist.add_argument('--causelist', choices=['today', 'tomorrow'], 
                                 help="Download entire cause list for 'today' or 'tomorrow'.")

    args = parser.parse_args()
    
    # Determine the task
    is_case_search = args.cnr or args.case
    is_causelist = args.causelist
    
    if not is_case_search and not is_causelist:
        parser.print_help()
        return

    # Initialize WebDriver
    driver = setup_driver()
    if not driver:
        return

    try:
        # --- Handle Case Listing Check ---
        if is_case_search:
            if not (args.today or args.tomorrow):
                print("⚠️ Please specify at least --today or --tomorrow for case search.")
                return

            if args.cnr:
                case_details = {'cnr': args.cnr}
            elif args.case:
                case_details = {'type': args.case[0], 'number': args.case[1], 'year': args.case[2]}
            else:
                print("⚠️ Please provide either --cnr or --case details.")
                return

            target_dates = get_target_dates(args.today, args.tomorrow)
            check_case_listing(driver, case_details, target_dates)

        # --- Handle Cause List Download ---
        elif is_causelist:
            # Requirement: Download entire cause list for today on request 
            # Extended to tomorrow via CLI option 
            download_causelist(driver, args.causelist)

    finally:
        print("\nQuitting WebDriver...")
        driver.quit()

if __name__ == "__main__":
    # Ensure the output directory exists
    import os
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    main()

usage: ipykernel_launcher.py [-h] [--cnr CNR] [--case TYPE NUMBER YEAR]
                             [--today] [--tomorrow]
                             [--causelist {today,tomorrow}]
ipykernel_launcher.py: error: unrecognized arguments: --f=c:\Users\dhruv\AppData\Roaming\jupyter\runtime\kernel-v3b3c695b7fcc55c874c7ac9a2b943e73c47938c47.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [2]:
import os 
os.getcwd()

'd:\\Internship\\Microsoft VS Code'