In [23]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import time
import pandas as pd


from bs4 import BeautifulSoup

def solve_captcha_automatically(driver):
    # Extract the CAPTCHA value from the HTML content
    html_content = driver.page_source
    soup = BeautifulSoup(html_content, 'html.parser')

    captcha_element = soup.find('p', {'id': 'cap'})
    captcha_value = captcha_element.find('font').text.strip()

    # Fill in the CAPTCHA input field
    captcha_input = driver.find_element(By.ID, 'ansCaptcha')  # Adjust the selector based on your HTML structure
    captcha_input.clear()
    captcha_input.send_keys(captcha_value)

def select_state(driver, state_value):
    # Assuming there is a dropdown for the state
    state_dropdown =  WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'ddl_st_agncy')))
 # Adjust the selector based on your HTML structure
    state_dropdown.click()

    # Select the state option based on its value
    state_option_xpath = f'//select[@id="ddl_st_agncy"]/option[@value="{state_value}"]'
    state_option = driver.find_element(By.XPATH, state_option_xpath)
    state_option.click()

def select_year(driver, year_value):
    # Assuming there is a dropdown for the year
    year_dropdown = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'ddl_ref_caseyr')))
 # Adjust the selector based on your HTML structure
    year_dropdown.click()

    # Select the year option based on its value
    year_option_xpath = f'//select[@id="ddl_ref_caseyr"]/option[@value="{year_value}"]'
    year_option = driver.find_element(By.XPATH, year_option_xpath)
    year_option.click()
def click_right_button(driver):
    try:
        # Click the right button to load the next set of data
        right_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.ID, 'btn_right_cs'))
        )
        right_button.click()
        time.sleep(10)  # Adjust sleep time as needed
        return True
    except TimeoutException:
        return False
def get_table_data(driver,year):
    try:
        # Wait for the table to be present in the DOM
        table = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.XPATH, '//div[@id="dv_include_cs"]/table'))
        )
        resultant_page_html = driver.page_source

        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(resultant_page_html, 'html.parser')

        # Find the table element
        table = soup.find('div', {'id': 'dv_include_cs'}).find('table')

        # Check if the table exists
        if table:
            # Extract and print the header text
            headers = table.find_all('th')
            header_texts = [header.get_text(strip=True) for header in headers]

            # Extract data from td tags
            data = []
            for row in table.find_all('tr')[1:]:  # Skip the first row as it contains headers
                row_data = [td.get_text(strip=True) for td in row.find_all('td')]
                data.append(row_data)

            df = pd.DataFrame(data, columns=header_texts)

            # Add a column for the year
            df['Year'] = year

            return df

        else:
            return None


    except TimeoutException:
        return None

# Example usage
url = 'https://main.sci.gov.in/case-status#'  # Replace with the actual URL

driver = webdriver.Chrome()
driver.get(url)
court_tribunal_tab = driver.find_element(By.XPATH, '//li[@data-link="tab5"]/a[@class="z-link"]')
court_tribunal_tab.click()

# Assuming there is a form on the page and you need to fill out the CAPTCHA, state, year, and other fields
# Fill out other form fields if needed

# Call the function to fill in the CAPTCHA
solve_captcha_automatically(driver)

# Call the function to fill in the state
select_state(driver, '541950')  # '541950' corresponds to the value of Andhra Pradesh

# Call the function to fill in the year
all_data = pd.DataFrame()

for year in range(1950, 2024):
    solve_captcha_automatically(driver)
    select_state(driver, '21945')  # '541950' corresponds to the value of Andhra Pradesh

    # Call the function to fill in the year
    select_year(driver, str(year))  # Convert the year to a string

    # Submit the form or perform other actions as needed
    button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.ID, 'getLowerCourtData1'))
    )

    # Click the button
    button.click()
    time.sleep(10)
    
        # Add a column for the year
    current_page_data = get_table_data(driver,year)

    # If data is available, append to the overall DataFrame
    if current_page_data is not None:
        all_data = pd.concat([all_data, current_page_data], ignore_index=True)

    # Keep clicking the right button until it becomes unclickable
    while click_right_button(driver):
        # Get data from the current page
        current_page_data = get_table_data(driver,year)

        # If data is available, append to the overall DataFrame
        if current_page_data is not None:
            all_data = pd.concat([all_data, current_page_data], ignore_index=True)
        else:
            break  # Break the loop if no data is found on the current page






    

In [24]:
all_data


Unnamed: 0,S.No.,Diary No.,PetitionerVsRespondent,From Court,State,Bench,Case No.,Judgement Date,Year
0,1,13992-2001,SABITRI DEBI (D) BY LRS.VsTUSHAR KANTI BOSE .,High Court,WEST BENGAL,HIGH COURT AT CALCUTTA,WP-64-1950,04-05-2001,1950
1,1,10513-2009,"C.I.T.,MUMBAIVsM/S.HINDITRON SERVICE PVT.LTD.",High Court,WEST BENGAL,HIGH COURT AT CALCUTTA,ITA-34-1952,26-06-1953,1952
2,1,17100-2014,MAHARAJ KUMAR SADAY CHAND MAHTAB OF BURDWANVsS...,High Court,WEST BENGAL,HIGH COURT AT CALCUTTA,SN-2972-1953,05-05-1959,1953
3,2,24118-2013,DIBYENDU LALL SEALVsASHISH SEAL .,High Court,WEST BENGAL,HIGH COURT AT CALCUTTA,SN-514-1953,13-09-1956,1953
4,3,24119-2013,DIBYENDU LALL SEALVsMOHAN LAL SEAL .,High Court,WEST BENGAL,HIGH COURT AT CALCUTTA,SN-514-1953,13-09-1956,1953
...,...,...,...,...,...,...,...,...,...
82781,1588,50538-2023,KUDDUS MONDALVsSTATE OF WEST BENGAL,High Court,WEST BENGAL,HIGH COURT AT CALCUTTA,CRM (NDPS)-1527-2023,29-09-2023,2023
82782,1589,50542-2023,ASHOKE NUNIAVsTHE STATE OF WEST BENGAL,High Court,WEST BENGAL,HIGH COURT AT CALCUTTA,CRM (NDPS)-1759-2023,20-11-2023,2023
82783,1590,50542-2023,ASHOKE NUNIAVsTHE STATE OF WEST BENGAL,High Court,WEST BENGAL,HIGH COURT AT CALCUTTA,CRM (NDPS)-1041-2023,14-03-2023,2023
82784,1591,50542-2023,ASHOKE NUNIAVsTHE STATE OF WEST BENGAL,High Court,WEST BENGAL,HIGH COURT AT CALCUTTA,CRM(A)-1227-2023,24-03-2023,2023


In [25]:
all_data[['Petitioner', 'Respondent']] = all_data['PetitionerVsRespondent'].str.split('Vs', expand=True)           
all_data.drop(columns=['PetitionerVsRespondent'], inplace=True)
            # Rearrange columns to have "Petitioner" and "Respondent" at the 2nd and 3rd positions
column_order = ['S.No.', 'Diary No.', 'Petitioner', 'Respondent', 'From Court', 'State', 'Bench', 'Case No.', 'Judgement Date', 'Year']
all_data = all_data[column_order]



In [17]:
#df1=all_data.copy()

In [21]:
df1 = pd.concat([df1, all_data], ignore_index=True)


In [22]:
df1

Unnamed: 0,S.No.,Diary No.,Petitioner,Respondent,From Court,State,Bench,Case No.,Judgement Date,Year
0,1,19801-2020,DR.SUBRAMANIAN SWAMY,THE STATE OF UTTARAKHAND,High Court,TAMIL NADU,HIGH COURT OF JUDICATURE AT MADRAS,WP-379-1951,13-12-1951,1951
1,2,19801-2020,DR.SUBRAMANIAN SWAMY,THE STATE OF UTTARAKHAND,High Court,TAMIL NADU,HIGH COURT OF JUDICATURE AT MADRAS,WP-380-1951,13-12-1951,1951
2,3,33632-2009,DR. SUBRAMANIAN SWAMY,STATE OF TAMIL NADU .,High Court,TAMIL NADU,HIGH COURT OF JUDICATURE AT MADRAS,WP-379-1951,13-12-1951,1951
3,4,33632-2009,DR. SUBRAMANIAN SWAMY,STATE OF TAMIL NADU .,High Court,TAMIL NADU,HIGH COURT OF JUDICATURE AT MADRAS,WP-380-1951,13-12-1951,1951
4,1,7259-2016,K. KANNUSAMI,K. SUNDARASAMI,High Court,TAMIL NADU,HIGH COURT OF JUDICATURE AT MADRAS AT MADURAI,OA-24-1952,01-11-1952,1952
...,...,...,...,...,...,...,...,...,...,...
302896,2213,50733-2023,SELVARATHINAM,J. PREMAVATHI,High Court,TAMIL NADU,HIGH COURT OF JUDICATURE AT MADRAS,CP-1446-2023,23-11-2023,2023
302897,2214,50733-2023,SELVARATHINAM,J. PREMAVATHI,High Court,TAMIL NADU,HIGH COURT OF JUDICATURE AT MADRAS,CP-1446-2023,17-08-2023,2023
302898,2215,50745-2023,PERUMAL,PAPPATHI,High Court,TAMIL NADU,HIGH COURT OF JUDICATURE AT MADRAS AT MADURAI,CRPMD-47-2023,14-07-2023,2023
302899,2216,50745-2023,PERUMAL,PAPPATHI,High Court,TAMIL NADU,HIGH COURT OF JUDICATURE AT MADRAS AT MADURAI,CMPMD-253-2023,14-07-2023,2023


In [125]:
#df1=all_data.copy()
df1.drop("PetitionerVsRespondent",axis=1,inplace=True)


KeyError: "['PetitionerVsRespondent'] not found in axis"

In [126]:
df1

Unnamed: 0,S.No.,Diary No.,Petitioner,Respondent,From Court,State,Bench,Case No.,Judgement Date,Year
0,1,5638-2020,JAGPAL SINGH,VIJAI LAXMI (DECEASED) THROUGH HER LRS.,High Court,KERALA,HIGH COURT OF KERALA AT ERNAKULAM,OP-26-1954,22-12-1955,1954
1,1,9441-2021,KANAKAMMA,SARASAMMA,High Court,KERALA,HIGH COURT OF KERALA AT ERNAKULAM,AS-303-1955,27-08-1956,1955
2,1,7976-2010,KRISHNAT MADHUKAR KADAM,STATE OF KERALA,High Court,KERALA,HIGH COURT OF KERALA AT ERNAKULAM,CRLA-92-1956,15-01-1957,1956
3,2,7976-2010,KRISHNAT MADHUKAR KADAM,STATE OF KERALA,High Court,KERALA,HIGH COURT OF KERALA AT ERNAKULAM,CRLA-92-1956,15-01-1957,1956
4,3,24050-2018,P.V. KUNHIRAMAN,CUSTODIAN VESTED FOREST,High Court,KERALA,HIGH COURT OF KERALA AT ERNAKULAM,IA-734-1956,13-07-1966,1956
...,...,...,...,...,...,...,...,...,...,...
168455,829,50097-2023,M/S. POABS GRANITES PRODUCTS PVT. LTD.,SANEESH,High Court,KERALA,HIGH COURT OF KERALA AT ERNAKULAM,RP-851-2023,05-09-2023,2023
168456,830,50097-2023,M/S. POABS GRANITES PRODUCTS PVT. LTD.,SANEESH,High Court,KERALA,HIGH COURT OF KERALA AT ERNAKULAM,RP-784-2023,25-08-2023,2023
168457,831,50166-2023,ANUP CHANDY,DEEPU CHACKO,High Court,KERALA,HIGH COURT OF KERALA AT ERNAKULAM,CRP-0097-2023,20-07-2023,2023
168458,832,50320-2023,PRADEEP KUMAR,K.N. VIJAYALEKSHMI,High Court,KERALA,HIGH COURT OF KERALA AT ERNAKULAM,MAT.A-774-2023,14-11-2023,2023


In [26]:
all_data.to_csv(r'C:\Users\prasad bolgss\Downloads\West_Bengal.csv')

In [114]:
df2=df1.copy()