In [6]:
# Step 1: Import the libraries we installed
import requests
from bs4 import BeautifulSoup

# Step 2: Paste the URL of the case you found
# Replace this with the actual URL you copied
case_url = 'https://indiankanoon.org/doc/16772520/' 

# Step 3: Fetch the webpage content
print(f"Fetching data from: {case_url}")
response = requests.get(case_url)

# Step 4: Parse the HTML and find the title
# We use the 'html.parser' to process the page content
soup = BeautifulSoup(response.content, 'html.parser')

# On IndianKanoon, the main title is usually inside a <div> with the class 'doc_title'
# We need to find that specific element.
title_element = soup.find("div", class_='judgments')

# Step 5: Extract the text from the element and print it
if title_element:
    case_title = title_element.get_text(strip=True)
    print("\n--- VICTORY! ---")
    print(f"Successfully Extracted Title: {case_title}")
else:
    print("\n--- FAILED ---")
    print("Could not find the title element. The website structure might have changed.")

Fetching data from: https://indiankanoon.org/doc/16772520/

--- VICTORY! ---
Successfully Extracted Title: Take notes as you read a judgment using ourVirtual Legal Assistantand get email alerts whenever a new judgment matches your query (Query Alert Service). Try out ourPremium Member Services--Sign up todayand get free trial for one month.Madras High CourtSun Pharmaceutical Industries Limited vs Kivi Labs Ltd on 11 March, 2024Author:Abdul QuddhoseBench:Abdul QuddhoseC.S.No.87 of 2012

                              IN THE HIGH COURT OF JUDICATURE AT MADRAS

                                                Reserved on   : 22.02.2024

                                                Pronounced on : 11.03.2024

                                                       CORAM:

                                   THE HON'BLE MR. JUSTICE ABDUL QUDDHOSE

                                                   C.S.No.87 of 2012

                     Sun Pharmaceutical Industries Limited,
                

In [14]:
import requests
from bs4 import BeautifulSoup

# The URL from your screenshot's address bar
case_url = 'https://indiankanoon.org/doc/16772520/' 

print(f"Fetching data from: {case_url}")
response = requests.get(case_url)
soup = BeautifulSoup(response.content, 'html.parser')


# 1. Get Title (using the specific class from your screenshot)
title_element = soup.find(class_='doc_title')
if title_element:
    print(f"\nTitle: {title_element.get_text(strip=True)}")
else:
    print("\nCould not find the title element.")


# 2. Get Author (using the specific class from your screenshot)
author_element = soup.find(class_='doc_author')
if author_element:
    # We use .replace() to remove the "Author: " part
    author_name = author_element.get_text(strip=True).replace('Author: ', '')
    print(f"Author: {author_name}")
else:
    print("Could not find the author element.")


# 3. Get Cited Cases (This is our best guess, you may need to verify the class name)
cited_elements = soup.find_all(class_='caselink') # <-- IMPORTANT: Verify this class name by inspecting the links!

print("\n--- Cited Cases ---")
if cited_elements:
    for element in cited_elements:
        # We only want to print the case links, which start with "INDIAN KANOON" in their title attribute
        if element.get('title', '').startswith('INDIAN KANOON'):
             print(f"- {element.get_text(strip=True)}")
else:
    print("Could not find any cited cases. Double-check the tag and class name.")

Fetching data from: https://indiankanoon.org/doc/16772520/

Title: Sun Pharmaceutical Industries Limited vs Kivi Labs Ltd on 11 March, 2024
Author: Author:Abdul Quddhose

--- Cited Cases ---
Could not find any cited cases. Double-check the tag and class name.


In [18]:
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup

# --- CRUCIAL: Setting up Stealth Options ---
print("Configuring stealth options for the WebDriver...")
chrome_options = Options()
# This option makes the browser seem less like an automated bot
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])
chrome_options.add_experimental_option('useAutomationExtension', False)
chrome_options.add_argument('--disable-blink-features=AutomationControlled')
# Standard user agent for a regular Chrome browser
chrome_options.add_argument('user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36')


# --- Setup Selenium WebDriver with our new options ---
print("Setting up stealth WebDriver...")
driver = webdriver.Chrome(options=chrome_options)
print("WebDriver setup complete.")

# The URL you provided
case_url = 'https://indiankanoon.org/doc/16772520/' 

try:
    # --- Navigation ---
    print(f"Navigating to: {case_url}")
    driver.get(case_url)

    # Adding a small, human-like pause
    time.sleep(3)

    # --- Handling the IFRAME (our original logic, which should now work) ---
    print("Waiting for the content iframe to load...")
    wait = WebDriverWait(driver, 15)
    wait.until(EC.frame_to_be_available_and_switch_to_it((By.NAME, "DOC iframe")))
    print("Successfully switched focus to the content iframe.")

    # --- Get the fully loaded page source OF THE IFRAME ---
    html_content = driver.page_source
    soup = BeautifulSoup(html_content, 'html.parser')

    # --- Parsing Logic ---
    # 1. Get Title
    title_element = soup.find(class_='doc_title')
    if title_element:
        print(f"\nTitle: {title_element.get_text(strip=True)}")
    else:
        print("\nCould not find the title element inside the iframe.")

    # 2. Get Author
    author_element = soup.find(class_='doc_author')
    if author_element:
        author_name = author_element.get_text(strip=True).replace('Author: ', '')
        print(f"Author: {author_name}")
    else:
        print("Could not find the author element inside the iframe.")

    # 3. Get Cited Cases
    print("\n--- Cited Cases ---")
    cite_container = soup.find('div', id='cited_by_div')
    if cite_container:
        cited_elements = cite_container.find_all('a')
        if cited_elements:
            for element in cited_elements:
                print(f"- {element.get_text(strip=True)}")
        else:
            print("Found the container, but no case links inside.")
    else:
        print("Could not find the cited cases container inside the iframe.")

finally:
    # --- Clean up and close the browser ---
    driver.switch_to.default_content() 
    print("\nClosing the browser.")
    driver.quit()

Configuring stealth options for the WebDriver...
Setting up stealth WebDriver...
WebDriver setup complete.
Navigating to: https://indiankanoon.org/doc/16772520/
Waiting for the content iframe to load...

Closing the browser.


TimeoutException: Message: 
Stacktrace:
0   chromedriver                        0x0000000100b6a668 cxxbridge1$str$ptr + 2723108
1   chromedriver                        0x0000000100b628dc cxxbridge1$str$ptr + 2690968
2   chromedriver                        0x00000001006b6714 cxxbridge1$string$len + 90428
3   chromedriver                        0x00000001006fd7c0 cxxbridge1$string$len + 381416
4   chromedriver                        0x000000010073ede8 cxxbridge1$string$len + 649232
5   chromedriver                        0x00000001006f19c8 cxxbridge1$string$len + 332784
6   chromedriver                        0x0000000100b2e28c cxxbridge1$str$ptr + 2476360
7   chromedriver                        0x0000000100b31520 cxxbridge1$str$ptr + 2489308
8   chromedriver                        0x0000000100b0fa78 cxxbridge1$str$ptr + 2351412
9   chromedriver                        0x0000000100b31da8 cxxbridge1$str$ptr + 2491492
10  chromedriver                        0x0000000100b00d6c cxxbridge1$str$ptr + 2290728
11  chromedriver                        0x0000000100b51d74 cxxbridge1$str$ptr + 2622512
12  chromedriver                        0x0000000100b51f00 cxxbridge1$str$ptr + 2622908
13  chromedriver                        0x0000000100b62528 cxxbridge1$str$ptr + 2690020
14  libsystem_pthread.dylib             0x0000000193aa2c0c _pthread_start + 136
15  libsystem_pthread.dylib             0x0000000193a9db80 thread_start + 8
