# Selenium Web Scraping Demo
This notebook demonstrates how to use Selenium to interact with a dynamic website that requires form inputs and button clicks. We will scrape air quality data from the CPCB website.

In [54]:
# Install required packages
!pip install selenium webdriver-manager



In [56]:
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager

# Setup Chrome options
chrome_options = Options()
# chrome_options.add_argument("--headless")  # Remove for visible browser
chrome_options.add_argument("--no-sandbox")

# Use webdriver-manager to get the driver
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)

try:
    # Step 1: Open login page
    driver.get("https://the-internet.herokuapp.com/login")

    # Step 2: Login
    driver.find_element(By.ID, "username").send_keys("tomsmith")
    driver.find_element(By.ID, "password").send_keys("SuperSecretPassword!" + Keys.RETURN)

    # Step 3: Wait for successful login
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, "flash.success"))
    )

    # Step 4: Scrape secure content
    secure_area = driver.find_element(By.CLASS_NAME, "example")
    print("Secure Content:\n", secure_area.text)

    # Step 5: Stay on page for a while
    print("Staying on the secure page for 5 seconds...")
    time.sleep(5)

    # Step 6: Click logout button
    logout_button = driver.find_element(By.XPATH, "//a[@href='/logout']")
    logout_button.click()

    # Step 7: Confirm logout success
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.CLASS_NAME, "flash.success"))
    )
    print("Logged out successfully.")

except Exception as e:
    print("Error:", e)

finally:
    driver.quit()


Secure Content:
 Secure Area
Welcome to the Secure Area. When you are done click logout below.
Logout
Staying on the secure page for 5 seconds...
Error: Message: invalid session id: session deleted as the browser has closed the connection
from disconnected: not connected to DevTools
  (Session info: chrome=135.0.7049.115)
Stacktrace:
	GetHandleVerifier [0x0067D363+60275]
	GetHandleVerifier [0x0067D3A4+60340]
	(No symbol) [0x004B06F3]
	(No symbol) [0x0049FF20]
	(No symbol) [0x004BDDA2]
	(No symbol) [0x00523D2F]
	(No symbol) [0x0053DFE9]
	(No symbol) [0x0051CE86]
	(No symbol) [0x004EC623]
	(No symbol) [0x004ED474]
	GetHandleVerifier [0x008C8FE3+2467827]
	GetHandleVerifier [0x008C45E6+2448886]
	GetHandleVerifier [0x008DF80C+2560028]
	GetHandleVerifier [0x00693DF5+153093]
	GetHandleVerifier [0x0069A3BD+179149]
	GetHandleVerifier [0x00684BB8+91080]
	GetHandleVerifier [0x00684D60+91504]
	GetHandleVerifier [0x0066FA10+4640]
	BaseThreadInitThunk [0x76145D49+25]
	RtlInitializeExceptionChain [0x