### Importing necessary libraries

In [100]:
import numpy as np
import pandas as pd
import time 
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException

### implementing a page wait function to handle page loading

In [102]:
def page_wait(driver,wait):
    title = driver.title
    try:
        wait.until(
            #it expects a function that should take driver insatance as input
            lambda d : d.execute_script("return document.readyState") == "complete"
        )
    except :
        print(f"The webpage {title} loading is unsuccesful.\n")
    else:
        print(f"The webpage {title} was loaded succesfully\n")

### The scraping script

In [120]:

# Chrome options to bypass security mechanisms
chrome_options = Options()
chrome_options.add_argument("--disable-http2")
chrome_options.add_argument("--incognito")
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
chrome_options.add_argument("--ignore-certificate-errors")
chrome_options.add_argument("--enable-features=NetworkServiceInProcess")
chrome_options.add_argument("--disable-features=NetworkService")
chrome_options.add_argument(
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36"
)

driver = webdriver.Chrome(options=chrome_options)
driver.maximize_window()

# Explicit waits
wait = WebDriverWait(driver, 5)

# Accessing the target webpage
url = "https://www.99acres.com"
driver.get(url)
page_wait(driver, wait)

# Finding the location search bar
try:
    search_bar = wait.until(
        EC.presence_of_element_located((By.XPATH, '//*[@id="keyword2"]'))
    )
except:
    print("Search bar wasn't found in time frame\n")
else:
    search_bar.send_keys("Mysore")

# Selecting Mysore
try:
    mysore = wait.until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="0"]'))
    )
except:
    print("Mysore option wasn't found in time frame\n")
else:
    mysore.click()
    time.sleep(2)

# Hitting enter
try:
    search_btn = wait.until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="searchform_search_btn"]'))
    )
except:
    print("Search button could not be clicked in time frame")
else:
    search_btn.click()
    page_wait(driver, wait)
    time.sleep(2)

# Adjusting budget slider to only consider properties worth less than 5 crores
try:
    slider = wait.until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="budgetLeftFilter_max_node"]'))
    )
except:
    print("Budget slider wasn't loaded in time frame")
else:
    actions = ActionChains(driver)
    (
        actions
        .click_and_hold(slider)
        .move_by_offset(-72, 0)
        .release()
        .perform()
    )
    time.sleep(2)

# Filter results to include only ready-to-move properties
try:
    ready = wait.until(
        EC.element_to_be_clickable((By.XPATH, '//*[@id="app"]/div/div/div[4]/div[3]/div[1]/div[3]/section/div/div/div/div/div[1]/div/div[4]'))
    )
except:
    print("Ready button wasn't loaded in time frame")
else:
    ready.click()
    time.sleep(1)

# Navigate and extract the data of properties
data = []
page_count = 0

while True:
    page_count += 1
    try:
        rows = driver.find_elements(By.CLASS_NAME, "tupleNew__contentWrap")
        for row in rows:
            try:
                name = row.find_element(By.CLASS_NAME, "tupleNew__headingNrera").text
            except:
                name = np.nan

            try:
                location = row.find_element(By.CLASS_NAME, "tupleNew__propType").text
            except:
                location = np.nan

            try:
                price = row.find_element(By.CLASS_NAME, "tupleNew__priceValWrap").text
            except:
                price = np.nan

            try:
                elements = row.find_elements(By.CLASS_NAME, "tupleNew__area1Type")
            except:
                area, bhk = np.nan, np.nan
            else:
                area, bhk = [ele.text for ele in elements]

            property = {
                "name": name,
                "location": location,
                "price": price,
                "area": area,
                "bhk": bhk
            }
            data.append(property)

        next_page = wait.until(EC.presence_of_element_located((By.XPATH, "//a[normalize-space()='Next Page >']")))
        driver.execute_script("arguments[0].scrollIntoView();", next_page)
        time.sleep(2)
        driver.execute_script("arguments[0].click();", next_page)
        page_wait(driver, wait)

    except StaleElementReferenceException:
        print("Stale element encountered. Retrying...")
        time.sleep(1)
        continue
    except NoSuchElementException:
        print(f"Next Page button not found. No more pages or site changed. {page_count} pages scrolled")
        break
    except Exception as e:
        print("Unexpected error:", e)
        time.sleep(1)
        break


Stale element encountered. Retrying...
Stale element encountered. Retrying...
Stale element encountered. Retrying...
Unexpected error: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF62AE7FE65+26629]
	(No symbol) [0x00007FF62ADE6030]
	(No symbol) [0x00007FF62AC7931A]
	(No symbol) [0x00007FF62ACCF8E7]
	(No symbol) [0x00007FF62ACCFB1C]
	(No symbol) [0x00007FF62AD234A7]
	(No symbol) [0x00007FF62ACF7AEF]
	(No symbol) [0x00007FF62AD20169]
	(No symbol) [0x00007FF62ACF7883]
	(No symbol) [0x00007FF62ACC0550]
	(No symbol) [0x00007FF62ACC1803]
	GetHandleVerifier [0x00007FF62B1D72DD+3529853]
	GetHandleVerifier [0x00007FF62B1EDA42+3621858]
	GetHandleVerifier [0x00007FF62B1E24F3+3575443]
	GetHandleVerifier [0x00007FF62AF4B79A+860474]
	(No symbol) [0x00007FF62ADF08AF]
	(No symbol) [0x00007FF62ADECBE4]
	(No symbol) [0x00007FF62ADECD86]
	(No symbol) [0x00007FF62ADDC2E9]
	BaseThreadInitThunk [0x00007FFEC42CE8D7+23]
	RtlUserThreadStart [0x00007FFEC4A1BF6C+44]



### The scraped data

In [122]:
data

[{'name': 'Rai Dream City',
  'location': 'Residential land / Plot in Bogadi, Mysore',
  'price': '₹17 - 41.25 Lac',
  'area': '581 - 1,453 sqft',
  'bhk': 'Plot/Land'},
 {'name': 'GSS Yogic Village',
  'location': 'Residential land / Plot in Jayapura, Mysore',
  'price': '₹12.85 - 43.61 Lac',
  'area': '680 - 2,560 sqft',
  'bhk': 'Plot/Land'},
 {'name': 'Lalithadripura, Mysore',
  'location': 'Residential land / Plot in Lalithadripura, Mysore',
  'price': '₹74 Lac',
  'area': '1,200 sqft',
  'bhk': 'Plot/Land'},
 {'name': 'Nestwell Starling Nest',
  'location': '3 BHK Flat in Vishweshwara Nagar , Mysore',
  'price': '₹90 Lac',
  'area': '1,420 sqft',
  'bhk': '3 BHK'},
 {'name': 'Vidyaranyapura, Mysore',
  'location': '4 Bedroom House in Vidyaranyapura, Mysore',
  'price': '₹2.59 Cr',
  'area': '2,587 sqft',
  'bhk': '4 BHK'},
 {'name': 'Madagalli, Kalyaninagar, Mysore',
  'location': 'Residential land / Plot in Madagalli, Kalyaninagar, Mysore',
  'price': '₹1.57 Cr',
  'area': '2,40

### Exporting the scraped data

In [129]:
raw_df = pd.DataFrame(data)

In [131]:
raw_df.to_csv('raw_mysuru.csv')