In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

In [2]:
# Module-level list of result rows. It starts with a single placeholder row
# that stands in for the output of a real scraping loop; each row is a dict
# keyed by "City", "Locality" and "Property Name".
all_data = [
    {
        "City": "Example City",
        "Locality": "Example Locality",
        "Property Name": "Example Property",
    },
]


In [3]:
# Site root; also the page requested when looking for city links.
BASE_URL = "https://www.99acres.com"

# Per-city listing URL template; fill the {city} placeholder with str.format.
CITY_URL = BASE_URL + "/rent-property-in-{city}-ffid"

# Headers sent with every request (a desktop Chrome User-Agent string).
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.3'
    ),
}

In [4]:
# Function to scrape city names
def scrape_city_names(timeout=10):
    """Fetch BASE_URL and return the city names linked from it.

    Args:
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get``. Without a timeout a stalled connection would
            block the notebook indefinitely.

    Returns:
        list[str]: The stripped, non-empty text of every ``a.city-link``
        anchor, in document order. An empty list is returned when the
        request fails (connection error, timeout, or HTTP error status).
    """
    try:
        response = requests.get(BASE_URL, headers=HEADERS, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Best-effort scraping: report the problem and carry on with nothing.
        print(f"Error fetching city names: {e}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    # Modify this selector based on the actual structure of the page
    city_links = soup.select('a.city-link')

    # Drop anchors with no visible text: an empty name cannot be turned into
    # a meaningful city URL later on.
    stripped_names = (link.text.strip() for link in city_links)
    return [name for name in stripped_names if name]

In [5]:
# Function to scrape localities within a city
def scrape_localities(city, timeout=10):
    """Return the locality names listed on a city's rental page.

    Args:
        city: City name as displayed on the site; it is lower-cased and its
            spaces are replaced with hyphens before being substituted into
            CITY_URL.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get``. Without a timeout a stalled connection would
            block the notebook indefinitely.

    Returns:
        list[str]: The stripped, non-empty text of every ``a.locality-link``
        anchor, in document order. An empty list is returned when the
        request fails (connection error, timeout, or HTTP error status).
    """
    city_url = CITY_URL.format(city=city.lower().replace(" ", "-"))
    try:
        response = requests.get(city_url, headers=HEADERS, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Best-effort scraping: report the problem and carry on with nothing.
        print(f"Error fetching localities for {city}: {e}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    # Modify this selector based on the actual structure of the page
    locality_links = soup.select('a.locality-link')

    # Drop anchors with no visible text so callers never receive "" as a
    # locality name.
    stripped_names = (link.text.strip() for link in locality_links)
    return [name for name in stripped_names if name]

In [6]:
# Function to scrape properties in a locality
def scrape_properties(city, locality, timeout=10):
    """Return the property names listed for one locality of a city.

    Args:
        city: City name as displayed on the site.
        locality: Locality name as displayed on the site.
        timeout: Seconds to wait for the server, passed straight to
            ``requests.get``. Without a timeout a stalled connection would
            block the notebook indefinitely.

    Returns:
        list[str]: The stripped, non-empty text of every ``a.property-link``
        anchor, in document order. An empty list is returned when the
        request fails (connection error, timeout, or HTTP error status).
    """
    # Slug both path segments the same way scrape_localities slugs the city
    # (lower-case, spaces to hyphens) so display names such as "New Delhi"
    # do not end up in the URL path with spaces and capitals.
    city_slug = city.lower().replace(" ", "-")
    locality_slug = locality.lower().replace(" ", "-")
    locality_url = f"{BASE_URL}/{city_slug}/{locality_slug}-rentals"
    try:
        response = requests.get(locality_url, headers=HEADERS, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # Best-effort scraping: report the problem and carry on with nothing.
        print(f"Error fetching properties in {locality}, {city}: {e}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')

    # Modify this selector based on the actual structure of the page
    property_links = soup.select('a.property-link')

    # Drop anchors with no visible text so no blank property rows are produced.
    stripped_names = (link.text.strip() for link in property_links)
    return [name for name in stripped_names if name]

In [7]:
def main():
    """Scrape every city, its localities and their properties.

    Walks city -> locality -> property using scrape_city_names,
    scrape_localities and scrape_properties, sleeping one second before each
    locality-list and property-list request.

    Returns:
        list[dict]: One dict per property with the keys "City", "Locality"
        and "Property Name". The list is empty when nothing could be scraped.
        (Previously the collected rows lived only in a local variable and
        were discarded when the function returned.)
    """
    # Local accumulator; deliberately not named like the module-level
    # `all_data` list so the two are not confused.
    records = []

    # Step 1: Scrape all city names
    cities = scrape_city_names()

    for city in cities:
        time.sleep(1)  # Add delay to be polite

        # Step 2: Scrape localities in each city
        localities = scrape_localities(city)

        for locality in localities:
            time.sleep(1)  # Add delay to be polite

            # Step 3: Scrape properties in each locality
            properties = scrape_properties(city, locality)

            records.extend(
                {
                    "City": city,
                    "Locality": locality,
                    "Property Name": property_name,
                }
                for property_name in properties
            )

    return records

In [8]:
# Turn the module-level `all_data` rows into a table and write it to disk
# without the index column.
# NOTE(review): none of the visible cells call main(), so the rows written
# here are whatever the module-level `all_data` list holds — confirm that is
# the intended input.
df = pd.DataFrame(data=all_data)
df.to_csv(
    'rental_properties.csv',
    index=False,
)
print("Data saved to rental_properties.csv")

Data saved to rental_properties.csv
