In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
from datetime import datetime

def _text_or_default(node, default):
    """Return the whitespace-stripped text of *node*, or *default* if it is None."""
    return node.text.strip() if node is not None else default


def _extract_listing(listing, fallback_date):
    """Pull the location, price, size and date fields out of one property card."""
    price_node = listing.find('div', class_='mb-srp__card__price--amount')
    if price_node is None:
        price = "N/A"
    else:
        # Remove the rupee sign BEFORE stripping; otherwise "₹ 45 Lac" keeps
        # the space that followed the symbol and becomes " 45 Lac".
        price = price_node.text.replace('₹', '').strip()

    return {
        "LOCATION": _text_or_default(
            listing.find('h2', class_='mb-srp__card--title'), "N/A"
        ),
        "PRICE": price,
        # find() returns only the first summary value in the card.
        # NOTE(review): presumably that is the area — confirm against the markup.
        "SIZE": _text_or_default(
            listing.find('div', class_='mb-srp__card__summary--value'), "N/A"
        ),
        # When the card has no date element the scrape date is used instead,
        # so this column can mix site-formatted text with YYYY-MM-DD values.
        "DATE": _text_or_default(
            listing.find('div', class_='mb-srp__card__date'), fallback_date
        ),
    }


def scrape_land_prices_requests():
    """Scrape the MagicBricks Nagpur listing page and save the results to CSV.

    Fetches a single search-results page (with retries on transient HTTP
    errors), extracts location, price, size and date from every property card,
    and writes them to ``NAGPUR_LAND_PRICES_MAGICBRICKS.csv`` in the current
    working directory.

    Returns:
        pandas.DataFrame with columns LOCATION, PRICE, SIZE, DATE on success.
        None if the request or parsing fails, or if no property cards are
        found on the page (in which case no CSV is written).

    This function never raises: any exception is printed and reported as None.
    """
    # URL for land/property listings in Nagpur
    url = "https://www.magicbricks.com/property-for-sale-in-nagpur-pppfs"

    # Set headers to mimic a real browser request
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    retries = Retry(
        total=3,                      # Max retry attempts
        backoff_factor=1,             # Delay between retries
        status_forcelist=[429, 500, 502, 503, 504]  # Retry on these status codes
    )

    try:
        # The context manager closes the session (and its pooled connections)
        # even when the request raises.
        with requests.Session() as session:
            session.mount('https://', HTTPAdapter(max_retries=retries))
            response = session.get(url, headers=headers, timeout=30)
            response.raise_for_status()  # Raise exception if request fails

        # Parse the HTML content
        soup = BeautifulSoup(response.text, 'html.parser')

        # Find all property cards (update class name if needed)
        listings = soup.find_all('div', class_='mb-srp__card')

        if not listings:
            # A 200 response with zero cards is not a successful scrape; bail
            # out before overwriting any existing CSV with a header-only file.
            print("NO LISTINGS FOUND. THE PAGE STRUCTURE MAY HAVE CHANGED OR THE REQUEST WAS BLOCKED.")
            return None

        # Get today's date to use as fallback
        current_date = datetime.now().strftime('%Y-%m-%d')

        rows = [_extract_listing(listing, current_date) for listing in listings]

        # Convert to DataFrame with a fixed column order
        df = pd.DataFrame(rows, columns=["LOCATION", "PRICE", "SIZE", "DATE"])

        # Save to CSV
        df.to_csv('NAGPUR_LAND_PRICES_MAGICBRICKS.csv', index=False, encoding='utf-8')
        print("DATA SAVED TO NAGPUR_LAND_PRICES_MAGICBRICKS.csv")

        return df

    except Exception as e:
        # Top-level boundary for the scraper: report the failure and signal it
        # to the caller through the None return value.
        print(f"ERROR OCCURRED: {e}")
        return None

# Script entry point
if __name__ == "__main__":
    # Announce the run, invoke the scraper, then report the outcome.
    print("STARTING WEB SCRAPER FOR NAGPUR LAND PRICES ON MAGICBRICKS...")
    df = scrape_land_prices_requests()

    # The scraper signals failure by returning None instead of a DataFrame.
    status_message = (
        "SCRAPING COMPLETED SUCCESSFULLY!"
        if df is not None
        else "SCRAPING FAILED. PLEASE CHECK THE SITE STRUCTURE OR YOUR CONNECTION."
    )
    print(status_message)


STARTING WEB SCRAPER FOR NAGPUR LAND PRICES ON MAGICBRICKS...
DATA SAVED TO NAGPUR_LAND_PRICES_MAGICBRICKS.csv
SCRAPING COMPLETED SUCCESSFULLY!
