In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Scraper configuration: site root plus the path of the first listing page.
# `current_url` (root + path) is the absolute URL the crawl starts from.
base_url = 'https://www.magicbricks.com'
initial_url = '/property-for-sale-in-jp-nagar-bangalore-pppfs/page-1'
current_url = base_url + initial_url

In [2]:
# Module-level accumulator: scrape_page() appends one dict per property card.
# It is re-created here, so re-running this cell starts from an empty list.
property_data = []

# Function to extract label and value
def extract_label_and_value(card_soup, data_summary):
    """Return the (label, value) text of one summary item on a property card.

    Args:
        card_soup: Parsed property-card element to search.
        data_summary: Filter for the ``data-summary`` attribute, passed
            unchanged to ``find``. Callers in this notebook pass either a
            single string or a list of alternative strings.

    Returns:
        A ``(label, value)`` tuple of stripped strings. ``(None, None)`` when
        no matching summary item exists. When the item exists but its label or
        value ``div`` is absent, that position of the tuple is ``None`` instead
        of raising ``AttributeError``.
    """
    item = card_soup.find("div", {"data-summary": data_summary})
    if item is None:
        return None, None

    label_div = item.find("div", class_="mb-srp__card__summary--label")
    value_div = item.find("div", class_="mb-srp__card__summary--value")

    # Guard each part separately so a half-populated item still yields
    # whatever text is available rather than aborting the whole scrape.
    label = label_div.get_text().strip() if label_div is not None else None
    value = value_div.get_text().strip() if value_div is not None else None
    return label, value

# Function to extract property name
def extract_property_name(card):
    """Return the society/property name shown on a card, or ``None``.

    Args:
        card: Parsed property-card element to search.

    Returns:
        The stripped text of the ``mb-srp__card__society--name`` link inside
        the ``mb-srp__card__society`` div. ``None`` when either the div or the
        link is missing.
    """
    society_div = card.find('div', class_='mb-srp__card__society')
    if society_div is None:
        return None

    # The society div can be present without a name link; treat that the same
    # as "no name" instead of dereferencing None.
    name_link = society_div.find('a', class_='mb-srp__card__society--name')
    return name_link.text.strip() if name_link is not None else None

# Function to extract property details from a property card
def extract_property_details(card):
    """Collect every scraped field of one property card into a flat dict.

    Args:
        card: Parsed property-card element.

    Returns:
        A dict whose keys, in order, are ``Title``, ``Price``,
        ``Property Name``, ``Figure Count``, ``Price per Sqft`` followed by a
        ``"<Field> Label"`` / ``"<Field> Value"`` pair for each summary field
        (Carpet Area, Status, Floor, Transaction, Furnishing, Overlooking,
        Ownership, Parking, Bathroom, Balcony). ``Title`` and ``Price`` are
        ``None`` when the card has no such element; ``Figure Count`` and
        ``Price per Sqft`` fall back to ``"N/A"``.
    """
    # Title and price are looked up defensively: a card without them yields
    # None for that field instead of raising and aborting the scrape.
    title_element = card.find('h2', class_='mb-srp__card--title')
    title = title_element.text.strip() if title_element is not None else None

    price_element = card.find('div', class_='mb-srp__card__price--amount')
    price = price_element.text.strip() if price_element is not None else None

    property_name = extract_property_name(card)

    figure_count_element = card.find('span', class_='mb-srp__card__photo__fig--count')
    figure_count = figure_count_element.get_text(strip=True) if figure_count_element is not None else "N/A"

    price_per_sqft_element = card.find('div', class_='mb-srp__card__price--size')
    price_per_sqft = price_per_sqft_element.get_text(strip=True) if price_per_sqft_element is not None else "N/A"

    # (output column prefix, data-summary filter). The area entry lists the
    # alternative data-summary values accepted for the "Carpet Area" columns.
    summary_fields = (
        ('Carpet Area', ["carpet-area", "super-area", "plot-area"]),
        ('Status', "status"),
        ('Floor', "floor"),
        ('Transaction', "transaction"),
        ('Furnishing', "furnishing"),
        ('Overlooking', "overlooking"),
        ('Ownership', "ownership"),
        ('Parking', "parking"),
        ('Bathroom', "bathroom"),
        ('Balcony', "balcony"),
    )

    details = {
        'Title': title,
        'Price': price,
        'Property Name': property_name,
        'Figure Count': figure_count,
        'Price per Sqft': price_per_sqft,
    }
    # Insertion order matters: it becomes the column order of the DataFrame.
    for column, data_summary in summary_fields:
        label, value = extract_label_and_value(card, data_summary)
        details[f'{column} Label'] = label
        details[f'{column} Value'] = value
    return details

# Function to scrape a page and append data to property_data
def scrape_page(url):
    """Scrape a listing page and every following page reachable via "Next".

    For each page, every ``div.mb-srp__card`` is converted with
    ``extract_property_details`` and appended to the module-level
    ``property_data`` list. Pagination is followed iteratively (no recursion),
    so the number of pages is not limited by the interpreter's recursion depth.

    Args:
        url: Absolute URL of the first page to scrape.

    Returns:
        None. Results are accumulated in ``property_data``.

    Raises:
        requests.HTTPError: If a page responds with an error status.
        requests.RequestException: On connection problems or timeout.
    """
    from urllib.parse import urljoin  # local import: only needed here

    visited = set()
    while True:
        visited.add(url)
        print(f"Scraping URL: {url}")  # Debug statement

        # A timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "lxml")

        for card in soup.find_all('div', class_='mb-srp__card'):
            property_data.append(extract_property_details(card))

        # Locate the "Next" pagination entry and the link inside it.
        next_item = soup.find("li", class_='mb-pagination--next')
        if next_item is None:
            print("No more pages to scrape.")
            return

        next_link = next_item.find('a')
        href = next_link.get('href') if next_link is not None else None
        if not href:
            # Message text kept unchanged from the recursive version.
            print("Next page URL is None. Stopping recursion.")
            return

        # urljoin handles both site-relative and absolute hrefs.
        next_url = urljoin(url, href)
        if next_url in visited:
            # Defensive: a "Next" link pointing back would loop forever.
            print("Next page was already scraped. Stopping.")
            return
        url = next_url

# Crawl every result page starting at current_url; this fills property_data
# as a side effect (scrape_page itself returns nothing).
scrape_page(current_url)

# Build one DataFrame row per scraped card; columns follow the dict key order
# produced by extract_property_details.
df = pd.DataFrame(property_data)

# Print the DataFrame (plain-text preview of the scraped table)
print(df)


Scraping URL: https://www.magicbricks.com/property-for-sale-in-jp-nagar-bangalore-pppfs/page-1
Scraping URL: https://www.magicbricks.com/property-for-sale-in-jp-nagar-bangalore-pppfs/page-2
Scraping URL: https://www.magicbricks.com/property-for-sale-in-jp-nagar-bangalore-pppfs/page-3
Scraping URL: https://www.magicbricks.com/property-for-sale-in-jp-nagar-bangalore-pppfs/page-4
Scraping URL: https://www.magicbricks.com/property-for-sale-in-jp-nagar-bangalore-pppfs/page-5
Scraping URL: https://www.magicbricks.com/property-for-sale-in-jp-nagar-bangalore-pppfs/page-6
Scraping URL: https://www.magicbricks.com/property-for-sale-in-jp-nagar-bangalore-pppfs/page-7
Scraping URL: https://www.magicbricks.com/property-for-sale-in-jp-nagar-bangalore-pppfs/page-8
Scraping URL: https://www.magicbricks.com/property-for-sale-in-jp-nagar-bangalore-pppfs/page-9
Scraping URL: https://www.magicbricks.com/property-for-sale-in-jp-nagar-bangalore-pppfs/page-10
Scraping URL: https://www.magicbricks.com/propert

In [3]:
# Define the filename for the output file
# NOTE(review): absolute, machine-specific path; the target directory
# presumably must already exist. Consider a configurable relative path.
csv_filename = 'C:/Users/drjun/OneDrive/Desktop/blore csv/jp-nagar_blore.csv'

# Save the DataFrame to a CSV file (only CSV is written here, no Excel file);
# index=False leaves the row index out of the output.
df.to_csv(csv_filename, index=False)

print("Data has been saved to CSV file:", csv_filename)

Data has been saved to CSV file: C:/Users/drjun/OneDrive/Desktop/blore csv/jp-nagar_blore.csv
