In [22]:
import requests
from bs4 import BeautifulSoup # beautifulsoup4
import pandas as pd
import lxml
import csv
import time
import random
import math


In [36]:
BASE_URL = "https://www.lamudi.com.ph/buy/metro-manila"
property_data = []

In [37]:
cities = [
    'quezon-city', 
    # 'makati', 'taguig', 
    # 'manila', 'mandaluyong', 'pasay', 
    # 'marikina', 'muntinlupa', 'san-juan-5', 
    # 'las-pinas', 'paranaque', 'pasig',
    ]

# Mapping URL city names to correct names
city_name_mapping = {
    "san-juan-5": "san juan",
    "las-pinas": "las piñas",
    "paranaque": "parañaque",  # Handle special characters
}

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
}

column_names = [
    "price", "category", "bedrooms", "bathrooms", "building_size", 
    "condominium_name", "sku", "geo_location", "furnished_status", 
    "title", "address", "short_description", "url"
]



for city in cities:
    correct_city_name = city_name_mapping.get(city, city.replace("-", " "))

    page = 1  # Start from the first page
    max_pages = 1
    while page <= max_pages:
        listings_url = f"{BASE_URL}/{city}/condo/?page={page}"

        print(f"Fetching: {listings_url}")

        try:
            response = requests.get(listings_url, headers=headers, timeout=10)
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            print(f"Error fetching {listings_url}: {e}")
            break  # Stop fetching for this city if an error occurs

        soup = BeautifulSoup(response.text, "lxml")

        # Find all property listings on the page
        listing_containers = soup.find_all("div", class_="ListingCell-AllInfo")

        if not listing_containers:
            print(f"No more listings found on page {page}. Moving to next city.")
            break  # Exit loop when no more listings are found (last page reached)

        for listing in listing_containers:
            property_info = {key: None for key in column_names}

            # Extract data attributes
            property_info["price"] = listing.get("data-price", None)
            property_info["category"] = listing.get("data-category", None)
            property_info["bedrooms"] = listing.get("data-bedrooms", None)
            property_info["bathrooms"] = listing.get("data-bathrooms", None)
            property_info["building_size"] = listing.get("data-building_size", None)
            property_info["condominium_name"] = listing.get("data-condominiumname", None)
            property_info["sku"] = listing.get("data-sku", None)
            property_info["geo_location"] = listing.get("data-geo-point", None)
            property_info["furnished_status"] = listing.get("data-furnished", None)

            # Extract title
            title = listing.find("h3", class_="ListingCell-KeyInfo-title")
            property_info["title"] = title.text.strip() if title else None

            # Extract address
            address = listing.find("span", class_="ListingCell-KeyInfo-address-text")
            property_info["address"] = address.text.strip() if address else None

            # **City Filtering - Ensure listing is for the correct city**
            if property_info["address"]:
                address_lower = property_info["address"].lower()
                if correct_city_name.lower() not in address_lower:  # Check against correct name
                    print(f"Skipping listing: {property_info['title']} - Address does not match {correct_city_name}")
                    continue  # Skip incorrect listings

            # Extract short description
            description = listing.find("div", class_="ListingCell-shortDescription")
            property_info["short_description"] = description.text.strip() if description else None

            # Extract link
            link = listing.find("a", class_="js-listing-link")
            property_info["url"] = f"https://www.lamudi.com.ph{link['href']}" if link else None

            if property_info not in property_data:
                property_data.append(property_info)
            else:
                print(f"{property_info['address']} already in list. currently at city:{correct_city_name} and page{page}")

        print(f"Scraped {len(listing_containers)} listings from page {page}.")
        
        # Random delay to avoid overwhelming the server
        delay = random.uniform(3, 5)
        print(f"Waiting {delay:.2f} seconds before the next request...")
        time.sleep(delay)

        count_element = soup.find("span", class_="CountTitle-number")
        if count_element:
            total_listings = int(count_element.text.strip().replace(',',''))  # Extract and convert to int
            max_pages = min(math.ceil(total_listings / 30), 50) 
        
        page += 1  # Move to the next page



print(f"\nTotal properties scraped: {len(property_data)}")
for element in property_data[:5]:  # Print first 5 for testing
    print(f"{element}\n")


Fetching: https://www.lamudi.com.ph/buy/metro-manila/quezon-city/condo/?page=1
Scraped 30 listings from page 1.
Waiting 3.68 seconds before the next request...
Fetching: https://www.lamudi.com.ph/buy/metro-manila/quezon-city/condo/?page=2
Scraped 30 listings from page 2.
Waiting 3.42 seconds before the next request...
Fetching: https://www.lamudi.com.ph/buy/metro-manila/quezon-city/condo/?page=3
Nagkaisang Nayon, Quezon City already in list. currently at city:quezon city and page3
Scraped 30 listings from page 3.
Waiting 4.76 seconds before the next request...
Fetching: https://www.lamudi.com.ph/buy/metro-manila/quezon-city/condo/?page=4
Scraped 30 listings from page 4.
Waiting 3.91 seconds before the next request...
Fetching: https://www.lamudi.com.ph/buy/metro-manila/quezon-city/condo/?page=5
Scraped 30 listings from page 5.
Waiting 3.09 seconds before the next request...
Fetching: https://www.lamudi.com.ph/buy/metro-manila/quezon-city/condo/?page=6
Scraped 30 listings from page 6.
W

Price
Property Type - House, Condo
Bedrooms
Bathrooms
Building Size
Land Size
Subdivision Name
Fully Furnished
Foreclosures
Title
Link


In [38]:
#Convert to Pandas DF
df = pd.DataFrame(property_data)
df.head(10)

Unnamed: 0,price,category,bedrooms,bathrooms,building_size,condominium_name,sku,geo_location,furnished_status,title,address,short_description,url
0,3757084,condo,1,1,22.0,Calle Centrale,CD673EAE3357E3CPH,"[121.081903,14.605396]",270.0,Calle Centrale QC Pre-Selling Condo: Below Mar...,"Bagumbayan, Quezon City",Don’t miss out on this limited-time offer! Sec...,https://www.lamudi.com.phhttps://www.lamudi.co...
1,3737860,condo,1,1,22.0,Calle Centrale,CD673EAD9B4C741PH,"[121.081903,14.605396]",270.0,"Pre-selling Condo for Sale in QC Quezon City, ...","Bagumbayan, Quezon City",Don’t miss out on this limited-time offer! Sec...,https://www.lamudi.com.phhttps://www.lamudi.co...
2,3764292,condo,1,1,22.0,Calle Centrale,CD6735CC7707C77PH,"[121.081903,14.605396]",,NO DOWNPAYMENT! Pre-Selling Condo for Sale Que...,"Bagumbayan, Quezon City","Secure your dream home before January 31, 2025...",https://www.lamudi.com.phhttps://www.lamudi.co...
3,12500000,condo,3,2,82.0,Zinnia Towers,CD67984C6062788PH,"[121.076504,14.640624]",,Zinnia South Tower 3 Bedroom Quezon City DMCI ...,"Katipunan, Quezon City",‼️NEW UNIT FOR SALE‼️\n\nFor Sale Condominium ...,https://www.lamudi.com.phhttps://www.lamudi.co...
4,16000000,condo,3,2,84.5,The Crestmont,CD679758068B22FPH,"[121.03282,14.63977]",,3BR Condo With Tandem Parking Crestmont DMCI N...,"Panay Avenue, Quezon City",DMCI Homes Proudly Develop The Crestmont Resid...,https://www.lamudi.com.phhttps://www.lamudi.co...
5,2800000,condo,0,1,20.0,MPlace at South Triangle,CD679823F4AF34EPH,"[121.0443089,14.6621443]",,1 Studio Unit at MPlace at South Triangle in Q...,Quezon City,Tower D\n20 sqm\n1 Studio Unit\n1 Toilet & Bat...,https://www.lamudi.com.phhttps://www.lamudi.co...
6,10800000,condo,1,1,51.0,Shang Summit,CD670F2F9D530ECPH,"[121.035979,14.637561]",270.0,The Shang Summit Condo unit in Quezon City by ...,"South Triangle, Quezon City",Elevate your lifestyle with a prestigious cond...,https://www.lamudi.com.phhttps://www.lamudi.co...
7,8155000,condo,1,1,37.5,Avida Towers Cloverleaf,CD60D00DC1A0059PH,"[121.0016279564,14.6541841262]",,1BR Condo Unit w/ Balcony for Sale at Avida To...,"Balingasa, Quezon City",Avida Towers Cloverleaf Tower 2 is a residenti...,https://www.lamudi.com.phhttps://www.lamudi.co...
8,6301200,condo,2,1,35.6,Suntrust Shanata,CD5E2004A9244B9PH,"[121.0198457392,14.6814855146]",270.0,"2-Bedroom Condo Unit for Sale in Novaliches, Q...","Novaliches, Quezon City",Suntrust Shanata: A Serene Condominium in Nova...,https://www.lamudi.com.phhttps://www.lamudi.co...
9,7455240,condo,2,1,39.0,Suntrust Asmara,CD5B57EE56CB91CPH,"[121.0228079949,14.6211236045]",270.0,"2-Bedroom Condo Unit for Sale in New Manila, Q...","New Manila, Quezon City",Suntrust Asmara: A Premium Condominium in New ...,https://www.lamudi.com.phhttps://www.lamudi.co...


In [39]:
#Convert to CSV
df.to_csv("property_data.csv", index=False)