## House Scraping

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import time
from datetime import datetime

def get_property_listings(url, page=1):
    full_url = f"{url}?page={page}"
    print(f"\nScraping URL: {full_url}")
    
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
    try:
        response = requests.get(full_url, headers=headers)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        listings = soup.find_all('article', class_='listing-snippet')
        print(f"Found {len(listings)} listings on page {page}")
        return listings
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {str(e)}")
        return []

def extract_listing_data(listing):
    data = {}
    
    title_div = listing.find('div', class_='text-2xl font-semibold')
    data['title'] = title_div.get('title', '') if title_div else ''
    
    location_div = listing.find('div', class_='text-neutral-2 my-[5px]')
    data['location'] = location_div.text.strip() if location_div else ''
    
    price_div = listing.find('div', {'class': ['text-secondary-base', 'whitespace-nowrap', 'font-bold', 'text-3xl']})
    if price_div:
        data['price'] = price_div.text.strip()
        price_per_sqm_div = price_div.find_next_sibling('div', {'class': ['text-neutral-2', 'whitespace-nowrap', 'font-bold', 'text-xl']})
        if price_per_sqm_div:
            data['price_per_sqm'] = price_per_sqm_div.text.strip()
    else:
        data['price'] = ''
        data['price_per_sqm'] = ''
    
    link = listing.find('a', href=True)
    data['url'] = link['href'] if link else ''
    
    return data

def get_detailed_property_info(url):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        
        data = {}
        key_features = soup.find('ul', class_='key-featured')
        if key_features:
            for item in key_features.find_all('li'):
                span_text = item.find_all('span')[-1].text.strip()
                if 'Bed:' in span_text or 'Beds:' in span_text:
                    data['bedrooms'] = span_text.replace('Bed:', '').replace('Beds:', '').strip()
                elif 'Bath:' in span_text or 'Baths:' in span_text:
                    data['bathrooms'] = span_text.replace('Bath:', '').replace('Baths:', '').strip()
                elif 'Usable area:' in span_text:
                    data['floor_area'] = span_text.replace('Usable area:', '').replace('m²', '').strip()
                elif 'Land area:' in span_text:
                    data['land_area'] = span_text.replace('Land area:', '').replace('m²', '').strip()
                elif 'Floor:' in span_text or 'Floors:' in span_text:
                    data['floor'] = span_text.replace('Floor:', '').replace('Floors:', '').strip()
        
        scripts = soup.find_all('script', type=lambda t: t and 'javascript' in t)
        for script in scripts:
            script_text = script.string if script.string else ''
            if 'var name =' in str(script_text):
                data['name'] = re.search('var name = "(.*?)";', str(script_text)).group(1)
            elif 'var gps_lat =' in str(script_text):
                data['latitude'] = re.search('var gps_lat = "(.*?)";', str(script_text)).group(1)
            elif 'var gps_lon =' in str(script_text):
                data['longitude'] = re.search('var gps_lon = "(.*?)";', str(script_text)).group(1)
            elif 'var address =' in str(script_text):
                data['address'] = re.search('var address = "(.*?)";', str(script_text)).group(1)
        
        facilities = []
        facility_items = soup.find_all('img', title=lambda t: t and 'Facilities:' in t)
        for item in facility_items:
            facility = item.get('title', '').replace('Facilities: ', '').strip()
            if facility:
                facilities.append(facility)
        
        data['facilities'] = facilities
        return data
        
    except Exception as e:
        print(f"Error fetching detailed info for {url}: {str(e)}")
        return {}

def create_facilities_columns(facilities_list):
    base_facilities = [
        'Access for the disabled', 'Air conditioning', 'Alarm', 'Balcony',
        'Built-in kitchen', 'Built-in wardrobe', 'Car park', "Children's area",
        'Cistern', 'Concierge', 'Electricity', 'Elevator', 'Equipped kitchen',
        'Garden', 'Grill', 'Guardhouse', 'Gym', 'Heating', 'Hot Tub', 'Internet',
        'Library', 'Natural gas', 'Office', 'Panoramic view', 'Patio',
        'Roof garden', 'Sauna', 'Security', 'Swimming pool', 'Tennis court',
        'Terrace', 'Utility room', 'Video cable', 'Water'
    ]
    
    facilities_dict = {facility: 0 for facility in base_facilities}
    for facility in facilities_list:
        if facility in facilities_dict:
            facilities_dict[facility] = 1
        else:
            print(f"Found new facility: {facility}")
            facilities_dict[facility] = 1
            
    return facilities_dict

def scrape_properties(base_url, max_pages=50):
    all_properties = []
    all_facilities = set()
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = f"dotproperty_listings_{timestamp}.csv"
    
    for page in range(1, max_pages + 1):
        listings = get_property_listings(base_url, page)
        if not listings:
            print(f"No listings found on page {page}")
            break
            
        for i, listing in enumerate(listings, 1):
            try:
                property_data = extract_listing_data(listing)
                if property_data['url']:
                    detailed_data = get_detailed_property_info(property_data['url'])
                    facilities = detailed_data.get('facilities', [])
                    all_facilities.update(facilities)
                    facilities_data = create_facilities_columns(facilities)
                    
                    property_data.update(detailed_data)
                    property_data.update(facilities_data)
                    all_properties.append(property_data)
                    
                    print(f"Property {i} on page {page} processed successfully")
                time.sleep(1)
            except Exception as e:
                print(f"Error processing property {i} on page {page}: {str(e)}")
        
        df = pd.DataFrame(all_properties)
        df.to_csv(output_file, index=False)
        print(f"Saved {len(all_properties)} listings to {output_file}")

    print(f"\nTotal unique facilities found: {len(all_facilities)}")
    print("Facility columns created:", sorted(list(all_facilities)))
    print(f"\nTotal listings scraped: {len(all_properties)}")
    
    return pd.DataFrame(all_properties)

if __name__ == "__main__":
    url = "https://www.dotproperty.com.ph/houses-for-sale/metro-manila/quezon-city"
    df = scrape_properties(url)
    print("Scraping completed!")


Scraping URL: https://www.dotproperty.com.ph/houses-for-sale/metro-manila/quezon-city?page=1
Found 25 listings on page 1
Property 1 on page 1 processed successfully
Property 2 on page 1 processed successfully
Property 3 on page 1 processed successfully
Property 4 on page 1 processed successfully
Property 5 on page 1 processed successfully
Property 6 on page 1 processed successfully
Property 7 on page 1 processed successfully
Found new facility: Cellar
Property 8 on page 1 processed successfully
Property 9 on page 1 processed successfully
Found new facility: Fireplace
Property 10 on page 1 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 11 on page 1 processed successfully
Found new facility: Cellar
Property 12 on page 1 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 13 on page 1 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 14 on page 1 processed successfully
Fo

Found new facility: Fireplace
Found new facility: Cellar
Property 6 on page 5 processed successfully
Property 7 on page 5 processed successfully
Property 8 on page 5 processed successfully
Property 9 on page 5 processed successfully
Found new facility: Cellar
Property 10 on page 5 processed successfully
Property 11 on page 5 processed successfully
Property 12 on page 5 processed successfully
Property 13 on page 5 processed successfully
Found new facility: Cellar
Property 14 on page 5 processed successfully
Property 15 on page 5 processed successfully
Property 16 on page 5 processed successfully
Property 17 on page 5 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 18 on page 5 processed successfully
Property 19 on page 5 processed successfully
Property 20 on page 5 processed successfully
Property 21 on page 5 processed successfully
Property 22 on page 5 processed successfully
Property 23 on page 5 processed successfully
Property 24 on page 5 proc

Found 25 listings on page 11
Found new facility: Cellar
Property 1 on page 11 processed successfully
Property 2 on page 11 processed successfully
Property 3 on page 11 processed successfully
Property 4 on page 11 processed successfully
Property 5 on page 11 processed successfully
Found new facility: Cellar
Property 6 on page 11 processed successfully
Property 7 on page 11 processed successfully
Property 8 on page 11 processed successfully
Property 9 on page 11 processed successfully
Property 10 on page 11 processed successfully
Property 11 on page 11 processed successfully
Property 12 on page 11 processed successfully
Property 13 on page 11 processed successfully
Property 14 on page 11 processed successfully
Property 15 on page 11 processed successfully
Property 16 on page 11 processed successfully
Property 17 on page 11 processed successfully
Property 18 on page 11 processed successfully
Property 19 on page 11 processed successfully
Property 20 on page 11 processed successfully
Found 

Found 25 listings on page 17
Property 1 on page 17 processed successfully
Property 2 on page 17 processed successfully
Property 3 on page 17 processed successfully
Property 4 on page 17 processed successfully
Property 5 on page 17 processed successfully
Property 6 on page 17 processed successfully
Found new facility: Cellar
Property 7 on page 17 processed successfully
Property 8 on page 17 processed successfully
Property 9 on page 17 processed successfully
Property 10 on page 17 processed successfully
Property 11 on page 17 processed successfully
Property 12 on page 17 processed successfully
Property 13 on page 17 processed successfully
Property 14 on page 17 processed successfully
Property 15 on page 17 processed successfully
Property 16 on page 17 processed successfully
Property 17 on page 17 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 18 on page 17 processed successfully
Property 19 on page 17 processed successfully
Property 20 on page 17

Property 25 on page 22 processed successfully
Saved 550 listings to dotproperty_listings_20241130_025356.csv

Scraping URL: https://www.dotproperty.com.ph/houses-for-sale/metro-manila/quezon-city?page=23
Found 25 listings on page 23
Property 1 on page 23 processed successfully
Property 2 on page 23 processed successfully
Property 3 on page 23 processed successfully
Property 4 on page 23 processed successfully
Property 5 on page 23 processed successfully
Property 6 on page 23 processed successfully
Property 7 on page 23 processed successfully
Property 8 on page 23 processed successfully
Property 9 on page 23 processed successfully
Property 10 on page 23 processed successfully
Property 11 on page 23 processed successfully
Property 12 on page 23 processed successfully
Property 13 on page 23 processed successfully
Property 14 on page 23 processed successfully
Property 15 on page 23 processed successfully
Property 16 on page 23 processed successfully
Property 17 on page 23 processed success

Found 25 listings on page 29
Property 1 on page 29 processed successfully
Property 2 on page 29 processed successfully
Property 3 on page 29 processed successfully
Property 4 on page 29 processed successfully
Property 5 on page 29 processed successfully
Property 6 on page 29 processed successfully
Property 7 on page 29 processed successfully
Property 8 on page 29 processed successfully
Property 9 on page 29 processed successfully
Property 10 on page 29 processed successfully
Property 11 on page 29 processed successfully
Property 12 on page 29 processed successfully
Property 13 on page 29 processed successfully
Property 14 on page 29 processed successfully
Property 15 on page 29 processed successfully
Property 16 on page 29 processed successfully
Property 17 on page 29 processed successfully
Property 18 on page 29 processed successfully
Property 19 on page 29 processed successfully
Property 20 on page 29 processed successfully
Property 21 on page 29 processed successfully
Property 22 on

Property 4 on page 35 processed successfully
Property 5 on page 35 processed successfully
Property 6 on page 35 processed successfully
Property 7 on page 35 processed successfully
Property 8 on page 35 processed successfully
Property 9 on page 35 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 10 on page 35 processed successfully
Property 11 on page 35 processed successfully
Property 12 on page 35 processed successfully
Property 13 on page 35 processed successfully
Property 14 on page 35 processed successfully
Property 15 on page 35 processed successfully
Property 16 on page 35 processed successfully
Property 17 on page 35 processed successfully
Property 18 on page 35 processed successfully
Property 19 on page 35 processed successfully
Property 20 on page 35 processed successfully
Property 21 on page 35 processed successfully
Property 22 on page 35 processed successfully
Property 23 on page 35 processed successfully
Property 24 on page 35 proces

Property 8 on page 41 processed successfully
Property 9 on page 41 processed successfully
Property 10 on page 41 processed successfully
Property 11 on page 41 processed successfully
Property 12 on page 41 processed successfully
Property 13 on page 41 processed successfully
Property 14 on page 41 processed successfully
Property 15 on page 41 processed successfully
Property 16 on page 41 processed successfully
Property 17 on page 41 processed successfully
Property 18 on page 41 processed successfully
Property 19 on page 41 processed successfully
Property 20 on page 41 processed successfully
Property 21 on page 41 processed successfully
Property 22 on page 41 processed successfully
Property 23 on page 41 processed successfully
Property 24 on page 41 processed successfully
Property 25 on page 41 processed successfully
Saved 1025 listings to dotproperty_listings_20241130_025356.csv

Scraping URL: https://www.dotproperty.com.ph/houses-for-sale/metro-manila/quezon-city?page=42
Found 25 listin

Property 12 on page 47 processed successfully
Property 13 on page 47 processed successfully
Property 14 on page 47 processed successfully
Property 15 on page 47 processed successfully
Property 16 on page 47 processed successfully
Property 17 on page 47 processed successfully
Property 18 on page 47 processed successfully
Property 19 on page 47 processed successfully
Property 20 on page 47 processed successfully
Property 21 on page 47 processed successfully
Property 22 on page 47 processed successfully
Property 23 on page 47 processed successfully
Property 24 on page 47 processed successfully
Property 25 on page 47 processed successfully
Saved 1175 listings to dotproperty_listings_20241130_025356.csv

Scraping URL: https://www.dotproperty.com.ph/houses-for-sale/metro-manila/quezon-city?page=48
Found 25 listings on page 48
Property 1 on page 48 processed successfully
Property 2 on page 48 processed successfully
Property 3 on page 48 processed successfully
Property 4 on page 48 processed s

## Condo Scraping

In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import time
from datetime import datetime

def extract_range_value(text):
    # Handle ranges like "1-2" or single values like "1"
    if '-' in text:
        start, end = text.split('-')
        return f"{start.strip()}-{end.strip()}"
    return text.strip()

def extract_area_range(text):
    # Handle area ranges like "26-78 m²"
    match = re.search(r'(\d+(?:\.\d+)?)-?(\d+(?:\.\d+)?)?\s*m', text)
    if match:
        start = match.group(1)
        end = match.group(2)
        return f"{start}-{end}" if end else start
    return text.strip()

def extract_price_range(text):
    # Handle "From ₱ X" or regular price
    if 'From' in text:
        price = re.search(r'From ₱\s*([\d,]+)', text)
        return f"From {price.group(1)}" if price else text.strip()
    else:
        price = re.search(r'₱\s*([\d,]+)', text)
        return price.group(1) if price else text.strip()

def get_property_listings(url, page=1):
    full_url = f"{url}?page={page}"
    print(f"\nScraping URL: {full_url}")
    
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
    try:
        response = requests.get(full_url, headers=headers)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        listings = soup.find_all('article', class_='listing-snippet')
        print(f"Found {len(listings)} listings on page {page}")
        return listings
    except Exception as e:
        print(f"Error fetching data: {str(e)}")
        return []

def extract_listing_data(listing):
    data = {}
    
    title_div = listing.find('div', class_='text-2xl font-semibold')
    data['title'] = title_div.get('title', '') if title_div else ''
    
    location_div = listing.find('div', class_='text-neutral-2 my-[5px]')
    data['location'] = location_div.text.strip() if location_div else ''
    
    # Find price within the flex container
    flex_container = listing.find('div', class_='sm:flex gap-4 justify-between')
    if flex_container:
        price_div = flex_container.find('div', class_='inline-block text-secondary-base whitespace-nowrap font-bold text-3xl')
        if price_div:
            data['price'] = price_div.text.strip().replace('₱', '').replace(',', '').strip()
            price_sqm_div = flex_container.find('div', class_='inline-block text-neutral-2 whitespace-nowrap font-bold text-xl text-right sm:block')
            if price_sqm_div:
                # Extract price per sqm from format like (₱ 175,152 / m²)
                sqm_match = re.search(r'₱\s*([\d,]+)', price_sqm_div.text)
                data['price_per_sqm'] = sqm_match.group(1).replace(',', '') if sqm_match else ''
    
    link = listing.find('a', href=True)
    data['url'] = link['href'] if link else ''
    
    return data

def get_detailed_property_info(url):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
       
        data = {}
        key_features = soup.find('ul', class_='key-featured')
        if key_features:
            for item in key_features.find_all('li'):
                spans = item.find_all('span')
                if spans and len(spans) > 1:
                    text = spans[-1].text.strip()
                    if 'Bed:' in text or 'Beds:' in text:
                        data['bedrooms'] = text.replace('Bed:', '').replace('Beds:', '').strip()
                    elif 'Bath:' in text or 'Baths:' in text:
                        data['bathrooms'] = text.replace('Bath:', '').replace('Baths:', '').strip()
                    elif 'Usable area:' in text:
                        data['floor_area'] = text.replace('Usable area:', '').replace('m²', '').strip()
                    elif 'Floor:' in text or 'Floors:' in text:
                        data['floor'] = text.replace('Floor:', '').replace('Floors:', '').strip()
        
        # Rest of the function remains the same
        scripts = soup.find_all('script', type=lambda t: t and 'javascript' in t)
        for script in scripts:
            script_text = script.string if script.string else ''
            if 'var name =' in str(script_text):
                data['name'] = re.search('var name = "(.*?)";', str(script_text)).group(1)
            elif 'var gps_lat =' in str(script_text):
                data['latitude'] = re.search('var gps_lat = "(.*?)";', str(script_text)).group(1)
            elif 'var gps_lon =' in str(script_text):
                data['longitude'] = re.search('var gps_lon = "(.*?)";', str(script_text)).group(1)
            elif 'var address =' in str(script_text):
                data['address'] = re.search('var address = "(.*?)";', str(script_text)).group(1)
        
        facilities = []
        facility_items = soup.find_all('img', title=lambda t: t and 'Facilities:' in t)
        for item in facility_items:
            facility = item.get('title', '').replace('Facilities: ', '').strip()
            if facility:
                facilities.append(facility)
        
        data['facilities'] = facilities
        return data
    except Exception as e:
        print(f"Error fetching detailed info for {url}: {str(e)}")
        return {}

def create_facilities_columns(facilities_list):
    base_facilities = [
        'Access for the disabled', 'Air conditioning', 'Alarm', 'Balcony',
        'Built-in kitchen', 'Built-in wardrobe', 'Car park', "Children's area",
        'Cistern', 'Concierge', 'Electricity', 'Elevator', 'Equipped kitchen',
        'Garden', 'Grill', 'Guardhouse', 'Gym', 'Heating', 'Hot Tub', 'Internet',
        'Library', 'Natural gas', 'Office', 'Panoramic view', 'Patio',
        'Roof garden', 'Sauna', 'Security', 'Swimming pool', 'Tennis court',
        'Terrace', 'Utility room', 'Video cable', 'Water'
    ]
    
    facilities_dict = {facility: 0 for facility in base_facilities}
    for facility in facilities_list:
        if facility in facilities_dict:
            facilities_dict[facility] = 1
        else:
            print(f"Found new facility: {facility}")
            facilities_dict[facility] = 1
            
    return facilities_dict

def scrape_properties(base_url, max_pages=50):
    all_properties = []
    all_facilities = set()
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = f"dotproperty_condo_listings_{timestamp}.csv"
    
    for page in range(1, max_pages + 1):
        listings = get_property_listings(base_url, page)
        if not listings:
            print(f"No listings found on page {page}")
            break
            
        for i, listing in enumerate(listings, 1):
            try:
                property_data = extract_listing_data(listing)
                if property_data['url']:
                    detailed_data = get_detailed_property_info(property_data['url'])
                    facilities = detailed_data.get('facilities', [])
                    all_facilities.update(facilities)
                    facilities_data = create_facilities_columns(facilities)
                    
                    property_data.update(detailed_data)
                    property_data.update(facilities_data)
                    all_properties.append(property_data)
                    
                    print(f"Property {i} on page {page} processed successfully")
                time.sleep(1)
            except Exception as e:
                print(f"Error processing property {i} on page {page}: {str(e)}")
        
        df = pd.DataFrame(all_properties)
        df.to_csv(output_file, index=False)
        print(f"Saved {len(all_properties)} listings to {output_file}")

    print(f"\nTotal unique facilities found: {len(all_facilities)}")
    print("Facility columns created:", sorted(list(all_facilities)))
    print(f"\nTotal listings scraped: {len(all_properties)}")
    
    return pd.DataFrame(all_properties)

if __name__ == "__main__":
    url = "https://www.dotproperty.com.ph/condos-for-sale/metro-manila/quezon-city?exact_bed=false"
    df = scrape_properties(url)
    print("Scraping completed!")


Scraping URL: https://www.dotproperty.com.ph/condos-for-sale/metro-manila/quezon-city?exact_bed=false?page=1
Found 25 listings on page 1
Property 1 on page 1 processed successfully
Property 2 on page 1 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 3 on page 1 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 4 on page 1 processed successfully
Property 5 on page 1 processed successfully
Property 6 on page 1 processed successfully
Property 7 on page 1 processed successfully
Property 8 on page 1 processed successfully
Property 9 on page 1 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 10 on page 1 processed successfully
Found new facility: Fireplace
Property 11 on page 1 processed successfully
Property 12 on page 1 processed successfully
Property 13 on page 1 processed successfully
Property 14 on page 1 processed successfully
Property 15 on page 1 processed successf

Found 25 listings on page 6
Property 1 on page 6 processed successfully
Property 2 on page 6 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 3 on page 6 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 4 on page 6 processed successfully
Property 5 on page 6 processed successfully
Property 6 on page 6 processed successfully
Property 7 on page 6 processed successfully
Property 8 on page 6 processed successfully
Property 9 on page 6 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 10 on page 6 processed successfully
Found new facility: Fireplace
Property 11 on page 6 processed successfully
Property 12 on page 6 processed successfully
Property 13 on page 6 processed successfully
Property 14 on page 6 processed successfully
Property 15 on page 6 processed successfully
Property 16 on page 6 processed successfully
Property 17 on page 6 processed successfully
Property 18 on 

Found 25 listings on page 11
Property 1 on page 11 processed successfully
Property 2 on page 11 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 3 on page 11 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 4 on page 11 processed successfully
Property 5 on page 11 processed successfully
Property 6 on page 11 processed successfully
Property 7 on page 11 processed successfully
Property 8 on page 11 processed successfully
Property 9 on page 11 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 10 on page 11 processed successfully
Found new facility: Fireplace
Property 11 on page 11 processed successfully
Property 12 on page 11 processed successfully
Property 13 on page 11 processed successfully
Property 14 on page 11 processed successfully
Property 15 on page 11 processed successfully
Property 16 on page 11 processed successfully
Property 17 on page 11 processed successful

Found 25 listings on page 16
Property 1 on page 16 processed successfully
Property 2 on page 16 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 3 on page 16 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 4 on page 16 processed successfully
Property 5 on page 16 processed successfully
Property 6 on page 16 processed successfully
Property 7 on page 16 processed successfully
Property 8 on page 16 processed successfully
Property 9 on page 16 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 10 on page 16 processed successfully
Found new facility: Fireplace
Property 11 on page 16 processed successfully
Property 12 on page 16 processed successfully
Property 13 on page 16 processed successfully
Property 14 on page 16 processed successfully
Property 15 on page 16 processed successfully
Property 16 on page 16 processed successfully
Property 17 on page 16 processed successful

Found 25 listings on page 21
Property 1 on page 21 processed successfully
Property 2 on page 21 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 3 on page 21 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 4 on page 21 processed successfully
Property 5 on page 21 processed successfully
Property 6 on page 21 processed successfully
Property 7 on page 21 processed successfully
Property 8 on page 21 processed successfully
Property 9 on page 21 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 10 on page 21 processed successfully
Found new facility: Fireplace
Property 11 on page 21 processed successfully
Property 12 on page 21 processed successfully
Property 13 on page 21 processed successfully
Property 14 on page 21 processed successfully
Property 15 on page 21 processed successfully
Property 16 on page 21 processed successfully
Property 17 on page 21 processed successful

Found 25 listings on page 26
Property 1 on page 26 processed successfully
Property 2 on page 26 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 3 on page 26 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 4 on page 26 processed successfully
Property 5 on page 26 processed successfully
Property 6 on page 26 processed successfully
Property 7 on page 26 processed successfully
Property 8 on page 26 processed successfully
Property 9 on page 26 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 10 on page 26 processed successfully
Found new facility: Fireplace
Property 11 on page 26 processed successfully
Property 12 on page 26 processed successfully
Property 13 on page 26 processed successfully
Property 14 on page 26 processed successfully
Property 15 on page 26 processed successfully
Property 16 on page 26 processed successfully
Property 17 on page 26 processed successful

Property 1 on page 31 processed successfully
Property 2 on page 31 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 3 on page 31 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 4 on page 31 processed successfully
Property 5 on page 31 processed successfully
Property 6 on page 31 processed successfully
Property 7 on page 31 processed successfully
Property 8 on page 31 processed successfully
Property 9 on page 31 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 10 on page 31 processed successfully
Found new facility: Fireplace
Property 11 on page 31 processed successfully
Property 12 on page 31 processed successfully
Property 13 on page 31 processed successfully
Property 14 on page 31 processed successfully
Property 15 on page 31 processed successfully
Property 16 on page 31 processed successfully
Property 17 on page 31 processed successfully
Property 18 on page 31 pro

Property 2 on page 36 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 3 on page 36 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 4 on page 36 processed successfully
Property 5 on page 36 processed successfully
Property 6 on page 36 processed successfully
Property 7 on page 36 processed successfully
Property 8 on page 36 processed successfully
Property 9 on page 36 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 10 on page 36 processed successfully
Found new facility: Fireplace
Property 11 on page 36 processed successfully
Property 12 on page 36 processed successfully
Property 13 on page 36 processed successfully
Property 14 on page 36 processed successfully
Property 15 on page 36 processed successfully
Property 16 on page 36 processed successfully
Property 17 on page 36 processed successfully
Property 18 on page 36 processed successfully
Property 19 on page 36 pr

Found new facility: Fireplace
Found new facility: Cellar
Property 3 on page 41 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 4 on page 41 processed successfully
Property 5 on page 41 processed successfully
Property 6 on page 41 processed successfully
Property 7 on page 41 processed successfully
Property 8 on page 41 processed successfully
Property 9 on page 41 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 10 on page 41 processed successfully
Found new facility: Fireplace
Property 11 on page 41 processed successfully
Property 12 on page 41 processed successfully
Property 13 on page 41 processed successfully
Property 14 on page 41 processed successfully
Property 15 on page 41 processed successfully
Property 16 on page 41 processed successfully
Property 17 on page 41 processed successfully
Property 18 on page 41 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 19 o

Found new facility: Fireplace
Found new facility: Cellar
Property 4 on page 46 processed successfully
Property 5 on page 46 processed successfully
Property 6 on page 46 processed successfully
Property 7 on page 46 processed successfully
Property 8 on page 46 processed successfully
Property 9 on page 46 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 10 on page 46 processed successfully
Found new facility: Fireplace
Property 11 on page 46 processed successfully
Property 12 on page 46 processed successfully
Property 13 on page 46 processed successfully
Property 14 on page 46 processed successfully
Property 15 on page 46 processed successfully
Property 16 on page 46 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 17 on page 46 processed successfully
Property 18 on page 46 processed successfully
Property 19 on page 46 processed successfully
Property 20 on page 46 processed successfully
Property 21 on page 46 

In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import time
from datetime import datetime

def extract_range_value(text):
    # Handle ranges like "1-2" or single values like "1"
    if '-' in text:
        start, end = text.split('-')
        return f"{start.strip()}-{end.strip()}"
    return text.strip()

def extract_area_range(text):
    # Handle area ranges like "26-78 m²"
    match = re.search(r'(\d+(?:\.\d+)?)-?(\d+(?:\.\d+)?)?\s*m', text)
    if match:
        start = match.group(1)
        end = match.group(2)
        return f"{start}-{end}" if end else start
    return text.strip()

def extract_price_range(text):
    # Handle "From ₱ X" or regular price
    if 'From' in text:
        price = re.search(r'From ₱\s*([\d,]+)', text)
        return f"From {price.group(1)}" if price else text.strip()
    else:
        price = re.search(r'₱\s*([\d,]+)', text)
        return price.group(1) if price else text.strip()

def get_property_listings(url, page=1):
    full_url = f"{url}?page={page}"
    print(f"\nScraping URL: {full_url}")
    
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
    try:
        response = requests.get(full_url, headers=headers)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        listings = soup.find_all('article', class_='listing-snippet')
        print(f"Found {len(listings)} listings on page {page}")
        return listings
    except Exception as e:
        print(f"Error fetching data: {str(e)}")
        return []

def extract_listing_data(listing):
    data = {}
    
    title_div = listing.find('div', class_='text-2xl font-semibold')
    data['title'] = title_div.get('title', '') if title_div else ''
    
    location_div = listing.find('div', class_='text-neutral-2 my-[5px]')
    data['location'] = location_div.text.strip() if location_div else ''
    
    # Find price within the flex container
    flex_container = listing.find('div', class_='sm:flex gap-4 justify-between')
    if flex_container:
        price_div = flex_container.find('div', class_='inline-block text-secondary-base whitespace-nowrap font-bold text-3xl')
        if price_div:
            data['price'] = price_div.text.strip().replace('₱', '').replace(',', '').strip()
            price_sqm_div = flex_container.find('div', class_='inline-block text-neutral-2 whitespace-nowrap font-bold text-xl text-right sm:block')
            if price_sqm_div:
                # Extract price per sqm from format like (₱ 175,152 / m²)
                sqm_match = re.search(r'₱\s*([\d,]+)', price_sqm_div.text)
                data['price_per_sqm'] = sqm_match.group(1).replace(',', '') if sqm_match else ''
    
    link = listing.find('a', href=True)
    data['url'] = link['href'] if link else ''
    
    return data

def get_detailed_property_info(url):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
       
        data = {}
        key_features = soup.find('ul', class_='key-featured')
        if key_features:
            for item in key_features.find_all('li'):
                spans = item.find_all('span')
                if spans and len(spans) > 1:
                    text = spans[-1].text.strip()
                    if 'Bed:' in text or 'Beds:' in text:
                        data['bedrooms'] = text.replace('Bed:', '').replace('Beds:', '').strip()
                    elif 'Bath:' in text or 'Baths:' in text:
                        data['bathrooms'] = text.replace('Bath:', '').replace('Baths:', '').strip()
                    elif 'Usable area:' in text:
                        data['floor_area'] = text.replace('Usable area:', '').replace('m²', '').strip()
                    elif 'Floor:' in text or 'Floors:' in text:
                        data['floor'] = text.replace('Floor:', '').replace('Floors:', '').strip()
        
        # Rest of the function remains the same
        scripts = soup.find_all('script', type=lambda t: t and 'javascript' in t)
        for script in scripts:
            script_text = script.string if script.string else ''
            if 'var name =' in str(script_text):
                data['name'] = re.search('var name = "(.*?)";', str(script_text)).group(1)
            elif 'var gps_lat =' in str(script_text):
                data['latitude'] = re.search('var gps_lat = "(.*?)";', str(script_text)).group(1)
            elif 'var gps_lon =' in str(script_text):
                data['longitude'] = re.search('var gps_lon = "(.*?)";', str(script_text)).group(1)
            elif 'var address =' in str(script_text):
                data['address'] = re.search('var address = "(.*?)";', str(script_text)).group(1)
        
        facilities = []
        facility_items = soup.find_all('img', title=lambda t: t and 'Facilities:' in t)
        for item in facility_items:
            facility = item.get('title', '').replace('Facilities: ', '').strip()
            if facility:
                facilities.append(facility)
        
        data['facilities'] = facilities
        return data
    except Exception as e:
        print(f"Error fetching detailed info for {url}: {str(e)}")
        return {}

def create_facilities_columns(facilities_list):
    base_facilities = [
        'Access for the disabled', 'Air conditioning', 'Alarm', 'Balcony',
        'Built-in kitchen', 'Built-in wardrobe', 'Car park', "Children's area",
        'Cistern', 'Concierge', 'Electricity', 'Elevator', 'Equipped kitchen',
        'Garden', 'Grill', 'Guardhouse', 'Gym', 'Heating', 'Hot Tub', 'Internet',
        'Library', 'Natural gas', 'Office', 'Panoramic view', 'Patio',
        'Roof garden', 'Sauna', 'Security', 'Swimming pool', 'Tennis court',
        'Terrace', 'Utility room', 'Video cable', 'Water'
    ]
    
    facilities_dict = {facility: 0 for facility in base_facilities}
    for facility in facilities_list:
        if facility in facilities_dict:
            facilities_dict[facility] = 1
        else:
            print(f"Found new facility: {facility}")
            facilities_dict[facility] = 1
            
    return facilities_dict

def scrape_properties(base_url, max_pages=10):
    all_properties = []
    all_facilities = set()
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = f"dotproperty_apt_listings_{timestamp}.csv"
    
    for page in range(1, max_pages + 1):
        listings = get_property_listings(base_url, page)
        if not listings:
            print(f"No listings found on page {page}")
            break
            
        for i, listing in enumerate(listings, 1):
            try:
                property_data = extract_listing_data(listing)
                if property_data['url']:
                    detailed_data = get_detailed_property_info(property_data['url'])
                    facilities = detailed_data.get('facilities', [])
                    all_facilities.update(facilities)
                    facilities_data = create_facilities_columns(facilities)
                    
                    property_data.update(detailed_data)
                    property_data.update(facilities_data)
                    all_properties.append(property_data)
                    
                    print(f"Property {i} on page {page} processed successfully")
                time.sleep(1)
            except Exception as e:
                print(f"Error processing property {i} on page {page}: {str(e)}")
        
        df = pd.DataFrame(all_properties)
        df.to_csv(output_file, index=False)
        print(f"Saved {len(all_properties)} listings to {output_file}")

    print(f"\nTotal unique facilities found: {len(all_facilities)}")
    print("Facility columns created:", sorted(list(all_facilities)))
    print(f"\nTotal listings scraped: {len(all_properties)}")
    
    return pd.DataFrame(all_properties)

if __name__ == "__main__":
    url = "https://www.dotproperty.com.ph/apartments-for-sale/metro-manila/quezon-city?exact_bed=false"
    df = scrape_properties(url)
    print("Scraping completed!")


Scraping URL: https://www.dotproperty.com.ph/apartments-for-sale/metro-manila/quezon-city?exact_bed=false?page=1
Found 25 listings on page 1
Property 1 on page 1 processed successfully
Property 2 on page 1 processed successfully
Property 3 on page 1 processed successfully
Property 4 on page 1 processed successfully
Property 5 on page 1 processed successfully
Property 6 on page 1 processed successfully
Property 7 on page 1 processed successfully
Property 8 on page 1 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 9 on page 1 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 10 on page 1 processed successfully
Property 11 on page 1 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 12 on page 1 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 13 on page 1 processed successfully
Found new facility: Fireplace
Found new facility: Cell

Found new facility: Cellar
Property 17 on page 5 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 18 on page 5 processed successfully
Property 19 on page 5 processed successfully
Property 20 on page 5 processed successfully
Property 21 on page 5 processed successfully
Property 22 on page 5 processed successfully
Property 23 on page 5 processed successfully
Property 24 on page 5 processed successfully
Property 25 on page 5 processed successfully
Saved 125 listings to dotproperty_apt_listings_20241130_190513.csv

Scraping URL: https://www.dotproperty.com.ph/apartments-for-sale/metro-manila/quezon-city?exact_bed=false?page=6
Found 25 listings on page 6
Property 1 on page 6 processed successfully
Property 2 on page 6 processed successfully
Property 3 on page 6 processed successfully
Property 4 on page 6 processed successfully
Property 5 on page 6 processed successfully
Property 6 on page 6 processed successfully
Property 7 on page 6 processed success

Property 11 on page 10 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 12 on page 10 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 13 on page 10 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 14 on page 10 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 15 on page 10 processed successfully
Property 16 on page 10 processed successfully
Found new facility: Cellar
Property 17 on page 10 processed successfully
Found new facility: Fireplace
Found new facility: Cellar
Property 18 on page 10 processed successfully
Property 19 on page 10 processed successfully
Property 20 on page 10 processed successfully
Property 21 on page 10 processed successfully
Property 22 on page 10 processed successfully
Property 23 on page 10 processed successfully
Property 24 on page 10 processed successfully
Property 25 on page 10 processed successfull

## Apartment Scraping

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import time
from datetime import datetime

def extract_range_value(text):
    # Handle ranges like "1-2" or single values like "1"
    if '-' in text:
        start, end = text.split('-')
        return f"{start.strip()}-{end.strip()}"
    return text.strip()

def extract_area_range(text):
    # Handle area ranges like "26-78 m²"
    match = re.search(r'(\d+(?:\.\d+)?)-?(\d+(?:\.\d+)?)?\s*m', text)
    if match:
        start = match.group(1)
        end = match.group(2)
        return f"{start}-{end}" if end else start
    return text.strip()

def extract_price_range(text):
    # Handle "From ₱ X" or regular price
    if 'From' in text:
        price = re.search(r'From ₱\s*([\d,]+)', text)
        return f"From {price.group(1)}" if price else text.strip()
    else:
        price = re.search(r'₱\s*([\d,]+)', text)
        return price.group(1) if price else text.strip()

def get_property_listings(url, page=1):
    full_url = f"{url}?page={page}"
    print(f"\nScraping URL: {full_url}")
    
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
    try:
        response = requests.get(full_url, headers=headers)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        listings = soup.find_all('article', class_='listing-snippet')
        print(f"Found {len(listings)} listings on page {page}")
        return listings
    except Exception as e:
        print(f"Error fetching data: {str(e)}")
        return []

def extract_listing_data(listing):
    data = {}
    
    title_div = listing.find('div', class_='text-2xl font-semibold')
    data['title'] = title_div.get('title', '') if title_div else ''
    
    location_div = listing.find('div', class_='text-neutral-2 my-[5px]')
    data['location'] = location_div.text.strip() if location_div else ''
    
    # Find price within the flex container
    flex_container = listing.find('div', class_='sm:flex gap-4 justify-between')
    if flex_container:
        price_div = flex_container.find('div', class_='inline-block text-secondary-base whitespace-nowrap font-bold text-3xl')
        if price_div:
            data['price'] = price_div.text.strip().replace('₱', '').replace(',', '').strip()
            price_sqm_div = flex_container.find('div', class_='inline-block text-neutral-2 whitespace-nowrap font-bold text-xl text-right sm:block')
            if price_sqm_div:
                # Extract price per sqm from format like (₱ 175,152 / m²)
                sqm_match = re.search(r'₱\s*([\d,]+)', price_sqm_div.text)
                data['price_per_sqm'] = sqm_match.group(1).replace(',', '') if sqm_match else ''
    
    link = listing.find('a', href=True)
    data['url'] = link['href'] if link else ''
    
    return data

def get_detailed_property_info(url):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
       
        data = {}
        key_features = soup.find('ul', class_='key-featured')
        if key_features:
            for item in key_features.find_all('li'):
                spans = item.find_all('span')
                if spans and len(spans) > 1:
                    text = spans[-1].text.strip()
                    if 'Bed:' in text or 'Beds:' in text:
                        data['bedrooms'] = text.replace('Bed:', '').replace('Beds:', '').strip()
                    elif 'Bath:' in text or 'Baths:' in text:
                        data['bathrooms'] = text.replace('Bath:', '').replace('Baths:', '').strip()
                    elif 'Usable area:' in text:
                        data['floor_area'] = text.replace('Usable area:', '').replace('m²', '').strip()
                    elif 'Floor:' in text or 'Floors:' in text:
                        data['floor'] = text.replace('Floor:', '').replace('Floors:', '').strip()
        
        # Rest of the function remains the same
        scripts = soup.find_all('script', type=lambda t: t and 'javascript' in t)
        for script in scripts:
            script_text = script.string if script.string else ''
            if 'var name =' in str(script_text):
                data['name'] = re.search('var name = "(.*?)";', str(script_text)).group(1)
            elif 'var gps_lat =' in str(script_text):
                data['latitude'] = re.search('var gps_lat = "(.*?)";', str(script_text)).group(1)
            elif 'var gps_lon =' in str(script_text):
                data['longitude'] = re.search('var gps_lon = "(.*?)";', str(script_text)).group(1)
            elif 'var address =' in str(script_text):
                data['address'] = re.search('var address = "(.*?)";', str(script_text)).group(1)
        
        facilities = []
        facility_items = soup.find_all('img', title=lambda t: t and 'Facilities:' in t)
        for item in facility_items:
            facility = item.get('title', '').replace('Facilities: ', '').strip()
            if facility:
                facilities.append(facility)
        
        data['facilities'] = facilities
        return data
    except Exception as e:
        print(f"Error fetching detailed info for {url}: {str(e)}")
        return {}

def create_facilities_columns(facilities_list):
    base_facilities = [
        'Access for the disabled', 'Air conditioning', 'Alarm', 'Balcony',
        'Built-in kitchen', 'Built-in wardrobe', 'Car park', "Children's area",
        'Cistern', 'Concierge', 'Electricity', 'Elevator', 'Equipped kitchen',
        'Garden', 'Grill', 'Guardhouse', 'Gym', 'Heating', 'Hot Tub', 'Internet',
        'Library', 'Natural gas', 'Office', 'Panoramic view', 'Patio',
        'Roof garden', 'Sauna', 'Security', 'Swimming pool', 'Tennis court',
        'Terrace', 'Utility room', 'Video cable', 'Water'
    ]
    
    facilities_dict = {facility: 0 for facility in base_facilities}
    for facility in facilities_list:
        if facility in facilities_dict:
            facilities_dict[facility] = 1
        else:
            print(f"Found new facility: {facility}")
            facilities_dict[facility] = 1
            
    return facilities_dict

def scrape_properties(base_url, max_pages=10):
    all_properties = []
    all_facilities = set()
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = f"dotproperty_comm_listings_{timestamp}.csv"
    
    for page in range(1, max_pages + 1):
        listings = get_property_listings(base_url, page)
        if not listings:
            print(f"No listings found on page {page}")
            break
            
        for i, listing in enumerate(listings, 1):
            try:
                property_data = extract_listing_data(listing)
                if property_data['url']:
                    detailed_data = get_detailed_property_info(property_data['url'])
                    facilities = detailed_data.get('facilities', [])
                    all_facilities.update(facilities)
                    facilities_data = create_facilities_columns(facilities)
                    
                    property_data.update(detailed_data)
                    property_data.update(facilities_data)
                    all_properties.append(property_data)
                    
                    print(f"Property {i} on page {page} processed successfully")
                time.sleep(1)
            except Exception as e:
                print(f"Error processing property {i} on page {page}: {str(e)}")
        
        df = pd.DataFrame(all_properties)
        df.to_csv(output_file, index=False)
        print(f"Saved {len(all_properties)} listings to {output_file}")

    print(f"\nTotal unique facilities found: {len(all_facilities)}")
    print("Facility columns created:", sorted(list(all_facilities)))
    print(f"\nTotal listings scraped: {len(all_properties)}")
    
    return pd.DataFrame(all_properties)

if __name__ == "__main__":
    url = "https://www.dotproperty.com.ph/apartments-for-sale/metro-manila/quezon-city?exact_bed=false"
    df = scrape_properties(url)
    print("Scraping completed!")

## Commercial Scraping

In [6]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import time
from datetime import datetime

def get_property_listings(url, page=1):
    full_url = f"{url}?page={page}"
    print(f"\nScraping URL: {full_url}")
    
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
    try:
        response = requests.get(full_url, headers=headers)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        listings = soup.find_all('article', class_='listing-snippet')
        print(f"Found {len(listings)} listings on page {page}")
        return listings
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {str(e)}")
        return []

def extract_listing_data(listing):
    data = {}
    
    title_div = listing.find('div', class_='text-2xl font-semibold')
    data['title'] = title_div.get('title', '') if title_div else ''
    
    location_div = listing.find('div', class_='text-neutral-2 my-[5px]')
    data['location'] = location_div.text.strip() if location_div else ''
    
    price_div = listing.find('div', {'class': ['text-secondary-base', 'whitespace-nowrap', 'font-bold', 'text-3xl']})
    if price_div:
        data['price'] = price_div.text.strip()
        price_per_sqm_div = price_div.find_next_sibling('div', {'class': ['text-neutral-2', 'whitespace-nowrap', 'font-bold', 'text-xl']})
        if price_per_sqm_div:
            data['price_per_sqm'] = price_per_sqm_div.text.strip()
    else:
        data['price'] = ''
        data['price_per_sqm'] = ''
    
    link = listing.find('a', href=True)
    data['url'] = link['href'] if link else ''
    
    return data

def get_detailed_property_info(url):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        
        data = {}
        key_features = soup.find('ul', class_='key-featured')
        if key_features:
            for item in key_features.find_all('li'):
                span_text = item.find_all('span')[-1].text.strip()
                if 'Bed:' in span_text or 'Beds:' in span_text:
                    data['bedrooms'] = span_text.replace('Bed:', '').replace('Beds:', '').strip()
                elif 'Bath:' in span_text or 'Baths:' in span_text:
                    data['bathrooms'] = span_text.replace('Bath:', '').replace('Baths:', '').strip()
                elif 'Usable area:' in span_text:
                    data['floor_area'] = span_text.replace('Usable area:', '').replace('m²', '').strip()
                elif 'Land area:' in span_text:
                    data['land_area'] = span_text.replace('Land area:', '').replace('m²', '').strip()
                elif 'Floor:' in span_text or 'Floors:' in span_text:
                    data['floor'] = span_text.replace('Floor:', '').replace('Floors:', '').strip()
        
        scripts = soup.find_all('script', type=lambda t: t and 'javascript' in t)
        for script in scripts:
            script_text = script.string if script.string else ''
            if 'var name =' in str(script_text):
                data['name'] = re.search('var name = "(.*?)";', str(script_text)).group(1)
            elif 'var gps_lat =' in str(script_text):
                data['latitude'] = re.search('var gps_lat = "(.*?)";', str(script_text)).group(1)
            elif 'var gps_lon =' in str(script_text):
                data['longitude'] = re.search('var gps_lon = "(.*?)";', str(script_text)).group(1)
            elif 'var address =' in str(script_text):
                data['address'] = re.search('var address = "(.*?)";', str(script_text)).group(1)
        
        facilities = []
        facility_items = soup.find_all('img', title=lambda t: t and 'Facilities:' in t)
        for item in facility_items:
            facility = item.get('title', '').replace('Facilities: ', '').strip()
            if facility:
                facilities.append(facility)
        
        data['facilities'] = facilities
        return data
        
    except Exception as e:
        print(f"Error fetching detailed info for {url}: {str(e)}")
        return {}

def create_facilities_columns(facilities_list):
    base_facilities = [
        'Access for the disabled', 'Air conditioning', 'Alarm', 'Balcony',
        'Built-in kitchen', 'Built-in wardrobe', 'Car park', "Children's area",
        'Cistern', 'Concierge', 'Electricity', 'Elevator', 'Equipped kitchen',
        'Garden', 'Grill', 'Guardhouse', 'Gym', 'Heating', 'Hot Tub', 'Internet',
        'Library', 'Natural gas', 'Office', 'Panoramic view', 'Patio',
        'Roof garden', 'Sauna', 'Security', 'Swimming pool', 'Tennis court',
        'Terrace', 'Utility room', 'Video cable', 'Water'
    ]
    
    facilities_dict = {facility: 0 for facility in base_facilities}
    for facility in facilities_list:
        if facility in facilities_dict:
            facilities_dict[facility] = 1
        else:
            print(f"Found new facility: {facility}")
            facilities_dict[facility] = 1
            
    return facilities_dict

def scrape_properties(base_url, max_pages=50):
    all_properties = []
    all_facilities = set()
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    output_file = f"dotproperty_comm_listings_{timestamp}.csv"
    
    for page in range(1, max_pages + 1):
        listings = get_property_listings(base_url, page)
        if not listings:
            print(f"No listings found on page {page}")
            break
            
        for i, listing in enumerate(listings, 1):
            try:
                property_data = extract_listing_data(listing)
                if property_data['url']:
                    detailed_data = get_detailed_property_info(property_data['url'])
                    facilities = detailed_data.get('facilities', [])
                    all_facilities.update(facilities)
                    facilities_data = create_facilities_columns(facilities)
                    
                    property_data.update(detailed_data)
                    property_data.update(facilities_data)
                    all_properties.append(property_data)
                    
                    print(f"Property {i} on page {page} processed successfully")
                time.sleep(1)
            except Exception as e:
                print(f"Error processing property {i} on page {page}: {str(e)}")
        
        df = pd.DataFrame(all_properties)
        df.to_csv(output_file, index=False)
        print(f"Saved {len(all_properties)} listings to {output_file}")

    print(f"\nTotal unique facilities found: {len(all_facilities)}")
    print("Facility columns created:", sorted(list(all_facilities)))
    print(f"\nTotal listings scraped: {len(all_properties)}")
    
    return pd.DataFrame(all_properties)

if __name__ == "__main__":
    url = "https://www.dotproperty.com.ph/commercial-property-for-sale/metro-manila/quezon-city?exact_bed=false"
    df = scrape_properties(url)
    print("Scraping completed!")


Scraping URL: https://www.dotproperty.com.ph/commercial-property-for-sale/metro-manila/quezon-city?exact_bed=false?page=1
Found 25 listings on page 1
Property 1 on page 1 processed successfully
Property 2 on page 1 processed successfully
Property 3 on page 1 processed successfully
Property 4 on page 1 processed successfully
Property 5 on page 1 processed successfully
Property 6 on page 1 processed successfully
Property 7 on page 1 processed successfully
Property 8 on page 1 processed successfully
Property 9 on page 1 processed successfully
Property 10 on page 1 processed successfully
Property 11 on page 1 processed successfully
Property 12 on page 1 processed successfully
Property 13 on page 1 processed successfully
Property 14 on page 1 processed successfully
Property 15 on page 1 processed successfully
Property 16 on page 1 processed successfully
Property 17 on page 1 processed successfully
Property 18 on page 1 processed successfully
Property 19 on page 1 processed successfully
Pro

Property 2 on page 7 processed successfully
Property 3 on page 7 processed successfully
Property 4 on page 7 processed successfully
Property 5 on page 7 processed successfully
Property 6 on page 7 processed successfully
Property 7 on page 7 processed successfully
Property 8 on page 7 processed successfully
Property 9 on page 7 processed successfully
Property 10 on page 7 processed successfully
Property 11 on page 7 processed successfully
Property 12 on page 7 processed successfully
Property 13 on page 7 processed successfully
Property 14 on page 7 processed successfully
Property 15 on page 7 processed successfully
Property 16 on page 7 processed successfully
Property 17 on page 7 processed successfully
Property 18 on page 7 processed successfully
Property 19 on page 7 processed successfully
Property 20 on page 7 processed successfully
Property 21 on page 7 processed successfully
Property 22 on page 7 processed successfully
Property 23 on page 7 processed successfully
Property 24 on pag

Property 5 on page 13 processed successfully
Property 6 on page 13 processed successfully
Property 7 on page 13 processed successfully
Property 8 on page 13 processed successfully
Property 9 on page 13 processed successfully
Property 10 on page 13 processed successfully
Property 11 on page 13 processed successfully
Property 12 on page 13 processed successfully
Property 13 on page 13 processed successfully
Property 14 on page 13 processed successfully
Property 15 on page 13 processed successfully
Property 16 on page 13 processed successfully
Property 17 on page 13 processed successfully
Property 18 on page 13 processed successfully
Property 19 on page 13 processed successfully
Property 20 on page 13 processed successfully
Property 21 on page 13 processed successfully
Property 22 on page 13 processed successfully
Property 23 on page 13 processed successfully
Property 24 on page 13 processed successfully
Property 25 on page 13 processed successfully
Saved 325 listings to dotproperty_comm_

Property 6 on page 19 processed successfully
Property 7 on page 19 processed successfully
Property 8 on page 19 processed successfully
Property 9 on page 19 processed successfully
Property 10 on page 19 processed successfully
Property 11 on page 19 processed successfully
Property 12 on page 19 processed successfully
Property 13 on page 19 processed successfully
Property 14 on page 19 processed successfully
Property 15 on page 19 processed successfully
Property 16 on page 19 processed successfully
Property 17 on page 19 processed successfully
Property 18 on page 19 processed successfully
Property 19 on page 19 processed successfully
Property 20 on page 19 processed successfully
Property 21 on page 19 processed successfully
Property 22 on page 19 processed successfully
Property 23 on page 19 processed successfully
Property 24 on page 19 processed successfully
Property 25 on page 19 processed successfully
Saved 475 listings to dotproperty_comm_listings_20241130_193711.csv

Scraping URL: h

Property 7 on page 25 processed successfully
Property 8 on page 25 processed successfully
Property 9 on page 25 processed successfully
Property 10 on page 25 processed successfully
Property 11 on page 25 processed successfully
Property 12 on page 25 processed successfully
Property 13 on page 25 processed successfully
Property 14 on page 25 processed successfully
Property 15 on page 25 processed successfully
Property 16 on page 25 processed successfully
Property 17 on page 25 processed successfully
Property 18 on page 25 processed successfully
Property 19 on page 25 processed successfully
Property 20 on page 25 processed successfully
Property 21 on page 25 processed successfully
Property 22 on page 25 processed successfully
Property 23 on page 25 processed successfully
Property 24 on page 25 processed successfully
Property 25 on page 25 processed successfully
Saved 625 listings to dotproperty_comm_listings_20241130_193711.csv

Scraping URL: https://www.dotproperty.com.ph/commercial-prop

Property 8 on page 31 processed successfully
Property 9 on page 31 processed successfully
Property 10 on page 31 processed successfully
Property 11 on page 31 processed successfully
Property 12 on page 31 processed successfully
Property 13 on page 31 processed successfully
Property 14 on page 31 processed successfully
Property 15 on page 31 processed successfully
Property 16 on page 31 processed successfully
Property 17 on page 31 processed successfully
Property 18 on page 31 processed successfully
Property 19 on page 31 processed successfully
Property 20 on page 31 processed successfully
Property 21 on page 31 processed successfully
Property 22 on page 31 processed successfully
Property 23 on page 31 processed successfully
Property 24 on page 31 processed successfully
Property 25 on page 31 processed successfully
Saved 775 listings to dotproperty_comm_listings_20241130_193711.csv

Scraping URL: https://www.dotproperty.com.ph/commercial-property-for-sale/metro-manila/quezon-city?exact_

Property 9 on page 37 processed successfully
Property 10 on page 37 processed successfully
Property 11 on page 37 processed successfully
Property 12 on page 37 processed successfully
Property 13 on page 37 processed successfully
Property 14 on page 37 processed successfully
Property 15 on page 37 processed successfully
Property 16 on page 37 processed successfully
Property 17 on page 37 processed successfully
Property 18 on page 37 processed successfully
Property 19 on page 37 processed successfully
Property 20 on page 37 processed successfully
Property 21 on page 37 processed successfully
Property 22 on page 37 processed successfully
Property 23 on page 37 processed successfully
Property 24 on page 37 processed successfully
Property 25 on page 37 processed successfully
Saved 925 listings to dotproperty_comm_listings_20241130_193711.csv

Scraping URL: https://www.dotproperty.com.ph/commercial-property-for-sale/metro-manila/quezon-city?exact_bed=false?page=38
Found 25 listings on page 3

Property 10 on page 43 processed successfully
Property 11 on page 43 processed successfully
Property 12 on page 43 processed successfully
Property 13 on page 43 processed successfully
Property 14 on page 43 processed successfully
Property 15 on page 43 processed successfully
Property 16 on page 43 processed successfully
Property 17 on page 43 processed successfully
Property 18 on page 43 processed successfully
Property 19 on page 43 processed successfully
Property 20 on page 43 processed successfully
Property 21 on page 43 processed successfully
Property 22 on page 43 processed successfully
Property 23 on page 43 processed successfully
Property 24 on page 43 processed successfully
Property 25 on page 43 processed successfully
Saved 1075 listings to dotproperty_comm_listings_20241130_193711.csv

Scraping URL: https://www.dotproperty.com.ph/commercial-property-for-sale/metro-manila/quezon-city?exact_bed=false?page=44
Found 25 listings on page 44
Property 1 on page 44 processed successful

Property 11 on page 49 processed successfully
Property 12 on page 49 processed successfully
Property 13 on page 49 processed successfully
Property 14 on page 49 processed successfully
Property 15 on page 49 processed successfully
Property 16 on page 49 processed successfully
Property 17 on page 49 processed successfully
Property 18 on page 49 processed successfully
Property 19 on page 49 processed successfully
Property 20 on page 49 processed successfully
Property 21 on page 49 processed successfully
Property 22 on page 49 processed successfully
Property 23 on page 49 processed successfully
Property 24 on page 49 processed successfully
Property 25 on page 49 processed successfully
Saved 1225 listings to dotproperty_comm_listings_20241130_193711.csv

Scraping URL: https://www.dotproperty.com.ph/commercial-property-for-sale/metro-manila/quezon-city?exact_bed=false?page=50
Found 25 listings on page 50
Property 1 on page 50 processed successfully
Property 2 on page 50 processed successfull