In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
from urllib.parse import urljoin
import json
import re
import geopandas as gpd
from shapely.geometry import Point

In [2]:
# Site root; the relative search paths below are resolved against it.
base_url = "https://www.lamudi.com.ph"

# Province slugs whose "condominium / buy" result pages are scraped,
# in the order they are visited.
province_slugs = [
    "agusan-del-norte", "aklan", "albay", "antique", "bataan",
    "batangas", "benguet", "biliran", "bohol", "bukidnon",
    "bulacan", "cagayan", "camarines-sur", "capiz", "cavite",
    "cebu", "davao-del-norte", "davao-del-sur", "guimaras", "ifugao",
    "ilocos-norte", "ilocos-sur", "iloilo", "isabela", "kalinga",
]

# One relative search path per province.
urls = [f"/{slug}/condominium/buy/" for slug in province_slugs]

In [None]:
# Optional single-province override for quick debugging runs.
base_url = "https://www.lamudi.com.ph"

# Flip to True to restrict the scrape to one province while debugging.
# It is off by default so that "Restart & Run All" scrapes the full province
# list defined in the configuration cell above instead of silently
# overwriting `urls` with a single entry.
SMOKE_TEST_SINGLE_PROVINCE = False

if SMOKE_TEST_SINGLE_PROVINCE:
    urls = ["/agusan-del-norte/condominium/buy/"]

In [3]:
# Empty accumulators for the scraped data: one list per output column.
# The scraping loop appends to these and the DataFrame cell consumes them.

# Listing classification
offer_types, prop_types = [], []

# Text and link taken from each listing card
titles, addresses, prices, listing_links = [], [], [], []

# Location details
provinces, municipalities, barangays, coordinates = [], [], [], []

In [4]:
# Location placeholders start out empty; the scraping loop assigns them
# when it finds geo/address data in a page.
latitude = longitude = None
province = municipality = barangay = None

In [5]:
# Scrape every result page of every province path in `urls`, appending one
# value per listing card to each of the column lists (titles, prices, ...).
#
# - Every column list receives exactly one value per card (None when a field
#   is missing), so all lists stay the same length for the DataFrame cell.
# - The next-page link is resolved once per page, after the cards have been
#   processed, so a page without any listing cards ends or advances the crawl
#   instead of re-requesting the same URL forever.
# - Re-running this cell without re-running the list-initialisation cell
#   appends the same rows again.

REQUEST_TIMEOUT = 30   # seconds before a stalled request is abandoned
visited_pages = set()  # absolute page URLs already fetched (guards against pagination cycles)

for url in urls:
    while url:
        # urljoin resolves both relative paths and absolute "next" links.
        page_url = urljoin(base_url, url)
        if page_url in visited_pages:
            break
        visited_pages.add(page_url)

        try:
            response = requests.get(page_url, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as exc:
            # Report the failure and continue with the next province rather
            # than parsing an error page or aborting the whole scrape.
            print(f"Skipping {page_url}: {exc}")
            break

        soup = BeautifulSoup(response.text, 'html.parser')

        # Page-level location data. It is reset for every page so values from a
        # previous page (or province) never leak into rows of a page that
        # carries no geo information.
        latitude = longitude = province = municipality = barangay = None

        scripts = soup.find_all("script", {"type": "application/ld+json"})

        for script in scripts:
            # Skip empty tags and payloads that are not valid JSON.
            if not script.string:
                continue
            try:
                json_data = json.loads(script.string)
            except json.JSONDecodeError:
                continue

            if not isinstance(json_data, dict) or json_data.get('@type') != 'RealEstateListing':
                continue

            main_entity = json_data.get('mainEntity') or {}
            for entry in main_entity.get('itemListElement') or []:
                item = entry.get('item') or {}
                if 'geo' in item:
                    geo = item['geo'] or {}
                    location = item.get('address') or {}
                    latitude = geo.get('latitude', None)
                    longitude = geo.get('longitude', None)
                    province = location.get('addressRegion', None)
                    municipality = location.get('addressLocality', None)
                    barangay = location.get('streetAddress', None)

                    # Only the first entry that has geo data is used.
                    # NOTE(review): these values are applied to every card on
                    # the page, so rows share one location per page. Matching
                    # JSON-LD entries to individual cards would need the page
                    # structure to be confirmed first.
                    break

        listings = soup.find_all('div', class_='row ListingCell-row ListingCell-agent-redesign')

        for listing in listings:
            # TITLE
            title_tag = listing.find('h2', class_='ListingCell-KeyInfo-title')
            title = title_tag.get('title') if title_tag is not None else None
            titles.append(title)

            # OFFER_TYPE / PROP_TYPE: constant for every row because every
            # path in `urls` is a ".../condominium/buy/" search. They are
            # appended unconditionally to keep the columns aligned.
            offer_types.append("Buy")
            prop_types.append("Condominium")

            # ADDRESS (text shown on the card)
            address_tag = listing.find('span', class_='ListingCell-KeyInfo-address-text')
            address = address_tag.text.strip() if address_tag is not None else None
            addresses.append(address)

            # PRICE: prefer the explicit price; fall back to the "no price" label.
            price_tag = listing.find('span', class_='PriceSection-FirstPrice')
            if price_tag is None:
                price_tag = listing.find('div', class_='PriceSection-NoPrice elipsis_v2')
            price = price_tag.text.strip() if price_tag is not None else None
            prices.append(price)

            # LOCATION (page-level values from the JSON-LD block)
            provinces.append(province)
            municipalities.append(municipality)
            barangays.append(barangay)

            # COORDINATES: always append so the column length matches the
            # others; None marks missing or unparsable coordinates.
            try:
                point = Point(float(longitude), float(latitude))
            except (TypeError, ValueError):
                point = None
            coordinates.append(point)

            # LISTING LINK
            link_tag = listing.find('a', class_="js-listing-link")
            listing_link = link_tag.get('href') if link_tag is not None else None
            listing_links.append(listing_link)

        # PAGINATION: follow the "next" link if the page has one; otherwise
        # `url` becomes None, which ends the while loop for this province.
        next_div = soup.find('div', class_='next')
        next_link = next_div.find('a') if next_div else None
        url = next_link.get('href') if next_link else None


In [6]:
# Assemble the scraped columns into one table. Each pair is
# (column header, list filled by the scraping loop); the order of the pairs
# fixes the column order of the DataFrame.
scraped_columns = [
    ('Offer Type', offer_types),
    ('Property Type', prop_types),
    ('Title', titles),
    ('Address', addresses),
    ('Price', prices),
    ('Listing Link', listing_links),
    ('Province', provinces),
    ('Municipality', municipalities),
    ('Barangay', barangays),
    ('Coordinates', coordinates),
]
data = pd.DataFrame(dict(scraped_columns))

In [7]:
# Preview the first five scraped rows.
data.head(n=5)

Unnamed: 0,Offer Type,Property Type,Title,Address,Price,Listing Link,Province,Municipality,Barangay,Coordinates
0,Buy,Condominium,"Studio for Sale at Camella Manors Soleia, Butu...","Villa Kananga, Butuan",Contact agent for price,https://www.lamudi.com.ph/studio-for-sale-at-c...,Agusan del Norte,Butuan Agusan del Norte,"Montalban Street Villa Kananga, Butuan City",POINT (125.5384633 8.9290268)
1,Buy,Condominium,Camella Manors Soleia - Building 1-Bedroom Co...,"Villa Kananga, Butuan",Contact agent for price,https://www.lamudi.com.ph/1-bedroom-condo-for-...,Agusan del Norte,Butuan Agusan del Norte,"Montalban Street Villa Kananga, Butuan City",POINT (125.5384633 8.9290268)
2,Buy,Condominium,1st - 4th Floor 2 Bedroom Corner Unit Amenity ...,"Villa Kananga, Butuan","₱ 6,751,000",https://www.lamudi.com.ph/1st-4th-floor-2-bedr...,Agusan del Norte,Butuan Agusan del Norte,"Montalban Street Villa Kananga, Butuan City",POINT (125.5384633 8.9290268)
3,Buy,Condominium,Beach Front Condo Property in Boracay Philippines,"Yapak, Malay","₱ 20,000,000",https://www.lamudi.com.ph/beach-front-condo-pr...,Aklan,Malay Aklan,"Ilig-Iligan Beach, Brgy. Yapak, Boracay, Borac...",POINT (121.915286 11.987)
4,Buy,Condominium,Pre-selling Studio Deluxe Unit for Sale at Cos...,"Yapak, Malay",Contact agent for price,https://www.lamudi.com.ph/pre-selling-studio-d...,Aklan,Malay Aklan,"Ilig-Iligan Beach, Brgy. Yapak, Boracay, Borac...",POINT (121.915286 11.987)


In [8]:
# Write the table to a CSV file in the working directory, leaving out the
# DataFrame's row index.
file_path = 'Condominium.csv'
data.to_csv(path_or_buf=file_path, index=False)