# Data Extraction from Private Property.ng Website


In [1]:
# Importing Libraries

import requests
from bs4 import BeautifulSoup 
import re

# importing the library to scrap image
import os
from urllib.parse import urljoin

# import library for date
from datetime import datetime, timedelta
import time

In [2]:
# Error handling function while loading the site and
# Defining the Header for using User-Agent approach to have access to the site info

HEADER = {
    "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36"
}


def make_connection(url):
    """ Make a request to http url
    and return a beautiful soup object"""
    try:
        response = requests.get(url, headers=HEADER)
        if response.status_code == 200:
            soup_content = BeautifulSoup(response.content, 'html.parser')
            return soup_content
        else:
            print(f"Resources Not available! Status Code {response.status_code}")
    except Exception as e:
        print(f"An Error occurs. Message: {e}")

# Extract amenities 
def property_amenities(property_info):
    amenities = {"bedrooms": "N/A", "bathrooms": "N/A", "toilets": "N/A"}
    
    amenity_div = property_info.find("ul", class_="property-benefit")
    if not amenity_div:
        return amenities
    
    amenity_tags = amenity_div.find_all("li")
    
    # Use index positions
    if len(amenity_tags) >= 1:
        text = amenity_tags[0].get_text(strip=True)
        amenities["bedrooms"] = int(text) if text.isdigit() else "N/A"
    if len(amenity_tags) >= 2:
        text = amenity_tags[1].get_text(strip=True)
        amenities["bathrooms"] = int(text) if text.isdigit() else "N/A"
    if len(amenity_tags) >= 3:
        text = amenity_tags[2].get_text(strip=True)
        amenities["toilets"] = int(text) if text.isdigit() else "N/A"
    
    return amenities
    
# property image url
def extract_image_url(property_info):
    """
    Extracts the image URL from a property card.
    Returns the URL string if found, otherwise None.
    """
    img_tag = property_info.find("img")
    if img_tag and img_tag.get("src"):
        return img_tag["src"]
    return None


# Furnishing status detection 
def furnishing_status(property_info, img_url: str) -> str:
    """ First check image URL if not found then
        check description text"""
    if img_url:
        text = img_url.lower()
        if "semi-furnished" in text or "partially-furnished" in text:
            return "Partially Furnished"
        elif "furnished" in text:
            return "Furnished"
        elif "unfurnished" in text:
            return "Unfurnished"
    
    """ check description text"""
    desc_tag = property_info.find("div", class_="similar-listings-info")
    description = desc_tag.get_text(strip=True).lower() if desc_tag else ""
    
    if "semi-furnished" in description or "partially furnished" in description:
        return "Partially Furnished"
    elif "furnished" in description:
        return "Furnished"
    elif "unfurnished" in description:
        return "Unfurnished"
    
    return "Not Specified"

# property date
def normalize_date(raw_date: str) -> dict:
    """
    Normalize property date strings when both Updated and Added
    are in the same span.
    """
    raw_date = raw_date.lower()
    today = datetime.today()
    result = {"updated_date": None, "added_date": None}
    
    # Handle relative dates first
    if "updated today" in raw_date:
        result["updated_date"] = today.strftime("%Y-%m-%d")
    elif "updated yesterday" in raw_date:
        result["updated_date"] = (today - timedelta(days=1)).strftime("%Y-%m-%d")
    
    # Handle absolute dates (both updated and added in same string)
    parts = raw_date.split(",")
    for part in parts:
        part = part.strip()
        if part.startswith("updated"):
            date_str = part.replace("updated", "").strip()
            try:
                result["updated_date"] = datetime.strptime(date_str, "%d %b %Y").strftime("%Y-%m-%d")
            except:
                pass
        elif part.startswith("added"):
            date_str = part.replace("added", "").strip()
            try:
                result["added_date"] = datetime.strptime(date_str, "%d %b %Y").strftime("%Y-%m-%d")
            except:
                pass
    
    return result


# scrap all peoperties
def scrap_page_property(property_info):
    """
    scrap single page property to return list of dictionary
    """
    single_page_property= []
    
    for property_info in properties_info:
        property_title_div= property_info.find("div", class_= "similar-listings-info").find('h2')
        property_title= property_title_div.get_text() if property_title_div is not None else "N/A"

        listing_type_div= property_info.find("div", class_= "similar-listings-info").find('h3')
        listing_type = listing_type_div.get_text() if listing_type_div is not None else "N/A"
        
        property_type_div= property_info.find("div", class_= "similar-listings-info").find('h2')
        property_type = property_type_div.get_text() if property_type_div is not None else "N/A"
        
        property_price_div= property_info.find("div", class_= "similar-listings-price").find('h4')
        property_price= property_price_div.get_text()if property_price_div is not None else "N/A"  
        
        property_price_currency_div= property_info.find("div", class_= "similar-listings-price").find('span')
        property_price_currency= property_price_currency_div.get_text()if property_price_currency_div is not None else "N/A"  

        amenities = property_amenities(property_info)
         
        property_location_div= property_info.find("div", class_= "similar-listings-info").find('p')
        property_location= property_location_div.get_text()if property_location_div is not None else "N/A" 

        property_agent_name_div = property_info.find('div', class_="media").find('img')
        property_agent_name= property_agent_name_div.get('alt') if property_location_div is not None else "N/A"
        
        Property_agent_link_div= property_info.find('div', class_='similar-listing-contact').find('a')
        Property_agent_link= Property_agent_link_div.get('href') if Property_agent_link_div is not None else "N/A"
        
        property_date = property_info.find("div", class_= "media-body").find('h5')
        raw_date = property_date.get_text(strip=True) if property_date else ""
        dates = normalize_date(raw_date)

        img_url = extract_image_url(property_info)

        furnishing = furnishing_status(property_info, img_url)

        favorites_div = property_info.find('a', class_="listings-favorite")
        favorites = favorites_div.get('href') if favorites_div is not None else "N/A"

        
    
        single_property = {
            "title": property_title,
            "listing_type": listing_type,
            "property_type": property_type,
            "price": property_price,
            "currency": property_price_currency,
            "bedrooms": amenities["bedrooms"],
            "bathrooms": amenities["bathrooms"],
            "toilets": amenities["toilets"],
            "location": property_location,
            "agent_name": property_agent_name,
            "agent_link": Property_agent_link,
            "updated_date": dates["updated_date"],
            "added_date": dates["added_date"], 
            "image_url": img_url,
            "furnishing": furnishing,
            "favorite": favorites
        }
        single_page_property.append(single_property)
    
    return single_page_property

In [3]:
# all properties
all_property = {}

# all pages of website
for page_num in range(1,137):
    # website URL
    URL = f"https://privateproperty.ng/property-for-rent?page={page_num}"
    print(f"Scrapping property data from page {page_num}")

    # make connection
    soup_content = make_connection(URL)

    # single page property 
    properties_info = soup_content.find_all("div", class_="similar-listings-item" )

    single_page_property = scrap_page_property(properties_info)
    print(f"Property Scraped successfully: {len(single_page_property)} properties Retrived")

     # sleep
    time.sleep(10)  
    
    all_property.update({f"page {page_num}":single_page_property})

Scrapping property data from page 1
Property Scraped successfully: 22 properties Retrived
Scrapping property data from page 2
Property Scraped successfully: 22 properties Retrived
Scrapping property data from page 3
Property Scraped successfully: 22 properties Retrived
Scrapping property data from page 4
Property Scraped successfully: 22 properties Retrived
Scrapping property data from page 5
Property Scraped successfully: 22 properties Retrived
Scrapping property data from page 6
Property Scraped successfully: 22 properties Retrived
Scrapping property data from page 7
Property Scraped successfully: 22 properties Retrived
Scrapping property data from page 8
Property Scraped successfully: 22 properties Retrived
Scrapping property data from page 9
Property Scraped successfully: 22 properties Retrived
Scrapping property data from page 10
Property Scraped successfully: 22 properties Retrived
Scrapping property data from page 11
Property Scraped successfully: 22 properties Retrived
Scrappin

In [4]:
len(all_property)

136

In [5]:
len(all_property["page 1"])

22

In [6]:
all_property

{'page 1': [{'title': 'Fully Detached 6 Bedroom House + 2 Rooms Bq+ Large Parking',
   'listing_type': '6 BEDROOM DETACHED DUPLEX For Rent',
   'property_type': 'Fully Detached 6 Bedroom House + 2 Rooms Bq+ Large Parking',
   'price': '₦ 59,999,999',
   'currency': '₦',
   'bedrooms': 6,
   'bathrooms': 6,
   'toilets': 7,
   'location': '\n\n\n Old Ikoyi Lagos',
   'agent_name': 'PRINCE ADEMOLA OSINUGA INTERNATIONAL ',
   'agent_link': '/estate-agents/princeademolaosinugainternational',
   'updated_date': '2025-11-28',
   'added_date': '2025-11-23',
   'image_url': 'data:image/png;base64,R0lGODlhAQABAAD/ACwAAAAAAQABAAACADs=',
   'furnishing': 'Not Specified',
   'favorite': '/profile/add-favorite/48642799'},
  {'title': '2 Bedroom Apartment',
   'listing_type': '2 BEDROOM FLAT & APARTMENT FOR RENT',
   'property_type': '2 Bedroom Apartment',
   'price': '₦ 10,000,000/year',
   'currency': '₦',
   'bedrooms': 2,
   'bathrooms': 2,
   'toilets': 3,
   'location': '\n\n\n Ikate Elegushi 

/*Attributes:

-property title = property_info.find("div", class_= "similar-listings-info").find('h2').get_text()
-listing type = property_info.find("div", class_= "similar-listings-info").find('h3').get_text()
-property price = property_info.find("div", class_= "similar-listings-price").find('h4').get_text()
-property price currency = property_info.find("div", class_= "similar-listings-price").find('span').get_text()
-property location = property_info.find("div", class_= "similar-listings-info").find('p').get_text()
-Property agent link = property_info.find('div', class_='similar-listing-contact').find('a').get('href')
-property date = property_info.find("div", class_= "media-body").find('h5').get_text() */

# Property for Sale

In [7]:
# Importing Libraries

import requests
from bs4 import BeautifulSoup 
import re

# importing the library to scrap image
import os
from urllib.parse import urljoin

# import library for date
from datetime import datetime, timedelta
import time

In [8]:
# Error handling function while loading the site and
# Defining the Header for using User-Agent approach to have access to the site info

HEADER = {
    "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36"
}

def make_connection_now(url):
    try:
        response = requests.get(url, headers=HEADER)
        if response.status_code == 200:
            soup_contents = BeautifulSoup(response.content, 'html.parser')
            return soup_contents
        else:
            print(f"Resources Not available! Status Code {response.status_code}")
    except Exception as e:
        print(f"An Error occurs. Message: {e}")

def extract_image_url(property_info_div):
    """
    Extracts the image URL from a property card.
    Returns the URL string if found, otherwise None.
    """
    img_tag = property_info_div.find("img")
    if img_tag and img_tag.get("src"):
        return img_tag["src"]
    return None


def property_amenities(property_info_div):
    amenities = {"bedrooms": "N/A", "bathrooms": "N/A", "toilets": "N/A"}
    
    amenity_div = property_info_div.find("ul", class_="property-benefit")
    if not amenity_div:
        return amenities
    
    amenity_tags = amenity_div.find_all("li")
    
    # Use index positions
    if len(amenity_tags) >= 1:
        text = amenity_tags[0].get_text(strip=True)
        amenities["bedrooms"] = int(text) if text.isdigit() else "N/A"
    if len(amenity_tags) >= 2:
        text = amenity_tags[1].get_text(strip=True)
        amenities["bathrooms"] = int(text) if text.isdigit() else "N/A"
    if len(amenity_tags) >= 3:
        text = amenity_tags[2].get_text(strip=True)
        amenities["toilets"] = int(text) if text.isdigit() else "N/A"
    
    return amenities

# property date
def normalize_date_div(raw_date: str) -> dict:
    """
    Normalize property date strings when both Updated and Added
    are in the same span.
    """
    raw_date = raw_date.lower()
    today = datetime.today()
    result = {"updated_date": None, "added_date": None}
    
    # Handle relative dates first
    if "updated today" in raw_date:
        result["updated_date"] = today.strftime("%Y-%m-%d")
    elif "updated yesterday" in raw_date:
        result["updated_date"] = (today - timedelta(days=1)).strftime("%Y-%m-%d")
    
    # Handle absolute dates (both updated and added in same string)
    parts = raw_date.split(",")
    for part in parts:
        part = part.strip()
        if part.startswith("updated"):
            date_str = part.replace("updated", "").strip()
            try:
                result["updated_date"] = datetime.strptime(date_str, "%d %b %Y").strftime("%Y-%m-%d")
            except:
                pass
        elif part.startswith("added"):
            date_str = part.replace("added", "").strip()
            try:
                result["added_date"] = datetime.strptime(date_str, "%d %b %Y").strftime("%Y-%m-%d")
            except:
                pass
    
    return result

# scrap all peoperties
def scrap_page_saproperty(property_info_div):
    """
    scrap single page property to return list of dictionary
    """
    single_page_saproperty= []
    
    for property_info_div in properties_info_div:
        property_title_div= property_info_div.find("div", class_= "similar-listings-info").find('h2')
        property_title= property_title_div.get_text() if property_title_div is not None else "N/A"

        listing_type_div= property_info_div.find("div", class_= "similar-listings-info").find('h3')
        listing_type = listing_type_div.get_text() if listing_type_div is not None else "N/A"
        
        property_type_div= property_info_div.find("div", class_= "similar-listings-info").find('h2')
        property_type = property_type_div.get_text() if property_type_div is not None else "N/A"
        
        property_price_div= property_info_div.find("div", class_= "similar-listings-price").find('h4')
        property_price= property_price_div.get_text()if property_price_div is not None else "N/A"  
        
        property_price_currency_div= property_info_div.find("div", class_= "similar-listings-price").find('span')
        property_price_currency= property_price_currency_div.get_text()if property_price_currency_div is not None else "N/A"  

        amenities = property_amenities(property_info_div)
         
        property_location_div= property_info_div.find("div", class_= "similar-listings-info").find('p')
        property_location= property_location_div.get_text()if property_location_div is not None else "N/A" 

        property_agent_name_div = property_info_div.find('div', class_="media").find('img')
        property_agent_name= property_agent_name_div.get('alt') if property_location_div is not None else "N/A"
        
        Property_agent_link_div= property_info_div.find('div', class_='similar-listing-contact').find('a')
        Property_agent_link= Property_agent_link_div.get('href') if Property_agent_link_div is not None else "N/A"
        
        property_date_div = property_info_div.find("div", class_= "media-body").find('h5')
        raw_date = property_date_div.get_text(strip=True) if property_date_div else ""
        dates = normalize_date_div(raw_date)
        
        img_url = extract_image_url(property_info_div)

        favorites_div = property_info_div.find('a', class_="listings-favorite")
        favorites = favorites_div.get('href') if favorites_div is not None else "N/A"

        
    
        single_property = {
            "title": property_title,
            "listing_type": listing_type,
            "property_type": property_type,
            "price": property_price,
            "currency": property_price_currency,
            "bedrooms": amenities["bedrooms"],
            "bathrooms": amenities["bathrooms"],
            "toilets": amenities["toilets"],
            "location": property_location,
            "agent_name": property_agent_name,
            "agent_link": Property_agent_link,
            "updated_date": dates["updated_date"],
            "added_date": dates["added_date"],
            "image_url": img_url,
            "favorite": favorites
        }
        single_page_saproperty.append(single_property)
    
    return single_page_saproperty

In [9]:
# all properties
all_saproperty = {}

# all pages of website
for page_num in range(1,137):
    # website URL
    URL = f"https://privateproperty.ng/property-for-sale?page={page_num}"
    print(f"Scrapping property data from page {page_num}")

    # make connection
    soup_contents = make_connection_now(URL)

    # single page property 
    properties_info_div = soup_contents.find_all("div", class_="similar-listings-item" )
    
    single_page_saproperty = scrap_page_saproperty(properties_info_div)
    print(f"Property Scrapped successfully: {len(single_page_saproperty)} properties Retrived")

     # sleep
    time.sleep(10)  
    
    all_saproperty.update({f"page {page_num}":single_page_saproperty})

Scrapping property data from page 1
Property Scrapped successfully: 22 properties Retrived
Scrapping property data from page 2
Property Scrapped successfully: 22 properties Retrived
Scrapping property data from page 3
Property Scrapped successfully: 22 properties Retrived
Scrapping property data from page 4
Property Scrapped successfully: 22 properties Retrived
Scrapping property data from page 5
Property Scrapped successfully: 22 properties Retrived
Scrapping property data from page 6
Property Scrapped successfully: 22 properties Retrived
Scrapping property data from page 7
Property Scrapped successfully: 22 properties Retrived
Scrapping property data from page 8
Property Scrapped successfully: 22 properties Retrived
Scrapping property data from page 9
Property Scrapped successfully: 22 properties Retrived
Scrapping property data from page 10
Property Scrapped successfully: 22 properties Retrived
Scrapping property data from page 11
Property Scrapped successfully: 22 properties Retriv

In [10]:
all_saproperty

{'page 1': [{'title': 'Guzape 2 Exclusive Land Offer',
   'listing_type': 'MIXED USE LAND For Sale',
   'property_type': 'Guzape 2 Exclusive Land Offer',
   'price': '₦ 15,000,000,000',
   'currency': '₦',
   'bedrooms': 'N/A',
   'bathrooms': 'N/A',
   'toilets': 'N/A',
   'location': '\n\n\n Guzape 2 Guzape Abuja Phase 1',
   'agent_name': 'O3 Intellectual Properties',
   'agent_link': '/estate-agents/o3intellectualproperties',
   'updated_date': '2025-11-30',
   'added_date': None,
   'image_url': 'data:image/png;base64,R0lGODlhAQABAAD/ACwAAAAAAQABAAACADs=',
   'favorite': '/profile/add-favorite/48641554'},
  {'title': 'Waterfront Commercial Land In Chevron Lekki',
   'listing_type': 'MIXED USE LAND FOR SALE',
   'property_type': 'Waterfront Commercial Land In Chevron Lekki',
   'price': '₦ 230,000,000',
   'currency': '₦',
   'bedrooms': 'N/A',
   'bathrooms': 'N/A',
   'toilets': 'N/A',
   'location': '\n\n\n Chevron Drive Lekki Lagos',
   'agent_name': 'Timabestproperties',
   'a

In [11]:
len(all_saproperty)

136

/*Attributes:

-property title = property_info_div.find("div", class_= "similar-listings-info").find('h2').get_text() 
-listing type = property_info_div.find("div", class_= "similar-listings-info").find('h3').get_text() 
-property type = property_info_div.find("div", class_= "similar-listings-info").find('h2').get_text() 
-property price = property_info_div.find("div", class_= "similar-listings-price").find('h4').get_text() 
-property price currency = property_info_div.find("div", class_= "similar-listings-price").find('span').get_text() 
-property benefit- bedroom = property_info_div.find("ul", class_= "property-benefit").find('path').get_text() 
-property benefit- bathroom = property_info_div.find("ul", class_= "property-benefit").find('li').get_text() 
-property benefit- toilet = property_info_div.find("ul", class_= "property-benefit").find('li').find('path').get_text() 
-property location = property_info_div.find("div", class_= "similar-listings-info").find('p').get_text()
-property agent name = property_info_div.find('div', class_="media").find('img').get('alt')
-Property agent link = property_info_div.find('div', class_='similar-listing-contact').find('a').get('href') 
-property date = property_info_div.find("div", class_= "media-body").find('h5').get_text() 
-Property image url = property_info_div.find('div', class_='similar-listings-image sponsored').find('img').get('data-src')
-property favorites =property_info_div.find('a', class_="listings-favorite").get('href')
*/