# Data Extraction from land.ng website 

In [11]:

# Import necessary libraries

import requests
from bs4 import BeautifulSoup
import pandas as pd
import time
import re
import os

In [None]:

# Request configuration shared by every HTTP call in this notebook.
# BASE_URL is the Lagos search-results URL; the page number is appended to it.
BASE_URL = "https://land.ng/search-results/?keyword=&states%5B%5D=lagos&page="
# Headers sent with each request (only a User-Agent is set).
HEADERS = {"User-Agent": "Mozilla/5.0"}

# turn a displayed price into a plain integer
def clean_price(text):
    """Return the first integer found in a price string, or None.

    The naira sign and comma separators are removed before searching, so
    "₦25,000,000" yields 25000000. Falsy input (None, "") and text with no
    digits both yield None.
    """
    if not text:
        return None
    stripped = text.replace("₦", "").replace(",", "")
    first_number = re.search(r"\d+", stripped)
    if first_number is None:
        return None
    return int(first_number.group())

# clean sqm
def clean_sqm(text):
    """Return the land size as an integer number of square metres, or None.

    Takes the first run of digits in *text* after removing comma thousands
    separators, so "1,200 Square Meters" yields 1200 rather than 1. This
    matches how clean_price treats commas.

    Returns None for falsy input (None, "") or when the text has no digits.
    """
    if not text:
        return None
    # Without this, a comma-grouped size is truncated at the first comma.
    text = text.replace(",", "")
    match = re.search(r"\d+", text)
    return int(match.group()) if match else None

# get coordinates from detail page
def get_coordinates(detail_url):
    """Fetch a listing's detail page and extract a (latitude, longitude) pair.

    The page HTML is scanned for decimal numbers with one or two integer
    digits, and the first two matches are returned in page order as strings.
    NOTE(review): this is a heuristic. Any decimal that appears earlier in
    the markup will be picked up instead of the real coordinates, so
    confirm the values against the page's map data.

    Args:
        detail_url: URL of the listing detail page.

    Returns:
        A (lat, lon) tuple of strings, or (None, None) when the request
        fails, the server answers with an error status, or fewer than two
        decimal numbers are found.
    """
    try:
        response = requests.get(detail_url, headers=HEADERS, timeout=15)
    except requests.RequestException:
        # Best-effort lookup: a network failure leaves the coordinates blank.
        # Catching only request errors lets Ctrl-C and real bugs propagate.
        return None, None

    # An error page is not a listing, so decimals found in it are meaningless.
    if not response.ok:
        return None, None

    coords = re.findall(r"([-+]?\d{1,2}\.\d+)", response.text)
    if len(coords) >= 2:
        return coords[0], coords[1]
    return None, None

# Scrape the search-result pages, build one record per listing, and save
# everything to a CSV file.
all_data = []

# scrape only 6 pages
for page in range(1, 7):
    print(f"Scraping page {page}...")
    url = BASE_URL + str(page)

    # The timeout stops one stalled connection from hanging the run. A failed
    # page is skipped, not fatal, so rows already collected still get saved.
    try:
        response = requests.get(url, headers=HEADERS, timeout=15)
    except requests.RequestException as exc:
        print(f"Request for page {page} failed ({exc}). Skipping.")
        time.sleep(2)
        continue
    soup = BeautifulSoup(response.text, "html.parser")

    listings = soup.find_all("div", class_="item-wrap")
    if not listings:
        print("No listings found. Stopping.")
        break

    for item in listings:
        # TITLE + LINK
        title_tag = item.find("h2", class_="item-title")
        title = title_tag.get_text(strip=True) if title_tag else None
        # A title without an anchor, or an anchor without href, gives no link
        # instead of raising and aborting the whole scrape.
        anchor = title_tag.find("a") if title_tag else None
        link = anchor.get("href") if anchor else None

        # PRICE
        price_tag = item.find("li", class_="item-price")
        price_raw = price_tag.get_text(strip=True) if price_tag else None
        price_clean = clean_price(price_raw)

        # LOCATION
        loc_tag = item.find("address", class_="item-address")
        location = loc_tag.get_text(strip=True) if loc_tag else None

        # PRICE TYPE (the postfix shown next to the price, if any)
        price_type_tag = item.select_one("span.price-postfix")
        price_type = price_type_tag.text.strip() if price_type_tag else None

        # LAND SIZE + TYPE
        land_size = None
        land_type = None

        am = item.find("ul", class_="item-amenities-with-icons")
        if am:
            for li in am.find_all("li"):
                text = li.get_text(strip=True)
                if "Square" in text:
                    land_size = clean_sqm(text)
                else:
                    # Any entry without "Square" is treated as the land type;
                    # if there are several, the last one wins.
                    land_type = text

        # COORDINATES (one extra request per listing that has a link)
        lat, lon = get_coordinates(link) if link else (None, None)

        # AVERAGE NAIRA PER SQM
        # Left empty when either value is missing or zero, which also
        # avoids a division by zero.
        if price_clean and land_size:
            avg = round(price_clean / land_size, 2)
        else:
            avg = None

        all_data.append({
            "Title": title,
            "Link": link,
            "Price (₦)": price_clean,
            "Location": location,
            "Land Size (sqm)": land_size,
            "Land Type": land_type,
            "Latitude": lat,
            "Longitude": lon,
            "price type": price_type,
            "Average ₦/sqm": avg
        })

    # Pause between result pages to limit the request rate.
    time.sleep(2)

# SAVE TO CSV
save_path = r"C:\Users\THIS-PC\Documents\LagosPropIQ\Data\raw\landsng_full_dataset.csv"
os.makedirs(os.path.dirname(save_path), exist_ok=True)
# utf-8-sig writes a byte-order mark at the start of the file.
pd.DataFrame(all_data).to_csv(save_path, index=False, encoding="utf-8-sig")

print("\nDone! Saved to:", save_path)



Scraping page 1...
