In [5]:
import re

import requests
from bs4 import BeautifulSoup

# Search-results page on land.ng filtered to listings in Lagos state.
URL = "https://land.ng/search-results/?keyword=&states%5B%5D=lagos"

# Browser-like request headers sent with every page download.
HEADER = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                  "AppleWebKit/537.36 (KHTML, like Gecko) "
                  "Chrome/126.0.0.0 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    # Only advertise encodings that requests can always decode. "br" (Brotli)
    # is decoded only when an optional Brotli package is installed; without it
    # a Brotli-compressed reply would reach the HTML parser as undecoded bytes.
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "en-US,en;q=0.9",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1"
}

def make_connection(url, timeout=30):
    """Download ``url`` and return the page parsed with BeautifulSoup.

    Parameters
    ----------
    url : str
        Address of the page to download.
    timeout : float or tuple, optional
        Seconds to wait for the server before giving up (passed straight to
        ``requests.get``). Defaults to 30.

    Returns
    -------
    BeautifulSoup or None
        The parsed page when the server answers with HTTP 200. ``None`` when
        the status code is anything else or when an exception is raised; in
        both cases a message is printed instead of raising.
    """
    try:
        # Without a timeout, requests waits indefinitely on an unresponsive
        # server and the whole notebook run stalls on a single page.
        response = requests.get(url, headers=HEADER, timeout=timeout)
        if response.status_code == 200:
            return BeautifulSoup(response.content, 'html.parser')
        print(f"Resources Not available! Status Code {response.status_code}")
    except Exception as e:
        # Deliberately best-effort: report the problem and let the caller
        # decide what to do with the missing page.
        print(f"An Error occurs. Message: {e}")
    return None

In [6]:
# Download and parse the Lagos search-results page.
# make_connection returns None (after printing a message) if the download fails.
content = make_connection(URL)

In [7]:
# make_connection returns None when the download fails; stop here with a clear
# message instead of an AttributeError on None.
if content is None:
    raise RuntimeError(
        "The search results page could not be downloaded; no listings to parse."
    )

# This finds ALL land listing cards on the page
land_content_all = content.find_all("div", class_="item-listing-wrap")

In [8]:
# EXTRACT COORDINATES FROM EACH LISTING PAGE
# ------------------------------------------------------------
# A coordinate value: an optionally signed decimal number.
_COORD_NUMBER = r'(-?\d+(?:\.\d+)?)'

# (latitude pattern, longitude pattern) pairs, tried in this order for every
# <script> tag. The value may be quoted ("6.52") or a bare JSON number (6.52).
_COORD_PATTERNS = (
    # Pattern 1: "lat": "6.524379"  /  "lat": 6.524379
    (
        re.compile(r'"lat"\s*:\s*"?' + _COORD_NUMBER + r'"?'),
        re.compile(r'"lng"\s*:\s*"?' + _COORD_NUMBER + r'"?'),
    ),
    # Pattern 2: latitude: "6.524379"  /  "latitude": 6.524379
    (
        re.compile(r'latitude"?\s*:\s*"?' + _COORD_NUMBER + r'"?'),
        re.compile(r'longitude"?\s*:\s*"?' + _COORD_NUMBER + r'"?'),
    ),
)


def extract_coordinates(detail_soup):
    """Look for a latitude/longitude pair inside the page's <script> tags.

    Parameters
    ----------
    detail_soup : BeautifulSoup
        Parsed listing detail page.

    Returns
    -------
    tuple
        ``(lat, lng)`` as strings exactly as they appear in the page, taken
        from the first script in which both halves of a pattern match.
        ``(None, None)`` when no script contains a complete pair.
    """
    for script in detail_soup.find_all("script"):
        text = script.text
        for lat_pattern, lng_pattern in _COORD_PATTERNS:
            lat_match = lat_pattern.search(text)
            lng_match = lng_pattern.search(text)
            # Only accept a pattern when both halves are present, so a
            # latitude is never paired with a missing longitude.
            if lat_match and lng_match:
                return lat_match.group(1), lng_match.group(1)

    return None, None
# EXTRACT AVERAGE NAIRA PER SQM
# ------------------------------------------------------------
def extract_naira_per_sqm(detail_soup):
    """Return the text of the first <span> that follows an "sqm" label.

    The label is the first <strong> tag whose string contains "sqm"
    (case-insensitive). Returns the span's stripped text, or ``None`` when
    either the label or a following span is missing.
    """
    def _mentions_sqm(label):
        # Tags without a single string are passed in as None.
        return bool(label) and "sqm" in label.lower()

    label_tag = detail_soup.find("strong", string=_mentions_sqm)
    if not label_tag:
        return None

    value_tag = label_tag.find_next("span")
    if not value_tag:
        return None

    return value_tag.get_text(strip=True)
def _text_or_none(tag):
    """Return the stripped text of ``tag``, or None when the tag was not found."""
    return tag.get_text(strip=True) if tag else None


def scrape_page_land(land_content):
    """Scrape a single land listing card and return it as a dictionary.

    Parameters
    ----------
    land_content : bs4.element.Tag
        One ``div.item-listing-wrap`` card from the search-results page.

    Returns
    -------
    dict
        Keys ``title``, ``price``, ``link``, ``address``, ``type``, ``area``,
        ``author``, ``latitude``, ``longitude`` and ``avg_naira_sqm``. A value
        is ``None`` whenever the corresponding element is missing from the
        card or from the listing's detail page.
    """
    title_tag = land_content.find("h2", class_="item-title")
    anchor = title_tag.find("a") if title_tag else None
    # .get() so an anchor without an href yields None instead of a KeyError.
    land_link = anchor.get("href") if anchor else None

    # --------- VISIT EACH LISTING PAGE FOR COORDINATES ----------
    lat, lng, sqm = None, None, None
    if land_link:
        detail_soup = make_connection(land_link)
        if detail_soup:
            lat, lng = extract_coordinates(detail_soup)
            sqm = extract_naira_per_sqm(detail_soup)

    return {
        "title": _text_or_none(title_tag),
        "price": _text_or_none(land_content.find("li", class_="item-price")),
        "link": land_link,
        "address": _text_or_none(land_content.find("address", class_="item-address")),
        "type": _text_or_none(land_content.find("li", class_="h-type")),
        "area": _text_or_none(land_content.find("li", class_="h-area")),
        "author": _text_or_none(land_content.find("div", class_="item-author")),
        "latitude": lat,
        "longitude": lng,
        "avg_naira_sqm": sqm
    }


all_lands = []

# The loop variable is deliberately named ``land_content``: a later cell calls
# scrape_page_land(land_content) and relies on the name staying defined.
for land_content in land_content_all:
    all_lands.append(scrape_page_land(land_content))

print(f"land_scrapped_successfully: {len(all_lands)} land retrieved")


land_scrapped_successfully: 12 land retrieved


In [13]:
# SAVE TO CSV
# ------------------------------------------------------------
import os
import pandas as pd
# Destination of the raw scrape; the parent folder is created on demand.
save_path = r"C:\Users\THIS-PC\Documents\LagosPropIQ\Data\raw\landsng_full_data.csv"
output_dir = os.path.dirname(save_path)
os.makedirs(output_dir, exist_ok=True)

# One row per scraped listing; utf-8-sig writes a BOM at the start of the file.
lands_frame = pd.DataFrame(all_lands)
lands_frame.to_csv(save_path, index=False, encoding="utf-8-sig")

print("Saved to:", save_path)

Saved to: C:\Users\THIS-PC\Documents\LagosPropIQ\Data\raw\landsng_full_data.csv


In [17]:
# `land_content` is the loop variable left over from the scraping loop above,
# i.e. the last listing card that was iterated.
scrape_page_land(land_content)

**attributes**
- **land price**: land_content = content.find("li", class_="item-price").get_text()
- **land title**: land_content = content.find("h2", class_="item-title").get_text()
- **land link**: land_content = content.find("h2", class_="item-title").find("a").get("href")
- **land address**: land_content = content.find("address", class_="item-address").get_text()
- **land type**: land_content = content.find("li", class_="h-type").get_text()
- **land area**: land_content = content.find("li", class_="h-area").get_text()
- **land author**: land_content = content.find("div", class_="item-author").get_text()


In [20]:
import requests
import json
import re

# NOTE: this rebinds the module-level name HEADER that the first cell defined
# with a fuller set of browser headers. make_connection looks HEADER up when it
# is called, so after this cell runs it sends this minimal header too.
HEADER = {"User-Agent": "Mozilla/5.0"}
# Site root, used to build the search URL, absolute listing links and API URLs.
BASE = "https://land.ng"

def get_soup(url, timeout=30):
    """Download ``url`` and return the response body parsed with BeautifulSoup.

    Parameters
    ----------
    url : str
        Address of the page to download.
    timeout : float or tuple, optional
        Seconds to wait for the server before ``requests`` raises a timeout
        error. Defaults to 30.

    Returns
    -------
    BeautifulSoup
        The parsed body. The HTTP status code is not checked, so an error page
        is parsed like any other page.
    """
    # Without a timeout a stalled server would block the notebook forever.
    r = requests.get(url, headers=HEADER, timeout=timeout)
    return BeautifulSoup(r.text, "html.parser")

def get_property_api(property_id, timeout=30):
    """Fetch the JSON record for one property from the site's REST endpoint.

    Parameters
    ----------
    property_id : int or str
        Identifier inserted into ``/wp-json/houzez/v1/property/<id>``.
    timeout : float or tuple, optional
        Seconds to wait for the server before ``requests`` raises a timeout
        error. Defaults to 30.

    Returns
    -------
    object or None
        The decoded JSON body when the server answers with HTTP 200 and the
        body is valid JSON; ``None`` otherwise.
    """
    api_url = f"{BASE}/wp-json/houzez/v1/property/{property_id}"
    r = requests.get(api_url, headers=HEADER, timeout=timeout)
    if r.status_code != 200:
        return None
    try:
        return r.json()
    except ValueError:
        # A 200 reply that is not JSON (for example an HTML page) is treated
        # the same way as "no data" rather than aborting the scrape.
        return None

def scrape_listing_card(card):
    """Collect the link, property id, coordinates and per-area price of a card.

    The listing's detail page is downloaded to read the property id from the
    map element's ``data-map`` attribute; that id is then used to query the
    property API for the coordinates and the per-area price.

    Parameters
    ----------
    card : bs4.element.Tag
        One ``div.item-listing-wrap`` card from the search-results page.

    Returns
    -------
    dict
        Keys ``link``, ``property_id``, ``latitude``, ``longitude`` and
        ``avg_price_sqm``. Anything that could not be determined is ``None``;
        a card without a title link yields a record of all ``None`` values.
    """
    record = {
        "link": None,
        "property_id": None,
        "latitude": None,
        "longitude": None,
        "avg_price_sqm": None
    }

    # Basic listing info
    title_tag = card.find("h2", class_="item-title")
    link_tag = title_tag.find("a") if title_tag is not None else None
    link = link_tag.get("href") if link_tag is not None else None
    if not link:
        # Nothing to visit: skip the detail page instead of raising.
        return record
    full_link = link if link.startswith("http") else BASE + link
    record["link"] = full_link

    # Go to detail page
    soup = get_soup(full_link)

    # Extract property_id from data-map
    map_div = soup.find("div", id="houzez-single-listing-map-address") \
              or soup.find("div", id="houzez-single-listing-map")
    property_id = None
    if map_div and map_div.has_attr("data-map"):
        try:
            map_data = json.loads(map_div["data-map"])
            property_id = map_data.get("property_id") or map_data.get("post_id")
        except (ValueError, TypeError, AttributeError):
            # Malformed JSON, or JSON that is not an object: no usable id.
            property_id = None
    record["property_id"] = property_id

    if property_id:
        api_data = get_property_api(property_id)
        if api_data:
            record["latitude"] = api_data.get("lat")
            record["longitude"] = api_data.get("lng")
            # Some sites store per-sqm price under a different key; try common ones
            record["avg_price_sqm"] = (
                api_data.get("price_sqft")
                or api_data.get("price_sqm")
                or api_data.get("price_unit")
            )

    return record

# Example run: download the first Lagos results page and scrape every card on it.
search_url = BASE + "/search-results/?keyword=&states%5B%5D=lagos"
soup = get_soup(search_url)
cards = soup.find_all("div", class_="item-listing-wrap")

# One record (dict) per listing card, in page order.
data = [scrape_listing_card(card) for card in cards]

df = pd.DataFrame(data)
print(df.head())

# Persist the coordinates table next to the other raw data.
save_path = r"C:\Users\THIS-PC\Documents\LagosPropIQ\Data\raw\landsng_lagos_coordinates.csv"
df.to_csv(save_path, index=False, encoding="utf-8-sig")
print("Saved to:", save_path)


                                                link  property_id latitude  \
0  https://land.ng/land/fenced-land-with-governor...        25149     None   
1  https://land.ng/land/plot-of-land-close-to-dan...        25216     None   
2  https://land.ng/land/land-on-block-12-orange-i...        25152     None   
3  https://land.ng/land/freedom-city-estate-ibeju...        25104     None   
4  https://land.ng/land/a-developers-fit-land-for...        24888     None   

  longitude avg_price_sqm  
0      None          None  
1      None          None  
2      None          None  
3      None          None  
4      None          None  
Saved to: C:\Users\THIS-PC\Documents\LagosPropIQ\Data\raw\landsng_lagos_coordinates.csv
