**Data Cleaning**

In [2]:
# Import essential libraries for data manipulation, geospatial processing, 
# geocoding, and visualization

import numpy as np
import pandas as pd
import geopandas as gpd
import requests

from shapely.geometry import Point

# Geocoding
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

In [5]:
# Load the raw land listings dataset 

df = pd.read_csv( r"C:\Users\THIS-PC\Documents\LagosPropIQ\Data\raw\landsng_full_dataset.csv")

In [6]:
# Display the first few rows to preview the dataset structure

df.head()

Unnamed: 0,Title,Link,Price (₦),Location,Land Size (sqm),Land Type,Latitude,Longitude,price type,Average ₦/sqm
0,Fenced Land with Governor’s consent,https://land.ng/land/fenced-land-with-governor...,2000000000,"Agungi / Idado, Lekki, Eti-Osa, Lagos",1800.0,Mixed-Use Land,1.0,10.7,/Outright,1111111.11
1,Plot of land close to Dangote Refinery for sale,https://land.ng/land/plot-of-land-close-to-dan...,11000000,"Epe Ilara epe flourish estate, Epe, Lagos",600.0,"Estate Land, Mixed-Use Land, Residential Land",1.0,10.7,/Outright,18333.33
2,"Land on Block 12, Orange Island, Lekki Phase 1...",https://land.ng/land/land-on-block-12-orange-i...,800000000,"Orange Island, Lekki Phase 1 Extension, Eti-Os...",1000.0,Residential Land,1.0,10.7,/Outright,800000.0
3,"Freedom City Estate, Ibeju-Lekki",https://land.ng/land/freedom-city-estate-ibeju...,4000000,"Eti-osho,Eripa. Ibeju lekki., Ibeju-Lekki, Lagos",600.0,Estate Land,1.0,10.7,/Per Plot,6666.67
4,A developer’s – fit land for sale.,https://land.ng/land/a-developers-fit-land-for...,700000,"Cowrie Creek Estate, Elegushi, Lekki, Lagos, I...",,"Estate Land, Residential Land",1.0,10.7,,


In [7]:
# View summary information about the dataset, including data types and missing values

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Title            72 non-null     object 
 1   Link             72 non-null     object 
 2   Price (₦)        72 non-null     int64  
 3   Location         72 non-null     object 
 4   Land Size (sqm)  60 non-null     float64
 5   Land Type        72 non-null     object 
 6   Latitude         71 non-null     float64
 7   Longitude        71 non-null     float64
 8   price type       60 non-null     object 
 9   Average ₦/sqm    60 non-null     float64
dtypes: float64(4), int64(1), object(5)
memory usage: 5.8+ KB


**Drop Wrong Latitude & Longitude**

In [10]:
df = df.drop(columns=["Latitude", "Longitude"], errors="ignore")


In [24]:
##Clean Price Column to Remove symbols like ₦, commas, text ("Outright", "Per Plot"), etc.
df["Price (₦)"] = (
    df["Price (₦)"]
    .astype(str)
    .str.replace("₦", "", regex=False)
    .str.replace(",", "", regex=False)
    .str.extract(r"(\d+)")
    .astype(float)
)
#covert to integer
df["Price (₦)"] = df["Price (₦)"].astype("Int64")



In [25]:
#Clean Price Type
df["price type"] = (
    df["price type"]
    .astype(str)
    .str.replace("/", "", regex=False)
    .str.strip()
)


In [27]:
#clean land size
df["Land Size (sqm)"] = (
    df["Land Size (sqm)"]
    .astype(str)
    .str.extract(r"(\d+\.?\d*)")
    .astype(float)
)
df["Land Size (sqm)"] = pd.to_numeric(df["Land Size (sqm)"], errors="coerce")



In [26]:
#Clean Location Column to Remove unwanted commas, spaces.

df["Location"] = (
    df["Location"]
    .str.title()
    .str.replace(" ,", ",", regex=False)
    .str.replace(",,", ",", regex=False)
    .str.strip(" ,")
)



In [28]:
#standarduze land type
df["price type"] = df["price type"].fillna("Unknown")
df["price type"] = df["price type"].str.title()


In [29]:
#Recalculate Average ₦/sqm to keep only valid where both price & land size exist
df["Average ₦/sqm"] = (df["Price (₦)"] / df["Land Size (sqm)"]).round(2)
#convert to numeric
df["Average ₦/sqm"] = pd.to_numeric(df["Average ₦/sqm"], errors="coerce")



In [30]:
#Handle Missing Values to remove rows with no price or land size.
df = df.dropna(subset=["Price (₦)", "Land Size (sqm)"])


In [31]:
df.head()

Unnamed: 0,Title,Link,Price (₦),Location,Land Size (sqm),Land Type,price type,Average ₦/sqm
0,Fenced Land with Governor’s consent,https://land.ng/land/fenced-land-with-governor...,2000000000,"Agungi / Idado, Lekki, Eti-Osa, Lagos",1800.0,Mixed-Use Land,Outright,1111111.11
1,Plot of land close to Dangote Refinery for sale,https://land.ng/land/plot-of-land-close-to-dan...,11000000,"Epe Ilara Epe Flourish Estate, Epe, Lagos",600.0,"Estate Land, Mixed-Use Land, Residential Land",Outright,18333.33
2,"Land on Block 12, Orange Island, Lekki Phase 1...",https://land.ng/land/land-on-block-12-orange-i...,800000000,"Orange Island, Lekki Phase 1 Extension, Eti-Os...",1000.0,Residential Land,Outright,800000.0
3,"Freedom City Estate, Ibeju-Lekki",https://land.ng/land/freedom-city-estate-ibeju...,4000000,"Eti-Osho,Eripa. Ibeju Lekki., Ibeju-Lekki, Lagos",600.0,Estate Land,Per Plot,6666.67
5,THE MAINLAND HERITAGE,https://land.ng/land/the-mainland-heritage/,35000000,"Ikola Ipaja, Ipaja, Lagos",500.0,Estate Land,Per Plot,70000.0


In [32]:
df.info()


<class 'pandas.core.frame.DataFrame'>
Index: 60 entries, 0 to 71
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Title            60 non-null     object 
 1   Link             60 non-null     object 
 2   Price (₦)        60 non-null     Int64  
 3   Location         60 non-null     object 
 4   Land Size (sqm)  60 non-null     float64
 5   Land Type        60 non-null     object 
 6   price type       60 non-null     object 
 7   Average ₦/sqm    60 non-null     Float64
dtypes: Float64(1), Int64(1), float64(1), object(5)
memory usage: 4.3+ KB


In [33]:
df.to_csv( r"C:\Users\THIS-PC\Documents\LagosPropIQ\landsng_clean_ready_for_geocoding.csv", index=False, encoding="utf-8-sig")
print("File saved as: landsng_clean_ready_for_geocoding.csv")


File saved as: landsng_clean_ready_for_geocoding.csv


In [34]:
API_KEY = "a9e7d025d0044717bcf6d8fba12a4d91"

In [36]:
# Perform a test geocoding request using Geoapify API to obtain latitude and longitude
# for a sample address and display the coordinates

API_KEY = "a9e7d025d0044717bcf6d8fba12a4d91"

def test_geocoding(address):
    url = f"https://api.geoapify.com/v1/geocode/search?text={address}&apiKey={API_KEY}"

    r = requests.get(url)
    data = r.json()

    if "features" in data and len(data["features"]) > 0:
        props = data["features"][0]["properties"]
        print("Address:", props.get("formatted"))
        print("Latitude:", props.get("lat"))
        print("Longitude:", props.get("lon"))
    else:
        print("No result found. Check API key or address.")

test_geocoding("Lekki Phase 1, Lagos, Nigeria")


Address: Lekki, Lagos, LA, Nigeria
Latitude: 6.44632
Longitude: 3.47717


In [87]:
 #CLEANING & FEATURE ENGINEERING

# Clean location
df["Location"] = df["Location"].str.replace(",,", ",").str.strip()

# Compute average price/sqm
df["₦/sqm"] = df["Price (₦)"] / df["Land Size (sqm)"]

# Corridor classification
def classify_corridor(location):
    loc = str(location).lower()

    if any(x in loc for x in ["lekki", "ibeju", "epe", "sangotedo", "abijo", "ajah"]):
        return "Lekki–Epe Corridor"
    if any(x in loc for x in ["yaba", "surulere", "lagos mainland", "shomolu"]):
        return "Mainland"
    if any(x in loc for x in ["victoria island", "ikoyi"]):
        return "Island"
    if "ikorodu" in loc:
        return "Ikorodu"
    if "badagry" in loc:
        return "Badagry"
    return "Other"

df["Corridor"] = df["Location"].apply(classify_corridor)


In [93]:
#ADDRESS FIXING FOR BETTER GEOCODING

def improve_address(location, corridor):
    if pd.isna(location): 
        return "Lagos, Nigeria"

    if corridor == "Lekki–Epe Corridor":
        return location + ", Ibeju-Lekki, Lagos, Nigeria"

    elif corridor == "Mainland":
        return location + ", Lagos Mainland, Lagos, Nigeria"

    elif corridor == "Island":
        return location + ", Victoria Island, Lagos, Nigeria"

    elif corridor == "Ikorodu":
        return location + ", Ikorodu, Lagos, Nigeria"

    elif corridor == "Badagry":
        return location + ", Badagry, Lagos, Nigeria"

    return location + ", Lagos, Nigeria"


In [94]:
df["clean_address"] = df.apply(
    lambda row: improve_address(row["Location"], row["Corridor"]), 
    axis=1
)


In [95]:
df[["Location", "Corridor", "clean_address"]].head()


Unnamed: 0,Location,Corridor,clean_address
0,"Agungi / Idado, Lekki, Eti-Osa, Lagos",Lekki–Epe Corridor,"Agungi / Idado, Lekki, Eti-Osa, Lagos, Ibeju-L..."
1,"Epe Ilara Epe Flourish Estate, Epe, Lagos",Lekki–Epe Corridor,"Epe Ilara Epe Flourish Estate, Epe, Lagos, Ibe..."
2,"Orange Island, Lekki Phase 1 Extension, Eti-Os...",Lekki–Epe Corridor,"Orange Island, Lekki Phase 1 Extension, Eti-Os..."
3,"Eti-Osho,Eripa. Ibeju Lekki., Ibeju-Lekki, Lagos",Lekki–Epe Corridor,"Eti-Osho,Eripa. Ibeju Lekki., Ibeju-Lekki, Lag..."
4,"Ikola Ipaja, Ipaja, Lagos",Other,"Ikola Ipaja, Ipaja, Lagos, Lagos, Nigeria"


In [99]:
#GEOAPIFY GEOCODING

API_KEY = "a9e7d025d0044717bcf6d8fba12a4d91"  

def geocode_geoapify(address):
    url = f"https://api.geoapify.com/v1/geocode/search?text={address}&format=json&apiKey={API_KEY}"
    try:
        response = requests.get(url).json()
    except:
        return None, None

    if "results" in response and response["results"]:
        r = response["results"][0]
        return r["lat"], r["lon"]
    return None, None


# Geocode all rows
df["Latitude"] = None
df["Longitude"] = None

for i, row in df.iterrows():
    address = row["clean_address"]            # the address sending to Geoapify
    lat, lon = geocode_geoapify(address)      # geocode it

    # Save to dataframe if found
    if lat:
        df.at[i, "Latitude"] = lat
        df.at[i, "Longitude"] = lon

        #  PRINT 
        print(f"[{i}] Address: {address} → Latitude: {lat}, Longitude: {lon}")

    else:
        print(f"[{i}] Address: {address} → ❌ No result found")

    time.sleep(1)




[0] Address: Agungi / Idado, Lekki, Eti-Osa, Lagos, Ibeju-Lekki, Lagos, Nigeria → Latitude: 6.447006, Longitude: 3.51776
[1] Address: Epe Ilara Epe Flourish Estate, Epe, Lagos, Ibeju-Lekki, Lagos, Nigeria → Latitude: 6.4550575, Longitude: 3.3941795
[2] Address: Orange Island, Lekki Phase 1 Extension, Eti-Osa, Lagos, Ibeju-Lekki, Lagos, Nigeria → Latitude: 6.4550575, Longitude: 3.3941795
[3] Address: Eti-Osho,Eripa. Ibeju Lekki., Ibeju-Lekki, Lagos, Ibeju-Lekki, Lagos, Nigeria → Latitude: 6.4627777, Longitude: 3.5852178
[4] Address: Ikola Ipaja, Ipaja, Lagos, Lagos, Nigeria → Latitude: 6.81667, Longitude: 2.88333
[5] Address: Epe, Nigeria, Epe, Lagos, Ibeju-Lekki, Lagos, Nigeria → Latitude: 6.5858219, Longitude: 3.9753453
[6] Address: Agege, Lagos, Lagos, Nigeria → Latitude: 6.4550575, Longitude: 3.3941795
[7] Address: Lepia, Ibeju-Lekki, Lagos, Ibeju-Lekki, Lagos, Nigeria → Latitude: 6.4550575, Longitude: 3.3941795
[8] Address: Oko Rice Baba Adidas Ibeju Lekki Lagos State, Ibeju-Lekki,

In [100]:
df["Latitude"].isna().sum(), df["Longitude"].isna().sum()


(0, 0)

In [104]:
df.to_csv("land_data_clean_geocoded.csv", index=False)


print(" FULL PIPELINE COMPLETE ")
print(" No missing coordinates.")
print(" File: land_data_clean_geocoded.csv")


 FULL PIPELINE COMPLETE 
 No missing coordinates.
 File: land_data_clean_geocoded.csv


In [102]:
df.head()

Unnamed: 0,Title,Link,Price (₦),Location,Land Size (sqm),Land Type,price type,Average ₦/sqm,Latitude,Longitude,Corridor,clean_address
0,Fenced Land with Governor’s consent,https://land.ng/land/fenced-land-with-governor...,2000000000,"Agungi / Idado, Lekki, Eti-Osa, Lagos",1800.0,Mixed-Use Land,Outright,1111111.11,6.447006,3.51776,Lekki–Epe Corridor,"Agungi / Idado, Lekki, Eti-Osa, Lagos, Ibeju-L..."
1,Plot of land close to Dangote Refinery for sale,https://land.ng/land/plot-of-land-close-to-dan...,11000000,"Epe Ilara Epe Flourish Estate, Epe, Lagos",600.0,"Estate Land, Mixed-Use Land, Residential Land",Outright,18333.33,6.455057,3.394179,Lekki–Epe Corridor,"Epe Ilara Epe Flourish Estate, Epe, Lagos, Ibe..."
2,"Land on Block 12, Orange Island, Lekki Phase 1...",https://land.ng/land/land-on-block-12-orange-i...,800000000,"Orange Island, Lekki Phase 1 Extension, Eti-Os...",1000.0,Residential Land,Outright,800000.0,6.455057,3.394179,Lekki–Epe Corridor,"Orange Island, Lekki Phase 1 Extension, Eti-Os..."
3,"Freedom City Estate, Ibeju-Lekki",https://land.ng/land/freedom-city-estate-ibeju...,4000000,"Eti-Osho,Eripa. Ibeju Lekki., Ibeju-Lekki, Lagos",600.0,Estate Land,Per Plot,6666.67,6.462778,3.585218,Lekki–Epe Corridor,"Eti-Osho,Eripa. Ibeju Lekki., Ibeju-Lekki, Lag..."
4,THE MAINLAND HERITAGE,https://land.ng/land/the-mainland-heritage/,35000000,"Ikola Ipaja, Ipaja, Lagos",500.0,Estate Land,Per Plot,70000.0,6.81667,2.88333,Other,"Ikola Ipaja, Ipaja, Lagos, Lagos, Nigeria"
