In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import time

In [None]:
# Column lists: entry i of every list describes the same listing, so the lists
# must always grow together (the DataFrame built from them relies on that).

title_col = []
location_col = []
price_col = []
bedroom_col = []
bathroom_col = []
toilet_col = []
property_type_col = []
link_col = []

# Scraper configuration
BASE_URL = "https://nigeriapropertycentre.com"
SEARCH_KEYWORD = "ajah"
RENT_LAST_PAGE = 64    # rent results: pages 1 to 64
SALE_LAST_PAGE = 200   # sale results: pages 1 to 200
REQUEST_TIMEOUT = 15   # seconds before a request is abandoned
REQUEST_DELAY = 1      # seconds to pause between consecutive requests
MISSING = "N/A"        # placeholder for a room count the listing does not show

# Defined once at top level so both the rent and the sale pass can use it.
headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"}


def parse_room_counts(item_texts):
    """Pick the bedroom, bathroom and toilet entries out of one listing's room items.

    Parameters:
    - item_texts: stripped text of every <li> inside the listing's "aux-info" list.

    Returns a (bedroom, bathroom, toilet) tuple of strings; a value that cannot
    be found is reported as "N/A".

    Bathroom and toilet are matched by keyword rather than by position, so a
    missing or extra item cannot shift one value into another column or raise
    an IndexError.
    """
    def first_containing(keyword):
        return next((text for text in item_texts if keyword in text), MISSING)

    # The first item is taken as the bedroom entry unless it is recognisably a
    # bathroom or toilet entry (i.e. the listing shows no bedroom count).
    bedroom = item_texts[0] if item_texts else MISSING
    if "Bathroom" in bedroom or "Toilet" in bedroom:
        bedroom = MISSING

    return bedroom, first_containing("Bathroom"), first_containing("Toilet")


def parse_listing_page(html, price_suffix=""):
    """Extract every listing field from one search-results page.

    Parameters:
    - html: HTML text of the results page.
    - price_suffix: text appended to every price (e.g. " per annum" for rentals).

    Returns a dict mapping a field name to the list of values found on the page;
    all lists are guaranteed to have the same length.

    Raises ValueError when the listing container is absent or when the fields
    were not found the same number of times, because committing such a page
    would misalign the global column lists.
    """
    soup = BeautifulSoup(html, "lxml")
    property_listing = soup.find("div", {"itemtype": "https://schema.org/ItemList"})
    if property_listing is None:
        raise ValueError("listing container not found")

    bedrooms, bathrooms, toilets = [], [], []
    for container in property_listing.find_all("ul", class_="aux-info"):
        item_texts = [item.text.strip() for item in container.find_all("li")]
        bedroom, bathroom, toilet = parse_room_counts(item_texts)
        bedrooms.append(bedroom)
        bathrooms.append(bathroom)
        toilets.append(toilet)

    # Only every second "price" span (indexes 1, 3, 5, ...) is used as the
    # amount; presumably the other span of each pair holds the currency
    # symbol - TODO confirm against the live markup.
    amount_spans = property_listing.find_all("span", class_="price")[1::2]

    columns = {
        "title": [
            heading.text.strip()
            for heading in property_listing.find_all("h3", {"itemprop": "name"})
        ],
        "location": [
            address.strong.text.strip()
            for address in property_listing.find_all("address", class_="voffset-bottom-10")
        ],
        "price": [f"₦ {span.text.strip()}{price_suffix}" for span in amount_spans],
        "bedroom": bedrooms,
        "bathroom": bathrooms,
        "toilet": toilets,
        "property_type": [
            heading.text.strip()
            for heading in property_listing.find_all("h4", class_="content-title")
        ],
        "link": [
            f"{BASE_URL}{description.a['href']}"
            for description in property_listing.find_all("div", class_="description")
        ],
    }

    counts = {field: len(values) for field, values in columns.items()}
    if len(set(counts.values())) > 1:
        raise ValueError(f"field counts differ: {counts}")
    return columns


def scrape_listing_pages(listing_kind, label, last_page, price_suffix=""):
    """Scrape result pages 1..last_page of one listing kind into the column lists.

    Parameters:
    - listing_kind: URL path segment of the search ("for-rent" or "for-sale").
    - label: word used in the progress messages ("Rent" or "Sale").
    - last_page: last page number to request (inclusive).
    - price_suffix: text appended to every price on these pages.

    A page is added to the column lists only after it has been parsed
    completely, so a failing page is skipped as a whole and never leaves the
    lists with different lengths.
    """
    for page in range(1, last_page + 1):
        url = f"{BASE_URL}/{listing_kind}?keywords={SEARCH_KEYWORD}&page={page}"

        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT, headers=headers)
            # Report HTTP errors (403, 429, 5xx, ...) as such instead of
            # trying to parse an error page.
            response.raise_for_status()
            columns = parse_listing_page(response.text, price_suffix)
        except requests.Timeout:
            print(f"✗ {label} page {page} timed out, skipping...")
        except requests.ConnectionError:
            print(f"✗ {label} page {page} connection error, skipping...")
        except Exception as e:
            print(f"✗ {label} page {page} error: {e}, skipping...")
        else:
            title_col.extend(columns["title"])
            location_col.extend(columns["location"])
            price_col.extend(columns["price"])
            bedroom_col.extend(columns["bedroom"])
            bathroom_col.extend(columns["bathroom"])
            toilet_col.extend(columns["toilet"])
            property_type_col.extend(columns["property_type"])
            link_col.extend(columns["link"])

        # Pause after failures as well, so an unreachable or rate-limiting
        # server is not hit again immediately.
        time.sleep(REQUEST_DELAY)


print("Starting scrapper...")
start_time = time.perf_counter()

# RENT PROPERTIES IN AJAH - Pages 1 to 64
print("Scraping rental properties...")
scrape_listing_pages("for-rent", "Rent", RENT_LAST_PAGE, price_suffix=" per annum")

# SALE PROPERTIES IN AJAH - Pages 1 to 200
print("Scraping sale properties...")
scrape_listing_pages("for-sale", "Sale", SALE_LAST_PAGE)

# Measured inside this cell so the report also works on a freshly started kernel.
elapsed_time = time.perf_counter() - start_time
print(f"Scraping completed in {elapsed_time:.2f} seconds.")

# Assemble the scraped columns into a single table and export it.
# Keys are the CSV column headers; each value is the list collected by the scraper.
scraped_columns = {
    "Title": title_col,
    "Location": location_col,
    "Price": price_col,
    "Bedrooms": bedroom_col,
    "Bathrooms": bathroom_col,
    "Toilets": toilet_col,
    "Property Type": property_type_col,
    "URL": link_col,
}

# Row labels run from 1 to the number of scraped titles instead of starting at 0.
row_labels = range(1, len(title_col) + 1)
df = pd.DataFrame(scraped_columns, index=row_labels)

# The row labels are written to the file as its first column.
df.to_csv("ajah_properties.csv", index=True)



Starting scrapper...
Scraping rental properties...
✗ Rent page 24 connection error, skipping...
✗ Rent page 41 connection error, skipping...
✗ Rent page 48 connection error, skipping...
✗ Rent page 50 connection error, skipping...
✗ Rent page 57 connection error, skipping...
✗ Rent page 61 connection error, skipping...
✗ Sale page 7 connection error, skipping...
✗ Sale page 19 timed out, skipping...
✗ Sale page 28 connection error, skipping...
✗ Sale page 74 connection error, skipping...
✗ Sale page 86 connection error, skipping...
✗ Sale page 87 connection error, skipping...
✗ Sale page 88 connection error, skipping...
✗ Sale page 96 connection error, skipping...
✗ Sale page 110 timed out, skipping...
✗ Sale page 173 connection error, skipping...
✗ Sale page 175 connection error, skipping...
✗ Sale page 183 connection error, skipping...
✗ Sale page 189 connection error, skipping...
Scraping completed in 1598.99 seconds.


In [None]:
# import time
# from concurrent.futures import ThreadPoolExecutor, as_completed
# import pandas as pd
# from bs4 import BeautifulSoup
# import requests


# def scrape_all_pages_parallel(start_page: int = 1, end_page: int = 300, max_worker: int = 5):
#   """
#   Scrape multiple pages at the same time using threading.

#   Parameters:
#   - start_page: First page to scrape
#   - end_page: Last page to scrape
#   - max_worker: How many pages to scrape simultaneously
  
#   """

#   all_data ={
#     "Titles": [],
#     "Locations": [],
#     "Prices": [],
#     "Bedrooms": [],
#     "Bathrooms": [],
#     "Toilets": [],
#     "Property_Type": [],
#     "URLs": []
#   }

#   print(f"Starting scrapes of pages {start_page} to {end_page}...")
#   start_time = time.time()

#   with ThreadPoolExecutor(max_workers=max_workers) as executor:
    