[Website](https://nigeriapropertycentre.com/for-sale/houses/showtype)

### Importing necessary Libraries
- requests to send HTTP/1.1 requests easily, such as GET and POST, to interact with web APIs or retrieve web pages

- pandas for data handling, cleaning, manipulation and analysis.
- BeautifulSoup for parsing HTML and XML documents to extract specific data elements using a tree-like structure.
- selenium automates web browsers and user interactions such as clicking buttons or waiting for items to load dynamically. It is also used for scraping sites that require JavaScript rendering.
    - Service manages the ChromeDriver service that Selenium uses to interact with the Chrome browser.

    - By provides methods to locate elements on a webpage (e.g., by ID, name, class name, etc.).
- time provides time-related functions like adding delays (e.g., time.sleep()), and working with timestamps, or measuring execution time.
- tqdm for visualizing the progress of loops in data processing or web scraping.
- json for serializing Python objects into JSON format.

In [7]:
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd
from selenium.webdriver.chrome.service import Service
import time
from tqdm import tqdm
import os
import json

### 📄 Full Scraping Run (Pages to Properties)

The cell below:

- Visits each page in the specified range (from _start_page_ up to, but not including, _end_page_)

- Collects all property listing links

- Iterates through each link, scrapes property details, and saves them to _crib.json_

If interrupted, I can resume scraping later using the saved links — see the next cell for how I did that.

In [None]:
# Full scraping run: collect the listing links from result pages
# start_page .. end_page - 1, then visit every link and append one record per
# property to the JSON file at `destination`.
path = "C:/Users/HP/Downloads/chromedriver-win64/chromedriver.exe"
service = Service(path)

driver = webdriver.Chrome(service=service)

# Number of property links processed so far (whether or not they were saved).
total_properties = 0
# Links gathered from all result pages; kept at module level so a later cell
# can continue from the middle of this list after an interruption.
property_links_all_pages = []

# The output file is created as an empty JSON list only when it does not exist
# yet, so records saved by earlier runs are kept.
destination = "crib.json"
if not os.path.exists(destination):
    with open(destination, 'w') as f:
        json.dump([], f)

start_page = 419
end_page = 430  # exclusive: range() below stops at end_page - 1

try:
    # Pass 1: collect the detail-page URL of every listing on each result page.
    for x in range(start_page, end_page):
        url = f"https://nigeriapropertycentre.com/for-sale/houses/showtype?page={x}"
        driver.get(url)

        property_links = [
            link.get_attribute('href')
            for link in driver.find_elements(By.XPATH, '//a[@itemprop="url"]')
        ]
        property_links_all_pages.extend(property_links)

    # Pass 2: visit each listing and save its details.
    with tqdm(total=len(property_links_all_pages), desc="Scraping properties") as pbar:
        for link_url in property_links_all_pages:
            driver.get(link_url)

            # Fixed pause before the page source is read.
            time.sleep(10)

            soup = BeautifulSoup(driver.page_source, 'html.parser')

            try:
                details = soup.find('table', class_='table table-bordered table-striped')
                if details is None:
                    raise ValueError("details table not found")

                # Read every cell of the table in one go. Iterating the table
                # tag itself walks its direct children, which can include bare
                # text nodes (no find_all) or several tags (several partial
                # records for a single property).
                fields = [cell.text.strip() for cell in details.find_all('td')]

                # "Label: value" cells become dict entries. partition() splits
                # on the first colon only, so values that contain a colon are
                # kept whole.
                table = {}
                for field in fields:
                    label, separator, value = field.partition(":")
                    if separator:
                        table[label.strip()] = value.strip()

                # Price is best-effort: the record is still saved without it.
                try:
                    figure = soup.find_all('span', class_='price')[1]
                    table['Price'] = float(figure.attrs['content'])
                except (IndexError, KeyError, TypeError, ValueError):
                    pass

                # When a 'naira-equiv' span is present its amount replaces the
                # price read above.
                try:
                    dollar = soup.find('span', class_='naira-equiv')
                    # Second whitespace-separated token, commas removed, first
                    # character dropped before the float conversion.
                    table['Price'] = float(dollar.text.split()[1].replace(',', '')[1:])
                except (AttributeError, IndexError, ValueError):
                    pass

                address = soup.find('div', class_='col-sm-8 f15 property-details')
                if address is None:
                    raise ValueError("address block not found")

                # The last two comma-separated parts are used as district and state.
                location = address.text.strip().split(',')[-2:]
                if len(location) < 2:
                    raise ValueError("address has no comma-separated district and state")

                if '\n \xa0' in location[0]:
                    # Keep only the text that follows the marker.
                    district = location[0].split('\n \xa0')[1].strip()
                else:
                    district = location[0].strip()

                table['District'] = district
                table['State'] = location[1].strip()

                # Re-read and rewrite the file after every property so that an
                # interrupted run loses nothing that was already scraped.
                with open(destination, 'r') as f:
                    content = f.read()
                    data = json.loads(content) if content.strip() else []
                data.append(table)

                with open(destination, 'w') as f:
                    json.dump(data, f, indent=4)

            except Exception as e:
                # One bad listing must not stop the run: report it and move on.
                print(f'Error with property: {link_url} - {e}')

            total_properties += 1
            pbar.update(1)

            time.sleep(2)
finally:
    # Close the browser even when the run is interrupted by an error.
    driver.quit()

print(f"Successfully scraped {len(property_links_all_pages)} properties across {end_page - start_page} pages.")

### 🔄 Resuming Scraping After Interruption

If scraping is interrupted before all properties are processed (e.g., due to a network or WebDriver error), the cell below allows me to resume from where the process stopped.

I reuse the previously collected list of property links and simply iterate from the index of the last successfully scraped property to the end of the list — avoiding duplicates and saving time.

In [None]:
# Resume an interrupted run: continue through `property_links_all_pages`
# starting at `resume_index`, appending one record per property to the JSON
# file at `destination`. Both names must already exist in the session.
path = "C:/Users/HP/Downloads/chromedriver-win64/chromedriver.exe"
service = Service(path)

driver = webdriver.Chrome(service=service)

# Index of the first link that still has to be scraped; edit this one value
# before re-running the cell.
resume_index = 69
remaining_links = property_links_all_pages[resume_index:]

# Number of remaining links processed so far (whether or not they were saved).
remaining_properties = 0

try:
    with tqdm(total=len(remaining_links), desc="Scraping properties") as pbar:
        for link_url in remaining_links:
            driver.get(link_url)

            # Fixed pause before the page source is read.
            time.sleep(10)

            soup = BeautifulSoup(driver.page_source, 'html.parser')

            try:
                details = soup.find('table', class_='table table-bordered table-striped')
                if details is None:
                    raise ValueError("details table not found")

                # Read every cell of the table in one go. Iterating the table
                # tag itself walks its direct children, which can include bare
                # text nodes (no find_all) or several tags (several partial
                # records for a single property).
                fields = [cell.text.strip() for cell in details.find_all('td')]

                # "Label: value" cells become dict entries. partition() splits
                # on the first colon only, so values that contain a colon are
                # kept whole.
                table = {}
                for field in fields:
                    label, separator, value = field.partition(":")
                    if separator:
                        table[label.strip()] = value.strip()

                # Price is best-effort: the record is still saved without it.
                try:
                    figure = soup.find_all('span', class_='price')[1]
                    table['Price'] = float(figure.attrs['content'])
                except (IndexError, KeyError, TypeError, ValueError):
                    pass

                # When a 'naira-equiv' span is present its amount replaces the
                # price read above.
                try:
                    dollar = soup.find('span', class_='naira-equiv')
                    # Second whitespace-separated token, commas removed, first
                    # character dropped before the float conversion.
                    table['Price'] = float(dollar.text.split()[1].replace(',', '')[1:])
                except (AttributeError, IndexError, ValueError):
                    pass

                address = soup.find('div', class_='col-sm-8 f15 property-details')
                if address is None:
                    raise ValueError("address block not found")

                # The last two comma-separated parts are used as district and state.
                location = address.text.strip().split(',')[-2:]
                if len(location) < 2:
                    raise ValueError("address has no comma-separated district and state")

                if '\n \xa0' in location[0]:
                    # Keep only the text that follows the marker.
                    district = location[0].split('\n \xa0')[1].strip()
                else:
                    district = location[0].strip()

                table['District'] = district
                table['State'] = location[1].strip()

                # Re-read and rewrite the file after every property so that an
                # interrupted run loses nothing that was already scraped.
                with open(destination, 'r') as f:
                    content = f.read()
                    data = json.loads(content) if content.strip() else []
                data.append(table)

                with open(destination, 'w') as f:
                    json.dump(data, f, indent=4)

            except Exception as e:
                # One bad listing must not stop the run: report it and move on.
                print(f'Error with property: {link_url} - {e}')

            remaining_properties += 1
            pbar.update(1)
            time.sleep(2)
finally:
    # Close the browser even when the run is interrupted by an error.
    driver.quit()

# Report counts; the total is the length of the link list, not the list itself.
print(f"Successfully scraped the remaining {len(remaining_links)} properties from the entire {len(property_links_all_pages)} properties.")

In [None]:
# Number of records in `data`: the list the scraping loop last read from the
# JSON file, including the record it appended afterwards.
len(data)

In [11]:
# Convert the scraped JSON records into a CSV file, one row per record.
final_destination = "crib.csv"

# Load the full list of records from the JSON file written by the scraper.
with open(destination, 'r') as f:
    properties = json.loads(f.read())

# Build the table from the records and write it out without the index column.
df = pd.DataFrame(data=properties)
df.to_csv(path_or_buf=final_destination, index=False)

print(f"Successfully converted JSON to CSV. The CSV is saved as {final_destination}")

Successfully converted JSON to CSV. The CSV is saved as crib.csv
