[Website](https://nigeriapropertycentre.com/for-sale/houses/showtype)

### Importing necessary Libraries
- requests to send HTTP/1.1 requests easily, such as GET and POST, to interact with web APIs or retrieve web pages

- pandas for data handling, cleaning, manipulation and analysis.
- BeautifulSoup for parsing HTML and XML documents to extract specific data elements using a tree-like structure.
- selenium automates web browsers and user interactions such as clicking buttons or dynamically waiting for items to load. It is also used for scraping sites that require JavaScript rendering.
    - Service manages the ChromeDriver service that Selenium uses to interact with the Chrome browser.

    - By provides methods to locate elements on a webpage (e.g., by ID, name, class name, etc.).
- time provides time-related functions like adding delays (e.g., time.sleep()), and working with timestamps, or measuring execution time.
- tqdm for visualizing the progress of loops in data processing or web scraping.
- json for serializing Python objects into JSON format.

In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import pandas as pd
import time
from tqdm import tqdm

### Scraping Real Estate Data

This page contains over 44000 properties so the scraping logic is basically: 
- iterate through between 100 and 200 pages at once (each page contains 10 properties)
- each iteration collects the link to each property on that page 
- once the page iteration is complete, each collected property link is visited in turn and its details are scraped
- adjust the start_page and end_page variables to scrape other pages as required

In [None]:
# Scrape property listings from nigeriapropertycentre.com.
#
# Phase 1 walks the listing pages and collects the link to every property.
# Phase 2 opens each property page, extracts its details, and checkpoints
# the results to crib.csv after every successfully parsed property.


def _parse_property(soup):
    """Extract one property's details from its parsed detail page.

    Args:
        soup: BeautifulSoup document of a single property page.

    Returns:
        dict mapping field names to values. Holds the "Label: value" cells of
        the details table, plus 'Price', 'District' and 'State' when they can
        be found. Pieces that are missing from the page are simply left out.
    """
    record = {}

    # Details table: every <td> whose text looks like "Label: value".
    details = soup.find('table', class_='table table-bordered table-striped')
    if details is not None:
        for cell in details.find_all('td'):
            # partition() keeps values that themselves contain a colon intact.
            label, separator, value = cell.text.strip().partition(':')
            if separator:
                record[label.strip()] = value.strip()

    # Listed price: the second span.price carries the number in its
    # `content` attribute. A missing or malformed price is not an error.
    try:
        record['Price'] = float(
            soup.find_all('span', class_='price')[1].attrs['content']
        )
    except (IndexError, KeyError, ValueError):
        pass

    # When a span.naira-equiv is present it takes precedence over the listed
    # price (same order of assignment as before).
    naira = soup.find('span', class_='naira-equiv')
    if naira is not None:
        try:
            # Second whitespace-separated token, commas removed, leading
            # character (the currency symbol) dropped.
            record['Price'] = float(naira.text.split()[1].replace(',', '')[1:])
        except (IndexError, ValueError):
            pass

    # Location: the last two comma-separated parts of the address block are
    # taken as district and state.
    address = soup.find('div', class_='col-sm-8 f15 property-details')
    if address is not None:
        parts = address.text.strip().split(',')[-2:]
        if len(parts) == 2:
            district, state = parts
            if '\n \xa0' in district:
                # Drop whatever precedes the newline/nbsp separator.
                district = district.split('\n \xa0')[1]
            record['District'] = district.strip()
            record['State'] = state.strip()

    return record


path = "C:/Users/HP/Downloads/chromedriver-win64/chromedriver.exe"
service = Service(path)

properties = []

total_properties = 0
property_links_all_pages = []

start_page = 1
end_page = 100  # exclusive: range() below stops at end_page - 1

# Rows saved by an earlier run (e.g. a different page range) are kept and
# extended rather than overwritten.
try:
    earlier_rows = pd.read_csv('crib.csv')
except (FileNotFoundError, pd.errors.EmptyDataError):
    earlier_rows = None

df = pd.DataFrame(properties)

driver = webdriver.Chrome(service=service)
try:
    # Phase 1: collect the property links from every listing page.
    for x in range(start_page, end_page):
        url = f"https://nigeriapropertycentre.com/for-sale/houses/showtype?page={x}"
        driver.get(url)

        anchors = driver.find_elements(By.XPATH, '//a[@itemprop="url"]')
        hrefs = (anchor.get_attribute('href') for anchor in anchors)
        # Skip anchors without an href so driver.get() never receives None.
        property_links_all_pages.extend(href for href in hrefs if href)

    # Phase 2: visit every property and extract its details.
    with tqdm(total=len(property_links_all_pages), desc="Scraping properties") as pbar:
        for link_url in property_links_all_pages:
            try:
                driver.get(link_url)

                # Give the page time to finish rendering before reading it.
                time.sleep(10)

                soup = BeautifulSoup(driver.page_source, 'html')
                record = _parse_property(soup)
            except Exception as e:
                # Boundary handler: one bad page must not abort the whole run.
                print(f'Error with property: {link_url} - {e}')
                record = {}

            if record:
                properties.append(record)
                df = pd.DataFrame(properties)

                # Rewrite the whole file instead of appending: properties do
                # not all expose the same fields, so header-less appends
                # would misalign columns (and re-appending `df` would
                # duplicate every earlier row).
                if earlier_rows is None:
                    snapshot = df
                else:
                    snapshot = pd.concat([earlier_rows, df], ignore_index=True)
                snapshot.to_csv('crib.csv', mode='w', index=False)

            total_properties += 1
            pbar.update(1)

            # Short pause between requests.
            time.sleep(2)
finally:
    # Always release the browser, even when the scrape is interrupted.
    driver.quit()

print(f"Successfully scraped {len(properties)} properties across {end_page - start_page} pages.")