# Web Scraping Housing Data 

## Import Libraries

In [1]:
pip install BeautifulSoup4

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [2]:
import requests
from bs4 import BeautifulSoup
import pandas as pd 
import numpy as np


from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry

In [3]:
# Reuse a single Session for the scrape and attach a transport adapter that
# retries failed connection attempts (up to 3 times, waiting longer between
# tries) before an error is raised. Nothing here saves or resumes scraping
# progress; it only makes individual requests more tolerant of network blips.
session = requests.Session()
retry = Retry(connect=3, backoff_factor=0.5)
adapter = HTTPAdapter(max_retries=retry)
for scheme in ('http://', 'https://'):
    session.mount(scheme, adapter)

## Extract Data from the first page

In [4]:
# Fetch the first results page through the retry-enabled `session` rather
# than a bare requests.get, bound the wait with a timeout, and fail fast on
# an HTTP error so an error page is never parsed as if it held listings.
r = session.get('https://www.propertypro.ng/property-for-rent/in/lagos', timeout=30)
r.raise_for_status()
soup = BeautifulSoup(r.content, 'html.parser')

In [5]:
# Listing cards are the <div> elements whose class attribute is this string.
listing_card_class = 'single-room-sale listings-property'
page = soup.find_all('div', class_=listing_card_class)

In [6]:
# Report how many listing cards were found on the first page.
listing_count = len(page)
print("Number of apartment listings on a page:", listing_count)

Number of apartment listings on a page: 21


In [7]:
def parse_listing(listing):
    """Turn one listing card into a row for ``page_list``.

    Every field is looked up on its own. When the markup for a field is
    missing, that field becomes ``None`` and the remaining fields are still
    extracted, so a row never carries values left over from another listing.

    Parameters
    ----------
    listing : a BeautifulSoup element for a single listing card.

    Returns
    -------
    list
        ``[property_name, address, price, serviced_newly_built, amenities,
        more_info, last_updated]``
    """
    def field(extract):
        try:
            return extract()
        except (AttributeError, IndexError):
            # .find() returned None, or .find_all() had too few matches.
            return None

    property_name = field(lambda: listing.find('h3', class_='listings-property-title2').text)
    address = field(lambda: listing.find_all('h4')[1].text)
    price = field(
        lambda: listing.find('h3', class_='listings-price').find_all('span')[1].text.strip()
    )
    last_updated = field(lambda: listing.find('h5').text)
    serviced_newly_built = field(
        lambda: listing.find('div', class_='furnished-btn').text.strip().replace('\n', ' ')
    )
    amenities = field(
        lambda: listing.find('div', class_='fur-areea').text.strip().replace('\n', ' ')
    )
    more_info = field(lambda: listing.find('h4', class_='listings-property-title').text)

    return [property_name, address, price, serviced_newly_built, amenities, more_info, last_updated]


# One row per listing card found on the first page.
page_list = [parse_listing(listing) for listing in page]

In [8]:
# Number of listing rows collected from the first page.
len(page_list)

21

## Extract Data from other pages

In [9]:
def parse_listing(listing):
    """Turn one listing card into a row for ``page_list``.

    Every field is looked up on its own. When the markup for a field is
    missing, that field becomes ``None`` and the remaining fields are still
    extracted, so a row never carries values left over from another listing.

    Parameters
    ----------
    listing : a BeautifulSoup element for a single listing card.

    Returns
    -------
    list
        ``[property_name, address, price, serviced_newly_built, amenities,
        more_info, last_updated]``
    """
    def field(extract):
        try:
            return extract()
        except (AttributeError, IndexError):
            # .find() returned None, or .find_all() had too few matches.
            return None

    property_name = field(lambda: listing.find('h3', class_='listings-property-title2').text)
    address = field(lambda: listing.find_all('h4')[1].text)
    price = field(
        lambda: listing.find('h3', class_='listings-price').find_all('span')[1].text.strip()
    )
    last_updated = field(lambda: listing.find('h5').text)
    serviced_newly_built = field(
        lambda: listing.find('div', class_='furnished-btn').text.strip().replace('\n', ' ')
    )
    amenities = field(
        lambda: listing.find('div', class_='fur-areea').text.strip().replace('\n', ' ')
    )
    more_info = field(lambda: listing.find('h4', class_='listings-property-title').text)

    return [property_name, address, price, serviced_newly_built, amenities, more_info, last_updated]


LISTINGS_URL = "https://www.propertypro.ng/property-for-rent/in/lagos"
LAST_PAGE = 784  # highest ?page= value requested

# Rows are appended to the existing `page_list`, so re-running this cell adds
# the same pages a second time.
for page_number in range(1, LAST_PAGE + 1):
    try:
        response = session.get(f"{LISTINGS_URL}?page={page_number}", timeout=30)
        response.raise_for_status()
    except requests.RequestException as error:
        # Keep the rows gathered so far and move on instead of aborting the
        # whole scrape because one page failed.
        print(f"Skipping page {page_number}: {error}")
        continue

    page_soup = BeautifulSoup(response.text, "html.parser")
    for listing in page_soup.find_all('div', class_='single-room-sale listings-property'):
        page_list.append(parse_listing(listing))

In [10]:
# Total number of listing rows collected after looping over the other pages.
len(page_list)

14803

In [11]:
# Column labels, in the same order as the values stored in each scraped row.
column_names = [
    'Property Name',
    'Address',
    'Price',
    'Serviced/Newly Built',
    'Amenities',
    'More Info',
    'Last Updated',
]
pages_list_df = pd.DataFrame(data=page_list, columns=column_names)

In [12]:
# Display the assembled frame; for a long frame pandas shows only the first and last rows.
pages_list_df

Unnamed: 0,Property Name,Address,Price,Serviced/Newly Built,Amenities,More Info,Last Updated
0,Spacious 3 Bedroom Flat,Lagos Business School Olokonla Ajah Lagos,"2,000,000/year",,3 beds 3 baths 4 Toilets,3 BEDROOM HOUSE FOR RENT,"Updated 11 Jun 2023, Added 03 Apr 2023"
1,4 Bedroom Terrace Duplex,Orchid Lekki Lagos,"4,000,000/year",,0 beds 0 baths 0 Toilets,FLAT / APARTMENT FOR RENT,"Updated 11 Jun 2023, Added 25 Apr 2023"
2,Office Space At The Finery Ikoyi,Ikoyi Lagos,"6,400,000/year",,beds baths Toilets,COMMERCIAL PROPERTY FOR RENT,"Updated 11 Jun 2023, Added 12 Jan 2023"
3,4 Bedroom Penthouse + 2 Bq,Adeola Odeku Victoria Island Lagos,"25,000,000/year",Serviced Newly Built,4 beds 4 baths 5 Toilets,4 BEDROOM HOUSE FOR RENT,"Updated 11 Jun 2023, Added 18 May 2023"
4,Office Space At Nestoil Towers,Akin Adesola Victoria Island Lagos,800/sqm,,beds baths Toilets,COMMERCIAL PROPERTY FOR RENT,"Updated 11 Jun 2023, Added 09 May 2023"
...,...,...,...,...,...,...,...
14798,4 Bedroom Terrace Duplex,Agungi Lekki Lagos,"4,200,000/year",,4 beds 4 baths 5 Toilets,4 BEDROOM HOUSE FOR RENT,"Updated 13 Feb 2023, Added 08 Feb 2023"
14799,4 Bedroom Terrace Duplex,Agungi Lekki Lagos,"4,200,000/year",,4 beds 4 baths 5 Toilets,4 BEDROOM HOUSE FOR RENT,"Updated 13 Feb 2023, Added 08 Feb 2023"
14800,4 Bedroom Detached Duplex,Ikeja Lagos,"9,000,000/year",,4 beds 4 baths 5 Toilets,4 BEDROOM HOUSE FOR RENT,"Updated 13 Feb 2023, Added 07 Feb 2023"
14801,3 Bedroom Apartment,Majek Sangotedo Ajah Lagos,"1,500,000/year",Newly Built,3 beds 3 baths 4 Toilets,3 BEDROOM FLAT / APARTMENT FOR RENT,Added 10 Jun 2023


In [13]:
# Load the listings from rent_property.csv. No cell shown in this part of the
# notebook writes that file.
# NOTE(review): the "Unnamed: 0" / "Unnamed: 0.1" columns in the sample output
# look like index columns that were saved along with the data — confirm how the
# CSV was produced before relying on them.
df = pd.read_csv("rent_property.csv")

## Read Data

In [16]:
# Peek at five randomly chosen rows of the loaded data.
df.sample(n=5)

Unnamed: 0.1,Unnamed: 0,Property Name,Address,Price,Serviced/Newly Built,Amenities,More Info,Last Updated
2855,2855,4 Bedroom Maisonette,Parkview Estate Ikoyi Lagos,"14,000,000/year",,4 beds 5 baths 5 Toilets,4 BEDROOM HOUSE FOR RENT,"Updated 20 May 2023, Added 26 Mar 2023"
25011,25011,3 Bedroom Terrace Duplex,Lekki Lagos,"6,000,000/year",Serviced,3 beds 3 baths 4 Toilets,3 BEDROOM HOUSE FOR RENT,"Updated 19 May 2023, Added 15 May 2023"
5394,5394,4 Bedroom Semi Detached Duplex,Picadilly Suites Hotel Idado Estate Agungi Lek...,"6,000,000/year",Newly Built,4 beds 4 baths 5 Toilets,4 BEDROOM COMMERCIAL PROPERTY FOR RENT,"Updated 13 May 2023, Added 20 Feb 2023"
8169,8169,Mini Flat,Omole Phase 1 Ojodu Lagos,"1,000,000/year",,1 beds 0 baths 0 Toilets,1 BEDROOM FLAT / APARTMENT FOR RENT,"Updated 23 May 2023, Added 02 May 2023"
5697,5697,Newly Built Mini Flat,Ikorodu Road(ilupeju) Ilupeju Lagos,"600,000/year",Furnished Newly Built,2 beds 2 baths 2 Toilets,2 BEDROOM FLAT / APARTMENT FOR RENT,"Updated 13 May 2023, Added 07 May 2023"
