## **Import Packages**

In [1]:
# pip install google-search-results

In [2]:
# Import Packages
from bs4 import BeautifulSoup
from datetime import datetime 
import requests   
import pandas as pd 
import numpy as np
from tqdm import trange
pd.set_option('display.max_rows',1000)
import time
from google.cloud import bigquery

## **Initialize Client Object**

In [3]:
# initialize client object
client = bigquery.Client(project='project-adrian-julius-aluoch')

## **Scrape Real Estate Listings in Nairobi & Mombasa, Kenya**

### **a. Apartments for Rent in Nairobi**

In [4]:
# Initialize DataFrame outside the loop
bigdata = pd.DataFrame()

for page in trange(1,101):
    counties = ['nairobi']
    furnished = ['true','false']
    rental_rates = ['day','month','week','year']
    
    for county in counties:
        for furnish in furnished:
            for rate in rental_rates:
                
                # specify website url 
                url = 'https://www.property24.co.ke/property-to-rent-in-'+str(county)+'-p95?rentalterm='+str(rate)+'&isfurnished='+str(furnish)+'&propertytypes=apartments-flats,townhouses&Page=' + str(page)
                req = requests.get(url).text 
                soup = BeautifulSoup(req,'lxml')
                listings = soup.find_all('span',class_='p24_content')

                for listing in listings:
                    try:
                        property_title = listing.find('span',class_='p24_propertyTitle').text.strip()
                        property_availability = 'For Rent'
                        property_location = listing.find('span',class_='p24_location').text.strip()
                        property_address = listing.find('span',class_='p24_address').text.strip()
                        floor_size = listing.find('span', class_='p24_size', title='Floor Size').text.strip()
                        furnished = furnish
                        rental_rate = rate
                        property_price = listing.find('span',class_='p24_price').text.strip()
                        last_scraped = datetime.now()
                        
                        # Create DataFrame
                        data = pd.DataFrame({
                            'county':[county],
                            'property_title':[property_title],
                            'property_availability':[property_availability],
                            'property_location':[property_location],
                            'property_address':[property_address],
                            'floor_size':[floor_size],
                            'furnished':[furnished],
                            'rental_rate':[rental_rate],
                            'property_price':[property_price],
                            'last_scraped':[last_scraped]
                        })

                        # Append data to bigdata DataFrame
                        bigdata = pd.concat([bigdata,data],ignore_index=True)
                        
                    except Exception as e:
                        pass            

# Handle Database Import Error
table_id = 'project-adrian-julius-aluoch.cronjobs.real_estate_data'
job = client.load_table_from_dataframe(bigdata,table_id)
while job.state != 'DONE':
    time.sleep(4)
    job.reload()
    print(f"Data Upload Status : {job.state}")

100%|██████████| 100/100 [22:35<00:00, 13.56s/it]


Data Upload Status : DONE


In [5]:
# Shape of Data Collected
bigdata.shape

(1758, 10)

In [6]:
# Apartments for Rent in Nairobi
bigdata.head()

Unnamed: 0,county,property_title,property_availability,property_location,property_address,floor_size,furnished,rental_rate,property_price,last_scraped
0,nairobi,2 Bedroom Apartment / Flat,For Rent,Kilimani,"Kilimani Estate Kilimani, Kilimani, Nairobi",120 m²,False,day,KSh 12 000\r\n per day,2024-09-10 19:03:57.662506
1,nairobi,Apartment / Flat,For Rent,Kilimani,"Kilimani Estate Kilimani, Kilimani, Nairobi",39 m²,False,day,KSh 4 500\r\n per day,2024-09-10 19:03:57.667252
2,nairobi,2 Bedroom Apartment / Flat,For Rent,Kilimani,"Kilimani Estate Kilimani, Kilimani, Nairobi",120 m²,False,day,KSh 12 000\r\n per day,2024-09-10 19:03:57.669368
3,nairobi,2 Bedroom Apartment / Flat,For Rent,Kilimani,"Kilimani Estate Kilimani, Kilimani, Nairobi",110 m²,False,day,KSh 10 000\r\n per day,2024-09-10 19:03:57.671254
4,nairobi,3 Bedroom Apartment / Flat,For Rent,Lavington,"Gitanga Rd Nairobi, Lavington, Nairobi",200 m²,False,day,KSh 9 000\r\n per day,2024-09-10 19:03:57.673129


### **b. Apartmensts for Sale in Nairobi**

In [7]:
# Initialize DataFrame outside the loop
bigdata = pd.DataFrame()

for page in trange(1,101):
    counties = ['nairobi']
    
    for county in counties:
        # specify website url 
        url = 'https://www.property24.co.ke/property-for-sale-in-'+str(county)+'-p95?propertytypes=apartments-flats,townhouses&Page=' + str(page)
        req = requests.get(url).text 
        soup = BeautifulSoup(req,'lxml')
        listings = soup.find_all('span',class_='p24_content')

        for listing in listings:
            try:
                property_title = listing.find('span',class_='p24_propertyTitle').text.strip()
                property_availability = 'For Sale'
                property_location = listing.find('span',class_='p24_location').text.strip()
                property_address = listing.find('span',class_='p24_address').text.strip()
                floor_size = listing.find('span',class_='p24_size').text.strip()
                property_price = listing.find('span',class_='p24_price').text.strip()
                rental_rate = np.NAN
                furnished = np.NAN
                last_scraped = datetime.now()
                
                # Create DataFrame
                data = pd.DataFrame({
                    'county':[county],
                    'property_title':[property_title],
                    'property_availability':[property_availability],
                    'property_location':[property_location],
                    'property_address':[property_address],
                    'floor_size':[floor_size],
                    'furnished':[furnished],
                    'rental_rate':[rental_rate],
                    'property_price':[property_price],
                    'last_scraped':[last_scraped]
                })

                # Append data to bigdata DataFrame
                bigdata = pd.concat([bigdata,data],ignore_index=True)
                
            except Exception as e:
                pass   

# Handle Database Import Error
table_id = 'project-adrian-julius-aluoch.cronjobs.real_estate_data'
job = client.load_table_from_dataframe(bigdata,table_id)
while job.state != 'DONE':
    time.sleep(4)
    job.reload()
    print(f"Data Upload Status : {job.state}")

100%|██████████| 100/100 [02:58<00:00,  1.78s/it]


Data Upload Status : DONE


In [8]:
# Shape of Data Collected
bigdata.shape

(1913, 10)

In [9]:
# Apartments for Sale in Nairobi
bigdata.head()

Unnamed: 0,county,property_title,property_availability,property_location,property_address,floor_size,furnished,rental_rate,property_price,last_scraped
0,nairobi,4 Bedroom Townhouse,For Sale,Karen,"Karen, Nairobi",4 acres,,,KSh 335 400 000,2024-09-10 19:26:32.607510
1,nairobi,4 Bedroom Apartment / Flat,For Sale,Kileleshwa,"Kileleshwa Nairobi, Kileleshwa, Nairobi",250 m²,,,KSh 26 000 000,2024-09-10 19:26:32.610171
2,nairobi,1 Bedroom Apartment / Flat,For Sale,Ongata Rongai,"Ongata Rongai, Nairobi",44 m²,,,KSh 2 960 000,2024-09-10 19:26:32.613069
3,nairobi,2 Bedroom Apartment / Flat,For Sale,Kileleshwa,"Kileleshwa, Nairobi",118 m²,,,KSh 12 000 000,2024-09-10 19:26:32.615605
4,nairobi,4 Bedroom Apartment / Flat,For Sale,Kilimani,"Likoni lane, Kilimani, Nairobi",147.5 m²,,,KSh 14 800 000,2024-09-10 19:26:32.618667


### **c. Apartment for Rent in Mombasa**

In [10]:
# Initialize DataFrame outside the loop
bigdata = pd.DataFrame()

for page in trange(1,101):
    counties = ['mombasa']
    furnished = ['true','false']
    rental_rates = ['day','month','week','year']
    
    for county in counties:
        for furnish in furnished:
            for rate in rental_rates:
                
                # specify website url 
                url = 'https://www.property24.co.ke/property-to-rent-in-'+str(county)+'-p93?rentalterm='+str(rate)+'&isfurnished='+str(furnish)+'&propertytypes=apartments-flats,townhouses&Page=' + str(page)
                req = requests.get(url).text 
                soup = BeautifulSoup(req,'lxml')
                listings = soup.find_all('span',class_='p24_content')

                for listing in listings:
                    try:
                        property_title = listing.find('span',class_='p24_propertyTitle').text.strip()
                        property_availability = 'For Rent'
                        property_location = listing.find('span',class_='p24_location').text.strip()
                        property_address = listing.find('span',class_='p24_address').text.strip()
                        floor_size = listing.find('span', class_='p24_size', title='Floor Size').text.strip()
                        furnished = furnish
                        rental_rate = rate
                        property_price = listing.find('span',class_='p24_price').text.strip()
                        last_scraped = datetime.now()
                        
                        # Create DataFrame
                        data = pd.DataFrame({
                            'county':[county],
                            'property_title':[property_title],
                            'property_availability':[property_availability],
                            'property_location':[property_location],
                            'property_address':[property_address],
                            'floor_size':[floor_size],
                            'furnished':[furnished],
                            'rental_rate':[rental_rate],
                            'property_price':[property_price],
                            'last_scraped':[last_scraped]
                        })

                        # Append data to bigdata DataFrame
                        bigdata = pd.concat([bigdata,data],ignore_index=True)
                        
                    except Exception as e:
                        pass            

# Handle Database Import Error
table_id = 'project-adrian-julius-aluoch.cronjobs.real_estate_data'
job = client.load_table_from_dataframe(bigdata,table_id)
while job.state != 'DONE':
    time.sleep(4)
    job.reload()
    print(f"Data Upload Status : {job.state}")

100%|██████████| 100/100 [20:44<00:00, 12.45s/it]


Data Upload Status : DONE


In [11]:
# Shape of Data Collected
bigdata.shape

(1729, 10)

In [12]:
# Apartments for Rent in Mombasa
bigdata.head()

Unnamed: 0,county,property_title,property_availability,property_location,property_address,floor_size,furnished,rental_rate,property_price,last_scraped
0,mombasa,3 Bedroom Apartment / Flat,For Rent,Nyali,"Mombasa Mombasa County, Nyali, Mombasa",240 m²,False,day,KSh 20 000\r\n per day,2024-09-10 19:29:41.109873
1,mombasa,2 Bedroom Apartment / Flat,For Rent,Mombasa Island,"Shanzu Mombasa, Mombasa Island, Mombasa",140 m²,False,day,KSh 14 000\r\n per day,2024-09-10 19:29:41.112039
2,mombasa,2 Bedroom Apartment / Flat,For Rent,Nyali,"1058 1st Ave, Nyali, Mombasa",200 m²,False,day,KSh 12 000\r\n per day,2024-09-10 19:29:41.114036
3,mombasa,4 Bedroom Apartment / Flat,For Rent,Nyali,"Bungalows Rd Mombasa, Nyali, Mombasa",2700 m²,False,month,KSh 150 000,2024-09-10 19:29:42.700735
4,mombasa,3 Bedroom Apartment / Flat,For Rent,Nyali,"XP64+VRJ Links Rd, Nyali, Mombasa",1800 m²,False,month,KSh 55 000,2024-09-10 19:29:42.703596


### **d. Apartments for Sale in Mombasa**

In [13]:
# Initialize DataFrame outside the loop
bigdata = pd.DataFrame()

# specify website url 
for page in trange(1,101):
    counties = ['mombasa']
    for county in counties:
        url = 'https://www.property24.co.ke/property-for-sale-in-'+str(county)+'-p93?propertytypes=apartments-flats,townhouses&Page=' + str(page)
        req = requests.get(url).text 
        soup = BeautifulSoup(req,'lxml')

        listings = soup.find_all('span',class_='p24_content')

        for listing in listings:
            try:
                property_title = listing.find('span',class_='p24_propertyTitle').text.strip()
                property_availability = 'For Sale'
                property_location = listing.find('span',class_='p24_location').text.strip()
                property_address = listing.find('span',class_='p24_address').text.strip()
                floor_size = listing.find('span',class_='p24_size').text.strip()
                property_price = listing.find('span',class_='p24_price').text.strip()
                rental_rate = np.NAN
                furnished = np.NAN
                last_scraped = datetime.now()
                data = pd.DataFrame({
                    'county':[county],
                    'property_title':[property_title],
                    'property_availability':[property_availability],
                    'property_location':[property_location],
                    'property_address':[property_address],
                    'floor_size':[floor_size],
                    'furnished':[furnished],
                    'rental_rate':[rental_rate],
                    'property_price':[property_price],
                    'last_scraped':[last_scraped]
                })

                # Append data to bigdata DataFrame
                bigdata = pd.concat([bigdata,data],ignore_index=True)
                
            except Exception as e:
                pass     

# Handle Database Import Error
table_id = 'project-adrian-julius-aluoch.cronjobs.real_estate_data'
job = client.load_table_from_dataframe(bigdata,table_id)
while job.state != 'DONE':
    time.sleep(4)
    job.reload()
    print(f"Data Upload Status : {job.state}")

100%|██████████| 100/100 [02:43<00:00,  1.63s/it]


Data Upload Status : DONE


In [14]:
# Shape of Data Collected
bigdata.shape

(1585, 10)

In [15]:
# Apartments for Sale in Mombasa
bigdata.head()

Unnamed: 0,county,property_title,property_availability,property_location,property_address,floor_size,furnished,rental_rate,property_price,last_scraped
0,mombasa,3 Bedroom Apartment / Flat,For Sale,Nyali,"XP99+52X Mount Kenya Rd, Nyali, Mombasa",2700 m²,,,KSh 23 000 000,2024-09-10 19:50:25.860548
1,mombasa,2 Bedroom Apartment / Flat,For Sale,Nyali,"Greenwood Dr Mombasa, Nyali, Mombasa",1600 m²,,,KSh 7 500 000,2024-09-10 19:50:25.863897
2,mombasa,3 Bedroom Apartment / Flat,For Sale,Nyali,"Nyali, Mombasa",172 m²,,,KSh 17 500 000,2024-09-10 19:50:25.866893
3,mombasa,3 Bedroom Apartment / Flat,For Sale,Nyali,"Beach Rd Mombasa, Nyali, Mombasa",1700 m²,,,KSh 7 750 000,2024-09-10 19:50:25.869439
4,mombasa,2 Bedroom Apartment / Flat,For Sale,Nyali,"XPHF+3JG 1st Ave, Nyali, Mombasa",1400 m²,,,KSh 8 500 000,2024-09-10 19:50:25.871793


### **Basic Data Cleaning**

In [16]:
# Define SQL Query to Retrieve Real Estate Data from Google Cloud BigQuery
sql = (
       'SELECT *'
       'FROM `cronjobs.real_estate_data`'
       )

# Run SQL Query
data = client.query(sql).to_dataframe()
print(f'Rows of Real Estate Data in Google BigQuery : {data.shape[0]:,.0f}\nCols of Real Estate Data in Google BigQuery : {data.shape[1]:,.0f}')

Rows of Real Estate Data in Google BigQuery : 12,395
Cols of Real Estate Data in Google BigQuery : 10


In [17]:
# Check Total Number of Duplicate Records
duplicated = data.duplicated(subset=[
       'county', 'property_title', 'property_availability',
       'property_location', 'property_address', 'floor_size', 
       'furnished', 'rental_rate', 'property_price'
                                    ]).sum()

# Remove Duplicate Records
data.drop_duplicates(subset=[
       'county', 'property_title', 'property_availability',
       'property_location', 'property_address', 'floor_size', 
       'furnished', 'rental_rate', 'property_price'
                            ],inplace=True)

# Display Initial & Final Number of Duplicate Records
print(f"Initial Shape of Dataset : {data.shape}\nTotal Duplicate Records : {duplicated:,.0f}\nFinal Shape of Dataset : {data.shape}")

Initial Shape of Dataset : (5495, 10)
Total Duplicate Records : 6,900
Final Shape of Dataset : (5495, 10)


In [18]:
# Drop Original Real Estate Table 
table_id = 'project-adrian-julius-aluoch.cronjobs.real_estate_data'
client.delete_table(table_id)

# Upload Final Real Estate Table
job = client.load_table_from_dataframe(data,table_id)
while job.state != 'DONE':
    time.sleep(1)
    job.reload()
    print(f'Real Estate Data Update : {job.state}')

Real Estate Data Update : RUNNING
Real Estate Data Update : DONE


### **Save the Results to CSV**

In [19]:
data.to_csv('real estate.csv',index=False)
print(f'Real Estate Data Successfully Saved to CSV.')

Real Estate Data Successfully Saved to CSV.


### **Display the Real Estate Data**

In [20]:
data.iloc[:100].sort_values(by='last_scraped',ascending=False).reset_index(drop=True)

Unnamed: 0,county,property_title,property_availability,property_location,property_address,floor_size,furnished,rental_rate,property_price,last_scraped
0,nairobi,Apartment / Flat,For Sale,Westlands,"Lantana road, Westlands, Nairobi",33 m²,,,KSh 8 500 000,2024-09-09 19:13:55.201672+00:00
1,nairobi,Apartment / Flat,For Sale,Syokimau,"Off Mombasa road, Syokimau, Nairobi",30 m²,,,KSh 3 000 000,2024-09-09 19:13:48.905206+00:00
2,nairobi,Apartment / Flat,For Sale,Kileleshwa,"Kileleshwa, Kileleshwa, Nairobi",40 m²,,,KSh 4 500 000,2024-09-09 19:13:20.111396+00:00
3,nairobi,Apartment / Flat,For Sale,Dagoretti,"Off Naivasha road, Dagoretti, Nairobi",23 m²,,,KSh 2 300 000,2024-09-09 19:13:02.237038+00:00
4,nairobi,Apartment / Flat,For Sale,Kileleshwa,"Near Kasuku Centre, Kileleshwa, Nairobi",40 m²,,,KSh 4 500 000,2024-09-09 19:12:54.943077+00:00
5,nairobi,Apartment / Flat,For Sale,Syokimau,"Mombasa Road Nairobi, Syokimau, Nairobi",47 m²,,,KSh 3 900 000,2024-09-09 19:12:06.796322+00:00
6,nairobi,Apartment / Flat,For Sale,Syokimau,"Mombasa Road Nairobi, Syokimau, Nairobi",47 m²,,,KSh 4 700 000,2024-09-09 19:12:06.794383+00:00
7,nairobi,Apartment / Flat,For Sale,Kerarapon,"Kerarapon Drive Road, Kerarapon, Nairobi",14 acres,,,KSh 700 000 000,2024-09-09 19:11:43.840301+00:00
8,nairobi,1 Bedroom Townhouse,For Rent,Karen,"Langata Rd Langata, Karen, Nairobi",200 m²,False,month,KSh 25 000,2024-09-09 19:11:20.580786+00:00
9,nairobi,2 Bedroom Townhouse,For Rent,Kilimani,"Lenana road, Kilimani, Nairobi",1200 m²,False,month,KSh 60 000,2024-09-09 19:07:43.704428+00:00
