In [2]:
import requests
import pandas as pd
from bs4 import BeautifulSoup

# Rightmove outcodes

Rightmove has a unique ID for each outcode. I had to use Selenium to scrape them and downloaded them into a csv file.

In [3]:
df = pd.read_csv("/Users/alexander.girardet/Code/Personal/projects/rightmove_project/notebooks/resources/data/rightmove_outcodes.csv", index_col=0)

In [4]:
df.head()

Unnamed: 0,outcode,rightmove_code
0,AB10,1
1,AB11,2
2,AB12,3
3,AB13,4
4,AB14,5


# Query

## Headers for the request to be authenticated.

In [5]:
headers = {
            'Accept': 'application/json, text/plain, */*',
            'Accept-Language': 'en-GB,en-US;q=0.9,en;q=0.8',
            'Connection': 'keep-alive',
            'Referer': 'https://www.rightmove.co.uk/property-to-rent/find.html?locationIdentifier=REGION%5E87490&index=24&propertyTypes=&includeLetAgreed=false&mustHave=&dontShow=&furnishTypes=&keywords=',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-origin',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36',
            'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="100", "Google Chrome";v="100"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"macOS"'
        }

## Request properties in a postcode.

In [6]:
rightmove_code = df.iloc[0].rightmove_code

index_jump = 0

url = f"https://www.rightmove.co.uk/api/_search?locationIdentifier=OUTCODE%5E{rightmove_code}&numberOfPropertiesPerPage=24&radius=10.0&sortType=6&index={index_jump}&includeLetAgreed=false&viewType=LIST&channel=RENT&areaSizeUnit=sqft&currencyCode=GBP&isFetching=false"

In [7]:
res = requests.get(url, headers=headers)

In [8]:
properties = res.json()['properties']

In [9]:
len(properties)

25

Returns 25 properties per page.

## Extract content for a property

In [70]:
property = properties[2]
property_id = property['id']

In [71]:
property_url = f"https://www.rightmove.co.uk/properties/{property_id}"

In [72]:
property_url

'https://www.rightmove.co.uk/properties/142659089'

In [73]:
property_res = requests.get(property_url, headers=headers)

## BeautifulSoup HTML parsing

In [74]:
soup = BeautifulSoup(property_res.text, 'lxml')

In [75]:
summary = soup.find("div", {"class": "OD0O7FWw1TjbTD4sdRi1_"}).div.text

In [76]:
uls = soup.find("ul", {"class": "_1uI3IvdF5sIuBtRIvKrreQ"})

In [77]:
features = uls.find_all("li")
feature_list = [feature.text for feature in features]

In [78]:
feature_list

['No Agent Fees', 'Students Can Enquire', 'Property Reference Number: 1915996']

In [79]:
summary

"Property Reference: 1915996.This 2 bedroom house is in the popular area of Mugiemoss, Bucksburn and is available for let from the start of January 2024The property boasts an enviable location within an established community and with an easy commute to the city centre. Good quality laminate and carpet along with blinds and white goods are all included with the house. Upstairs there are two double bedrooms, one featuring a built in wardrobe. There is also a three piece bathroom suite upstairs with a shower over the bath and there is a separate downstairs cloakroom.The spacious open plan lounge/kitchen features a built in oven and hob, washer-dryer and fridge freezer.Outside there is a fully enclosed garden and also a residents parking area.A deposit of one month's rent is required and eligibility criteria will need to be met. This property is suitable for pets with an additional 50% deposit.\xa0Scottish charity number: SCO44825, **some photos are for illustration purposes only**Summary 

In [80]:
feature_list

['No Agent Fees', 'Students Can Enquire', 'Property Reference Number: 1915996']

### Final Property output

In [81]:
from pydantic import BaseModel, ValidationError

In [82]:
# class Property(BaseModel):
#     id: int
#     bedrooms: Optional[int]
#     bathrooms: Optional[int]
#     numberOfImages: Optional[int]
#     numberOfFloorplans: Optional[int]
#     numberOfVirtualTours: Optional[int]
#     countCode: Optional[str]
#     feature_list: Optional[List[str]] = None
#     summary: str

# class Location(BaseModel):
#     latitude: float
#     longitude: float

# class PropertyImages(BaseModel):
#     images: Optional[List[Image]]

# class Image(BaseModel):
#     srcUrl: Optional[str]
#     url: Optional[str]
#     caption: Optional[str]

In [83]:
property['summary'] = summary

In [84]:
property['feature_list'] = feature_list

### Load data to a JSON file

In [85]:
import json
with open("../resources/data/property_1.json", "w") as file:
    json.dump(property, file, indent=4)