In [2]:
import time
from urllib.parse import quote

import pandas as pd
import requests
from geopy.geocoders import Nominatim
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

import credentials

In [3]:
def accept_cookies_if_asked(driver):
    """Click the OneTrust "accept cookies" button if it is present on the page.

    This is deliberately best-effort: when the button cannot be found or
    clicked, the error is ignored and the function simply returns.

    Args:
        driver: object exposing ``find_element(by, value)`` (a Selenium WebDriver).

    Returns:
        None.
    """
    try:
        driver.find_element(By.ID, "onetrust-accept-btn-handler").click()
    except Exception:
        # Only ordinary errors are ignored; KeyboardInterrupt / SystemExit must
        # still propagate so a long scrape can be interrupted from the notebook.
        pass

In [4]:
class Data:
    """Accumulates scraped property listings in parallel lists.

    ``ids``, ``prices`` and ``address`` are filled page by page by
    ``add_page_data``; ``postcodes``, ``latitudes`` and ``longitudes`` are
    filled by ``transform_location_data`` with one entry per address.
    """

    def __init__(self):
        # Scraped fields: one entry per property card.
        self.ids = []
        self.prices = []
        self.address = []
        # Geocoded fields: one entry per address once transform_location_data has run.
        self.postcodes = []
        self.latitudes = []
        self.longitudes = []

    def add_page_data(self, driver):
        """Append id, price text and address of every result card on the current page.

        Args:
            driver: Selenium WebDriver positioned on a search-results page.

        Raises:
            Whatever ``find_element`` raises when a card lacks one of the
            expected child elements.
        """
        for card in driver.find_elements(By.CLASS_NAME, "l-searchResult"):
            # Read all three fields before appending any of them, so a failed
            # lookup cannot leave the parallel lists with different lengths.
            listing_id = card.find_element(By.CLASS_NAME, "propertyCard-anchor").get_attribute('id')
            price_text = card.find_element(By.CLASS_NAME, "propertyCard-priceValue").text
            address = card.find_element(By.CLASS_NAME, "propertyCard-address").get_attribute('title')
            self.ids.append(listing_id)
            self.prices.append(price_text)
            self.address.append(address)

    def transform_location_data(self):
        """Geocode every address and store postcode, latitude and longitude.

        Each address is suffixed with ", London, UK" and sent to the Bing Maps
        Locations endpoint. The first resource of the first resource set is
        used; any field it lacks (or a response with no match at all) is stored
        as ``None`` so the three lists stay aligned with ``self.address``.

        The result lists are rebuilt from scratch and assigned only after every
        address succeeded, so re-running does not duplicate entries and a
        failure midway leaves the previous state untouched.

        Raises:
            Exception: when the service answers with a status code >= 300
                (the message is the response's reason phrase).
        """
        postcodes, latitudes, longitudes = [], [], []
        for addy in self.address:
            # Percent-encode the whole query so spaces, '&', '#', ... cannot break the URL.
            addy = quote(addy + ", London, UK", safe='')
            r = requests.get(rf'https://dev.virtualearth.net/REST/v1/Locations?q={addy}&key={credentials.bingmaps_api_key}')
            if r.status_code >= 300:
                print(r.status_code)
                raise Exception(r.reason)
            payload = r.json()  # parse the body once instead of once per field
            try:
                best_match = payload['resourceSets'][0]['resources'][0]
            except (KeyError, IndexError):
                best_match = {}  # no match for this address
            postcodes.append(best_match.get('address', {}).get('postalCode'))
            try:
                lat, lon = best_match['geocodePoints'][0]['coordinates'][:2]
            except (KeyError, IndexError, ValueError):
                lat = lon = None
            latitudes.append(lat)
            longitudes.append(lon)
        self.postcodes = postcodes
        self.latitudes = latitudes
        self.longitudes = longitudes

    def transform_prices(self):
        """Convert scraped price strings to integers in place.

        The first character and the last four characters of each string are
        dropped and thousands separators removed before calling ``int``.
        Entries that are already integers are kept as they are, so calling
        this method again (e.g. re-running a notebook cell) is harmless.

        Raises:
            ValueError: when the remaining text of a price is not an integer.
        """
        self.prices = [
            price if isinstance(price, int) else int(price[1:-4].replace(',', ''))
            for price in self.prices
        ]

    def make_df(self):
        """Return the collected data as a ``pandas.DataFrame``.

        The frame always has ``id``, ``price`` and ``address`` columns. The
        ``postcode``, ``latitude`` and ``longitude`` columns are left out only
        when listings exist but no location data has been collected yet, which
        lets the frame be built before geocoding. Partially filled location
        lists are passed through unchanged, so pandas still raises on a length
        mismatch.
        """
        columns = {'id': self.ids, 'price': self.prices, 'address': self.address}
        has_location_data = bool(self.postcodes or self.latitudes or self.longitudes)
        if has_location_data or not self.ids:
            columns.update({'postcode': self.postcodes, 'latitude': self.latitudes,
                            'longitude': self.longitudes})
        return pd.DataFrame(columns)


In [6]:

# Scrape every results page of the search below into `data`, counting pages in `pages_scanned`.
url = r'''https://www.rightmove.co.uk/property-to-rent/find.html?locationIdentifier=REGION%5E92829&maxBedrooms=2&minBedrooms=2&propertyTypes=&includeLetAgreed=false&mustHave=&dontShow=&furnishTypes=&keywords='''
options = Options()
options.add_argument('--headless')
# Pass the options to the driver; previously they were built but never used.
driver = webdriver.Chrome(options=options)
try:
    driver.get(url)

    # Fixed pause before looking for the cookie banner
    # (presumably to let it render — TODO confirm / replace with an explicit wait).
    time.sleep(2)
    accept_cookies_if_asked(driver)

    data = Data()
    data.add_page_data(driver)
    pages_scanned = 1

    button = driver.find_element(By.XPATH, "//button[@title='Next page']")

    # Keep paging for as long as the "Next page" button is enabled.
    while button.is_enabled():
        WebDriverWait(driver, 20).until(EC.element_to_be_clickable(button))
        button.click()
        accept_cookies_if_asked(driver)
        # NOTE(review): this waits for *any* property card to be present; confirm it
        # cannot be satisfied by the previous page's cards before the new page loads.
        WebDriverWait(driver, 20).until(EC.presence_of_element_located((By.CLASS_NAME, 'propertyCard-anchor')))
        data.add_page_data(driver)
        pages_scanned += 1
        # Re-locate the button on the freshly loaded page.
        button = driver.find_element(By.XPATH, "//button[@title='Next page']")
finally:
    # quit() ends the whole browser session even when scraping fails part-way;
    # close() only closed the current window and was skipped on errors.
    driver.quit()


In [7]:
# Number of result pages scraped: 1 for the first page plus one per "Next page" click.
pages_scanned

42

In [8]:
# Convert the scraped price strings to integers (mutates `data.prices` in place),
# then assemble the collected listings into a DataFrame and display it.
data.transform_prices()
df = data.make_df()
df

Unnamed: 0,id,price,address
0,prop137334449,3497,"Hester Road, London, SW11"
1,prop137582966,2250,Palmerston Road Wimbledon SW19
2,prop136762952,4767,"Wilshire House, Prospect Way, Battersea Power ..."
3,prop137200952,3501,"Parliament View Apartments, 1 Albert Embankmen..."
4,prop137581226,2400,"Wimbledon park Road, SW18"
...,...,...,...
1045,prop136838285,2450,"Springett House, London, SW2"
1046,prop135976382,5200,"Pico House, Prospect Way, London, SW11"
1047,prop135976166,4767,"Electric Boulevard, London, SW11"
1048,prop133601537,4485,"Thornes House, London, SW11"


In [34]:
# Geocode every scraped address with the Bing Maps Locations endpoint, collecting
# postcode / latitude / longitude in three parallel lists (one entry per address).
postcodes = []
latitude = []
longitude = []
for addy in data.address:
    # Percent-encode the whole query so spaces, '&', '#', ... cannot break the URL.
    addy = quote(addy + ", London, UK", safe='')
    r = requests.get(rf'https://dev.virtualearth.net/REST/v1/Locations?q={addy}&key={credentials.bingmaps_api_key}')
    if r.status_code >= 300:
        print(r.status_code)
        raise Exception(r.reason)
    payload = r.json()  # parse the body once instead of once per field
    try:
        best_match = payload['resourceSets'][0]['resources'][0]
    except (KeyError, IndexError):
        best_match = {}  # no match for this address
    # Missing fields become None so the three lists stay aligned with data.address.
    postcodes.append(best_match.get('address', {}).get('postalCode'))
    try:
        lat, lon = best_match['geocodePoints'][0]['coordinates'][:2]
    except (KeyError, IndexError, ValueError):
        lat = lon = None
    latitude.append(lat)
    longitude.append(lon)


In [36]:
# NOTE(review): `coords` is not assigned in any cell visible here — it appears to rely on
# leftover kernel state; confirm where it is defined so Restart & Run All still works.
coords

[[51.4794993, -0.17091455],
 [51.4170765, -0.1994005],
 [51.48171616, -0.1434886],
 [51.4940102, -0.1204542],
 [51.44974712, -0.20265386],
 [51.41452675, -0.2199256],
 [51.4173615, -0.1783624],
 [51.44086035, -0.16140985],
 [51.44974712, -0.20265386],
 [51.46114276, -0.21832615],
 [51.46543926, -0.16116546],
 [51.421145, -0.1304105],
 [51.44355505, -0.1079709],
 [51.47398, -0.1394068],
 [51.5182676, -0.2015852],
 [51.4573342, -0.1889142],
 [51.4610714, -0.2170229],
 [51.45617357, -0.21323499],
 [51.4812313, -0.1324998],
 [51.46918488, -0.16321801],
 [51.46379358, -0.12346714],
 [51.47169835, -0.1361606],
 [51.42807427, -0.20376272],
 [51.45141975, -0.1888786],
 [51.4106447, -0.21984955],
 [51.4111075, -0.0329158],
 [51.46765505, -0.14372515],
 [51.42051525, -0.20515051],
 [51.45862575, -0.13601956],
 [51.47483805, -0.1534559],
 [51.44136595, -0.14918125],
 [51.4601129, -0.21386885],
 [51.4822138, -0.1374877],
 [51.49395752, -0.12097189],
 [51.46335335, -0.11479055],
 [51.45179275, -0.2