In [2]:
from selenium.webdriver.common.by import By
from webscraper import WebScraper

# Search parameters for the Rightmove sold-prices query.
postcode = 'hp6 6hq'  # centre of the search; also geocoded later as the map centre
no_beds = 2           # target bedroom count, used later to filter the listings
radius = 0.25         # passed as the `radius` URL parameter (presumably miles -- confirm)
last_years = 10       # passed as the `soldIn` URL parameter
max_pages = 9         # upper bound on result pages requested
ws = WebScraper()
dataset = []  # one entry per property card: the card's text split into lines

for page in range(1, max_pages + 1):
    url = f'https://www.rightmove.co.uk/house-prices/{postcode}.html?tenure=FREEHOLD&soldIn={last_years}&radius={radius}&page={page}'
    ws.driver.get(url)
    listings = ws.find_xpaths('class="propertyCard"', multi=True)
    # Stop at the first page with no property cards. The truthiness check
    # covers both a None result and an empty list, so pages past the end of
    # the results are not fetched needlessly.
    if not listings:
        break
    for listing in listings:
        dataset.append(listing.text.split('\n'))

In [3]:
import googlemaps
import pandas as pd
import os

# Google Maps API client. Despite its name, `gmaps_key` holds the client
# object, not the key; the key itself is read from the GMAPS_API_KEY
# environment variable so it never appears in the notebook.
gmaps_key = googlemaps.Client(
    key=os.environ.get('GMAPS_API_KEY'),
)

# Let pandas show up to 200 characters per column before truncating.
pd.set_option("max_colwidth", 200)

def geocode(address):
    """Look up the coordinates of an address with the Google Maps client.

    Args:
        address: Address or postcode text passed to ``gmaps_key.geocode``.

    Returns:
        A ``(lat, lng)`` tuple read from the first result's
        ``geometry.location`` entry.

    Raises:
        IndexError: If the lookup returns no results for ``address``.
    """
    results = gmaps_key.geocode(address)
    if not results:
        # Indexing an empty result list would raise a bare "list index out of
        # range"; raise the same exception type but name the failing address
        # so a bad row is easy to find when this runs inside DataFrame.apply.
        raise IndexError(f'No geocoding result for address: {address!r}')
    location = results[0]["geometry"]["location"]
    return (location["lat"], location["lng"])

# Build the listings table from the scraped rows. Each row is a property
# card's text split on newlines: address, a house description, then up to
# three sale entries. The trailing fields are discarded at the end.
df = pd.DataFrame(dataset, columns=['address', 'house_size', '1', '2', '3', 'ignore', 'ignore', 'ignore'])
map_centre = geocode(postcode)

# Split each sale entry (e.g. "£355,000 10 Dec 2021 Freehold") into a float
# price and a "Mon YYYY" date. Entries that do not start with "£" are not
# sales, so both derived values are NaN for them. Whole columns are assigned
# (masked with `where`) rather than filled through a boolean mask, so the
# columns always exist with a usable dtype even when no row has an i-th sale.
for i in range(1, 4):
    tokens = df[f'{i}'].str.split()
    first_token = tokens.str[0]
    is_sale = first_token.str.startswith('£', na=False)
    price_text = first_token.str[1:].str.replace(',', '', regex=False)
    df[f'{i}_price'] = price_text.where(is_sale).astype('float')
    df[f'{i}_date'] = tokens.str[2:4].str.join(' ').where(is_sale)

# Keep only rows whose description mentions bedrooms. na=False treats a
# missing description as "no match" instead of producing an NaN mask that
# cannot be used for filtering.
has_beds = df['house_size'].str.contains('bed', case=False, na=False)
df = df[has_beds].copy()

# The bedroom count is the first token of the description; anything that is
# not a number becomes NaN.
df['size'] = pd.to_numeric(df['house_size'].str.split().str[0], errors='coerce')

# Drop homes more than one bedroom larger than the target. NaN sizes compare
# False here and are therefore kept.
df = df[~(df['size'] > no_beds + 1)].copy()

# A row without a latest sale price cannot be formatted into the hover text
# (the integer conversion fails on NaN), so drop such rows before spending
# geocoding requests on them.
df = df.dropna(subset=['1_price']).copy()

# Reference: https://www.natasshaselvaraj.com/a-step-by-step-guide-on-geocoding-in-python/
df['geocoded'] = df['address'].apply(geocode)

# Hover text: address, bedroom count, latest price in whole thousands
# (truncated), and latest sale date.
df['hover_text'] = df["address"] + '<br>' + \
                    df["size"].astype(str) + ' beds<br>' + \
                    '£' + (df["1_price"] / 1000).astype(int).astype(str) + 'k<br>' + \
                    df["1_date"]
df = df.drop(columns=['ignore', 'house_size'])
df.head()

Unnamed: 0,address,1,2,3,1_price,1_date,2_price,2_date,3_price,3_date,size,geocoded,hover_text
1,"3, Hardwicke Gardens, Amersham, Buckinghamshire HP6 6AH","£355,000 10 Dec 2021 Freehold",No other historical records,8,355000.0,Dec 2021,,,,,2,"(51.6762036, -0.598047)","3, Hardwicke Gardens, Amersham, Buckinghamshire HP6 6AH<br>2 beds<br>£355k<br>Dec 2021"
3,"18, Hyrons Lane, Amersham, Buckinghamshire HP6 5AS","£635,000 15 Sep 2021 Freehold",No other historical records,9,635000.0,Sep 2021,,,,,2,"(51.67462889999999, -0.6002071999999999)","18, Hyrons Lane, Amersham, Buckinghamshire HP6 5AS<br>2 beds<br>£635k<br>Sep 2021"
4,"69, Grimsdells Lane, Amersham, Buckinghamshire HP6 6HH","£985,000 2 Aug 2021 Freehold","£680,000 15 Jun 2015 Freehold","£278,500 6 Apr 2000 Freehold",985000.0,Aug 2021,680000.0,Jun 2015,278500.0,Apr 2000,3,"(51.6782694, -0.5973126)","69, Grimsdells Lane, Amersham, Buckinghamshire HP6 6HH<br>3 beds<br>£985k<br>Aug 2021"
6,"25, Mitchell Walk, Amersham, Buckinghamshire HP6 6NW","£875,000 25 Jun 2021 Freehold",No other historical records,8,875000.0,Jun 2021,,,,,3,"(51.6765959, -0.5944775)","25, Mitchell Walk, Amersham, Buckinghamshire HP6 6NW<br>3 beds<br>£875k<br>Jun 2021"
7,"10, Meadow Drive, Amersham, Buckinghamshire HP6 6LB","£440,000 23 Jun 2021 Freehold",No other historical records,8,440000.0,Jun 2021,,,,,3,"(51.6792617, -0.5944294)","10, Meadow Drive, Amersham, Buckinghamshire HP6 6LB<br>3 beds<br>£440k<br>Jun 2021"


In [4]:
import plotly.graph_objects as go
import plotly.express as px

# Draw every geocoded listing on an interactive map: marker size encodes the
# bedroom count, marker colour encodes the latest sale price, and the hover
# text summarises the listing.
fig = go.Figure(go.Scattermapbox(
        lat=df['geocoded'].map(lambda point: point[0]),
        lon=df['geocoded'].map(lambda point: point[1]),
        marker=dict(
            size=df['size'],
            color=df['1_price'],
            colorscale='bluered',
            showscale=True,
            # Scale factor for marker sizes, relative to the largest bedroom
            # count. Series.max() skips NaN, whereas the builtin max() gives
            # an order-dependent (possibly NaN) result when a size is missing.
            sizeref=df['size'].max() / 20
            ),
        text=df['hover_text'],
        mode='markers',
    ))

# Full-bleed layout centred on the geocoded search postcode.
fig.update_layout(
    mapbox_style="carto-positron",
    margin={"r":0,"t":0,"l":0,"b":0},
    autosize=True,
    height=1000,
    mapbox=dict(
        center=dict(
            lat=map_centre[0],
            lon=map_centre[1]
        ),
        zoom=16
        ))
fig.show()