# Attempt webscraping on Rightmove

In [1]:
# Import Libraries:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
import re
import pandas as pd
from selenium.webdriver.chrome.options import Options

In [2]:
!pip install selenium



# Check that I can use the website for webscraping

In [None]:
## Create a request to connect to the website
site = 'https://www.rightmove.co.uk/property-for-sale/find.html?searchType=SALE&locationIdentifier=REGION%5E787&insId=1&radius=0.0&minPrice=&maxPrice=&minBedrooms=&maxBedrooms=&displayPropertyType=&maxDaysSinceAdded=&_includeSSTC=on&sortByPriceDescending=&primaryDisplayPropertyType=&secondaryDisplayPropertyType=&oldDisplayPropertyType=&oldPrimaryDisplayPropertyType=&newHome=&auction=false'
# requests has no default timeout; without one this cell can block forever if the server never answers
response = requests.get(site, timeout=30)

In [81]:
## Check our response
response

<Response [200]>

In [83]:
## Check the 'status_code' of our response - have it delivered as an integer
response.status_code

200

In [22]:
## Function to give us more details about our message
## Two bits of data retrieved: The Response type and the Details associated with it
def verify_request(response):
    """Print the class and the reason phrase of a response's HTTP status code.

    Parameters
    ----------
    response : object
        Any object exposing an integer ``status_code`` attribute
        (for example a ``requests.Response``).

    Returns
    -------
    None
        Two lines are printed: the response class (Information, Successful,
        Redirection, Client Error or Server Error) and the reason phrase.
        Status codes missing from the lookup table are reported as
        'Unknown status code' instead of raising ``KeyError``.
    """
    resp = response.status_code
    # Reason phrases keyed by status code
    response_dict = {100:'Continue', 101:'Switching Protocols', 103:'Early Hints', 200:'OK', 201:'Created', 202:'Accepted',
                     203:'Non-Authoritative Information', 204:'No Content', 205:'Reset Content', 206:'Partial Content',
                     300:'Multiple Choices', 301:'Moved Permanently', 302:'Found', 303:'See Other',
                     304:'Not Modified', 307:'Temporary Redirect', 308:'Permanent Redirect', 400:'Bad Request', 401: 'Unauthorized',
                     402:'Payment Required', 403:'Forbidden', 404:'Not Found', 405:'Method not allowed', 406:'Not Acceptable',
                     407:'Proxy Authentication Required', 408:'Request Timeout', 409:'Conflict', 410:'Gone', 411:'Length Required',
                     412:'Precondition Failed', 413:'Request Too Large', 414:'Request-URI Too Long',
                     415:'Unsupported Media Type', 416:'Range Not Satisfiable', 417:'Expectation Failed', 429:'Too Many Requests',
                     500:'Internal Server Error', 501:'Not Implemented', 502:'Bad Gateway', 503:'Service Unavailable',
                     504:'Gateway Timeout', 505:'HTTP Version Not Supported', 511:'Network Authentication Required'}

    # The hundreds digit of the status code determines the response class
    if resp < 200:
        category = 'Information'
    elif resp < 300:
        category = 'Successful'
    elif resp < 400:
        category = 'Redirection'
    elif resp < 500:
        category = 'Client Error'
    else:
        category = 'Server Error'
    print(f"Gateway response type: {category}", end="\n")

    # .get() keeps the function usable for codes the table does not list
    details = response_dict.get(resp, 'Unknown status code')
    print(f'Details - Request type: {details}')

In [23]:
# Response status code of 200, I am good to go
verify_request(response)

Gateway response type: Successful
Details - Request type: OK


# Acquire all the url links of Leeds property listings - Build this up into a function

## Number of pages returned

In [24]:
# Search-results page whose pagination is inspected
url = 'https://www.rightmove.co.uk/property-for-sale/find.html?searchType=SALE&locationIdentifier=REGION%5E787&insId=1&radius=0.0&minPrice=&maxPrice=&minBedrooms=&maxBedrooms=&displayPropertyType=&maxDaysSinceAdded=&_includeSSTC=on&sortByPriceDescending=&primaryDisplayPropertyType=&secondaryDisplayPropertyType=&oldDisplayPropertyType=&oldPrimaryDisplayPropertyType=&newHome=&auction=false'
# Need to use a webdriver as browser
driver = webdriver.Chrome()
try:
    driver.get(url)
    # Get HTML of page that has all HTML tags rendered
    html = driver.page_source
finally:
    # Always close the browser so Chrome processes are not left running
    driver.quit()
soup = BeautifulSoup(html, 'html.parser')
# Find the number of pages of results there
page_info = [x.text for x in soup.findAll('span', class_='pagination-pageInfo')]
# The third 'pagination-pageInfo' span is the one read as the total page count
no_of_pages = int(page_info[2])
no_of_pages

42

## Get the urls for a property listing on a single page

In [25]:
# First page (index=0) of the search results
url = 'https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=REGION%5E787&index=0&propertyTypes=&includeSSTC=false&mustHave=&dontShow=&furnishTypes=&keywords='
# Need to use a webdriver as browser
driver = webdriver.Chrome()
try:
    driver.get(url)
    # Get HTML of page that has all HTML tags rendered
    html = driver.page_source
finally:
    # Always close the browser so Chrome processes are not left running
    driver.quit()
soup = BeautifulSoup(html, 'html.parser')
# Get all the property listing urls
rightmove_url = 'https://www.rightmove.co.uk'
single_page_listings_urls = [
    rightmove_url + link.get('href')
    for link in soup.findAll('a', class_='propertyCard-link property-card-updates')
    if link.get('href')  # an anchor without an href would make the concatenation fail
]
single_page_listings_urls

['https://www.rightmove.co.uk/properties/128845637#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/85503291#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/125443259#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132860708#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132809183#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132898226#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132194903#/?channel=COM_BUY',
 'https://www.rightmove.co.uk/properties/129748748#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/131784080#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132698111#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/131784101#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/128614343#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132611999#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/131955779#/?channel=RES_BUY',
 'https

# Way of getting all the paginated urls for a location

- This example is a way of getting the urls for all the pages of results that are for Leeds property listings

In [26]:
url = 'https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=REGION%5E787&index=0&propertyTypes=&includeSSTC=false&mustHave=&dontShow=&furnishTypes=&keywords='
no_of_pages = 42  # Page count known for the Leeds search at the time of writing
# Step through the result offsets directly: page 1 -> index 0, page 2 -> index 24, and so on
for i in range(0, no_of_pages * 24, 24):
    url = re.sub(r'&index=\d+&', f'&index={i}&', url)
    print(url)

https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=REGION%5E787&index=0&propertyTypes=&includeSSTC=false&mustHave=&dontShow=&furnishTypes=&keywords=
https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=REGION%5E787&index=24&propertyTypes=&includeSSTC=false&mustHave=&dontShow=&furnishTypes=&keywords=
https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=REGION%5E787&index=48&propertyTypes=&includeSSTC=false&mustHave=&dontShow=&furnishTypes=&keywords=
https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=REGION%5E787&index=72&propertyTypes=&includeSSTC=false&mustHave=&dontShow=&furnishTypes=&keywords=
https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=REGION%5E787&index=96&propertyTypes=&includeSSTC=false&mustHave=&dontShow=&furnishTypes=&keywords=
https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=REGION%5E787&index=120&propertyTypes=&includeSSTC=false&

# Get all the property listings for Leeds

In [40]:
# Start at page 1
url = 'https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=REGION%5E787&index=0&propertyTypes=&includeSSTC=false&mustHave=&dontShow=&furnishTypes=&keywords='

rightmove_url = 'https://www.rightmove.co.uk'

test_listings_urls = []

# Need to use a webdriver as browser
driver = webdriver.Chrome()
try:
    driver.get(url)
    # Get HTML of page that has all HTML tags rendered
    html = driver.page_source
    soup = BeautifulSoup(html, 'html.parser')
    # Find the number of pages of results there (read from the third 'pagination-pageInfo' span)
    page_info = [x.text for x in soup.findAll('span', class_='pagination-pageInfo')]
    no_of_pages = int(page_info[2])

    # Scrape all the web pages of results
    for page in range(1, no_of_pages+1):
        print('Page: ' + str(page) + ' is being scraped for property listing urls')

        # The index parameter advances by 24 per page, starting at 0 for page 1
        offset = (page - 1) * 24
        url = re.sub(r'&index=\d+&', f'&index={offset}&', url) # Creates the url of the page I want to scrape from

        driver.get(url)

        html = driver.page_source
        soup = BeautifulSoup(html, 'html.parser')

        # Get the listing urls for a page (anchors without an href are skipped)
        test_listings_urls.extend([rightmove_url+x.get('href')
                                   for x in soup.findAll('a', class_='propertyCard-link property-card-updates')
                                   if x.get('href')])
finally:
    # Always close the browser, even if a page fails to load or parse
    driver.quit()

Page: 1 is being scraped for property listing urls
Page: 2 is being scraped for property listing urls
Page: 3 is being scraped for property listing urls
Page: 4 is being scraped for property listing urls
Page: 5 is being scraped for property listing urls
Page: 6 is being scraped for property listing urls
Page: 7 is being scraped for property listing urls
Page: 8 is being scraped for property listing urls
Page: 9 is being scraped for property listing urls
Page: 10 is being scraped for property listing urls
Page: 11 is being scraped for property listing urls
Page: 12 is being scraped for property listing urls
Page: 13 is being scraped for property listing urls
Page: 14 is being scraped for property listing urls
Page: 15 is being scraped for property listing urls
Page: 16 is being scraped for property listing urls
Page: 17 is being scraped for property listing urls
Page: 18 is being scraped for property listing urls
Page: 19 is being scraped for property listing urls
Page: 20 is being scr

In [41]:
len(test_listings_urls) # There are this number of property listings for leeds

1050

In [13]:
test_listings_urls[24] # Checking a random property

'https://www.rightmove.co.uk/properties/130282589#/?channel=RES_BUY'

# Create a function to find all the property listing urls for a major city/town

In [38]:
def scrape_listing_urls_per_locations(url):
    """Collect the property listing urls from every results page of one search.

    Parameters
    ----------
    url : str
        Search-results url containing an ``&index=<number>&`` query parameter;
        that parameter is rewritten to step through the result pages.

    Returns
    -------
    list of str
        Absolute listing urls (site root + each listing anchor's href), in
        page order.

    Raises
    ------
    IndexError
        If the page has fewer than three 'pagination-pageInfo' spans, so the
        page count cannot be read.

    Notes
    -----
    One Chrome session is opened per call and is always closed before
    returning, including when an error occurs.
    """
    # The rightmove url we will prepend to the listing hrefs we grab from scraping
    rightmove_url = 'https://www.rightmove.co.uk'

    # An empty list of urls to populate
    listings_urls = []

    # Need to use a webdriver as browser
    driver = webdriver.Chrome()
    try:
        driver.get(url)
        # Get HTML of page that has all HTML tags rendered
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        # Find the number of pages of results there (third 'pagination-pageInfo' span)
        page_info = [x.text for x in soup.findAll('span', class_='pagination-pageInfo')]
        no_of_pages = int(page_info[2])

        # Scrape all the web pages of results
        for page in range(no_of_pages):
            # The index parameter advances by 24 per page, starting at 0
            offset = page * 24
            # Creates the url of the page I want to scrape from
            url = re.sub(r'&index=\d+&', f'&index={offset}&', url)

            # Get the webdriver to access the current page in the loop
            driver.get(url)
            soup = BeautifulSoup(driver.page_source, 'html.parser')

            # Get all the listing urls on the current page (anchors without an href are skipped)
            listings_urls.extend([rightmove_url+x.get('href')
                                  for x in soup.findAll('a', class_='propertyCard-link property-card-updates')
                                  if x.get('href')])
    finally:
        # Without this every call would leave a Chrome process running
        driver.quit()

    # Return all the property listing urls for a location as a list
    return listings_urls

# Get all the listings for every postcode outcode for Halifax, Huddersfield, Wakefield, Leeds, Bradford

In [28]:
# Get all the listings for Halifax Outcodes
# The search urls differ only in the numeric OUTCODE identifier, so they are built from one template
outcode_url_template = ('https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=OUTCODE%5E{location_id}'
                        '&index=0&propertyTypes=&mustHave=&dontShow=&furnishTypes=&keywords=')
halifax_outcode_ids = {'HX1': 1139, 'HX2': 1140, 'HX3': 1141, 'HX4': 1142, 'HX5': 1143, 'HX6': 1144, 'HX7': 1145}
halifax_outcodes = {outcode: outcode_url_template.format(location_id=location_id)
                    for outcode, location_id in halifax_outcode_ids.items()}

halifax_listings = []
for value in halifax_outcodes.values():
    halifax_listings.extend(scrape_listing_urls_per_locations(value))

In [53]:
len(halifax_listings)

732

In [59]:
# Get all the listings for Huddersfield Outcodes
# The search urls differ only in the numeric OUTCODE identifier, so they are built from one template
outcode_url_template = ('https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=OUTCODE%5E{location_id}'
                        '&index=0&propertyTypes=&mustHave=&dontShow=&furnishTypes=&keywords=')
huddersfield_outcode_ids = {'HD1': 1063, 'HD2': 1064, 'HD3': 1065, 'HD4': 1066, 'HD5': 1067,
                            'HD6': 1068, 'HD7': 1069, 'HD8': 1070, 'HD9': 1071}
huddersfield_outcodes = {outcode: outcode_url_template.format(location_id=location_id)
                         for outcode, location_id in huddersfield_outcode_ids.items()}

huddersfield_listings = []
for value in huddersfield_outcodes.values():
    huddersfield_listings.extend(scrape_listing_urls_per_locations(value))

In [60]:
len(huddersfield_listings)

1471

In [61]:
# Save the Huddersfield listing urls to 'hudslinks.csv' (index=False: no row-index column is written)
hudslinks = pd.DataFrame(huddersfield_listings)
hudslinks.to_csv('hudslinks.csv', index=False)

In [75]:
# Get all the listings for Wakefield Outcodes
# The search urls differ only in the numeric OUTCODE identifier, so they are built from one template
outcode_url_template = ('https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=OUTCODE%5E{location_id}'
                        '&index=0&propertyTypes=&mustHave=&dontShow=&furnishTypes=&keywords=')
wakefield_outcode_ids = {'WF1': 2814, 'WF2': 2823, 'WF3': 2824, 'WF4': 2825, 'WF5': 2826, 'WF6': 2827,
                         'WF7': 2828, 'WF8': 2829, 'WF9': 2830, 'WF10': 2815, 'WF11': 2816, 'WF12': 2817,
                         'WF13': 2818, 'WF14': 2819, 'WF15': 2820, 'WF16': 2821, 'WF17': 2822}
wakefield_outcodes = {outcode: outcode_url_template.format(location_id=location_id)
                      for outcode, location_id in wakefield_outcode_ids.items()}

wakefield_listings = []
for value in wakefield_outcodes.values():
    wakefield_listings.extend(scrape_listing_urls_per_locations(value))

In [76]:
len(wakefield_listings)

2669

In [51]:
wakefield_listings[25]

'https://www.rightmove.co.uk/properties/131973236#/?channel=RES_BUY'

In [77]:
# Save the Wakefield listing urls to 'wakeflinks.csv' (index=False: no row-index column is written)
wakeflinks = pd.DataFrame(wakefield_listings)
wakeflinks.to_csv('wakeflinks.csv', index=False)

In [39]:
# Get all the listings for Bradford Outcodes
# The search urls differ only in the numeric OUTCODE identifier (and an optional extra
# query parameter), so they are built from one template
outcode_url_template = ('https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=OUTCODE%5E{location_id}'
                        '&index=0&propertyTypes={extra_params}&mustHave=&dontShow=&furnishTypes=&keywords=')
bradford_outcode_ids = {'BD1': 155, 'BD2': 166, 'BD3': 172, 'BD4': 173, 'BD5': 174, 'BD6': 175,
                        'BD7': 176, 'BD8': 177, 'BD9': 178, 'BD10': 156, 'BD11': 157, 'BD12': 158,
                        'BD13': 159, 'BD14': 160, 'BD15': 161, 'BD16': 162, 'BD17': 163, 'BD18': 164,
                        'BD19': 165, 'BD20': 167, 'BD21': 168, 'BD22': 169, 'BD23': 170, 'BD24': 171}
# NOTE(review): only these three urls carried '&includeSSTC=false' in the original cell; the
# difference is preserved here - confirm whether it is intentional or should apply to every outcode
bradford_sstc_flagged = {'BD2', 'BD3', 'BD4'}
bradford_outcodes = {
    outcode: outcode_url_template.format(
        location_id=location_id,
        extra_params='&includeSSTC=false' if outcode in bradford_sstc_flagged else '')
    for outcode, location_id in bradford_outcode_ids.items()}

bradford_listings = []
for value in bradford_outcodes.values():
    bradford_listings.extend(scrape_listing_urls_per_locations(value))

In [40]:
len(bradford_listings)

2413

In [44]:
# Save the Bradford listing urls to 'bfdlinks.csv' (index=False: no row-index column is written)
bfdlinks = pd.DataFrame(bradford_listings)
bfdlinks.to_csv('bfdlinks.csv', index=False)

In [49]:
bradford_listings[23]

'https://www.rightmove.co.uk/properties/132236708#/?channel=RES_BUY'

In [113]:
# Get all the listings for Leeds Outcodes
# The search urls differ only in the numeric OUTCODE identifier, so they are built from one template
outcode_url_template = ('https://www.rightmove.co.uk/property-for-sale/find.html?locationIdentifier=OUTCODE%5E{location_id}'
                        '&index=0&propertyTypes=&mustHave=&dontShow=&furnishTypes=&keywords=')
leeds_outcode_ids = {'LS1': 1525, 'LS2': 1536, 'LS3': 1547, 'LS4': 1548, 'LS5': 1549, 'LS6': 1550,
                     'LS7': 1551, 'LS8': 1552, 'LS9': 1553, 'LS10': 1526, 'LS11': 1527, 'LS12': 1528,
                     'LS13': 1529, 'LS14': 1530, 'LS15': 1531, 'LS16': 1532, 'LS17': 1533, 'LS18': 1534,
                     'LS19': 1535, 'LS20': 1537, 'LS21': 1538, 'LS22': 1539, 'LS23': 1540, 'LS24': 1541,
                     'LS25': 1542, 'LS26': 1543, 'LS27': 1544, 'LS28': 1545, 'LS29': 1546}
leeds_outcodes = {outcode: outcode_url_template.format(location_id=location_id)
                  for outcode, location_id in leeds_outcode_ids.items()}

leeds_listings = []
for value in leeds_outcodes.values():
    leeds_listings.extend(scrape_listing_urls_per_locations(value))

In [114]:
len(leeds_listings)

4017

In [115]:
# Save the Leeds listing urls to 'ldslinks.csv' (index=False: no row-index column is written)
ldslinks = pd.DataFrame(leeds_listings)
ldslinks.to_csv('ldslinks.csv', index=False)

# Get information about a single property

The url below is a property listing that has all the following features:
- Address
- Price
- Property Type
- Bedrooms
- Bathrooms
- Tenure Type
- Distance to nearest train station

In [26]:
url = 'https://www.rightmove.co.uk/properties/128545784#/?channel=RES_BUY'

In [27]:
# Need to use a webdriver as browser
driver = webdriver.Chrome()
try:
    driver.get(url)
    # Get HTML of page that has all HTML tags rendered
    html = driver.page_source
finally:
    # Always close the browser so Chrome processes are not left running
    driver.quit()
soup = BeautifulSoup(html, 'html.parser')

In [28]:
# Get the address
soup.find('h1', class_='_2uQQ3SV0eMHL1P6t5ZDo2q').text

'Ben Rhydding Road, ILKLEY'

In [29]:
# Get the price
soup.find('div', class_='_1gfnqJ3Vtd1z40MlC0MzXu').span.text

'£1,625,000'

In [30]:
# Get the distance to nearest train station
soup.find('span', class_='_1ZY603T1ryTT3dMgGkM7Lg').text

'0.5 miles'

In [31]:
property_info_headings = [x.dt.text for x in soup.findAll('dl')]
property_info_headings

['PROPERTY TYPE', 'BEDROOMS', 'BATHROOMS', 'SIZE', 'TENURE']

In [32]:
property_info_values = [x.dd.text for x in soup.findAll('dl')]
property_info_values

['Detached', '×5', '×4', '3,389 sq. ft.', 'Freehold']

In [33]:
# Pair each heading with the value at the same position to build the property-info dictionary
property_info_dict = dict(zip(property_info_headings, property_info_values))
property_info_dict

{'PROPERTY TYPE': 'Detached',
 'BEDROOMS': '×5',
 'BATHROOMS': '×4',
 'SIZE': '3,389 sq. ft.',
 'TENURE': 'Freehold'}

In [34]:
# Look each field up directly; .get() gives None when the listing does not show that field
property_type = property_info_dict.get('PROPERTY TYPE')
bedrooms = property_info_dict.get('BEDROOMS')
bathrooms = property_info_dict.get('BATHROOMS')
size = property_info_dict.get('SIZE')
tenure = property_info_dict.get('TENURE')

# Create a function to get information about every property listing

In [2]:
## Write a function to get details about the property

def _tag_text(tag):
    """Return the text of a BeautifulSoup tag, or None when the tag was not found."""
    return tag.text if tag is not None else None


def get_property_details(url):
    """Scrape the headline details of one Rightmove property listing.

    The url is first requested with ``requests``; only if that answers with
    status 200 is the page loaded in headless Chrome and the rendered HTML
    parsed with BeautifulSoup.

    Parameters
    ----------
    url : str
        Link to a single property listing.

    Returns
    -------
    tuple
        ``(address, property_type, bedrooms, bathrooms, size, tenure,
        nearest_station, price)``. Every item is the text found on the page,
        or None if the page does not show it. A tuple of eight None values is
        returned when the link cannot be reached or does not answer with
        status 200, so the result can always be unpacked into eight names.
    """
    no_details = (None,) * 8

    try:
        responseObj = requests.get(url)
    except requests.RequestException:
        # The link can't be connected to
        return no_details

    ## Check if we can access the site
    if responseObj.status_code != 200:
        # What's returned when a link has been deleted
        return no_details

    # Make this run headless
    chrome_options = Options()
    chrome_options.add_argument("--headless")

    # A webdriver is used so that page_source holds the HTML after the
    # browser has rendered the page
    driver = webdriver.Chrome(options=chrome_options)
    try:
        driver.get(url)
        # Get HTML of page that has all HTML tags rendered
        html = driver.page_source
    finally:
        # Close the browser on every call; otherwise each scraped link would
        # leave a Chrome process running
        driver.quit()

    soup = BeautifulSoup(html, 'html.parser')

    # Get the address
    address = _tag_text(soup.find('h1', class_='_2uQQ3SV0eMHL1P6t5ZDo2q'))

    # Get the price (held in a <span> inside the price container)
    price_box = soup.find('div', class_='_1gfnqJ3Vtd1z40MlC0MzXu')
    price = _tag_text(price_box.span) if price_box is not None else None

    # Save property info in a dictionary - variable amount displayed per page.
    # A <dl> without both a <dt> and a <dd> is skipped instead of aborting the
    # whole listing.
    prop_info_dict = {
        info.dt.text: info.dd.text
        for info in soup.find_all('dl')
        if info.dt is not None and info.dd is not None
    }
    property_type = prop_info_dict.get('PROPERTY TYPE')
    bedrooms = prop_info_dict.get('BEDROOMS')
    bathrooms = prop_info_dict.get('BATHROOMS')
    size = prop_info_dict.get('SIZE')
    tenure = prop_info_dict.get('TENURE')

    # Get the distance to nearest train station
    nearest_station = _tag_text(soup.find('span', class_='_1ZY603T1ryTT3dMgGkM7Lg'))

    return address, property_type, bedrooms, bathrooms, size, tenure, nearest_station, price

In [15]:
# Empty frame that collects one row per scraped test listing
test_columns = [
    'address', 'property_type', 'bedrooms', 'bathrooms',
    'size_sq_ft', 'tenure', 'nearest_station', 'price',
]
test_property_details = pd.DataFrame(columns=test_columns)
test_property_details

Unnamed: 0,address,property_type,bedrooms,bathrooms,size_sq_ft,tenure,nearest_station,price


In [16]:
# Attempt with a property that has all features
url = 'https://www.rightmove.co.uk/properties/128545784#/?channel=RES_BUY'
details = get_property_details(url)
address, property_type, bedrooms, bathrooms, size, tenure, nearest_station, price = details
# Store the scraped values as row 0 of the test frame
test_property_details.loc[0] = details
test_property_details

Unnamed: 0,address,property_type,bedrooms,bathrooms,size_sq_ft,tenure,nearest_station,price
0,"Ben Rhydding Road, ILKLEY",Detached,×5,×4,"3,389 sq. ft.",Freehold,0.5 miles,"£1,625,000"


In [7]:
# Check the column dtypes and non-null counts after adding the first row
test_property_details.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1 entries, 0 to 0
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   address          1 non-null      object
 1   property_type    1 non-null      object
 2   bedrooms         1 non-null      object
 3   bathrooms        1 non-null      object
 4   size_sq_ft       1 non-null      object
 5   tenure           1 non-null      object
 6   nearest_station  1 non-null      object
 7   price            1 non-null      object
dtypes: object(8)
memory usage: 72.0+ bytes


In [17]:
# Attempt with a property that has all property info features apart from the size
url = 'https://www.rightmove.co.uk/properties/132157613#/?channel=RES_BUY'
details = get_property_details(url)
address, property_type, bedrooms, bathrooms, size, tenure, nearest_station, price = details
# Store the scraped values as row 1 of the test frame
test_property_details.loc[1] = details
test_property_details

Unnamed: 0,address,property_type,bedrooms,bathrooms,size_sq_ft,tenure,nearest_station,price
0,"Ben Rhydding Road, ILKLEY",Detached,×5,×4,"3,389 sq. ft.",Freehold,0.5 miles,"£1,625,000"
1,"Curly Hill, Ilkley, West Yorkshire, LS29",Detached,×4,×2,,Freehold,0.7 miles,"£1,300,000"


In [9]:
# Check the non-null counts again now that a listing without a size has been added
test_property_details.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2 entries, 0 to 1
Data columns (total 8 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   address          2 non-null      object
 1   property_type    2 non-null      object
 2   bedrooms         2 non-null      object
 3   bathrooms        2 non-null      object
 4   size_sq_ft       1 non-null      object
 5   tenure           2 non-null      object
 6   nearest_station  2 non-null      object
 7   price            2 non-null      object
dtypes: object(8)
memory usage: 144.0+ bytes


In [10]:
# Attempt with a property that has only the property type, number of bedrooms and leasehold features
url = 'https://www.rightmove.co.uk/properties/108492428#/?channel=RES_BUY'
details = get_property_details(url)
address, property_type, bedrooms, bathrooms, size, tenure, nearest_station, price = details
# Store the scraped values as row 2 of the test frame
test_property_details.loc[2] = details
test_property_details

Unnamed: 0,address,property_type,bedrooms,bathrooms,size_sq_ft,tenure,nearest_station,price
0,"Ben Rhydding Road, ILKLEY",Detached,×5,×4,"3,389 sq. ft.",Freehold,0.5 miles,"£1,625,000"
1,"Curly Hill, Ilkley, West Yorkshire, LS29",Detached,×4,×2,,Freehold,0.7 miles,"£1,300,000"
2,"11 Regent Street, Leeds, West Yorkshire, LS2 7JQ",Flat,×1,,,Leasehold,0.8 miles,"£45,000"


In [44]:
# NOTE(review): the saved output of this cell shows rows 3 and 4 (repeats of row 2)
# that no visible cell adds - presumably left over from earlier re-runs; confirm
# with Restart & Run All before saving
test_property_details

Unnamed: 0,address,property_type,bedrooms,bathrooms,size_sq_ft,tenure,nearest_station,price
0,"Ben Rhydding Road, ILKLEY",Detached,×5,×4,"3,389 sq. ft.",Freehold,0.5 miles,"£1,625,000"
1,"Curly Hill, Ilkley, West Yorkshire, LS29",Detached,×4,×2,,Freehold,0.7 miles,"£1,300,000"
2,"11 Regent Street, Leeds, West Yorkshire, LS2 7JQ",Flat,×1,,,Leasehold,0.8 miles,"£45,000"
3,"11 Regent Street, Leeds, West Yorkshire, LS2 7JQ",Flat,×1,,,Leasehold,0.8 miles,"£45,000"
4,"11 Regent Street, Leeds, West Yorkshire, LS2 7JQ",Flat,×1,,,Leasehold,0.8 miles,"£45,000"


In [None]:
# Write the test scraping results to a csv (row index left out)
test_csv_name = 'test_property_details.csv'
test_property_details.to_csv(test_csv_name, index=False)

# Webscrape every property listing for Bradford, Wakefield and Leeds and create dataframe
- Halifax and Huddersfield will be done on the cloud

In [29]:
# Empty frame that collects one row per scraped Halifax listing
halifax_columns = [
    'url', 'address', 'property_type', 'bedrooms', 'bathrooms',
    'size_sq_ft', 'tenure', 'nearest_station', 'price',
]
property_details_halifax = pd.DataFrame(columns=halifax_columns)
property_details_halifax

Unnamed: 0,url,address,property_type,bedrooms,bathrooms,size_sq_ft,tenure,nearest_station,price


In [30]:
# Read in halifax property url links from csv; the links are taken from the
# first column
# NOTE(review): the displayed list contains repeated links (e.g. property
# 132544862 appears twice), so the same listing gets scraped more than once -
# consider de-duplicating
hfx = pd.read_csv('halifaxlinks.csv')
hfx_link_column = hfx.columns[0]
hfx_list = hfx[hfx_link_column].values.tolist()
hfx_list

['https://www.rightmove.co.uk/properties/132544862#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132319409#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/128796872#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132907541#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/131601284#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/131940062#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/126048383#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132544862#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132648128#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132644687#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/131600915#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/126926192#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/128379653#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132843467#/?channel=RES_BUY',
 'http

In [31]:
# Scrape every Halifax link and store one row per listing, keyed by the
# link's position in hfx_list
for i, url in enumerate(hfx_list):
    details = get_property_details(url)
    if details is None:
        # Guard against a bare None result (rather than an 8-item tuple),
        # which cannot be unpacked and would stop the whole run
        details = (None,) * 8
    address, property_type, bedrooms, bathrooms, size, tenure, nearest_station, price = details

    property_details_halifax.loc[i] = (url, *details)
    # Keep track of the scraping
    print(i)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [32]:
# Inspect the scraped Halifax listings
property_details_halifax

Unnamed: 0,url,address,property_type,bedrooms,bathrooms,size_sq_ft,tenure,nearest_station,price
0,https://www.rightmove.co.uk/properties/1325448...,"Heath Park Avenue, Halifax",End of Terrace,×4,×3,,Freehold,0.5 miles,"£285,000"
1,https://www.rightmove.co.uk/properties/1323194...,"St James Road, Halifax, HX1 1YS",House of Multiple Occupation,×6,×6,,Ask agent,0.5 miles,"£400,000"
2,https://www.rightmove.co.uk/properties/1287968...,"Balmoral Place, Halifax, West Yorkshire, HX1",Semi-Detached,×5,×2,,Freehold,0.5 miles,"£399,995"
3,https://www.rightmove.co.uk/properties/1329075...,"Heath Lea, Halifax",Detached,×4,×1,,Freehold,0.5 miles,"£365,000"
4,https://www.rightmove.co.uk/properties/1316012...,"Wellgarth, Well Head, Halifax",Detached Bungalow,×3,×2,,Freehold,0.6 miles,"£350,000"
...,...,...,...,...,...,...,...,...,...
726,https://www.rightmove.co.uk/properties/1286439...,"Longfellow Court, Hebden Bridge, HX7 5LG",Apartment,×2,×1,,Leasehold,0.2 miles,"£115,000"
727,https://www.rightmove.co.uk/properties/1317687...,"Aspinall Street, Hebden Bridge",Terraced,×2,×1,,Freehold,0.2 miles,"£100,000"
728,https://www.rightmove.co.uk/properties/1272469...,"Jumble Hole Road, Hebden Bridge",Land,,,,,1.6 miles,"£90,000"
729,https://www.rightmove.co.uk/properties/1253660...,"Plot 4, Long Causeway, Blackshaw Head, Hebden ...",Land,,,"274,428 sq. ft.",,2.4 miles,"£75,000"


In [34]:
# Write the Halifax scraping results to a csv (row index left out)
halifax_csv_name = 'halifax_properties.csv'
property_details_halifax.to_csv(halifax_csv_name, index=False)

# Huddersfield csv

In [68]:
# Empty frame that collects one row per scraped Huddersfield listing
huddersfield_columns = [
    'url', 'address', 'property_type', 'bedrooms', 'bathrooms',
    'size_sq_ft', 'tenure', 'nearest_station', 'price',
]
property_details_huddersfield = pd.DataFrame(columns=huddersfield_columns)
property_details_huddersfield

Unnamed: 0,url,address,property_type,bedrooms,bathrooms,size_sq_ft,tenure,nearest_station,price


In [64]:
# Read in huddersfield property url links from csv (first column) and count them
huds = pd.read_csv('hudslinks.csv')
huds_link_column = huds.columns[0]
huds_list = huds[huds_link_column].values.tolist()
len(huds_list)

1471

In [69]:
# Scrape remaining huddersfield properties
# NOTE(review): 1050 is presumably the number of links already scraped in an
# earlier run - confirm
huds_pt2 = huds_list[1050:]
len(huds_pt2)

421

In [None]:
# Scrape every remaining Huddersfield link and store one row per listing,
# keyed by the link's position in huds_pt2
for i, url in enumerate(huds_pt2):
    details = get_property_details(url)
    if details is None:
        # Guard against a bare None result (rather than an 8-item tuple),
        # which cannot be unpacked and would stop the whole run
        details = (None,) * 8
    address, property_type, bedrooms, bathrooms, size, tenure, nearest_station, price = details

    property_details_huddersfield.loc[i] = (url, *details)
    # Keep track of the scraping
    print(i)

In [73]:
# Look at the last rows that were scraped
# NOTE(review): the saved output ends at index 409 although huds_pt2 holds 421
# links - the loop may not have run to completion; confirm before relying on the csv
property_details_huddersfield.tail()

Unnamed: 0,url,address,property_type,bedrooms,bathrooms,size_sq_ft,tenure,nearest_station,price
405,https://www.rightmove.co.uk/properties/1328843...,"Greenhill Bank Road, New Mill, Holmfirth, West...",Terraced,×2,×1,,Freehold,1.6 miles,"£150,000"
406,https://www.rightmove.co.uk/properties/1281187...,"Thick Hollins, Meltham, Holmfirth, West Yorksh...",Terraced,×2,×1,,Freehold,2.8 miles,"£150,000"
407,https://www.rightmove.co.uk/properties/1328625...,"Holmclose, Holmbridge, Holmfirth, West Yorkshi...",Semi-Detached,×2,×1,,Freehold,3.4 miles,"£150,000"
408,https://www.rightmove.co.uk/properties/1265926...,"France Hill, Honley, Holmfirth, HD9",Terraced,×2,×1,,Freehold,0.5 miles,"£148,000"
409,https://www.rightmove.co.uk/properties/1327196...,"Upper Sunny Bank Mews, Meltham, Holmfirth",Apartment,×2,×1,,Leasehold,2.2 miles,"£140,000"


In [72]:
# Write the Huddersfield (part 2) scraping results to a csv (row index left out)
huddersfield_csv_name = 'huds_properties_pt2.csv'
property_details_huddersfield.to_csv(huddersfield_csv_name, index=False)

# Bradford csv

In [45]:
# Read in bradford property url links from csv; the links are taken from the
# first column
# NOTE(review): the displayed list contains repeated links (e.g. property
# 132757523 appears twice), so the same listing gets scraped more than once -
# consider de-duplicating
bfd = pd.read_csv('bfdlinks.csv')
bfd_link_column = bfd.columns[0]
bfd_list = bfd[bfd_link_column].values.tolist()
bfd_list

['https://www.rightmove.co.uk/properties/132757523#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/123962387#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/81355908#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/131966627#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132757523#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/130350101#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/121355228#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132236801#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/125514563#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/86046072#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/131658380#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/131966636#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/131066510#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/129829655#/?channel=RES_BUY',
 'https:

In [51]:
# Number of Bradford links to scrape
len(bfd_list)

2413

In [52]:
# Empty frame that collects one row per scraped Bradford listing
bradford_columns = [
    'url', 'address', 'property_type', 'bedrooms', 'bathrooms',
    'size_sq_ft', 'tenure', 'nearest_station', 'price',
]
property_details_bradford = pd.DataFrame(columns=bradford_columns)
property_details_bradford

Unnamed: 0,url,address,property_type,bedrooms,bathrooms,size_sq_ft,tenure,nearest_station,price


In [53]:
# Scrape every Bradford link and store one row per listing, keyed by the
# link's position in bfd_list
for i, url in enumerate(bfd_list):
    details = get_property_details(url)
    if details is None:
        # Guard against a bare None result (rather than an 8-item tuple),
        # which cannot be unpacked and would stop the whole run
        details = (None,) * 8
    address, property_type, bedrooms, bathrooms, size, tenure, nearest_station, price = details

    property_details_bradford.loc[i] = (url, *details)
    # Keep track of the scraping
    print(i)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [54]:
# Preview the first 10 scraped Bradford listings
property_details_bradford.head(10)

Unnamed: 0,url,address,property_type,bedrooms,bathrooms,size_sq_ft,tenure,nearest_station,price
0,https://www.rightmove.co.uk/properties/1327575...,"Broadgate House, 2 Broad Street, Bradford, Wes...",Apartment,×3,×1,,Leasehold,0.0 miles,"£130,000"
1,https://www.rightmove.co.uk/properties/1239623...,"Hanover Square, Bradford, West Yorkshire, BD1",Terraced,×5,×2,,Freehold,0.4 miles,"£200,000"
2,https://www.rightmove.co.uk/properties/8135590...,"Hanover Square, Bradford, BD1 3BY",Terraced,×5,×3,,Freehold,0.4 miles,"£179,000"
3,https://www.rightmove.co.uk/properties/1319666...,"Thornton Road, Bradford",Apartment,×2,×1,,Freehold,0.4 miles,"£135,000"
4,https://www.rightmove.co.uk/properties/1327575...,"Broadgate House, 2 Broad Street, Bradford, Wes...",Apartment,×3,×1,,Leasehold,0.0 miles,"£130,000"
5,https://www.rightmove.co.uk/properties/1303501...,"East Parade, Behrens Warehouse, BD1",Apartment,×1,×1,,Leasehold,0.3 miles,"£125,000"
6,https://www.rightmove.co.uk/properties/1213552...,"Conditioning House, Cape Street, Bradford, Yor...",Apartment,×2,×2,,Ask agent,0.1 miles,"£124,995"
7,https://www.rightmove.co.uk/properties/1322368...,"Broadway Residence, Fields Street, Bradford, BD1",Flat,×2,×2,614 sq. ft.,Leasehold,0.2 miles,"£115,000"
8,https://www.rightmove.co.uk/properties/1255145...,"Conditioning House, Cape Street, Bradford, Yor...",Apartment,×3,×2,,Ask agent,0.3 miles,"£114,995"
9,https://www.rightmove.co.uk/properties/8604607...,"Cape Street, Bradford, West Yorkshire, BD1",Apartment,×2,×1,,Leasehold,0.3 miles,"£110,000"


In [55]:
# Write the Bradford scraping results to a csv (row index left out)
bradford_csv_name = 'bradford_properties.csv'
property_details_bradford.to_csv(bradford_csv_name, index=False)

# Wakefield csv

In [105]:
# Empty frame that collects one row per scraped Wakefield listing
wakefield_columns = [
    'url', 'address', 'property_type', 'bedrooms', 'bathrooms',
    'size_sq_ft', 'tenure', 'nearest_station', 'price',
]
property_details_wakefield = pd.DataFrame(columns=wakefield_columns)
property_details_wakefield

Unnamed: 0,url,address,property_type,bedrooms,bathrooms,size_sq_ft,tenure,nearest_station,price


In [85]:
# Read in wakefield property url links from csv; the links are taken from the
# first column
wakey = pd.read_csv('wakeflinks.csv')
wakey_link_column = wakey.columns[0]
wakey_list = wakey[wakey_link_column].values.tolist()
wakey_list

['https://www.rightmove.co.uk/properties/132668261#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/127390190#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/122253707#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/123663902#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132574289#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/126457667#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/131550776#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/128727149#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132839501#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/127107008#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132045974#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/127728041#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/123514769#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/128684015#/?channel=RES_BUY',
 'http

In [86]:
# Number of Wakefield links in total
len(wakey_list)

2669

In [96]:
# Keep only the links from position 1996 onwards
# NOTE(review): 1996 is presumably the number of links already scraped in an
# earlier run - confirm
wakey_listpt2 = wakey_list[1996:]

In [102]:
# Number of Wakefield links left to scrape
len(wakey_listpt2)

673

In [103]:
# Inspect the remaining Wakefield links
wakey_listpt2

['https://www.rightmove.co.uk/properties/128715890#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/127616060#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/123334355#/?channel=COM_BUY',
 'https://www.rightmove.co.uk/properties/128053874#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/131573798#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/130705031#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132551609#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132601793#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/122052275#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/131897588#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132793523#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/131572826#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132585284#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/86037618#/?channel=RES_BUY',
 'https

In [106]:
# Scrape every remaining Wakefield link and store one row per listing, keyed
# by the link's position in wakey_listpt2
for i, url in enumerate(wakey_listpt2):
    details = get_property_details(url)
    if details is None:
        # Guard against a bare None result (rather than an 8-item tuple),
        # which cannot be unpacked and would stop the whole run
        details = (None,) * 8
    address, property_type, bedrooms, bathrooms, size, tenure, nearest_station, price = details

    property_details_wakefield.loc[i] = (url, *details)
    # Keep track of the scraping
    print(i)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [107]:
# Inspect the scraped Wakefield listings
property_details_wakefield

Unnamed: 0,url,address,property_type,bedrooms,bathrooms,size_sq_ft,tenure,nearest_station,price
0,https://www.rightmove.co.uk/properties/1287158...,"Wrights Lane, Cridling Stubbs, Knottingley",Detached,×4,×2,,Freehold,2.3 miles,"£425,000"
1,https://www.rightmove.co.uk/properties/1276160...,"Womersley Road, Knottingley, WF11",Detached,×3,×1,,Freehold,0.9 miles,"£385,000"
2,https://www.rightmove.co.uk/properties/1233343...,"Land, Old Great North Road, Brotherton",Plot,,,,,1.5 miles,"£375,000"
3,https://www.rightmove.co.uk/properties/1280538...,"Pontefract Road, Knottingley",Detached,×4,×1,,Freehold,0.3 miles,"£350,000"
4,https://www.rightmove.co.uk/properties/1315737...,"Pontefract Road, Ferrybridge, WF11",Bungalow,×4,×2,,Freehold,0.7 miles,"£325,000"
...,...,...,...,...,...,...,...,...,...
668,https://www.rightmove.co.uk/properties/1326477...,"5 Bar Street, Batley, West Yorkshire, WF17 5PG",Flat,×2,,,Leasehold,0.2 miles,"£50,000"
669,https://www.rightmove.co.uk/properties/1320358...,"Apartment , - Bar Street, Batley",Apartment,×1,×1,,Leasehold,0.2 miles,"£50,000"
670,https://www.rightmove.co.uk/properties/1239716...,"Station Road, Batley, WF17",Flat,×1,,,Leasehold,0.2 miles,"£40,000"
671,https://www.rightmove.co.uk/properties/1299072...,"5 Bar Street, Batley, WF17",Apartment,×1,×1,,Leasehold,0.2 miles,"£35,000"


In [109]:
# Write the Wakefield (part 2) scraping results to a csv (row index left out)
wakefield_csv_name = 'wakefield_properties_pt2.csv'
property_details_wakefield.to_csv(wakefield_csv_name, index=False)

# Leeds csv

In [22]:
# Empty frame that collects one row per scraped Leeds listing
leeds_columns = [
    'url', 'address', 'property_type', 'bedrooms', 'bathrooms',
    'size_sq_ft', 'tenure', 'nearest_station', 'price',
]
property_details_leeds = pd.DataFrame(columns=leeds_columns)
property_details_leeds

Unnamed: 0,url,address,property_type,bedrooms,bathrooms,size_sq_ft,tenure,nearest_station,price


In [4]:
# Read in leeds property url links from csv; the links are taken from the
# first column
leeds = pd.read_csv('ldslinks.csv')
leeds_link_column = leeds.columns[0]
leeds_list = leeds[leeds_link_column].values.tolist()
leeds_list

['https://www.rightmove.co.uk/properties/131988005#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/127267460#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132583922#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132583937#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132583970#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132583997#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132584012#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/129394193#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132931109#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132418703#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132080543#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132418667#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132418652#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/131593181#/?channel=RES_BUY',
 'http

In [5]:
# First batch of Leeds links: positions 0 to 2293
leeds_list_pt1 = leeds_list[0:2294]

In [9]:
# Number of Leeds links in the first batch
len(leeds_list_pt1)

2294

In [27]:
# Second batch of Leeds links: position 2300 onwards
# NOTE(review): leeds_list_pt1 is leeds_list[0:2294], so positions 2294-2299
# fall in neither batch - confirm those 6 links were scraped elsewhere
leeds_list_pt2 = leeds_list[2300:]

In [28]:
# Number of Leeds links in the second batch
len(leeds_list_pt2)

1717

In [29]:
# Inspect the second batch of Leeds links
leeds_list_pt2

['https://www.rightmove.co.uk/properties/132895976#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/129520181#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132467975#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/130762091#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/131928431#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132655511#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/131845130#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132939854#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132689807#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/130294631#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/132809204#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/130389164#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/131847656#/?channel=RES_BUY',
 'https://www.rightmove.co.uk/properties/86039484#/?channel=RES_BUY',
 'https

In [31]:
# Scrape every link in the second Leeds batch and store one row per listing,
# keyed by the link's position in leeds_list_pt2
for i, url in enumerate(leeds_list_pt2):
    details = get_property_details(url)
    if details is None:
        # Guard against a bare None result (rather than an 8-item tuple),
        # which cannot be unpacked and would stop the whole run
        details = (None,) * 8
    address, property_type, bedrooms, bathrooms, size, tenure, nearest_station, price = details

    property_details_leeds.loc[i] = (url, *details)
    # Keep track of the scraping
    print(i)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [32]:
# Inspect the scraped Leeds listings
property_details_leeds

Unnamed: 0,url,address,property_type,bedrooms,bathrooms,size_sq_ft,tenure,nearest_station,price
0,https://www.rightmove.co.uk/properties/1328959...,"Broadgate Walk, Horsforth, Leeds, West Yorkshi...",Semi-Detached,×3,×2,,Freehold,0.7 miles,"£315,000"
1,https://www.rightmove.co.uk/properties/1295201...,"Bletchley Way, Leeds, LS18",Detached,×4,×2,,Freehold,1.6 miles,"£525,000"
2,https://www.rightmove.co.uk/properties/1324679...,"Poets Place, Horsforth, Leeds, West Yorkshire,...",House,×3,×2,,Freehold,0.4 miles,"£525,000"
3,https://www.rightmove.co.uk/properties/1307620...,"Lanark Drive, Horsforth",Semi-Detached,×4,×2,,Freehold,0.7 miles,"£500,000"
4,https://www.rightmove.co.uk/properties/1319284...,"Newlay Wood Fold, Horsforth, Leeds, West Yorks...",Detached,×4,×2,,Freehold,0.7 miles,"£500,000"
...,...,...,...,...,...,...,...,...,...
1712,https://www.rightmove.co.uk/properties/1261194...,"Crossbeck Road, Ilkley, West Yorkshire, LS29",Bungalow,×1,×1,,Leasehold,0.3 miles,"£150,000"
1713,https://www.rightmove.co.uk/properties/1298128...,"Wells Promenade, Ilkley, West Yorkshire, LS29",Retirement Property,×2,×1,,Leasehold,0.1 miles,"£149,950"
1714,https://www.rightmove.co.uk/properties/1282129...,"St. Peters Way, Menston, Ilkley, West Yorkshir...",Apartment,×1,×1,,Leasehold,0.4 miles,"£149,000"
1715,https://www.rightmove.co.uk/properties/1301562...,"Queens Road, Ilkley, West Yorkshire, LS29",Apartment,×2,×1,,Leasehold,0.2 miles,"£147,000"


In [33]:
# Write the Leeds (part 2) scraping results to a csv (row index left out)
leeds_csv_name = 'leeds_properties_pt2.csv'
property_details_leeds.to_csv(leeds_csv_name, index=False)