# Web Scrape using Selenium to Extract Properties from Rightmove

## About:

I'm searching for a home as a first-time buyer. I like using websites like Rightmove, but find their lack of filters frustrating. I have a clear set of criteria, so I am building a Selenium wrapper to automate property searching on Rightmove. 

The wrapper will pull relevant links using my search criteria and display them here.

## Setting up environment:

Make sure to have all the relevant conda and python configurations set up.
I followed these instructions:
https://saturncloud.io/blog/how-to-create-a-conda-environment-with-a-specific-python-version/
https://saturncloud.io/blog/how-to-use-conda-environment-in-a-jupyter-notebook/

In [1]:
# Install Selenium
# %pip install selenium

# Import relevant packages
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import TimeoutException, ElementNotInteractableException
from selenium.webdriver.common.action_chains import ActionChains


In [None]:
# Navigate to the webpage
options = webdriver.ChromeOptions()
driver = webdriver.Chrome(options=options)


# Open Rightmove and navigate to the search page
driver.get('https://www.rightmove.co.uk/')


# Reject cookies, the banner takes a long time to show up and it changes each time, so make a cup of tea.
try:
    # Wait for the reject cookies button to be clickable using its ID
    reject_cookies_button = WebDriverWait(driver, 60).until(
        EC.element_to_be_clickable((By.ID, 'onetrust-reject-all-handler'))
    )
    reject_cookies_button.click()
    print("Rejected cookies.")
except TimeoutException:
    print("Cookie consent dialog did not appear.")


# # Wait until the search box is available and enter the location details
search_box = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "input#ta_searchInput"))
)

# Enter the location
search_box.send_keys("Barons Court Station")
search_box.send_keys(Keys.RETURN)


# For sale
for_sale_button = WebDriverWait(driver, 30).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "button[data-testid='forSaleCta']"))
)
for_sale_button.click()
print("Clicked 'For Sale' button.")


# Wait for the search results page to load
time.sleep(1)  # Adding sleep to ensure page is loaded

    
# Set the filters:
# Filter 1: Search Radius - dropdown
try:
    radius_dropdown = WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.ID, 'radius'))
    )
    select = Select(radius_dropdown)
    select.select_by_visible_text("Within ¼ mile")
    print("Set radius to 1/4 mile.")
except TimeoutException:
    print("Radius filter not found!")    
    
    
# Filter 2: Price - dropdown
try:
    # Wait for the dropdown to be present
    max_price_dropdown = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, 'maxPrice'))
    )
    # Select the option for £1,000,000
    select = Select(max_price_dropdown)
    select.select_by_visible_text("1,000,000")  # Change to £1,000,000
    print("Set maximum price to £1,000,000.")
except TimeoutException:
    print("Maximum price filter not found!")


# Filter 3: No. of Bedrooms - dropdown
try:
    # Wait for the dropdown to be present
    min_bedrooms_dropdown = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, 'minBedrooms'))
    )
    # Select the option for 3 bedrooms
    select = Select(min_bedrooms_dropdown)
    select.select_by_visible_text("3")  # Select 3 bedrooms
    print("Set minimum bedrooms to 3.")
except TimeoutException:
    print("Minimum bedrooms filter not found!")

    
# Click the "Search Properties" button
try:
    see_properties_button = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, 'submit'))
    )
    see_properties_button.click()
    print("Clicked 'Search properties' button.")
except TimeoutException:
    print("'Search properties' button not found!")

    
# Allow the search results page to load
time.sleep(5)


# Step 2: Click the first result link
# Wait for the property results to load
try:
    # Modify the selector based on the actual property link structure
    property_links = WebDriverWait(driver, 5).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.l-searchResult .propertyCard-anchor'))  # Updated selector
    )
    
    property_cards = driver.find_elements(By.CSS_SELECTOR, ".propertyCard")

    if property_cards:
        # Get the first property card
        first_property_card = property_cards[0]
        
        # Try to find the anchor tag within the first property card
        first_property_link = first_property_card.find_element(By.CSS_SELECTOR, "a.swipe-wrap")  # Adjust the selector if needed

        try:
            # Wait until the anchor tag is clickable
            WebDriverWait(driver, 5).until(EC.element_to_be_clickable(first_property_link))
            first_property_link.click()  # Click on the property link
            print("Clicked the property link.")
        except ElementNotInteractableException:
            print("The property link is not interactable.")
        except Exception as e:
            print(f"An error occurred while clicking the property link: {e}")
    else:
        print("No property cards found.")

except Exception as e:
    print(f"An error occurred while waiting for property links: {e}")

    
# Define link collection list:
saved_links = []  # Initialize an empty list to store saved links


# Get property link
def process_property(driver, property_link):
    # Navigate to the property link
    driver.get(property_link)

    
# Wait for the page to load and scroll to the tenancy button
try:
    # Scroll down to make the button visible
    tenure_button = WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "button[aria-label*='tenure']"))
    )

    # Use ActionChains to scroll to the button
    actions = ActionChains(driver)
    actions.move_to_element(tenure_button).perform()

    # Now wait for the element containing the tenancy type to be visible
    tenancy_type_element = WebDriverWait(driver, 5).until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, "button[aria-label*='tenure'] ._1hV1kqpVceE9m-QrX_hWDN"))
    )

    # Extract the tenancy type text
    tenancy_type = tenancy_type_element.text
    print(f"Tenancy Type: {tenancy_type}")

except Exception as e:
    print(f"An error occurred: {e}") 

    
# Check the tenancy type
if 'tenancy_type' in locals() and "freehold" in tenancy_type.lower():
    # Save the link if it's a freehold
    saved_links.append(property_link)  # Replace `saved_links` with your list/collection for saving links
    print(f"Link saved: {property_link}")
else:
    print(f"Link not saved, tenancy type: {tenancy_type}")


# After extracting the tenancy information
try:
    # Scroll back to the top of the page
    driver.execute_script("window.scrollTo(0, 0);")

    # Wait for a brief moment to ensure the page has adjusted
    time.sleep(1)

    # Locate the 'Back to search results' link using its text or surrounding elements
    back_button = WebDriverWait(driver, 5).until(
        EC.element_to_be_clickable((By.LINK_TEXT, "Back to search results"))
    )

    # Click the 'Back to search results' link
    back_button.click()
    print("Returned to the search results page.")
except Exception as e:
    print(f"Could not navigate back: {e}")


Rejected cookies.
Clicked 'For Sale' button.
Set radius to 1/4 mile.
Set maximum price to £1,000,000.
Set minimum bedrooms to 3.
Clicked 'Search properties' button.
Clicked the property link.
Tenancy Type: Share of Freehold


NameError: name 'property_link' is not defined

## Run Python Script

In [None]:
#### TEST ENVIRONMENT BELOW:

In [None]:
# Navigate to the webpage
options = webdriver.ChromeOptions()
driver = webdriver.Chrome(options=options)


# Open Rightmove and navigate to the search page
driver.get('https://www.rightmove.co.uk/')


# Reject cookies, the banner takes a long time to show up and it changes each time, so make a cup of tea.
try:
    # Wait for the reject cookies button to be clickable using its ID
    reject_cookies_button = WebDriverWait(driver, 60).until(
        EC.element_to_be_clickable((By.ID, 'onetrust-reject-all-handler'))
    )
    reject_cookies_button.click()
    print("Rejected cookies.")
except TimeoutException:
    print("Cookie consent dialog did not appear.")


# # Wait until the search box is available and enter the location details
search_box = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "input#ta_searchInput"))
)

# Enter the location
search_box.send_keys("Barons Court Station")
search_box.send_keys(Keys.RETURN)


# For sale
for_sale_button = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "button[data-testid='forSaleCta']"))
)
for_sale_button.click()
print("Clicked 'For Sale' button.")


# Wait for the search results page to load
time.sleep(1)  # Adding sleep to ensure page is loaded

    
# Set the filters:
# Filter 1: Search Radius - dropdown
try:
    radius_dropdown = WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.ID, 'radius'))
    )
    select = Select(radius_dropdown)
    select.select_by_visible_text("Within ¼ mile")
    print("Set radius to 1/4 mile.")
except TimeoutException:
    print("Radius filter not found!")    
    
    
# Filter 2: Price - dropdown
try:
    # Wait for the dropdown to be present
    max_price_dropdown = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, 'maxPrice'))
    )
    # Select the option for £1,000,000
    select = Select(max_price_dropdown)
    select.select_by_visible_text("1,000,000")  # Change to £1,000,000
    print("Set maximum price to £1,000,000.")
except TimeoutException:
    print("Maximum price filter not found!")


# Filter 3: No. of Bedrooms - dropdown
try:
    # Wait for the dropdown to be present
    min_bedrooms_dropdown = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, 'minBedrooms'))
    )
    # Select the option for 3 bedrooms
    select = Select(min_bedrooms_dropdown)
    select.select_by_visible_text("3")  # Select 3 bedrooms
    print("Set minimum bedrooms to 3.")
except TimeoutException:
    print("Minimum bedrooms filter not found!")

    
# Click the "Search Properties" button
try:
    see_properties_button = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, 'submit'))
    )
    see_properties_button.click()
    print("Clicked 'Search properties' button.")
except TimeoutException:
    print("'Search properties' button not found!")

    
# Allow the search results page to load
time.sleep(5)


# Step 2: Click the first result link
# Wait for the property results to load
try:
    # Modify the selector based on the actual property link structure
    property_links = WebDriverWait(driver, 5).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.l-searchResult .propertyCard-anchor'))  # Updated selector
    )
    
    property_cards = driver.find_elements(By.CSS_SELECTOR, ".propertyCard")

    if property_cards:
        # Get the first property card
        first_property_card = property_cards[0]
        
        # Try to find the anchor tag within the first property card
        first_property_link = first_property_card.find_element(By.CSS_SELECTOR, "a.swipe-wrap")  # Adjust the selector if needed

        try:
            # Wait until the anchor tag is clickable
            WebDriverWait(driver, 5).until(EC.element_to_be_clickable(first_property_link))
            first_property_link.click()  # Click on the property link
            print("Clicked the property link.")
        except ElementNotInteractableException:
            print("The property link is not interactable.")
        except Exception as e:
            print(f"An error occurred while clicking the property link: {e}")
    else:
        print("No property cards found.")

except Exception as e:
    print(f"An error occurred while waiting for property links: {e}")

    
# Define link collection list:
saved_links = []  # Initialize an empty list to store saved links


# Get property link
def process_property(driver, property_link):
    # Navigate to the property link
    driver.get(property_link)

    
# Wait for the page to load and scroll to the tenancy button
try:
    # Scroll down to make the button visible
    tenure_button = WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, "button[aria-label*='tenure']"))
    )

    # # Use ActionChains to scroll to the button
    # actions = ActionChains(driver)
    # actions.move_to_element(tenure_button).perform()

    # Now wait for the element containing the tenancy type to be visible
    tenancy_type_element = WebDriverWait(driver, 5).until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, "button[aria-label*='tenure'] ._1hV1kqpVceE9m-QrX_hWDN"))
    )

    # Extract the tenancy type text
    tenancy_type = tenancy_type_element.text
    print(f"Tenancy Type: {tenancy_type}")

except Exception as e:
    print(f"An error occurred: {e}") 

    
# Check the tenancy type
if 'tenancy_type' in locals() and "freehold" in tenancy_type.lower():
    # Save the link if it's a freehold
    saved_links.append(property_link)  # Replace `saved_links` with your list/collection for saving links
    print(f"Link saved: {property_link}")
else:
    print(f"Link not saved, tenancy type: {tenancy_type}")


# After extracting the tenancy information
try:
    # Scroll back to the top of the page
    driver.execute_script("window.scrollTo(0, 0);")

    # Wait for a brief moment to ensure the page has adjusted
    time.sleep(1)

    # Locate the 'Back to search results' link using its text or surrounding elements
    back_button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.LINK_TEXT, "Back to search results"))
    )

    # Click the 'Back to search results' link
    back_button.click()
    print("Returned to the search results page.")
except Exception as e:
    print(f"Could not navigate back: {e}")



Rejected cookies.
Clicked 'For Sale' button.
Set radius to 1/4 mile.
Set maximum price to £1,000,000.
Set minimum bedrooms to 3.
Clicked 'Search properties' button.
Clicked the property link.
Tenancy Type: Leasehold
Link not saved, tenancy type: Leasehold
Returned to the search results page.


In [16]:
# Navigate to the webpage
options = webdriver.ChromeOptions()
driver = webdriver.Chrome(options=options)

# Open Rightmove and navigate to the search page
driver.get('https://www.rightmove.co.uk/')

# Reject cookies
try:
    reject_cookies_button = WebDriverWait(driver, 60).until(
        EC.element_to_be_clickable((By.ID, 'onetrust-reject-all-handler'))
    )
    reject_cookies_button.click()
    print("Rejected cookies.")
except TimeoutException:
    print("Cookie consent dialog did not appear.")

# Enter the location and search
search_box = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "input#ta_searchInput"))
)
search_box.send_keys("Barons Court Station")
search_box.send_keys(Keys.RETURN)

# For sale button
for_sale_button = WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CSS_SELECTOR, "button[data-testid='forSaleCta']"))
)
for_sale_button.click()
print("Clicked 'For Sale' button.")

time.sleep(1)  # Ensure page is loaded

# Set filters
try:
    radius_dropdown = WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.ID, 'radius'))
    )
    select = Select(radius_dropdown)
    select.select_by_visible_text("Within ¼ mile")
    print("Set radius to 1/4 mile.")
except TimeoutException:
    print("Radius filter not found!")    

try:
    max_price_dropdown = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, 'maxPrice'))
    )
    select = Select(max_price_dropdown)
    select.select_by_visible_text("1,000,000")
    print("Set maximum price to £1,000,000.")
except TimeoutException:
    print("Maximum price filter not found!")

try:
    min_bedrooms_dropdown = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, 'minBedrooms'))
    )
    select = Select(min_bedrooms_dropdown)
    select.select_by_visible_text("3")
    print("Set minimum bedrooms to 3.")
except TimeoutException:
    print("Minimum bedrooms filter not found!")

# Click the "Search Properties" button
try:
    see_properties_button = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, 'submit'))
    )
    see_properties_button.click()
    print("Clicked 'Search properties' button.")
except TimeoutException:
    print("'Search properties' button not found!")

time.sleep(5)  # Allow the search results page to load

# Step 2: Iterate through all result links
saved_links = []  # List to store saved links

try:
    property_cards = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.propertyCard'))
    )

    for idx, property_card in enumerate(property_cards):
        try:
            # Extract the actual property link from the anchor tag inside the property card
            property_link_element = property_card.find_element(By.CSS_SELECTOR, "a.propertyCard-link")
            property_link = property_link_element.get_attribute('href')
            
            if property_link is None:
                print(f"Property {idx+1} has no valid link.")
                continue

            print(f"Processing property {idx+1}: {property_link}")

            # Navigate to the property page
            driver.get(property_link)

            # Wait for the tenancy button and scroll to it
            tenure_button = WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, "button[aria-label*='tenure']"))
            )
            actions = ActionChains(driver)
            actions.move_to_element(tenure_button).perform()

            # Extract tenancy type
            tenancy_type_element = WebDriverWait(driver, 5).until(
                EC.visibility_of_element_located((By.CSS_SELECTOR, "button[aria-label*='tenure'] ._1hV1kqpVceE9m-QrX_hWDN"))
            )
            tenancy_type = tenancy_type_element.text
            print(f"Tenancy Type: {tenancy_type}")

            # Save link if freehold
            if "freehold" in tenancy_type.lower():
                saved_links.append(property_link)
                print(f"Link saved: {property_link}")
            else:
                print(f"Link not saved, tenancy type: {tenancy_type}")

            # Navigate back to search results
            try:
                driver.execute_script("window.scrollTo(0, 0);")
                back_button = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.LINK_TEXT, "Back to search results"))
                )
                back_button.click()
                print("Returned to the search results page.")
            except Exception as e:
                print(f"Could not navigate back: {e}")
                driver.back()  # Use browser back if the link fails

        except Exception as e:
            print(f"Error processing property {idx+1}: {e}")

except Exception as e:
    print(f"An error occurred while waiting for property links: {e}")

# Final output
print(f"Saved {len(saved_links)} links: {saved_links}")


Rejected cookies.
Clicked 'For Sale' button.
Set radius to 1/4 mile.
Set maximum price to £1,000,000.
Set minimum bedrooms to 3.
Clicked 'Search properties' button.
Processing property 1: https://www.rightmove.co.uk/properties/152302472#/?channel=RES_BUY
Tenancy Type: Leasehold
Link not saved, tenancy type: Leasehold
Could not navigate back: Message: 
Stacktrace:
0   chromedriver                        0x0000000102a90248 cxxbridge1$str$ptr + 1907280
1   chromedriver                        0x0000000102a88730 cxxbridge1$str$ptr + 1875768
2   chromedriver                        0x000000010269c260 cxxbridge1$string$len + 89488
3   chromedriver                        0x00000001026e050c cxxbridge1$string$len + 368700
4   chromedriver                        0x000000010271a7d0 cxxbridge1$string$len + 606976
5   chromedriver                        0x00000001026d512c cxxbridge1$string$len + 322652
6   chromedriver                        0x00000001026d5d7c cxxbridge1$string$len + 325804
7   chrom