In [20]:
import requests
from bs4 import BeautifulSoup

In [122]:
# Search endpoint that every results-page request is sent to
base_url = "https://www.rightmove.co.uk/property-for-sale/find.html"

# Query-string filters attached to each request
params = dict(
    locationIdentifier="REGION^79192",
    propertyTypes="",
    includeSSTC="false",
    mustHave="",
    dontShow="",
    furnishTypes="",
    keywords="",
)

# One accumulator per scraped field; each name is bound to its own empty list
all_titles, all_addresses, all_descriptions, all_prices, all_dates = ([] for _ in range(5))


In [123]:
def _text_or_default(element, default):
    """Return the stripped text of ``element``, or ``default`` when it is None.

    ``Tag.find`` returns None when no matching child exists, so this guards the
    nested lookups below against AttributeError.
    """
    return element.text.strip() if element is not None else default


# Empty the accumulators first so re-running this cell does not append a second
# copy of every listing on top of the previous run's results.
for scraped_field in (all_titles, all_addresses, all_descriptions, all_prices, all_dates):
    scraped_field.clear()

# NOTE(review): each field is collected with its own find_all, so a listing that
# lacks one of these elements would shift that column relative to the others.
# The length check in the next cell is the only safeguard — confirm it passes.
for page in range(1, 43):
    # "index" is sent as a result offset: 0, 24, 48, ... (presumably 24 results
    # per page — confirm against the site).
    params["index"] = (page - 1) * 24

    # A timeout stops a stalled connection from hanging the notebook, and
    # raise_for_status stops an HTTP error page from being parsed as "no listings".
    response = requests.get(base_url, params=params, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Listing titles
    title_elements = soup.find_all('h2', class_='propertyCard-title')
    titles = [title.text.strip() for title in title_elements]
    all_titles.extend(titles)

    # Addresses: the text lives in a <span> inside each <address>
    address_elements = soup.find_all('address', class_='propertyCard-address')
    addresses = [_text_or_default(address_element.find('span'), "N/A")
                 for address_element in address_elements]
    all_addresses.extend(addresses)

    # Descriptions: the text lives in a <span itemprop="description"> inside each card div
    description_elements = soup.find_all('div', class_='propertyCard-description')
    descriptions = [_text_or_default(description_element.find('span', itemprop='description'), "N/A")
                    for description_element in description_elements]
    all_descriptions.extend(descriptions)

    # Prices, kept as displayed text
    price_elements = soup.find_all('div', class_='propertyCard-priceValue')
    prices = [price_element.text.strip() for price_element in price_elements]
    all_prices.extend(prices)

    # Added/reduced line: keep only its last space-separated token. Stripping
    # first means trailing whitespace cannot turn that token into "".
    date_span_elements = soup.find_all('span', class_='propertyCard-branchSummary-addedOrReduced')
    dates = [date_span.text.strip().split(' ')[-1] for date_span in date_span_elements]
    all_dates.extend(dates)

In [124]:
# Sanity check: print the number of values scraped for each field, in the order
# titles, addresses, descriptions, prices, dates. All five numbers should match.
print(len(all_titles))
print(len(all_addresses))
print(len(all_descriptions))
print(len(all_prices))
print(len(all_dates))

1050
1050
1050
1050
1050


In [125]:
import pandas as pd

# Pair each output column name with the list scraped for it
data = dict(zip(
    ('Title', 'Address', 'Description', 'Price', 'Date Added'),
    (all_titles, all_addresses, all_descriptions, all_prices, all_dates),
))

# Build the table, one column per scraped field
df = pd.DataFrame(data)


In [113]:
# Display the DataFrame as the cell output.
# NOTE(review): the output recorded under this cell looks stale — its execution
# count (113) precedes the cells above and its last row index is 2098. Re-run to refresh.
df

Unnamed: 0,Title,Address,Description,Price,Date Added
0,2 bedroom apartment for sale,"Water Street, Radcliffe",THE PERFECT FIRST TIME HOME OR RENTAL INVESTME...,"£85,000",19/05/2023
1,Block of apartments for sale,"Manchester, Greater Manchester, M3","OFF MARKET BUILDINGS AVAILABLE, Prices from £1...","£10,000,000",19/05/2023
2,7 bedroom detached house for sale,"Devisdale Road, Altrincham, Cheshire, WA14",A spectacular detached family residence extend...,"£6,250,000",12/05/2023
3,4 bedroom apartment for sale,"Battersea, London, Greater London, SW11",A brand new collection of luxury apartments ju...,"£4,850,000",05/06/2023
4,6 bedroom detached house for sale,"Hale, Cheshire WA15",This STUNNING discreet listing makes for a per...,"£4,500,000",06/06/2023
...,...,...,...,...,...
2095,4 bedroom cottage for sale,"Rosemary Cottage, Roe Green Worsley",NO VENDOR CHAIN! LOCATE ESTATE AGENT are delig...,"£639,950",04/11/2022
2096,4 bedroom semi-detached house for sale,"Bramley Road, Bramhall, SK7",***ZERO DEPOSIT GUARANTEE AVAILABLE*** AVAILAB...,"£635,000",30/05/2023
2097,4 bedroom semi-detached house for sale,"Manley Road, Sale, Cheshire, Greater Mancheste...",** STOP SCROLLING ** A VIEWING OF THIS STUNNIN...,"£635,000",07/06/2023
2098,3 bedroom detached house for sale,"ROBINS CLOSE, Bramhall",Super CUL DE SAC position in SOUGHT AFTER RESI...,"£635,000",26/03/2023


In [126]:
import re

# Capture the digits that sit directly before the word "bedroom" in each title
pattern = r'(\d+)\s+bedroom'
bedroom_counts = df['Title'].str.extract(pattern, expand=False)

# Store as float so titles without a bedroom count end up as NaN
df['Bedrooms'] = bedroom_counts.astype(float)

# Show the DataFrame with the new 'Bedrooms' column
df

Unnamed: 0,Title,Address,Description,Price,Date Added,Bedrooms
0,3 bedroom detached house for sale,"Windy Bank, Higher Blackley M9",Hunters are delighted to bring to market this ...,"£300,000",11/05/2023,3.0
1,Block of apartments for sale,"Manchester, Greater Manchester, M3","OFF MARKET BUILDINGS AVAILABLE, Prices from £1...","£10,000,000",19/05/2023,
2,7 bedroom detached house for sale,"Devisdale Road, Altrincham, Cheshire, WA14",A spectacular detached family residence extend...,"£6,250,000",12/05/2023,7.0
3,4 bedroom apartment for sale,"Battersea, London, Greater London, SW11",A brand new collection of luxury apartments ju...,"£4,850,000",05/06/2023,4.0
4,6 bedroom detached house for sale,"Hale, Cheshire WA15",This STUNNING discreet listing makes for a per...,"£4,500,000",06/06/2023,6.0
...,...,...,...,...,...,...
1045,4 bedroom cottage for sale,"Rosemary Cottage, Roe Green Worsley",NO VENDOR CHAIN! LOCATE ESTATE AGENT are delig...,"£639,950",04/11/2022,4.0
1046,4 bedroom semi-detached house for sale,"Bramley Road, Bramhall, SK7",***ZERO DEPOSIT GUARANTEE AVAILABLE*** AVAILAB...,"£635,000",30/05/2023,4.0
1047,4 bedroom semi-detached house for sale,"Manley Road, Sale, Cheshire, Greater Mancheste...",** STOP SCROLLING ** A VIEWING OF THIS STUNNIN...,"£635,000",07/06/2023,4.0
1048,3 bedroom detached house for sale,"ROBINS CLOSE, Bramhall",Super CUL DE SAC position in SOUGHT AFTER RESI...,"£635,000",26/03/2023,3.0


In [129]:
# Keywords recognised as a property type. They are matched as whole words,
# ignoring case. ('smallholding' was previously misspelled and could never match.)
property_types = ['apartment', 'detached', 'bungalow', 'semi-detached', 'flat', 'apartments', 'house', 'land', 'plot',
                  'penthouse', 'property', 'farm house', 'Barn Conversion', 'duplex', 'triplex', 'cottage',
                  'smallholding', 'equestrian facility']

# Escape each keyword so it is always matched literally, then join them into
# one alternation. The regex reports the leftmost keyword found in the title,
# e.g. "semi-detached house" yields "semi-detached".
property_type_alternatives = "|".join(re.escape(keyword) for keyword in property_types)
pattern = fr'\b({property_type_alternatives})\b'

# expand=False returns a Series for the single capture group, matching how the
# 'Bedrooms' column is extracted. Titles with no keyword become NaN.
df['PropertyType'] = df['Title'].str.extract(pattern, flags=re.IGNORECASE, expand=False)
df

Unnamed: 0,Title,Address,Description,Price,Date Added,Bedrooms,PropertyType
0,3 bedroom detached house for sale,"Windy Bank, Higher Blackley M9",Hunters are delighted to bring to market this ...,"£300,000",11/05/2023,3.0,detached
1,Block of apartments for sale,"Manchester, Greater Manchester, M3","OFF MARKET BUILDINGS AVAILABLE, Prices from £1...","£10,000,000",19/05/2023,,apartments
2,7 bedroom detached house for sale,"Devisdale Road, Altrincham, Cheshire, WA14",A spectacular detached family residence extend...,"£6,250,000",12/05/2023,7.0,detached
3,4 bedroom apartment for sale,"Battersea, London, Greater London, SW11",A brand new collection of luxury apartments ju...,"£4,850,000",05/06/2023,4.0,apartment
4,6 bedroom detached house for sale,"Hale, Cheshire WA15",This STUNNING discreet listing makes for a per...,"£4,500,000",06/06/2023,6.0,detached
...,...,...,...,...,...,...,...
1045,4 bedroom cottage for sale,"Rosemary Cottage, Roe Green Worsley",NO VENDOR CHAIN! LOCATE ESTATE AGENT are delig...,"£639,950",04/11/2022,4.0,cottage
1046,4 bedroom semi-detached house for sale,"Bramley Road, Bramhall, SK7",***ZERO DEPOSIT GUARANTEE AVAILABLE*** AVAILAB...,"£635,000",30/05/2023,4.0,semi-detached
1047,4 bedroom semi-detached house for sale,"Manley Road, Sale, Cheshire, Greater Mancheste...",** STOP SCROLLING ** A VIEWING OF THIS STUNNIN...,"£635,000",07/06/2023,4.0,semi-detached
1048,3 bedroom detached house for sale,"ROBINS CLOSE, Bramhall",Super CUL DE SAC position in SOUGHT AFTER RESI...,"£635,000",26/03/2023,3.0,detached


In [130]:
# Write the table to disk as CSV; index=False leaves the row index out of the file
df.to_csv(path_or_buf='property_data4.csv', index=False)
