# Finding Properties from Which to Extract Data

## Import Packages

In [150]:
# Web Scraping and API Requests
import requests
from httpx import AsyncClient, Response
from parsel import Selector
import parsel
import jmespath
import asyncio
from urllib.parse import urlencode

# Data Manipulation and Analysis
import pandas as pd
from pprint import pprint 
import json
from typing import List
from typing import TypedDict

# Database Connection
from sqlalchemy import create_engine

# File and System Operations
import os
import sys

In [151]:
# Never truncate wide DataFrames: show every column when one is displayed
pd.set_option("display.max_columns", None)

In [152]:
# This allows one to reload the custom package without having to install it again
%load_ext autoreload 

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [153]:
# this allows one to reload the custom package without having to install it again
%autoreload 1

sys.path.insert(0,'../src/')

# https://stackoverflow.com/questions/70898150/jupyter-autoreload-workflow/73623267#73623267
# My custom package
%aimport rental_utils

In [154]:
# Load SQL Magic for Jupyter Notebooks
%load_ext sql
%config SqlMagic.displaylimit = None
%config SqlMagic.autocommit=True # for engines that do not support autommit

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [155]:
# Shared async HTTP client. The headers imitate a regular browser session so
# that requests are less likely to be blocked.
browser_headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
    # Ask for JSON explicitly -- this is important!
    "Accept": "application/json",
    # Helps mimic browser use
    "Referer": "https://www.rightmove.co.uk/",
}
client = AsyncClient(headers=browser_headers)


In [156]:
async def find_locations(query: str) -> List[str]:
    """Look up Rightmove location identifiers for a free-text place name.

    Sends the query to Rightmove's typeahead endpoint through the module-level
    ``client`` and collects the ``locationIdentifier`` of every suggestion, in
    the order the API returned them (presumably most likely match first).

    Args:
        query: Place name to search for, e.g. ``"cornwall"``. Surrounding
            whitespace is ignored and the text is upper-cased before sending.

    Returns:
        Location identifiers such as ``"REGION^61294"``; an empty list when
        the response contains no suggestions.

    Raises:
        ValueError: If ``query`` is empty or contains only whitespace.
        httpx.HTTPStatusError: If the endpoint answers with a 4xx/5xx status.
    """
    normalized = query.strip().upper()
    if not normalized:
        # An empty query would produce a malformed ".../uknostreet//" URL.
        raise ValueError("query must contain at least one non-whitespace character")
    # rightmove uses two character long tokens so "cornwall" becomes "CO/RN/WA/LL"
    tokenized_query = "/".join(
        normalized[start:start + 2] for start in range(0, len(normalized), 2)
    )
    url = f"https://www.rightmove.co.uk/typeAhead/uknostreet/{tokenized_query}/"
    response = await client.get(url)
    # Fail loudly on a blocked or failed request instead of surfacing it later
    # as a confusing JSON decode error on an HTML error page.
    response.raise_for_status()
    data = response.json()
    return [prediction["locationIdentifier"] for prediction in data["typeAheadLocations"]]

In [157]:
london_id = (await find_locations("london"))[0]
cornwall_id = (await find_locations("cornwall"))[0]


print(cornwall_id)
print(london_id)


REGION^61294
REGION^87490


In [158]:
async def scrape_search(location_id: str) -> List[dict]:
    """Fetch the first page of rental search results for a Rightmove location.

    Builds a search API URL for ``location_id`` (channel ``RENT``, offset 0),
    requests it through the module-level ``client`` and returns the parsed
    ``"properties"`` entry of the JSON response. Only the first page is
    fetched; no pagination is performed. The requested URL, the HTTP status
    and the number of results are printed for debugging.

    Args:
        location_id: Rightmove location identifier, e.g. ``"REGION^87490"``.

    Returns:
        The value of the ``"properties"`` key of the response, i.e. the
        parsed list of property records (not the raw response text).

    Raises:
        httpx.HTTPStatusError: If the API answers with a 4xx/5xx status.
        KeyError: If the JSON response has no ``"properties"`` key.
    """
    RESULTS_PER_PAGE = 24

    def make_url(offset: int) -> str:
        """Return the search API URL for the page that starts at ``offset``."""
        url = "https://www.rightmove.co.uk/api/_search?"
        params = {
            "areaSizeUnit": "sqft",
            "channel": "RENT",  # BUY or RENT
            "currencyCode": "GBP",
            "includeSSTC": "false",
            "index": offset,
            "isFetching": "false",
            "locationIdentifier": location_id,
            "numberOfPropertiesPerPage": RESULTS_PER_PAGE,
            "radius": "0.0",
            "sortType": "6",
            "viewType": "LIST",
        }
        return url + urlencode(params)

    url = make_url(0)
    print(f"Requesting URL: {url}")
    first_page = await client.get(url)
    print(f"Status: {first_page.status_code}")
    # Stop on an error status (after logging it above) rather than failing
    # later while decoding an HTML error page as JSON.
    first_page.raise_for_status()
    first_page_data = first_page.json()
    results = first_page_data["properties"]
    print(f"Found {len(results)} properties")
    return results


In [159]:
london_results = await scrape_search(london_id)
with open("rightmove_debug_response.json", "w", encoding="utf-8") as f:
    f.write(json.dumps(london_results, indent=2))
print(london_results[:1000])  # Print first 1000 characters to inspect



Requesting URL: https://www.rightmove.co.uk/api/_search?areaSizeUnit=sqft&channel=RENT&currencyCode=GBP&includeSSTC=false&index=0&isFetching=false&locationIdentifier=REGION%5E87490&numberOfPropertiesPerPage=24&radius=0.0&sortType=6&viewType=LIST
Status: 200
Found 25 properties
[{'id': 162632486, 'bedrooms': 2, 'bathrooms': 1, 'numberOfImages': 8, 'numberOfFloorplans': 0, 'numberOfVirtualTours': 0, 'summary': 'Located in the heart of Fulham, this charming split-level period conversion offers two large double bedrooms and a bright, airy living space. The open-plan kitchen and living room measures approximately 15\'9" x 14\'10", perfect for modern living and entertaining. Situated on a...', 'displayAddress': 'Mirabel Road, Fulham, London, SW6', 'countryCode': 'GB', 'location': {'latitude': 51.48128, 'longitude': -0.204159}, 'propertyImages': {'images': [{'url': '45k/44364/162632486/44364_LRS2001D9A_IMG_00_0000.jpeg', 'caption': None, 'srcUrl': 'https://media.rightmove.co.uk:443/dir/crop/1

In [160]:
pprint(cornwall_results.json())

AttributeError: 'list' object has no attribute 'json'