In [28]:
import asyncio
from playwright.async_api import async_playwright
import nest_asyncio
import json # To print results nicely

# Apply nest_asyncio for Jupyter compatibility
nest_asyncio.apply()

# --- Configuration ---
# Listing page that the scraper opens (the URL path targets Imperial College London).
LONDON_URL = "https://en.uhomes.com/uk/london/imperial-college-london-icl/u96a1-sorta2"

# --- Pop-up selectors ---
# "Allow All" button on the cookie consent banner.
COOKIE_ALLOW_SELECTOR = 'div.consent_button.all_action_button:has-text("Allow All")'
# Close icon of the 'Dream Home' dialog. Target markup:
#   <i class="el-dialog__close el-icon el-icon-close"></i>
# An earlier candidate, 'div.lead-form-dialog div.close', used to be assigned to
# this same name first and was silently overwritten; it is kept here only as a note.
# NOTE(review): the recorded runs show a TimeoutError for both candidates -- confirm
# which selector (if any) actually matches the dialog.
DREAM_HOME_CLOSE_SELECTOR = 'i.el-dialog__close.el-icon-close'

# --- Scraping selectors ---
# LISTING_SELECTOR matches one card per listing; the remaining selectors are
# resolved relative to a single card.
LISTING_SELECTOR = 'a.house-card.list'
NAME_SELECTOR = 'h2.title'
ADDRESS_SELECTOR = 'div.location'
PRICE_SELECTOR = 'span.current-price'
AMENITIES_SELECTOR = 'div.tags-box span.item'
# ---------------------------------------------------

print("Libraries imported. All pop-up and scraping selectors defined.")

Libraries imported. All pop-up and scraping selectors defined.


In [31]:
async def _dismiss_popups(page):
    """Best-effort dismissal of the cookie banner, then the 'Dream Home' dialog.

    Each click is attempted independently. Any failure (typically a timeout
    because the element never showed up) is logged and otherwise ignored so
    that scraping can continue.
    """
    # --- 1. Attempt to click the cookie banner (FIRST) ---
    try:
        print(f"  - Looking for cookie banner: {COOKIE_ALLOW_SELECTOR}")
        await page.locator(COOKIE_ALLOW_SELECTOR).click(timeout=7000)
        print("  - Clicked cookie banner.")
        await page.wait_for_timeout(1000)
    except Exception as e:
        print(f"  - Could not click cookie banner: {type(e).__name__}")

    # --- 2. Attempt to close 'Dream Home' pop-up (SECOND) ---
    try:
        print(f"  - Looking for Dream Home close button: {DREAM_HOME_CLOSE_SELECTOR}")
        # 70 s budget kept unchanged.
        # NOTE(review): recorded runs show this click timing out, so every run
        # pays the full wait -- consider lowering once the selector is confirmed.
        await page.locator(DREAM_HOME_CLOSE_SELECTOR).click(timeout=10 * 7000)
        print("  - Clicked 'Dream Home' close button.")
        await page.wait_for_timeout(1000)
    except Exception as e:
        print(f"  - Could not click 'Dream Home' close button: {type(e).__name__}")


async def _extract_listing(listing):
    """Read one listing card locator and return its fields as a dict.

    Missing text fields become "N/A". Raises whatever Playwright raises when a
    field cannot be read within its 2 s timeout; the caller decides how to
    handle that.
    """
    name = await listing.locator(NAME_SELECTOR).text_content(timeout=2000)
    address = await listing.locator(ADDRESS_SELECTOR).text_content(timeout=2000)
    price = await listing.locator(PRICE_SELECTOR).text_content(timeout=2000)
    link = await listing.get_attribute('href')

    # One call fetches the text of every amenity tag instead of awaiting each
    # element separately; blank entries are dropped and whitespace is stripped.
    raw_amenities = await listing.locator(AMENITIES_SELECTOR).all_text_contents()
    amenities = [text.strip() for text in raw_amenities if text and text.strip()]

    # Site-relative links are turned into absolute URLs; anything else is kept as-is.
    full_link = f"https://en.uhomes.com{link}" if link and link.startswith('/') else link

    return {
        "name": name.strip() if name else "N/A",
        "address": address.strip() if address else "N/A",
        "price_per_week": price.strip() if price else "N/A",
        "amenities": amenities,
        "link": full_link,
    }


def _save_listings(records, output_path):
    """Write the scraped records to ``output_path`` as UTF-8 JSON, logging the outcome."""
    if not records:
        print("No data was scraped, so no file was saved.")
        return
    try:
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(records, f, ensure_ascii=False, indent=4)
        print(f"Successfully saved {len(records)} listings to {output_path}")
    except Exception as e:
        print(f"Error saving data to JSON: {e}")


async def scrape_london_uhomes(url=None, headless=False, output_path='uhomes_data.json'):
    """
    Opens uhomes.com, handles pop-ups, and scrapes listing data.

    Args:
        url: Listing page to open. Defaults to ``LONDON_URL`` (resolved at call
            time, so re-running the configuration cell takes effect).
        headless: Passed to ``chromium.launch``. Defaults to False, which keeps
            the browser window visible.
        output_path: JSON file that receives the scraped records.

    Returns:
        A list of dicts with the keys ``name``, ``address``, ``price_per_week``,
        ``amenities`` (list of str) and ``link``. The list is empty when nothing
        could be scraped.

    Raises:
        Errors from starting Playwright or launching the browser propagate to
        the caller. Errors raised while navigating or scraping are caught,
        logged, and followed by a best-effort screenshot.
    """
    target_url = LONDON_URL if url is None else url

    print("Starting Playwright...")
    scraped_data = []  # List to hold our results
    p = await async_playwright().start()
    browser = None
    try:
        # Launch inside the try block so a failed launch still stops Playwright.
        browser = await p.chromium.launch(headless=headless)
        page = await browser.new_page()
        print(f"Playwright started. Navigating to {target_url}...")

        try:
            # Go to the page and give late-loading widgets a moment to appear.
            await page.goto(target_url, wait_until="networkidle", timeout=60000)
            print("Page loaded. Waiting 3 seconds...")
            await page.wait_for_timeout(3000)

            await _dismiss_popups(page)

            print("\nPop-up handling done. Starting scraping...")
            await page.wait_for_timeout(3000)  # Final wait before scraping

            # --- 3. Scrape the listings ---
            print(f"Looking for listings with selector: {LISTING_SELECTOR}")
            await page.wait_for_selector(LISTING_SELECTOR, timeout=20000)
            listings = await page.locator(LISTING_SELECTOR).all()
            print(f"Found {len(listings)} listings on this page.")

            for i, listing in enumerate(listings):
                print(f"  - Processing listing {i+1}...")
                try:
                    record = await _extract_listing(listing)
                    scraped_data.append(record)
                    print(f"    -> Scraped: {record['name']}")
                except Exception as e:
                    # One bad card must not abort the rest of the page.
                    print(f"    -> FAILED to process listing {i+1}: {e}")

                await page.wait_for_timeout(50)  # Tiny delay between listings

        except Exception as e:
            print(f"An major error occurred during scraping: {e}")
            # The screenshot is diagnostic only. If it fails too (e.g. the page
            # is already closed) that must not discard the data gathered so far.
            try:
                await page.screenshot(path="scraping_error_screenshot.png")
                print("Saved screenshot to scraping_error_screenshot.png")
            except Exception as screenshot_error:
                print(f"Could not save error screenshot: {screenshot_error}")

    finally:
        # Ensure the browser and the Playwright driver are always shut down.
        print("Closing browser...")
        try:
            if browser is not None:
                await browser.close()
        finally:
            await p.stop()
        print("Browser closed.")

    # Save the data to a JSON file (skipped with a message when nothing was scraped).
    _save_listings(scraped_data, output_path)

    return scraped_data  # Still return the data

In [14]:
import pandas as pd

In [6]:

# Render the scraped records (a list of dicts) as a DataFrame for inspection.
# NOTE(review): in file order this cell comes before the cell that assigns
# `all_scraped_data`, so it depends on kernel state left by an earlier run.
pd.DataFrame(all_scraped_data)

Unnamed: 0,name,address,price_per_week,amenities,link
0,Canvas Student Arthur House,"1 Rutherford Way, London HA9 0BP, United Kingdom",£349,"[ Verified,  Student Accommodation, Parcels ...",https://en.uhomes.com/uk/london/detail-apartme...
1,Chapter Lewisham,"46 Thurston Road, London SE13 7SD, United Kingdom",£308,"[ Verified,  Student Accommodation, Free reg...",https://en.uhomes.com/uk/london/detail-apartme...
2,Chapter London Bridge,"48-50 Weston Street, London SE1 3QJ, United Ki...",£493,"[ Verified,  Student Accommodation, Sky Gard...",https://en.uhomes.com/uk/london/detail-apartme...
3,iQ Vega,"6 Miles Street Vauxhall, London SW8 1RZ, Unite...",£422,"[ Verified,  Student Accommodation, Live wit...",https://en.uhomes.com/uk/london/detail-apartme...
4,HL-Skyline Apartments,"11 Makers Yard, London E3 3JX, United Kingdom",£578.02,"[ Apartment, Parcels collect, Elevator]",https://en.uhomes.com/uk/london/detail-apartme...
5,Vita Student Lewisham Exchange,"Exchange Point, Loampit Vale, London SE13 7NX,...",£355,"[ Verified,  Student Accommodation, Breakfas...",https://en.uhomes.com/uk/london/detail-apartme...
6,Downing The Lyra,"North Acton, London W3 6BX, United Kingdom",£343,"[ Verified,  Student Accommodation, ""Operato...",https://en.uhomes.com/uk/london/detail-apartme...
7,Scape Hammersmith,"Talgarth Road, London W6 8DN, United Kingdom",£403,"[ Verified,  Student Accommodation, with air...",https://en.uhomes.com/uk/london/detail-apartme...
8,Portchester House,"50 New Kent Road, London SE1 6SH, United Kingdom",£441.02,"[ Verified,  Student Accommodation, Parcels ...",https://en.uhomes.com/uk/london/detail-apartme...
9,Emily Bowes Court,"6 Lebus Street, London N17 9FD, United Kingdom",£272,"[ Verified,  Student Accommodation, Near Sub...",https://en.uhomes.com/uk/london/detail-apartme...


In [10]:
# Run the full function
print("Executing the full scraping script...")
all_scraped_data = await scrape_london_uhomes()
print("\n--- Scraping Finished ---")

# Print the results in a readable format
print(f"Total listings scraped: {len(all_scraped_data)}\n")
print("--- First 5 Results ---")
print(json.dumps(all_scraped_data[:5], indent=2)) # Using json for pretty printing
print("-----------------------")

# You can now use 'all_scraped_data' (which is a list of dictionaries)
# for your real-time tracking application (e.g., save to a database, compare prices).

Executing the full scraping script...
Starting Playwright...
Playwright started. Navigating to https://en.uhomes.com/uk/london/imperial-college-london-icl...
Page loaded. Waiting 3 seconds...
  - Looking for cookie banner: div.consent_button.all_action_button:has-text("Allow All")
  - Clicked cookie banner.
  - Looking for Dream Home close button: div.lead-form-dialog div.close
  - Could not click 'Dream Home' close button: TimeoutError

Pop-up handling done. Starting scraping...
Looking for listings with selector: a.house-card.list
Found 12 listings on this page.
  - Processing listing 1...
    -> Scraped: Fifty One London
  - Processing listing 2...
    -> Scraped: Downing The Lyra
  - Processing listing 3...
    -> Scraped: Chapter Ealing
  - Processing listing 4...
    -> Scraped: iQ Hammersmith
  - Processing listing 5...
    -> Scraped: Chelsea Lightfoot Hall
  - Processing listing 6...
    -> Scraped: Scape Hammersmith
  - Processing listing 7...
    -> Scraped: Chapter Portobel

In [20]:
# Render the scraped records (a list of dicts) as a DataFrame for inspection.
pd.DataFrame(all_scraped_data)

Unnamed: 0,name,address,price_per_week,amenities,link
0,Fifty One London,"Cromwell Road, London SW7 2EH, United Kingdom",£828,"[ Verified,  Student Accommodation, Free reg...",https://en.uhomes.com/uk/london/detail-apartme...
1,Downing The Lyra,"North Acton, London W3 6BX, United Kingdom",£343,"[ Verified,  Student Accommodation, ""Operato...",https://en.uhomes.com/uk/london/detail-apartme...
2,Chapter Ealing,"Victoria Road, London W3 6UN, United Kingdom",£345,"[ Verified,  Student Accommodation, No Depos...",https://en.uhomes.com/uk/london/detail-apartme...
3,iQ Hammersmith,"230 Shepherds Bush Road, London W6 7NL, United...",£435,"[ Verified,  Student Accommodation, Referral...",https://en.uhomes.com/uk/london/detail-apartme...
4,Chelsea Lightfoot Hall,"Manresa Road, London SW3 6NA, United Kingdom",£218.02,"[ Verified,  Student Accommodation, Walk to ...",https://en.uhomes.com/uk/london/detail-apartme...
5,Scape Hammersmith,"Talgarth Road, London W6 8DN, United Kingdom",£403,"[ Verified,  Student Accommodation, with air...",https://en.uhomes.com/uk/london/detail-apartme...
6,Chapter Portobello,"1 Alderson Street, London W10 5JY, United Kingdom",£307,"[ Verified,  Student Accommodation, No Depos...",https://en.uhomes.com/uk/london/detail-apartme...
7,Chapter White City,"10 Westway Shepherd's Bush, London W12 0DD, Un...",£397,"[ Verified,  Student Accommodation, No Depos...",https://en.uhomes.com/uk/london/detail-apartme...
8,Dolphin Square,"Chichester Street, London SW1V 3LX, United Kin...","£1,909","[ Verified,  Apartment, Walk to school, Doub...",https://en.uhomes.com/uk/london/detail-apartme...
9,Ravenscourt House,"3 Paddenswick Rd, London W6 0BY, United Kingdom",£349.41,"[ Verified,  Student Accommodation, Bills in...",https://en.uhomes.com/uk/london/detail-apartme...


In [30]:
# Render the scraped records (a list of dicts) as a DataFrame for inspection.
pd.DataFrame(all_scraped_data)

Unnamed: 0,name,address,price_per_week,amenities,link
0,The Other House South Kensington,"15-17 Harrington Gardens, South Kensington, Lo...","£1,050","[ Student Accommodation, Bills included, 24 h...",https://en.uhomes.com/uk/london/detail-apartme...
1,Fifty One London,"Cromwell Road, London SW7 2EH, United Kingdom",£828,"[ Verified,  Student Accommodation, Free reg...",https://en.uhomes.com/uk/london/detail-apartme...
2,Scape Kings Cross,"30 Thanet Street, London WC1H 9QH, United Kingdom",£689.02,"[ Verified,  Student Accommodation, Near rai...",https://en.uhomes.com/uk/london/detail-apartme...
3,iQ Bloomsbury,"200 Euston Road, London NW1 2FA, United Kingdom",£666.02,"[ Verified,  Student Accommodation, Referral...",https://en.uhomes.com/uk/london/detail-apartme...
4,Gob Spitalfields Residence,"9 Frying Pan Alley, London E1 7HS, United Kingdom",£599,"[ Verified,  Student Accommodation, Bills in...",https://en.uhomes.com/uk/london/detail-apartme...
5,Lit Westminster Residence,"200A Lambeth Road, London SE1 7LR, United Kingdom",£590,"[ Student Accommodation, No Deposit, Bills in...",https://en.uhomes.com/uk/london/detail-apartme...
6,Gob Aldgate Residence,"1-2 Education Square, London E1 1FA, United Ki...",£580,"[ Student Accommodation, Walk to school, 24 h...",https://en.uhomes.com/uk/london/detail-apartme...
7,Gob Westminster Residence,"200A Lambeth Road, London SE1 7LR, United Kingdom",£575,"[ Student Accommodation, Bills included, Gym]",https://en.uhomes.com/uk/london/detail-apartme...
8,Lit Old Street Residence,"18 Paul Street, London EC2A 4JH, United Kingdom",£575,"[ Verified,  Student Accommodation, No Depos...",https://en.uhomes.com/uk/london/detail-apartme...
9,Lit Spitalfields Residence,"9 Frying Pan Alley, London E1 7HS, United Kingdom",£575,"[ Verified,  Student Accommodation, No Depos...",https://en.uhomes.com/uk/london/detail-apartme...


In [32]:
# Run the full function
print("Executing the full scraping script...")
all_scraped_data = await scrape_london_uhomes()
print("\n--- Scraping Finished ---")

# Print the results in a readable format
print(f"Total listings scraped: {len(all_scraped_data)}\n")
print("--- First 5 Results ---")
print(json.dumps(all_scraped_data[:5], indent=2)) # Using json for pretty printing
print("-----------------------")

# You can now use 'all_scraped_data' (which is a list of dictionaries)
# for your real-time tracking application (e.g., save to a database, compare prices).

Executing the full scraping script...
Starting Playwright...
Playwright started. Navigating to https://en.uhomes.com/uk/london/imperial-college-london-icl/u96a1-sorta2...
Page loaded. Waiting 3 seconds...
  - Looking for cookie banner: div.consent_button.all_action_button:has-text("Allow All")
  - Clicked cookie banner.
  - Looking for Dream Home close button: i.el-dialog__close.el-icon-close
  - Could not click 'Dream Home' close button: TimeoutError

Pop-up handling done. Starting scraping...
Looking for listings with selector: a.house-card.list
Found 12 listings on this page.
  - Processing listing 1...
    -> Scraped: The Other House South Kensington
  - Processing listing 2...
    -> Scraped: Fifty One London
  - Processing listing 3...
    -> Scraped: Scape Kings Cross
  - Processing listing 4...
    -> Scraped: iQ Bloomsbury
  - Processing listing 5...
    -> Scraped: Gob Spitalfields Residence
  - Processing listing 6...
    -> Scraped: Lit Westminster Residence
  - Processing 

In [35]:
# --- Load Data ---
# @st.cache_data # Cache the data loading for performance
def load_data(filepath='uhomes_data.json'):
    """Load scraped listings from a JSON file into a DataFrame.

    Args:
        filepath: Path of the JSON file to read. The default matches the file
            name the scraper writes.

    Returns:
        A pandas DataFrame built from the file's contents, or None when the
        file does not exist (a message is printed in that case).

    Raises:
        json.JSONDecodeError: If the file exists but does not contain valid JSON.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        # Report the missing file instead of returning None silently, so the
        # caller knows why no frame came back.
        print(f"Error: The data file '{filepath}' was not found. "
              "Please run the scraper first to generate it.")
        return None
    return pd.DataFrame(data)

In [36]:
# Load the default file ('uhomes_data.json') and display the resulting DataFrame.
load_data()

Unnamed: 0,name,address,price_per_week,amenities,link
0,The Other House South Kensington,"15-17 Harrington Gardens, South Kensington, Lo...","£1,050","[ Student Accommodation, Bills included, 24 h...",https://en.uhomes.com/uk/london/detail-apartme...
1,Fifty One London,"Cromwell Road, London SW7 2EH, United Kingdom",£828,"[ Verified,  Student Accommodation, Free reg...",https://en.uhomes.com/uk/london/detail-apartme...
2,Scape Kings Cross,"30 Thanet Street, London WC1H 9QH, United Kingdom",£689.02,"[ Verified,  Student Accommodation, Near rai...",https://en.uhomes.com/uk/london/detail-apartme...
3,iQ Bloomsbury,"200 Euston Road, London NW1 2FA, United Kingdom",£666.02,"[ Verified,  Student Accommodation, Referral...",https://en.uhomes.com/uk/london/detail-apartme...
4,Gob Spitalfields Residence,"9 Frying Pan Alley, London E1 7HS, United Kingdom",£599,"[ Verified,  Student Accommodation, Bills in...",https://en.uhomes.com/uk/london/detail-apartme...
5,Lit Westminster Residence,"200A Lambeth Road, London SE1 7LR, United Kingdom",£590,"[ Student Accommodation, No Deposit, Bills in...",https://en.uhomes.com/uk/london/detail-apartme...
6,Gob Aldgate Residence,"1-2 Education Square, London E1 1FA, United Ki...",£580,"[ Student Accommodation, Walk to school, 24 h...",https://en.uhomes.com/uk/london/detail-apartme...
7,Gob Westminster Residence,"200A Lambeth Road, London SE1 7LR, United Kingdom",£575,"[ Student Accommodation, Bills included, Gym]",https://en.uhomes.com/uk/london/detail-apartme...
8,Lit Old Street Residence,"18 Paul Street, London EC2A 4JH, United Kingdom",£575,"[ Verified,  Student Accommodation, No Depos...",https://en.uhomes.com/uk/london/detail-apartme...
9,Lit Spitalfields Residence,"9 Frying Pan Alley, London E1 7HS, United Kingdom",£575,"[ Verified,  Student Accommodation, No Depos...",https://en.uhomes.com/uk/london/detail-apartme...


In [23]:
# Render the scraped records (a list of dicts) as a DataFrame for inspection.
pd.DataFrame(all_scraped_data)

Unnamed: 0,name,address,price_per_week,amenities,link
0,Fifty One London,"Cromwell Road, London SW7 2EH, United Kingdom",£828,"[ Verified,  Student Accommodation, Free reg...",https://en.uhomes.com/uk/london/detail-apartme...
1,Downing The Lyra,"North Acton, London W3 6BX, United Kingdom",£343,"[ Verified,  Student Accommodation, ""Operato...",https://en.uhomes.com/uk/london/detail-apartme...
2,Chapter Ealing,"Victoria Road, London W3 6UN, United Kingdom",£345,"[ Verified,  Student Accommodation, No Depos...",https://en.uhomes.com/uk/london/detail-apartme...
3,iQ Hammersmith,"230 Shepherds Bush Road, London W6 7NL, United...",£435,"[ Verified,  Student Accommodation, Referral...",https://en.uhomes.com/uk/london/detail-apartme...
4,Chelsea Lightfoot Hall,"Manresa Road, London SW3 6NA, United Kingdom",£218.02,"[ Verified,  Student Accommodation, Walk to ...",https://en.uhomes.com/uk/london/detail-apartme...
5,Scape Hammersmith,"Talgarth Road, London W6 8DN, United Kingdom",£403,"[ Verified,  Student Accommodation, with air...",https://en.uhomes.com/uk/london/detail-apartme...
6,Chapter Portobello,"1 Alderson Street, London W10 5JY, United Kingdom",£307,"[ Verified,  Student Accommodation, No Depos...",https://en.uhomes.com/uk/london/detail-apartme...
7,Chapter White City,"10 Westway Shepherd's Bush, London W12 0DD, Un...",£397,"[ Verified,  Student Accommodation, No Depos...",https://en.uhomes.com/uk/london/detail-apartme...
8,Dolphin Square,"Chichester Street, London SW1V 3LX, United Kin...","£1,909","[ Verified,  Apartment, Walk to school, Doub...",https://en.uhomes.com/uk/london/detail-apartme...
9,Ravenscourt House,"3 Paddenswick Rd, London W6 0BY, United Kingdom",£349.41,"[ Verified,  Student Accommodation, Bills in...",https://en.uhomes.com/uk/london/detail-apartme...
