In [None]:
import getpass
import json
import os
import time

import pandas as pd
import requests

In [None]:
def get_zillow_listings(api_key, listing_url, timeout=60):
    """Request a Zillow search page through the Scrapeak listing scraper.

    Args:
        api_key: Scrapeak API key, sent as the ``api_key`` query parameter.
        listing_url: Full Zillow search URL to scrape, sent as the ``url``
            query parameter (``requests`` URL-encodes it).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, which would hang the
            notebook on a stalled connection.

    Returns:
        The ``requests.Response`` as received. The HTTP status is not
        checked here; callers decide how to treat a failed page.

    Raises:
        requests.exceptions.Timeout: If no response arrives within
            ``timeout`` seconds.
    """
    base_url = "https://app.scrapeak.com/v1/scrapers/zillow/listing"
    params = {
        "api_key": api_key,
        "url": listing_url
    }
    return requests.get(base_url, params=params, timeout=timeout)


In [None]:
def extract_listings(response):
    """Pull the list of search results out of a scraper response.

    Reads ``data -> cat1 -> searchResults -> listResults`` from the
    response's JSON body.

    Args:
        response: Object exposing a ``json()`` method (e.g. a
            ``requests.Response``).

    Returns:
        The ``listResults`` list, or ``[]`` (after printing a warning) when
        the body is not valid JSON, a key along the path is missing, an
        intermediate node is not a mapping, or ``listResults`` is not a list.
    """
    try:
        listings = response.json()['data']['cat1']['searchResults']['listResults']
    except (KeyError, TypeError, ValueError):
        # KeyError: a key on the path is absent.
        # TypeError: an intermediate node is null / not subscriptable by key.
        # ValueError: the body is not JSON (JSON decode errors subclass it).
        print("⚠️ Unexpected JSON structure. Skipping page.")
        return []

    if not isinstance(listings, list):
        # e.g. "listResults": null — callers iterate over the return value.
        print("⚠️ Unexpected JSON structure. Skipping page.")
        return []
    return listings


In [None]:
def format_listings(raw_listings):
    """Flatten raw search results into one record per single-family home.

    Only listings whose ``hdpData.homeInfo.homeType`` equals
    ``'SINGLE_FAMILY'`` are kept. Listings lacking ``hdpData``, ``homeInfo``
    or ``homeType`` are skipped instead of raising, and any field missing
    from a kept listing (including ``zpid`` and the ``latLong``
    coordinates) is reported as ``None``.

    Args:
        raw_listings: Iterable of listing dicts; ``None`` is treated as empty.

    Returns:
        A list of flat dicts with the keys ``zpid``, ``url``, ``img``,
        ``price``, ``address``, ``beds``, ``baths``, ``area``, ``homeType``,
        ``latitude``, ``longitude``, ``zestimate``, ``rentZestimate``,
        ``daysOnZillow``, ``priceChange``, ``datePriceChanged`` and
        ``availabilityDate``.
    """
    formatted = []
    for x in raw_listings or []:
        # `or {}` also covers keys that are present but null.
        home_info = (x.get('hdpData') or {}).get('homeInfo') or {}
        if home_info.get('homeType') != 'SINGLE_FAMILY':
            continue

        lat_long = x.get('latLong') or {}
        formatted.append({
            'zpid': x.get('zpid'),
            'url': x.get('detailUrl'),
            'img': x.get('imgSrc'),
            'price': x.get('unformattedPrice'),
            'address': x.get('address'),
            'beds': x.get('beds'),
            'baths': x.get('baths'),
            'area': x.get('area'),
            'homeType': home_info.get('homeType'),
            'latitude': lat_long.get('latitude'),
            'longitude': lat_long.get('longitude'),
            'zestimate': home_info.get('zestimate'),
            'rentZestimate': home_info.get('rentZestimate'),
            'daysOnZillow': home_info.get('daysOnZillow'),
            'priceChange': home_info.get('priceChange'),
            'datePriceChanged': home_info.get('datePriceChanged'),
            'availabilityDate': x.get('availabilityDate'),
        })
    return formatted


In [None]:
# Prompt for the ScrapeAK key without echoing it, so it never lands in the notebook
api_key = getpass.getpass(
    prompt="🔐 Enter your ScrapeAK API key: ",
)

🔐 Enter your ScrapeAK API key: ··········


In [None]:
# URL prefix for the Zillow San Francisco search; a URL-encoded search-state
# JSON is appended after "searchQueryState=" to select a results page
base_url = "https://www.zillow.com/san-francisco-ca/?searchQueryState="


In [None]:
# Shared Zillow search state for San Francisco; the per-page code adds the
# pagination entry, everything else stays constant
search_state_template = dict(
    isMapVisible=True,
    mapBounds=dict(
        north=37.842914,
        south=37.707608,
        east=-122.32992,
        west=-122.536739,
    ),
    usersSearchTerm="San Francisco, CA",
    filterState=dict(sort=dict(value="globalrelevanceex")),
    isListVisible=True,
    regionSelection=[dict(regionId=20330, regionType=6)],
    mapZoom=12,
)

In [None]:
# Scrape result pages 1-5 and accumulate the formatted single-family listings.
# The list is reset here so re-running this cell does not duplicate rows.
all_listings = []
for page in range(1, 6):
    # Merge into a fresh top-level dict so the shared template is never mutated
    search_state = {**search_state_template, "pagination": {"currentPage": page}}

    # The search state travels as URL-encoded JSON appended to base_url
    encoded_state = requests.utils.quote(json.dumps(search_state))
    full_url = f"{base_url}{encoded_state}"

    print(f"📄 Scraping Page {page}...")
    response = get_zillow_listings(api_key, full_url)
    listings = extract_listings(response)
    formatted = format_listings(listings)
    all_listings.extend(formatted)
    time.sleep(2)  # Be polite

📄 Scraping Page 1...
📄 Scraping Page 2...
📄 Scraping Page 3...
📄 Scraping Page 4...
📄 Scraping Page 5...


In [None]:
# Collect the scraped listings into a DataFrame, one row per listing
# (nothing is written to disk here)
df = pd.DataFrame(all_listings)

In [None]:
# Preview the first four rows of the listings table
df.head(4)

Unnamed: 0,zpid,url,img,price,address,beds,baths,area,homeType,latitude,longitude,zestimate,rentZestimate,daysOnZillow,priceChange,datePriceChanged,availabilityDate
0,455063793,https://www.zillow.com/homedetails/839-Cole-St...,https://photos.zillowstatic.com/fp/01b0c6b587e...,1395000,"839 Cole St #839, San Francisco, CA 94117",2,2.0,1240,SINGLE_FAMILY,37.76624,-122.450294,,,7,,,
1,15153199,https://www.zillow.com/homedetails/939-Innes-A...,https://photos.zillowstatic.com/fp/13f7087e3aa...,759000,"939 Innes Ave, San Francisco, CA 94124",2,2.0,1475,SINGLE_FAMILY,37.732243,-122.376724,,4209.0,12,,,
2,2060418645,https://www.zillow.com/homedetails/2360-Union-...,https://photos.zillowstatic.com/fp/4fdc29c5e53...,899000,"2360 Union St APT 2, San Francisco, CA 94123",2,1.0,0,SINGLE_FAMILY,37.796978,-122.43822,880300.0,4862.0,76,-96000.0,1750835000000.0,
3,15128966,https://www.zillow.com/homedetails/173-Warren-...,https://photos.zillowstatic.com/fp/145eb4df113...,1249000,"173 Warren Dr, San Francisco, CA 94131",3,2.0,1590,SINGLE_FAMILY,37.75345,-122.45961,,5274.0,10,,,


In [None]:
# (rows, columns) of the listings table
df.shape

(132, 17)

In [None]:
# Column labels of the listings table
df.columns

Index(['zpid', 'url', 'img', 'price', 'address', 'beds', 'baths', 'area',
       'homeType', 'latitude', 'longitude', 'zestimate', 'rentZestimate',
       'daysOnZillow', 'priceChange', 'datePriceChanged', 'availabilityDate'],
      dtype='object')