# Zillow Real-Estate Scraper
Scrape property data from www.zillow.com

In [1]:
import csv
import json
from time import sleep
from random import randint
import requests
from bs4 import BeautifulSoup

# Search query passed as `params` to requests.get() for every page fetched.
# Boolean-like settings are the strings 'true'/'false'/'null', not Python
# booleans/None.
# NOTE(review): the short filter keys ("mp", "con", "mf", "manu", "land",
# "tow", "apa") are presumably Zillow's abbreviations for monthly payment and
# home types -- confirm against the site before changing them.
params = dict(
    usersSearchTerm='null',
    # bounding box of the map area being searched
    mapBounds=dict(
        west=-81.23255562207031,
        east=-80.42918037792968,
        south=34.9751801306342,
        north=35.44307306884572,
    ),
    isMapVisible='false',
    filterState=dict(
        price=dict(min=200000, max=400000),
        mp=dict(min=689, max=1377),
        beds=dict(min=4),
        sort=dict(value="globalrelevanceex"),
        # every one of these filters is switched off with the string 'false'
        **{
            flag: dict(value='false')
            for flag in ("con", "mf", "manu", "land", "tow", "apa")
        },
    ),
    isListVisible='true',
    mapZoom=11,
)

# Request headers sent with every page fetch. They mimic a desktop browser
# (see the user-agent string) rather than the default python-requests client.
headers = dict([
    ('accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,'
               'image/webp,image/apng,*/*;q=0.8,'
               'application/signed-exchange;v=b3;q=0.9'),
    ('accept-encoding', 'gzip, deflate, br'),
    ('accept-language', 'en-US,en;q=0.9'),
    ('sec-fetch-dest', 'document'),
    ('sec-fetch-mode', 'navigate'),
    ('sec-fetch-site', 'none'),
    ('sec-fetch-user', '?1'),
    ('upgrade-insecure-requests', '1'),
    ('user-agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                   'AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Chrome/85.0.4183.83 Safari/537.36 Edg/85.0.564.44'),
])

# Walk the paginated search results starting at `url`, collecting the
# `homeInfo` dict of every listing into `listings`. The loop stops when the
# page data offers no further page.
url = 'https://www.zillow.com/homes/charlotte,-nc_rb/'
listings = []

while True:
    print(url)

    # request raw html; the timeout keeps a stalled connection from hanging
    # the scrape forever, and HTTP errors are raised instead of being parsed
    response = requests.get(url, params=params, headers=headers, timeout=30)
    response.raise_for_status()

    # parse raw html
    soup = BeautifulSoup(response.text, 'html.parser')

    # extract the script containing the search results
    script = soup.find('script', {'data-zrr-shared-data-key': 'mobileSearchPageStore'})
    if script is None or not script.contents:
        # without this guard the failure would be an opaque AttributeError
        raise RuntimeError(
            'search results script not found in response for ' + url
        )

    # remove the comment formatting: the JSON is wrapped in an HTML comment,
    # so drop the leading 4 and trailing 3 characters
    json_str = script.contents[0][4:-3]

    # convert the json string to python dictionary
    json_data = json.loads(json_str)
    search_data = json_data['cat1']

    # extract page results from json data
    page_results = search_data['searchResults']['listResults']

    # extract listing details from json data; results without an 'hdpData'
    # entry (or with an empty one) are skipped instead of raising
    for result in page_results:
        home_data = (result.get('hdpData') or {}).get('homeInfo')
        if home_data:
            listings.append(home_data)

    # find the next page; a missing/empty pagination entry or nextUrl means
    # there is nothing more to fetch
    pagination = search_data['searchList'].get('pagination') or {}
    page = pagination.get('nextUrl')
    if not page or page.endswith('homes/'):
        break
    url = 'https://www.zillow.com' + page

    # create random delay to prevent blocking
    delay = randint(10, 20)
    sleep(delay)

    
# get list of field names: listings do not necessarily share the same keys,
# so the CSV header is the sorted union of every key seen in any listing
distinct_fields = set()
for row in listings:
    distinct_fields.update(row)

fieldnames = sorted(distinct_fields)

# add missing keys in records so every listing carries the full set of
# columns; setdefault only fills absent fields and never overwrites a value
for row in listings:
    for field in fieldnames:
        row.setdefault(field, '')

# newline='' lets the csv module control line endings itself
with open('charlotte_homes.csv', 'w', newline='', encoding='utf-8') as f:
    writer = csv.DictWriter(f, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(listings)

https://www.zillow.com/homes/charlotte,-nc_rb/
https://www.zillow.com/homes/charlotte,-nc_rb/2_p/
https://www.zillow.com/homes/charlotte,-nc_rb/3_p/
https://www.zillow.com/homes/charlotte,-nc_rb/4_p/
https://www.zillow.com/homes/charlotte,-nc_rb/5_p/
https://www.zillow.com/homes/charlotte,-nc_rb/6_p/
https://www.zillow.com/homes/charlotte,-nc_rb/7_p/
https://www.zillow.com/homes/charlotte,-nc_rb/8_p/
https://www.zillow.com/homes/charlotte,-nc_rb/9_p/
https://www.zillow.com/homes/charlotte,-nc_rb/10_p/
https://www.zillow.com/homes/charlotte,-nc_rb/11_p/
https://www.zillow.com/homes/charlotte,-nc_rb/12_p/
https://www.zillow.com/homes/charlotte,-nc_rb/13_p/
https://www.zillow.com/homes/charlotte,-nc_rb/14_p/
https://www.zillow.com/homes/charlotte,-nc_rb/15_p/
https://www.zillow.com/homes/charlotte,-nc_rb/16_p/
https://www.zillow.com/homes/charlotte,-nc_rb/17_p/
https://www.zillow.com/homes/charlotte,-nc_rb/18_p/
https://www.zillow.com/homes/charlotte,-nc_rb/19_p/
https://www.zillow.com/ho

In [2]:
# Display the first collected listing (a 'homeInfo' dict) to inspect its fields.
listings[0]

{'zpid': 6216979,
 'streetAddress': '11105 Renda Ct',
 'zipcode': '28215',
 'city': 'Charlotte',
 'state': 'NC',
 'latitude': 35.249127,
 'longitude': -80.641428,
 'price': 246000.0,
 'dateSold': 1083222000000,
 'bathrooms': 3.0,
 'bedrooms': 3.0,
 'livingArea': 1639.0,
 'homeType': 'SINGLE_FAMILY',
 'homeStatus': 'FOR_SALE',
 'photoCount': 22,
 'imageLink': 'https://photos.zillowstatic.com/p_g/IS3f44qhtcqn8g1000000000.jpg',
 'daysOnZillow': 0,
 'isFeatured': False,
 'shouldHighlight': False,
 'brokerId': 15893,
 'zestimate': 247392,
 'rentZestimate': 1550,
 'listing_sub_type': {'is_FSBA': True},
 'isUnmappable': False,
 'mediumImageLink': 'https://photos.zillowstatic.com/p_c/IS3f44qhtcqn8g1000000000.jpg',
 'isPreforeclosureAuction': False,
 'homeStatusForHDP': 'FOR_SALE',
 'priceForHDP': 246000.0,
 'timeOnZillow': 1599441240000,
 'hiResImageLink': 'https://photos.zillowstatic.com/p_f/IS3f44qhtcqn8g1000000000.jpg',
 'watchImageLink': 'https://photos.zillowstatic.com/p_j/IS3f44qhtcqn8g1