In [31]:
# Author:       Andrey Norin
# Title :       Scrape Suffolk County Real Estate Listings
# Date Created: 11/20/2022
# Date Updated: 11/26/2022
# based on tutorial: https://medium.com/@knappik.marco/python-web-scraping-how-to-scrape-the-api-of-a-real-estate-website-dc8136e56249

In [48]:
from datetime import date
import requests 
import json 
import pandas as pd 

In [49]:
# Export file details: the CSV is named "<county>_<YYYY-MM-DD>.csv".
countyName = "Suffolk"
todaysDate = date.today().isoformat()  # same text as str(date.today())
csvFileName = f"{countyName}_{todaysDate}.csv"

In [50]:
def send_request(page_number: int, offset_parameter: int, timeout: float = 30.0):
    """POST one listing-search request and return the decoded JSON response.

    Args:
        page_number: Written into the payload as ``variables.page_index`` and
            as ``seoPayload``.
        offset_parameter: Written into the payload as ``variables.offset``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a single stalled connection would block the whole scrape.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    url = "https://www.realtor.com/api/v1/hulk?client_id=rdc-x&schema=vesta"
    headers = {"content-type": "application/json"}

    # Request template: a GraphQL query plus fixed search criteria
    # (Suffolk County, NY / for_sale). Kept as a raw string so the "\n"
    # escapes are decoded by json.loads rather than by Python.
    body = r'{"query":"\nquery ConsumerSearchQuery($query: HomeSearchCriteria!) {\n  rich_snippets: home_search(query: $query,\n    sort:{field: home_phrases_score, direction: desc},\n    limit: 20,\n  ){\n    count\n    total\n    results {\n      property_id\n      list_price\n      description {\n        beds\n        baths\n        baths_full\n        baths_half\n        baths_3qtr\n        sqft\n      }\n      primary_photo(https: true) {\n        href\n      }\n      permalink\n      href\n      flags {\n        is_pending\n        is_foreclosure\n        is_contingent\n        is_new_construction\n        is_new_listing (days: 14)\n        is_price_reduced (days: 30)\n        is_plan\n      }\n      location {\n        address {\n          city\n          country\n          line\n          postal_code\n          state_code\n          state\n        }\n      }\n      open_houses {\n        start_date\n        end_date\n      }\n      sentences_list: highlights(highlight_type: phrase_srp_spl_homes) {\n        ... on HighlightPhrase {\n          phrase\n          text\n        }\n      }\n    }\n  }\n}","callfrom":"SRP","nrQueryType":"SPECIAL_FEATURES","cacheKey":"SPECIAL_FEATURES","cacheParams":"/special_features/Suffolk-County_NY","variables":{"query":{"search_location":{"location":"Suffolk County, NY"},"status":"for_sale"}},"isClient":true}'
    json_body = json.loads(body)

    # NOTE(review): the query text above declares only `$query` and hard-codes
    # `limit: 20`; `page_index` and `offset` are added to `variables` but are
    # not referenced by the query itself. Confirm the endpoint honors them,
    # otherwise every call may return the same page of results.
    json_body["variables"]["page_index"] = page_number
    json_body["seoPayload"] = page_number
    json_body["variables"]["offset"] = offset_parameter

    response = requests.post(url=url, json=json_body, headers=headers, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to decode an error page.
    response.raise_for_status()
    return response.json()

In [51]:
# Number of listings requested per call. This must equal `limit: 20` in the
# GraphQL query built by send_request: a larger stride skips listings between
# consecutive pages whenever the server honors the offset.
PAGE_SIZE = 20
# Number of pages to request in one run.
PAGE_COUNT = 150

offset_parameter = 0

json_data_list = []

for page_number in range(1, PAGE_COUNT + 1):
    json_data = send_request(page_number=page_number, offset_parameter=offset_parameter)
    json_data_list.append(json_data)
    # Advance by exactly one page of results.
    offset_parameter += PAGE_SIZE

In [64]:
def extract_features(entry: dict):
    """Flatten one listing record into a single-level dict of selected fields.

    Reads ``property_id`` and ``list_price`` from the top level, ``beds``,
    ``baths`` and ``sqft`` from ``entry["description"]``, and the street line,
    postal code, state code and city from ``entry["location"]["address"]``.
    Values are copied as-is (including ``None``); a missing key raises
    ``KeyError``.
    """
    row = {"id": entry["property_id"], "price": entry["list_price"]}

    details = entry["description"]
    row["beds"] = details["beds"]
    row["baths"] = details["baths"]
    row["sqft"] = details["sqft"]

    addr = entry["location"]["address"]
    row["address"] = addr["line"]
    row["postal_code"] = addr["postal_code"]
    row["state"] = addr["state_code"]
    row["city"] = addr["city"]

    return row

In [65]:
# Flatten every fetched page's result list into one list of feature rows,
# then build the DataFrame from those rows in a single call.
feature_dict_list = [
    extract_features(entry=listing)
    for page in json_data_list
    for listing in page['data']['rich_snippets']['results']
]

df = pd.DataFrame(feature_dict_list)

In [66]:
# Write the scraped listings to the dated CSV file named in csvFileName.
df.to_csv(path_or_buf=csvFileName)

In [67]:
# Preview the first 100 rows of the scraped listings.
df.head(n=100)

Unnamed: 0,id,price,beds,baths,sqft,address,postal_code,state,city
0,4320632096,3995000,7,7,6400.0,3 Discovery Ln,11937,NY,East Hampton
1,4528089672,2699000,4,4,2700.0,12 Bay Meadow Ln,11977,NY,Westhampton
2,4560478667,975000,4,5,3600.0,34 Jefferson Landing Cir,11777,NY,Port Jefferson
3,4622503716,400000,3,2,,50 Beecher Ave,11730,NY,East Islip
4,4355382664,2475000,5,4,,87 Wildwood Dr,11746,NY,Dix Hills
...,...,...,...,...,...,...,...,...,...
95,4697368030,875000,3,2,,516 Madison Ave,11944,NY,Greenport
96,3542840089,1095000,4,3,,505 Saltaire Way,11952,NY,Mattituck
97,4068836272,18995000,6,8,8000.0,67 Hither Ln,11937,NY,East Hampton
98,3651483610,4495000,4,6,3196.0,55 Gould St,11937,NY,East Hampton


In [58]:
# Display the raw response currently held in `json_data` to inspect its
# structure. The scraping loop reassigns `json_data` on every iteration, so
# this shows whichever response was fetched last.
json_data

{'data': {'rich_snippets': {'count': 20,
   'total': 6929,
   'results': [{'property_id': '4320632096',
     'list_price': 3995000,
     'description': {'beds': 7,
      'baths': 7,
      'baths_full': 7,
      'baths_half': None,
      'baths_3qtr': None,
      'sqft': 6400},
     'primary_photo': {'href': 'https://ap.rdcpix.com/2b6b8552ecfaa94b635dc1e7e9d63b17l-m4176403864s.jpg'},
     'permalink': '3-Discovery-Ln_East-Hampton_NY_11937_M43206-32096',
     'href': 'https://www.realtor.com/realestateandhomes-detail/3-Discovery-Ln_East-Hampton_NY_11937_M43206-32096',
     'flags': {'is_pending': None,
      'is_foreclosure': None,
      'is_contingent': None,
      'is_new_construction': None,
      'is_new_listing': False,
      'is_price_reduced': False,
      'is_plan': None},
     'location': {'address': {'city': 'East Hampton',
       'country': 'USA',
       'line': '3 Discovery Ln',
       'postal_code': '11937',
       'state_code': 'NY',
       'state': 'New York'}},
     'open

In [59]:
# Display the raw response held in `json_data` again.
# NOTE(review): same expression as the preceding inspection cell — consider
# keeping only one of them.
json_data

{'data': {'rich_snippets': {'count': 20,
   'total': 6929,
   'results': [{'property_id': '4320632096',
     'list_price': 3995000,
     'description': {'beds': 7,
      'baths': 7,
      'baths_full': 7,
      'baths_half': None,
      'baths_3qtr': None,
      'sqft': 6400},
     'primary_photo': {'href': 'https://ap.rdcpix.com/2b6b8552ecfaa94b635dc1e7e9d63b17l-m4176403864s.jpg'},
     'permalink': '3-Discovery-Ln_East-Hampton_NY_11937_M43206-32096',
     'href': 'https://www.realtor.com/realestateandhomes-detail/3-Discovery-Ln_East-Hampton_NY_11937_M43206-32096',
     'flags': {'is_pending': None,
      'is_foreclosure': None,
      'is_contingent': None,
      'is_new_construction': None,
      'is_new_listing': False,
      'is_price_reduced': False,
      'is_plan': None},
     'location': {'address': {'city': 'East Hampton',
       'country': 'USA',
       'line': '3 Discovery Ln',
       'postal_code': '11937',
       'state_code': 'NY',
       'state': 'New York'}},
     'open