In [6]:
# from this tutorial: https://medium.com/@knappik.marco/python-web-scraping-how-to-scrape-the-api-of-a-real-estate-website-dc8136e56249

In [7]:
from datetime import date
import requests 
import json 
import pandas as pd

In [8]:
# Export file details.
# The county label must describe the data that is actually scraped:
# send_request() hard-codes "Suffolk County, NY" as the search location,
# so the file is labelled "Suffolk" (it was previously mislabelled "Nassau").
todaysDate  = date.today().isoformat()   # YYYY-MM-DD, same text as str(date.today())
countyName  = "Suffolk"
csvFileName = f"{countyName}_{todaysDate}.csv"

In [9]:
def send_request(page_number: int, offset_parameter: int) -> dict:
    """POST one page of the listing search query and return the parsed JSON reply.

    The GraphQL query, the search location ("Suffolk County, NY") and the page
    size ("limit": 42) are hard-coded in the request template below; only the
    page number and the result offset are filled in per call.

    Args:
        page_number: 1-based page number, written into the template's
            page-tracking fields (zohoQuery.page_index and seoPayload.asPath).
        offset_parameter: Number of results to skip, sent as the ``offset``
            query variable.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within the timeout.
    """
    url = "https://www.realtor.com/api/v1/hulk?client_id=rdc-x&schema=vesta"
    headers = {"content-type": "application/json"}

    # Request template. It is one JSON document, written as two adjacent raw
    # string literals (joined by Python) purely to keep the source valid.
    body = (
        r'{"query":"\n\nquery ConsumerSearchMainQuery($query: HomeSearchCriteria!, $limit: Int, $offset: Int, $sort: [SearchAPISort], $sort_type: SearchSortType, $client_data: JSON, $bucket: SearchAPIBucket)\n{\n  home_search: home_search(query: $query,\n    sort: $sort,\n    limit: $limit,\n    offset: $offset,\n    sort_type: $sort_type,\n    client_data: $client_data,\n    bucket: $bucket,\n  ){\n    count\n    total\n    results {\n      property_id\n      list_price\n      primary\n      primary_photo (https: true){\n        href\n      }\n      source {\n        id\n        agents{\n          office_name\n        }\n        type\n        spec_id\n        plan_id\n      }\n      community {\n        property_id\n        description {\n          name\n        }\n        advertisers{\n          office{\n            hours\n            phones {\n              type\n              number\n            }\n          }\n          builder {\n            fulfillment_id\n          }\n        }\n      }\n      products {\n        brand_name\n        products\n      }\n      listing_id\n      matterport\n      virtual_tours{\n        href\n        type\n      }\n      status\n      permalink\n      price_reduced_amount\n      other_listings{rdc {\n      listing_id\n      status\n      listing_key\n      primary\n    }}\n      description{\n        beds\n        baths\n        baths_full\n        baths_half\n        baths_1qtr\n        baths_3qtr\n        garage\n        stories\n        type\n        sub_type\n        lot_sqft\n        sqft\n        year_built\n        sold_price\n        sold_date\n        name\n      }\n      location{\n        street_view_url\n        address{\n          line\n          postal_code\n          state\n          state_code\n          city\n          coordinate {\n            lat\n            lon\n          }\n        }\n        county {\n          name\n          fips_code\n        }\n      }\n      tax_record {\n        '
        r'public_record_id\n      }\n      lead_attributes {\n        show_contact_an_agent\n        opcity_lead_attributes {\n          cashback_enabled\n          flip_the_market_enabled\n        }\n        lead_type\n        ready_connect_mortgage {\n          show_contact_a_lender\n          show_veterans_united\n        }\n      }\n      open_houses {\n        start_date\n        end_date\n        description\n        methods\n        time_zone\n        dst\n      }\n      flags{\n        is_coming_soon\n        is_pending\n        is_foreclosure\n        is_contingent\n        is_new_construction\n        is_new_listing (days: 14)\n        is_price_reduced (days: 30)\n        is_plan\n        is_subdivision\n      }\n      list_date\n      last_update_date\n      coming_soon_date\n      photos(limit: 2, https: true){\n        href\n      }\n      tags\n      branding {\n        type\n        photo\n        name\n      }\n    }\n  }\n}","variables":{"query":{"status":["for_sale","ready_to_build"],"primary":true,"search_location":{"location":"Suffolk County, NY"}},"client_data":{"device_data":{"device_type":"web"},"user_data":{"last_view_timestamp":-1}},"limit":42,"offset":42,"zohoQuery":{"silo":"search_result_page","location":"Suffolk County, NY","property_status":"for_sale","filters":{},"page_index":"2"},"sort_type":"relevant","geoSupportedSlug":"Suffolk-County_NY","bucket":{"sort":"modelF"},"by_prop_type":["home"]},"operationName":"ConsumerSearchMainQuery","callfrom":"SRP","nrQueryType":"MAIN_SRP","user_id":"7c95effd-2605-4643-a9cf-afd7d75ed2d6","isClient":true,"seoPayload":{"asPath":"/realestateandhomes-search/Suffolk-County_NY/pg-2","pageType":{"silo":"search_result_page","status":"for_sale"},"county_needed_for_uniq":false}}'
    )
    json_body = json.loads(body)
    variables = json_body["variables"]

    # The template stores the page number at variables.zohoQuery.page_index,
    # as a string; update it there instead of adding a stray top-level variable.
    variables["zohoQuery"]["page_index"] = str(page_number)
    # Keep seoPayload an object (as in the template) and only refresh its path,
    # rather than replacing the whole object with an integer.
    slug = variables["geoSupportedSlug"]
    json_body["seoPayload"]["asPath"] = f"/realestateandhomes-search/{slug}/pg-{page_number}"
    variables["offset"] = offset_parameter

    # A timeout prevents one stalled connection from hanging the whole run;
    # raise_for_status surfaces HTTP errors here instead of as a confusing
    # KeyError later when the payload is unpacked.
    r = requests.post(url=url, json=json_body, headers=headers, timeout=30)
    r.raise_for_status()
    return r.json()

In [10]:
# Pagination settings.
PAGE_SIZE = 42    # must equal the "limit" value hard-coded in send_request's request body
MAX_PAGES = 102   # upper bound on pages fetched (same cap as before); raise it if the
                  # search returns more than PAGE_SIZE * MAX_PAGES listings

offset_parameter = 0

json_data_list = []

for page_number in range(1, MAX_PAGES + 1):
    json_data = send_request(page_number=page_number, offset_parameter=offset_parameter)
    # An empty (or null) result list means the offset is past the last listing:
    # stop instead of sending further requests that cannot return data.
    if not json_data["data"]["home_search"]["results"]:
        break
    json_data_list.append(json_data)
    offset_parameter += PAGE_SIZE

In [11]:
def extract_features(entry: dict) -> dict:
    """Flatten one search-result entry into a single-level record.

    Args:
        entry: One element of the ``results`` list. Must contain
            ``property_id`` and ``list_price``; the nested ``description``,
            ``location`` and ``location.address`` objects may be missing or
            ``None``, in which case the fields read from them are ``None``.

    Returns:
        A dict with the keys ``id``, ``price``, ``beds``, ``baths``,
        ``address``, ``postal_code``, ``state`` and ``city``, in that order.

    Raises:
        KeyError: If ``property_id`` or ``list_price`` is absent.
    """
    # Nested objects can come back as null; fall back to an empty dict so a
    # single sparse listing does not abort the whole extraction with TypeError.
    description = entry.get("description") or {}
    location = entry.get("location") or {}
    address = location.get("address") or {}

    return {
        "id": entry["property_id"],
        "price": entry["list_price"],
        "beds": description.get("beds"),
        "baths": description.get("baths"),
        "address": address.get("line"),
        "postal_code": address.get("postal_code"),
        "state": address.get("state_code"),  # two-letter code, not the full state name
        "city": address.get("city"),
    }

In [12]:
# Walk every fetched page, turn each listing in its result list into a flat
# feature record, and build the DataFrame from the complete list in one call.
feature_dict_list = [
    extract_features(entry=listing)
    for page in json_data_list
    for listing in page["data"]["home_search"]["results"]
]

df = pd.DataFrame(feature_dict_list)

In [13]:
# Write the listings table to the CSV file named by csvFileName.
# index=True is the pandas default, so the row index is still written as the first column.
df.to_csv(path_or_buf=csvFileName, index=True)

In [14]:
# Show the scraped listings table; as the cell's last expression it is rendered as the cell output.
df

Unnamed: 0,id,price,beds,baths,address,postal_code,state,city
0,4511307738,478400,3.0,4.0,48 Burleigh Dr,11741,NY,Holbrook
1,3294424430,499999,4.0,2.0,4 Speed St,11717,NY,Brentwood
2,4139061052,4675000,5.0,6.0,27 Stephen Halsey Path,11976,NY,Water Mill
3,3520287010,339900,3.0,2.0,105 N Clinton Ave,11772,NY,Patchogue
4,3026925642,1050000,3.0,4.0,21 Salt Meadow Rd Unit 21,11702,NY,Babylon
...,...,...,...,...,...,...,...,...
4279,4350377562,1349000,5.0,5.0,105 Church Rd,11739,NY,Great River
4280,3067591712,585000,2.0,2.0,330 Vernon Valley Rd,11768,NY,Northport
4281,4692382829,379000,3.0,1.0,90 Stuyvesant Ave,11950,NY,Mastic
4282,417000676113,3470000,5.0,,44 Depot Road,11978,NY,Westhampton Beach


In [15]:
# Preview the table with fully identical rows removed (first occurrence kept).
# The result is only displayed, not assigned, so df itself is left unchanged.
df.drop_duplicates(subset=None, keep="first")

Unnamed: 0,id,price,beds,baths,address,postal_code,state,city
0,4511307738,478400,3.0,4.0,48 Burleigh Dr,11741,NY,Holbrook
1,3294424430,499999,4.0,2.0,4 Speed St,11717,NY,Brentwood
2,4139061052,4675000,5.0,6.0,27 Stephen Halsey Path,11976,NY,Water Mill
3,3520287010,339900,3.0,2.0,105 N Clinton Ave,11772,NY,Patchogue
4,3026925642,1050000,3.0,4.0,21 Salt Meadow Rd Unit 21,11702,NY,Babylon
...,...,...,...,...,...,...,...,...
4279,4350377562,1349000,5.0,5.0,105 Church Rd,11739,NY,Great River
4280,3067591712,585000,2.0,2.0,330 Vernon Valley Rd,11768,NY,Northport
4281,4692382829,379000,3.0,1.0,90 Stuyvesant Ave,11950,NY,Mastic
4282,417000676113,3470000,5.0,,44 Depot Road,11978,NY,Westhampton Beach
