In [3]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import re
import json

In [23]:
def get_target_zips(state, city=None, zip_code=None, zip_db_path="zip_code_database.csv"):
    """Return the list of ZIP codes that should be scraped.

    Selection rules (most specific input wins):
      * ``zip_code`` given  -> ``[zip_code]`` is returned unchanged; the ZIP
        database is not read at all.
      * ``city`` given      -> every ``zip`` whose ``primary_city`` equals
        ``city`` and whose ``state`` equals ``state``.
      * neither given       -> every ``zip`` whose ``state`` equals ``state``.

    Args:
        state: Value compared against the ``state`` column.
        city: Optional value compared against the ``primary_city`` column.
        zip_code: Optional single ZIP code to use instead of a lookup.
        zip_db_path: Path of the CSV holding the ``zip``, ``primary_city`` and
            ``state`` columns. Defaults to the file the notebook ships with.

    Returns:
        list: The matching values of the ``zip`` column (possibly empty), or
        ``[zip_code]``.
    """
    # An explicit ZIP needs no lookup, so skip the file read entirely.
    if zip_code is not None:
        return [zip_code]

    # NOTE(review): pandas presumably infers an integer dtype for ``zip``,
    # which would drop leading zeros — confirm before scraping states whose
    # ZIP codes start with 0.
    zips = pd.read_csv(zip_db_path)

    selection = zips["state"] == state
    if city is not None:
        selection &= zips["primary_city"] == city

    return zips.loc[selection, "zip"].tolist()


def get_stingray_rgn_id(zip):
    """Look up the Redfin region id for a ZIP code.

    Calls the ``query-location`` endpoint, strips everything up to and
    including the first ``&&`` from the response text, parses the remainder as
    JSON and reads ``payload.exactMatch.id``. The part of that id after the
    first underscore is returned.

    Args:
        zip: ZIP code to look up (sent as the ``location`` query parameter).

    Returns:
        str | None: The region id, or ``None`` (after printing a notice) when
        the response has no usable exact match.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
        IndexError / json.JSONDecodeError: When the response is not in the
            expected ``<prefix>&&<json>`` form.
    """
    query_location_api = "https://www.redfin.com/stingray/do/query-location"
    response = requests.get(
        query_location_api,
        # Let requests build (and escape) the query string.
        params={"location": zip, "v": 2},
        headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'},
        # Without a timeout a single stalled connection blocks the whole scrape.
        timeout=30,
    )
    # NOTE(review): the body is run through an HTML parser before being
    # treated as JSON; kept as-is, but response.text alone may be sufficient.
    soup = BeautifulSoup(response.text, 'html.parser').text
    prefix_removed = soup.split('&&', 1)[1]
    data = json.loads(prefix_removed)
    try:
        return data["payload"]["exactMatch"]["id"].split("_", 1)[1]
    except (KeyError, TypeError, AttributeError, IndexError):
        # Only "match missing / malformed" is treated as a soft failure;
        # anything else (e.g. KeyboardInterrupt) propagates.
        print(f"No Exact match found for zip: {zip}")
        return None


def build_stingray_gis_params(params):
    """Serialize a parameter mapping into a ``key=value&key=value`` string.

    Entries whose value is ``None`` are omitted; every other value (including
    ``0``, ``False`` and ``""``) is kept and formatted with ``str()``. Order
    follows the mapping's iteration order. Values are not percent-encoded.

    Args:
        params: Mapping of query parameter names to values.

    Returns:
        str: The joined query string, or ``""`` when nothing remains.
    """
    return "&".join(
        f"{key}={value}" for key, value in params.items() if value is not None
    )


def call_stingray_buy_gis(params_url):
    """Call the Redfin ``gis`` endpoint and return its decoded JSON payload.

    The response text is split on the first ``&&``; whatever follows is parsed
    as JSON.

    Args:
        params_url: Ready-made query string (without the leading ``?``), e.g.
            the output of ``build_stingray_gis_params``.

    Returns:
        The object produced by ``json.loads`` on the response body.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
        IndexError / json.JSONDecodeError: When the response is not in the
            expected ``<prefix>&&<json>`` form.
    """
    api_url = "https://www.redfin.com/stingray/api/gis"
    url = f"{api_url}?{params_url}"
    response = requests.get(
        url,
        headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'},
        # Without a timeout a single stalled connection blocks the whole scrape.
        timeout=30,
    )
    # NOTE(review): the body is run through an HTML parser before being
    # treated as JSON; kept as-is, but response.text alone may be sufficient.
    soup = BeautifulSoup(response.text, 'html.parser').text
    prefix_removed = soup.split('&&', 1)[1]

    return json.loads(prefix_removed)


# (output column, key in the home record, True when the value is wrapped as
# {"value": ...}). Order here is the column order of the resulting rows.
_HOME_FIELDS = (
    ("MLS ID", "mlsId", True),
    ("Status", "mlsStatus", False),
    ("Price", "price", True),
    ("HOA Fee", "hoa", True),
    ("Square Feet", "sqFt", True),
    ("Price per Square Foot", "pricePerSqFt", True),
    ("Lot Size", "lotSize", True),
    ("Bedrooms", "beds", False),
    ("Bathrooms", "baths", False),
    ("Location", "location", True),
    ("Stories", "stories", False),
    ("Address", "streetLine", True),
    ("City", "city", False),
    ("State", "state", False),
    ("ZIP Code", "postalCode", True),
    ("Year Built", "yearBuilt", True),
    ("URL", "url", False),
)


def _wrapped_value(home, key):
    """Return ``home[key]["value"]``, or None if the wrapper is missing, null or not a dict."""
    wrapper = home.get(key)
    return wrapper.get('value') if isinstance(wrapper, dict) else None


def parse_stingray_buy_gis(data):
    """Flatten a ``gis`` response into a list of per-home dictionaries.

    Reads ``data["payload"]["homes"]`` and, for each home, copies the fields
    listed in ``_HOME_FIELDS`` into a flat dict keyed by human-readable column
    names. Fields that are absent, null, or (for wrapped fields) lack a
    ``value`` entry become ``None`` instead of raising.

    Args:
        data: Decoded response dictionary.

    Returns:
        list[dict]: One dict per home, with keys in ``_HOME_FIELDS`` order.
        Empty when the payload or its ``homes`` list is missing or null.
    """
    # ``or`` also covers keys that are present but explicitly null.
    payload = data.get('payload') or {}
    homes = payload.get('homes') or []

    return [
        {
            column: _wrapped_value(home, key) if wrapped else home.get(key)
            for column, key, wrapped in _HOME_FIELDS
        }
        for home in homes
    ]

In [26]:
# Debug check of the generated query string.
# NOTE: within this notebook `params` is only assigned inside the scraping
# loop cell, so this cell depends on that cell having been run first and
# will fail on a fresh kernel.
print(build_stingray_gis_params(params))

al=1&include_nearby_homes=False&num_homes=350&ord=days-on-redfin-asc&page_number=1&sf=1,2,3,4,5,6,7&status=9&uipt=1,3,4&v=8&region_type=2&region_id=41473


In [24]:
# GIS Search API
# Scrape listings for every target ZIP code, de-duplicate them and save to CSV.

# Search scope, passed straight to get_target_zips.
Zip = None
City = None
State = "WA"

# NOTE: the output file name is fixed and does not follow `State`.
OUTPUT_CSV = "washington_homes.csv"

# Query parameters shared by every request. Entries left as None are dropped
# by build_stingray_gis_params; "region_id" is appended per ZIP in the loop.
GIS_BASE_PARAMS = {
    # Presumably "active listings" — meaning unconfirmed
    "al": 1,
    # Include Nearby Homes
    "include_nearby_homes": "false",
    # Market. ie Seattle
    "market": None,
    # Number of homes to retrieve.
    # NOTE(review): only page 1 is requested per ZIP, so results beyond this
    # count are presumably not fetched — verify for dense ZIP codes.
    "num_homes": 350,
    # How to sort the homes
    "ord": "days-on-redfin-asc",
    "page_number": 1,
    "poly": None,
    # Listing types
    "sf": "1,2,3,4,5,6,7",
    "start": None,
    "status": 9,
    # User input property types (currently only single family, townhomes, multifamily : 134)
    "uipt": "1,3,4",
    # Possibly the API version — unconfirmed
    "v": 8,
    "zoomLevel": None,
    # Type of region analyzed
    "region_type": 2,
}

target_zips = get_target_zips(State, City, Zip)

print(f"Number of Zipcodes to be Scrubbed: {len(target_zips)}")

data = []

# `zip_code` rather than `zip`: a module-level loop variable named `zip`
# would replace the builtin for every later cell in the kernel.
for index, zip_code in enumerate(target_zips):
    if index % 10 == 0:
        print(f"{index} Zip Codes Evaluated")

    params = {**GIS_BASE_PARAMS, "region_id": get_stingray_rgn_id(zip_code)}

    # No region could be resolved for this ZIP; nothing to query.
    if params["region_id"] is None:
        continue

    url_param = build_stingray_gis_params(params)
    json_data = call_stingray_buy_gis(url_param)
    data.extend(parse_stingray_buy_gis(json_data))

df = pd.DataFrame(data)

# Drop repeated listings (first occurrence wins), but only among rows that
# actually have an MLS ID: a plain drop_duplicates would treat every missing
# ID as equal and keep just one such listing. The column check covers the
# case where nothing was scraped and the frame has no columns.
if "MLS ID" in df.columns:
    repeated = df["MLS ID"].notna() & df.duplicated(subset=["MLS ID"])
    df = df[~repeated]

df.to_csv(OUTPUT_CSV)

Number of Zipcodes to be Scrubbed: 733
0 Zip Codes Evaluated
10 Zip Codes Evaluated
20 Zip Codes Evaluated
30 Zip Codes Evaluated
40 Zip Codes Evaluated
50 Zip Codes Evaluated
60 Zip Codes Evaluated
70 Zip Codes Evaluated
80 Zip Codes Evaluated
90 Zip Codes Evaluated
100 Zip Codes Evaluated
110 Zip Codes Evaluated
120 Zip Codes Evaluated
130 Zip Codes Evaluated
No Exact match found for zip: 98189
140 Zip Codes Evaluated
150 Zip Codes Evaluated
160 Zip Codes Evaluated
170 Zip Codes Evaluated
180 Zip Codes Evaluated
190 Zip Codes Evaluated
200 Zip Codes Evaluated
210 Zip Codes Evaluated
220 Zip Codes Evaluated
No Exact match found for zip: 98324
230 Zip Codes Evaluated
240 Zip Codes Evaluated
No Exact match found for zip: 98343
250 Zip Codes Evaluated
260 Zip Codes Evaluated
270 Zip Codes Evaluated
280 Zip Codes Evaluated
290 Zip Codes Evaluated
300 Zip Codes Evaluated
310 Zip Codes Evaluated
320 Zip Codes Evaluated
330 Zip Codes Evaluated
340 Zip Codes Evaluated
350 Zip Codes Evaluated


In [25]:


# Render the listings DataFrame (`df`) built and de-duplicated by the scraping cell.
display(df)


Unnamed: 0,MLS ID,Status,Price,HOA Fee,Square Feet,Price per Square Foot,Lot Size,Bedrooms,Bathrooms,Location,Stories,Address,City,State,ZIP Code,Year Built,URL
0,2244221,Active,295000.0,,1250.0,236.0,12700.0,3.0,1.0,Algona,1.5,1035 Algona Blvd N,Algona,WA,98001,1928.0,/WA/Algona/1035-Algona-Blvd-N-98001/home/365678
1,2246950,Active,950000.0,,3054.0,311.0,18733.0,3.0,2.5,Lake Dolloff,2.0,31722 47th Ct S,Auburn,WA,98001,2005.0,/WA/Auburn/31722-47th-Ct-S-98001/home/2082698
2,2246201,Active,829990.0,77.0,2386.0,348.0,4881.0,4.0,3.0,Auburn,2.0,4581 S 328th Ct #12,Auburn,WA,98001,2024.0,/WA/Auburn/4581-S-328th-Ct-98001/unit-12/home/...
3,2246197,Active,779990.0,77.0,2219.0,352.0,4886.0,4.0,2.5,Auburn,2.0,4575 S 328 th Ct #13,Auburn,WA,98001,2024.0,/WA/Auburn/4575-S-328th-Ct-98001/unit-13/home/...
4,2246071,Active,570000.0,,1250.0,456.0,7200.0,4.0,2.0,West Hill,1.0,4030 S 296th St,Auburn,WA,98001,1968.0,/WA/Auburn/4030-S-296th-St-98001/home/211589
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16754,536505,Active,1217804.0,,19999.0,61.0,48395.0,3.0,2.5,Clarkston,,NNA Mcnaughton Ridge Addition,Clarkston,WA,99403,2023.0,/WA/Clarkston/NNA-Mcnaughton-Ridge-Addition-99...
16755,536506,Active,529750.0,,1580.0,335.0,7405.0,3.0,2.5,Clarkston,,1673 2 Bennett Hill Dr,Clarkston,WA,99403,2023.0,/WA/Clarkston/1673-2-Bennett-Hill-DR-99403/hom...
16756,98876764,Active,1217804.0,,3364.0,362.0,48395.0,3.0,3.0,Clarkston Heights - 2520,1.0,NNA Mcnaughton Rdg,Clarkston,WA,99403,2023.0,/WA/Clarkston/NNA-Mcnaughton-RDG-99403/home/18...
16757,98876734,Active,506650.0,,1619.0,313.0,6534.0,3.0,3.0,Clarkston Downtown - 2510,2.0,1697 2 Bennett Hill Dr,Clarkston,WA,99403,2023.0,/WA/Clarkston/1697-2-Bennett-Hill-DR-99403/hom...
