In [23]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import requests
import json

def get_target_zips(state, city=None, zip_code=None, zip_db_path="zip_code_database.csv"):
    """Return the list of ZIP codes to scrape for a state, a city, or one ZIP.

    Selection rules (most specific wins):
      * ``zip_code`` given          -> ``[zip_code]`` (the database is not read)
      * ``city`` given, no ZIP      -> ZIPs whose ``primary_city`` and ``state`` match
      * neither given               -> every ZIP whose ``state`` matches

    Args:
        state: Value matched against the ``state`` column.
        city: Optional value matched against the ``primary_city`` column.
        zip_code: Optional single ZIP code; returned as-is when provided.
        zip_db_path: CSV file with at least the columns ``zip``,
            ``primary_city`` and ``state``.

    Returns:
        list: ZIP codes. Values read from the database are strings so that
        any leading zeros stored in the file survive (an integer column
        would turn ``00501`` into ``501``).
    """
    # An explicit ZIP needs no lookup, so skip the file read entirely.
    if zip_code is not None:
        return [zip_code]

    # ZIP codes are identifiers, not numbers: keep them as text.
    zips = pd.read_csv(zip_db_path, dtype={"zip": str})

    selected = zips["state"] == state
    if city is not None:
        selected &= zips["primary_city"] == city

    return zips.loc[selected, "zip"].tolist()


def get_stingray_rgn_id(zip):
    """Look up the Redfin region id that corresponds to a ZIP code.

    Calls the ``query-location`` endpoint, discards everything up to the
    first ``&&`` in the reply, parses the remainder as JSON and reads
    ``payload.exactMatch.id``. The part of that id after its first
    underscore is returned.

    Args:
        zip: ZIP code to look up. The parameter name is kept for caller
            compatibility; it shadows the builtin only inside this function.

    Returns:
        str | None: The region id, or ``None`` (after printing a notice)
        when the reply contains no usable exact match.

    Raises:
        json.JSONDecodeError: If the reply body is not JSON.
        Any exception raised by ``requests.get`` (including a timeout) is
        propagated unchanged.
    """
    query_location_api = f"https://www.redfin.com/stingray/do/query-location?location={zip}&v=2"
    response = requests.get(
        query_location_api,
        headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'},
        # Without a timeout a stalled connection would hang the whole scrape.
        timeout=30,
    )

    # Work on the raw body: pushing JSON through an HTML parser can decode
    # entities or drop tag-like text and thereby corrupt the document.
    prefix, separator, body = response.text.partition('&&')
    if not separator:
        # No "&&" prefix present; treat the entire reply as the JSON document.
        body = prefix
    data = json.loads(body)

    try:
        return data["payload"]["exactMatch"].get("id").split("_", 1)[1]
    except (KeyError, TypeError, AttributeError, IndexError):
        # Missing/null payload, missing exactMatch, missing id, or an id
        # without an underscore all mean "no usable match".
        print(f"No Exact match found for zip: {zip}")
        return None


def build_stingray_gis_params(params):
    """Serialize a parameter mapping into a ``key=value&key=value`` string.

    Entries whose value is ``None`` are omitted. Keys and values are
    formatted as-is (no URL-encoding is applied), in the mapping's
    iteration order.

    Args:
        params: Mapping of query parameter names to values.

    Returns:
        str: The joined query string, without a leading ``?``.
    """
    return "&".join(
        f"{key}={value}"
        for key, value in params.items()
        if value is not None
    )




def call_stingray_rent_gis(params_url):
    """Call the Redfin rentals search endpoint and return the decoded JSON.

    Args:
        params_url: Pre-built query string (``key=value&...``) without a
            leading ``?``.

    Returns:
        The JSON document returned by the endpoint, decoded with
        ``json.loads``.

    Raises:
        json.JSONDecodeError: If the reply body is not JSON.
        Any exception raised by ``requests.get`` (including a timeout) is
        propagated unchanged.
    """
    # The base URL carries no trailing "?": the single separator is added
    # below, so the first parameter name is not prefixed with a stray "?".
    api_url = "https://www.redfin.com/stingray/api/v1/search/rentals"
    url = f"{api_url}?{params_url}"
    response = requests.get(
        url,
        headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'},
        # Without a timeout a stalled connection would hang the whole scrape.
        timeout=30,
    )

    # Decode the raw body directly: an HTML parser would strip tag-like text
    # and decode entities inside JSON strings, which can corrupt the document.
    return json.loads(response.text)


def _child(mapping, key):
    """Return ``mapping[key]`` if it is a non-empty value, otherwise ``{}``.

    Unlike ``mapping.get(key, {})`` this also covers a key that is present
    but holds ``None`` (JSON ``null``), so chained lookups stay safe.
    """
    return mapping.get(key) or {}


def parse_stingray_rent_gis(data):
    """Flatten a rentals search response into a list of per-home records.

    Reads ``data["homes"]`` and, for each entry, pulls selected fields out of
    its ``homeData`` and ``rentalExtension`` sub-objects. Any field (or
    enclosing sub-object) that is missing or null yields ``None`` for the
    affected columns.

    Args:
        data: Decoded response mapping; a missing or null ``homes`` entry is
            treated as an empty list.

    Returns:
        list[dict]: One dict per home with the keys ``Property ID``, ``URL``,
        ``Property Type``, ``Address``, ``City``, ``State``, ``ZIP Code``,
        ``Country Code``, ``Latitude``, ``Longitude``, ``Rental ID``,
        ``Max Beds``, ``Max Baths``, ``Max Square Feet``, ``Max Rent Price``
        and ``Description``, in that order.
    """
    parsed_homes = []

    for home in data.get('homes') or []:
        home_data = _child(home, 'homeData')
        rental_data = _child(home, 'rentalExtension')
        address = _child(home_data, 'addressInfo')
        # The coordinates sit two levels deep: centroid -> centroid.
        centroid = _child(_child(address, 'centroid'), 'centroid')

        # Fields deliberately not exported (kept here for reference):
        #   homeData: photosInfo.photoRanges, staticMapUrl, hasAttFiber
        #   rentalExtension: lastUpdated, numAvailableUnits, status,
        #     dateAvailable, rentalDetailsPageType, searchRankScore,
        #     freshnessTimestamp, revenuePerLead, feedSourceInternalId,
        #     isCommercialPaid, feedOriginalSource, desktopPhone,
        #     mobileWebPhone, mobileAppPhone
        parsed_homes.append({
            "Property ID": home_data.get('propertyId'),
            "URL": home_data.get('url'),
            "Property Type": home_data.get('propertyType'),
            "Address": address.get('formattedStreetLine'),
            "City": address.get('city'),
            "State": address.get('state'),
            "ZIP Code": address.get('zip'),
            "Country Code": address.get('countryCode'),
            "Latitude": centroid.get('latitude'),
            "Longitude": centroid.get('longitude'),
            "Rental ID": rental_data.get('rentalId'),
            "Max Beds": _child(rental_data, 'bedRange').get('max'),
            "Max Baths": _child(rental_data, 'bathRange').get('max'),
            "Max Square Feet": _child(rental_data, 'sqftRange').get('max'),
            "Max Rent Price": _child(rental_data, 'rentPriceRange').get('max'),
            "Description": rental_data.get('description'),
        })

    return parsed_homes


In [32]:


# Scrape scope: set Zip for a single ZIP code, City (+ State) for one city,
# or leave both as None to cover every ZIP code in State.
Zip = None
City = None
State = "WA"

target_zips = get_target_zips(State, City, Zip)

print(f"Number of Zipcodes to be Scrubbed: {len(target_zips)}")

# Query parameters shared by every request. Entries left as None are dropped
# by build_stingray_gis_params; "region_id" is added per ZIP code in the loop.
# Comments marked "??" are the original author's unconfirmed guesses.
base_params = {
    # ?? Active listings
    "al": 1,
    # Rentals only
    "isRentals": "true",
    # Include nearby homes
    "include_nearby_homes": "false",
    # Market, e.g. Seattle
    "market": None,
    # Number of homes to retrieve
    "num_homes": 350,
    # How to sort the homes
    "ord": "days-on-redfin-asc",
    "page_number": 1,
    "poly": None,
    # Listing types
    "sf": "1,2,3,4,5,6,7",
    "start": None,
    "status": 9,
    # User input property types (currently only single family, townhomes, multifamily : 134)
    "uipt": "1,3,4",
    # ?? API version
    "v": 8,
    "zoomLevel": None,
    # Type of region analyzed
    "region_type": 2,
}

data = []

# The loop variable is deliberately not called `zip`: at notebook top level
# that would rebind the builtin for every cell executed afterwards.
for index, zip_code in enumerate(target_zips):
    if index % 10 == 0:
        print(f"{index} Zip Codes Evaluated")

    region_id = get_stingray_rgn_id(zip_code)
    if region_id is None:
        # No region could be resolved for this ZIP code; nothing to query.
        continue

    # region_id goes last so the query string keeps its original parameter order.
    params = {**base_params, "region_id": region_id}
    url_param = build_stingray_gis_params(params)
    json_data = call_stingray_rent_gis(url_param)
    list_data = parse_stingray_rent_gis(json_data)

    data.extend(list_data)


df = pd.DataFrame(data)
# When nothing was scraped the frame has no columns at all, and
# drop_duplicates(subset=...) would raise KeyError on the missing column.
if "Property ID" in df.columns:
    df = df.drop_duplicates(subset=["Property ID"])

# NOTE(review): the output name is fixed and does not follow `State`.
df.to_csv("washington_rentals.csv")

Number of Zipcodes to be Scrubbed: 733
0 Zip Codes Evaluated
10 Zip Codes Evaluated
20 Zip Codes Evaluated
30 Zip Codes Evaluated
40 Zip Codes Evaluated
50 Zip Codes Evaluated
60 Zip Codes Evaluated
70 Zip Codes Evaluated
80 Zip Codes Evaluated
90 Zip Codes Evaluated
100 Zip Codes Evaluated
110 Zip Codes Evaluated
120 Zip Codes Evaluated
130 Zip Codes Evaluated
No Exact match found for zip: 98189
140 Zip Codes Evaluated
150 Zip Codes Evaluated
160 Zip Codes Evaluated
170 Zip Codes Evaluated
180 Zip Codes Evaluated
190 Zip Codes Evaluated
200 Zip Codes Evaluated
210 Zip Codes Evaluated
220 Zip Codes Evaluated
No Exact match found for zip: 98324
230 Zip Codes Evaluated
240 Zip Codes Evaluated
No Exact match found for zip: 98343
250 Zip Codes Evaluated
260 Zip Codes Evaluated
270 Zip Codes Evaluated
280 Zip Codes Evaluated
290 Zip Codes Evaluated
300 Zip Codes Evaluated
310 Zip Codes Evaluated
320 Zip Codes Evaluated
330 Zip Codes Evaluated
340 Zip Codes Evaluated
350 Zip Codes Evaluated


In [None]:
current_db = 