In [5]:
import os
import pandas as pd
from supabase import create_client, Client
from dotenv import load_dotenv
import numpy as np
from redfin_scraping_utils import RentScraper, BuyScraper
from geocoding_utils import Geocoder 
import geopandas as  gpd

In [2]:
#parameters
states = ['WA', 'ID', 'OR', 'MI', 'IL', 'IA', 'WI', 'MN', 'IN']

In [3]:
load_dotenv()

# Create Supabase Client
url: str = os.getenv("SUPABASE_URL")
key: str = os.getenv("SUPABASE_KEY")
supabase: Client = create_client(url, key)

#Initialize Scraper
buy_scraper = BuyScraper()


In [4]:

for state in states:
    
    #Initialize New DF for each run
    buy_df = pd.DataFrame()
    # Scrape for-sale listings and append to buy_df
    buy_data = buy_scraper.scrape_state(state)
    
    if not buy_data.empty:
        buy_df = pd.concat([buy_df, buy_data], ignore_index=True)
    
    buy_df = buy_df.replace(np.nan, None)
    buy_df = buy_df.drop_duplicates(subset= "property_id")
    records = buy_df.to_dict(orient='records')

    print(f"Scraped {len(buy_data)} for-sale listings for state {state}")

    geocoder = Geocoder(
        buy_df, 
        latitude_col='latitude', 
        longitude_col='longitude'
    )

    df_geocoded = geocoder.geocode_all(
        demographic_areas_path=r"C:\Users\mattl\OneDrive\Documents\reibrowser\Database\Areas\census_block_group_source_nationwide\v107\blkgrp.gdb",
        cbsa_source_path=r"C:\Users\mattl\OneDrive\Documents\reibrowser\Database\Areas\cbsa_source\tl_2020_us_cbsa.shp", 
        state_source_path=r"C:\Users\mattl\OneDrive\Documents\reibrowser\Database\Areas\state_source\States_shapefile.shp"
    )

    # print("Writing...")
    # response = (
    #     supabase.table("redfin_listings_bronze")
    #     .upsert(records, on_conflict="property_id")
    #     .execute()
    # )

    print(f"Write Complete")

Scraping 733 Zip Codes in WA
Processing 24% done (183/733 zip codes)
Processing 49% done (366/733 zip codes)
Processing 74% done (549/733 zip codes)
Scraped 37148 for-sale listings for state WA
Writing...
Write Complete
Scraping 325 Zip Codes in ID
Processing 24% done (81/325 zip codes)
Processing 49% done (162/325 zip codes)
Processing 74% done (243/325 zip codes)
Scraped 16945 for-sale listings for state ID
Writing...
Write Complete
Scraping 492 Zip Codes in OR
Processing 25% done (123/492 zip codes)
Processing 50% done (246/492 zip codes)
Processing 75% done (369/492 zip codes)
Scraped 27530 for-sale listings for state OR
Writing...
Write Complete


In [10]:
df = pd.read_csv(r"C:\Users\mattl\OneDrive\Documents\reibrowser\Database\Redfin Data\geocoded_forsale.csv")

display(df)

  df = pd.read_csv(r"C:\Users\mattl\OneDrive\Documents\reibrowser\Database\Redfin Data\geocoded_forsale.csv")


Unnamed: 0.1,Unnamed: 0,id,property_id,listing_id,mls_id,status,price,hoa_fee,square_feet,lot_size,...,longitude,description,property_type,country_code,updated_at,cbg_geoid,cbsa_geoid,cbsa_name,state_id,state_code
0,0,173659,188301810,193002794,76753963FEB4,Active,739900.0,180.0,1678.0,,...,-122.053663,,6,US,2024-09-22T00:00:39.352575,5.303303e+11,42660.0,"Seattle-Tacoma-Bellevue, WA",48.0,WA
1,1,173660,188301999,193002655,C85D384582DE,Active,699900.0,180.0,1552.0,,...,-122.053663,,6,US,2024-09-22T00:00:39.352575,5.303303e+11,42660.0,"Seattle-Tacoma-Bellevue, WA",48.0,WA
2,2,173661,188306022,193002555,6CCFAC1B86DD,Active,754900.0,180.0,1832.0,,...,-122.053663,,6,US,2024-09-22T00:00:39.352575,5.303303e+11,42660.0,"Seattle-Tacoma-Bellevue, WA",48.0,WA
3,3,173662,190970006,188585103,C55AD96D31FC,Active,894995.0,,2197.0,,...,-122.027538,,6,US,2024-09-22T00:00:39.352575,5.303303e+11,42660.0,"Seattle-Tacoma-Bellevue, WA",48.0,WA
4,4,173663,190904657,188235807,51834B8CAC61,Active,904995.0,,2214.0,,...,-122.027538,,6,US,2024-09-22T00:00:39.352575,5.303303e+11,42660.0,"Seattle-Tacoma-Bellevue, WA",48.0,WA
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
222229,222229,68492,142269335,193369441,202436316,Active,249900.0,,2105.0,50530.0,...,-87.047533,"Welcome to this spacious 3-bedroom, 2-bathroom...",6,US,2024-09-21T21:58:49.420912,1.818196e+11,,,15.0,IN
222230,222230,68493,164313537,191256615,202427351,Active,149900.0,,1808.0,256568.0,...,-87.042271,Handyman special on a beautiful 5.89 acres! Se...,6,US,2024-09-21T21:58:49.420912,1.818196e+11,,,15.0,IN
222231,222231,68494,142277901,191180839,202426925,Active,349900.0,,2274.0,85378.0,...,-87.028557,"Charming updated country home on 1.96 acres, j...",6,US,2024-09-21T21:58:49.420912,1.818196e+11,,,15.0,IN
222232,222232,68495,142261506,187699097,202417327,Active,338000.0,,3100.0,23392.0,...,-87.038991,"This spacious 4-bedroom, 2.5-bathroom gem offe...",6,US,2024-09-21T21:58:49.420912,1.818196e+11,,,15.0,IN
