In [1]:
import os
import pandas as pd
from supabase import create_client, Client
from dotenv import load_dotenv
import numpy as np
from redfin_scraping_utils import RentScraper, BuyScraper

In [2]:
# Parameters: two-letter state codes that the scraping loop iterates over.
states = ["WA", "ID", "OR"]

In [3]:
# Load environment variables (python-dotenv) so the Supabase credentials
# below can be read from the process environment.
load_dotenv()

# Create Supabase Client
# os.getenv returns None for unset variables; check up front so a missing
# credential is reported by name instead of failing inside create_client.
url = os.getenv("SUPABASE_URL")
key = os.getenv("SUPABASE_KEY")
missing_vars = [
    name
    for name, value in (("SUPABASE_URL", url), ("SUPABASE_KEY", key))
    if not value
]
if missing_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_vars)
        + ". Define them in the environment or in a .env file."
    )
supabase: Client = create_client(url, key)

#Initialize Scraper
buy_scraper = BuyScraper()


In [4]:


def _to_upsert_records(listings):
    """Convert scraped listings into a list of dicts ready for upsert.

    Rows are de-duplicated on ``property_id`` (first occurrence kept) and
    every missing value is turned into ``None``.

    Args:
        listings: DataFrame of scraped listings containing a
            ``property_id`` column.

    Returns:
        list[dict]: one dict per unique ``property_id``.
    """
    unique_listings = (
        listings
        .drop_duplicates(subset="property_id")
        .reset_index(drop=True)
    )
    # Cast to object before masking so None can be stored in numeric columns.
    # DataFrame.replace(np.nan, None) is avoided on purpose: per the pandas
    # docs, an explicit value=None was ignored before pandas 1.4.0 and the
    # call fell back to pad/forward-fill, copying values from the row above.
    cleaned = unique_listings.astype(object).where(unique_listings.notna(), None)
    return cleaned.to_dict(orient="records")


for state in states:

    # Scrape for-sale listings for this state
    buy_data = buy_scraper.scrape_state(state)
    print(f"Scraped {len(buy_data)} for-sale listings for state {state}")

    # Nothing scraped: skip instead of sending an empty payload to Supabase.
    if buy_data.empty:
        print(f"No listings to write for state {state}, skipping")
        continue

    records = _to_upsert_records(buy_data)

    print("Writing...")
    # Upsert keyed on property_id (on_conflict), so re-running the notebook
    # targets the same rows rather than inserting new ones for known ids.
    response = (
        supabase.table("redfin_listings_bronze")
        .upsert(records, on_conflict="property_id")
        .execute()
    )

    print("Write Complete")

Scraping 733 Zip Codes in WA
Processing 24% done (183/733 zip codes)
Processing 49% done (366/733 zip codes)
Processing 74% done (549/733 zip codes)
Scraped 37148 for-sale listings for state WA
Writing...
Write Complete
Scraping 325 Zip Codes in ID
Processing 24% done (81/325 zip codes)
Processing 49% done (162/325 zip codes)
Processing 74% done (243/325 zip codes)
Scraped 16945 for-sale listings for state ID
Writing...
Write Complete
Scraping 492 Zip Codes in OR
Processing 25% done (123/492 zip codes)
Processing 50% done (246/492 zip codes)
Processing 75% done (369/492 zip codes)
Scraped 27530 for-sale listings for state OR
Writing...
Write Complete
