Code to download data for each property URL on Redfin's website.

In [4]:
import pyautogui
import random
import time
import pyperclip
import math
import subprocess
import pandas as pd
import time

def move_naturally(end_x, end_y, duration=None):
    """
    Move the mouse to a target position along a randomised cubic bezier curve.

    The cursor starts from its current position and is stepped through
    points on the curve, with a short sleep between steps.

    Args:
        end_x (int): Target X coordinate
        end_y (int): Target Y coordinate
        duration (float, optional): Approximate time in seconds to complete
            the movement. If None, calculated as distance / 1500.
    """
    # Keep pyautogui's fail-safe switched on so that a runaway script can be
    # aborted by slamming the mouse into a screen corner.
    pyautogui.FAILSAFE = True

    # Get start position
    start_x, start_y = pyautogui.position()

    # Straight-line distance between start and target
    distance = math.sqrt((end_x - start_x)**2 + (end_y - start_y)**2)

    # Set duration based on distance if not specified
    if duration is None:
        duration = distance / 1500  # Adjust this value to change speed

    # Two random control points give each movement a different curvature.
    # The spread is proportional to the distance so short hops stay tight.
    spread = int(distance / 2)
    control_x1 = start_x + random.randint(0, spread)
    control_y1 = start_y + random.randint(-spread, spread)
    control_x2 = end_x - random.randint(0, spread)
    control_y2 = end_y + random.randint(-spread, spread)

    # 50 movements per second. Always take at least one step: a very short
    # move (or a cursor already on target) would otherwise give steps == 0
    # and divide by zero below.
    steps = max(1, int(duration * 50))

    # A non-positive duration must not produce a negative sleep.
    base_delay = max(0.0, duration / steps)

    # Move through bezier curve points; t runs from 0.0 to 1.0 inclusive, so
    # the final iteration lands exactly on (end_x, end_y).
    for i in range(steps + 1):
        t = i / steps

        # Cubic bezier curve formula
        x = (1-t)**3 * start_x + 3*(1-t)**2 * t * control_x1 + \
            3*(1-t) * t**2 * control_x2 + t**3 * end_x
        y = (1-t)**3 * start_y + 3*(1-t)**2 * t * control_y1 + \
            3*(1-t) * t**2 * control_y2 + t**3 * end_y

        # Move to the next point
        pyautogui.moveTo(int(x), int(y))

        # Add small random jitter on top of the per-step delay
        time.sleep(base_delay + random.uniform(0.0001, 0.001))

def scroll_naturally(total_amount, duration=None):
    """
    Scroll in small random steps with variable speed and random pauses.

    Args:
        total_amount: Positive for scroll up, negative for scroll down
        duration: Approximate time in seconds to complete the scroll
            (optional). If None, calculated as abs(total_amount) / 500.
            Once 1.5x this time has elapsed, whatever distance is left is
            scrolled in a single call.
    """
    target = abs(total_amount)
    direction = -1 if total_amount < 0 else 1

    # Set default duration based on scroll amount if not specified
    if duration is None:
        duration = target / 500  # Adjust this divisor to change default speed

    scrolled = 0
    start_time = time.time()

    while scrolled < target:
        remaining = target - scrolled

        # Variable scroll speed: 4..16 units, capped by what is left
        scroll_step = random.randint(
            min(4, remaining),  # Minimum scroll amount
            min(16, remaining)  # Maximum scroll amount
        )

        # Perform scroll in the requested direction
        pyautogui.scroll(direction * scroll_step)

        # Add human-like pause
        time.sleep(random.uniform(0.05, 0.15))

        # Sometimes add a longer pause
        if random.random() < 0.1:  # 10% chance
            time.sleep(random.uniform(0.2, 0.5))

        scrolled += scroll_step

        # Check if we're taking too long
        if time.time() - start_time > duration * 1.5:
            # Finish the scroll in one go. The leftover is computed from the
            # absolute target and then signed, so a downward (negative)
            # scroll covers only the distance still outstanding instead of
            # overshooting by the amount already scrolled.
            leftover = target - scrolled
            if leftover > 0:
                pyautogui.scroll(direction * leftover)
            break

def write_to_file_in_append_mode(file_name, data):
    """
    Append one entry to a text file, followed by a newline.

    Any exception raised while opening or writing is caught and reported
    with print(); nothing is re-raised to the caller.

    Args:
        file_name: Path of the file to append to (created if missing)
        data (str): Text to write; a trailing newline is added
    """
    try:
        with open(file_name, mode='a') as out:
            # One entry per line
            entry = data + "\n"
            out.write(entry)
    except Exception as err:
        print(f"Error writing to file: {err}")

def scrape_url(url):
    """
    Load a property page in the browser, run an extraction script in the
    developer console, and append the copied result to data2.txt.

    Everything is driven through GUI automation at hard-coded screen
    coordinates, so the browser window and its context menus must sit where
    these coordinates expect them (NOTE(review): the coordinates are tied to
    one specific screen layout; re-measure them on any other setup).

    Steps: open a new tab and close the old one, type the URL, scroll the
    page, open the inspector via the context menu, switch to and clear the
    console, paste and run the JavaScript, copy the resulting object via the
    context menu, then write the clipboard contents plus a trailing comma to
    data2.txt.

    Args:
        url (str): Address of the property page to scrape
    """

    # JavaScript evaluated in the console. It merges the "key: value" lines
    # of the first six amenities sections into one object with snake_case
    # keys, adds price/estimate/score fields (falling back to "NA"), and ends
    # with the bare expression `mainObject` so the console shows the object.
    # Backslashes are doubled so Python passes \s and \n through to the
    # JavaScript unchanged.
    # NOTE(review): item.split(':') keeps only the text between the first and
    # second colon as the value, so values containing ':' are truncated.
    js = """

    function arrayToSnakeCaseJson(array) {
        return array.reduce((acc, item) => {
            const [key, value] = item.split(':').map(str => str.trim());
            if (key.toLowerCase().includes("has")) {
            acc[key.replace(/[\\s.:]+/g, '_').replace(/([a-z])([A-Z])/g, '$1_$2').toLowerCase()] = true
            } else {
            acc[key.replace(/[\\s.:]+/g, '_').replace(/([a-z])([A-Z])/g, '$1_$2').toLowerCase()] = value;
            }
            return acc;
        }, {});
    }

    let mainObject = {}; // Consolidate all data into a single object
    for (let i = 0; i < 6; i++) {
        let section = document.getElementsByClassName("amenities-container")[0]?.children[i];
        if (section) {
            mainObject = Object.assign({}, arrayToSnakeCaseJson(section.innerText.split("\\n")), mainObject)
        }
    }

        // Add Additional Data
        mainObject['date_sold'] = document.querySelector('[data-rf-test-id=home-sash]')?.textContent?.split("SOLD")[1]?.trim() || "NA";
        mainObject['address'] = document.querySelector('[data-rf-test-id=abp-homeinfo-homeaddress]')?.children[0]?.textContent || "NA";
        mainObject['estimated_sales_range'] = document.getElementsByClassName("sale-price-range")[0]?.innerText || "NA";
        mainObject['estimated_rental_value'] = document.getElementsByClassName("estimate")[0]?.innerText || "NA";
        mainObject['estimated_monthly_cost'] = document.getElementsByClassName("CostOfOwnershipSectionContent")[0]?.children[0]?.children[0]?.children[0]?.innerText?.split(" ")[0] || "NA";
        mainObject['monthly_mortgage_payment'] = document.getElementsByClassName("CostOfOwnershipSectionContent")[0]?.children[0]?.children[2]?.children[0]?.innerText?.split("\\n")[1]?.trim() || "NA";
        mainObject['estimated_market_value'] = document.querySelector('[data-rf-test-id=abp-price]')?.children[0]?.children[0]?.innerText || "NA";
        mainObject['price_per_sqft'] = document.querySelector('[data-rf-test-id=house-info]')?.children[2]?.children[0]?.children[0]?.children[0]?.children[0]?.innerText?.split("\\n").find(text => text.includes('Redfin Estimate'))?.split(" ")[0] || "NA";

        // Add Walk Score, Transit Score, and Bike Score
        mainObject['walk_score'] = document.getElementsByClassName("walkscore-pills")[0]?.children[0]?.innerText?.split("/")[0]?.trim() || "NA";
        mainObject['transit_score'] = document.getElementsByClassName("walkscore-pills")[0]?.children[1]?.innerText?.split("/")[0]?.trim() || "NA";
        mainObject['bike_score'] = document.getElementsByClassName("walkscore-pills")[0]?.children[2]?.innerText?.split("/")[0]?.trim() || "NA";
        mainObject
    
"""
    # Open new tab and close the old one
    move_naturally(330, 58)
    pyautogui.click(x=330, y=58)
    pyautogui.click(x=293, y=57)

    # Click the address bar and type the url
    move_naturally(182, 100)
    pyautogui.click(x=182, y=100)

    pyautogui.write(url)
    pyautogui.press('enter')

    # Fixed wait for the page to load
    time.sleep(2)

    # Scroll down
    move_naturally(300, 400)
    scroll_naturally(-4000, 1)

    # Right click and click on inspect
    move_naturally(250, 740)
    pyautogui.rightClick(x=250, y=740)
    move_naturally(289, 1087)
    pyautogui.click(x=289, y=1087)

    # Click the console button (clicked twice)
    move_naturally(969, 140)
    time.sleep(0.1)
    pyautogui.click(x=969, y=140)
    pyautogui.click(x=969, y=140)

    # Click on clear console (clicked twice)
    move_naturally(740, 165)
    pyautogui.click(x=740, y=165)
    pyautogui.click(x=740, y=165)

    # Click the console to focus its input
    move_naturally(760, 400)
    pyautogui.click(x=760, y=400)

    # Paste the js code via the clipboard and run it.
    # The paste shortcut uses the Command key, i.e. this assumes macOS.
    pyperclip.copy(js)
    pyautogui.hotkey('command', 'v')
    pyautogui.press('enter')

    # Copy the object: right click the console output and pick the menu entry
    move_naturally(850, 750)
    pyautogui.rightClick(x=850, y=750)
    move_naturally(910, 760)
    pyautogui.click(x=910, y=760)

    # Copy the object again; the same sequence is deliberately kept twice
    # (NOTE(review): presumably a retry in case the first attempt misses).
    move_naturally(850, 750)
    pyautogui.rightClick(x=850, y=750)
    move_naturally(910, 760)
    pyautogui.click(x=910, y=760)

    # Append whatever is now on the clipboard, followed by a comma separator.
    # If the copy step failed, the clipboard still holds the js source.
    write_to_file_in_append_mode("data2.txt", str(pyperclip.paste())+",")



In [6]:
# Redfin property pages to scrape.
target_urls = [
    "https://www.redfin.com/CA/Mountain-View/725-Rainbow-Dr-94041/home/1209669",
    "https://www.redfin.com/CA/Mountain-View/2010-California-St-94040/home/615666",
]

In [None]:
# Short pause before the automation starts taking over the mouse and keyboard.
time.sleep(2)

# Start the output with an opening bracket; each scraped entry is appended
# after it.
# NOTE(review): no closing "]" is written anywhere in the visible code, so the
# file has to be completed by hand before it can be parsed as a list.
write_to_file_in_append_mode(file_name="data2.txt", data="[")

# Scrape every target page, one after another.
for url in target_urls:
    scrape_url(url=url)