Code to download all Redfin CSV files, naming each file with its ZIP code and the associated company names

In [None]:
import time
import os
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Load the ZIP code and associated companies mapping
file_path = "ZipCode_Company_Data.xlsx"  # Path to the uploaded Excel file
pivot_table_data = pd.read_excel(file_path, sheet_name='Pivot Table')


def _normalize_zip(raw_zip):
    """Return *raw_zip* as a digit-only ZIP code string, or None if it is not one.

    Numeric cells (int, float, NumPy scalars) are converted to an integer and
    zero-padded to five digits; text cells are stripped of surrounding
    whitespace. Missing values, non-integral numbers and text that is not
    purely digits (for example a "Grand Total" label row) yield None.
    """
    if pd.isna(raw_zip):
        return None
    if isinstance(raw_zip, str):
        text = raw_zip.strip()
    else:
        try:
            numeric = float(raw_zip)
        except (TypeError, ValueError):
            return None
        if not numeric.is_integer():
            return None
        # NOTE(review): assumes 5-digit US ZIP codes whose leading zeros were
        # dropped when Excel stored them as numbers -- confirm against the data.
        text = f"{int(numeric):05d}"
    return text if text.isdigit() else None


# Extract mapping of Zip_Code to associated companies.
# Company columns are everything between the first column ('Zip_Code') and the
# last column ('Grand Total').
company_columns = list(pivot_table_data.columns[1:-1])
company_presence = pivot_table_data[company_columns].notna().to_numpy()

# ZIP codes are read straight from the column instead of from iterrows() rows:
# iterrows() builds one Series per row with a single common dtype, so an
# integer ZIP sitting next to float/NaN company cells comes back as 12345.0
# and would end up verbatim in the URL and file name.
zip_to_companies = {}
for raw_zip, present_flags in zip(pivot_table_data['Zip_Code'], company_presence):
    zip_code = _normalize_zip(raw_zip)
    if zip_code is None:
        print(f"Skipping row with non-ZIP value in 'Zip_Code': {raw_zip!r}")
        continue
    # Keep only the companies that have a (non-NaN) value for this ZIP code
    companies = [
        company for company, is_present in zip(company_columns, present_flags)
        if is_present
    ]
    zip_to_companies[zip_code] = ', '.join(companies)  # Join company names with commas

# Configure Chrome so that files are saved to a fixed folder without a prompt,
# then start the browser session used for all subsequent page loads.
download_dir = r"C:\Users\tirth\Desktop\Capstone Project\Data_Download"  # Update this path

# Chrome profile preferences controlling download behaviour
download_prefs = {
    "download.default_directory": download_dir,
    "download.prompt_for_download": False,
    "safebrowsing.enabled": True,
}

chrome_options = webdriver.ChromeOptions()
chrome_options.add_experimental_option("prefs", download_prefs)

driver = webdriver.Chrome(options=chrome_options)

# Suffixes Chrome uses for downloads that are still being written
_PARTIAL_DOWNLOAD_SUFFIXES = (".crdownload", ".tmp")

# Characters that are not allowed in Windows file names, each mapped to "_"
_INVALID_FILENAME_TABLE = str.maketrans(dict.fromkeys('<>:"/\\|?*', "_"))


def _safe_file_component(text):
    """Return *text* made safe for use inside a file name.

    Comma separators and spaces become underscores (as before); any character
    Windows forbids in file names is also replaced by an underscore.
    """
    return text.replace(", ", "_").replace(" ", "_").translate(_INVALID_FILENAME_TABLE)


def _wait_for_new_download(directory, files_before, timeout=30.0, poll_interval=0.5):
    """Wait for a finished download to appear in *directory* and return its path.

    A file counts as a finished download when its name was not in
    *files_before* (the directory listing taken before the click) and it does
    not carry a partial-download suffix. If several qualify, the most recently
    created one is returned. Returns None if nothing appears within *timeout*
    seconds.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        finished = [
            os.path.join(directory, name)
            for name in os.listdir(directory)
            if name not in files_before
            and not name.endswith(_PARTIAL_DOWNLOAD_SUFFIXES)
        ]
        if finished:
            return max(finished, key=os.path.getctime)
        time.sleep(poll_interval)
    return None


# Make sure the download folder exists before it is listed below
os.makedirs(download_dir, exist_ok=True)

# Iterate through each ZIP code and associated companies.
# The browser is closed in `finally` so it is not left running if the loop is
# interrupted (e.g. Ctrl+C), which `except Exception` does not catch.
try:
    for zip_code, companies in zip_to_companies.items():
        try:
            # Navigate to the Redfin URL for the given ZIP code
            url = f"https://www.redfin.com/zipcode/{zip_code}/filter/property-type=house,include=sold-3mo"
            print(f"Processing ZIP code: {zip_code}, Companies: {companies}")
            driver.get(url)

            # Dismiss cookie banner (if present); best effort, failures are only reported
            try:
                WebDriverWait(driver, 15).until(
                    EC.element_to_be_clickable((By.ID, "onetrust-accept-btn-handler"))
                ).click()
                print("Cookie banner dismissed.")
            except Exception as e:
                print("No cookie banner to dismiss:", e)

            # Wait for the "Download All" button to appear
            try:
                download_button = WebDriverWait(driver, 20).until(
                    EC.presence_of_element_located((By.XPATH, "//a[contains(text(), 'Download All')]"))
                )

                # Scroll to the button and click it
                driver.execute_script("arguments[0].scrollIntoView(true);", download_button)
                time.sleep(1)  # Allow page to adjust

                # Snapshot the folder BEFORE clicking so that only a file created by
                # this click can be picked up; otherwise a failed or slow download
                # would cause an earlier ZIP code's file to be renamed instead.
                files_before = set(os.listdir(download_dir))

                try:
                    download_button.click()
                except Exception:
                    # Fallback to force-click using JavaScript
                    driver.execute_script("arguments[0].click();", download_button)

                # Wait for the download to complete (polls instead of a fixed sleep)
                downloaded_file = _wait_for_new_download(download_dir, files_before)
                if downloaded_file is None:
                    print(f"No completed download appeared for ZIP code {zip_code}; nothing renamed.")
                    continue

                # Rename the downloaded file to include the ZIP code and companies
                companies_safe = _safe_file_component(companies)
                new_file_name = os.path.join(download_dir, f"Redfin_{zip_code}_{companies_safe}.csv")
                # os.replace overwrites a file left by a previous run; os.rename
                # raises FileExistsError on Windows in that case.
                os.replace(downloaded_file, new_file_name)
                print(f"CSV downloaded and renamed for ZIP code: {zip_code}, Companies: {companies}")
            except Exception as e:
                print(f"Error interacting with the download button for ZIP code {zip_code}: {e}")

        except Exception as e:
            print(f"Error processing ZIP code {zip_code}: {e}")
finally:
    # Close the browser after processing all ZIP codes (or on interruption)
    driver.quit()

print("All ZIP codes processed.")
