In [8]:
import requests
from bs4 import BeautifulSoup
import re  # Importing regex module for pattern matching

def get_redfin_data(zipcode):
    base_url = f"https://www.redfin.com/zipcode/{zipcode}"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                      '(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        response = requests.get(base_url, headers=headers, timeout=10)
        response.raise_for_status()  # Raise exception for 4xx or 5xx errors
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data for zipcode {zipcode}: {e}")
        return []

    if response.status_code != 200:
        print(f"Failed to retrieve data for zipcode {zipcode}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')

    # Parse property details
    property_list = []
    properties = soup.find_all('div', class_='HomeCardContainer')
    for prop in properties:
        address_elem = prop.find('div', class_='bp-Homecard__Address')
        price_elem = prop.find('span', class_='bp-Homecard__Price--value')
        beds_elem = prop.find('span', class_='bp-Homecard__Stats--beds')
        baths_elem = prop.find('span', class_='bp-Homecard__Stats--baths')
        sqft_elem = prop.find('span', class_='bp-Homecard__Stats--sqft')
        lot_size_elem = prop.find('span', class_='bp-Homecard__Stats--lotsize')
        
        # Function to extract numeric value from string
        def extract_numeric_value(value):
            numeric_part = re.search(r'\d+', value)
            return numeric_part.group() if numeric_part else ''        
        
        property_details = {
            'address': address_elem.text.strip() if address_elem else '',
            'price': price_elem.text.strip() if price_elem else '',
            'beds': extract_numeric_value(beds_elem.text.strip() if beds_elem else ''),
            'baths': extract_numeric_value( baths_elem.text.strip() if baths_elem else '',),
            'sqft': sqft_elem.text.strip() if sqft_elem else '',
            'lot_size': lot_size_elem.text.strip() if lot_size_elem else ''
        }
  
        
        # # Modify property_details to extract numeric values
        # modified_property_details = {
        #     'address': property_details.get('address', ''),
        #     'price': property_details.get('price', ''),
        #     'beds': extract_numeric_value(beds_elem.text.strip() if beds_elem else ''),
        #     'baths': extract_numeric_value( baths_elem.text.strip() if baths_elem else '',),
        #     'sqft': extract_numeric_value(sqft_elem.text.strip() if sqft_elem else ''),
        #     'lot_size': extract_numeric_value(lot_size_elem.text.strip() if lot_size_elem else '')
        # }        
        
        property_list.append(property_details)

    return property_list

# Example usage:
zipcode = '78660'  # Replace with the desired zipcode
properties = get_redfin_data(zipcode)
# Print the results
for property in properties:
    print(property)


{'address': '2821 Mission Tejas Dr, Pflugerville, TX 78660', 'price': '$389,000', 'beds': '3', 'baths': '2', 'sqft': '2,062 sq ft', 'lot_size': '6,464 sq ft (lot)'}
{'address': '17928 Kenai Fjords Dr #2, Pflugerville, TX 78660', 'price': '$300,000', 'beds': '3', 'baths': '2', 'sqft': '1,366 sq ft', 'lot_size': '4,351 sq ft (lot)'}
{'address': '', 'price': '', 'beds': '', 'baths': '', 'sqft': '', 'lot_size': ''}
{'address': '17209 Calipatria Ln, Pflugerville, TX 78660', 'price': '$400,000', 'beds': '3', 'baths': '2', 'sqft': '1,816 sq ft', 'lot_size': '6,250 sq ft (lot)'}
{'address': '20017 Ploughshores Ln, Pflugerville, TX 78660', 'price': '$429,900', 'beds': '4', 'baths': '2', 'sqft': '2,766 sq ft', 'lot_size': '6,272 sq ft (lot)'}
{'address': '20024 Crested Caracara Ln, Pflugerville, TX 78660', 'price': '$675,000', 'beds': '4', 'baths': '3', 'sqft': '3,382 sq ft', 'lot_size': '7,492 sq ft (lot)'}
{'address': '1424 Howlin Wolf Trl, Pflugerville, TX 78660', 'price': '$410,000', 'beds':

In [9]:

# from tabulate import tabulate
# from rich.console import Console
# from rich.table import Table

# def print_properties_tabular(properties):
#     headers = ['Address', 'Price', 'Beds', 'Baths', 'Sqft', 'Lot Size']
#     table_data = []
    
#     for prop in properties:
#         row = [
#             prop['address'],
#             prop['price'],
#             prop['beds'],
#             prop['baths'],
#             prop['sqft'],
#             prop['lot_size']
#         ]
#         table_data.append(row)
    
#     print(tabulate(table_data, headers=headers, tablefmt='pretty'))

# def print_properties_rich(properties):
#     console = Console()

#     table = Table(title="Properties", show_header=True, header_style="bold magenta")
#     table.add_column("Address", style="cyan")
#     table.add_column("Price", justify="right")
#     table.add_column("Beds", justify="right")
#     table.add_column("Baths", justify="right")
#     table.add_column("Sqft", justify="right")
#     table.add_column("Lot Size", justify="right")

#     for prop in properties:
#         table.add_row(
#             prop['address'],
#             prop['price'],
#             prop['beds'],
#             prop['baths'],
#             prop['sqft'],
#             prop['lot_size']
#         )

#     console.print(table)
# # Print properties in tabular format using tabulate
# print("Properties in Tabular Format:")
# print_properties_tabular(properties)
# print("\n")

# # Print properties using rich for styled console output
# print("Properties in Styled Console Output:")
# print_properties_rich(properties)