In [8]:
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
import time

# Scrape the rent.com listings grid for Valley Stream and save one CSV row
# (name, address, price, details) per listing card.
driver = webdriver.Chrome()

url = 'https://www.rent.com/new-york/valley-stream-apartments'
filename = 'apartment_listings.csv'

listings = []
apartment_data = []
grid_loaded = False

# try/finally guarantees the browser is closed exactly once, whether the
# scrape succeeds, times out, or raises something unexpected.
try:
    driver.get(url)

    try:
        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.XPATH, '//*[@data-tid="listings-grid"]'))
        )
        grid_loaded = True
        print("Listings grid loaded.")
    except TimeoutException:
        # Nothing to scrape; fall through to the cleanup below instead of
        # querying a session that has already been shut down.
        print("Timeout while waiting for listings grid.")

    if grid_loaded:
        listings = driver.find_elements(By.XPATH, '//*[@data-tid="listings-grid"]/li')

        for listing in listings:
            try:
                price_element = listing.find_element(By.XPATH, './/span[@data-tid="listing-price-text"]')
                price = price_element.text

                address_element = listing.find_element(By.XPATH, './/a[@data-tid="pdp-link"]/div/p[2]')
                address = address_element.text

                name_element = listing.find_element(By.XPATH, './/a[@data-tid="pdp-link"]/div/p[1]')
                name = name_element.text

                details_element = listing.find_element(By.XPATH, './/a[@data-tid="pdp-link"]/div/p[3]')
                details = details_element.text

                apartment_data.append([name, address, price, details])

            except Exception as e:
                # Best effort: a grid item missing one of the expected
                # elements is reported and skipped, not fatal.
                print(f"Error extracting data for a listing: {e}")
finally:
    driver.quit()

# Only write the file when the grid actually loaded, so a timed-out run does
# not replace an earlier CSV with a header-only file.
if grid_loaded:
    with open(filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)

        writer.writerow(['Name', 'Address', 'Price', 'Details'])

        # Write the apartment data
        writer.writerows(apartment_data)

    print(f"Data has been written to {filename}")


Listings grid loaded.
Error extracting data for a listing: Message: no such element: Unable to locate element: {"method":"xpath","selector":".//span[@data-tid="listing-price-text"]"}
  (Session info: chrome=130.0.6723.116); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
Stacktrace:
0   chromedriver                        0x00000001051c36ac cxxbridge1$str$ptr + 3645404
1   chromedriver                        0x00000001051bbf0c cxxbridge1$str$ptr + 3614780
2   chromedriver                        0x0000000104c28104 cxxbridge1$string$len + 88416
3   chromedriver                        0x0000000104c6a364 cxxbridge1$string$len + 359360
4   chromedriver                        0x0000000104c608b0 cxxbridge1$string$len + 319756
5   chromedriver                        0x0000000104ca3bd0 cxxbridge1$string$len + 594988
6   chromedriver                        0x0000000104c5ef54 cxxbridge1$string$len + 3

In [18]:
import pandas as pd
import re

# Load the listings CSV.
# NOTE(review): this is an absolute, user-specific path; the notebook will not
# run elsewhere without editing it.
df = pd.read_csv('/Users/ainsleymartinez/Downloads/VS_listings.csv')

# Strip "$", "+" and "," from the price text so it can be cast to float.
# The patterns are raw strings so the regex escapes are not parsed as
# (invalid) Python string escape sequences.
df['Price'] = df['Price'].replace({r'\$': '', r'\+': '', ',': ''}, regex=True).astype(float)

def extract_sqft(detail):
    """Return the square footage mentioned in a listing's details text.

    Looks for a number followed (optionally after whitespace) by the literal
    "Sqft", e.g. "2 Beds • 1 Bath • 500 Sqft" -> 500. Numbers of any length
    are read in full, and thousands separators are accepted
    ("1,200 Sqft" -> 1200).

    Args:
        detail: The details text. Non-string values (for example None, or the
            NaN pandas produces for an empty cell) are treated as "no match".

    Returns:
        The square footage as an int, or None when no "<number> Sqft" is found.
    """
    if not isinstance(detail, str):
        return None

    # The lookbehind forces the match to begin at the first digit of the
    # number, so a long value is never cut down to its trailing digits.
    match = re.search(r'(?<!\d)(\d{1,3}(?:,\d{3})+|\d+)\s*Sqft', detail)
    if match is None:
        return None
    return int(match.group(1).replace(',', ''))

# Derive the area of each listing from its details text.
df['Sqft'] = df['Details'].apply(extract_sqft)

# Rent per square foot for each listing.
df['Price per Sqft'] = df['Price'].div(df['Sqft'])

# Median of the area column and of the price-per-area column.
median_sqft, median_price_per_sqft = (
    df[column].median() for column in ('Sqft', 'Price per Sqft')
)

print(f"Median Sqft: {median_sqft} Sqft")
print(f"Median Price per Sqft: ${median_price_per_sqft:.2f}")


Median Sqft: 700.0 Sqft
Median Price per Sqft: $3.89


In [19]:
import pandas as pd

# Load the CSV file into a pandas DataFrame
# NOTE(review): this is an absolute, user-specific path; the notebook will not
# run elsewhere without editing it.
file_path = '/Users/ainsleymartinez/Downloads/VS_listings.csv'
df = pd.read_csv(file_path)

# Inspect the first few rows to understand the structure of the dataset
print(df.head())

# Clean the 'Price' column by removing the $ symbol, + symbol, and commas.
# The patterns are raw strings so the regex escapes are not parsed as
# (invalid) Python string escape sequences.
df['Price'] = df['Price'].replace({r'\$': '', r'\+': '', ',': ''}, regex=True).astype(float)

# Calculate the median, min, max, and standard deviation for the 'Price' column
median_price = df['Price'].median()
min_price = df['Price'].min()
max_price = df['Price'].max()
std_price = df['Price'].std()

# Print the statistics
print("Price Statistics:")
print(f"  Median Price: ${median_price:.2f}")
print(f"  Min Price: ${min_price:.2f}")
print(f"  Max Price: ${max_price:.2f}")
print(f"  Std Dev of Price: ${std_price:.2f}")


                                              Name  \
0                       Fairfield at Valley Stream   
1                                    The Plaza 360   
2              Fairfield Tudor At Rockville Centre   
3                   Fairfield Courtyard at Hewlett   
4  Fairfield Waterside At Village Of East Rockaway   

                                        Address      Price  \
0       64 Gibson Blvd, Valley Stream, NY 11581  $2,815.00   
1           360 Central Ave, Lawrence, NY 11559  $3,005.00   
2  565 Merrick Road, Rockville Centre, NY 11570  $3,590.00   
3         65 PROSPECT AVENUE, Hewlett, NY 11557  $2,970.00   
4          60 Front St, East Rockaway, NY 11518  $2,885.00   

                       Details  
0   2 Beds • 1 Bath • 500 Sqft  
1  1 Beds • 1 Baths • 775 Sqft  
2   2 Beds • 1 Bath • 871 Sqft  
3    Studio• 1 Bath • 785 Sqft  
4       2 • 1 Baths • 738 Sqft  
Price Statistics:
  Median Price: $2585.00
  Min Price: $1795.00
  Max Price: $4495.00
  Std Dev of Pric