In [2]:
import pandas as pd
import numpy as np
import warnings
warnings.simplefilter(action='ignore', category=pd.errors.SettingWithCopyWarning)

# Load the listings export; `df` is kept untouched as the raw frame.
df = pd.read_csv('data/cleaned_df.csv')

# A listing is only kept if every one of these columns is populated.
key_columns = ['State', 'City', 'Bedroom', 'Bathroom', 'ListedPrice',
               'MarketEstimate', 'Area', 'PPSq', 'LotArea',
               'RentEstimate', 'Latitude', 'Longitude']

# Subset of key_columns that is coerced to a numeric dtype.
numeric_columns = ['Bedroom', 'Bathroom', 'ListedPrice', 'MarketEstimate',
                   'Area', 'PPSq', 'LotArea', 'RentEstimate']

# Explicit .copy(): the column assignments in the loop below must write to a
# frame that owns its data, not to something derived from `df`. Without it the
# assignments are exactly the pattern SettingWithCopyWarning reports.
df_clean = df.dropna(subset=key_columns).copy()

# Values that cannot be parsed as numbers become NaN (errors='coerce') and are
# removed by the dropna step that follows.
for col in numeric_columns:
    df_clean[col] = pd.to_numeric(df_clean[col], errors='coerce')

# Non-mutating chain: each step returns a new frame, so the derived column is
# added with .assign rather than by writing into the result of drop_duplicates().
df_clean = (
    df_clean
    .dropna(subset=numeric_columns)
    .drop_duplicates()
    # TotalAmenities = bedrooms + bathrooms per listing.
    .assign(TotalAmenities=lambda frame: frame['Bedroom'] + frame['Bathroom'])
)

In [None]:
# Choropleth Map (Ratio of ListedPrice / MarketEstimate)
import plotly.express as px

# Per-state mean of each price column.
state_agg = (
    df_clean
    .groupby('State')[['MarketEstimate', 'ListedPrice']]
    .mean()
    .reset_index()
)

# Ratio of the two state-level means (not the mean of per-listing ratios).
# A value above 1 means the mean listed price exceeds the mean market estimate.
state_agg['PriceRatio'] = state_agg['ListedPrice'] / state_agg['MarketEstimate']

# NOTE(review): locationmode "USA-states" presumably needs 'State' to hold
# two-letter state abbreviations — confirm against the data.
fig = px.choropleth(
    state_agg,
    locations='State',
    locationmode="USA-states",
    color='PriceRatio',
    scope="usa",
    color_continuous_scale="RdBu",
    # Anchor the diverging scale at parity so its neutral colour means
    # "listed price equals market estimate" and the two hues mark the two sides.
    color_continuous_midpoint=1.0,
    labels={'PriceRatio': 'Listing Price / Market Estimate'},
    title="Average Price Ratio (Listed Price vs. Market Estimate) by State"
)

fig.show()

In [None]:
# Scatterplot (LotArea vs ListedPrice)
import altair as alt
import warnings
# Silences every FutureWarning for the rest of the session, not just this cell.
warnings.filterwarnings("ignore", category=FutureWarning)
# Lift Altair's row limit so the whole filtered frame can be passed to the chart.
alt.data_transformers.disable_max_rows()

# Keep listings with a positive lot area, then trim both variables to their
# 1st-99th percentile range (bounds inclusive) to drop extreme outliers.
df_filtered = df_clean[df_clean['LotArea'] > 0]
lot_low, lot_high = df_filtered['LotArea'].quantile([0.01, 0.99])
price_low, price_high = df_filtered['ListedPrice'].quantile([0.01, 0.99])

df_filtered = df_filtered[
    df_filtered['LotArea'].between(lot_low, lot_high)
    & df_filtered['ListedPrice'].between(price_low, price_high)
]

# The chart embeds its data, so hand it only the fields it encodes instead of
# every column of df_filtered. Rows with a non-positive price are left out of
# the plotted data because the y axis is logarithmic; df_filtered itself is
# unchanged by this step.
scatter_fields = ['State', 'City', 'Street', 'LotArea', 'ListedPrice']
scatter_data = df_filtered.loc[df_filtered['ListedPrice'] > 0, scatter_fields]

scatter = alt.Chart(scatter_data).mark_circle(opacity=0.6).encode(
    x=alt.X('LotArea:Q', title='LotArea (acres)'),
    y=alt.Y('ListedPrice:Q', scale=alt.Scale(type='log'), title='Listed Price ($)'),
    color=alt.Color('State:N', legend=alt.Legend(title="State")),
    tooltip=['State', 'City', 'Street', 'LotArea', 'ListedPrice']
).properties(
    title='Total Land of Property vs. Listed Price by State',
    width=600,
    height=400
)

scatter