In [1]:
# Import the required libraries and dependencies
import pandas as pd
import numpy as np
import hvplot.pandas
from pathlib import Path

# Import the data
file_path = Path("Resources/sfo_neighborhoods_census_data.csv")
sfo_data_df = pd.read_csv(file_path, index_col="year")

# Calculate and Plot the Housing Units per Year
# Group by year and calculate mean
housing_units_by_year = sfo_data_df.groupby("year").mean()

# Plot housing units by year
housing_units_by_year_plot = housing_units_by_year.hvplot.bar(
    x="year",
    y="housing_units",
    xlabel="Year",
    ylabel="Housing Units",
    title="Housing Units per Year in San Francisco",
    color="blue",
    width=800,
    height=400,
    ylim=(housing_units_by_year["housing_units"].min() - 5000, housing_units_by_year["housing_units"].max() + 5000),
    rot=45
)

# Calculate and Plot the Average Sale Prices per Square Foot
# Calculate mean values for each year
averages_by_year = sfo_data_df.groupby("year").mean()

# Create DataFrame without housing_units
prices_square_foot_by_year = averages_by_year.drop(columns=['housing_units'])

# Plot prices_square_foot_by_year DataFrame
prices_square_foot_by_year_plot = prices_square_foot_by_year.hvplot.line(
    xlabel='Year',
    ylabel='Price per Sqft / Gross Rent',
    title='Average Sale Price per Square Foot and Gross Rent by Year',
    width=800,
    height=400,
    grid=True
)

# Compare the Average Sale Prices by Neighborhood
# Group by year and neighborhood then calculate mean
prices_by_year_by_neighborhood = sfo_data_df.groupby(['year', 'neighborhood']).mean()

# Drop housing_units column
prices_by_year_by_neighborhood = prices_by_year_by_neighborhood.drop(columns=['housing_units'])

# Interactive line plot
line_plot = prices_by_year_by_neighborhood.hvplot.line(
    x='year',
    y=['sale_price_sqr_foot', 'gross_rent'],
    groupby='neighborhood',
    xlabel='Year',
    ylabel='Price per Sqft / Gross Rent',
    title='Average Sale Price per Square Foot and Gross Rent by Neighborhood'
)

# Build an Interactive Neighborhood Map
# Read neighborhood coordinates
neighborhood_locations_df = pd.read_csv("Resources/neighborhoods_coordinates.csv", index_col="Neighborhood")

# Create mean DataFrame by neighborhood
all_neighborhood_info_df = sfo_data_df.groupby('neighborhood').mean()

# Concatenate DataFrames
all_neighborhoods_df = pd.concat(
    [neighborhood_locations_df, all_neighborhood_info_df], 
    axis="columns",
    sort=False
).dropna().reset_index().rename(columns={"index": "Neighborhood"})

# Create hvPlot points plot
neighborhoods_plot = all_neighborhoods_df.hvplot.points(
    geo=True,
    x='Lon',
    y='Lat',
    size='sale_price_sqr_foot',
    color='gross_rent',
    frame_width=700,
    frame_height=500,
    title='Average Sale Price per Square Foot and Gross Rent by Neighborhood',
    cmap='viridis',
    clim=(all_neighborhoods_df['gross_rent'].min(), all_neighborhoods_df['gross_rent'].max()),
    colorbar=True
).opts(projection=crs.GOOGLE_MERCATOR)


TypeError: agg function failed [how->mean,dtype->object]