In [None]:
# initial imports
import os
import pandas as pd
import matplotlib.pyplot as plt
import hvplot.pandas
import plotly.express as px
from pathlib import Path
from dotenv import load_dotenv

%matplotlib inline

In [None]:
# Load variables from a .env file into the environment, then register the
# Mapbox token with Plotly Express for the map figure further down.
load_dotenv()
map_box_api = os.environ.get("mapbox")  # None when the "mapbox" variable is not set
px.set_mapbox_access_token(map_box_api)

In [None]:
# Load the census CSV into a DataFrame whose index is the "year" column
file_path = Path("Data/sfo_neighborhoods_census_data.csv")
sfo_data = pd.read_csv(
    filepath_or_buffer=file_path,
    index_col="year",
)
# Preview the first rows
sfo_data.head()

In [None]:
# Mean number of housing units for each year; the year is the DataFrame's
# index, so the grouping is done on index level 0.
housing_units = sfo_data.groupby(level=0)["housing_units"].mean()
print(housing_units)
# Quick-look bar chart using the default axis limits
housing_units.plot.bar()

In [None]:
# Plot the average housing units per year as a bar chart.
# The y-axis is limited to a window running from 1% below the smallest yearly
# average to 1% above the largest one, so that differences between years stay
# visible instead of being flattened by an axis that starts at zero.
# Series.min()/.max() are used rather than the Python builtins because the
# Series methods skip NaN values, whereas builtin min()/max() give
# order-dependent results when a NaN is present.
y_limits = (housing_units.min() * 0.99, housing_units.max() * 1.01)
fig_housing_units = housing_units.plot(
    kind='bar',
    ylim=y_limits,  # (lower, upper) bounds of the y-axis
    title='Housing Units in San Francisco from 2010 to 2016',
)
# NOTE: despite its name, fig_housing_units holds the object returned by
# Series.plot (a Matplotlib Axes for a bar chart), so labels are set on it directly.
fig_housing_units.set_xlabel('Year')
fig_housing_units.set_ylabel('Housing Units');

In [None]:
# Per-year averages of the sale price per square foot and of the gross rent
# (grouping on index level 0, which holds the year).
avg_price = sfo_data.groupby(level=0)[['sale_price_sqr_foot', 'gross_rent']].mean()
avg_price

In [None]:
# Line chart of the yearly average sale price per square foot
sale_price_by_year = avg_price['sale_price_sqr_foot']
sale_price_by_year.plot.line(title='Average Sale Price per Square Foot in San Francisco');

In [None]:
# Mean of the remaining columns for every (year, neighborhood) pair.
# reset_index() turns the "year" index into a regular column so that it can
# serve as a grouping key next to "neighborhood".
avg_prices_by_nbh = (
    sfo_data
    .reset_index()
    .groupby(['year', 'neighborhood'])
    .mean()
)
avg_prices_by_nbh

In [None]:
# Interactive line chart of the average sale price per square foot by year.
# The chart must offer a dropdown selector for the neighborhood, so the
# "neighborhood" index level is passed explicitly as the hvPlot groupby
# dimension; hvPlot then shows one neighborhood at a time behind a widget.
avg_prices_by_nbh.hvplot.line(
    x='year',
    y='sale_price_sqr_foot',
    groupby='neighborhood',
)

In [None]:
# Average of the remaining columns per neighborhood across all years, ordered
# from the highest to the lowest sale price per square foot. The "year" index
# is discarded up front so that it is not averaged with the other columns.
mean_sale_price = (
    sfo_data
    .reset_index(drop=True)
    .groupby('neighborhood')
    .mean()
    .sort_values('sale_price_sqr_foot', ascending=False)
)
# The ten most expensive neighborhoods, with "neighborhood" back as a column
top_ten = mean_sale_price.head(10).reset_index()
top_ten

In [None]:
# Bar chart of the ten neighborhoods with the highest average sale price
# per square foot; x tick labels are rotated 45 degrees.
top_ten.hvplot.bar(
    x='neighborhood',
    y='sale_price_sqr_foot',
    xlabel='San Francisco Neighborhood',
    ylabel='$/sqft',
    rot=45,
    title='Top 10 Expensive Neighborhoods in San Francisco, CA',
)

In [None]:
# Parallel coordinates plot for the top-ten neighborhoods: one axis per
# metric, with each line colored by its sale price per square foot.
top_ten_px_data = top_ten.loc[:, ['sale_price_sqr_foot', 'housing_units', 'gross_rent']]
fig = px.parallel_coordinates(
    top_ten_px_data,
    color='sale_price_sqr_foot',
    color_continuous_scale=px.colors.sequential.Inferno,
    # Column name -> text shown on the corresponding axis
    labels={
        "sale_price_sqr_foot": "$/sqft",
        "housing_units": "Housing Units",
        "gross_rent": "Gross Rent",
    },
)
fig.show()

In [None]:
# Parallel categories plot for the top-ten neighborhoods, colored by sale
# price per square foot.
# The keys of `labels` are matched against DataFrame column names, so the
# neighborhood entry has to be keyed on the exact column name "neighborhood".
fig = px.parallel_categories(
    top_ten,
    color='sale_price_sqr_foot',
    color_continuous_scale=px.colors.sequential.Inferno,
    labels={
        "neighborhood": "Neighborhood",
        "sale_price_sqr_foot": "$/sqft",
        "housing_units": "Housing Units",
        "gross_rent": "Gross Rent",
    },
)
fig.show()

In [None]:
# Neighborhood Map
# Load the neighborhood coordinates and index them by the "Neighborhood" column
file_path = Path("Data/neighborhoods_coordinates.csv")
df_neighborhood_locations = pd.read_csv(file_path).set_index('Neighborhood')
df_neighborhood_locations.head()

In [None]:
# Reorder the per-neighborhood averages by their index (the neighborhood)
mean_sale_price = mean_sale_price.sort_index()
mean_sale_price.head()

In [None]:
# Place the coordinates and the per-neighborhood averages side by side,
# aligned on their indexes, and keep only rows without missing values.
avg_values_by_nbh = pd.concat(
    [df_neighborhood_locations, mean_sale_price],
    axis=1,
    sort=False,
).dropna()
avg_values_by_nbh.head()

In [None]:
# Mapbox scatter plot with one marker per row of avg_values_by_nbh, colored by
# gross rent and labeled with the row's index value.
fig2 = px.scatter_mapbox(
    avg_values_by_nbh,
    lat='Lat',
    lon='Lon',
    color='gross_rent',
    text=avg_values_by_nbh.index,
    zoom=10,
    title='Gross Rent in San Francisco, CA',
)
fig2.show()