## Housing Rental Analysis for San Francisco


In [29]:
# Import the required libraries and dependencies
import pandas as pd
import hvplot.pandas
from pathlib import Path


## Read in data and find summary stats

In [30]:
# Using the read_csv function and Path module, create a DataFrame 
# by importing the sfo_neighborhoods_census_data.csv file from the Resources folder
sfo_data_df = pd.read_csv(
    Path("Resources/sfo_neighborhoods_census_data.csv"), 
    parse_dates=True, 
    infer_datetime_format=True)


# Review the first and last five rows of the DataFrame
display(sfo_data_df.head())
display(sfo_data_df.tail())
display(sfo_data_df.describe())
display(sfo_data_df.isnull().sum())

Unnamed: 0,year,neighborhood,sale_price_sqr_foot,housing_units,gross_rent
0,2010,Alamo Square,291.182945,372560,1239
1,2010,Anza Vista,267.932583,372560,1239
2,2010,Bayview,170.098665,372560,1239
3,2010,Buena Vista Park,347.394919,372560,1239
4,2010,Central Richmond,319.027623,372560,1239


Unnamed: 0,year,neighborhood,sale_price_sqr_foot,housing_units,gross_rent
392,2016,Telegraph Hill,903.049771,384242,4390
393,2016,Twin Peaks,970.08547,384242,4390
394,2016,Van Ness/ Civic Center,552.602567,384242,4390
395,2016,Visitacion Valley,328.319007,384242,4390
396,2016,Westwood Park,631.195426,384242,4390


Unnamed: 0,year,sale_price_sqr_foot,housing_units,gross_rent
count,397.0,392.0,397.0,397.0
mean,2012.901763,489.209635,378209.732997,2765.722922
std,1.963794,240.08491,3823.506872,1068.961546
min,2010.0,40.772625,372560.0,1239.0
25%,2011.0,332.947034,374507.0,1530.0
50%,2013.0,438.664166,378401.0,2971.0
75%,2015.0,584.60984,382295.0,3739.0
max,2016.0,2258.702832,384242.0,4390.0


year                   0
neighborhood           0
sale_price_sqr_foot    5
housing_units          0
gross_rent             0
dtype: int64

## Calculate and Plot the Housing Units per Year


In [31]:
# Create a numerical aggregation that groups the data by the year and then averages the results.
housing_units_by_year = sfo_data_df.groupby('year').mean()

# Review the DataFrame
housing_units_by_year


Unnamed: 0_level_0,sale_price_sqr_foot,housing_units,gross_rent
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010,369.344353,372560.0,1239.0
2011,341.903429,374507.0,1530.0
2012,399.389968,376454.0,2324.0
2013,483.600304,378401.0,2971.0
2014,556.277273,380348.0,3528.0
2015,632.540352,382295.0,3739.0
2016,697.643709,384242.0,4390.0


In [32]:
# Use the hvplot function to plot the housing_units_by_year DataFrame as a bar chart. Make the x-axis represent the year and the y-axis represent the housing_units.
housing_units_by_year.hvplot.bar( 
    figsize=(10, 15),
    xlabel = "year",
    ylabel = "housing_units", 
    title= "Housing Units in SF from 2010 to 2016",
    color = 'blue'
).opts(yformatter='%.0f')


Question: What is the overall trend in housing_units over the period being analyzed?

Answer: based on the data and the visualizations, housing units have increased steadily with time.


## Calculate and Plot the Average Sale Prices per Square Foot

In [11]:
# Create a numerical aggregation by grouping the data by year and averaging the results
prices_square_foot_by_year = sfo_data_df.groupby('year').mean()

# Review the resulting DataFrame
prices_square_foot_by_year

Unnamed: 0_level_0,sale_price_sqr_foot,housing_units,gross_rent
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010,369.344353,372560.0,1239.0
2011,341.903429,374507.0,1530.0
2012,399.389968,376454.0,2324.0
2013,483.600304,378401.0,2971.0
2014,556.277273,380348.0,3528.0
2015,632.540352,382295.0,3739.0
2016,697.643709,384242.0,4390.0


Question: What is the lowest gross rent reported for the years included in the DataFrame?

Answer: 2010 shows the lowest gross rent reported for the years included in the DataFrame.

In [25]:
# Create a new DataFrame named prices_square_foot_by_year by filtering out the “housing_units” column. The new DataFrame should include the averages per year for only the sale price per square foot and the gross rent.

# Filter out the housing_units column, creating a new DataFrame 
# Keep only sale_price_sqr_foot and gross_rent averages per year
prices_square_foot_by_year = sfo_data_df[['year', 'sale_price_sqr_foot', 'gross_rent']].set_index('year')
# Review the DataFrame
display(prices_square_foot_by_year)

Unnamed: 0_level_0,sale_price_sqr_foot,gross_rent
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2010,291.182945,1239
2010,267.932583,1239
2010,170.098665,1239
2010,347.394919,1239
2010,319.027623,1239
...,...,...
2016,903.049771,4390
2016,970.085470,4390
2016,552.602567,4390
2016,328.319007,4390


In [27]:
# Plot prices_square_foot_by_year. 
# Inclued labels for the x- and y-axes, and a title.
prices_square_foot_by_year.hvplot.line(
    x = "year"
)

Question: Did any year experience a drop in the average sale price per square foot compared to the previous year?

Answer: # YOUR ANSWER HERE

Question: If so, did the gross rent increase or decrease during that year?

Answer: # YOUR ANSWER HERE


## Compare the Average Sale Prices by Neighborhood


In [None]:
# Group by year and neighborhood and then create a new dataframe of the mean values
prices_by_year_by_neighborhood = sfo_data_df.groupby('year','neighborhood')

# Review the DataFrame
# YOUR CODE HERE