# Ann Arbor House Price Analysis

By: Grant Jason

For: SIADS 521, Assignment #3

In [76]:
# All my import statements at the top of the notebook
import requests
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
from matplotlib import style
import matplotlib.dates as mdates
import ipywidgets as widgets
from ipywidgets import interactive, Dropdown, IntRangeSlider, interact
from IPython.display import display, clear_output


### Get data from zillow.com, save it to Downloads so it can be opened and modified, then prepare the data by melting the date columns into a single date column

In [78]:
# Download the data, filter, and clean it
url = 'https://files.zillowstatic.com/research/public_csvs/zhvi/Neighborhood_zhvi_uc_sfrcondo_tier_0.33_0.67_sm_sa_month.csv?t=1742155647'
# Bound the wait so a stalled connection cannot hang the notebook, and stop on
# an HTTP error instead of saving an error page that would fail later in read_csv.
response = requests.get(url, timeout=60)
response.raise_for_status()

# Define downloads as the directory to save the file
writeable_directory = os.path.expanduser('~/Downloads')
# The folder is not guaranteed to exist on every machine; create it if missing.
os.makedirs(writeable_directory, exist_ok=True)
file_path = os.path.join(writeable_directory, 'Zillow Data')

with open(file_path, 'wb') as file:
    file.write(response.content)

df = pd.read_csv(file_path)

# Filter the data to only include Ann Arbor
filtered_df = df[df['City'] == 'Ann Arbor']

# Melt the data so all dates are in one column: every column that is not
# listed as an identifier becomes one (Date, HomeValue) row per region.
id_columns = ['RegionID', 'SizeRank', 'RegionName', 'RegionType', 'StateName',
              'State', 'City', 'Metro', 'CountyName']
melted_df = pd.melt(filtered_df, id_vars=id_columns,
                    var_name='Date', value_name='HomeValue')

# The melted column headers are strings; parse them into real timestamps.
melted_df['Date'] = pd.to_datetime(melted_df['Date'])

### First Chart: Average Home Price in Ann Arbor Over Time

In [81]:
def create_chart_one():
    """Plot the yearly mean home value with a +/- one standard deviation band.

    Reads the module-level ``melted_df``, groups ``HomeValue`` by the calendar
    year of ``Date`` and draws the yearly mean as a line, with a shaded band
    one standard deviation either side of it. Draws onto a new 10x6 figure
    and returns nothing.
    """
    # prepare data for chart one: mean and std of HomeValue over all rows in each year
    with_year = melted_df.assign(Year=melted_df['Date'].dt.year)
    by_year = with_year.groupby(['Year'])['HomeValue'].agg(['mean', 'std']).reset_index()
    years = by_year['Year']
    center = by_year['mean']
    spread = by_year['std']

    # plot chart 1
    plt.style.use('fivethirtyeight')
    _, ax = plt.subplots(figsize=(10, 6))
    ax.plot(years, center, label='Mean Home Value')
    ax.fill_between(years, center - spread, center + spread, alpha=0.3)

    # format y axis to be in $
    ax.yaxis.set_major_formatter(mtick.StrMethodFormatter('${x:,.0f}'))

    # other general formatting
    ax.set_xlabel('Year')
    plt.xticks(fontsize=10, rotation=45)
    ax.set_ylabel('Home Value')
    ax.set_title('Mean Home Value in Ann Arbor by Year', pad=20)
    # labels are matched to the line and the band in the order they were drawn
    ax.legend(['Mean Home Value', '+/- 1 Std Dev'])

### Second Chart: Price of Homes by Neighborhood Over Time

In [71]:
def create_chart_two():
    """Draw a heatmap of mean home value by neighborhood and year.

    Reads the module-level ``melted_df``, averages ``HomeValue`` for each
    (``RegionName``, year of ``Date``) pair and renders the result as a
    seaborn heatmap with neighborhoods as rows and years as columns. Draws
    onto a new 10x12 figure and returns nothing.
    """
    # prepare data for chart 2
    chart_two_df = melted_df.copy()
    chart_two_df['Year'] = chart_two_df['Date'].dt.year
    # Only the mean is plotted, so the standard deviation is not computed.
    agg_df = (
        chart_two_df.groupby(['RegionName', 'Year'])['HomeValue']
        .mean()
        .reset_index(name='mean')
    )
    neighborhood_data = agg_df.pivot_table(values='mean', index='RegionName', columns='Year')

    plt.style.use('fivethirtyeight')
    plt.figure(figsize=(10, 12))
    # NOTE(review): set_theme is applied after the figure is created and changes
    # global plotting settings, so it may also affect later charts - confirm intended.
    sns.set_theme(style=style.library['fivethirtyeight'])

    # use seaborn heatmap functionality; it returns the Axes it drew on
    ax = sns.heatmap(neighborhood_data, cmap='magma')

    # set color bar format to $ (same formatter class as the other charts)
    cbar = ax.collections[0].colorbar
    cbar.ax.yaxis.set_major_formatter(mtick.StrMethodFormatter('${x:,.0f}'))

    # set titles and axis labels
    ax.set_title('Home Prices by Neighborhood in Ann Arbor by Year', pad=25)
    ax.set_xlabel('Year')
    plt.xticks(fontsize=10, rotation=45)
    ax.set_ylabel('Neighborhood')

### Create the neighborhood selector that will be used in the next two charts

In [82]:
# create neighborhood selector

# one dropdown entry per distinct neighborhood name in the melted data
neighborhoods = melted_df['RegionName'].unique()

select_var = Dropdown(
    options=neighborhoods,
    # set default value to barton hills like in the narrative document
    value='Barton Hills',
    description='Neighborhood:',
    disabled=False,
    layout=widgets.Layout(width='20%'),
    style=dict(
        description_width='initial',
        font_size='14px',
        color='black',
        font_weight='bold',
    ),
)

### Third Chart: Examining trends in each neighborhood over a more granular timeline

In [74]:
def create_chart_three(neighborhood):
    """Plot the monthly Zillow Home Value Index for one neighborhood.

    Args:
        neighborhood: Value matched against the ``RegionName`` column of the
            module-level ``melted_df``; only matching rows are plotted.

    Draws onto a new 10x6 figure and returns nothing.
    """
    # prepare data for chart 3: filter first so only the selected rows are copied
    is_selected = melted_df['RegionName'] == neighborhood
    chart_three_df = melted_df.loc[is_selected].copy()
    # snap each date to the start of its month
    chart_three_df['Month'] = chart_three_df['Date'].dt.to_period('M').dt.to_timestamp()

    plt.style.use('fivethirtyeight')
    plt.figure(figsize=(10, 6))
    # label the line directly so the legend text cannot drift from the artist
    plt.plot(chart_three_df['Month'], chart_three_df['HomeValue'],
             label='Zillow Home Value Index')

    # format y axis to be in $
    fmt = mtick.StrMethodFormatter('${x:,.0f}')
    plt.gca().yaxis.set_major_formatter(fmt)

    # format x axis to only show the years
    plt.gca().xaxis.set_major_locator(mdates.YearLocator())
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y'))

    plt.xlabel('Date')
    plt.xticks(fontsize=10, rotation=45)
    # set y lim so we can visualize consistent comparisons among different selections;
    # values above the upper limit fall outside the visible area
    plt.ylim(0, 1500000)
    plt.ylabel('Home Value')
    plt.title('ZHVI for {} neighborhood in Ann Arbor'.format(neighborhood), pad=20)
    plt.legend()

### Fourth Chart: Examining how a neighborhood's home value index is distributed

In [None]:
def create_chart_four(neighborhood):
    