# Ann Arbor House Price Analysis

By: Grant Jason

For: SIADS 521, Assignment #3

In [211]:
# All my import statements at the top of the notebook
import requests
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
from matplotlib import style
import matplotlib.dates as mdates
import ipywidgets as widgets
from ipywidgets import interactive, Dropdown, IntRangeSlider, interact
from IPython.display import display, clear_output

### Get data from zillow.com, save it to Downloads so it can be opened and modified, then prepare the data by melting the date columns into a single date column

In [212]:
# Download the data, filter, and clean it
url = 'https://files.zillowstatic.com/research/public_csvs/zhvi/Neighborhood_zhvi_uc_sfrcondo_tier_0.33_0.67_sm_sa_month.csv?t=1742155647'
# Use a timeout so a stalled connection cannot hang the notebook, and fail loudly on an
# HTTP error instead of saving an error page to disk and trying to parse it as CSV.
response = requests.get(url, timeout=60)
response.raise_for_status()

# Define downloads as the directory to save the file (create it if it does not exist yet)
writeable_directory = os.path.expanduser('~/Downloads')
os.makedirs(writeable_directory, exist_ok=True)
file_path = os.path.join(writeable_directory, 'Zillow Data')

with open(file_path, 'wb') as file:
    file.write(response.content)

df = pd.read_csv(file_path)

# Filter the data to only include Ann Arbor
filtered_df = df[df['City'] == 'Ann Arbor']

# Melt the data so all dates are in one column: every column that is not listed in
# id_vars is treated as a date column and becomes one (Date, HomeValue) row per region.
melted_df = pd.melt(
    filtered_df,
    id_vars=['RegionID', 'SizeRank', 'RegionName', 'RegionType', 'StateName', 'State', 'City', 'Metro', 'CountyName'],
    var_name='Date',
    value_name='HomeValue',
)

# The melted column headers are strings; convert them to real timestamps for the .dt accessors used later
melted_df['Date'] = pd.to_datetime(melted_df['Date'])

### First Chart: Average Home Price in Ann Arbor Over Time

In [213]:
def create_chart_one(ax):
    """Plot the yearly mean home value, with a +/- 1 standard deviation band, on `ax`.

    Reads the module-level `melted_df`, groups `HomeValue` by the calendar year of
    `Date`, and draws the per-year mean as a line with a shaded band of one
    standard deviation on either side.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to draw the chart on.
    """
    #prepare data for chart one: mean and std of HomeValue per calendar year
    yearly_data = (
        melted_df.assign(Year=melted_df['Date'].dt.year)
        .groupby('Year')['HomeValue']
        .agg(['mean', 'std'])
        .reset_index()
    )
    years = yearly_data['Year']
    lower_band = yearly_data['mean'] - yearly_data['std']
    upper_band = yearly_data['mean'] + yearly_data['std']

    #plot chart 1
    plt.style.use('fivethirtyeight')
    # Label each artist directly so the legend does not depend on the order of the handles
    ax.plot(years, yearly_data['mean'], label='Mean Home Value')
    ax.fill_between(years, lower_band, upper_band, alpha=0.3, label='+/- 1 Std Dev')

    #format y axis to be in $
    fmt = mtick.StrMethodFormatter('${x:,.0f}')
    ax.yaxis.set_major_formatter(fmt)

    #other general formatting
    ax.set_xlabel('Year', fontsize=10)
    ax.set_xticks(years)
    ax.set_xticklabels(years, fontsize=8, rotation=45)
    ax.set_ylabel('Home Value', fontsize=10)
    # Size the y tick labels through tick_params so the $ formatter above stays active;
    # re-setting the labels from get_yticklabels() would replace it with fixed strings.
    ax.tick_params(axis='y', labelsize=8)
    ax.set_title('Mean Home Value in Ann Arbor by Year', pad=20, fontsize=14)
    ax.legend()

### Second Chart: Price of Homes by Neighborhood Over Time

In [214]:
def create_chart_two(ax):
    """Draw a neighborhood-by-year heatmap of mean home value on `ax`.

    Reads the module-level `melted_df`, averages `HomeValue` per
    (`RegionName`, year) pair, pivots the result so neighborhoods are rows and
    years are columns, and renders it with seaborn's heatmap.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to draw the heatmap on.
    """
    #prepare data for chart 2: one mean HomeValue per neighborhood and calendar year
    yearly_means = (
        melted_df.assign(Year=melted_df['Date'].dt.year)
        .groupby(['RegionName', 'Year'])['HomeValue']
        .mean()
        .reset_index(name='mean')
    )
    neighborhood_data = yearly_means.pivot_table(values='mean', index='RegionName', columns='Year')

    plt.style.use('fivethirtyeight')

    # sns.set_theme rewrites the global matplotlib rcParams. Scope it to the heatmap call
    # so it cannot leak into charts drawn afterwards (e.g. the next dashboard refresh).
    with plt.rc_context():
        sns.set_theme(style=style.library['fivethirtyeight'])

        #use seaborn heatmap functionality
        heatmap = sns.heatmap(neighborhood_data, cmap='magma', ax=ax)

    #set color bar format to $ (same formatter style as the other charts)
    cbar = heatmap.collections[0].colorbar
    cbar.ax.yaxis.set_major_formatter(mtick.StrMethodFormatter('${x:,.0f}'))
    cbar.ax.tick_params(labelsize=8)

    #set titles and axis labels
    ax.set_title('Home Prices by Neighborhood in Ann Arbor by Year', pad=25, fontsize=14)
    ax.set_xlabel('Year', fontsize=10)
    ax.set_ylabel('Neighborhood', fontsize=10)
    # Restyle the tick labels seaborn already placed instead of re-setting them
    ax.tick_params(axis='x', labelsize=8, labelrotation=45)
    ax.tick_params(axis='y', labelsize=8)


### Create the neighborhood selector that will be used in the next two charts

In [None]:
# create neighborhood selector

neighborhoods = melted_df['RegionName'].unique()

# set default value to barton hills like in the narrative document; if that name is not in
# the downloaded data, fall back to the first available neighborhood so the Dropdown does
# not reject a value that is missing from its options
if 'Barton Hills' in neighborhoods:
    default_neighborhood = 'Barton Hills'
elif len(neighborhoods) > 0:
    default_neighborhood = neighborhoods[0]
else:
    default_neighborhood = None

select_var = widgets.Dropdown(
    options=neighborhoods,
    description='Neighborhood:',
    value=default_neighborhood,
    disabled=False,
    layout=widgets.Layout(width='20%'),
    style={'description_width': 'initial'}
)

### Third Chart: Examining trends in each neighborhood over a more granular timeline

In [216]:
def create_chart_three(ax, neighborhood):
    """Plot the monthly home value of a single neighborhood as a line chart on `ax`.

    Reads the module-level `melted_df`, keeps the rows whose `RegionName` equals
    `neighborhood`, and plots `HomeValue` against the first day of each row's month.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to draw the chart on.
    neighborhood : str
        Value of `RegionName` to plot; also used in the chart title.
    """
    # prepare data for chart 3: filter first so only one neighborhood's rows are transformed
    chart_three_df = melted_df[melted_df['RegionName'] == neighborhood]
    # snap every date to the start of its month
    months = chart_three_df['Date'].dt.to_period('M').dt.to_timestamp()

    plt.style.use('fivethirtyeight')
    ax.plot(months, chart_three_df['HomeValue'], label='Home Value')

    #format y axis to be in $
    fmt = mtick.StrMethodFormatter('${x:,.0f}')
    ax.yaxis.set_major_formatter(fmt)

    #format x axis to only show the years
    ax.xaxis.set_major_locator(mdates.YearLocator())
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))

    #other formatting
    # fixed y range so charts for different neighborhoods share the same scale
    ax.set_ylim(0,1500000)
    ax.set_xlabel('Date', fontsize=10)
    ax.set_ylabel('Home Value', fontsize=10)
    # Style the tick labels through tick_params so the year locator/formatter and the
    # $ formatter configured above stay active; re-setting the labels from
    # get_xticklabels()/get_yticklabels() would replace them with fixed strings.
    ax.tick_params(axis='x', labelsize=8, labelrotation=45)
    ax.tick_params(axis='y', labelsize=8)
    ax.set_title('Average Home Value in {} Over Time'.format(neighborhood), pad=20, fontsize=14)

### Fourth Chart: Examining how a neighborhood's home value index is distributed

In [217]:
def create_chart_four(ax, neighborhood):
    """Plot a histogram of a single neighborhood's home values on `ax`.

    Reads the module-level `melted_df`, keeps the rows whose `RegionName` equals
    `neighborhood`, and bins their `HomeValue` entries into 25 bins.

    Parameters
    ----------
    ax : matplotlib.axes.Axes
        Axes to draw the histogram on.
    neighborhood : str
        Value of `RegionName` to plot; also used in the chart title.
    """
    # missing values cannot be binned, so drop them before building the histogram
    home_values = melted_df.loc[melted_df['RegionName'] == neighborhood, 'HomeValue'].dropna()

    plt.style.use('fivethirtyeight')
    ax.hist(home_values, bins=25)

    #format x axis to be in $
    fmt = mtick.StrMethodFormatter('${x:,.0f}')
    ax.xaxis.set_major_formatter(fmt)

    #similar to chart 3, set consistent axis so comparison among neighborhoods is visually more clear
    ax.set_xlim(0,1500000)
    ax.set_ylabel('Count', fontsize=10)
    ax.set_xlabel('Home Prices', fontsize=10)
    # Style the tick labels through tick_params so the $ formatter above stays active;
    # re-setting the labels from get_xticklabels() would replace it with fixed strings.
    ax.tick_params(axis='x', labelsize=8, labelrotation=45)
    ax.tick_params(axis='y', labelsize=8)
    ax.set_title('Distribution of Home Prices in {}'.format(neighborhood), pad=20, fontsize=14)

### Create Dashboard

In [None]:
def create_dashboard(value):
    """Render the four charts as a 2x2 dashboard for the selected neighborhood.

    Layout: chart three (top left), chart four (top right), chart one (bottom
    left), chart two (bottom right). Charts three and four are drawn for
    `value`; charts one and two do not take a neighborhood.

    Parameters
    ----------
    value : str
        Neighborhood name forwarded to `create_chart_three` and `create_chart_four`.
    """
    clear_output(wait=True)
    plt.style.use('fivethirtyeight')
    fig = plt.figure(figsize=(17,17))
    grid = fig.add_gridspec(2,2)

    # (grid cell, renderer) pairs, listed in the order the panels are drawn
    panels = [
        (grid[0, 0], lambda axes: create_chart_three(axes, value)),
        (grid[1, 0], create_chart_one),
        (grid[0, 1], lambda axes: create_chart_four(axes, value)),
        (grid[1, 1], create_chart_two),
    ]

    for cell, render in panels:
        axes = fig.add_subplot(cell)
        # make the new panel the current axes before its chart is drawn
        fig.sca(axes)
        render(axes)

    plt.tight_layout(pad=3.0, w_pad=1, h_pad=1)
    plt.show()

# Bind the neighborhood dropdown to the dashboard: its current selection is passed to
# create_dashboard as `value`. Leaving the widget as the last expression displays it.
interactive_dash = widgets.interactive(create_dashboard, value=select_var)
interactive_dash

interactive(children=(Dropdown(description='Neighborhood:', index=38, layout=Layout(width='20%'), options=('Bu…