## Alpha Release

#### Team Members: Bingyi Li, Guoqiang Liang

The primary goal of this project is to create an interactive visualization of historical U.S. Housing Price Index (HPI) values. For the alpha release, we created maps that show the HPI of each state and of each county at a chosen point in time.

In [29]:
import numpy as np
import pandas as pd

from plotly.offline import init_notebook_mode, plot, iplot
import plotly.figure_factory as ff

# Set up plotly's offline notebook mode before any of the iplot(...) calls in
# the cells below.
# NOTE(review): connected=True presumably loads plotly.js from the CDN instead
# of embedding it in the notebook - confirm against the plotly version in use.
init_notebook_mode(connected=True)

## Data

For this project, we use two datasets. The first contains HPI values for each state, and the second contains more detailed HPI values for each county. Here is what these two datasets look like.

In [31]:
# State-level HPI table. The CSV ships without a header row, so the column
# names are supplied while reading.
states = pd.read_csv(
    './data/HPI_AT_state.csv',
    header=None,
    names=['code', 'year', 'season', 'HPI'],
)
states.head()

Unnamed: 0,code,year,season,HPI
0,AK,1975,1,61.65
1,AK,1975,2,63.2
2,AK,1975,3,71.19
3,AK,1975,4,67.23
4,AK,1976,1,70.36


In [26]:
# Draw a single choropleth of state-level HPI for the first quarter of 2000.

# Work on a string-typed COPY of `states`. The hover text below is built by
# string concatenation, so every column is needed as text, but converting
# `states` itself in place (as `df = states` followed by per-column astype did)
# would break the numeric year/season filters used by later cells.
df = states.astype(str)

# Colour scale as [position, colour] pairs, running from a light to a dark
# purple as the index value increases.
scl = [
    [0.0, 'rgb(242,240,247)'],
    [0.2, 'rgb(218,218,235)'],
    [0.4, 'rgb(188,189,220)'],
    [0.6, 'rgb(158,154,200)'],
    [0.8, 'rgb(117,107,177)'],
    [1.0, 'rgb(84,39,143)'],
]

# Hover label shown for each state.
df['text'] = ('State: ' + df['code'] + '<br>'
              + 'Year: ' + df['year'] + ' Season: ' + df['season'])

# The snapshot to plot; values are compared as strings because of the cast above.
to_show = df.query("year == '2000' and season == '1'")

data = [dict(
    type='choropleth',
    colorscale=scl,
    autocolorscale=False,
    locations=to_show['code'],
    # The colour axis needs numbers, so undo the string cast for HPI only.
    z=to_show['HPI'].astype(float),
    locationmode='USA-states',
    text=to_show['text'],
    marker=dict(
        line=dict(
            color='rgb(255,255,255)',
            width=2,
        ),
    ),
    colorbar=dict(
        title="Price Index",
    ),
)]

layout = dict(
    title='US Housing Price Index',
    geo=dict(
        scope='usa',
        projection=dict(type='albers usa'),
        showlakes=True,
        lakecolor='rgb(255, 255, 255)',
    ),
)

fig = dict(data=data, layout=layout)

url = iplot(fig, filename='d3-cloropleth-map')

In [32]:
# Build one choropleth trace per (year, quarter) and a slider that shows
# exactly one of them at a time.

# Compare on integer keys regardless of how `states` is currently typed, so
# the filter still works if the columns were converted to strings earlier.
years = states['year'].astype(int)
quarters = states['season'].astype(int)

data = []
labels = []
for year in range(1975, 2018):
    for quarter in range(1, 5):
        df = states[(years == year) & (quarters == quarter)]
        current = dict(type='choropleth',
                       locations=df['code'].astype(str),
                       z=df['HPI'].astype(float),
                       locationmode='USA-states',
                       # Only the first trace is shown initially, matching the
                       # slider's starting position (active=0). Without this
                       # every trace is drawn on first render.
                       visible=(len(data) == 0))
        data.append(current)
        # Include the quarter so the four steps of a year are distinguishable.
        labels.append('%i Q%i' % (year, quarter))

# One slider step per trace: each step hides every trace except its own.
steps = []
for position, label in enumerate(labels):
    visibility = [False] * len(data)
    visibility[position] = True
    steps.append(dict(method='restyle',
                      args=['visible', visibility],
                      label=label))

sliders = [dict(active=0,
                pad={"t": 1},
                steps=steps)]
layout = dict(geo=dict(scope='usa',
                       projection={'type': 'albers usa'}),
              sliders=sliders)

fig = dict(data=data,
           layout=layout)
iplot(fig)

In [4]:
# County-level HPI table. The FIPS code column is read with dtype=object
# rather than letting pandas infer a type for it.
county_dtypes = {'FIPS code': object}
county = pd.read_csv('./data/HPI_AT_BDL_county.csv', dtype=county_dtypes)
county.head()

Unnamed: 0,State,County,FIPS code,Year,Annual Change (%),HPI,HPI with 1990 base,HPI with 2000 base
0,AL,Autauga,1001,1986,.,100.0,95.4,71.03
1,AL,Autauga,1001,1987,-2.17,97.83,93.33,69.49
2,AL,Autauga,1001,1988,3.24,100.99,96.35,71.74
3,AL,Autauga,1001,1989,4.16,105.19,100.36,74.72
4,AL,Autauga,1001,1990,-0.35,104.82,100.0,74.45


In [52]:
# County-level choropleth of California HPI for the year 2000.

# Year and HPI are compared as text so the filter behaves the same whether
# pandas parsed those columns as numbers or as strings. Rows whose HPI is "."
# are dropped (presumably the file's marker for a missing value) because they
# cannot be converted to float.
is_ca_2000 = (county['State'] == 'CA') & (county['Year'].astype(str) == '2000')
has_hpi = county['HPI'].astype(str) != '.'
df_sample = county[is_ca_2000 & has_hpi]

values = df_sample['HPI'].astype(float).tolist()
fips = df_sample['FIPS code'].tolist()

if not values:
    raise ValueError('No California HPI rows found for 2000; '
                     'check the State/Year filter and the input file.')

colorscale = ["#f7fbff", "#ebf3fb", "#deebf7", "#d2e3f3", "#c6dbef", "#b3d2e9", "#9ecae1",
              "#85bcdb", "#6baed6", "#57a0ce", "#4292c6", "#3082be", "#2171b5", "#1361a9",
              "#08519c", "#0b4083", "#08306b"]

# k endpoints split the values into k + 1 bins, and each bin needs its own
# colour, so len(colorscale) - 1 cut points are required. They are spaced
# evenly strictly inside the observed HPI range so that every colour can be
# used; fixed endpoints such as 1..12 would put every HPI value in the top bin.
binning_endpoints = list(
    np.linspace(min(values), max(values), len(colorscale) + 1)[1:-1]
)

fig = ff.create_choropleth(
    fips=fips, values=values, scope=['CA'],
    county_outline={'color': 'rgb(255,255,255)', 'width': 0.5}, round_legend_values=True,
    binning_endpoints=binning_endpoints, colorscale=colorscale,
    legend_title='HPI by County', title='Housing Price Index of California'
)
iplot(fig, filename='choropleth_california')