In [1]:
# From the bar chart, we can observe the following trends:
# 1. No matter what property type it is, the average price in Alameda County is always the lowest.
# 2. San Mateo County and Santa Clara County share a lot of similarities in house prices:
# the average price of single-family houses is much higher than that of townhouses and condos,
# and the difference in average price between townhouses and condos is bigger than in the other two counties.
# 3. In San Francisco, the difference in average price between property types is smaller than in the other counties,
# which means people in San Francisco don't have as strong a preference for single-family houses as
# residents in other counties do.

In [2]:
import numpy as np
import pandas as pd
from bokeh.models import ColumnDataSource, HoverTool, FactorRange
from bokeh.plotting import figure
from bokeh.transform import factor_cmap
from bokeh.palettes import Spectral4
from bokeh.io import output_notebook, show
# Configure Bokeh to render in the notebook, so that show() displays plots inline in the cell output.
output_notebook()

In [3]:
# Load the cleaned listings and compute the mean `PRICE IN K` for every
# county / property-type pair.
filename = 'data/cleaned_data.csv'
df = pd.read_csv(filename)

# Treat empty strings as missing, then keep only the rows that have all three
# fields the chart needs.
df_na = df.replace('', np.nan)
df1 = df_na.dropna(subset=['PROPERTY TYPE', 'PRICE IN K', 'COUNTY'])

# Sorted category labels. `county` defines the row order of every list in `data`.
county = sorted(df1['COUNTY'].unique().tolist())
property_type = sorted(df1['PROPERTY TYPE'].unique().tolist())

# Long-form table: one row per (COUNTY, PROPERTY TYPE) with its mean price.
county_type = (
    df1.groupby(['COUNTY', 'PROPERTY TYPE'])['PRICE IN K']
    .mean()
    .reset_index()
)

# Per-type slices of the long-form table (kept under their original names in
# case later cells reference them).
condo = county_type.loc[county_type['PROPERTY TYPE'] == 'Condo']
sfh = county_type.loc[county_type['PROPERTY TYPE'] == 'SFH']
th = county_type.loc[county_type['PROPERTY TYPE'] == 'TH']

# Wide table: one row per county in the same order as `county`, one column per
# plotted type. Reindexing inserts NaN for any county that has no listings of a
# given type, so every list below has exactly len(county) entries and position
# i always refers to county[i]. Taking the prices from the per-type slices
# instead would silently drop such a county and shift the remaining prices
# onto the wrong counties.
county_type_wide = (
    county_type
    .pivot(index='COUNTY', columns='PROPERTY TYPE', values='PRICE IN K')
    .reindex(index=county, columns=['Condo', 'SFH', 'TH'])
)
data = {'counties': county,
        'Condo': county_type_wide['Condo'].tolist(),
        'Townhouse': county_type_wide['TH'].tolist(),
        'SFH': county_type_wide['SFH'].tolist()}

In [4]:
# Grouped bar chart: mean price for each (county, property type) pair.

# Property-type label as it appears in the data -> key holding that type's
# per-county prices in `data`.
type_to_key = {'Condo': 'Condo', 'SFH': 'SFH', 'TH': 'Townhouse'}
bar_types = [t for t in property_type if t in type_to_key]

# Nested categorical factors (county, property type), county-major.
x = [(c, t) for c in county for t in bar_types]

# Flatten the per-type price lists in the same county-major order as `x`.
# Both sequences are derived from `bar_types`, so each bar height is paired
# with its own factor by construction instead of relying on a separately
# hard-coded type order that happens to match the sorted labels.
counts = tuple(
    price
    for per_county in zip(*(data[type_to_key[t]] for t in bar_types))
    for price in per_county
)
if len(counts) != len(x):
    # A length mismatch means some county is missing a price for some type;
    # plotting anyway would attach prices to the wrong bars.
    raise ValueError(
        "price lists do not line up with the county/property-type factors: "
        "%d factors but %d prices" % (len(x), len(counts))
    )

source = ColumnDataSource(data=dict(x=x, counts=counts))

# `height` replaces `plot_height`, which Bokeh deprecated in 2.4 and removed in 3.0.
p = figure(x_range=FactorRange(*x), height=250,
           x_axis_label="County/Property Type", y_axis_label="Price in K",
           title="Price by County & Property Type",
           toolbar_location=None, tools="")

# start=1, end=2 makes the colour mapper look only at the second level of each
# (county, type) factor, so bars are coloured by property type.
p.vbar(x='x', top='counts', width=0.9, source=source, line_color="white",
       fill_color=factor_cmap('x', palette=Spectral4, factors=bar_types, start=1, end=2))

p.y_range.start = 0
p.x_range.range_padding = 0.1
p.xaxis.major_label_orientation = 1
p.xgrid.grid_line_color = None

show(p)