In [1]:
import pandas as pd
import geopandas as gpd
import os
import fiona
import datetime
import shapely

from bokeh.plotting import Figure, show, output_file, save
from bokeh.models import CustomJS, Slider, DateSlider, ColorBar, LogTicker, FixedTicker, BasicTicker
from bokeh.layouts import column, widgetbox
from bokeh.io import show, output_notebook
from bokeh.models import HoverTool, ColumnDataSource, LinearColorMapper, LogColorMapper

In [2]:
# Monthly Yelp review counts, read from a tab-separated file that sits next to
# the notebook. The 'Month' column is parsed as dates and becomes the index;
# the remaining columns are later matched against neighborhood names.
PGH_review = pd.read_csv(
    'Yelp_ReviewCount_withPredict.csv',
    sep='\t',
    parse_dates=['Month'],
    index_col='Month',
)

In [3]:
# Read the neighborhood shapefile and keep only the Pittsburgh polygons.
shapefile_path = os.path.join(r"PA_neighborhood/", "ZillowNeighborhoods-PA.shp")
neighborhoods = gpd.read_file(shapefile_path)
Pittsburgh_neighborhood = neighborhoods.loc[neighborhoods['City'] == 'Pittsburgh'].copy()

# Count the neighborhoods that have no matching column in the review data.
list1 = Pittsburgh_neighborhood['Name'].tolist()
missing = [name for name in list1 if name not in PGH_review.columns]
len(missing)

25

In [4]:
# Reshape the review counts from 2006 onward so that every row is one column of
# the original table (labelled 'Name') and every column is one month, then
# attach the neighborhood outlines by name. The outer join keeps names that
# appear on only one side.
PGH_neighbor = (
    PGH_review.loc['2006':]
    .T
    .copy()
    .reset_index()
    .rename(columns={'index': 'Name'})
)
Merged_result = pd.merge(PGH_neighbor, Pittsburgh_neighborhood, on='Name', how='outer')

# Relabel every date-like column name as a 'YYYY-MM' string, which is the form
# the plotting cells use to look months up.
month_labels = {
    col: col.strftime('%Y-%m')
    for col in Merged_result.columns
    if isinstance(col, datetime.date)
}
Merged_result = Merged_result.rename(columns=month_labels)
            

In [5]:
# Frame that feeds the Bokeh review map: the merged table without the
# shapefile's administrative columns.
# NOTE(review): an earlier version of this cell also dropped rows 1 and 65
# (the restaurant map further down still does) - confirm whether those rows
# should be removed here as well.
admin_columns = ['State', 'County', 'City', 'RegionID']
bokeh_df = Merged_result.drop(admin_columns, axis='columns')

In [6]:
def getPolyCoords(row, geom, coord_type):
    """Return the x or y coordinates of a polygon's exterior ring as a list.

    Parameters
    ----------
    row : mapping-like (for example a DataFrame row)
        Object that is indexed with ``geom`` to obtain the geometry.
    geom : hashable
        Key / column name under which the geometry is stored in ``row``.
    coord_type : {'x', 'y'}
        Which coordinate sequence of the exterior ring to return.

    Returns
    -------
    list
        Coordinates of the exterior ring. For a multi-part geometry the
        exterior of its largest part (by ``area``) is used. An empty list is
        returned when the geometry is missing (``None`` / NaN) or has no parts.

    Raises
    ------
    ValueError
        If ``coord_type`` is neither ``'x'`` nor ``'y'``.
    """
    if coord_type not in ('x', 'y'):
        raise ValueError("coord_type must be 'x' or 'y', got %r" % (coord_type,))

    shape = row[geom]

    # Rows that come out of an outer merge without a matching outline carry
    # None or NaN instead of a geometry; give them an empty outline.
    if shape is None or (isinstance(shape, float) and shape != shape):
        return []

    if not hasattr(shape, 'exterior'):
        # Multi-part geometry. Its parts are reached through `.geoms`, which
        # exists in Shapely 1.x and 2.x; indexing the geometry directly
        # (`shape[1]`) is not supported in Shapely 2.x and also fails when
        # there is only one part. The largest part is used as the outline.
        parts = list(shape.geoms)
        if not parts:
            return []
        shape = max(parts, key=lambda part: part.area)

    xs, ys = shape.exterior.coords.xy
    return list(xs) if coord_type == 'x' else list(ys)

In [7]:
# Store each neighborhood's outline as two list-valued columns, 'x' and 'y',
# which the plotting cell passes to Bokeh's `patches` glyph.
bokeh_df['x'] = bokeh_df.apply(getPolyCoords, geom='geometry', coord_type='x', axis=1)
bokeh_df['y'] = bokeh_df.apply(getPolyCoords, geom='geometry', coord_type='y', axis=1)
bokeh_df = bokeh_df.drop(['geometry'], axis=1)

# Express every month's review counts as a percentage of that month's total.
# The month columns are recognised by their 'YYYY-MM' label rather than by a
# fixed positional slice, so the normalisation keeps working when the input
# file covers more or fewer months and never touches the 'x' / 'y' columns.
month_columns = [
    col for col in bokeh_df.columns
    if isinstance(col, str) and len(col) == 7 and col[4] == '-'
    and col[:4].isdigit() and col[5:].isdigit()
]
bokeh_df_percent = bokeh_df.copy()
# Dividing a frame by a Series aligns the Series index with the frame's
# columns, so every month is divided by its own column total.
bokeh_df_percent[month_columns] = 100 * bokeh_df[month_columns] / bokeh_df[month_columns].sum()

In [8]:
# Interactive map of the percentage of Yelp reviews per Pittsburgh
# neighborhood, with a date slider that selects the month being shown.

# The slider's starting date also decides which month is drawn first, so the
# map and the slider agree as soon as the figure appears.
initial_date = datetime.date(2014, 4, 1)

# 'used' is the column the patches are coloured by; the slider callback
# overwrites it with the values of the selected month.
bokeh_df_percent['used'] = bokeh_df_percent[initial_date.strftime('%Y-%m')]
source = ColumnDataSource(bokeh_df_percent)

TOOLS = "pan,wheel_zoom,reset,hover,save"
custom_colors = ['#ffede6','#ffb699','#ffa07a','#ff7f4d','#ff4800']
color_mapper = LinearColorMapper(palette=custom_colors, low = 0.0, high = 15)


p = Figure(title="Fraction of Yelp Reviews (%)", tools = TOOLS, x_axis_location=None, y_axis_location=None)
p.width=600
p.height = 550
p.grid.grid_line_color = None


renderer = p.patches('x', 'y', source=source,
          fill_color={'field': 'used', 'transform': color_mapper},
          fill_alpha = 0.8, line_color='black', line_width=0.6)

hover = p.select_one(HoverTool)
hover.point_policy = "follow_mouse"
hover.tooltips = [
    ("Name", "@Name"),
    ("Review Count(%)", "@used"),
    ("(Lat, Long)", "($y, $x)"),
]

color_bar = ColorBar(color_mapper=color_mapper, ticker=BasicTicker(),
                     label_standoff=12, border_line_color=None, location=(0,0))

p.add_layout(color_bar, 'left')

# Browser-side callback: copy the selected month's column into 'used'.
callback = CustomJS(args=dict(source=source), code="""
    var data = source.data;

    // The slider value is a timestamp in milliseconds. Read it back with the
    // UTC getters so the month does not shift with the viewer's time zone.
    var date = new Date(slider.value);
    var year = date.getUTCFullYear().toString();
    // getUTCMonth() is zero-based (January is 0); column names use 01-12.
    var month = (date.getUTCMonth() + 1).toString();
    var month_str = month.length == 1 ? '0' + month : month;

    var time = year + '-' + month_str;
    var used = data['used'];
    var should_be = data[time];
    if (should_be === undefined) {
        // No column for this month: keep the current colouring.
        return;
    }
    for (var i = 0; i < should_be.length; i++) {
        used[i] = should_be[i];
    }

    source.change.emit();
    """)

time_slider = DateSlider(title="Date", start=datetime.date(2006, 1, 1), end=datetime.date(2017, 11, 1),
                         value=initial_date, step=1, format = "%Y-%m", callback=callback)
# The callback needs the slider and the slider needs the callback, so the
# slider is handed to the callback after both objects exist.
callback.args['slider'] = time_slider

layout = column(p, time_slider)
show(layout)

In [9]:
# Read in data on the number of restaurants in Pittsburgh (tab-separated; the
# 'Year' column is parsed as dates and used as the index).
# A copy next to the notebook is preferred, matching how the review-count file
# is loaded. The original absolute location is used only when no local copy
# exists, so the notebook still runs on the machine it was written on.
restaurant_csv = 'Yelp_RestaurantCount.csv'
if not os.path.exists(restaurant_csv):
    restaurant_csv = '/Users/chenchen/Documents/DataScience/DataIncubator/dataset/Yelp_RestaurantCount.csv'
PGH_restaurants = pd.read_csv(restaurant_csv, sep = '\t', parse_dates=['Year'], index_col='Year')

In [10]:
# Take the restaurant counts from 2017 onward, turn every column of the
# original table into a row labelled 'Name', and attach the neighborhood
# outlines by name. The outer join keeps names found on only one side.
PGH_neighbor_res = (
    PGH_restaurants.loc['2017':]
    .T
    .copy()
    .reset_index()
    .rename(columns={'index': 'Name'})
)
Merged_result_res = pd.merge(PGH_neighbor_res, Pittsburgh_neighborhood, on='Name', how='outer')

# Relabel every date-like column name as a 'YYYY' string, the form the
# plotting cell uses to look the year up.
year_labels = {
    col: col.strftime('%Y')
    for col in Merged_result_res.columns
    if isinstance(col, datetime.date)
}
Merged_result_res = Merged_result_res.rename(columns=year_labels)

In [11]:
# Frame behind the restaurant map: the merged table without the shapefile's
# administrative columns.
bokeh_res_df = Merged_result_res.drop(['State', 'County', 'City', 'RegionID'], axis='columns')

# Leave the 'Uncategorized' entries out of the plot.
# NOTE(review): they are addressed by position (rows 1 and 65) - confirm these
# positions still point at the intended rows whenever the input data changes.
rows_to_skip = bokeh_res_df.index[[1, 65]]
bokeh_res_df = bokeh_res_df.drop(rows_to_skip)

# Store each outline as list-valued 'x' and 'y' columns, then discard the
# geometry objects themselves.
for axis_name in ('x', 'y'):
    bokeh_res_df[axis_name] = bokeh_res_df.apply(
        getPolyCoords, geom='geometry', coord_type=axis_name, axis=1)
bokeh_res_df = bokeh_res_df.drop(['geometry'], axis='columns')

In [12]:
# Interactive map of the number of restaurants per Pittsburgh neighborhood.

# 'used' is the column the patches are coloured by; the slider callback
# overwrites it with the values of the selected year.
bokeh_res_df['used'] = bokeh_res_df['2017']
source3 = ColumnDataSource(bokeh_res_df)

TOOLS = "pan,wheel_zoom,reset,hover,save"
custom_colors = ['#ffede6','#ffdacc','#ffc8b3','#ffb699','#ffa480','#ffa07a','#ff9166','#ff7f4d','#ff6d33','#ff4800']
color_mapper = LinearColorMapper(palette=custom_colors, low = 0.0, high = 200)


p = Figure(title="Number of restaurants in 2017", tools = TOOLS, x_axis_location=None, y_axis_location=None)
p.width=600
p.height = 550
p.grid.grid_line_color = None

renderer = p.patches('x', 'y', source=source3,
          fill_color={'field': 'used', 'transform': color_mapper},
          fill_alpha = 0.8, line_color='black', line_width=0.6)

hover = p.select_one(HoverTool)
hover.point_policy = "follow_mouse"
hover.tooltips = [
    ("Name", "@Name"),
    ("# Restaurants", "@used"),
    ("(Lat, Long)", "($y, $x)"),
]


color_bar = ColorBar(color_mapper=color_mapper, ticker=BasicTicker(),
                     label_standoff=12, border_line_color=None, location=(0,0))

p.add_layout(color_bar, 'left')

# Browser-side callback: copy the selected year's column into 'used'.
callback = CustomJS(args=dict(source=source3), code="""
    var data = source.data;

    // The slider value is a timestamp in milliseconds. Read it back with the
    // UTC getter so the year does not shift with the viewer's time zone.
    var date = new Date(slider.value);
    var year = date.getUTCFullYear().toString();

    var used = data['used'];
    // Columns are named by year (for example '2017'), so the year string is
    // the lookup key.
    var should_be = data[year];
    if (should_be === undefined) {
        // No column for this year: keep the current colouring.
        return;
    }
    for (var i = 0; i < should_be.length; i++) {
        used[i] = should_be[i];
    }

    source.change.emit();
    """)

time_slider = DateSlider(title="Year", start=datetime.date(2017, 1, 1), end=datetime.date(2017, 2, 1),
                         value=datetime.date(2017, 1, 1), step=1, format = "%Y", callback=callback)
# The callback needs the slider and the slider needs the callback, so the
# slider is handed to the callback after both objects exist.
callback.args['slider'] = time_slider

layout = column(p, time_slider)
show(layout)
# To write a standalone HTML page instead of showing the figure inline, call
# output_file("restaurant_count.html") followed by save(layout).
