# Data Exploration and Visualization

In [1]:
from datetime import date, datetime, timedelta
import pandas as pd

import bokeh, bokeh.plotting, bokeh.models
from bokeh.io import output_notebook, show
output_notebook()

import geopandas as gpd
from shapely.geometry import Point
import urllib
import numpy as np
import time

## Functions

In [124]:
# zones lookup table for Borough and Zone name against ID
zones = pd.read_csv('taxiZoneLookup.csv', index_col="LocationID")
zones.index = zones.index.astype(str)

In [125]:
df = pd.read_csv("zonePickups.csv", index_col="time_bin", parse_dates=["time_bin"])
df = df.fillna(0)

In [242]:
for t in range(1):
    z = df.iloc[t,:]
    myDatetime = z.name.strftime("%m/%d/%Y, %H:%M:%S")
    z = pd.DataFrame(z)
    z.columns = ['N']
    z["LocationID"] = z.index.astype(int)
    counts = pd.merge(geo_df, z, on="LocationID", how="left")

    gjds = bokeh.models.GeoJSONDataSource(geojson=counts.to_json())
    TOOLS = "pan,wheel_zoom,reset,hover,save"

    p = bokeh.plotting.figure(title=" NYC Taxi Pickups Heatmap " +  myDatetime, tools=TOOLS,
        x_axis_location=None, y_axis_location=None)

    color_mapper = bokeh.models.LogColorMapper(palette=bokeh.palettes.Blues256, low=50, high=800)

    p.patches('xs', 'ys', 
              fill_color={'field': 'N', 'transform': color_mapper},
              fill_alpha=1., line_color="black", line_width=0.5,          
              source=gjds)

    p.grid.grid_line_color = None

    hover = p.select_one(bokeh.models.HoverTool)
    hover.point_policy = "follow_mouse"
    hover.tooltips = u"""
    <div> 
        <div class="bokeh_hover_tooltip">Name : @zone</div>
        <div class="bokeh_hover_tooltip">Borough : @borough</div>
        <div class="bokeh_hover_tooltip">Trips Start : @N</div>
    </div>
    """

    color_bar = bokeh.models.ColorBar(
        color_mapper=color_mapper, orientation='horizontal',
        ticker=bokeh.models.FixedTicker(ticks=[50, 100, 200, 400, 800]),
        formatter=bokeh.models.PrintfTickFormatter(format='%d'),
        label_standoff=12, border_line_color=None, location=(0,0))
    p.add_layout(color_bar, 'below')

    t = show(p, notebook_handle=True)
    time.sleep(1)

In [232]:
from bokeh.io import push_notebook, show, output_notebook

In [240]:
z = df.iloc[10,:]
myDatetime = z.name.strftime("%m/%d/%Y, %H:%M:%S")
z = pd.DataFrame(z)
z.columns = ['N']
z["LocationID"] = z.index.astype(int)
counts = pd.merge(geo_df, z, on="LocationID", how="left")

gjds = bokeh.models.GeoJSONDataSource(geojson=counts.to_json())
TOOLS = "pan,wheel_zoom,reset,hover,save"

p = bokeh.plotting.figure(title=" NYC Taxi Pickups Heatmap " +  myDatetime, tools=TOOLS,
    x_axis_location=None, y_axis_location=None)

color_mapper = bokeh.models.LogColorMapper(palette=bokeh.palettes.Blues256, low=50, high=800)

p.patches('xs', 'ys', 
          fill_color={'field': 'N', 'transform': color_mapper},
          fill_alpha=1., line_color="black", line_width=0.5,          
          source=gjds)

p.grid.grid_line_color = None

hover = p.select_one(bokeh.models.HoverTool)
hover.point_policy = "follow_mouse"
hover.tooltips = u"""
<div> 
    <div class="bokeh_hover_tooltip">Name : @zone</div>
    <div class="bokeh_hover_tooltip">Borough : @borough</div>
    <div class="bokeh_hover_tooltip">Trips Start : @N</div>
</div>
"""

color_bar = bokeh.models.ColorBar(
    color_mapper=color_mapper, orientation='horizontal',
    ticker=bokeh.models.FixedTicker(ticks=[50, 100, 200, 400, 800]),
    formatter=bokeh.models.PrintfTickFormatter(format='%d'),
    label_standoff=12, border_line_color=None, location=(0,0))
p.add_layout(color_bar, 'below')

In [241]:
push_notebook(handle=t)

In [219]:
z = df.iloc[996,:]

In [220]:
myDatetime = z.name.strftime("%m/%d/%Y, %H:%M:%S")
myDatetime

'01/21/2017, 18:00:00'

In [221]:
z = pd.DataFrame(z)

In [222]:
z.columns = ['N']

In [223]:
#z = pd.DataFrame(z.sum())
#z.columns = ['N']
z["LocationID"] = z.index.astype(int)

In [224]:
#JFK: 132
#LG: 138

In [225]:
coord_system = {'init': 'epsg:4326'}
geo_df = gpd.read_file('shapefiles/taxi_zones.shp')
geo_df.crs = {'init' :'epsg:4326'} 
geo_df = geo_df.drop(['Shape_Area', 'Shape_Leng', 'OBJECTID'], axis=1)

  return _prepare_from_string(" ".join(pjargs))


In [226]:
geo_df = geo_df[(geo_df.borough == "Manhattan") | (geo_df.LocationID == 132) | (geo_df.LocationID == 138)]

In [229]:
counts = pd.merge(geo_df, z, on="LocationID", how="left")


gjds = bokeh.models.GeoJSONDataSource(geojson=counts.to_json())
TOOLS = "pan,wheel_zoom,reset,hover,save"

p = bokeh.plotting.figure(title=" NYC Taxi Pickups Heatmap " +  myDatetime, tools=TOOLS,
    x_axis_location=None, y_axis_location=None)

color_mapper = bokeh.models.LogColorMapper(palette=bokeh.palettes.Blues256, low=50, high=800) #Viridis256

p.patches('xs', 'ys', 
          fill_color={'field': 'N', 'transform': color_mapper},
          fill_alpha=1., line_color="black", line_width=0.5,          
          source=gjds)

p.grid.grid_line_color = None

hover = p.select_one(bokeh.models.HoverTool)
hover.point_policy = "follow_mouse"
hover.tooltips = u"""
<div> 
    <div class="bokeh_hover_tooltip">Name : @zone</div>
    <div class="bokeh_hover_tooltip">Borough : @borough</div>
    <div class="bokeh_hover_tooltip">Trips Start : @N</div>
</div>
"""

color_bar = bokeh.models.ColorBar(
    color_mapper=color_mapper, orientation='horizontal',
    ticker=bokeh.models.FixedTicker(ticks=[50, 100, 200, 400, 800]),
    formatter=bokeh.models.PrintfTickFormatter(format='%d'),
    label_standoff=12, border_line_color=None, location=(0,0))
p.add_layout(color_bar, 'below')

show(p)

In [10]:
def topZones(date, n, hour=None, data=df):
    if hour == None:
        datetime = date 
    else:
        datetime = str(date) + f" {hour:02d}"
        print(datetime)
     
    data = pd.DataFrame(data[datetime].transpose().sum(axis=1))
    data.index.name="LocationID"
    data.columns = ["sum"]
    data = pd.merge(zones[["Borough", "Zone"]], data, on="LocationID", how="left")
    data = data.sort_values(by="sum", ascending=False)[:n]
    
    return data

In [11]:
from pylab import figure, text

# line plots for power usage dataset
from pandas import read_csv
from matplotlib import pyplot
# load the new file
# line plot for each variable
pyplot.figure(figsize=(7,20))
for i in range(len(dataset.columns)):
    # create subplot
    pyplot.subplot(len(dataset.columns), 1, i+1)
    # get variable name
    name = dataset.columns[i]
    # plot data
    pyplot.plot(dataset[name])
    # set title
    my_title = pu_sum.loc[name,"Borough"] + ", " + pu_sum.loc[name,"Zone"]
    pyplot.title(my_title , y=1)
    # turn off ticks to remove clutter
    #pyplot.yticks([])
    pyplot.xticks([])
pyplot.show()

NameError: name 'dataset' is not defined

<Figure size 504x1440 with 0 Axes>