In [204]:
#- Some trips, like to/from an airport, are fixed fee.
#- Fare at night is different from day time.
#- Fare varies with pickup location
#- Relevance of direction for fare amount
#- Time of Travel : During peak traffic hours, the taxi fare may be higher.
#- Day of Travel : Fare amount may differ on weekday and weekends
#- Weather Conditions : If it is snowing, there may be lower availability of cabs and hence higher fares.
#- Pickup or Drop-off Neighborhood : Fare may be different based on the kind of neighborhood
#- Availability of taxi : If a particular location has a lot of cabs available, the fares may be lower

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Silence warnings for the rest of the session. The call below passes no
# category, so it hides every warning, not only deprecation warnings.
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Separator line; only referenced by the disabled column-inspection loop.
BREAK = '------------------------------------------------------------------------'

In [3]:
# Green-cab trip records for 2016-02 through 2016-04, one frame per month.
# The 01, 05 and 06 files are left out of this run.
green2, green3, green4 = map(pd.read_csv, (
    '../raw_data/green_tripdata_2016-02.csv',
    '../raw_data/green_tripdata_2016-03.csv',
    '../raw_data/green_tripdata_2016-04.csv',
))

In [4]:
# Yellow-cab trip records for 2016-02 through 2016-04, one frame per month.
# The 01, 05 and 06 files are left out of this run.
yellow2, yellow3, yellow4 = map(pd.read_csv, (
    '../raw_data/yellow_tripdata_2016-02.csv',
    '../raw_data/yellow_tripdata_2016-03.csv',
    '../raw_data/yellow_tripdata_2016-04.csv',
))

In [5]:
# Stack the monthly green-cab frames into one frame. pd.concat replaces
# DataFrame.append, which was removed in pandas 2.0. The original row labels
# are kept (no ignore_index), exactly as append did, so labels repeat across months.
green_cabs = pd.concat([green2, green3, green4])

In [6]:
# Stack the monthly yellow-cab frames into one frame. pd.concat replaces
# DataFrame.append, which was removed in pandas 2.0. The original row labels
# are kept (no ignore_index), exactly as append did, so labels repeat across months.
yellow_cabs = pd.concat([yellow2, yellow3, yellow4])

# 1. Cleaning Data

In [7]:
# Check for strange values in dataframe.
# NOTE: the loops below are wrapped in a string literal, so they do not run;
# the cell only evaluates to that string.
"""for column in green_cabs.columns:
    print(column, ":", green_cabs[column].unique())

print(BREAK)    

for column in yellow_cabs.columns:
    print(column, ":", yellow_cabs[column].unique())"""

'for column in green_cabs.columns:\n    print(column, ":", green_cabs[column].unique())\n\nprint(BREAK)    \n\nfor column in yellow_cabs.columns:\n    print(column, ":", yellow_cabs[column].unique())'

In [8]:
# Normalise header casing so both frames can be addressed with lowercase names.
for cabs in (green_cabs, yellow_cabs):
    cabs.columns = cabs.columns.str.lower()

### In green_cabs, `ehail_fee` contains only NaN values, so it is removed. `trip_type` also contains NaN values and does not contribute much to our study, so it is removed as well.

In [9]:
# NOTE(review): 'trip_type ' carries a trailing space; presumably this mirrors
# the raw header - confirm against the CSV before "fixing" it.
green_cabs = green_cabs.drop(['ehail_fee', 'trip_type '], axis=1)

In [10]:
# Columns whose values must never be negative.
pos_features = [
    'passenger_count',
    'trip_distance',
    'fare_amount',
    'tip_amount',
    'tolls_amount',
    'total_amount',
]

In [79]:
# Possible explanations for anomalous records:
# - negative fare amount: refund from the company to the passenger
# - zero trip distance: booking without attendance (no-show)
# - faulty taxi meter

In [11]:
# Keep only rows in which every pos_features column is >= 0.
# (NaN compares as False, so rows with a missing value are dropped too.)
green_cabs = green_cabs[(green_cabs[pos_features] >= 0).all(axis=1)]
yellow_cabs = yellow_cabs[(yellow_cabs[pos_features] >= 0).all(axis=1)]

### Making sure that the longitudes and latitudes are within New York City's boundaries

In [208]:
# mid_coord = the middle coordinates for the map: the median pickup
# [latitude, longitude]. The columns are spelled out here because no COORD
# name is defined at this point in the notebook.
mid_coord = green_cabs[['pickup_latitude', 'pickup_longitude']].median().values

# Axis ranges: observed min/max of the green-cab pickup coordinates.
# NOTE(review): these bounds are derived from the data itself, so they are only
# as tight as green_cabs is when this cell runs - confirm they are the intended
# NYC bounding box on a fresh Restart & Run All.
pickup_lon = green_cabs['pickup_longitude']
pickup_lat = green_cabs['pickup_latitude']
x_Range = [pickup_lon.min(), pickup_lon.max()]
y_Range = [pickup_lat.min(), pickup_lat.max()]

x_Range, y_Range

([-74.25791168212889, -73.70039367675781],
 [40.5308837890625, 40.91608810424805])

In [62]:
# Bounding box taken from the two-element [min, max] axis ranges.
MIN_LAT, MAX_LAT = y_Range[0], y_Range[1]
MIN_LONG, MAX_LONG = x_Range[0], x_Range[1]

In [15]:
# Keep green-cab trips whose pickup and dropoff both fall inside the
# bounding box (bounds inclusive).
green_in_box = (
    green_cabs['pickup_latitude'].between(MIN_LAT, MAX_LAT)
    & green_cabs['pickup_longitude'].between(MIN_LONG, MAX_LONG)
    & green_cabs['dropoff_latitude'].between(MIN_LAT, MAX_LAT)
    & green_cabs['dropoff_longitude'].between(MIN_LONG, MAX_LONG)
)
green_cabs = green_cabs[green_in_box]

In [17]:
# Keep yellow-cab trips whose pickup and dropoff both fall inside the
# bounding box (bounds inclusive).
yellow_in_box = (
    yellow_cabs['pickup_latitude'].between(MIN_LAT, MAX_LAT)
    & yellow_cabs['pickup_longitude'].between(MIN_LONG, MAX_LONG)
    & yellow_cabs['dropoff_latitude'].between(MIN_LAT, MAX_LAT)
    & yellow_cabs['dropoff_longitude'].between(MIN_LONG, MAX_LONG)
)
yellow_cabs = yellow_cabs[yellow_in_box]

### Remove trips whose dropoff time equals the pickup time and whose trip distance is 0.00

In [18]:
# Rows recorded with identical pickup/dropoff timestamps and zero distance.
green_same_time = green_cabs['lpep_dropoff_datetime'].eq(green_cabs['lpep_pickup_datetime'])
green_no_distance = green_cabs['trip_distance'].eq(0.00)
green_err = green_cabs[green_same_time & green_no_distance]

yellow_same_time = yellow_cabs['tpep_dropoff_datetime'].eq(yellow_cabs['tpep_pickup_datetime'])
yellow_no_distance = yellow_cabs['trip_distance'].eq(0.00)
yellow_err = yellow_cabs[yellow_same_time & yellow_no_distance]

In [19]:
# Drop trips with identical pickup/dropoff timestamps and zero distance.
# A boolean mask removes exactly those rows. The earlier
# concat + drop_duplicates(keep=False) approach also discarded every genuine
# trip that happened to have an exact duplicate row, and had to hash the
# whole frame to do so.
green_zero_trip = (
    (green_cabs['lpep_dropoff_datetime'] == green_cabs['lpep_pickup_datetime'])
    & (green_cabs['trip_distance'] == 0.00)
)
green_cabs = green_cabs[~green_zero_trip]

yellow_zero_trip = (
    (yellow_cabs['tpep_dropoff_datetime'] == yellow_cabs['tpep_pickup_datetime'])
    & (yellow_cabs['trip_distance'] == 0.00)
)
yellow_cabs = yellow_cabs[~yellow_zero_trip]

In [20]:
# Parse the pickup/dropoff timestamp columns of both frames into datetimes.
# (store_and_fwd_flag is left as its original 'Y'/'N' text.)
for frame, stamp_cols in (
    (green_cabs, ('lpep_pickup_datetime', 'lpep_dropoff_datetime')),
    (yellow_cabs, ('tpep_pickup_datetime', 'tpep_dropoff_datetime')),
):
    for stamp_col in stamp_cols:
        frame[stamp_col] = pd.to_datetime(frame[stamp_col])

In [21]:
# Alphabetically sorted column names of each frame, as plain lists.
green_index = sorted(green_cabs.columns)
yellow_index = sorted(yellow_cabs.columns)

In [22]:
# Both frames share one column layout; only the timestamp prefix differs
# (lpep_ for green, tpep_ for yellow).
_coord_cols = ['dropoff_latitude', 'dropoff_longitude', 'pickup_latitude', 'pickup_longitude']
_other_cols = [
    'extra', 'fare_amount', 'mta_tax', 'passenger_count', 'payment_type',
    'ratecodeid', 'store_and_fwd_flag', 'tip_amount', 'tolls_amount',
    'total_amount', 'trip_distance', 'vendorid', 'improvement_surcharge',
]

green_index = _coord_cols + ['lpep_dropoff_datetime', 'lpep_pickup_datetime'] + _other_cols
yellow_index = _coord_cols + ['tpep_dropoff_datetime', 'tpep_pickup_datetime'] + _other_cols

# Selecting by these lists both reorders the columns and drops any not listed.
green_cabs = green_cabs[green_index]
yellow_cabs = yellow_cabs[yellow_index]

### Separate time into days of the week, hours

In [23]:
# Pickup weekday (Monday=0 ... Sunday=6) and hour of day (0-23). The
# vectorised .dt accessor replaces a Python-level lambda applied to every row.
green_cabs['weekday'] = green_cabs['lpep_pickup_datetime'].dt.weekday
green_cabs['hour'] = green_cabs['lpep_pickup_datetime'].dt.hour

In [24]:
# Pickup weekday (Monday=0 ... Sunday=6) and hour of day (0-23). The
# vectorised .dt accessor replaces a Python-level lambda applied to every row.
yellow_cabs['weekday'] = yellow_cabs['tpep_pickup_datetime'].dt.weekday
yellow_cabs['hour'] = yellow_cabs['tpep_pickup_datetime'].dt.hour

In [25]:
# Preview the cleaned green-cab frame (the display is truncated to head/tail rows).
green_cabs

Unnamed: 0,dropoff_latitude,dropoff_longitude,pickup_latitude,pickup_longitude,lpep_dropoff_datetime,lpep_pickup_datetime,extra,fare_amount,mta_tax,passenger_count,...,ratecodeid,store_and_fwd_flag,tip_amount,tolls_amount,total_amount,trip_distance,vendorid,improvement_surcharge,weekday,hour
0,40.785885,-73.972534,40.805214,-73.939018,2016-02-01 00:10:06,2016-02-01 00:00:01,0.5,10.5,0.5,1,...,1,N,0.00,0.0,11.80,2.86,2,0.3,0,0
1,40.743896,-73.890877,40.746651,-73.891495,2016-02-01 00:20:13,2016-02-01 00:01:33,0.5,13.0,0.5,1,...,1,N,0.00,0.0,14.30,3.35,2,0.3,0,0
2,40.718327,-73.956978,40.676132,-73.983780,2016-02-01 00:21:04,2016-02-01 00:03:46,0.5,17.5,0.5,1,...,1,N,3.76,0.0,22.56,4.70,2,0.3,0,0
3,40.705978,-73.831657,40.700375,-73.807518,2016-02-01 00:06:48,2016-02-01 00:00:05,0.5,8.0,0.5,1,...,1,N,0.00,0.0,9.30,2.11,2,0.3,0,0
4,40.733601,-73.900009,40.744934,-73.903961,2016-02-01 00:08:47,2016-02-01 00:06:20,0.5,5.0,0.5,5,...,1,N,0.00,0.0,6.30,0.98,2,0.3,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1543920,40.807129,-73.967079,40.805477,-73.939301,2016-04-30 23:13:11,2016-04-30 23:00:30,0.5,10.5,0.5,1,...,1,N,1.50,0.0,13.30,2.40,1,0.3,5,23
1543921,40.745987,-73.893250,40.754555,-73.897263,2016-04-30 23:06:43,2016-04-30 23:00:00,0.5,7.0,0.5,1,...,1,N,0.00,0.0,8.30,1.40,1,0.3,5,23
1543922,40.683731,-73.945625,40.703651,-73.926971,2016-04-30 23:12:55,2016-04-30 23:00:00,0.5,11.5,0.5,1,...,1,N,0.00,0.0,12.80,2.80,1,0.3,5,23
1543923,40.723015,-73.982719,40.716717,-73.957565,2016-04-30 23:23:36,2016-04-30 23:00:16,0.5,16.0,0.5,1,...,1,N,3.45,0.0,20.75,2.70,1,0.3,5,23


In [26]:
# Preview the cleaned yellow-cab frame (the display is truncated to head/tail rows).
yellow_cabs

Unnamed: 0,dropoff_latitude,dropoff_longitude,pickup_latitude,pickup_longitude,tpep_dropoff_datetime,tpep_pickup_datetime,extra,fare_amount,mta_tax,passenger_count,...,ratecodeid,store_and_fwd_flag,tip_amount,tolls_amount,total_amount,trip_distance,vendorid,improvement_surcharge,weekday,hour
0,40.735390,-73.992012,40.763771,-73.947250,2016-02-25 17:27:20,2016-02-25 17:24:20,0.0,5.0,0.5,2,...,1,N,0.00,0.0,5.80,0.70,2,0.3,3,17
1,40.758839,-73.988586,40.750992,-73.983017,2016-02-25 23:31:50,2016-02-25 23:10:50,0.5,20.0,0.5,2,...,1,N,0.00,0.0,21.30,5.52,2,0.3,3,23
2,40.757977,-73.964355,40.758202,-73.992340,2016-02-01 00:10:52,2016-02-01 00:00:01,0.5,9.5,0.5,6,...,1,N,0.70,0.0,11.50,1.99,2,0.3,0,0
3,40.763985,-73.982323,40.749722,-73.981453,2016-02-01 00:05:16,2016-02-01 00:00:04,0.5,6.5,0.5,1,...,1,N,0.00,0.0,7.80,1.50,1,0.3,0,0
4,40.669834,-73.951324,40.729755,-74.000603,2016-02-01 00:20:59,2016-02-01 00:00:05,0.5,20.0,0.5,1,...,1,N,4.00,0.0,25.30,5.60,2,0.3,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11934329,40.744419,-74.002098,40.740383,-74.004150,2016-04-19 19:14:03,2016-04-19 19:08:47,1.0,5.0,0.5,1,...,1,N,1.00,0.0,7.80,0.60,1,0.3,1,19
11934330,40.764793,-73.958282,40.764095,-73.973984,2016-04-19 19:17:45,2016-04-19 19:08:47,1.0,7.5,0.5,1,...,1,N,0.00,0.0,9.30,1.10,1,0.3,1,19
11934331,40.772587,-73.947166,40.770531,-73.962059,2016-04-19 19:15:13,2016-04-19 19:08:47,1.0,6.5,0.5,2,...,1,N,1.65,0.0,9.95,1.10,1,0.3,1,19
11934332,40.772480,-73.946823,40.779041,-73.955971,2016-04-19 19:13:41,2016-04-19 19:08:47,1.0,5.5,0.5,3,...,1,N,0.00,0.0,7.30,0.75,2,0.3,1,19


## Geospatial Visualisation

In [63]:
# Coordinate column pairs, latitude first, used to pull point arrays for the maps.
PICKUP_COORD, DROPOFF_COORD = (
    ['pickup_latitude', 'pickup_longitude'],
    ['dropoff_latitude', 'dropoff_longitude'],
)

In [64]:
import folium
from folium.plugins import HeatMap

In [30]:
# Plain base map centred on mid_coord; written to disk and shown inline.
nyc_m = folium.Map(
    location=mid_coord,
    zoom_start=11,
    tiles="Stamen Terrain",
)
nyc_m.save('../mast30034_2021_s2_project_1-YourTeacher23/plots/folium_nyc.html')

nyc_m

In [96]:
from bokeh.plotting import figure, show, output_file, save
from bokeh.tile_providers import get_provider, Vendors

# to display bokeh plots inside jupyter, we need to use output_notebook
from bokeh.io import reset_output, output_notebook

# Clear any previously configured Bokeh output target, then direct plots to
# the notebook.
reset_output()
output_notebook()
# note below that it says "BokehJS 1.4.0 successfully loaded."

In [32]:
""" This code is taken from the Python Stream Workshop Repository, 
    https://github.com/akiratwang/MAST30034_Python/blob/main/tutorials/Lab1_Python.ipynb """

def latitude_to_mercator(coords):
    """Convert latitudes in degrees to mercator y-coordinates.

    Args:
        coords: iterable of latitude values in degrees.

    Returns:
        A new list holding one converted value per input element, in order.
    """
    # Scale factor applied to the projected value (presumably the WGS84
    # equatorial radius in metres).
    radius = 6378137
    return [np.log(np.tan((90 + lat) * np.pi/360.0)) * radius for lat in coords]

def longitude_to_mercator(coords):
    """Convert longitudes in degrees to mercator x-coordinates.

    Args:
        coords: iterable of longitude values in degrees.

    Returns:
        A new list holding one converted value per input element, in order.
    """
    # Scale factor applied to the projected value (presumably the WGS84
    # equatorial radius in metres).
    radius = 6378137
    return [lon * (radius * np.pi/180.0) for lon in coords]

In [33]:
# Tile provider reused by the Bokeh map figures below.
TILE = get_provider("STAMEN_TERRAIN_RETINA")

# Base figure for the pickup scatter map. The axis ranges are the degree
# ranges x_Range / y_Range converted to mercator (the xRange / yRange
# spellings are not defined in this notebook and raise NameError).
pickup_m = figure(x_range=longitude_to_mercator(x_Range), y_range=latitude_to_mercator(y_Range),
       x_axis_type="mercator", y_axis_type="mercator")
pickup_m.add_tile(TILE)
pickup_m.title.text = "Pickups in NYC"

In [174]:
# Mercator versions of the pickup/dropoff coordinates. Each helper walks the
# whole column once and returns a list, which is assigned back positionally.
green_cabs['pickupX'] = longitude_to_mercator(green_cabs['pickup_longitude'])
green_cabs['pickupY'] = latitude_to_mercator(green_cabs['pickup_latitude'])
green_cabs['dropoffX'] = longitude_to_mercator(green_cabs['dropoff_longitude'])
green_cabs['dropoffY'] = latitude_to_mercator(green_cabs['dropoff_latitude'])

In [34]:
# Random subset of at most 10,000 trips for the scatter maps. The seed makes
# the sample (and the saved plots) reproducible, and capping n at the frame
# length stops sample() from raising when fewer rows survive cleaning.
green_sample = green_cabs.sample(n=min(10000, len(green_cabs)), random_state=42)

In [36]:
# One small translucent blue circle per sampled pickup.
pickup_points = green_sample[['pickupX','pickupY']]
pickup_m.circle(x='pickupX', y='pickupY', source=pickup_points,
                size=5, fill_color="blue", fill_alpha=0.5)

In [37]:
#show(pickup_m)

In [38]:
# Scatter map of sampled dropoffs. The axis ranges are the degree ranges
# x_Range / y_Range converted to mercator (the xRange / yRange spellings are
# not defined in this notebook and raise NameError).
dropoff = figure(x_range=longitude_to_mercator(x_Range), y_range=latitude_to_mercator(y_Range),
       x_axis_type="mercator", y_axis_type="mercator")
dropoff.add_tile(TILE)
dropoff.title.text = "Dropoff in NYC"

# The dropoffX / dropoffY mercator columns are read from green_sample; they
# are not computed in this cell.
# plot circles (source = data source)
dropoff.circle(x='dropoffX', y='dropoffY', 
         size=5, color="pink", fill_color="red", fill_alpha=0.5, 
         source=green_sample[['dropoffX','dropoffY']])

#show(dropoff)

In [120]:
# Disabled FastMarkerCluster map: the code is held in a string literal, so
# nothing below executes.
# NOTE(review): it references COORD, which is not defined anywhere in the
# visible notebook; replace it before re-enabling.
"""import folium
from folium.plugins import FastMarkerCluster

# create an interactive geospatial graph
pickups_cluster = folium.Map(location=[40.66, -73.94], tiles="Stamen Terrain", zoom_start=10)

# use a built-in clustering algorithm to apply markers for hotspots
pickups_cluster.add_child(FastMarkerCluster(data=green_cabs[COORD].values))

# visualize the plot 
pickups_cluster.save('../mast30034_2021_s2_project_1-YourTeacher23/plots/foliumFastCluster.html')
#pickups_cluster"""

In [205]:
# Heat map of every green-cab pickup point, written to an HTML file.
pickups_heatmap = folium.Map(location=mid_coord, tiles="Stamen Terrain", zoom_start=10)
pickup_layer = HeatMap(green_cabs[PICKUP_COORD].values, radius=10)
pickups_heatmap.add_child(pickup_layer)

pickups_heatmap.save('../mast30034_2021_s2_project_1-YourTeacher23/plots/PickupHeatmap.html')

In [207]:
# Render the pickup heat map inline.
pickups_heatmap

In [206]:
# Heat map of every green-cab dropoff point, written to an HTML file.
dropoffs_heatmap = folium.Map(location=mid_coord, tiles="Stamen Terrain", zoom_start=10)
dropoff_layer = HeatMap(green_cabs[DROPOFF_COORD].values, radius=10)
dropoffs_heatmap.add_child(dropoff_layer)

dropoffs_heatmap.save('../mast30034_2021_s2_project_1-YourTeacher23/plots/DropoffHeatmap.html')

In [97]:
from bokeh.plotting import figure, show
from bokeh.tile_providers import get_provider, Vendors
from bokeh.io import save, reset_output, output_notebook

# Same Bokeh output setup as the earlier bokeh cell: clear the configured
# output target, then direct plots to the notebook.
reset_output()
output_notebook()
# note below that it says "BokehJS 1.4.0 successfully loaded."

In [165]:
from bokeh.io import curdoc
from bokeh.models import Model

def clear_doc(plot=None):
    """Clear the current Bokeh document and optionally detach a plot's models.

    Args:
        plot: optional Bokeh object exposing ``select``. Every ``Model`` it
            selects is detached from the document it is attached to. Defaults
            to None, in which case only the current document is cleared. The
            body used to read an undefined global ``p`` here, which raised
            NameError on a fresh kernel.

    Returns:
        0, always.
    """
    curdoc().clear()
    if plot is not None:
        for model in plot.select({'type': Model}):
            prev_doc = model.document
            # Drop the model's back-reference first, then remove it from the
            # document it belonged to.
            model._document = None
            if prev_doc:
                prev_doc.remove_root(model)
    return 0

In [169]:
from bokeh.models import ColorBar, LinearColorMapper
from bokeh.palettes import all_palettes

clear_doc()

# Hex-binned density of all green-cab pickups over the map tiles.
# The degree ranges are converted with the notebook's own
# longitude_to_mercator / latitude_to_mercator helpers (lon2mercer /
# lat2mercer are not defined in this notebook).
pickup_hex = figure(x_range=longitude_to_mercator(x_Range), y_range=latitude_to_mercator(y_Range),
           x_axis_type="mercator", y_axis_type="mercator")
pickup_hex.add_tile(TILE)
pickup_hex.title.text = "Hex-Binned Pickups in NYC"

# Reversed 256-colour Magma palette.
palette = all_palettes['Magma'][256][::-1]
r, bins = pickup_hex.hexbin(x=green_cabs['pickupX'], y=green_cabs['pickupY'], size=250, palette=palette)

# With only a palette given, hexbin colours tiles linearly from 0 to the
# largest bin count; the colour bar is built on that same range so its scale
# matches the tiles (it was a fixed 1..1449 before).
color_mapper = LinearColorMapper(palette=palette, low=0, high=int(bins.counts.max()))
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=12)
pickup_hex.add_layout(color_bar, 'right')

#show(pickup_hex)
save(pickup_hex, '../mast30034_2021_s2_project_1-YourTeacher23/plots/PickupHexBinned.html')

'/mnt/student.unimelb.edu.au/kguok/mast30034_2021_s2_project_1-YourTeacher23/plots/PickupHexBinned.html'

In [196]:
# Trips picked up during the morning (07-09) and evening (16-17) peak hours.
green_morning_peak = green_cabs[green_cabs['hour'].isin([7, 8, 9])]
green_evening_peak = green_cabs[green_cabs['hour'].isin([16, 17])]

In [189]:
# Hours of the day treated as peak; every other hour (0-23) is off-peak.
PEAK_HOUR = [7, 8, 9, 16, 17]
OFF_HOUR = [hour for hour in range(24) if hour not in PEAK_HOUR]

In [197]:
clear_doc()

# Hexbin map of morning peak-hour pickups.
# The degree ranges are converted with the notebook's own
# longitude_to_mercator / latitude_to_mercator helpers (lon2mercer /
# lat2mercer are not defined in this notebook).
pickup_hex = figure(x_range=longitude_to_mercator(x_Range), y_range=latitude_to_mercator(y_Range),
           x_axis_type="mercator", y_axis_type="mercator")
pickup_hex.add_tile(TILE)
pickup_hex.title.text = "Hex-Binned Morning Peak Hours Pickups in NYC"

# Reversed 256-colour Magma palette.
palette = all_palettes['Magma'][256][::-1]
r, bins = pickup_hex.hexbin(x=green_morning_peak['pickupX'], y=green_morning_peak['pickupY'], size=250, palette=palette)

# With only a palette given, hexbin colours tiles linearly from 0 to the
# largest bin count; the colour bar is built on that same range so its scale
# matches this subset (it was a fixed 1..1449 before).
color_mapper = LinearColorMapper(palette=palette, low=0, high=int(bins.counts.max()))
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=12)
pickup_hex.add_layout(color_bar, 'right')

#show(pickup_hex)
save(pickup_hex, '../mast30034_2021_s2_project_1-YourTeacher23/plots/MorningPickupHex.html')

'/mnt/student.unimelb.edu.au/kguok/mast30034_2021_s2_project_1-YourTeacher23/plots/MorningPickupHex.html'

In [198]:
clear_doc()

# Hexbin map of morning peak-hour dropoffs.
# The degree ranges are converted with the notebook's own
# longitude_to_mercator / latitude_to_mercator helpers (lon2mercer /
# lat2mercer are not defined in this notebook).
dropoff_hex = figure(x_range=longitude_to_mercator(x_Range), y_range=latitude_to_mercator(y_Range),
           x_axis_type="mercator", y_axis_type="mercator")
dropoff_hex.add_tile(TILE)
dropoff_hex.title.text = "Hex-Binned Morning Peak Hour Dropoffs in NYC"

# Reversed 256-colour Magma palette.
palette = all_palettes['Magma'][256][::-1]
r, bins = dropoff_hex.hexbin(x=green_morning_peak['dropoffX'], y=green_morning_peak['dropoffY'], size=250, palette=palette)

# With only a palette given, hexbin colours tiles linearly from 0 to the
# largest bin count; the colour bar is built on that same range so its scale
# matches this subset (it was a fixed 1..1449 before).
color_mapper = LinearColorMapper(palette=palette, low=0, high=int(bins.counts.max()))
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=12)
dropoff_hex.add_layout(color_bar, 'right')

#show(dropoff_hex)
save(dropoff_hex, '../mast30034_2021_s2_project_1-YourTeacher23/plots/MorningDropoffHex.html')

'/mnt/student.unimelb.edu.au/kguok/mast30034_2021_s2_project_1-YourTeacher23/plots/MorningDropoffHex.html'

In [199]:
clear_doc()

# Hexbin map of evening peak-hour pickups.
# The degree ranges are converted with the notebook's own
# longitude_to_mercator / latitude_to_mercator helpers (lon2mercer /
# lat2mercer are not defined in this notebook).
pickup_hex = figure(x_range=longitude_to_mercator(x_Range), y_range=latitude_to_mercator(y_Range),
           x_axis_type="mercator", y_axis_type="mercator")
pickup_hex.add_tile(TILE)
pickup_hex.title.text = "Hex-Binned Evening Peak Hour Pickups in NYC"

# Reversed 256-colour Magma palette.
palette = all_palettes['Magma'][256][::-1]
r, bins = pickup_hex.hexbin(x=green_evening_peak['pickupX'], y=green_evening_peak['pickupY'], size=250, palette=palette)

# With only a palette given, hexbin colours tiles linearly from 0 to the
# largest bin count; the colour bar is built on that same range so its scale
# matches this subset (it was a fixed 1..1449 before).
color_mapper = LinearColorMapper(palette=palette, low=0, high=int(bins.counts.max()))
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=12)
pickup_hex.add_layout(color_bar, 'right')

#show(pickup_hex)
save(pickup_hex, '../mast30034_2021_s2_project_1-YourTeacher23/plots/EveningPickupHex.html')

'/mnt/student.unimelb.edu.au/kguok/mast30034_2021_s2_project_1-YourTeacher23/plots/EveningPickupHex.html'

In [200]:
clear_doc()

# Hexbin map of evening peak-hour dropoffs.
# The degree ranges are converted with the notebook's own
# longitude_to_mercator / latitude_to_mercator helpers (lon2mercer /
# lat2mercer are not defined in this notebook).
dropoff_hex = figure(x_range=longitude_to_mercator(x_Range), y_range=latitude_to_mercator(y_Range),
           x_axis_type="mercator", y_axis_type="mercator")
dropoff_hex.add_tile(TILE)
dropoff_hex.title.text = "Hex-Binned Evening Peak Hour Dropoffs in NYC"

# Reversed 256-colour Magma palette.
palette = all_palettes['Magma'][256][::-1]
r, bins = dropoff_hex.hexbin(x=green_evening_peak['dropoffX'], y=green_evening_peak['dropoffY'], size=250, palette=palette)

# With only a palette given, hexbin colours tiles linearly from 0 to the
# largest bin count; the colour bar is built on that same range so its scale
# matches this subset (it was a fixed 1..1449 before).
color_mapper = LinearColorMapper(palette=palette, low=0, high=int(bins.counts.max()))
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=12)
dropoff_hex.add_layout(color_bar, 'right')

#show(dropoff_hex)
save(dropoff_hex, '../mast30034_2021_s2_project_1-YourTeacher23/plots/EveningDropoffHex.html')

'/mnt/student.unimelb.edu.au/kguok/mast30034_2021_s2_project_1-YourTeacher23/plots/EveningDropoffHex.html'

In [201]:
# Split trips by pickup day: weekday codes 0-4 versus weekend codes 5-6.
green_weekday = green_cabs[green_cabs['weekday'].isin([0, 1, 2, 3, 4])]
green_weekend = green_cabs[green_cabs['weekday'].isin([5, 6])]

In [202]:
# All peak-hour trips (morning + evening), restricted to weekday codes 0-4.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
green_peak = pd.concat([green_morning_peak, green_evening_peak])
green_peak = green_peak[green_peak['weekday'].isin([0, 1, 2, 3, 4])]

In [203]:
clear_doc()

# Hexbin map of weekday peak-hour dropoffs.
# The degree ranges are converted with the notebook's own
# longitude_to_mercator / latitude_to_mercator helpers (lon2mercer /
# lat2mercer are not defined in this notebook).
dropoff_hex = figure(x_range=longitude_to_mercator(x_Range), y_range=latitude_to_mercator(y_Range),
           x_axis_type="mercator", y_axis_type="mercator")
dropoff_hex.add_tile(TILE)
dropoff_hex.title.text = "Hex-Binned Peak Hour Dropoffs in NYC"

# Reversed 256-colour Magma palette.
palette = all_palettes['Magma'][256][::-1]
r, bins = dropoff_hex.hexbin(x=green_peak['dropoffX'], y=green_peak['dropoffY'], size=250, palette=palette)

# With only a palette given, hexbin colours tiles linearly from 0 to the
# largest bin count; the colour bar is built on that same range so its scale
# matches this subset (it was a fixed 1..1449 before).
color_mapper = LinearColorMapper(palette=palette, low=0, high=int(bins.counts.max()))
color_bar = ColorBar(color_mapper=color_mapper, label_standoff=12)
dropoff_hex.add_layout(color_bar, 'right')

#show(dropoff_hex)
save(dropoff_hex, '../mast30034_2021_s2_project_1-YourTeacher23/plots/PeakDropoffHex.html')

'/mnt/student.unimelb.edu.au/kguok/mast30034_2021_s2_project_1-YourTeacher23/plots/PeakDropoffHex.html'

## External Datasets

In [124]:
# Collision records; the '?accessType=DOWNLOAD' suffix is part of the file
# name as it is written here.
collisions_path = '../raw_data/rows.csv?accessType=DOWNLOAD'
traffic_collisions = pd.read_csv(collisions_path)

In [125]:
# Keep only collisions that have both a latitude and a longitude.
has_location = traffic_collisions[['LATITUDE', 'LONGITUDE']].notna().all(axis=1)
traffic_collisions = traffic_collisions[has_location]

In [131]:
# Collisions whose 'CRASH DATE' text ends in the year 2016.
crash_in_2016 = traffic_collisions['CRASH DATE'].str.endswith('2016')
collisions_2016 = traffic_collisions[crash_in_2016]

In [134]:
# Restrict to crash dates whose leading month field is 02, 03 or 04,
# the same months as the taxi data loaded above.
crash_month = collisions_2016['CRASH DATE'].str[:2]
collisions_2016 = collisions_2016[crash_month.isin(['02', '03', '04'])]

In [135]:
# Preview the filtered collision records (the display is truncated to head/tail rows).
collisions_2016

Unnamed: 0,CRASH DATE,CRASH TIME,BOROUGH,ZIP CODE,LATITUDE,LONGITUDE,LOCATION,ON STREET NAME,CROSS STREET NAME,OFF STREET NAME,...,CONTRIBUTING FACTOR VEHICLE 2,CONTRIBUTING FACTOR VEHICLE 3,CONTRIBUTING FACTOR VEHICLE 4,CONTRIBUTING FACTOR VEHICLE 5,COLLISION_ID,VEHICLE TYPE CODE 1,VEHICLE TYPE CODE 2,VEHICLE TYPE CODE 3,VEHICLE TYPE CODE 4,VEHICLE TYPE CODE 5
394,04/16/2016,14:20,BROOKLYN,11214.0,40.586277,-73.986200,"(40.586277, -73.9862)",WEST 17 STREET,BAY 50 STREET,,...,Unspecified,,,,4408059,Sedan,Station Wagon/Sport Utility Vehicle,,,
995254,04/14/2016,17:25,,,40.754250,-73.968990,"(40.75425, -73.96899)",,,EAST 49 STREET,...,,,,,3407843,4 dr sedan,,,,
1000327,04/12/2016,11:00,,,40.754745,-73.987900,"(40.754745, -73.9879)",,,WEST 40 STREET,...,Unspecified,,,,3407430,Van,,,,
1000451,04/24/2016,15:33,,,40.603550,-74.067570,"(40.60355, -74.06757)",Staten Island Expy,,,...,Unspecified,,,,3440364,Station Wagon/Sport Utility Vehicle,Station Wagon/Sport Utility Vehicle,,,
1000454,04/12/2016,10:00,BROOKLYN,11212.0,40.668780,-73.910520,"(40.66878, -73.91052)",,,464 ROCKAWAY AVENUE,...,Unspecified,,,,3418442,4 dr sedan,4 dr sedan,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1065497,02/15/2016,12:45,BROOKLYN,11236.0,40.636506,-73.915715,"(40.6365062, -73.9157145)",EAST 79 STREET,GLENWOOD ROAD,,...,Unspecified,Unspecified,,,3390161,PASSENGER VEHICLE,PASSENGER VEHICLE,SPORT UTILITY / STATION WAGON,,
1065498,02/03/2016,2:55,MANHATTAN,10004.0,40.702960,-74.011505,"(40.7029601, -74.0115051)",BROAD STREET,WATER STREET,,...,Unspecified,,,,3382600,OTHER,MOTORCYCLE,,,
1065499,02/03/2016,19:59,QUEENS,11105.0,40.771598,-73.908947,"(40.7715984, -73.9089475)",38 STREET,23 AVENUE,,...,,,,,3384366,PASSENGER VEHICLE,,,,
1065500,02/11/2016,13:30,BROOKLYN,11235.0,40.592968,-73.948190,"(40.5929683, -73.9481899)",EAST 22 STREET,AVENUE X,,...,Unspecified,,,,3387925,PASSENGER VEHICLE,SPORT UTILITY / STATION WAGON,,,
