In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import geopandas as gpd
import warnings
warnings.filterwarnings("ignore")
import folium
from folium.plugins import FastMarkerCluster
from folium.plugins import HeatMap
from bokeh.plotting import figure, show
from bokeh.tile_providers import get_provider, Vendors
from bokeh.io import save, reset_output, output_notebook
from bokeh.models import ColorBar, LinearColorMapper
from bokeh.palettes import all_palettes
# Open the preprocessed yellow-taxi trip file and drop its leftover 'index' column.
full_month = pd.read_feather("2018_preprocessed_yellow_11.feather").drop('index', axis=1)
# The pickup timestamp is sliced as text. 'data' holds characters 5-9, which the
# rest of the notebook compares against day strings such as '11-04'.
# NOTE(review): assumes timestamps are strings like "YYYY-MM-DD HH:MM:SS" -- confirm.
full_month['data'] = full_month['tpep_pickup_datetime'].str[5:10]
# 'time' holds characters 10-12 of the timestamp followed by " 00".
full_month['time'] = full_month['tpep_pickup_datetime'].str[10:13] + " 00"

# 2018/11/04: the TCS New York City Marathon
marathon = full_month[full_month['data'] == '11-04']
sf = gpd.read_file("MAST30034_Python/data/taxi_zones/taxi_zones.shp")
zone = pd.read_csv("MAST30034_Python/data/taxi_zones/taxi+_zone_lookup.csv")
# Please attribute this if you are using it
# Reproject the zone polygons to longitude/latitude (WGS84).
sf['geometry'] = sf['geometry'].to_crs("+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs")

# Reference coordinate (latitude, longitude) reused by the folium map cell.
# NOTE(review): the original comment calls this "the coordinate of marathon";
# confirm it is the intended location.
start_coordinate = [40.712778, -74.006111]
start_coord = np.array(start_coordinate)

# Pickups per zone on marathon day, joined to the zone polygons. The merge is an
# inner join, so zones with no pickups that day are not drawn.
pickups_per_zone = marathon['PULocationID'].value_counts().sort_index()
PU_count = pd.DataFrame({"LocationID": pickups_per_zone.index, "PU_Count": pickups_per_zone})
gdf = gpd.GeoDataFrame(
    pd.merge(PU_count, sf, left_on='LocationID', right_on='LocationID')
).drop('LocationID', axis=1)

variable = "PU_Count"
vmin, vmax = 0, gdf["PU_Count"].max()
fig, ax = plt.subplots(1, figsize=(10,10))
ax.set_title("2018 New York city Marathon of pick up", fontdict={'fontsize': '20', 'fontweight':'3'} )
sm = plt.cm.ScalarMappable(cmap="Reds", norm=plt.Normalize(vmin=vmin, vmax=vmax))
sm.set_array([])
# The mappable is not attached to any Axes, so tell the colorbar explicitly
# which Axes to take space from.
fig.colorbar(sm, ax=ax)
# Pass the same vmin/vmax as the colorbar so the map colours match the legend.
gdf.plot(column=variable, cmap="Reds", linewidth=1.5, ax=ax, edgecolor='0.4',
         vmin=vmin, vmax=vmax)
plt.show()

In [None]:
# Interactive choropleth of marathon-day pickups per taxi zone.
# tiles=None keeps folium from adding its default base layer; the only base
# layer is the one added explicitly below (control=False hides it from the
# layer switcher).
m_trip_distance = folium.Map(location=start_coordinate, tiles=None, zoom_start=10)
folium.TileLayer('CartoDB positron', name="Light Map", control=False).add_to(m_trip_distance)

# refer to the folium documentations on more information on how to plot aggregated data.
folium.Choropleth(
    geo_data=gdf,  # zone polygons
    name='2018 The TCS New York City Marathon ',  # name of plot
    data=gdf,  # data source
    columns=['OBJECTID', 'PU_Count'],  # key column, value column
    key_on='feature.properties.OBJECTID',  # property in geo_data matched against the key column
    fill_color='OrRd',  # color scheme
    fill_opacity=0.9,
    line_opacity=0.3,
    legend_name='2018 Marathon of pick up'  # legend title
).add_to(m_trip_distance)

# Toggleable layer holding the reference marker. The label now matches the
# marker popup (it previously read "Tennis Venue", left over from another map)
# and the inline CSS is terminated with ';' instead of a stray ':'.
feature = folium.FeatureGroup(
    name='<span style="color: red;">New York City</span>'
).add_to(m_trip_distance)
folium.Marker(
    start_coordinate,
    popup='New York City',
    icon=folium.Icon(color='blue'),
).add_to(feature)

folium.map.LayerControl("topright", collapsed=False).add_to(m_trip_distance)
m_trip_distance.save('2018 Marathon foliumChoroplethMapTrips.html')
m_trip_distance

In [None]:
# Pickups per value of the 'data' column (day strings such as "11-04"), in sorted order.
data_count = full_month['data'].value_counts().sort_index()
labels = data_count.index

# Copy of the daily counts with every day except the marathon (11-04) set to 0.
# Drawn on top of the full series, it recolours only that bar. `where` returns
# a new Series, so data_count stays intact and need not be recomputed.
marathon_count = data_count.where(labels.isin(["11-04"]), 0)

x = np.arange(len(labels))
width = 0.5
fig, ax = plt.subplots(figsize=(6,6))
normal_bar = ax.bar(x, data_count, width, label="data")
marathon_bar = ax.bar(x, marathon_count, width, label="marathon")
ax.set_ylabel("total pick time")
ax.set_title("taxi pickup per day")
ax.set_xticks(x)
ax.set_xlabel("Date")
ax.set_xticklabels(labels)
ax.legend()
plt.xticks(rotation=90)
fig.tight_layout()
plt.show()

In [None]:
# Day key: characters 5-9 of the pickup timestamp text (compared below against
# strings such as "11-03").
full_month['data'] = full_month['tpep_pickup_datetime'].str[5:10]

# Pickups per day key, in sorted order.
data_count = full_month['data'].value_counts().sort_index()
labels = data_count.index

# Day keys highlighted as weekends in this chart.
weekend_dates = ["11-03", "11-04", "11-10", "11-11", "11-17", "11-18", "11-24", "11-25"]
# Copy of the daily counts with every non-weekend day set to 0. Drawn on top of
# the full series, it recolours only the weekend bars. `where` returns a new
# Series, so data_count stays intact and need not be recomputed.
weekend_count = data_count.where(labels.isin(weekend_dates), 0)

x = np.arange(len(labels))
width = 0.5
fig, ax = plt.subplots(figsize=(6,6))
working_day = ax.bar(x, data_count, width, label="working days")
weekend = ax.bar(x, weekend_count, width, label="weekend")
ax.set_ylabel("total pick time")
ax.set_title("taxi pick up per day")
ax.set_xticks(x)
ax.set_xlabel("Date")
ax.set_xticklabels(labels)
ax.legend()
plt.xticks(rotation=90)
fig.tight_layout()
plt.show()

In [None]:
# Weather and festival: highlight 11-22, which the chart legend labels "coldest day".

# Compute the daily counts inside this cell instead of reusing whatever
# `data_count` a previously executed cell left behind.
data_count = full_month["data"].value_counts().sort_index()
labels = data_count.index

# Copy of the daily counts with every day except 11-22 set to 0. Drawn on top
# of the full series, it recolours only that bar. `where` returns a new Series,
# so data_count is not modified.
coldest_count = data_count.where(labels.isin(["11-22"]), 0)

x = np.arange(len(labels))
width = 0.5
fig, ax = plt.subplots(figsize=(6,6))
normal_cold_bar = ax.bar(x, data_count, width, label="normal day")
coldest_bar = ax.bar(x, coldest_count, width, label="coldest day")
ax.set_ylabel("total pick time")
ax.set_title("taxi pickup per day")
ax.set_xticks(x)
ax.set_xlabel("Date")
ax.set_xticklabels(labels)
ax.legend()
plt.xticks(rotation=90)
fig.tight_layout()
plt.show()