This notebook extracts the geographic data of Manhattan's taxi zones. It reads a shapefile with all taxi zones, transforms its coordinates and computes the geographic centroid of each zone. The result is a cleaned dataset of the Manhattan taxi zones and their centroid latitude and longitude, geometry, as well as other details which are saved as a CSV file. This is directly used in predictions to mark different areas and how busy they are.

In [1]:
# Importing.
import fiona
from shapely.geometry import shape
from shapely.ops import transform
import pandas as pd
import geopandas as gpd
from pyproj import Transformer
import folium

Getting the centroid locations and geometry data for each taxi zone below.

In [2]:
# Path to shapefile.
shapefile_path = 'taxi_zones.shp'

# Seting up transformer.
transformer = Transformer.from_crs(2263, 4326, always_xy=True)

# Reading .shp and getting centroids.
records = []
geoms = []
with fiona.open(shapefile_path) as shp:
    for feature in shp:
        props = feature['properties']
        geom = shape(feature['geometry'])
        centroid = geom.centroid
        lon, lat = transformer.transform(centroid.x, centroid.y)
        geom_wgs84 = transform(transformer.transform, geom)
        props['centroid_lon'] = lon
        props['centroid_lat'] = lat
        props['geometry'] = geom_wgs84
        records.append(props)

# Creating DataFrame.
df = pd.DataFrame(records)

# Filtering for Manhattan only.
manhattan_df = df[df['borough'] == 'Manhattan']

# Examining results.
print(manhattan_df[['LocationID', 'zone', 'centroid_lat', 'centroid_lon', 'geometry']])

  props['centroid_lon'] = lon
  props['centroid_lat'] = lat
  props['geometry'] = geom_wgs84


     LocationID                       zone  centroid_lat  centroid_lon  \
3             4              Alphabet City     40.723752    -73.976968   
11           12               Battery Park     40.702946    -74.015563   
12           13          Battery Park City     40.712038    -74.016079   
23           24               Bloomingdale     40.801971    -73.965479   
40           41             Central Harlem     40.804334    -73.951292   
..          ...                        ...           ...           ...   
245         246  West Chelsea/Hudson Yards     40.753309    -74.004016   
248         249               West Village     40.734576    -74.002875   
260         261         World Trade Center     40.709139    -74.013023   
261         262             Yorkville East     40.775932    -73.946510   
262         263             Yorkville West     40.778766    -73.951010   

                                              geometry  
3    POLYGON ((-73.97177410965318 40.72582128133726...

In [3]:
# Saving to CSV.
manhattan_df.to_csv('manhattan_taxi_zones.csv', index=False)

Examining centroid locations and whether they are marked correctly for each zone below and mapping each zone.

In [4]:
# Loading CSV with centroid data.
df = pd.read_csv('manhattan_taxi_zones.csv')

# Creating a map centered on Manhattan.
m = folium.Map(location=[40.7831, -73.9712], zoom_start=12)

# Plotting centroid markers.
for _, row in df.iterrows():
    folium.CircleMarker(
        location=[row['centroid_lat'], row['centroid_lon']],
        radius=5,
        color='red',
        fill=True,
        fill_color='red',
        fill_opacity=1,
        popup=f"Zone {row['zone']} (ID: {row['OBJECTID']})"
    ).add_to(m)

# Loading and reprojecting shapefile to WGS84.
gdf = gpd.read_file("taxi_zones.shp").to_crs(epsg=4326)

# Filtering for Manhattan zones only.
manhattan_shapes = gdf[gdf['borough'] == 'Manhattan']

# Adding actual zone shapes to the map.
folium.GeoJson(
    manhattan_shapes,
    name="Manhattan Zones",
    tooltip=folium.GeoJsonTooltip(fields=["zone", "OBJECTID"], aliases=["Zone:", "ID:"])
).add_to(m)

# Saving combined map.
m.save("manhattan_taxi_zones_combined.html")