In [653]:
! pip install -U scikit-learn

Requirement already up-to-date: scikit-learn in /opt/conda/lib/python3.6/site-packages
[33mYou are using pip version 9.0.1, however version 10.0.1 is available.
You should consider upgrading via the 'pip install --upgrade pip' command.[0m


In [654]:
import matplotlib.pyplot as plt 
from pyspark.sql import SparkSession
from pyspark.sql.types import *
import pyspark.sql.functions as F
from pyspark.sql.functions import udf
from pyspark.sql.functions import when
from pyspark import SparkContext as sc
from pyspark.sql.functions import col, split, ltrim, substring
import pyspark.sql as SQL
from pyspark.sql.functions import unix_timestamp, from_unixtime, date_format, \
        from_utc_timestamp, to_utc_timestamp, date_format, dayofmonth
import datetime
import calendar
import pandas as pd
from pyspark.ml.linalg import Vectors
from pyspark.ml.regression import LinearRegression
#from pyspark.ml.feature import OneHotEncoderEstimator


In [655]:
# Create (or reuse) the Spark session for this notebook under the app name 'Jan-01'.
spark = (
    SparkSession
    .builder
    .appName('Jan-01')
    .getOrCreate()
)

In [656]:
# Load the abbreviated January 2017 yellow-taxi trip file (it must already be present
# in the working directory). The first row supplies the column names; the frame is
# cached because the following cells build on it.
jan_2017 = (
    spark.read
    .format("csv")
    .option("header", True)
    .load('yellow_tripdata_half.csv')
    .cache()
)

In [657]:
# The zone lookup is joined to the trips twice (once for pickup, once for dropoff).
# Two separately loaded DataFrames are used because joining the same DataFrame twice
# raised a cartesian-product error.
taxi_zone = spark.read.format("csv").option("header", True).load('taxi+_zone_lookup.csv')
taxi_zone2 = spark.read.format("csv").option("header", True).load('taxi+_zone_lookup.csv')


In [658]:
#taxi_zone.printSchema()

root
 |-- LocationID: string (nullable = true)
 |-- Borough: string (nullable = true)
 |-- Zone: string (nullable = true)
 |-- service_zone: string (nullable = true)



In [659]:
# Attach pickup-zone details: match PULocationID against the zone lookup and rename
# the lookup columns with a PU prefix. Left outer join keeps trips with no match.
# The lookup's own LocationID column is carried into the result as-is.
jan_2017 = (
    jan_2017
    .join(taxi_zone, jan_2017.PULocationID == taxi_zone.LocationID, "left_outer")
    .withColumnRenamed("Borough", "PUBorough")
    .withColumnRenamed("Zone", "PUZone")
    .withColumnRenamed("service_zone", "PUServiceZone")
    .cache()
)

In [660]:
# Attach dropoff-zone details: match DOLocationID against the second copy of the
# zone lookup and rename the lookup columns with a DO prefix. Left outer join keeps
# trips with no match. The lookup's own LocationID column is carried into the result.
jan_2017 = (
    jan_2017
    .join(taxi_zone2, jan_2017.DOLocationID == taxi_zone2.LocationID, "left_outer")
    .withColumnRenamed("Borough", "DOBorough")
    .withColumnRenamed("Zone", "DOZone")
    .withColumnRenamed("service_zone", "DOServiceZone")
)

In [661]:
#jan_2017 = jan_2017.drop("LocationID")

In [662]:
# Flag trips whose dropoff zone is an airport: AirportDO is 1 when DOLocationID is
# '138' or '132', otherwise 0. DOLocationID is still a string column here, so the
# comparison uses string IDs.
# Fix: the two ID tests were previously combined with `&`; a single value can never
# equal both IDs, so the flag was 0 for every row. A membership test is used instead.
# NOTE(review): 132/138 are presumably JFK and LaGuardia in the TLC zone lookup;
# confirm against taxi+_zone_lookup.csv (and decide whether Newark should be included).
jan_2017 = jan_2017.withColumn(
    "AirportDO",
    F.when(jan_2017["DOLocationID"].isin('138', '132'), 1).otherwise(0),
)

In [663]:
#jan_2017.count()

5044667

In [664]:
#jan_2017.printSchema()

root
 |-- VendorID: string (nullable = true)
 |-- tpep_pickup_datetime: string (nullable = true)
 |-- tpep_dropoff_datetime: string (nullable = true)
 |-- passenger_count: string (nullable = true)
 |-- trip_distance: string (nullable = true)
 |-- RatecodeID: string (nullable = true)
 |-- store_and_fwd_flag: string (nullable = true)
 |-- PULocationID: string (nullable = true)
 |-- DOLocationID: string (nullable = true)
 |-- payment_type: string (nullable = true)
 |-- fare_amount: string (nullable = true)
 |-- extra: string (nullable = true)
 |-- mta_tax: string (nullable = true)
 |-- tip_amount: string (nullable = true)
 |-- tolls_amount: string (nullable = true)
 |-- improvement_surcharge: string (nullable = true)
 |-- total_amount: string (nullable = true)
 |-- LocationID: string (nullable = true)
 |-- PUBorough: string (nullable = true)
 |-- PUZone: string (nullable = true)
 |-- PUServiceZone: string (nullable = true)
 |-- LocationID: string (nullable = true)
 |-- DOBorough: string (

In [665]:
#jan_2017.show(10)

In [666]:
# Split the pickup and dropoff timestamp strings on the space between date and time,
# producing PUDate/DODate (cast to DateType) and PUTime/DOTime (left as strings).
split_pickup_col = split(jan_2017['tpep_pickup_datetime'], ' ')
# Fix: the dropoff parts must come from tpep_dropoff_datetime. Previously the pickup
# column was split a second time, so DODate/DOTime were copies of PUDate/PUTime.
split_dropoff_col = split(jan_2017['tpep_dropoff_datetime'], ' ')
jan_2017 = jan_2017.withColumn("PUDate", split_pickup_col.getItem(0).cast(DateType()))
jan_2017 = jan_2017.withColumn("PUTime", split_pickup_col.getItem(1))
jan_2017 = jan_2017.withColumn("DODate", split_dropoff_col.getItem(0).cast(DateType()))
jan_2017 = jan_2017.withColumn("DOTime", split_dropoff_col.getItem(1))

In [667]:
# Break the PUTime / DOTime strings on ':' and keep the first two pieces as integer
# hour and minute columns. (Minutes are rounded to a 5-minute mark in a later cell.)
split_PUTime = split(jan_2017['PUTime'], ':')
jan_2017 = (
    jan_2017
    .withColumn("PUHour", split_PUTime[0].cast("int"))
    .withColumn("PUMinute", split_PUTime[1].cast("int"))
)

split_DOTime = split(jan_2017['DOTime'], ':')
jan_2017 = (
    jan_2017
    .withColumn("DOHour", split_DOTime[0].cast("int"))
    .withColumn("DOMinute", split_DOTime[1].cast("int"))
)

In [668]:
# Round each minute value down to the nearest multiple of 5 (e.g. 17 -> 15) by
# subtracting its remainder modulo 5; coarser buckets are cheaper to work with.
jan_2017 = (
    jan_2017
    .withColumn("DOMinute", F.col("DOMinute") - F.col("DOMinute") % 5)
    .withColumn("PUMinute", F.col("PUMinute") - F.col("PUMinute") % 5)
)

In [669]:
# Day of week for pickup and dropoff dates, stored as a short.
# The 'u' date pattern yields 1 (Monday) through 7 (Sunday).
jan_2017 = (
    jan_2017
    .withColumn("PU_DOW", date_format(F.col("PUDate"), 'u').cast(ShortType()))
    .withColumn("DO_DOW", date_format(F.col("DODate"), 'u').cast(ShortType()))
)

In [670]:
# Flag trips that touch a weekend: Weekend is 1 when either the pickup or the dropoff
# day-of-week is 6 (Saturday) or 7 (Sunday), otherwise 0.
# Fix: the original condition tested PU_DOW == 7 twice and never tested DO_DOW == 6,
# so a trip that was only dropped off on a Saturday was not flagged.
jan_2017 = jan_2017.withColumn(
    "Weekend",
    F.when(
        jan_2017["PU_DOW"].isin(6, 7) | jan_2017["DO_DOW"].isin(6, 7),
        1,
    ).otherwise(0),
)

In [671]:
# Convert the string columns read from the CSV into numeric types.
# Each withColumn replaces the existing column of the same name in place.
#
# Code tables for the categorical columns:
#   VendorID:     1= Creative Mobile Technologies, LLC; 2= VeriFone Inc.
#   payment_type: 1= Credit card, 2= Cash, 3= No charge, 4= Dispute,
#                 5= Unknown, 6= Voided trip
#   RateCodeID:   1= Standard rate, 2= JFK -> $52 flat fare, 3= Newark,
#                 4= Nassau or Westchester, 5= Negotiated fare, 6= Group ride
jan_2017 = (
    jan_2017
    .withColumn("VendorID", F.col("VendorID").cast("short"))
    .withColumn("passenger_count", F.col("passenger_count").cast("short"))
    # trip_distance is in miles
    .withColumn("trip_distance", F.col("trip_distance").cast("float"))
    .withColumn("payment_type", F.col("payment_type").cast("short"))
    .withColumn("fare_amount", F.col("fare_amount").cast("float"))
    # extra: 0.50 and $1 rush hour and overnight charges
    .withColumn("extra", F.col("extra").cast("float"))
    # mta_tax: .50, automatic MTA charge
    .withColumn("mta_tax", F.col("mta_tax").cast("float"))
    .withColumn("tip_amount", F.col("tip_amount").cast("float"))
    .withColumn("tolls_amount", F.col("tolls_amount").cast("float"))
    .withColumn("improvement_surcharge", F.col("improvement_surcharge").cast("float"))
    .withColumn("total_amount", F.col("total_amount").cast("float"))
    .withColumn("RateCodeID", F.col("RateCodeID").cast("short"))
)

In [672]:
# Derived columns used later for filtering implausible trips.
# Both ratios use fare_amount rather than total_amount because generous tips can
# inflate the total.

# Dollars per mile; expected to be around $2.5 per mile according to MTA.
jan_2017 = jan_2017.withColumn(
    "cost_per_mile",
    (F.col("fare_amount") / F.col("trip_distance")).cast("float"),
)

# Miles per dollar (the inverse ratio); expected to be around 0.4.
jan_2017 = jan_2017.withColumn(
    "miles_per_dollar",
    (F.col("trip_distance") / F.col("fare_amount")).cast("float"),
)

# Portion of total_amount not explained by the itemised charges
# (fare, extra, MTA tax, tip, tolls, improvement surcharge).
jan_2017 = jan_2017.withColumn(
    "missing_money",
    (
        F.col("total_amount")
        - F.col("fare_amount")
        - F.col("extra")
        - F.col("mta_tax")
        - F.col("tip_amount")
        - F.col("tolls_amount")
        - F.col("improvement_surcharge")
    ).cast("float"),
)
                                                
                                    

In [673]:
# Basic fare cleaning: drop rows with negative tips, tolls or extras, rows whose
# total is below 3.30 (the sum of the three minimums enforced below), and rows where
# cost_per_mile could not be computed (null).
jan_2017 = (
    jan_2017
    .filter(F.col("tip_amount") >= 0)
    .filter(F.col("tolls_amount") >= 0.0)
    .filter(F.col("total_amount") >= 3.30)
    .filter(F.col("extra") >= 0.00)
    .filter(F.col("cost_per_mile").isNotNull())
)

# Minimum charges according to NYC Taxi data standards:
# 2.50 base fare, 0.3 improvement surcharge, 0.5 MTA tax.
jan_2017 = (
    jan_2017
    .filter(F.col("fare_amount") >= 2.50)
    .filter(F.col("improvement_surcharge") >= 0.3)
    .filter(F.col("mta_tax") >= 0.5)
)

#maximum fare amount, no (logical) fares were greater than 600 although some tips might be 
#jan_2017 = jan_2017.filter((jan_2017.fare_amount < 600.0))
#all trips being filtered out of the max cost per mile going < 0.1 miles 
#there were MANY trips that had a cost per mile of 1733.3334 with the total trip distance being exactly 0.03 and the fare amount being exactly 52




#jan_2017.describe("fare_amount").show()

#jan_2017.describe("DOLocationID").show()

#jan_2017.count()
#0.646167527% of trips invalidated using basic filters of fare amounts

In [674]:
#missing money description
#jan_2017.describe("missing_money").show()
#jan_2017.sort('missing_money', ascending=False).select(["missing_money"]).show(50)

In [675]:
#jan_2017.sort('fare_amount', ascending=False).select(["fare_amount", "trip_distance", "cost_per_mile"]).show(50)

In [676]:
#jan_2017.sort('trip_distance', ascending=False).select(["fare_amount", "trip_distance", "cost_per_mile"]).show(50)

In [677]:
#jan_2017.describe("cost_per_mile").show()

#jan_2017.describe("miles_per_dollar").show()

In [678]:
#jan_2017.describe("tip_amount").show()
#jan_2017.sort('tip_amount', ascending=False).select("tip_amount").show(50)

In [679]:
#jan_2017.sort('cost_per_mile', ascending=False).select(["miles_per_dollar", "cost_per_mile", "trip_distance",  "total_amount",  "fare_amount", "tip_amount", "tolls_amount"]).show(50)
#upper bound of cost per mile is currently  1733.34

+----------------+-------------+-------------+------------+-----------+----------+------------+
|miles_per_dollar|cost_per_mile|trip_distance|total_amount|fare_amount|tip_amount|tolls_amount|
+----------------+-------------+-------------+------------+-----------+----------+------------+
|          4.0E-5|      25000.0|         0.01|       270.8|      250.0|      20.0|         0.0|
|     4.255319E-5|      23500.0|         0.01|      306.54|      235.0|     70.74|         0.0|
|    5.5555553E-5|      18000.0|         0.01|       210.8|      180.0|      30.0|         0.0|
|     6.060606E-5|      16500.0|         0.01|       165.8|      165.0|       0.0|         0.0|
|     6.666667E-5|      15000.0|         0.01|       150.8|      150.0|       0.0|         0.0|
|     6.666667E-5|      15000.0|         0.01|       150.8|      150.0|       0.0|         0.0|
|     7.142857E-5|      14000.0|         0.01|       216.8|      140.0|      76.0|         0.0|
|     7.407407E-5|      13500.0|        

In [680]:
#jan_2017.sort('cost_per_mile', ascending=True).select(["cost_per_mile", "trip_distance",  "total_amount",  "fare_amount","extra", "tip_amount", "tolls_amount", "improvement_surcharge" ]).show(50)

#lower bound of cost per mile is currently 

+-------------+-------------+------------+-----------+-----+----------+------------+---------------------+
|cost_per_mile|trip_distance|total_amount|fare_amount|extra|tip_amount|tolls_amount|improvement_surcharge|
+-------------+-------------+------------+-----------+-----+----------+------------+---------------------+
|  0.017321017|        173.2|         9.3|        3.0|  5.5|       0.0|         0.0|                  0.3|
|  0.071633235|         34.9|       113.3|        2.5|  0.0|      20.0|        90.0|                  0.3|
|   0.07575758|         33.0|         3.8|        2.5|  0.5|       0.0|         0.0|                  0.3|
|   0.07911392|         31.6|         3.8|        2.5|  0.5|       0.0|         0.0|                  0.3|
|   0.09225092|         27.1|       13.84|        2.5|  5.0|       0.0|        5.54|                  0.3|
|   0.09960159|         25.1|         3.8|        2.5|  0.5|       0.0|         0.0|                  0.3|
|    0.1059322|         23.6|        

In [681]:
#jan_2017.describe("total_amount").show()
#jan_2017.sort('total_amount', ascending=False).select("total_amount").show(50)

+-------+------------------+
|summary|      total_amount|
+-------+------------------+
|  count|           4992919|
|   mean|  15.1093943040766|
| stddev|12.363551155207466|
|    min|              3.31|
|    max|            930.34|
+-------+------------------+

+------------+
|total_amount|
+------------+
|      930.34|
|       611.3|
|      608.34|
|      608.24|
|      592.84|
|      590.22|
|      538.84|
|      534.88|
|      518.34|
|      446.74|
|      432.96|
|      425.04|
|      419.37|
|       400.8|
|      387.34|
|      380.14|
|       378.8|
|      378.41|
|       377.8|
|      372.36|
|      372.34|
|       358.8|
|       350.8|
|       349.3|
|      344.05|
|      343.06|
|       339.8|
|      335.76|
|      331.89|
|      320.38|
|       320.3|
|       316.8|
|       310.8|
|      306.54|
|      306.34|
|       306.3|
|      304.01|
|       300.8|
|       300.8|
|       300.3|
|      298.63|
|      298.01|
|      294.95|
|      294.84|
|       294.8|
|      291.05|
|  

In [682]:
#jan_2017.describe("improvement_surcharge").show()

In [683]:
#jan_2017.describe("trip_distance").show()
#jan_2017.sort('trip_distance', ascending=False).select("trip_distance").show(50)

In [684]:
#jan_2017.select('tpep_pickup_datetime').show(10)

In [685]:
#jan_2017.select('PUDate').distinct().show()


In [686]:
# Read the weather file as plain text: one row per line, in a single "value" column.
weather_data = spark.read.format("text").load('weather.txt')
# Preview the first rows.
weather_data.show()

+--------------------+
|               value|
+--------------------+
|2015-1-1,12:51 AM...|
|2015-1-1,1:51 AM,...|
|2015-1-1,2:51 AM,...|
|2015-1-1,3:51 AM,...|
|2015-1-1,4:51 AM,...|
|2015-1-1,5:51 AM,...|
|2015-1-1,6:51 AM,...|
|2015-1-1,7:51 AM,...|
|2015-1-1,8:51 AM,...|
|2015-1-1,9:51 AM,...|
|2015-1-1,10:51 AM...|
|2015-1-1,11:51 AM...|
|2015-1-1,12:51 PM...|
|2015-1-1,1:51 PM,...|
|2015-1-1,2:51 PM,...|
|2015-1-1,3:51 PM,...|
|2015-1-1,4:51 PM,...|
|2015-1-1,5:51 PM,...|
|2015-1-1,6:51 PM,...|
|2015-1-1,7:51 PM,...|
+--------------------+
only showing top 20 rows



In [687]:
# Register the raw weather lines as the SQL view 'weather_data_sdf' so they can be
# queried with Spark SQL, then preview the view's contents.
weather_data.createOrReplaceTempView('weather_data_sdf')

spark.table('weather_data_sdf').show()

+--------------------+
|               value|
+--------------------+
|2015-1-1,12:51 AM...|
|2015-1-1,1:51 AM,...|
|2015-1-1,2:51 AM,...|
|2015-1-1,3:51 AM,...|
|2015-1-1,4:51 AM,...|
|2015-1-1,5:51 AM,...|
|2015-1-1,6:51 AM,...|
|2015-1-1,7:51 AM,...|
|2015-1-1,8:51 AM,...|
|2015-1-1,9:51 AM,...|
|2015-1-1,10:51 AM...|
|2015-1-1,11:51 AM...|
|2015-1-1,12:51 PM...|
|2015-1-1,1:51 PM,...|
|2015-1-1,2:51 PM,...|
|2015-1-1,3:51 PM,...|
|2015-1-1,4:51 PM,...|
|2015-1-1,5:51 PM,...|
|2015-1-1,6:51 PM,...|
|2015-1-1,7:51 PM,...|
+--------------------+
only showing top 20 rows



In [688]:
# Parse each comma-separated weather line from the 'weather_data_sdf' view into
# typed columns. The position in this list is the field index within the line.
weather_fields = [
    ("date", "string"),
    ("time", "string"),
    ("temp", "float"),
    ("windchill", "float"),
    ("dewpoint", "float"),
    ("humidity", "float"),
    ("pressure", "float"),
    ("visibility", "float"),
    ("windDir", "string"),
    ("windSpeed", "float"),
    ("gustSpeed", "float"),
    ("Precip", "float"),
    ("Events", "string"),
    ("Conditions", "string"),
]
raw_fields = split(col("value"), ",")
weather_data = spark.table('weather_data_sdf').select(
    [
        raw_fields[position].cast(type_name).alias(field_name)
        for position, (field_name, type_name) in enumerate(weather_fields)
    ]
)

In [689]:
# Turn the textual "date" field into a proper date column so it can be compared
# with the trips' date columns.
weather_data = weather_data.withColumn("date", F.col("date").cast(DateType()))

In [690]:
def period(x):
    """Return a Column holding the AM/PM marker of a 12-hour time string.

    `x` is a column (or column name) of strings shaped like "12:51 AM": the text
    after the ':' is split on the space and its second piece is returned.
    """
    minutes_and_marker = split(x, ':').getItem(1)
    return split(minutes_and_marker, " ").getItem(1)

In [691]:
def toHour(x):
    """Return a Column with the hour part of a time string, reduced modulo 12.

    `x` is a column (or column name) of strings whose text before the first ':' is
    the hour. The hour is cast to an integer and taken modulo 12, so a 12 o'clock
    reading becomes 0 and 1-11 are unchanged.
    """
    hour_text = split(x, ':').getItem(0)
    return hour_text.cast(IntegerType()) % 12

In [692]:
# Add a "period" column with the marker that follows the minutes in "time"
# (AM / PM in the previewed rows).
weather_data = weather_data.withColumn("period", period("time"))

In [693]:
# Convert the 12-hour reading to a 24-hour "hour" column: toHour gives the hour
# modulo 12, and PM readings are shifted by 12 (so 12 AM -> 0 and 12 PM -> 12).
weather_data = weather_data.withColumn(
    "hour",
    F.when(F.col("period") == 'PM', toHour("time") + 12).otherwise(toHour("time")),
)

In [694]:
# Replace nulls with 0. A numeric fill value only applies to numeric columns,
# so string columns keep their nulls.
# NOTE(review): 0 is also a plausible real reading for some fields - confirm this
# default is acceptable for the later analysis.
weather_data = weather_data.fillna(0)

In [695]:
# Inspect the parsed weather columns and their types.
weather_data.printSchema()

root
 |-- date: date (nullable = true)
 |-- time: string (nullable = true)
 |-- temp: float (nullable = false)
 |-- windchill: float (nullable = false)
 |-- dewpoint: float (nullable = false)
 |-- humidity: float (nullable = false)
 |-- pressure: float (nullable = false)
 |-- visibility: float (nullable = false)
 |-- windDir: string (nullable = true)
 |-- windSpeed: float (nullable = false)
 |-- gustSpeed: float (nullable = false)
 |-- Precip: float (nullable = false)
 |-- Events: string (nullable = true)
 |-- Conditions: string (nullable = true)
 |-- period: string (nullable = true)
 |-- hour: integer (nullable = true)



In [696]:
# Re-register the parsed weather table, then derive two renamed copies of it - one
# with PU-prefixed columns and one with DO-prefixed columns - so each can be joined
# to the trips without column-name clashes.
# NOTE(review): humidity and windSpeed are parsed earlier but are not selected here;
# confirm that omission is intended.
weather_data.createOrReplaceTempView('weather_data_sdf')

# One SELECT shared by both copies; {p} is replaced by the column prefix.
weather_select_template = (
    'SELECT date AS {p}Tempdate, '
    'time as {p}Temptime, '
    'temp as {p}temp, '
    'windchill as {p}windchill, '
    'dewpoint as {p}dewpoint, '
    'pressure as {p}pressure, '
    'visibility as {p}visibility, '
    'windDir as {p}windDir, '
    'gustSpeed as {p}gustSpeed, '
    'Precip as {p}Precip, '
    'Events as {p}Events, '
    'Conditions as {p}Conditions, '
    'period as {p}period, '
    'hour as {p}Temphour '
    'FROM weather_data_sdf'
)

weather_data_pu = spark.sql(weather_select_template.format(p='PU'))

weather_data_do = spark.sql(weather_select_template.format(p='DO'))

In [697]:
# Attach the weather observed at pickup: match on pickup date and pickup hour.
# Left outer join keeps trips that have no matching weather row.
# NOTE(review): if the weather table has several rows for one date/hour, each
# matching trip is repeated - confirm there is one observation per hour.
jan_2017 = jan_2017.join(
    weather_data_pu,
    on=[
        jan_2017.PUDate == weather_data_pu.PUTempdate,
        jan_2017.PUHour == weather_data_pu.PUTemphour,
    ],
    how="left_outer",
)


In [698]:
# Attach the weather observed at dropoff: match on dropoff date and dropoff hour.
# Left outer join keeps trips that have no matching weather row.
# NOTE(review): if the weather table has several rows for one date/hour, each
# matching trip is repeated - confirm there is one observation per hour.
jan_2017 = jan_2017.join(
    weather_data_do,
    on=[
        jan_2017.DODate == weather_data_do.DOTempdate,
        jan_2017.DOHour == weather_data_do.DOTemphour,
    ],
    how="left_outer",
)

In [648]:
# Inspect the trip schema after the casts and joins.
jan_2017.printSchema()

root
 |-- VendorID: short (nullable = true)
 |-- tpep_pickup_datetime: string (nullable = true)
 |-- tpep_dropoff_datetime: string (nullable = true)
 |-- passenger_count: short (nullable = true)
 |-- trip_distance: float (nullable = true)
 |-- RateCodeID: short (nullable = true)
 |-- store_and_fwd_flag: string (nullable = true)
 |-- PULocationID: string (nullable = true)
 |-- DOLocationID: string (nullable = true)
 |-- payment_type: short (nullable = true)
 |-- fare_amount: float (nullable = true)
 |-- extra: float (nullable = true)
 |-- mta_tax: float (nullable = true)
 |-- tip_amount: float (nullable = true)
 |-- tolls_amount: float (nullable = true)
 |-- improvement_surcharge: float (nullable = true)
 |-- total_amount: float (nullable = true)
 |-- LocationID: string (nullable = true)
 |-- PUBorough: string (nullable = true)
 |-- PUZone: string (nullable = true)
 |-- PUServiceZone: string (nullable = true)
 |-- LocationID: string (nullable = true)
 |-- DOBorough: string (nullable = t

In [711]:
# Expose the trip DataFrame to Spark SQL under the view name 'jan_2017_sdf'.
jan_2017.createOrReplaceTempView('jan_2017_sdf')

In [723]:
# Load the demographics CSV (first row supplies the column names) and cache it.
demographics = spark.read.format("csv").load('demographics.csv', header = True).cache()

In [724]:
# Preview a single row to see which columns the demographics table provides.
demographics.show(1)

+--------------------+---------------+---------------+-------------+-------------+------------------+------------------+------------+------------+----------------+----------------+------------------------+------------------------+----------------------------+----------------------------+-----------------------+-----------------------+-----------------+-----------------+--------------------+--------------------+--------------------+--------------------+---------------------+---------------------+----------------------+----------------------+--------------+--------------+------------------+------------------+-----------+-----------+-------------+-------------+------------------+------------------+------------------+------------------+---------------------+---------------------+------------------+------------------+-------------+------------+------------------------+------------------------+------------------------+------------------------+-------------+-------------+---------------------

In [727]:
# List up to 100 of the distinct pickup zone names that occur in the trips.
jan_2017.select('PUZone').distinct().show(100)

+--------------------+
|              PUZone|
+--------------------+
|Governor's Island...|
|           Homecrest|
|              Corona|
|    Bensonhurst West|
|          Douglaston|
|      Newark Airport|
|          Mount Hope|
|East Concourse/Co...|
|      Pelham Parkway|
|         Marble Hill|
|           Rego Park|
|Heartland Village...|
|       Dyker Heights|
|Upper East Side S...|
|   Kew Gardens Hills|
|             Bayside|
|     Jackson Heights|
|      Yorkville West|
|TriBeCa/Civic Center|
|          Highbridge|
|Stuy Town/Peter C...|
|Flushing Meadows-...|
|Riverdale/North R...|
|      Bushwick South|
|     Windsor Terrace|
|         Old Astoria|
|          Ocean Hill|
|      West Concourse|
|        Clinton Hill|
|  Stuyvesant Heights|
|Upper West Side N...|
|     Mariners Harbor|
|          Kensington|
|Springfield Garde...|
|   East Harlem South|
|     Oakland Gardens|
| Crown Heights North|
|            Longwood|
|           Hudson Sq|
|     Lenox Hill East|
|          

In [731]:
# Count the distinct dropoff zone values that occur in the trips.
jan_2017.select('DOZone').distinct().count()

261

In [732]:
# Number of rows in the zone lookup, for comparison with the distinct zone count.
taxi_zone.count()

265