In [1]:
import pandas as pd
import pyarrow.parquet as pq
import pyarrow as pa
import pyarrow.compute as pc
import pyarrow.dataset as ds


# Monthly report CSV (filtered by 'Month/Year' and 'License Class' further down)
data_reports_monthly = pd.read_csv('data_reports_monthly.csv')
# Read the trip records with pyarrow and convert straight to a pandas DataFrame,
# so `trip_data` is only ever bound to the DataFrame
trip_data = pq.read_table('fhvhv_tripdata_2025-02.parquet').to_pandas()


In [None]:
# cleaning the data

In [2]:
# add column company using the hvfhs_license_num:
# HV0003: Uber 
# HV0005: Lyft

def get_company(license_num):
    """Translate an hvfhs_license_num code into a company name.

    Returns 'Uber' for 'HV0003', 'Lyft' for 'HV0005', and None for any
    other value.
    """
    known_licenses = (('HV0003', 'Uber'), ('HV0005', 'Lyft'))
    for code, company_name in known_licenses:
        if license_num == code:
            return company_name
    return None
# label every trip with its company, derived element-wise from the license number
license_numbers = trip_data['hvfhs_license_num']
trip_data['company'] = license_numbers.apply(get_company)


## Inferring the Fleet Size

### 1. Total fleet size of High Volume Ride Hailing Services
Shows the combined fleet size of all large ride-hailing competitors.

In [3]:
# select the report row where Month/Year is '2025-02' and License Class is 'FHV - High Volume'
is_feb_2025 = data_reports_monthly['Month/Year'] == '2025-02'
is_high_volume = data_reports_monthly['License Class'] == 'FHV - High Volume'
feb_2025 = data_reports_monthly[is_feb_2025 & is_high_volume]

# take the first matching row's values as text for the summary lines
unique_drivers_text = feb_2025['Unique Drivers'].astype(str).values[0]
unique_vehicles_text = feb_2025['Unique Vehicles'].astype(str).values[0]
print("Fleet Size in February 2025 (Drivers): " + unique_drivers_text)
print("Fleet Size in February 2025 (Vehicles): " + unique_vehicles_text)

feb_2025

Fleet Size in February 2025 (Drivers): 80,286
Fleet Size in February 2025 (Vehicles): 79,556


Unnamed: 0,Month/Year,License Class,Trips Per Day,Farebox Per Day,Unique Drivers,Unique Vehicles,Vehicles Per Day,Avg Days Vehicles on Road,Avg Hours Per Day Per Vehicle,Avg Days Drivers on Road,Avg Hours Per Day Per Driver,Avg Minutes Per Trip,Percent of Trips Paid with Credit Card,Trips Per Day Shared
10,2025-02,FHV - High Volume,690694,-,80286,79556,57477,20.2,6.6,20.3,6.5,18.0,-,13562


## 2. Fleet size of Uber in February 2025 
### Approach Zero
To estimate the average fleet size of Uber per day, take the percentage of Uber rides among all rides and apply that proportion to the total vehicle count per day.

In [2]:
# portion of rides that are Uber
# Summing the boolean mask counts the matching rows directly, without first
# materialising a filtered copy of the full trip table just to take its length.
uber_rides_count = int((trip_data['company'] == 'Uber').sum())
uber_rides_share = uber_rides_count / len(trip_data)

# Apply share to total ride hailing fleet size
# Remove thousands separators before converting to float. Wrapping the value in
# str() keeps this working whether the CSV column was parsed as text or as a
# number (a numeric value has no .replace method).
vehicles_per_day_str = str(feb_2025['Vehicles Per Day'].iloc[0]).replace(',', '')
uber_fleet_size = float(vehicles_per_day_str) * uber_rides_share

# int() truncates the estimate towards zero for display
print("Estimated Uber Fleet Size in February 2025: " + str(int(uber_fleet_size)))

NameError: name 'trip_data' is not defined

### First Approach
For each minute, calculate the number of active rides. The maximum number of simultaneous rides is a lower bound for the fleet size on that day.

In [None]:
# NOTE(review): neither `calculate_active_rides_per_minute` nor `df` is defined in the
# cells visible here (the trip DataFrame loaded above is named `trip_data`) — confirm
# both exist before this cell is run on a fresh kernel.
calculate_active_rides_per_minute(df, target_date='2025-02-01')

### Second Approach
Over a specific period, calculate the number of rides that are possible if a driver can serve only one ride at a time and can start a new ride only in a zone that is reachable from their last drop-off zone within the available time.
Uses the distance from each zone to every other zone.

**Assumption:** the driver stays in the zone of the last drop off when waiting for his next request.

In [None]:
# reduce trip data to only include trips on 1st February 2025
# A half-open datetime range [day_start, day_end) keeps the comparison vectorised;
# `.dt.date` would first build one Python date object per row of the monthly table.
# NOTE(review): assumes pickup_datetime is timezone-naive, as the displayed values suggest — confirm.
day_start = pd.Timestamp('2025-02-01')
day_end = day_start + pd.Timedelta(days=1)
pickup_times = trip_data['pickup_datetime']
feb1_2025_trips = trip_data[(pickup_times >= day_start) & (pickup_times < day_end)]

# reduce trip data to Uber rides only
feb1_2025_uber_trips = feb1_2025_trips[feb1_2025_trips['company'] == 'Uber']

print("Number of Uber trips on 1st February 2025: " + str(len(feb1_2025_uber_trips)))

57,477 

6.6

# drop the license and base-number identifier columns, then display the result
identifier_columns = ['hvfhs_license_num', 'dispatching_base_num', 'originating_base_num']
feb1_2025_uber_trips = feb1_2025_uber_trips.drop(columns=identifier_columns)
feb1_2025_uber_trips

Number of Uber trips on 1st February 2025: 593734


Unnamed: 0,request_datetime,on_scene_datetime,pickup_datetime,dropoff_datetime,PULocationID,DOLocationID,trip_miles,trip_time,base_passenger_fare,tolls,...,airport_fee,tips,driver_pay,shared_request_flag,shared_match_flag,access_a_ride_flag,wav_request_flag,wav_match_flag,cbd_congestion_fee,company
0,2025-01-31 23:59:37,2025-02-01 00:02:18,2025-02-01 00:03:15,2025-02-01 00:07:02,233,170,1.02,227,10.02,0.0,...,0.0,0.00,4.26,N,N,N,N,N,1.5,Uber
1,2025-02-01 00:03:52,2025-02-01 00:09:57,2025-02-01 00:10:11,2025-02-01 00:36:32,107,47,8.71,1581,38.93,0.0,...,0.0,0.00,27.21,N,N,N,N,N,1.5,Uber
2,2025-02-01 00:30:46,2025-02-01 00:33:02,2025-02-01 00:34:30,2025-02-01 01:00:56,237,249,5.12,1586,52.70,0.0,...,0.0,0.00,32.39,N,N,N,N,N,1.5,Uber
3,2025-02-01 00:33:01,2025-02-01 00:33:09,2025-02-01 00:35:10,2025-02-01 00:48:22,263,151,2.37,792,16.52,0.0,...,0.0,0.00,10.91,N,N,N,N,N,0.0,Uber
4,2025-02-01 00:51:19,2025-02-01 00:53:04,2025-02-01 00:54:11,2025-02-01 01:26:01,238,33,10.81,1910,34.37,0.0,...,0.0,8.69,33.26,N,N,N,N,N,0.0,Uber
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
798390,2025-02-01 23:07:32,2025-02-01 23:13:20,2025-02-01 23:13:36,2025-02-01 23:26:51,137,79,1.68,795,28.72,0.0,...,0.0,6.94,18.27,N,N,N,N,N,1.5,Uber
798391,2025-02-01 23:31:53,2025-02-01 23:37:44,2025-02-01 23:38:48,2025-02-01 23:52:47,79,144,1.03,839,55.50,0.0,...,0.0,12.90,27.24,N,N,N,N,N,1.5,Uber
798392,2025-02-01 22:52:18,2025-02-01 22:59:12,2025-02-01 23:01:07,2025-02-01 23:09:09,163,141,1.08,482,30.74,0.0,...,0.0,0.00,15.21,N,N,N,N,N,1.5,Uber
798393,2025-02-01 23:06:53,2025-02-01 23:13:52,2025-02-01 23:13:57,2025-02-01 23:42:11,141,90,5.79,1694,21.36,0.0,...,0.0,0.00,24.33,N,N,N,N,N,1.5,Uber
