In [88]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [89]:
# Raw inputs.
df_did = pd.read_csv('driver_ids.csv')
df_rid = pd.read_csv('ride_ids.csv')
df_rts = pd.read_csv('ride_timestamps.csv')

# Pivoted ride timestamps: one row per ride_id, one column per ride event.
# The pivot file is a derived artifact of ride_timestamps.csv, so rebuild it when it is
# missing instead of failing; this keeps the notebook runnable from a fresh kernel.
PIVOT_CSV = 'ride_timestamps_pvt.csv'
TIMESTAMP_COLS = ['accepted_at', 'arrived_at', 'dropped_off_at', 'picked_up_at', 'requested_at']
try:
    df_rtsp = pd.read_csv(PIVOT_CSV, parse_dates=TIMESTAMP_COLS)
except FileNotFoundError:
    # Same recipe as the one-off pivot step: earliest timestamp per (ride_id, event),
    # written to disk and then read back exactly like the cached path above.
    pd.pivot_table(
        df_rts, values='timestamp', index='ride_id', columns='event',
        dropna=False, aggfunc='min',
    ).reset_index().to_csv(PIVOT_CSV, index=False)
    df_rtsp = pd.read_csv(PIVOT_CSV, parse_dates=TIMESTAMP_COLS)

# Shortcut: load the finished revenue table instead of recomputing it.
# That file is produced by the final cell of this notebook.
# df = pd.read_csv('rides_revenue.csv')

#### Pivot the ride_timestamps table so that each row is one unique ride

In [16]:
# One-off preprocessing step, kept commented out for reference.
# It pivots df_rts (columns: ride_id, event, timestamp) into one row per ride_id with one
# column per event, keeping the minimum timestamp per (ride_id, event), and saves the result
# as ride_timestamps_pvt.csv -- the file that is loaded into df_rtsp.
# df_rtsp = pd.pivot_table(df_rts, values='timestamp', index='ride_id', columns='event', dropna=False, aggfunc='min')
# df_rtsp.reset_index(inplace=True)
# df_rtsp.to_csv("ride_timestamps_pvt.csv", index=False)

#### Do data transformations to get a rich table. Goal: Ride Revenue + Driver Information

In [79]:
# Elapsed time between consecutive ride events: new column = end event - start event.
for new_col, start_col, end_col in [
    ('time_to_accept', 'requested_at', 'accepted_at'),
    ('time_to_arrive', 'accepted_at', 'arrived_at'),
    ('driver_wait_time', 'arrived_at', 'picked_up_at'),
]:
    df_rtsp[new_col] = df_rtsp[end_col] - df_rtsp[start_col]

## Trip duration is intentionally not derived here: the ride_id dataset already ships with
## a duration, and that one is used downstream instead.
# df_rtsp['trip_duration'] = df_rtsp['dropped_off_at'] - df_rtsp['picked_up_at']

#### Join Driver ID and Driver Onboard Date

In [80]:
# Inner joins drop rides without a match in the ride or driver tables, which avoids NaNs in
# the joined columns. Per the original analysis, only a negligible number of rows is lost.
df = (
    df_rtsp
    .merge(df_rid, how='inner', on='ride_id')
    .merge(df_did, how='inner', on='driver_id')
)

#### Misc Conversions

In [83]:
# Conversion factors used by the fare calculation.
METERS_PER_MILE = 1609.34
SECONDS_PER_MINUTE = 60

# Distance: meters -> miles
df['trip_miles'] = df['ride_distance'].div(METERS_PER_MILE)

# Duration: seconds -> minutes
df['ride_duration_min'] = df['ride_duration'].div(SECONDS_PER_MINUTE)

#### Calculate Revenue Per Ride

In [84]:
# Revenue model for a single ride.
# Assumption: Prime time is a multiplier applied at the end of the fare calculation,
# i.e. after the minimum/maximum fare clamp.
def get_ride_revenue(ride):
    """Return the estimated revenue for one ride, rounded to two decimals.

    Parameters
    ----------
    ride : mapping-like
        Anything supporting key lookup (a dict, or a DataFrame row as passed by
        ``df.apply(get_ride_revenue, axis=1)``) with the keys ``'trip_miles'``,
        ``'ride_duration_min'`` and ``'ride_prime_time'``. Prime time is read as a
        percentage, so 25 means the fare is multiplied by 1.25.

    Returns
    -------
    float (NumPy scalar)
        Base fare + service fee + per-mile and per-minute charges, clamped to
        ``[minimum_fare, maximum_fare]``, then multiplied by the prime-time factor
        and rounded with ``np.around(..., decimals=2)``.
    """
    base_fare = 2.00
    cost_per_mile = 1.15
    cost_per_minute = 0.22
    service_fee = 1.75
    minimum_fare = 5.00
    maximum_fare = 400.00

    # Fixed charges first, then the distance- and time-based charges
    trip_revenue = base_fare + service_fee
    trip_revenue += ride['trip_miles'] * cost_per_mile
    trip_revenue += ride['ride_duration_min'] * cost_per_minute

    # Clamp to the fare limits. Uses the named constants (previously the literals 400 and 5
    # were duplicated here, so changing the constants had no effect). Plain comparisons are
    # kept on purpose: a NaN fare fails both tests and stays NaN.
    if trip_revenue > maximum_fare:
        trip_revenue = maximum_fare
    elif trip_revenue < minimum_fare:
        trip_revenue = minimum_fare

    # Apply prime time last. Dividing by 100.0 guarantees true division even when the
    # percentage is stored as an integer.
    trip_revenue = trip_revenue * (1 + (ride['ride_prime_time'] / 100.0))

    # Round to the nearest cent to keep downstream calculations clean
    return np.around(trip_revenue, decimals=2)

In [85]:
# Compute the revenue of every ride by running the fare function once per row
df['ride_revenue'] = df.apply(get_ride_revenue, axis='columns')

In [87]:
# Persist the enriched ride table (without the index) so later runs can load it directly
df.to_csv(path_or_buf='rides_revenue.csv', index=False)