In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import math
import time
from datetime import datetime

In [2]:
# Load the ride log and derive the columns used by the simulation below.
# Average driving speed assumed when estimating trip duration. The previous
# comment said 40 mph, but the formula has always divided by 10 (e.g. a
# 1.45 mile ride yields 522 s), so the effective assumption is 10 mph.
ASSUMED_SPEED_MPH = 10

df = pd.read_csv("../boston_kaggle/cab_rides.csv")
# Scale time_stamp down by 1000 (epoch milliseconds -> seconds); the later
# datetime.fromtimestamp calls expect seconds.
df['time_stamp'] = df['time_stamp']/1000
df = df.sort_values(by = "time_stamp")
# Estimated trip duration in seconds: hours at the assumed speed * 60 * 60.
df['duration'] = df['distance'] / ASSUMED_SPEED_MPH * 60 * 60
df = df.reset_index(drop=True)
# NOTE(review): every missing value becomes 0, so rides without a recorded
# price are kept with price 0 and can still be picked by the simulation.
df = df.fillna(0)

In [3]:
# Snapshot of the preprocessed frame; .copy() keeps it independent of any later change to `df`.
old_df = df.copy()

In [4]:
# List the columns available after preprocessing.
df.columns

Index(['distance', 'cab_type', 'time_stamp', 'destination', 'source', 'price',
       'surge_multiplier', 'id', 'product_id', 'name', 'duration'],
      dtype='object')

In [5]:
# Distinct destination names that occur in the ride log.
set(df.destination)

{'Back Bay',
 'Beacon Hill',
 'Boston University',
 'Fenway',
 'Financial District',
 'Haymarket Square',
 'North End',
 'North Station',
 'Northeastern University',
 'South Station',
 'Theatre District',
 'West End'}

In [6]:
# Print the frame as plain text for a quick look at the rows.
print(df)

        distance cab_type    time_stamp              destination  \
0           3.03     Lyft  1.543204e+09         Theatre District   
1           1.57     Uber  1.543204e+09                North End   
2           1.30     Uber  1.543204e+09         Theatre District   
3           2.71     Uber  1.543204e+09                   Fenway   
4           2.43     Lyft  1.543204e+09              Beacon Hill   
5           2.71     Uber  1.543204e+09                   Fenway   
6           2.71     Uber  1.543204e+09                   Fenway   
7           2.19     Uber  1.543204e+09                North End   
8           3.05     Uber  1.543204e+09                   Fenway   
9           2.19     Uber  1.543204e+09                North End   
10          2.19     Uber  1.543204e+09                North End   
11          2.22     Lyft  1.543204e+09  Northeastern University   
12          4.46     Lyft  1.543204e+09       Financial District   
13          4.46     Lyft  1.543204e+09       Fi

In [7]:
# Inspect every field of a single ride (row position 100).
df.iloc[100]

distance                                            1.45
cab_type                                            Lyft
time_stamp                                   1.54321e+09
destination                                       Fenway
source                                          Back Bay
price                                               10.5
surge_multiplier                                       1
id                  e57bd38c-cb92-414a-be2f-322beb26e8ad
product_id                                     lyft_plus
name                                             Lyft XL
duration                                             522
Name: 100, dtype: object

'2018-12-18 14:15:10.943000'

In [94]:
def simulate_drive(df, time_start, time_end, source = "Haymarket Square", verbose = True,
                   commission = 0.3, mean_wait = 5 * 60, wait_std = 3 * 60):
    '''
    Simulate one driver shift by greedily chaining rides from the ride log.

    Starting at `source`, the driver repeatedly takes the first row of `df`
    that departs from the current location and whose time_stamp is later
    than the simulation clock and earlier than `time_end` (the earliest such
    ride when `df` is sorted by time_stamp). After each ride the clock moves
    forward by the ride's duration plus a random wait, and the ride's
    destination becomes the current location. The shift ends as soon as no
    matching ride is left.

    Parameters
    ----------
    df : pandas.DataFrame
        Ride log with 'time_stamp', 'source', 'destination', 'price' and
        'duration' columns; time_stamp and duration are in seconds.
    time_start, time_end : float
        Shift boundaries as timestamps in seconds.
    source : str
        Location where the shift starts.
    verbose : bool
        Print one line per ride plus the totals when True.
    commission : float
        Fraction of the fares counted as earnings.
    mean_wait, wait_std : float
        Mean and standard deviation, in seconds, of the normally distributed
        wait drawn before the first ride and after every ride. Negative
        draws are clipped to zero.

    Returns
    -------
    (running_price, earnings)
        Sum of the fares of all rides taken, and that sum times `commission`.
    '''
    def draw_wait():
        # Clipped at zero so a wait can never move the clock backwards.
        return max(0, np.random.normal(0, 1) * wait_std + mean_wait)

    # Initial delay before the driver is ready for the first pickup.
    time_start = time_start + draw_wait()

    # Only rides requested inside the (delayed) shift window are candidates.
    window = df[(df['time_stamp'] > time_start) & (df['time_stamp'] < time_end)]

    def rides_from(location, after):
        # Candidate rides leaving `location` that were requested after `after`.
        mask = (window['time_stamp'] > after) & (window['source'] == location)
        return window[mask].reset_index(drop= True)

    # total price of all the rides
    running_price = 0
    drives = rides_from(source, time_start)

    while len(drives) > 0:
        # Wait that follows this ride; drawn first so it can be reported with the ride.
        wait_noise = draw_wait()
        price = drives['price'].iloc[0]
        duration = drives['duration'].iloc[0]
        destination = drives['destination'].iloc[0]

        if verbose:
            print("\t[Time_Start : %s] $%s [%.2f (%.2f wait)] [%s] to [%s]" %(str(datetime.fromtimestamp(time_start)).split(".")[0],
                                                        price,
                                                        duration/60,
                                                        wait_noise/60,
                                                        drives['source'].iloc[0],
                                                        destination
                                                        ))
        running_price += price

        # Everything below happens after the trip has been taken.
        # NOTE(review): the clock is advanced from its current value, not from
        # the ride's own time_stamp; only the duration and the wait are added.
        time_start = time_start + duration + wait_noise

        # The next pickup location is where this ride ended.
        source = destination
        drives = rides_from(source, time_start)

    earnings = running_price * commission
    if verbose:
        print("\tPrice $%.2f" % running_price)
        print("\tEarned $%.2f" % earnings)
    return running_price, earnings

# Example run: a one-hour shift that begins at the timestamp of row 101, starting in Back Bay.
time_start = df['time_stamp'].iloc[101]
time_end = time_start + 60 * 60    # one hour later, in seconds
simulate_drive(df, time_start, time_end, "Back Bay")


	[Time_Start : 2018-11-25 23:40:55] $38.5 [18.24 (4.01 wait)] [Back Bay] to [North End]
	[Time_Start : 2018-11-26 00:03:10] $5.0 [7.56 (6.42 wait)] [North End] to [West End]
	[Time_Start : 2018-11-26 00:17:09] $3.5 [4.50 (2.34 wait)] [West End] to [Haymarket Square]
	[Time_Start : 2018-11-26 00:23:59] $26.0 [2.94 (2.17 wait)] [Haymarket Square] to [North Station]
	[Time_Start : 2018-11-26 00:29:06] $24.0 [18.06 (7.20 wait)] [North Station] to [Boston University]
	Price $97.00
	Earned $29.10


(97.0, 29.099999999999998)

In [95]:
# Calendar grid dimensions: 24 hourly rows by 7 day columns.
CALENDAR_SIZE = (24, 7)

def generate_calendar_matrix(arr = None):
    '''
    Build the weekly calendar as a DataFrame.

    Rows are indexed 0..CALENDAR_SIZE[0]-1 and columns are labelled
    "mon" through "sun". The cells are taken from `arr` when it is
    given; otherwise the calendar is filled with zeros.
    '''
    day_labels = ["mon", "tues", "wed", "thur", "fri", "sat", "sun"]
    hour_index = range(CALENDAR_SIZE[0])
    values = np.zeros(CALENDAR_SIZE) if arr is None else arr
    return pd.DataFrame(values, index = hour_index, columns = day_labels)

def to_datestring(s):
    '''
    Format a timestamp in seconds as a local-time 'YYYY-MM-DD HH:MM:SS' string.

    Any fractional-second part is dropped.
    '''
    # The body previously read an undefined name `x` instead of the parameter `s`.
    return str(datetime.fromtimestamp(s)).split(".")[0]

In [109]:
# Seed NumPy's global generator so the random calendar below is reproducible.
np.random.seed(8)

# Random 0/1 calendar: a slot is 1 when its uniform draw exceeds 0.1, otherwise 0.
cdf = generate_calendar_matrix((np.random.random(CALENDAR_SIZE) > 0.1).astype(int))


def get_start_end_times(df, base_date = '11/30/18'):
    '''
    Convert a 0/1 availability calendar into shift start and end timestamps.

    `df` holds one row per hour and one column per day; a 1 marks an hour
    that is worked. Every run of consecutive 1s within a column is one
    shift: it starts at the hour of its first 1 and ends at the hour after
    its last 1.

    Parameters
    ----------
    df : pandas.DataFrame
        Calendar of 0/1 values (hours x days).
    base_date : str
        Date in '%m/%d/%y' format mapped to hour 0 of the first column;
        each following column is one day later.
        NOTE: columns are mapped purely by position. The default 11/30/18
        is a Friday, so weekday labels such as "mon" are only literal when
        `base_date` is a Monday.

    Returns
    -------
    (start_times, end_times)
        Two sorted arrays of timestamps in seconds, one entry per shift.
    '''
    cmatrix = df.values
    n_hours, n_days = cmatrix.shape

    # Compare every hour with the previous one. The extra zero row lets a
    # shift begin at the first hour and end after the last hour of a day.
    previous_hour = np.zeros((n_hours + 1, n_days))
    current_hour = np.zeros((n_hours + 1, n_days))
    previous_hour[1:, :] = cmatrix
    current_hour[:-1, :] = cmatrix

    diff = current_hour - previous_hour
    start_times_bool = diff == 1    # 0 -> 1 transition: a shift begins
    end_times_bool = diff == -1     # 1 -> 0 transition: a shift ends

    # Timestamp of every (hour, day) boundary on the grid, same layout as diff.
    base_timestamp = datetime.timestamp(datetime.strptime(base_date, '%m/%d/%y'))
    hours = np.arange(n_hours + 1).reshape(-1, 1)
    days = np.arange(n_days).reshape(1, -1)
    date_arr = base_timestamp + hours * 60 * 60 + days * 24 * 60 * 60

    # Sorting puts both arrays in chronological order so they pair up by index.
    start_times = np.sort(date_arr[start_times_bool])
    end_times = np.sort(date_arr[end_times_bool])

    return start_times, end_times


# Derive the shift boundaries from the random calendar, then show the calendar itself.
start_times, end_times = get_start_end_times(cdf)
print(cdf)

    mon  tues  wed  thur  fri  sat  sun
0     1     1    1     1    1    0    1
1     1     1    1     1    1    1    1
2     1     1    1     1    1    1    0
3     1     1    1     0    1    1    1
4     1     1    0     1    1    1    1
5     1     1    1     1    1    0    1
6     1     0    1     1    1    1    0
7     1     1    1     1    1    1    1
8     1     1    1     1    1    1    1
9     1     0    0     1    1    1    1
10    1     1    1     1    1    1    1
11    1     1    1     1    1    1    1
12    1     1    1     1    1    1    1
13    1     1    1     1    1    1    1
14    0     1    1     1    1    1    1
15    1     1    1     1    1    0    1
16    1     0    1     1    1    1    1
17    1     1    1     1    0    1    1
18    1     1    1     1    0    1    1
19    1     1    1     0    1    1    1
20    1     1    1     1    1    1    1
21    1     1    1     1    1    0    1
22    1     1    1     1    1    1    1
23    1     1    1     0    1    1    1


In [110]:
# Simulate every scheduled shift and accumulate the total fares and earnings.
running_price = 0
running_earnings = 0
n_shifts = len(start_times)
for i, start in enumerate(start_times):
    # Shift boundaries are paired by position in the two sorted arrays.
    end = end_times[i]

    print("Ride %d of %d [%d%%]"%(i, n_shifts, 100 * i/n_shifts))
    price, earnings = simulate_drive(df, start, end)

    running_price = running_price + price
    running_earnings = running_earnings + earnings

print(running_price)
print(running_earnings)

Ride 0 of 21 [0%]
	[Time_Start : 2018-11-30 00:00:40] $7.5 [2.94 (7.85 wait)] [Haymarket Square] to [North Station]
	[Time_Start : 2018-11-30 00:11:28] $9.0 [12.90 (0.42 wait)] [North Station] to [South Station]
	[Time_Start : 2018-11-30 00:24:47] $22.5 [14.94 (8.41 wait)] [South Station] to [Beacon Hill]
	[Time_Start : 2018-11-30 00:48:08] $13.5 [13.86 (6.45 wait)] [Beacon Hill] to [Fenway]
	[Time_Start : 2018-11-30 01:08:27] $20.5 [16.32 (1.99 wait)] [Fenway] to [West End]
	[Time_Start : 2018-11-30 01:26:46] $8.0 [12.84 (4.04 wait)] [West End] to [South Station]
	[Time_Start : 2018-11-30 01:43:39] $27.5 [14.64 (5.47 wait)] [South Station] to [Beacon Hill]
	[Time_Start : 2018-11-30 02:03:46] $32.5 [14.76 (6.08 wait)] [Beacon Hill] to [Northeastern University]
	[Time_Start : 2018-11-30 02:24:36] $19.5 [27.42 (5.77 wait)] [Northeastern University] to [Financial District]
	[Time_Start : 2018-11-30 02:57:47] $16.5 [20.70 (9.28 wait)] [Financial District] to [Northeastern University]
	[Tim

	[Time_Start : 2018-12-01 15:13:49] $19.5 [8.88 (7.94 wait)] [Back Bay] to [Boston University]
	[Time_Start : 2018-12-01 15:30:38] $19.5 [16.20 (8.40 wait)] [Boston University] to [Beacon Hill]
	[Time_Start : 2018-12-01 15:55:14] $10.5 [13.62 (1.87 wait)] [Beacon Hill] to [Boston University]
	Price $272.50
	Earned $81.75
Ride 5 of 21 [23%]
	[Time_Start : 2018-12-01 17:06:21] $8.5 [3.66 (2.82 wait)] [Haymarket Square] to [West End]
	[Time_Start : 2018-12-01 17:12:50] $9.5 [17.04 (3.47 wait)] [West End] to [Fenway]
	[Time_Start : 2018-12-01 17:33:21] $22.5 [26.70 (2.67 wait)] [Fenway] to [Financial District]
	[Time_Start : 2018-12-01 18:02:43] $22.0 [28.38 (9.42 wait)] [Financial District] to [Northeastern University]
	[Time_Start : 2018-12-01 18:40:31] $12.0 [27.30 (2.11 wait)] [Northeastern University] to [Financial District]
	[Time_Start : 2018-12-01 19:09:56] $27.0 [7.50 (4.57 wait)] [Financial District] to [Haymarket Square]
	[Time_Start : 2018-12-01 19:22:00] $7.5 [6.96 (6.36 wait)

	[Time_Start : 2018-12-02 21:30:54] $17.0 [16.86 (9.28 wait)] [Northeastern University] to [Theatre District]
	[Time_Start : 2018-12-02 21:57:02] $0.0 [9.54 (2.74 wait)] [Theatre District] to [North End]
	[Time_Start : 2018-12-02 22:09:19] $8.5 [8.46 (11.11 wait)] [North End] to [Theatre District]
	[Time_Start : 2018-12-02 22:28:53] $9.0 [10.68 (4.25 wait)] [Theatre District] to [Haymarket Square]
	[Time_Start : 2018-12-02 22:43:49] $16.5 [14.46 (1.23 wait)] [Haymarket Square] to [Back Bay]
	[Time_Start : 2018-12-02 22:59:30] $7.0 [6.30 (4.15 wait)] [Back Bay] to [Northeastern University]
	[Time_Start : 2018-12-02 23:09:57] $5.0 [8.82 (4.85 wait)] [Northeastern University] to [Back Bay]
	[Time_Start : 2018-12-02 23:23:37] $30.5 [13.98 (7.22 wait)] [Back Bay] to [North End]
	[Time_Start : 2018-12-02 23:44:49] $19.5 [15.12 (8.16 wait)] [North End] to [Back Bay]
	Price $654.50
	Earned $196.35
Ride 9 of 21 [42%]
	[Time_Start : 2018-12-03 00:06:41] $7.5 [5.94 (2.03 wait)] [Haymarket Square]

In [108]:
# Display the ride table.
df

Unnamed: 0,distance,cab_type,time_stamp,destination,source,price,surge_multiplier,id,product_id,name,duration
0,3.03,Lyft,1.543204e+09,Theatre District,Boston University,34.0,1.00,ef4771c2-c88d-4730-aaf7-a95751e9d27e,lyft_luxsuv,Lux Black XL,1090.8
1,1.57,Uber,1.543204e+09,North End,Theatre District,0.0,1.00,9962f244-8fce-4ae9-a583-139d5d7522e1,8cf7e821-f0d3-49c6-8eba-e679c0ebcf6a,Taxi,565.2
2,1.30,Uber,1.543204e+09,Theatre District,South Station,18.5,1.00,00ea74ea-2c49-416c-bfc5-f7877025f6eb,6c84fd89-3f11-4782-9b50-97c468b19529,Black,468.0
3,2.71,Uber,1.543204e+09,Fenway,Theatre District,19.5,1.00,8682f9bf-5cc0-4dfc-b8fe-4e22070d1684,55c66225-fbe7-4fd5-9072-eab1ece5e23e,UberX,975.6
4,2.43,Lyft,1.543204e+09,Beacon Hill,Northeastern University,10.5,1.00,edfc7f44-97e1-48cd-930c-e4fe20e88ac8,lyft,Lyft,874.8
5,2.71,Uber,1.543204e+09,Fenway,Theatre District,32.0,1.00,6172077a-22de-481b-aae2-b5763c87a6c4,6f72dfc5-27f1-42e8-84db-ccc7a75f6969,UberXL,975.6
6,2.71,Uber,1.543204e+09,Fenway,Theatre District,0.0,1.00,2ee597c2-b685-450e-b3a2-2df42fb18d49,8cf7e821-f0d3-49c6-8eba-e679c0ebcf6a,Taxi,975.6
7,2.19,Uber,1.543204e+09,North End,Beacon Hill,8.0,1.00,bb3f969d-3190-4bb8-9a84-dff2deba0a98,55c66225-fbe7-4fd5-9072-eab1ece5e23e,UberX,788.4
8,3.05,Uber,1.543204e+09,Fenway,North Station,10.5,1.00,f67b0a6b-08f9-43bb-b47d-efad7310d4c7,9a0e7b09-b92b-4c41-9779-2ad22b4d779d,WAV,1098.0
9,2.19,Uber,1.543204e+09,North End,Beacon Hill,13.0,1.00,cec0651f-2280-48ac-86bf-cb0152cada9e,6f72dfc5-27f1-42e8-84db-ccc7a75f6969,UberXL,788.4
