In [None]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
import plotly.express as px


In [None]:
supply = pd.read_excel(r"C:\Users\parke\Desktop\drivers_acceptance_price.csv")
supply = supply.rename(columns = str.lower)

supply.info()

In [None]:
px.scatter(supply, x = 'pay', y = 'accepted').show('svg')

In [148]:
logit = smf.logit("accepted ~ pay", supply).fit()
logit.summary()

Optimization terminated successfully.
         Current function value: 0.394998
         Iterations 7


0,1,2,3
Dep. Variable:,accepted,No. Observations:,1000.0
Model:,Logit,Df Residuals:,998.0
Method:,MLE,Df Model:,1.0
Date:,"Fri, 02 Dec 2022",Pseudo R-squ.:,0.4289
Time:,18:22:55,Log-Likelihood:,-395.0
converged:,True,LL-Null:,-691.69
Covariance Type:,nonrobust,LLR p-value:,4.606000000000001e-131

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-6.3158,0.409,-15.455,0.000,-7.117,-5.515
pay,0.2539,0.016,15.979,0.000,0.223,0.285


In [None]:
plot_payouts = pd.Series(np.arange(0,60, step  =.25), name = 'pay')
predictions = pd.Series(logit.predict(plot_payouts), name = 'pred_prob')
a = pd.DataFrame({'pay':plot_payouts,'pred_prob':predictions})
px.scatter(a, 
            x = 'pay', 
            y = 'pred_prob',
            title = 'Probability of accepting ride based on payout').show()



## First Pass Version: Very slow 

In [None]:
# se tthe number of simulations to run
n = 100

# 1 period per month, 12 months
num_periods =12
period = np.arange(0,num_periods)

# Additional parameters
rider_pool = 10000
acquisition_cost = 30
ride_cost = 30

# Coarse grid of acquisitions and rider payments
acquisition_grid = [500,750,1000]
pay_grid = [10,15,20]

overall_summaries = []
monthly_summaries = []
pay_vectors = []
acquisition_vectors = []

for i in range(n):


    # Generate a dataframe of all potential riders for each month
    rider = pd.Series(data = np.arange(start = 0, stop =rider_pool), name = 'rider')
    month = pd.Series(data = np.arange(start = 0, stop = num_periods), name = 'month')

    data = pd.merge(rider, month, how = 'cross')
    data = data.assign(acquired = 0,
                        acquisition_month = np.NaN,
                        lam = 0,
                        requests = 0,
                        matches = 0
                    )

    non_acquired = np.arange(0, rider_pool)
    acquired_rider_list = np.array([])

    acquisition_list = np.array([])
    pay_list = np.array([])


    for t in period:

        # draw a number of acquisitions and driver payout for the period
        acquisitions = np.random.choice(acquisition_grid, size = 1)
        acquisition_list = np.concatenate((acquisition_list,acquisitions))

        pay = pd.Series(np.random.choice(pay_grid, size= 1), name = 'pay')
        pay_list = np.concatenate((pay_list,pay))

        # determine the probability that a ride is matched given the payout
        match_prob = logit.predict(pay).iat[0]

        # select the riders to be acquired this period and remove them from the non_acquired list
        acquired = np.random.choice(non_acquired, size = acquisitions, replace = False)
        non_acquired = np.setdiff1d(non_acquired, acquired)
        acquired_rider_list = np.concatenate((acquired_rider_list, acquired))

        # update the relevant columns for the newly acquired
        data.loc[(data['rider'].isin(acquired)) & (data['month'] >= t), 'acquired'] = 1
        data.loc[data['rider'].isin(acquired), 'acquisition_month'] = t
        data.loc[(data['rider'].isin(acquired)) & (data['month'] == t), 'lam'] = 1
        #data.loc[(data['rider'].isin(acquired)) & (data['month'] == t), 'acq_cost'] = acquisition_cost


        # update lambda for the already acquired
        data.loc[(data['month'] == t) & (data['acquired'] == 1) & (data['acquisition_month'] < t) & (data['month'] != 0), 'lam'] = data.groupby(['rider'])['matches'].shift(1)

        # compute ride requests and successful matches for all acquired riders
        filters = (data['rider'].isin(acquired_rider_list)) & (data['month'] == t)
        
        data.loc[filters, 'requests'] = data['lam'].apply(lambda x: np.random.poisson(x, size = 1)[0])
        data.loc[filters, 'matches'] = data['requests'].apply(lambda x: np.round(match_prob*x))

    # save out the pay and acquisition vectors
    pay_series = pd.Series(data = pay_list, name= 'pay')
    acquisition_series = pd.Series(data = acquisition_list, name = 'acquisitions')

    # Add in final useful columns
    df = data.merge(pay_series, how = 'left', left_on='month', right_index= True)
    df['rider_spend'] = df['matches'].multiply(ride_cost)
    df['driver_pay'] = df['matches']*df['pay']
    df['acquisition_cost'] = 0
    df.loc[df['acquisition_month'] == df['month'], 'acquisition_cost'] = 30
    df['profit'] = df['rider_spend'] - df['driver_pay'] - df['acquisition_cost']

    summary = df[['requests','matches','rider_spend','driver_pay','acquisition_cost','profit']].sum()
    monthly_summary = df.groupby('month', as_index = False)[['requests','matches','rider_spend','driver_pay','acquisition_cost','profit']].sum()

    overall_summaries.append(summary)
    monthly_summaries.append(monthly_summary)
    pay_vectors.append(pay_series)
    acquisition_vectors.append(acquisition_series)

pv_df  = pd.DataFrame(data = pay_vectors).reset_index(drop  =True)
ac_df  = pd.DataFrame(data = acquisition_vectors).reset_index(drop = True)
s_df  = pd.DataFrame(data = overall_summaries).reset_index(drop = True)


In [None]:
pv_df  = pd.DataFrame(data = pay_vectors).reset_index(drop  =True)
ac_df  = pd.DataFrame(data = acquisition_vectors).reset_index(drop = True)
s_df  = pd.DataFrame(data = overall_summaries).reset_index(drop = True)

pv_df['simulation'] = np.arange(0,n,1)
pv_df['simulation_profit'] = s_df['profit']
pv = pd.melt(pv_df, id_vars= ['simulation','simulation_profit'], var_name = 'month', value_name= 'pay')


ac_df['simulation'] = np.arange(0,n,1)
ac_df['simulation_profit'] = s_df['profit']
ac = pd.melt(ac_df, id_vars= ['simulation','simulation_profit'], var_name = 'month', value_name= 'acquisitions')

s_df['avg_pay'] = s_df['driver_pay']/s_df['matches']
s_df['match_rate'] = s_df['matches']/s_df['requests']
s_df['acquisitions'] = np.divide(s_df['acquisition_cost'], acquisition_cost)

s_df.head()

In [None]:
px.scatter(s_1, 
    x = 'avg_pay', 
    y = 'match_rate',
    color = 'profit',
    height=  800,
    width = 1000,
    title = "Relationship between average driver pay and match rate").show()

In [None]:
px.scatter(s_1, 
    x = 'acquisitions', 
    y = 'profit',
    color = 'avg_pay',
    height=  800,
    width = 1000,
    title = "Relationship between acquisitions and profit over all 12 months").show()

In [None]:
px.scatter(s_1, 
    x = 'avg_pay', 
    y = 'profit',
    color = 'acquisitions',
    height=  800,
    width = 1000,
    title = "Relationship between average driver pay and profit").show()

In [None]:
px.scatter(s_df, 
    x = 'match_rate', 
    y = 'profit',
    color = 'acquisitions',
    trendline = 'ols',
    height=  800,
    width = 1000,
    title = "Relationship between match rate and profit").show()

In [None]:
s_df['simulation'] = np.arange(0,n,1)
top_sims = s_df.sort_values(by = 'profit', ascending = False)['simulation'].head()

print("Simulations with the highest profits:")
s_df.sort_values(by = 'profit', ascending = False).head()



In [None]:
px.histogram(s_1, 
            x = 'profit',
            title = "Distribution of profits across n simulations").show()

In [None]:
s_1.describe()

In [None]:
px.line(pv.query("simulation in @top_sims "), 
    x = 'month',
    y = 'pay', 
    color = 'simulation',
    title = "Payout Vectors for simulations with highest profit").show()

In [None]:
px.line(ac.query("simulation in @top_sims "), 
    x = 'month',
    y = 'acquisitions', 
    color = 'simulation',
    title = "Acquisition Vectors for simulations with highest profit").show()

In [None]:
px.line(pv, 
        x = 'month', 
        y = 'pay', 
        color = 'simulation_profit',
        color_discrete_sequence= px.colors.sequential.Plasma_r).show()

In [None]:
pv_2 = pv_df
ac_2 = ac_df
s_2 = s_df



## "Full Numpy" Version: Blazing fast, about 1000 simulations per minute 

In [453]:
# set the number of simulations to run
n = 50000

# 1 period per month, 12 months
num_periods = 12
period = np.arange(0,num_periods)

# Additional parameters
rider_pool = 10000
acquisition_cost = 30
ride_cost = 30

# Coarse grid of acquisitions and rider payments
acquisition_grid = np.arange(0,1000,50)         #[0,250,500,750,1000]
pay_grid = np.arange(10,40,2)

# Arrays for storing the simulation outputs
sim_payouts = np.empty(shape = (n,num_periods))
sim_acquisitions = np.empty(shape = (n,num_periods))
sim_requests = np.empty(shape = (n,num_periods))
sim_matches = np.empty(shape = (n,num_periods))
sim_profit = np.empty(shape = (n,num_periods)) 

for s in range(n):   
    
    # initiate blank slate rider status lists
    riders = np.arange(0, rider_pool)
    non_acquired_riders = np.arange(0,rider_pool)

    # initiate blank slate rider data
    acquired = np.zeros((rider_pool,num_periods))
    acquisition_month = np.zeros((rider_pool,num_periods))
    lamb = np.zeros((rider_pool,num_periods))
    requests = np.zeros((rider_pool,num_periods))
    matches = np.zeros((rider_pool,num_periods))

    for t in period:    
        
        # Get the pay and number of acquisitions for the period
        pay = np.random.uniform(low = 10, high = 35, size = 1)
        num_acquisitions = np.random.randint(low = 0, high = 1000, size = 1)[0]
        sim_payouts[s,t] = pay
        sim_acquisitions[s,t] = num_acquisitions

         # determine the probability that a ride is matched given the payout
        p = pd.Series(pay, name = 'pay')
        acceptance_probability = logit.predict(p).iat[0]
    
        # Get the new acquisitions for the month and remove them from the pool
        if non_acquired_riders.size > 0:
            newly_acquired_riders = non_acquired_riders[0:num_acquisitions]                     # grab the new acquired riders
            previously_acquired_riders = np.setdiff1d(riders, non_acquired_riders)              # grab the previously acquired riders (empty in period 0)
            non_acquired_riders = np.setdiff1d(non_acquired_riders, newly_acquired_riders)      # update the non acquired rider list so above two lists are correct next period
            
            # update data for newly acquired riders
            acquired[newly_acquired_riders, t:] = 1
            acquisition_month[newly_acquired_riders,t] = 1 
            lamb[newly_acquired_riders, t] = 1
            requests[newly_acquired_riders, t] = np.random.poisson(lam = lamb[newly_acquired_riders, t], size = num_acquisitions)
            matches[newly_acquired_riders,t] = np.round(np.multiply(requests[newly_acquired_riders, t], acceptance_probability))

        # update data for previously acquired riders
        if t>0:
            lamb[previously_acquired_riders, t] = matches[previously_acquired_riders, t-1]
            requests[previously_acquired_riders, t] = np.random.poisson(lam = lamb[previously_acquired_riders, t], size = previously_acquired_riders.size)
            matches[previously_acquired_riders,t] = np.round(np.multiply(requests[previously_acquired_riders, t], acceptance_probability))
        
    # per period simualtions results
    monthly_acq = np.sum(acquisition_month, axis = 0)
    sim_requests[s] = np.sum(requests, axis = 0)
    sim_matches[s] = np.sum(matches, axis = 0)
    sim_profit[s] = np.sum(matches, axis = 0)*(ride_cost - pay) - monthly_acq*acquisition_cost

    # overall simulation results
    #sim_acquisitions[s] = np.sum(monthly_acq)
    #sim_requests[s] = np.sum(requests)
    #sim_matches[s] = np.sum(matches)
    #sim_profit[s] = np.sum(sim_profit_t)

print('simulations complete')



simulations complete


In [462]:
# Prepare output for inspection

profit_results = pd.DataFrame(data = {'profit':np.sum(sim_profit, axis = 1)}).astype({'profit':'float16'})
profit_results['simulation'] = np.arange(0,n)
#profit_results['percentile'] = pd.qcut(profit_results['profit'], q = 10, duplicates='drop')
top_100= profit_results.sort_values(by = 'profit', ascending = False)['simulation'].head(100)
bottom_100= profit_results.sort_values(by = 'profit', ascending = True)['simulation'].head(100)

payout_vectors = pd.DataFrame(data = sim_payouts)
payout_vectors['profit'] = np.sum(sim_profit, axis = 1)
#payout_vectors['percentile']= pd.qcut(profit_results['profit'], q = 10, duplicates='drop')
payout_vectors['simulation'] = np.arange(0,n)
pv_wide = pd.melt(payout_vectors, id_vars=['simulation','profit'], var_name = 'month', value_name = 'payout')

acquisition_vectors = pd.DataFrame(data = sim_acquisitions)
acquisition_vectors['profit'] = np.sum(sim_profit, axis = 1)
#acquisition_vectors['percentile'] = pd.qcut(profit_results['profit'], q = 10, duplicates='drop')
acquisition_vectors['simulation'] = np.arange(0,n)
av_wide = pd.melt(acquisition_vectors, id_vars=['simulation','profit'], var_name = 'month', value_name = 'acquisition')

profit_results.sort_values(by = 'profit', ascending = False).head(10)


Unnamed: 0,profit,simulation
6467,inf,6467
8177,inf,8177
48473,inf,48473
26138,inf,26138
3008,inf,3008
15559,inf,15559
29916,inf,29916
30909,inf,30909
17844,inf,17844
25026,inf,25026


In [None]:
pd.qcut(profit_results.profit)

In [463]:
px.scatter(pv_wide.query("simulation in @top_100"), 
        x = 'month', 
        y = 'payout', 
        #color = 'simulation',
        hover_data = ['simulation','profit'],
        trendline = 'lowess',
        title = "Payout strategy vectors for the top performing simulations",
        height = 800,
        width = 1500).show()

In [464]:
px.scatter(pv_wide.query("simulation in @bottom_100"), 
        x = 'month', 
        y = 'payout', 
        #color = 'simulation',
        trendline= 'lowess',
        hover_data = ['simulation','profit'],
         title = "Payout strategy vectors for the worst performing simulations",
         height = 800,
         width = 1500).show()

In [465]:
px.scatter(av_wide.query("simulation in @top_100"), 
        x = 'month', 
        y = 'acquisition', 
        #color = 'simulation',
        trendline = 'lowess',
        hover_data = ['simulation','profit'],
        title = "Acquisition strategy vectors for the top performing simulations",
        height = 800,
        width = 1500).show()

In [466]:
px.scatter(av_wide.query("simulation in @bottom_100"), 
        x = 'month', 
        y = 'acquisition', 
        #color = 'simulation',
        trendline = 'lowess',
        hover_data = ['simulation','profit'],
        title = "Acquisition strategy vectors for the worst performing simulations",
        height = 800,
        width = 1500).show()