In [13]:
import pandas as pd
import numpy as np
import random
from sklearn.metrics import mean_squared_error as MSE 
import warnings
import math
import seaborn as sns
import matplotlib.pyplot as plt

warnings.filterwarnings('ignore')

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname,filename))

In [14]:
days = 30
min_price = 4000
max_price = 14000
total_slots = 250
journey = 50
train_val_prop = 0.75
lst_days = list(range(1,days+1))
wastage_pct_min_range = 15
wastage_pct_max_range = 20
emptiness_threshold = 0.02
optimisation_day_bfr_journey = 10

In [15]:
lst_df = []
for j in range(journey):
    lst_price = []
    prev_price = min_price
    for i in range(days):
        curr_price = min(prev_price + random.randint(0,500),max_price)
        prev_price = curr_price
        lst_price.append(curr_price)

    slots_filled = round((1 - random.randint(wastage_pct_min_range,wastage_pct_max_range)/100.0) * total_slots,0)
    lst_slots = []
    weights = []

    for time_,p in enumerate(lst_price):
        weights.append(((time_+1)*random.uniform(1,1.2))/math.pow(p,5.0))
    
    msum = sum(weights)
    weights = [w/msum for w in weights]

    for w in weights:
        lst_slots.append(round(w * slots_filled,0))
    lst_df.append(pd.DataFrame({'journey_id' : [j+1]*days, 'day' : lst_days, 'price' : lst_price, 'slots' : lst_slots}))

df = pd.concat(lst_df)

df_train = pd.concat(lst_df[:int(train_val_prop * len(lst_df)) + 1])
df_val = pd.concat(lst_df[int(train_val_prop * len(lst_df)) + 1:])

print(df.shape)

(1500, 4)


In [16]:
df[df.journey_id == 1]

Unnamed: 0,journey_id,day,price,slots
0,1,1,4193,8.0
1,1,2,4579,9.0
2,1,3,4948,10.0
3,1,4,4981,11.0
4,1,5,5109,13.0
5,1,6,5575,11.0
6,1,7,5598,11.0
7,1,8,5721,12.0
8,1,9,6034,12.0
9,1,10,6446,9.0


In [17]:
len(df)

1500

In [18]:
from xgboost import XGBRegressor
from numpy import asarray
model = XGBRegressor()
model.fit(df_train[['price','day']],np.array
(df_train.slots))

In [19]:
def mean_absolute_percentage_error(y_true,y_pred):
    y_true,y_pred = np.array(y_true),np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred)/y_true)) * 100

def mean_absolute_error(y_true,y_pred):
    y_true,y_pred = np.array(y_true),np.array(y_pred)
    return np.mean(np.abs(y_true-y_pred))

rmse = round(math.sqrt(MSE(df_val['slots'],model.predict(df_val[['price','day']]))),2)

mae = round(mean_absolute_error(df_val['slots'],model.predict(df_val[['price','day']])),2)

mape = round(mean_absolute_percentage_error(df_val['slots'],model.predict(df_val[['price','day']])),2)

print(f"RMSE : {rmse}\nMAE : {mae}\nMAPE : {mape}%")


RMSE : 1.59
MAE : 1.1
MAPE : 17.92%


In [20]:
mp = {}
lst = []
for p in range(min_price,max_price+1):
    for d in range(days - optimisation_day_bfr_journey,days + 1):
        lst.append([p,d])

pred = model.predict(np.array(lst))

for i in range(len(lst)):
    mp[lst[i][0],lst[i][1]] = int(pred[i])
    

In [21]:
def feasible(price_points,available_slots,emptiness_threshold):
    tmp = 0
    for p in price_points:
        tmp += mp[p[0],p[1]]
    
    if tmp <= available_slots and total_slots * emptiness_threshold <= (available_slots-tmp):
        return True
    return False


In [22]:
def revenue(price_points):
    rev = 0
    slots = []
    for p in price_points:
        s_filled = mp[p[0],p[1]]
        slots.append(s_filled)
        rev += (s_filled * p[0])

    return rev,slots

In [23]:
def optimise(df,journey_id):
    df_tmp = df[(df.journey_id == journey_id) & (df.day > (days-optimisation_day_bfr_journey))].reset_index(drop = True)
    slots_filled = df[(df.journey_id == journey_id) & (df.day < (days - optimisation_day_bfr_journey))].slots.sum()
    available_slots = total_slots - slots_filled

    times = 50000
    ans = 0
    solution = []
    for j in range(times):
        price_points = []
        prev_price = min_price
        for i in range(optimisation_day_bfr_journey):
            curr_price = random.randint(prev_price,int(prev_price*1.15))
            if curr_price > max_price:
                break
            prev_price = curr_price
            price_points.append((curr_price,(days - optimisation_day_bfr_journey + 1)))
        if feasible(price_points,available_slots,emptiness_threshold):
            if revenue(price_points)[0] > ans:
                ans,slots = revenue(price_points)
                solution = [p[0] for p in price_points]
    df_tmp['proposed_price'] = solution
    df_tmp['forecasted_slots'] = slots

    orig = np.sum(df_tmp['price']* df_tmp['slots'])
    proposed = np.sum(df_tmp['proposed_price'] * df_tmp['forecasted_slots'])
    revenue_gain = round(proposed-orig,2)
    revenue_gain_pct = round((proposed - orig)/orig * 100.0,2)

    slots_extra_gain = round(df_tmp['forecasted_slots'].sum() - df_tmp.slots.sum() , 0)
    slots_extra_gain_pct = round( slots_extra_gain/df_tmp.slots.sum() *100, 2)
    
    print(f"""Previous Revenue: {orig}\nNew Revenue: {proposed}\nRevenue gain: {revenue_gain}\nRevenue gain %: {revenue_gain_pct}
Available slots: {available_slots}\nPreviously filled: {df_tmp.slots.sum()}
Filled after dynamic Pricing: {df_tmp['forecasted_slots'].sum()}\nSlots gain: {slots_extra_gain}\nSlots gain %: {slots_extra_gain_pct}""")
    
    return df_tmp

In [24]:
journey_id = 1
optimise(df,journey_id)

Previous Revenue: 276160.0
New Revenue: 471903
Revenue gain: 195743.0
Revenue gain %: 70.88
Available slots: 79.0
Previously filled: 29.0
Filled after dynamic Pricing: 74
Slots gain: 45.0
Slots gain %: 155.17


Unnamed: 0,journey_id,day,price,slots,proposed_price,forecasted_slots
0,1,21,8350,5.0,4558,18
1,1,22,8697,4.0,5213,11
2,1,23,9093,4.0,5920,9
3,1,24,9554,3.0,6772,6
4,1,25,9648,3.0,7280,6
5,1,26,10138,2.0,7683,7
6,1,27,10489,2.0,7828,5
7,1,28,10519,2.0,7833,5
8,1,29,10801,2.0,8511,4
9,1,30,10875,2.0,9598,3
