<a href="https://colab.research.google.com/github/devikapillai30/Pricing-model-for-flights/blob/main/dynamic_pricing_airline_slots.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import random
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import mean_squared_error as MSE
import warnings
import math
import seaborn as sns
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")

In [2]:
# Data  Generation steps

#Booking starts 30 days in advance
days = 30
#ticket price range
min_price = 4000
max_price = 14000
#each journey has 250 slots to sell
total_slots = 250
#total of 50 journeys are considered
journey = 50
train_val_prop = 0.75
lst_days = list(range(1,days+1))
#waste seats
wastage_pct_min_range = 15
wastage_pct_max_range = 20
#emptiness exceeds 2%, prices may need adjusting.
emptiness_threshold = 0.02
#optimizations occur 10 days before each journey.
optimisation_day_bfr_jouney = 10

In [3]:
# Data generation step

lst_df = []
for j in range(journey):
    lst_price = []
    prev_price = min_price
    for i in range(days):
        cur_price = min(prev_price + random.randint(0,500), max_price)
        prev_price = cur_price
        lst_price.append(cur_price)

    slots_filled =  round((1 - random.randint(wastage_pct_min_range,wastage_pct_max_range)/100.0) * total_slots,0)
    lst_slots = []
    weights = []
    for time_,p in enumerate(lst_price):

        # More bookings comes with time but also reduces as price increases
        weights.append(((time_ + 1)* random.uniform(1,1.2))/math.pow(p,5.0))

    msum = sum(weights)
    weights = [w/msum for w in weights]

    for w in weights:
        lst_slots.append(round(w * slots_filled, 0) )


    lst_df.append(pd.DataFrame( {'journey_id' : [j+1] * days, 'day':lst_days, 'price' : lst_price, 'slots' : lst_slots} ))


df = pd.concat(lst_df)

#Let's train on x% of journeys and validate learn't model performance on the remaining (1-x)%.

df_train = pd.concat(lst_df[:int(train_val_prop * len(lst_df)) + 1])
df_val = pd.concat(lst_df[int(train_val_prop * len(lst_df)) + 1:])


print("df's shape", df.shape)
print("df_train's shape", df_train.shape)
print("df_val's shape", df_val.shape)

df's shape (1500, 4)
df_train's shape (1140, 4)
df_val's shape (360, 4)


In [4]:
# A glance at data
df[df.journey_id == 1]

Unnamed: 0,journey_id,day,price,slots
0,1,1,4313,9.0
1,1,2,4423,16.0
2,1,3,4696,19.0
3,1,4,5039,15.0
4,1,5,5446,14.0
5,1,6,5683,14.0
6,1,7,6033,12.0
7,1,8,6034,12.0
8,1,9,6416,12.0
9,1,10,6679,10.0


In [5]:
from xgboost import XGBRegressor
from numpy import asarray
model = XGBRegressor()
model.fit(df_train[['price','day']], np.array(df_train.slots))

In [6]:
def mean_absolute_percentage_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def mean_absolute_error(y_true, y_pred):
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs(y_true - y_pred))

rmse = round(math.sqrt(MSE(df_val['slots'],model.predict(df_val[['price','day']]))),2)

mae = round(mean_absolute_error( df_val['slots'],model.predict(df_val[['price','day']]) ),2)

mape = round(mean_absolute_percentage_error( df_val['slots'],model.predict(df_val[['price','day']]) ),2)

print(f"Root Mean squared error : {rmse}\nMean absolute error : {mae}\nMean Absolute percentage error : {mape}%")

Root Mean squared error : 1.63
Mean absolute error : 1.04
Mean Absolute percentage error : 15.64%


In [7]:
# Store predictions for fast stroage which will be utilised by the optimisation layer.

mp = {}
lst = []
for p in range(min_price, max_price+1):
    for d in range(days - optimisation_day_bfr_jouney , days +  1):
        lst.append( [p,d] )

pred = model.predict( np.array(lst))

for i in range(len(lst)):
    mp[lst[i][0],lst[i][1]] = int(pred[i])

In [8]:
def m_feasible(price_points, available_slots, emptiness_threshold):
    tmp = 0
    for p in price_points:
#         tmp = tmp + int(model.predict(np.asarray([[p[0],p[1]]]))[0])
        tmp = tmp + mp[p[0],p[1]]

    if tmp <= available_slots and total_slots * emptiness_threshold <= (available_slots - tmp):
        return True
    return False

In [9]:
def m_revenue(price_points):
    rev = 0
    slots = []
    for p in price_points:

        s_filled = mp[p[0],p[1]]
        slots.append(s_filled)
        rev = rev + (s_filled * p[0])

    return rev,slots

In [10]:
def optimise(df, journey_id):

    df_tmp = df[(df.journey_id == journey_id) & (df.day > (days - optimisation_day_bfr_jouney))].reset_index(drop = True)

    slots_filled = df[ (df.journey_id == journey_id) & (df.day < (days - optimisation_day_bfr_jouney)) ].slots.sum()

    available_slots = (total_slots - slots_filled)

    # random search
    times = 50000
    ans = 0
    solution = []
    for j in range(times):
        price_points = []
        prev_price = min_price
        for i in range(optimisation_day_bfr_jouney):
            cur_price = random.randint(prev_price,int(prev_price * 1.15))
            if cur_price > max_price:
                break
            prev_price = cur_price
            price_points.append((cur_price,(days - optimisation_day_bfr_jouney + 1)))
        if m_feasible(price_points, available_slots, emptiness_threshold):
            if m_revenue(price_points)[0] > ans:
                ans, slots = m_revenue(price_points)
                solution = [p[0] for p in price_points]

#     df_tmp = df[df.journey_id == 1].loc[days - optimisation_day_bfr_jouney: ].reset_index(drop = True)
    df_tmp['proposed_price'] = solution
    df_tmp['forecasted_slots'] = slots

    orig = np.sum(df_tmp['price'] * df_tmp['slots'])
    proposed = np.sum(df_tmp['proposed_price'] * df_tmp['forecasted_slots'])
    revenue_gain = round(proposed-orig, 2)
    revenue_gain_pct = round((proposed-orig)/orig * 100.0, 2)

    slots_extra_gain = round(df_tmp['forecasted_slots'].sum() - df_tmp.slots.sum() , 0)
    slots_extra_gain_pct = round( slots_extra_gain/df_tmp.slots.sum() *100, 2)

#     display(df_tmp)

    print(f"""Previous Revenue: {orig}\nNew Revenue: {proposed}\nRevenue gain: {revenue_gain}\nRevenue gain %: {revenue_gain_pct}
Available slots: {available_slots}\nPreviously filled: {df_tmp.slots.sum()}
Filled after dynamic Pricing: {df_tmp['forecasted_slots'].sum()}\nSlots gain: {slots_extra_gain}\nSlots gain %: {slots_extra_gain_pct}""")

    return df_tmp

In [15]:
# Optimise journey
journey_id = 2
optimise(df, journey_id)

Previous Revenue: 253738.0
New Revenue: 431020
Revenue gain: 177282.0
Revenue gain %: 69.87
Available slots: 74.0
Previously filled: 28.0
Filled after dynamic Pricing: 69
Slots gain: 41.0
Slots gain %: 146.43


Unnamed: 0,journey_id,day,price,slots,proposed_price,forecasted_slots
0,2,21,7988,5.0,4576,19
1,2,22,8179,4.0,5251,15
2,2,23,8402,4.0,5998,10
3,2,24,8883,3.0,6567,7
4,2,25,9336,3.0,7540,6
5,2,26,9790,2.0,8532,4
6,2,27,10015,2.0,9605,2
7,2,28,10411,2.0,9691,3
8,2,29,10707,2.0,10253,2
9,2,30,10971,1.0,11205,1


In [14]:
# Optimise journey
journey_id = 5
optimise(df, journey_id)

Previous Revenue: 260560.0
New Revenue: 466209
Revenue gain: 205649.0
Revenue gain %: 78.93
Available slots: 79.0
Previously filled: 28.0
Filled after dynamic Pricing: 74
Slots gain: 46.0
Slots gain %: 164.29


Unnamed: 0,journey_id,day,price,slots,proposed_price,forecasted_slots
0,5,21,8523,4.0,4580,21
1,5,22,8855,3.0,5264,15
2,5,23,8912,3.0,6013,10
3,5,24,8969,3.0,6689,7
4,5,25,9093,3.0,7584,6
5,5,26,9191,3.0,8335,4
6,5,27,9684,3.0,9040,3
7,5,28,10026,2.0,9082,3
8,5,29,10339,2.0,9658,3
9,5,30,10813,2.0,10966,2
