In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib

import gym
import time

from sklearn.metrics import mean_absolute_error

# Dynamic Pricing with Reinforcement Learning

In [1]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [5]:

df = pd.read_csv("/content/drive/My Drive/Graduation Project-Gamze Tuncay/dynamic_pricing/data/hotel-one-date-generated.csv")

In [6]:
df= df[df['ListPriceTRYUnit'] <=1800]
df = df.reset_index()

In [7]:
print('Max Purchase Amount:', df['PurchaseAmountTRYUnit'].max() , 'Min Purchase Amount:', df['PurchaseAmountTRYUnit'].min())
print('Max List Price:', df['ListPriceTRYUnit'].max() , 'Min List Price:', df['ListPriceTRYUnit'].min() )
print('Max Rate:', df['RatingTripA'].max(), 'Min Rate:', df['RatingTripA'].min())
print('Max Feature Score:', df['featureScore'].max() , 'Min Feature Score:', df['featureScore'].min())
print('Max Room Count:', df['empty_rooms'].max() , 'Min Room Count:', df['empty_rooms'].min())

Max Purchase Amount: 1638.0 Min Purchase Amount: 15.44725
Max List Price: 1800.0 Min List Price: 17.15
Max Rate: 5.0 Min Rate: 2.3
Max Feature Score: 2.253854215 Min Feature Score: 0.0
Max Room Count: 65.0 Min Room Count: 0.001


In [8]:
# Utility function for creating discreate space
def round_specific(x, precision, base):
  return round(base * round(float(x)/base),precision)

In [9]:
class Sale:
    def __init__(self, purchase, customer_traffic, competitiveness, empty_rooms):
        self.profit = round_specific(np.random.rand(), 2, 0.01)
        self.purchase = purchase
        if self.purchase > 1800:
            self.purchase = 1800
        if self.purchase < 0:
            self.purchase = 0
        
        self.list_price = self.purchase + self.purchase*self.profit
        self.list_price = round_specific(self.list_price, 0, 30)
        if self.list_price > 1800:
            self.list_price = 1800
            #self.list_price = 2000-((4000-self.list_price)/2200)*200
            #self.list_price = round_specific(self.list_price, 0, 30)
        if self.list_price < 0:
            self.list_price = 0

        self.customer_traffic = customer_traffic
        self.competitiveness = competitiveness
        self.empty_rooms = empty_rooms
    
    def __str__(self):
        return f"{self.profit}"
    
    def __sub__(self,other):
        return self.profit - other.profit
    
    def action(self, choice):
        if choice == 0:
            self.change(-20) # profiti %20 indir
        elif choice == 1:
            self.change(-15) # profiti %15 indir
        elif choice == 2:
            self.change(-10) # profiti %10 indir
        elif choice == 3:
            self.change(0) 
        elif choice == 4:
            self.change(+10) # profiti %10 arttir 
    
    def change(self, p=False):
        if not p:
            self.profit = round_specific(np.random.rand(), 2, 0.01)
        else:
            self.profit = self.profit + (p/100)
            
        self.list_price = self.purchase + self.purchase*self.profit 
        self.list_price = round_specific(self.list_price, 0, 30)
        if self.list_price > 1800:
            self.list_price = 1800
            #self.list_price = 2000-((4000-self.list_price)/2200)*200
            #self.list_price = round_specific(self.list_price, 0, 80)
        if self.list_price < 0:
            self.list_price = 0

In [12]:
!unzip -uq "/content/drive/My Drive/Graduation Project-Gamze Tuncay/dynamic_pricing/q_table_60_1800.zip" -d "/content/drive/My Drive/Graduation Project-Gamze Tuncay/dynamic_pricing/"

q_table_main = np.load('/content/drive/My Drive/Graduation Project-Gamze Tuncay/dynamic_pricing/q_table_60.npy',allow_pickle='TRUE').item()

In [13]:
def RL(HM_EPISODES, epsilon, df, q_table):
    EPS_DECAY = 0.998
    SHOW_EVERY = 100
    LEARNING_RATE = 0.1
    DISCOUNT = 0.95
    df['list_price']=np.nan

    episode_rewards = []

    for episode in range(HM_EPISODES):
    
        sale = df.iloc[episode]
        
        purchase = sale['PurchaseAmountTRYUnit']
        purchase = round_specific(purchase, 0, 30)
        
        customer_traffic = sale['RatingTripA']
        customer_traffic = round_specific(customer_traffic, 1, 0.1)
        
        competitiveness = sale['featureScore']
        competitiveness = round_specific(competitiveness, 2, 0.05)
        
        empty_rooms = sale['empty_rooms']
        
        agent = Sale(purchase, customer_traffic, competitiveness, empty_rooms)
        
        list_price = agent.list_price
        """
        if episode % SHOW_EVERY == 0:
            print(f"on #{episode}, epsilon is {epsilon}")
            print(f"{SHOW_EVERY} ep mean: {np.mean(episode_rewards[-SHOW_EVERY:])}")
            show = True
        else:
            show = False
        """
        episode_reward = 0
        for i in range(200):
            obs = (purchase, list_price, customer_traffic, competitiveness)
            
            if np.random.random() > epsilon:
                # GET THE ACTION
                action = np.argmax(q_table[obs])
            else:
                action = np.random.randint(0, 5)
            
            # Take the action!
            agent.action(action)
            
            ##############################
            # REWARD FUNCTION
            new_obs = (agent.purchase, agent.list_price, agent.customer_traffic, agent.competitiveness)
            max_future_q = np.max(q_table[new_obs])
            current_q = q_table[obs][action]
            
            rewardrate = (agent.list_price-agent.purchase)/ agent.empty_rooms
            
            if rewardrate < 1:
                reward = -300
                new_q = (1 - LEARNING_RATE) * current_q + LEARNING_RATE * (reward + DISCOUNT * max_future_q)
            elif rewardrate < 50 or rewardrate > 530:
                new_q = rewardrate
                reward = 25
            else:
                reward = -1
                new_q = (1 - LEARNING_RATE) * current_q + LEARNING_RATE * (reward + DISCOUNT * max_future_q)
            
            q_table[obs][action] = new_q

            episode_reward += reward
            if reward ==25:
                break
        df['list_price'].iloc[episode] = agent.list_price
        #print(episode_reward)
        episode_rewards.append(episode_reward)
        epsilon *= EPS_DECAY

    moving_avg = np.convolve(episode_rewards, np.ones((SHOW_EVERY,))/SHOW_EVERY, mode='valid')
    return df, moving_avg

**Hotel Based Pricing**

In [14]:
df_1M = df
df_h3 = df[df['DWHotelID'] == 3]
df_h54 = df[df['DWHotelID'] == 54]
df_h60 = df[df['DWHotelID'] == 60]
df_h98 = df[df['DWHotelID'] == 98]
df_h154 = df[df['DWHotelID'] == 154]

df_h389 = df[df['DWHotelID'] == 389]
df_h633 = df[df['DWHotelID'] == 633]
df_h114 = df[df['DWHotelID'] == 114]
df_h540 = df[df['DWHotelID'] == 540]
df_h460 = df[df['DWHotelID'] == 460]

df_list = [df_1M,df_h3,df_h54,df_h60,df_h98,df_h154,df_h389,df_h633,df_h114,df_h540,df_h460]

In [15]:
for data_frame in df_list:
  q_table = q_table_main
  data_frame.reset_index()
  data_frame, moving_avg = RL(data_frame.shape[0], 0.4, data_frame, q_table)
  
  plt.rcParams['figure.figsize'] = [20, 5]
  plt.plot([i for i in range(len(moving_avg))], moving_avg)
  plt.ylabel(f"Reward 100ma")
  plt.xlabel("episode #")
  plt.show()

KeyError: ignored

In [None]:
df_1M2 = df_1M[['ListPriceTRYUnit','list_price']]
plt.figure();
plt.rcParams['figure.figsize'] = [20, 5]
df_1M2.plot.area(stacked=False);

**Original List Price Classfication**

In [None]:
# Utility function for creating discreate space
def round_specific_series(x, precision, base):
  return round(base * round(x/base),precision)

In [None]:
for x in df_list:
    x["ClassifiedListPriceTRYUnit"] = round_specific_series(x["ListPriceTRYUnit"], 0, 80)

**Mean Absolute Percentage Error**

In [None]:
def mean_absolute_percentage_error(y_true, y_pred): 
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

**Print Metrics**

In [None]:
def print_metrics(data_frame):

  print("*Number of Sales: ",data_frame.shape[0])
  print("*Mean Absolute Error: ", mean_absolute_error(data_frame['ListPriceTRYUnit'],data_frame['list_price']));
  print("*Mean Absolute Error of Classified: ", mean_absolute_error(data_frame['ClassifiedListPriceTRYUnit'],data_frame['list_price']));

  print("*h460 MAPE of Or-Re: ", mean_absolute_percentage_error(data_frame['ListPriceTRYUnit'],data_frame['list_price']));
  print("*h460 MAPE of Cl-Re: ", mean_absolute_percentage_error(data_frame['ClassifiedListPriceTRYUnit'],data_frame['list_price']));

  print("*Mean of Recommended List Prices:", data_frame['list_price'].mean())
  print("*Mean of Original List Prices:", data_frame['ListPriceTRYUnit'].mean())
  print("*Mean of Classified List Prices:", data_frame['ClassifiedListPriceTRYUnit'].mean())

  print("*Standard Deviation of Recommended List Prices:", data_frame['list_price'].std())
  print("*Standard Deviation of Original List Prices:", data_frame['ListPriceTRYUnit'].std())
  print("*Standard Deviation of Classified List Prices:", data_frame['ClassifiedListPriceTRYUnit'].std())
  
  return

In [None]:
for d in df_list:
  print_metrics(d)
  print("|")