In [1]:
import torch
import torch.nn as nn
import pickle
import pandas as pd
import numpy as np
import datetime

In [2]:
class Environment:
    """Pricing environment that replays the rows of a DataFrame one step at a time.

    Each row of ``df`` is one time step. An action picks a markup over that
    row's ``base_cost``; ``predict_demand`` is then called with the demand
    model to obtain the number of orders at that price, and the step reward is
    ``(price + freight_value - base_cost) * orders``.

    Observations are plain lists: ``[olist_price, orders]`` followed by the
    values of the current row (after ``reset``) or of the next row (after
    ``step``).

    Args:
        model: Demand model, forwarded unchanged to ``predict_demand``.
        df: DataFrame with at least ``base_cost`` and ``freight_value``
            columns. Rows are always addressed by position, so any index
            works. At least two rows are needed to take a step, because
            ``step`` returns the following row as part of the observation.
    """

    # Multiplier applied to ``base_cost`` for each action index 0-9:
    # markups from 5% to 27.5% in 2.5-point increments.
    PRICE_MARKUPS = (1.05, 1.075, 1.10, 1.125, 1.15,
                     1.175, 1.20, 1.225, 1.25, 1.275)

    def __init__(self, model, df):
        self.model = model
        self.data = df
        # Position of the last row; the episode is done once ``t`` reaches it.
        self.N = len(self.data) - 1
        self.reset()

    def reset(self):
        """Rewind to the first row, clear all counters and return the initial observation."""
        self.t = 0
        self.done = False
        self.orders = 0
        self.olist_price = 0
        self.profits = 0
        return [self.olist_price, self.orders] + self.data.iloc[self.t].tolist()

    def step(self, act):
        """Apply a pricing action to the current row and advance one time step.

        Args:
            act: Integer-like action index in ``0..9`` (plain int, numpy
                integer or 0-d array) selecting an entry of ``PRICE_MARKUPS``.

        Returns:
            Tuple ``(obs, reward, done)`` where ``obs`` is
            ``[olist_price, orders] + <next row values>``.

        Raises:
            ValueError: If ``act`` is not an integer in ``0..9``. The
                environment state is left untouched in that case.
            IndexError: If called again after ``done`` was returned as True
                (there is no further row to observe).
        """
        markups = self.PRICE_MARKUPS
        try:
            action = int(act)
        except (TypeError, ValueError):
            raise ValueError(f"action must be an integer in 0..{len(markups) - 1}, got {act!r}") from None
        # Reject fractional values (e.g. 1.5) and anything outside the table
        # instead of silently reusing the previous price.
        if action != act or not 0 <= action < len(markups):
            raise ValueError(f"action must be an integer in 0..{len(markups) - 1}, got {act!r}")

        # Read the row by position once so the price, the demand features and
        # the reward all come from the same row regardless of the index labels.
        row = self.data.iloc[self.t]
        base_cost = row['base_cost']
        self.olist_price = base_cost * markups[action]

        # Calculate demand at the chosen price
        self.orders = predict_demand(self.model, row, self.olist_price)

        reward = (self.olist_price + row['freight_value'] - base_cost) * self.orders
        self.profits += reward

        # set next time
        self.t += 1
        if self.t == self.N:
            self.done = True

        return [self.olist_price, self.orders] + self.data.iloc[self.t].tolist(), reward, self.done  # obs, reward, done

In [3]:
class Q_Network(nn.Module):
    """Fully connected Q-value estimator.

    The stack is five ``BatchNorm1d -> Linear`` pairs with a ``ReLU`` between
    consecutive pairs: ``obs_len -> hidden_size`` (x4 hidden layers) ->
    ``actions_n``. The whole stack is stored as ``self.fc_val`` so that
    ``state_dict`` keys are ``fc_val.<index>.*``.

    Args:
        obs_len: Number of features in one observation.
        hidden_size: Width of every hidden layer.
        actions_n: Number of outputs (one value per action).
    """

    def __init__(self, obs_len, hidden_size, actions_n):
        super().__init__()

        # Feature width at the boundary of each Linear layer.
        widths = [obs_len] + [hidden_size] * 4 + [actions_n]

        stack = []
        for position, (n_in, n_out) in enumerate(zip(widths[:-1], widths[1:])):
            if position > 0:
                # Non-linearity sits between pairs, never after the output layer.
                stack.append(nn.ReLU())
            stack.append(nn.BatchNorm1d(num_features=n_in))
            stack.append(nn.Linear(n_in, n_out))

        self.fc_val = nn.Sequential(*stack)

    def forward(self, x):
        """Return the network output for a batch ``x`` of shape ``(batch, obs_len)``."""
        return self.fc_val(x)
            

In [4]:
# Layer sizes for the Q-network.
# An observation is [olist_price, orders] (2 values) followed by the 16
# columns of the scenario DataFrame.
input_size = 2 + 16
# One output per pricing action handled by Environment.step (actions 0-9).
output_size = 10
# Width of every hidden layer.
hidden_size = 30

In [5]:
# Rebuild the network with the sizes defined above, then restore its weights
# from the saved state dict.
Q = Q_Network(input_size, hidden_size, output_size)
# map_location='cpu' lets the checkpoint deserialize on machines that lack the
# device it was saved from; the freshly built network lives on the CPU anyway.
Q.load_state_dict(torch.load('./Q_state.torch', map_location='cpu'))

<All keys matched successfully>

In [6]:
# Load the pickled demand model; it is later handed to Environment and queried
# through model.predict inside predict_demand.
# NOTE(security): pickle.load can execute arbitrary code while deserializing —
# only load this file if it comes from a trusted source.
with open('./lr_cellphone_C.pkl','rb') as f:
    model = pickle.load(f)



In [7]:
def predict_demand(model, df_row, olist_price):
    """Predict the number of orders for one row at a candidate price.

    Args:
        model: Object exposing ``predict(X)``; it receives a ``(1, 16)`` array.
        df_row: Row-like object whose fields are read by attribute
            (``df_row.year``, ``df_row.stock``, ...).
        olist_price: Candidate selling price, placed in the feature vector
            between ``day`` and ``freight_value``.

    Returns:
        The first element of the model's prediction, floored at 0.
    """
    # Row fields in the order they are passed to the model; the candidate
    # price is inserted between the two groups.
    before_price = ('year', 'month', 'dayofweek', 'day')
    after_price = ('freight_value', 'competition_price', 'stock',
                   'black_friday', 'carnival', 'christmas', 'friday',
                   'mothers_day', 'new_year', 'others', 'valentines')

    features = [getattr(df_row, field) for field in before_price]
    features.append(olist_price)
    features.extend(getattr(df_row, field) for field in after_price)

    X = np.array(features).reshape(1, -1)
    prediction = model.predict(X)

    # Negative predictions are treated as zero orders.
    return max(prediction[0], 0)

In [8]:
# --- Scenario inputs (edit these to price a different day) ---
input_date = '05/22/2020'  # parsed below as month/day/year
freight_value = 25
competition_price = 898
stock = 1
base_cost = 718

# --- Calendar features for the input date ---
date = datetime.datetime.strptime(input_date, '%m/%d/%Y')
year, month, day = date.year, date.month, date.day
dayofweek = date.weekday()  # Monday == 0, so 4 is Friday
friday = int(dayofweek == 4)

# Holiday/event flags are all switched off for this scenario.
black_friday = carnival = christmas = mothers_day = new_year = others = valentines = 0

# --- Calendar features for the following day ---
date_next = date + datetime.timedelta(days=1)
year_next, month_next, day_next = date_next.year, date_next.month, date_next.day
dayofweek_next = date_next.weekday()
friday_next = int(dayofweek_next == 4)

In [9]:
# Column data for a two-row scenario: element 0 is the input date, element 1
# the following day. Only the calendar-derived fields differ between the two.
# Key order is deliberate: it becomes the DataFrame column order, which in
# turn fixes the order of values in each observation.
df_dict = {
    'year': [year, year_next],
    'month': [month, month_next],
    'dayofweek': [dayofweek, dayofweek_next],
    'day': [day, day_next],
    'freight_value': [freight_value] * 2,
    'competition_price': [competition_price] * 2,
    'stock': [stock] * 2,
    'black_friday': [black_friday] * 2,
    'carnival': [carnival] * 2,
    'christmas': [christmas] * 2,
    'friday': [friday, friday_next],
    'mothers_day': [mothers_day] * 2,
    'new_year': [new_year] * 2,
    'others': [others] * 2,
    'valentines': [valentines] * 2,
    'base_cost': [base_cost] * 2,
}

In [10]:
# One column per df_dict key (in insertion order), one row per list element.
df = pd.DataFrame(df_dict)

In [11]:
# Build the environment over the two-row scenario frame.
test_env = Environment(model, df)
# Environment.__init__ already calls reset(); it is called again here to
# capture the initial observation.
pobs = test_env.reset()
# Actions chosen so far.
pact_history = []
done = False

In [12]:
# Greedy action selection: evaluate the Q-network on the initial observation
# and take the action with the highest value.
Q.eval()  # BatchNorm layers must use their running statistics for a batch of one
with torch.no_grad():  # inference only — no autograd graph needed
    q_values = Q(torch.from_numpy(np.array(pobs, dtype=np.float32).reshape(1, -1)))
# Index of the best action over the flattened (1, n_actions) output, kept as a
# 0-d CPU tensor.
pact = q_values.cpu().argmax()
pact_history.append(pact)

# Apply the chosen price action and unpack the results for display below.
obs, reward, done = test_env.step(pact.numpy())
orders = obs[1]   # demand predicted at the chosen price
o_price = obs[0]  # price selected by the action
profit = reward

In [13]:
# Orders predicted by the demand model at the chosen price (obs[1] of the step above).
orders

6.580337392222191

In [14]:
# Price set by the chosen action: base_cost times the selected markup (obs[0] of the step above).
o_price

861.6

In [15]:
# Step reward: (price + freight_value - base_cost) * orders.
profit

1109.4448843286616