In [2]:
from __future__ import print_function
import os, sys, time, datetime, json, random
import numpy as np
import math
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.optimizers import SGD , Adam, RMSprop
from keras.layers.advanced_activations import PReLU
from keras.layers.advanced_activations import LeakyReLU
from keras.activations import relu
import matplotlib.pyplot as plt
import imageio
%matplotlib inline

Using TensorFlow backend.


In [3]:
# ENVIRONMENT
# 10x10 storage spaces, with value indicating % space free (initially 1.0 ie. all are empty)
warehouse_layout = [
    [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
    [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
    [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
    [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
    [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
    [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
    [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
    [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
    [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.],
    [ 1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.,  1.]
]

# ACTIONS
# agent can assign a SKU to a gicen block or remove a SKU from a given block
actions = {1: 'assign_sku', 2: 'remove_sku'}
num_actions = len(actions)

#EXPLORATION FACTOR
epsilon = 0.1

In [4]:
class ACTIONS(object):
    
    def __init__(self, action_id, item_id, quantity, block_row, block_col, status):
        self.action_id = action_id
        self.item_id = item_id
        self.quantity = quantity
        self.block_row = block_row
        self.block_col = block_col
        self.status = status


class WAREHOUSE_BLOCKS(object):
    
    def __init__(self, warehouse_layout, item_log):
        self.warehouse_layout = np.array(warehouse_layout)
        nrows, ncols = self.warehouse_layout.shape
        for row in range(nrows):
            for col in range(ncols):
                self.warehouse_layout[row, col] = 1.0
        self.action_log = []
        self.total_reward = -math.inf
        self.block_capacity = 100.0
        self.item_log = item_log
        self.item_iterator = 0
        
    def reset(self):
        nrows, ncols = self.warehouse_layout.shape
        for row in range(nrows):
            for col in range(ncols):
                self.warehouse_layout[row, col] = 0.0
        self.action_log = []
        self.total_reward = -math.inf
        self.item_iterator = 0
        
    def update(self, action):
        target_row = action.block_row
        target_col = action.block_col
        # determine if action is valid
        if (action.action_id == 1):
            if (self.warehouse_layout[target_row, target_col] == 0.0):
                action.status = 0
            if (action.quantity > self.warehouse_layout[target_row, target_col] * self.block_capacity):
                action.status = 0
        elif (action.action_id == 2):
            if (self.warehouse_layout[target_row, target_col] == 0.0):
                action.status = 0
            if (action.quantity > (1.0 - self.warehouse_layout[target_row, target_col]) * self.block_capacity):
                action.status = 0
        # perform action if action is valid
        if (action.status):
            self.warehouse_layout[target_row, target_col] = self.warehouse_layout[target_row, target_col] - action.quantity / self.block_capacity
            self.action_log.append(action)
            
    def get_reward(self, warehouse_log):
        if not self.action_log[-1].status:
            return 0
        reward = 0.0
        latest_action = self.action_log[-1]
        item = latest_action.item_id
        ideal_zone = warehouse_log[item]
        given_zone = -1
        if latest_action.block_row in [0, 1, 2]:
            given_zone = 1
        elif latest_action.block_row in [3, 4, 5]:
            given_zone = 2
        elif latest_action.block_row in [6, 7, 8, 9]:
            given_zone = 3
        # reward based on frequency zone
        if given_zone == ideal_zone:
            reward += 0.5
        else:
            reward += -(abs(ideal_zone - given_zone) / 2)
        # print(reward)
        # reward based on empty space
        reward += -(abs(self.warehouse_layout[latest_action.block_row, latest_action.block_col]))
        # print(reward)
        return reward
    
    def act(self, action, warehouse_log):
        action.item_id = self.item_log[self.item_iterator][0]
        action.quantity = self.item_log[self.item_iterator][1]
        self.update(action)
        if action.status:
            self.total_reward += self.get_reward(warehouse_log)
            self.item_iterator = self.item_iterator + 1
            
    def observe(self):
        return np.array(self.warehouse_layout).reshape(1, -1)

In [5]:
class warehouse_state(object):
    
    def __init__(self, warehouse_layout, item_log):
        self.warehouse_layout = np.array(warehouse_layout)
        nrows, ncols = self.warehouse_layout.shape
        for row in range(nrows):
            for col in range(ncols):
                self.warehouse_layout[row, col] = 1.0
        self.action_log = []
        self.total_reward = -math.inf
        self.block_capacity = 100.0
        self.item_log = item_log
        self.item_iterator = 0
        
    def reset(self):
        nrows, ncols = self.warehouse_layout.shape
        for row in range(nrows):
            for col in range(ncols):
                self.warehouse_layout[row, col] = 1.0
        self.action_log = []
        self.total_reward = -math.inf
        self.item_iterator = 0
        
    def update(self, action):
        target_row = action.block_row
        target_col = action.block_col
        # determine if action is valid
        if (action.action_id == 1):
            if (self.warehouse_layout[target_row, target_col] == 0.0):
                action.status = 0
            if (action.quantity > self.warehouse_layout[target_row, target_col] * self.block_capacity):
                action.status = 0
        elif (action.action_id == 2):
            if (self.warehouse_layout[target_row, target_col] == 0.0):
                action.status = 0
            if (action.quantity > (1.0 - self.warehouse_layout[target_row, target_col]) * self.block_capacity):
                action.status = 0
        # perform action if action is valid
        if (action.status):
            self.warehouse_layout[target_row, target_col] = self.warehouse_layout[target_row, target_col] - action.quantity / self.block_capacity
            self.action_log.append(action)
            
    def get_reward(self, warehouse_log):
        if not self.action_log[-1].status:
            return 0
        reward = 0.0
        latest_action = self.action_log[-1]
        item = latest_action.item_id
        ideal_zone = warehouse_log[item]
        given_zone = -1
        if latest_action.block_row in [0, 1, 2]:
            given_zone = 1
        elif latest_action.block_row in [3, 4, 5]:
            given_zone = 2
        elif latest_action.block_row in [6, 7, 8, 9]:
            given_zone = 3
        # reward based on frequency zone
        if given_zone == ideal_zone:
            reward += 0.5
        else:
            reward += -(abs(ideal_zone - given_zone) / 2)
        # print(reward)
        # reward based on empty space
        reward += -(abs(self.warehouse_layout[latest_action.block_row, latest_action.block_col]))
        # print(reward)
        return reward
    
    def act(self, action, warehouse_log):
        action.item_id = self.item_log[self.item_iterator][0]
        action.quantity = self.item_log[self.item_iterator][1]
        self.update(action)
        if action.status:
            self.total_reward += self.get_reward(warehouse_log)
            self.item_iterator = self.item_iterator + 1
            
    def observe(self):
        return np.array(self.warehouse_layout).reshape(1, -1)

In [6]:
warehouse = WAREHOUSE_BLOCKS(warehouse_layout)
warehouse_log = { 1256: 3 }
item_log = [[1256, 90], [2157, 75]]
curr_action = ACTIONS(1, 1256, 90, 2, 3, 1)
warehouse.act(curr_action, warehouse_log)
print(warehouse.total_reward)
print(warehouse.warehouse_layout)
print(warehouse.observe().shape[1])

TypeError: __init__() missing 1 required positional argument: 'item_log'

In [None]:
# TODO
def simulate_slotting(model, warehouse, item_log, warehouse_log):
    warehouse.reset()
    item_index = 0
    while True:
        # get next action
        # act
        # check if all items placed or no place available

In [None]:
# TODO
class EXPERIENCE(object):
    
    def __init__(self, model, max_memory=100, discount=0.95):
        self.model = model
        self.max_memory = max_memory
        self.discount = discount
        self.memory = list()
        self.num_actions = model.output_shape[-2]
        
    def remember(self, epuisode):
        self.memory.append(episode)
        if len(self.memory) > self.max_memory:
            del self.memory[0]
            
    def predict(self, envstate):
        return self.model.predict(envstate)[0]
    
    def get_data(self, data_size=10):
        env_size = self.memory[0][0].shape[1]
        mem_size = len(self.memory)
        data_size = min(mem_size, data_size)
        inputs = np.zeros((data_size, env_size))
        targets = np.zeros((data_size, self.num_actions))
        for i, j in enumerate(np.random.choice(range(mem_size), data_size, replace=False)):
            envstate, action, reward, envstate_next, simulation_over = self.memory[j]
            inputs[i] = envstate
            targets[i] = self.predict(envstate_next)
            Qsa = np.max(self.predict(envstate_next))
            if simulation_over:
                targets[i, action] = reward
            else:
                targets[i, action] = reward + self.discount * Qsa
        return inputs, targets

In [None]:
# This is a small utility for printing readable time strings:
def format_time(seconds):
    if seconds < 400:
        s = float(seconds)
        return "%.1f seconds" % (s,)
    elif seconds < 4000:
        m = seconds / 60.0
        return "%.2f minutes" % (m,)
    else:
        h = seconds / 3600.0
        return "%.2f hours" % (h,)

In [None]:
def qtrain(model, warehouse, **hyperparameters):
    
    # set hyperparameters
    global epsilon
    n_epoch = hyperparameters.get('n_epoch', 15000)
    max_memory = hyperparameters.get('max_memory', 1000)
    data_size = hyperparameters.get('data_size', 50)
    weights_file = hyperparameters.get('weights_file', "")
    name = opt.get('name', 'model')
    start_time = datetime.datetime.now()

    if weights_file:
        print("loading weights from file: %s" % (weights_file,))
        model.load_weights(weights_file)
        
    # set warehouse layout
    warehouse_state = warehouse.warehouse_state
    SKU_data = warehouse.SKU_data
    
    # set experience object
    experience = Experience(model, max_memory = max_memory)
    
    # simulation result log
    simulation_log = []
    history_size = warehouse.warehouse_layout//2
    success_rate = 0.0
    
    for epoch in range(n_epoch):
        loss = 0.0
        # pick any item to start placement
        curr_SKU = random.choice(SKU_data)
        warehouse_state.reset()   
        n_rows = warehouse.warehouse_layout.n_rows
        n_cols = warehouse.warehouse_layout.n_cols
        simulation_over = False
        
        # observe the initial state of warehouse
        envstate = warehouse_state.observe()
        
        n_episodes = 0
        
        while not simulation_over:
            # create action object by passing current SKU data
            action = Action(curr_SKU)
        
            prev_envstate = envstate
            # TODO
            # add method to warehouse_state that will generate an intermediate state with current SKU effect
            
            # set the placement location for current SKU either by Exploration or Exploitation
            if np.random.rand() < epsilon:
                action.place_location = [random.randint(0, n_rows - 1), random.randint(0, n_cols - 1)]
            else:
                action.place_location = experience.predict(prev_envstate) # should take the intermediate SKU effect state
            
            # apply action, get reward, get next warehouse state
            envstate, reward, simulation_status = warehouse_state.act(action)
            if simulation_status == 'success':
                simulation_log.append(1)
                simulation_over = True
            elif simulation_status == 'failed':
                simulation_log.append(0)
                simulation_over = True
            else:
                simulation_over = False
                
            # store the experience
            episode = [prev_envstate, action, reward, envstate, simulation_over]
            experience.remember(episode)
            n_episodes += 1
            
            # train the neural network model
            inputs, targets = experience.get_data(data_size = data_size)
            h = model.fit(
                inputs,
                targets,
                epochs = 10,
                batch_size = 16,
                verbose = 0,
            )
            loss = model.evaluate(inputs, targets, verbose = 0)
            
            if len(simulation_log) > history_size:
                success_rate = sum(simulation_log[-history_size:]) / history_size
            
            dt = datetime.datetime.now() - start_time
            t = format_time(dt.total_seconds())
            template = "Epoch: {:03d}/{:d} | Loss: {:.4f} | Episodes: {:d} | Win count: {:d} | Win rate: {:.3f} | time: {}"
            print(template.format(epoch, n_epoch-1, loss, n_episodes, sum(simulation_log), success_rate, t))
            # we simply check if training has exhausted all free cells and if in all
            # cases the agent won
            if success_rate > 0.9 : epsilon = 0.05
            if sum(simulation_log[-history_size:]) == history_size and warehouse_filled_check(model, warehouse_state):
                print("Reached 100%% success rate at epoch: %d" % (epoch,))
                break
            # TODO
            # update current SKU
            
        # Save trained model weights and architecture
        h5file = name + ".h5"
        json_file = name + ".json"
        model.save_weights(h5file, overwrite=True)
        with open(json_file, "w") as outfile:
            json.dump(model.to_json(), outfile)
        end_time = datetime.datetime.now()
        dt = datetime.datetime.now() - start_time
        seconds = dt.total_seconds()
        t = format_time(seconds)6        print('files: %s, %s' % (h5file, json_file))
        print("n_epoch: %d, max_mem: %d, data: %d, time: %s" % (epoch, max_memory, data_size, t))
        return seconds