In [1]:
import numpy as np
import gym
from gym import spaces, logger
from gym.utils import seeding
import copy

In [155]:
# Matches the AWS model as first pass

class OnlineBinPackingEnv(gym.Env):
    '''
    Online Bin Packing Problem

    The Bin Packing Problem (BPP) is a combinatorial optimization problem in
    which items of different sizes must be packed into bins of a fixed
    capacity while wasting as little bin space as possible. This version is
    online, meaning items are presented to the algorithm randomly, one at a
    time, and each item must be placed (into a new bin or into an existing
    one) before the next item is seen. Bins are tracked only by their fill
    level, not individually.

    Observation:
        Type: list of bin_capacity + 1 integers
        0 .. bin_capacity - 1: Count of open bins at fill level h
        -1: Current item size

    Actions:
        Type: Discrete(bin_capacity)
        0: Open a new bin and place item into bin
        1+: Attempt to place item into a bin at the given level

    Reward:
        New bin: negative of the space left in that bin
            (item_size - bin_capacity).
        Existing bin: the size of the item placed (+item_size).
        Invalid action: INVALID_ACTION_PENALTY minus the waste value recorded
            by the most recent valid placement.

    Starting State:
        No available bins and random starting item

    Episode Termination:
        When an invalid action is selected (attempt to place the item in a
        non-existent bin or to overflow a bin), or the step limit is reached.
    '''
    # Base reward for an invalid placement. Kept on the class so the
    # environment does not depend on a global defined in another cell.
    INVALID_ACTION_PENALTY = -100

    def __init__(self):
        self.bin_capacity = 9
        self.item_sizes = [2, 3]
        self.item_probs = [0.8, 0.2]
        self.step_limit = 1000

        # One bin count per fill level plus the current item size.
        self.observation_space = spaces.Box(
            low=np.array([0] * (1 + self.bin_capacity)),
            high=np.array([self.step_limit] * self.bin_capacity + [max(self.item_sizes)]),
            dtype=np.uint32)

        self.action_space = spaces.Discrete(self.bin_capacity)

        # Seed before reset(): reset() draws the first item from self.np_random.
        self.seed()
        self.reset()

    def seed(self, seed=None):
        '''Seed the item generator and return the seed used, as a one-item list.'''
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        '''
        Place the current item according to `action` and draw the next item.

        Returns:
            (state, reward, done, info) where state is the list of bin counts
            per level followed by the next item size and info is an empty dict.

        Raises:
            ValueError: if action is outside [0, bin_capacity - 1].
        '''
        if action < 0 or action >= self.bin_capacity:
            raise ValueError('{} is an invalid action. Must be between {} and {}'.format(
                action, 0, self.bin_capacity - 1))

        done = False
        self.step_counter += 1

        if action > (self.bin_capacity - self.item_size):
            # Bin overflows
            reward = self.INVALID_ACTION_PENALTY - self.waste
            done = True
        elif action == 0:
            # Create new bin; an item that fills it exactly makes a full bin,
            # since there is no slot for level == bin_capacity in bin_levels.
            if self.item_size == self.bin_capacity:
                self.num_full_bins += 1
            else:
                self.bin_levels[self.item_size] += 1
            # This waste penalty seems very strange, it only occurs
            # when a new bin is opened.
            self.waste = self.bin_capacity - self.item_size
            reward = -1 * self.waste
        elif self.bin_levels[action] == 0:
            # Can't insert item into non-existent bin
            reward = self.INVALID_ACTION_PENALTY - self.waste
            done = True
        else:
            # Move one bin from level `action` up to its new fill level.
            new_level = action + self.item_size
            if new_level == self.bin_capacity:
                self.num_full_bins += 1
            else:
                self.bin_levels[new_level] += 1
            self.bin_levels[action] -= 1
            # Filling existing space reduces waste, so the reward is positive.
            self.waste = -self.item_size
            reward = -1 * self.waste

        self.total_reward += reward

        if self.step_counter >= self.step_limit:
            done = True

        # The next item is drawn even on the terminating step.
        self.item_size = self.get_item()
        self.state = self.bin_levels + [self.item_size]

        return self.state, reward, done, {}

    def get_item(self):
        '''Draw an item size from item_sizes according to item_probs.'''
        return self.np_random.choice(self.item_sizes, p=self.item_probs)

    def sample_action(self):
        '''Return a random action from the action space.'''
        return self.action_space.sample()

    def reset(self):
        '''Clear all bins and counters, draw the first item, and return the initial state.'''
        self.current_weight = 0  # not read anywhere in this class
        self.step_counter = 0
        self.num_full_bins = 0
        self.total_reward = 0
        self.waste = 0

        self.bin_levels = [0] * self.bin_capacity
        self.item_size = self.get_item()
        self.state = self.bin_levels + [self.item_size]
        return self.state

In [156]:
# Build the environment; the constructor seeds it and calls reset() to produce the first observation.
env = OnlineBinPackingEnv()

In [157]:
# Initial observation: one bin count per fill level, followed by the size of the item waiting to be placed.
env.state

[0, 0, 0, 0, 0, 0, 0, 0, 0, 2]

In [158]:
# Roll out random-policy episodes, each one running until the
# environment reports that the episode is over.
for episode in range(1000):
    env.reset()
    done = False
    while not done:
        action = env.action_space.sample()
        state, reward, done, _ = env.step(action)

In [159]:
# Last action sampled by the rollout loop, i.e. the one passed to the step that returned done=True.
action

5

In [160]:
# step() draws the next item before returning, so this is the item drawn after the final step,
# not necessarily the item the final action was applied to.
env.item_size

2

# Online Bin Packing

A random sequence of items $i$, each with size $w_i \in W$, is presented to an algorithm that packs them into bins $B_j$, $j \in J$, where every bin has a finite capacity at least as large as the largest item: $\max(W) \leq B_j^{\max} < \infty \; \forall j \in J$. Each item $i$ arrives at a given time $t \in T$, and its size is sampled according to a fixed probability $p_i$. Following Gupta and Radovanovic, we assume all bin and item sizes are integer values.

## Amazon Implementation

There are unlimited bins, each with identical capacity. Bins are grouped by their current fill level $h$, where $N_h(t)$ is the number of bins at level $h$ at time $t$. The action $a_t$ selects the level of the bin that receives the item or, in the case of $a_t=0$, opens a new, empty bin. The reward $R_t$ is the negative of the incremental waste incurred when item $i$ is assigned to bin $B_j$. They use an action mask to prevent infeasible actions.

In [2]:
# Base reward for an invalid placement (overflowing a bin, or choosing a
# fill level at which no bin exists); such a step also ends the episode.
BIG_NEG_REWARD = -100
# Not referenced by any code visible in this notebook.
BIG_POS_REWARD = 10


class BinPackingGymEnvironment(gym.Env):
    """Online bin packing environment that tracks bins by fill level.

    Items arrive one at a time. Action 0 opens a new bin for the current
    item; action h >= 1 places the item into a bin currently filled to
    level h. Overflowing a bin or choosing a level with no bins ends the
    episode with a large negative reward. The episode also ends after
    `time_horizon` steps.

    Observations are lists of `bag_capacity` bin counts (one per fill
    level) followed by the size of the item waiting to be placed.
    """

    def __init__(self, env_config=None):
        """Configure the environment.

        Args:
            env_config: optional dict overriding any of 'bag_capacity',
                'item_sizes', 'item_probabilities' and 'time_horizon'.
                Missing keys are filled in on the passed dict with the
                defaults that were applied.
        """
        # A fresh dict per call: a `{}` default would be shared between
        # instances and polluted by the defaults written back below.
        if env_config is None:
            env_config = {}

        config_defaults = {
            'bag_capacity': 9,
            'item_sizes': [2, 3],
            'item_probabilities': [0.8, 0.2],  # linear waste -> SS: -150 to -340
            # 'item_probabilities': [0.75, 0.25], # perfect pack -> SS: -20 to -100
            # 'item_probabilities': [0.5, 0.5], #bounded waste ->  SS: -11 to -20
            'time_horizon': 1000,
        }

        for key, default in config_defaults.items():
            # Constructor parameters override defaults; the effective value
            # becomes an attribute (self.bag_capacity, self.item_sizes, ...)
            # and is recorded in env_config when it was not supplied.
            setattr(self, key, env_config.setdefault(key, default))
        print('Using bin size: ', self.bag_capacity)
        print('Using items sizes {} \nWith item probabilities {}'.format(self.item_sizes,self.item_probabilities))
        self.csv_file = '/opt/ml/output/intermediate/binpacking.csv'

        self.episode_count = 0

        # state: number of bags at each level, item size,
        self.observation_space = spaces.Box(low=np.array([0] * self.bag_capacity + [0]), high=np.array(
            [self.time_horizon] * self.bag_capacity + [max(self.item_sizes)]), dtype=np.uint32)

        # actions: select a bag from the different levels possible
        self.action_space = spaces.Discrete(self.bag_capacity)

    def reset(self):
        """Start a new episode and return the initial state list."""
        self.time_remaining = self.time_horizon
        self.item_size = self.__get_item()
        self.num_full_bags = 0

        # an array of size bag capacity that keeps track of
        # number of bags at each level
        self.num_bins_levels = [0] * self.bag_capacity

        initial_state = self.num_bins_levels + [self.item_size]
        self.total_reward = 0
        self.waste = 0
        self.episode_count += 1
        self.bin_type_distribution_map = {}  # level to bin types, to the # of bins for each bin type.
        self.step_count = 0
        return initial_state

    def step(self, action):
        """Place the current item according to `action` and draw the next item.

        Returns:
            (state, reward, done, info), where info is the live
            bin_type_distribution_map (not a copy).

        Raises:
            ValueError: if action is outside [0, bag_capacity - 1].
        """
        # Negative values would otherwise index num_bins_levels from the end.
        if action < 0 or action >= self.bag_capacity:
            raise ValueError('Invalid action {}: must be between 0 and {}'.format(
                action, self.bag_capacity - 1))

        done = False
        self.step_count += 1
        if action > (self.bag_capacity - self.item_size):
            # can't insert item because bin overflow
            reward = BIG_NEG_REWARD - self.waste
            done = True
        elif action == 0:  # new bag
            # An item that fills the bag exactly produces a full bag; there
            # is no slot for level == bag_capacity in num_bins_levels.
            if self.item_size == self.bag_capacity:
                self.num_full_bags += 1
            else:
                self.num_bins_levels[self.item_size] += 1
            # waste = empty space left in the newly opened bag
            self.waste = self.bag_capacity - self.item_size
            # reward is negative waste
            reward = -1 * self.waste
            self.__update_bin_type_distribution_map(0)
        elif self.num_bins_levels[action] == 0:
            # can't insert item because bin of this level doesn't exist
            print('cannot insert item because bin of this level does not exist')
            reward = BIG_NEG_REWARD - self.waste
            done = True
        else:
            if action + self.item_size == self.bag_capacity:
                self.num_full_bags += 1
            else:
                self.num_bins_levels[action + self.item_size] += 1
            # filling existing space reduces waste by the item size
            self.waste = -self.item_size
            # reward is negative waste
            reward = -1 * self.waste
            self.__update_bin_type_distribution_map(action)
            self.num_bins_levels[action] -= 1

        self.total_reward += reward

        self.time_remaining -= 1
        if self.time_remaining == 0:
            done = True

        # get the next item
        self.item_size = self.__get_item()
        # state is the number of bins at each level and the item size
        state = self.num_bins_levels + [self.item_size]
        info = self.bin_type_distribution_map

        return state, reward, done, info

    def __get_item(self):
        """Draw an item size from item_sizes according to item_probabilities."""
        num_items = len(self.item_sizes)
        item_index = np.random.choice(num_items, p=self.item_probabilities)
        return self.item_sizes[item_index]

    def __update_bin_type_distribution_map(self, target_bin_util):
        """Record the current item being placed into a bin at `target_bin_util`.

        The map has the form {fill level: {composition key: bin count}},
        where a composition key is the sorted, space-separated item sizes
        in the bin (e.g. '2 2 3'). A target of 0 means a new bin is opened.
        For an existing level, one composition at that level is picked
        uniformly at random among the keys and moved to the new level.
        Inconsistent requests are reported with a printed message and
        leave the map unchanged.
        """
        dist_map = self.bin_type_distribution_map
        new_level = target_bin_util + self.item_size

        if target_bin_util < 0 or new_level > self.bag_capacity:
            print("Error: Invalid Bin Utilization/Item Size")
            return
        if target_bin_util > 0:
            if target_bin_util not in dist_map:
                print("Error: bin_type_distribution_map does not contain " + str(target_bin_util) + " as key!")
                return
            if len(dist_map[target_bin_util]) == 0:
                print("Error: bin_type_distribution_map has no element at level " + str(target_bin_util) + " !")
                return

        if target_bin_util == 0:  # opening a new bin
            new_key = str(self.item_size)
        else:
            source = dist_map[target_bin_util]
            key = np.random.choice(list(source.keys()))
            if source[key] <= 0:
                print("Error: Invalid bin count!")
                return
            elif source[key] == 1:
                # Last bin of this composition; the level's dict may be left empty.
                del source[key]
            else:
                source[key] -= 1
            new_key = self.__update_key_for_bin_type_distribution_map(key, self.item_size)

        destination = dist_map.setdefault(new_level, {})
        destination[new_key] = destination.get(new_key, 0) + 1

    @staticmethod
    def __update_key_for_bin_type_distribution_map(key, item_size):
        """Return `key` with `item_size` added, parts sorted as strings and space-joined."""
        parts = key.split(' ')
        parts.append(str(item_size))
        parts.sort()
        return " ".join(parts)

In [3]:
# With no config the defaults are used; the constructor prints the bin size and item distribution.
env = BinPackingGymEnvironment()

Using bin size:  9
Using items sizes [2, 3] 
With item probabilities [0.8, 0.2]


In [86]:
# Start a new episode; returns the initial state (all bin counts zero, then the first item size).
env.reset()

[0, 0, 0, 0, 0, 0, 0, 0, 0, 2]

In [98]:
# Try to place the current item into a bin at fill level 4, and show the episode's cumulative reward next to the step result.
env.step(4), env.total_reward

(([0, 0, 0, 0, 0, 0, 1, 0, 2, 2],
  2,
  False,
  {2: {}, 4: {}, 6: {'2 2 2': 1}, 8: {'2 2 2 2': 2}}),
 -103)

In [10]:
# Maps each fill level to {item-composition key: number of bins with that composition}.
env.bin_type_distribution_map

{2: {'2': 1}}

In [29]:
# Choose fill level 2; if no bin is at that level, step() prints a message and ends the episode with a penalty.
env.step(2)

cannot insert item because bin of this level does not exist


([0, 0, 0, 0, 1, 0, 0, 0, 0, 2], -98, True, {2: {}, 4: {'2 2': 1}})

In [34]:
# Action 0 opens a new bin for the current item.
env.step(0)

([0, 0, 3, 2, 1, 0, 0, 0, 0, 2],
 -6,
 False,
 {2: {'2': 3}, 4: {'2 2': 1}, 3: {'3': 2}})