In [1]:
import gym
import or_gym
from pyomo.environ import *
from or_gym.algos.math_prog_utils import *
import numpy as np

In [20]:
# Formulate the bounded knapsack problem as a Pyomo integer program using
# the item data exposed by the or_gym environment.
env_name = 'Knapsack-v1'
env = gym.make(env_name)

m = ConcreteModel()

# Knapsack capacity and the item index set
m.W = env.max_weight
m.i = Set(initialize=env.item_numbers)

# Per-item data keyed by item number: weight, value, and availability limit
m.w = Param(m.i, initialize=dict(zip(env.item_numbers, env.item_weights)))
m.v = Param(m.i, initialize=dict(zip(env.item_numbers, env.item_values)))
m.b = Param(m.i, initialize=dict(zip(env.item_numbers, env.item_limits)))

# Decision variable: how many copies of each item are packed
m.x = Var(m.i, within=NonNegativeIntegers)

@m.Constraint()
def weight_constraint(model):
    # Total packed weight must not exceed the capacity
    packed_weight = sum(model.w[k] * model.x[k] for k in model.i)
    return packed_weight - model.W <= 0

@m.Constraint(m.i)
def item_constraint(model, k):
    # No item may be packed more often than its limit allows
    return model.x[k] - model.b[k] <= 0

# Maximize the total value of the packed items
m.obj = Objective(
    expr=sum(m.v[k] * m.x[k] for k in m.i),
    sense=maximize)

In [21]:
# Solve the model with the or_gym math-programming helper, then evaluate the
# objective expression at the returned solution.
# NOTE(review): print_results=False presumably silences the solver report — confirm.
m, results = solve_math_program(m, print_results=False)
m.obj.expr()

31.0

In [22]:
# Number of copies of each item chosen in the solved model, in index order
[m.x[i].value for i in m.x]

[0.0, 0.0, 1.0, 2.0, 0.0, 0.0]

# Knapsack Heuristic

At each step, select the item with the highest value/weight ratio among those that are still available and still fit in the knapsack.

In [23]:
# Inspect the environment's state tuple.
# NOTE(review): layout looks like (weights, values, limits, max weight,
# current weight) — confirm against the or_gym Knapsack-v1 source.
env.state

(array([ 1,  2,  3,  6, 10, 18]),
 array([  0,   1,   3,  14,  20, 100]),
 array([2, 1, 3, 4, 5, 6]),
 15,
 0)

In [30]:
env = gym.make('Knapsack-v1')
# Rank items from highest to lowest value/weight ratio
vw_ratio = env.item_values / env.item_weights
vw_order = env.item_numbers[np.argsort(vw_ratio)[::-1]]
actions = []
rewards = []
done = False
# Stop when the environment reports done OR when no candidate item is left;
# without the second check, vw_order[0] raises IndexError whenever the
# knapsack cannot be filled exactly.
while not done and len(vw_order) > 0:
    max_item = vw_order[0]
    remaining_capacity = env.max_weight - env.current_weight
    # Drop the best-ranked item if it is used up or no longer fits
    if (env.item_limits[max_item] == 0
            or env.item_weights[max_item] > remaining_capacity):
        vw_order = vw_order[1:]
        continue
    # Select max_item
    state, reward, done, _ = env.step(max_item)
    actions.append(max_item)
    rewards.append(reward)

In [45]:
def bkp_heuristic(env):
    """Greedy value/weight heuristic for the bounded knapsack environment.

    The environment is reset, items are ranked by value/weight ratio, and the
    best-ranked item that is still available and still fits is selected
    repeatedly. The loop ends when the environment reports ``done`` or when
    no candidate item remains.

    Args:
        env: Environment whose ``spec.id`` is ``'Knapsack-v1'``. It must
            expose ``item_values``, ``item_weights``, ``item_numbers``,
            ``item_limits``, ``max_weight``, ``current_weight``, ``reset()``
            and ``step(item)``.

    Returns:
        Tuple ``(actions, rewards)``: the selected item numbers in order and
        the reward returned by ``env.step`` for each selection.

    Raises:
        AssertionError: If ``env.spec.id`` is not ``'Knapsack-v1'``.
    """
    assert env.spec.id == 'Knapsack-v1', \
        '{} received. Heuristic designed for Knapsack-v1.'.format(env.spec.id)
    env.reset()
    # Rank items from highest to lowest value/weight ratio
    vw_ratio = env.item_values / env.item_weights
    vw_order = env.item_numbers[np.argsort(vw_ratio)[::-1]]
    actions = []
    rewards = []
    done = False
    # The emptiness check prevents an IndexError on vw_order[0] when every
    # item has been ruled out but the knapsack is not exactly full.
    while not done and len(vw_order) > 0:
        max_item = vw_order[0]
        remaining_capacity = env.max_weight - env.current_weight
        # Drop the best-ranked item if it is used up or no longer fits
        if (env.item_limits[max_item] == 0
                or env.item_weights[max_item] > remaining_capacity):
            vw_order = vw_order[1:]
            continue
        # Select max_item
        state, reward, done, _ = env.step(max_item)
        actions.append(max_item)
        rewards.append(reward)

    return actions, rewards

In [46]:
# Run the greedy heuristic on the Knapsack-v1 environment; returns (actions, rewards)
bkp_heuristic(env)

([3, 3, 2], [14, 14, 3])

In [47]:
# Expected to fail: the heuristic asserts that it is given a Knapsack-v1
# environment, so passing Knapsack-v0 raises AssertionError.
bkp_heuristic(gym.make('Knapsack-v0'))

AssertionError: Env Knapsack-v0 received. Heuristic designed for Knapsack-v1.

In [34]:
# Item identifiers used to index the weight, value, and limit arrays
env.item_numbers

array([0, 1, 2, 3, 4, 5])

In [38]:
# Environment id string that bkp_heuristic checks in its assertion
env.spec.id

'Knapsack-v1'