In [1]:
import gym
import or_gym
from pyomo.environ import *
from or_gym.algos.math_prog_utils import *
import numpy as np

In [2]:
env_name = 'Knapsack-v1'
env = gym.make(env_name)

# Initialize model
m = ConcreteModel()

# Sets, parameters, and variables
m.W = env.max_weight
m.i = Set(initialize=env.item_numbers)
m.w = Param(m.i, 
    initialize={i: j for i, j in zip(env.item_numbers, env.item_weights)})
m.v = Param(m.i, 
    initialize={i: j for i, j in zip(env.item_numbers, env.item_values)})
m.b = Param(m.i,
    initialize={i: j for i, j in zip(env.item_numbers, env.item_limits)})
m.x = Var(m.i, within=NonNegativeIntegers)

@m.Constraint()
def weight_constraint(m):
    return sum(m.w[i] * m.x[i] for i in m.i) - m.W <= 0

@m.Constraint(m.i)
def item_constraint(m, i):
    return m.x[i] - m.b[i] <= 0

m.obj = Objective(expr=(
    sum([m.v[i] * m.x[i] for i in m.i])),
    sense=maximize)

In [3]:
m, results = solve_math_program(m, print_results=False)
m.obj.expr()

2045.0

In [22]:
[m.x[i].value for i in m.x]

[0.0, 0.0, 1.0, 2.0, 0.0, 0.0]

# Knapsack Heuristic

If an item is available, select the item that maximizes value/weight ratio if it fits in the knapsack.

In [23]:
env.state

(array([ 1,  2,  3,  6, 10, 18]),
 array([  0,   1,   3,  14,  20, 100]),
 array([2, 1, 3, 4, 5, 6]),
 15,
 0)

In [4]:
env = gym.make('Knapsack-v1')
# Get value-weight ratios
vw_ratio = env.item_values / env.item_weights
vw_order = env.item_numbers[np.argsort(vw_ratio)[::-1]]
actions = []
rewards = []
done = False
while not done:
    # Check that max item is available
    max_item = vw_order[0]
    if env.item_limits[max_item] == 0:
        # Remove item from list
        vw_order = vw_order[1:].copy()
        continue
    # Check that item fits
    if env.item_weights[max_item] > (env.max_weight - env.current_weight):
        # Remove item from list
        vw_order = vw_order[1:].copy()
        continue
    # Select max_item
    state, reward, done, _ = env.step(max_item)
    actions.append(max_item)
    rewards.append(reward)

In [5]:
def bkp_heuristic(env):
    assert env.spec.id == 'Knapsack-v1', \
        '{} received. Heuristic designed for Knapsack-v1.'.format(env.spec.id)
    env.reset()
    # Get value-weight ratios
    vw_ratio = env.item_values / env.item_weights
    vw_order = env.item_numbers[np.argsort(vw_ratio)[::-1]]
    actions = []
    rewards = []
    done = False
    while not done:
        # Check that max item is available
        max_item = vw_order[0]
        if env.item_limits[max_item] == 0:
            # Remove item from list
            vw_order = vw_order[1:].copy()
            continue
        # Check that item fits
        if env.item_weights[max_item] > (env.max_weight - env.current_weight):
            # Remove item from list
            vw_order = vw_order[1:].copy()
            continue
        # Select max_item
        state, reward, done, _ = env.step(max_item)
        actions.append(max_item)
        rewards.append(reward)
        
    return actions, rewards

In [7]:
actions, rewards = bkp_heuristic(env)

In [9]:
np.unique(actions, return_counts=True)

(array([65]), array([200]))

In [10]:
env.item_limits[65]

2

In [11]:
env.reset()
# Get value-weight ratios
vw_ratio = env.item_values / env.item_weights
vw_order = env.item_numbers[np.argsort(vw_ratio)[::-1]]
actions = []
rewards = []
done = False
while not done:
    # Check that max item is available
    max_item = vw_order[0]
    if env.item_limits[max_item] == 0:
        # Remove item from list
        vw_order = vw_order[1:].copy()
        continue
    # Check that item fits
    if env.item_weights[max_item] > (env.max_weight - env.current_weight):
        # Remove item from list
        vw_order = vw_order[1:].copy()
        continue
    # Select max_item
    state, reward, done, _ = env.step(max_item)
    actions.append(max_item)
    rewards.append(reward)
        

In [12]:
vw_order

array([ 65,  15, 113,  59,  56, 184,  74, 116, 166, 146,   8,  94,  43,
       129,  29,  48,  12,  75,  89, 101, 126,  26,  66,  11,  84,   2,
        20, 148, 170,  39,  98,  70, 171, 174, 169, 125, 153,  57, 136,
       176,  83, 194,  80, 158,  81, 117, 182, 157, 193, 119, 150,  96,
        36, 104, 123,  79, 175, 196,  78,  10, 138, 118,  88, 197, 102,
       139, 186, 122,  72,  97,  37,   1, 131,  58,  60,  21,   6, 177,
        69,  68,   0, 137, 172, 110,   4, 159, 127,   7,  31, 183,  35,
       162,  24,  86, 165,  25,  52, 155,  30, 134,  44,  54,  23,  18,
       109, 173, 105, 185, 108, 135,  73, 152,  67,  91, 111, 190,  14,
        42,  90, 107,  13,  50,  51,  38, 199, 140,  92, 181,  40, 133,
        71, 161, 180,  77,  46, 128, 156, 112, 121,  62, 167, 179,  95,
       124, 192, 103,  82, 168, 151,   5,  63,  87,  41, 141,  27,   9,
       106,  19, 163, 147, 144, 132, 160, 195, 154,   3, 145, 189,  47,
       188, 164,  49,  22, 142, 178,  99,  53,  45, 130,  33,  7

In [13]:
env.item_limits[65]

2