## ***Test the OP environment***

This notebook compares rl4co's OP environment with the original implementation from the `attention-learn-to-route` paper. Place this notebook in the root directory of the `attention-learn-to-route` repository, and put the `rl4co` package in that same root directory. Then run the notebook to check that rl4co's OP implementation behaves the same as the original one.

In [1]:
# Import packages
import torch
from problems.op.problem_op import OP
from problems.op.state_op import StateOP
from rl4co.envs.op2 import OPEnv
from tensordict.tensordict import TensorDict
from nets.attention_model import AttentionModel, set_decode_type

  from .autonotebook import tqdm as notebook_tqdm


### ***Step 1. Basic test of running the rl4co OP environment***

In [2]:
# Smoke test: build the rl4co OP environment with its default settings,
# reset it for a small batch, and show the structure of the returned TensorDict.
smoke_batch_size = [32]
openv = OPEnv()
td = openv.reset(batch_size=smoke_batch_size)
print(td)

TensorDict(
    fields={
        action_mask: Tensor(shape=torch.Size([32, 32, 21]), device=cpu, dtype=torch.bool, is_shared=False),
        current_node: Tensor(shape=torch.Size([32, 1]), device=cpu, dtype=torch.int64, is_shared=False),
        done: Tensor(shape=torch.Size([32, 1]), device=cpu, dtype=torch.bool, is_shared=False),
        length_capacity: Tensor(shape=torch.Size([32, 21]), device=cpu, dtype=torch.float32, is_shared=False),
        length_to_depot: Tensor(shape=torch.Size([32, 21]), device=cpu, dtype=torch.float32, is_shared=False),
        locs: Tensor(shape=torch.Size([32, 21, 2]), device=cpu, dtype=torch.float32, is_shared=False),
        prize: Tensor(shape=torch.Size([32, 21]), device=cpu, dtype=torch.float32, is_shared=False),
        prize_collect: Tensor(shape=torch.Size([32, 1]), device=cpu, dtype=torch.float32, is_shared=False),
        used_capacity: Tensor(shape=torch.Size([32, 1]), device=cpu, dtype=torch.float32, is_shared=False),
        visited: Tensor(

### ***Step 2. Compare with the original implementation***

In [3]:
# Init problems for ours and kool
B = 16  # number of instances compared (batch size)
N = 50  # passed as `size` to Kool's dataset and as `num_loc` to the rl4co env

# Fix torch's global RNG so that a Restart & Run All reproduces the same run.
# NOTE(review): assumes the dataset generation and the model initialisation
# draw from torch's global RNG — confirm against `problems/op/problem_op.py`.
SEED = 1234
torch.manual_seed(SEED)

# SECTION Kool's
problem = OP()
dataset = problem.make_dataset(num_samples=B, size=N)

# SECTION Ours
# Build the env with the same problem size as the dataset above, so both sides
# describe the same problem (a bare `OPEnv()` would use its default size).
openv = OPEnv(num_loc=N)

In [4]:
# Batch the original dataset.
# Each item of Kool's dataset is a per-instance dict; stack every field along a
# new leading dimension so that both implementations receive the same batch.
samples = [dataset[idx] for idx in range(len(dataset))]
field_names = ('loc', 'depot', 'prize', 'max_length')
input_dict = {
    name: torch.stack([sample[name] for sample in samples], dim=0)
    for name in field_names
}

# Show the batched shape of every field
for name in field_names:
    print(input_dict[name].size())

# The locations are deliberately NOT concatenated with the depot here: `loc`
# and `depot` are handed to the env as separate entries in the reset cell.

# Prize with depot: prepend one zero-prize column for the depot
depot_prize = torch.zeros_like(input_dict['prize'][:, :1])
prize_with_depot = torch.cat((depot_prize, input_dict['prize']), dim=-1)
print(prize_with_depot.size())

torch.Size([16, 50, 2])
torch.Size([16, 2])
torch.Size([16, 50])
torch.Size([16])
torch.Size([16, 51])


In [5]:
# Init data: put both implementations in their initial state for the same batch.
# Kool
state_kool = StateOP.initialize(input_dict)

# Ours
openv = OPEnv(num_loc=N)
new_td = TensorDict(
    {
        "locs": input_dict['loc'],
        "depot": input_dict['depot'],
        # the env is given the prize vector that includes the depot's zero entry
        "prize": prize_with_depot,
        "max_length": input_dict['max_length'],
    },
    batch_size=B,
)
td = openv.reset(new_td)

# Sanity checks: after reset, index 0 along the node dimension is skipped
# (`1:`) and the remaining entries must equal the original customer data.
assert torch.allclose(td['locs'][..., 1:, :], input_dict['loc']), "Locs should be the same"
assert torch.allclose(td['depot'], input_dict['depot']), "Depot should be the same"
# Message fixed: this assertion checks prizes, not demands.
assert torch.allclose(td['prize'][..., 1:], input_dict['prize']), "Prize should be the same"

In [6]:
# Run the model to get actions, then replay them through both implementations.
# Init the model (untrained; it only serves as a source of actions)
op = AttentionModel(128, 128, problem)
set_decode_type(op, 'greedy')
# Inference only: no gradients are needed to obtain the action sequence.
with torch.no_grad():
    cost, _, pi = op(input_dict, return_pi=True)

# Record collected prizes
our_prize_collect = []; kool_prize_collect = []
# Record mask
our_mask = []; kool_mask = []
# Record used capacity
our_used_capacity = []; kool_used_capacity = []
# Record our dones
our_dones = []

# Iterate over the transposed action tensor, i.e. one batch of actions per step
# (assumes `pi` is laid out as (batch, steps) — TODO confirm).
for p in pi.T:
    # Our step
    td.set("action", p)
    td = openv.step(td)["next"]

    # Clone every recorded tensor so that later steps cannot alter the history.
    our_prize_collect.append(td['prize_collect'].clone())
    our_mask.append(td['action_mask'].clone().squeeze(1))
    our_used_capacity.append(td['used_capacity'].clone())
    our_dones.append(td['done'].clone())

    # Kool step
    state_kool = state_kool.update(p)

    kool_prize_collect.append(state_kool.cur_total_prize)
    # Kool's mask is stored as-is; the comparison cell negates the rl4co
    # action mask before comparing the two.
    kool_mask.append(state_kool.get_mask().squeeze(1))
    kool_used_capacity.append(state_kool.lengths)

In [7]:
# Print the shape of the first recorded step for every tracked quantity, so the
# rl4co ("our") and Kool tensors can be compared side by side before the
# element-wise check. The dones are recorded for rl4co only.
recorded_histories = (
    ("--- Prize ---", (our_prize_collect, kool_prize_collect)),
    ("--- Mask ---", (our_mask, kool_mask)),
    ("--- Capacity ---", (our_used_capacity, kool_used_capacity)),
    ("--- Dones ---", (our_dones,)),
)

for banner, histories in recorded_histories:
    print(banner)
    for history in histories:
        print(history[0].size())

--- Prize ---
torch.Size([16, 1])
torch.Size([16, 1])
--- Mask ---
torch.Size([16, 51])
torch.Size([16, 51])
--- Capacity ---
torch.Size([16, 1])
torch.Size([16, 1])
--- Dones ---
torch.Size([16, 1])


In [8]:
# Compare the per-step records of both implementations.
# Each check prints the first step at which the two sides disagree and then
# stops; the names of the failed checks are collected so that 'PASS' is only
# reported when every check agreed.
failed_checks = []

# Check the prize
for i, (our_p, kool_p) in enumerate(zip(our_prize_collect, kool_prize_collect)):
    if not torch.allclose(our_p, kool_p):
        print(f"Prize diff at {i}")
        print(our_p)
        print(kool_p)
        failed_checks.append("prize")
        break

# Check the mask
for i, (our_m, kool_m) in enumerate(zip(our_mask, kool_mask)):
    # The two masks use opposite conventions, so ours is negated before the
    # comparison.
    our_m_negated = ~our_m
    if not torch.allclose(our_m_negated, kool_m):
        print(f"Mask diff at {i}")
        print(our_m)  # the rl4co action mask as recorded (not negated)
        print(kool_m)
        failed_checks.append("mask")
        break

# Check the used capacity
for i, (our_c, kool_c) in enumerate(zip(our_used_capacity, kool_used_capacity)):
    if not torch.allclose(our_c, kool_c):
        print(f"Used capacity diff at {i}")
        print(our_c)
        print(kool_c)
        failed_checks.append("used capacity")
        break

# Fail loudly instead of printing 'PASS' regardless of the outcome.
assert not failed_checks, f"rl4co and Kool's OP implementations differ in: {failed_checks}"
print('PASS')

PASS
