In [5]:
import torch
import numpy as np
import torch.nn as nn
import matplotlib.pyplot as plt
from GIN_jsspenv import GIN_JsspEnv
from hands_on_rl import PPO

In [9]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
agent = PPO(device)
agent.load("2023-04-18-23-53-20_actor.pth","2023-04-18-23-53-20_critic.pth")

In [15]:
instances = ["ft06", "la01", "la06", "la11", "la21", "la31", "la36", "orb01", "swv01", "swv06", "swv11", "yn1"]
ret = {}
for instance in instances:
    env = GIN_JsspEnv(instance)
    env.seed(0)
    torch.manual_seed(0)
    state = env.reset()
    done = False
    episode_return = 0
    while not done:
        action = agent.take_action(state, determinstic=True)
        next_state, reward, done, info = env.step(action)
        state = next_state
        episode_return += reward
        
    ret[instance] = info["makespan"]
    print(instance, ret[instance])

  state = np.array([adj, feature, mask, candidate_operation_indexes])
  next_state = np.array([adj, feature, mask, candidate_operation_indexes])


ft06 62
la01 871
la06 1278
la11 1499
la21 1331
la31 2272
la36 1721
orb01 1318
swv01 2095
swv06 2270
swv11 3845
yn1 1188


#### 可以看到agent的动作是均匀分布。啥也没学到。但却能在ft06实现比较好的效果，显然是学到了“捷径”

In [13]:
env = GIN_JsspEnv("ft06")
env.seed(0)
torch.manual_seed(0)
state = env.reset()
done = False
episode_return = 0
while not done:
    action = agent.take_action(state, determinstic=True)
    next_state, reward, done, info = env.step(action)
    state = next_state
    episode_return += reward

  state = np.array([adj, feature, mask, candidate_operation_indexes])
  next_state = np.array([adj, feature, mask, candidate_operation_indexes])


#### ft06在训练，其他算例测试

In [2]:
instances = ["ft06", "la01", "la06", "la11", "la21", "la31", "la36", "orb01", "swv01", "swv06", "swv11", "yn1"]
ret = {}

In [3]:
for instance in instances:
    env = GIN_JsspEnv(instance)
    env.seed(0)
    torch.manual_seed(0)
    state = env.reset()
    done = False
    episode_return = 0
    while not done:
        action = agent.take_action(state, determinstic=True)
        next_state, reward, done, info = env.step(action)
        state = next_state
        episode_return += reward
        
    ret[instance] = info["makespan"]

  state = np.array([adj, feature, mask, candidate_operation_indexes])
  state = np.array([adj, feature, mask, candidate_operation_indexes])


In [4]:
ret

{'ft06': 62,
 'la01': 809,
 'la06': 1331,
 'la11': 1641,
 'la21': 1481,
 'la31': 2959,
 'la36': 1869,
 'orb01': 1619,
 'swv01': 2519,
 'swv06': 3052,
 'swv11': 5577,
 'yn1': 1326}

#### 随机动作，多次平均

In [6]:
ret_random = {}
num_simulations = 100
for instance in instances:
    makespans = []
    for _ in range(num_simulations):
        env = GIN_JsspEnv(instance)
        env.seed(0)
        state = env.reset()
        done = False
        episode_return = 0
        while not done:
            action = agent.take_random_action(state)
            next_state, reward, done, info = env.step(action)
            state = next_state
            episode_return += reward
        makespans.append(info["makespan"])
    makespans = np.array(makespans)
    print(instance, makespans.mean())
    ret_random[instance] = makespans.mean()

  state = np.array([adj, feature, mask, candidate_operation_indexes])
  next_state = np.array([adj, feature, mask, candidate_operation_indexes])


ft06 74.0
la01 821.0
la06 1197.0
la11 1606.0
la21 1554.0
la31 2951.0
la36 1927.0
orb01 1474.0
swv01 2131.0
swv06 2796.0
swv11 5026.0
yn1 1360.0
