In [1]:
import torch
import numpy as np
import matplotlib.pyplot as plt
import rl_utils
from GIN_jsspenv import GIN_JsspEnv
from agent import PPO
import warnings
warnings.filterwarnings('ignore')

actor_lr = 2e-5
critic_lr = 2e-5
gamma = 0.98
lmbda = 0.95
epochs = 10
eps = 0.2
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [2]:
def train_single_size_on(size, num_episodes_per_env=1):
    num_jobs, num_machines = size
    generated_instances_file = f"DataGen/generatedData{num_jobs}_{num_machines}_Seed200.npy"
    policy_dir = f"saved_policies/train_single_size_on_{num_jobs}_{num_machines}"
    agent = PPO(device, actor_lr, critic_lr, lmbda, epochs, eps, gamma)
    generated_instances = np.load(generated_instances_file)
    print(f"Size: {size}, num of generated instance is {len(generated_instances)}")
    for i in range(len(generated_instances)):
        instance = generated_instances[i]
        time_mat, machine_mat = instance
        env = GIN_JsspEnv(processing_time_matrix=time_mat, machine_matrix=machine_mat)
        env.seed(0)
        return_list, makespan_list = rl_utils.train_on_policy_agent(env, agent, num_episodes_per_env)
        if i % 10 == 0:
            print(f"{i} instances trained.")
    agent.save(policy_dir)

In [None]:
sizes = [(6,6),(10,10),(15,10),(15,15),(20,10),(20,15),(20,20),(30,15),(30,20),(50,20),(100,20),(200,50)]
for size in sizes:
    train_single_size_on(size=size)

Size: (6, 6), num of generated instance is 100
0 instances trained.
10 instances trained.
20 instances trained.
30 instances trained.
40 instances trained.
50 instances trained.
60 instances trained.
70 instances trained.
80 instances trained.
90 instances trained.
Size: (10, 10), num of generated instance is 100
0 instances trained.
10 instances trained.
20 instances trained.
30 instances trained.
40 instances trained.
50 instances trained.
60 instances trained.
70 instances trained.
80 instances trained.
90 instances trained.
Size: (15, 10), num of generated instance is 100
0 instances trained.
10 instances trained.
20 instances trained.
30 instances trained.
40 instances trained.
50 instances trained.
60 instances trained.
70 instances trained.
80 instances trained.
90 instances trained.
Size: (15, 15), num of generated instance is 100
0 instances trained.
10 instances trained.
20 instances trained.
30 instances trained.
40 instances trained.
50 instances trained.
60 instances train