In [1]:
from rl4co.envs import FJSPEnv
from rl4co.envs.scheduling.fjsp.generator import FJSPGenerator

from rl4co.models.nn.graph.hgnn import HetGNNEncoder

import torch

  return torch._C._cuda_getDeviceCount() > 0


In [None]:
class FJSPGeneratorWithGPM(FJSPGenerator):

    def __init__(self, 
                 num_jobs = 10, 
                 num_machines = 5, 
                 min_ops_per_job = 4, 
                 max_ops_per_job = 6, 
                 min_processing_time = 1, 
                 max_processing_time = 20, 
                 min_eligible_ma_per_op = 1, 
                 max_eligible_ma_per_op = None, 
                 same_mean_per_op = True, 
                 **unused_kwargs):
        
        super().__init__(num_jobs, 
                         num_machines, 
                         min_ops_per_job, 
                         max_ops_per_job, 
                         min_processing_time, 
                         max_processing_time, 
                         min_eligible_ma_per_op, 
                         max_eligible_ma_per_op, 
                         same_mean_per_op, 
                         **unused_kwargs)

    def _generate(self, batch_size):
        td = super()._generate(batch_size)

        bs, n_machines, n_ops = td["proc_times"].shape

        general_purpose_machine = torch.ones(size=(bs, 1, n_ops), 
                                             dtype=td["proc_times"].dtype, 
                                             device=td["proc_times"].device) * self.max_processing_time

        td["proc_times"] = torch.cat([td["proc_times"], general_purpose_machine], dim=1)

        return td

        

In [3]:
class FJSPEnvMOPM(FJSPEnv):
    def __init__(self, generator_params = ..., check_mask = False, stepwise_reward = False, **kwargs):

        generator_params = {**generator_params}
        self.max_ops_processed = generator_params.pop("max_ops_processed")

        generator = FJSPGeneratorWithGPM(**generator_params)

        super().__init__(generator=generator, mask_no_ops=True, check_mask=check_mask, stepwise_reward=stepwise_reward, **kwargs)

    def _reset(self, td = None, batch_size=None):
        td = super()._reset(td, batch_size)

        ma_ops_processed_left = torch.ones_like(td["busy_until"]) * self.max_ops_processed

        td["ma_ops_processed_left"] = ma_ops_processed_left

        td["ma_ops_processed_left"][:, -1] = 1e6

        return td.to(self.device)
    
    def _step(self, td):

        td = td.to(self.device)

        # test if we can use new action

        n_batches, n_jobs = td["end_op_per_job"].shape

        ma_ops_processed_left = td["ma_ops_processed_left"]

        n_machines = ma_ops_processed_left.size(1)

        machines = (td["action"] - 1) % n_machines

        ma_ops_processed_left[torch.arange(ma_ops_processed_left.size(0), device=self.device)[td["action"] > 0], machines[td["action"] > 0]] -= 1

        assert (ma_ops_processed_left >= 0).all()

        td = super()._step(td)

        td["ma_ops_processed_left"] = ma_ops_processed_left

        availible_machines_mask = (ma_ops_processed_left > 0)

        new_mask = torch.concat([torch.tensor([True], device=self.device).bool().repeat(n_batches, 1), availible_machines_mask.repeat(1, n_jobs)], dim=1)

        new_mask = td["action_mask"] * new_mask

        all_false_rows = ~new_mask.any(dim=1)

        new_mask[all_false_rows, 0] = True

        td["action_mask"] = new_mask

        return td


In [4]:
class FJSPEnvMTPM(FJSPEnv):
    def __init__(self, generator_params = ..., check_mask = False, stepwise_reward = False, **kwargs):

        generator_params = {**generator_params}
        self.max_time_worked = generator_params.pop("max_time_worked")

        generator = FJSPGeneratorWithGPM(**generator_params)

        super().__init__(generator=generator, mask_no_ops=True, check_mask=check_mask, stepwise_reward=stepwise_reward, **kwargs)

    def _reset(self, td = None, batch_size=None):
        td = super()._reset(td, batch_size)

        ma_time_left = torch.ones_like(td["busy_until"]) * self.max_time_worked

        td["ma_time_left"] = ma_time_left

        td["ma_time_left"][:, -1] = 1e6

        return td.to(self.device)
    
    def _step(self, td):

        td = td.to(self.device)

        # test if we can use new action

        n_batches, n_jobs = td["end_op_per_job"].shape

        ma_time_left = td["ma_time_left"]

        n_machines = ma_time_left.size(1)

        machines = (td["action"] - 1) % n_machines

        jobs = (td["action"] - 1) // n_machines

        ops = td["next_op"][torch.arange(n_batches), jobs]

        ops_time = td["proc_times"][torch.arange(n_batches), machines, ops]

        ma_time_left[torch.arange(ma_time_left.size(0), device=self.device)[td["action"] > 0], machines[td["action"] > 0]] -= ops_time.flatten()[td["action"] > 0]

        assert (ma_time_left >= 0).all()

        td = super()._step(td)

        td["ma_time_left"] = ma_time_left

        batches, available_jobs, available_machines = torch.where(td["action_mask"][:, 1:].reshape(n_batches, n_jobs, n_machines))

        ops = td["next_op"][batches, available_jobs]

        ops_time = td["proc_times"][batches, available_machines, ops]

        available_ops_mask = (ma_time_left[batches, available_machines] > ops_time)

        new_mask = torch.ones(size=(n_batches, n_jobs, n_machines), dtype=bool, device=self.device)

        new_mask[batches, available_jobs, available_machines] = available_ops_mask

        new_mask = new_mask.reshape(n_batches, -1)

        new_mask = torch.concat([torch.tensor([True], device=self.device).bool().repeat(n_batches, 1), new_mask], dim=1)

        new_mask *= td["action_mask"]

        all_false_rows = ~new_mask.any(dim=1)

        new_mask[all_false_rows, 0] = True
        
        td["action_mask"] = new_mask

        return td


In [5]:
generator_params = {
  "num_jobs": 20,  # the total number of jobs
  "num_machines": 10,  # the total number of machines that can process operations
  "min_ops_per_job": 1,  # minimum number of operatios per job
  "max_ops_per_job": 5,  # maximum number of operations per job
  "min_processing_time": 1,  # the minimum time required for a machine to process an operation
  "max_processing_time": 20,  # the maximum time required for a machine to process an operation
  "min_eligible_ma_per_op": 1,  # the minimum number of machines capable to process an operation
  "max_eligible_ma_per_op": 5,  # the maximum number of machines capable to process an operation
}

new_generator_params = {**generator_params}
new_generator_params["max_ops_processed"] = 6
new_generator_params["max_time_worked"] = 60

In [6]:
new_env = FJSPEnvMOPM(generator_params=new_generator_params)

env = FJSPEnv(generator_params=new_generator_params)

Found 1 unused kwargs: {'max_time_worked': 60}
Found 2 unused kwargs: {'max_ops_processed': 6, 'max_time_worked': 60}


In [7]:
from rl4co.models.zoo.l2d import L2DPolicy, L2DModel

In [8]:
if torch.cuda.is_available():
    accelerator = "gpu"
    batch_size = 64
    train_data_size = 5_000
    embed_dim = 64
    num_encoder_layers = 8
else:
    accelerator = "cpu"
    batch_size = 32
    train_data_size = 1_000
    embed_dim = 64
    num_encoder_layers = 2

accelerator

'gpu'

In [8]:
accelerator = "cpu"
batch_size = 32
train_data_size = 1_000
embed_dim = 64
num_encoder_layers = 2

accelerator

'cpu'

In [9]:
model = L2DModel(env,
                 baseline="rollout",
                 batch_size=batch_size,
                 train_data_size=train_data_size,
                 val_data_size=1_000,
                 optimizer_kwargs={"lr": 1e-5})

/home/daniil/programming/diplom/mtrlgnn/.venv/lib/python3.10/site-packages/lightning/pytorch/utilities/parsing.py:209: Attribute 'env' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['env'])`.
/home/daniil/programming/diplom/mtrlgnn/.venv/lib/python3.10/site-packages/lightning/pytorch/utilities/parsing.py:209: Attribute 'policy' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['policy'])`.


In [10]:
td_new = new_env.reset(batch_size=64)

td = env.reset(batch_size=64)

res = model(td, env=env)

res_new = model(td_new, env=new_env)

res["reward"].mean(), res_new["reward"].mean()

(tensor(-92.7656), tensor(-125.3594))

In [11]:
model_load = L2DModel.load_from_checkpoint("./logs/csv_logs/version_0/checkpoints/epoch=19-step=400.ckpt", map_location="cpu", load_baseline=False)

/home/daniil/programming/diplom/mtrlgnn/.venv/lib/python3.10/site-packages/lightning/pytorch/core/saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['baseline.baseline.policy.encoder.init_embedding.init_ops_embed.weight', 'baseline.baseline.policy.encoder.init_embedding.pos_encoder.pe', 'baseline.baseline.policy.encoder.init_embedding.init_ma_embed.weight', 'baseline.baseline.policy.encoder.init_embedding.edge_embed.weight', 'baseline.baseline.policy.encoder.layers.0.hgnn1.self_attn', 'baseline.baseline.policy.encoder.layers.0.hgnn1.cross_attn', 'baseline.baseline.policy.encoder.layers.0.hgnn1.edge_attn', 'baseline.baseline.policy.encoder.layers.0.hgnn2.self_attn', 'baseline.baseline.policy.encoder.layers.0.hgnn2.cross_attn', 'baseline.baseline.policy.encoder.layers.0.hgnn2.edge_attn', 'baseline.baseline.policy.encoder.layers.0.ffn1.ops.norm1.normalizer.weight', 'baseline.baseline.policy.encoder.layers.0.ffn1.ops.norm1.normalizer.bias', 'baseline.bas

In [12]:
td_new = new_env.reset(batch_size=64)

td = env.reset(batch_size=64)

res = model(td, env=env)

res_new = model(td_new, env=new_env)

res["reward"].mean(), res_new["reward"].mean()

(tensor(-91.0781), tensor(-118.2344))

In [13]:
model = L2DModel(new_env,
                 baseline="rollout",
                 batch_size=batch_size,
                 train_data_size=train_data_size,
                 val_data_size=1_000,
                 optimizer_kwargs={"lr": 1e-5})

In [14]:
from pytorch_lightning.loggers import CSVLogger

In [15]:
loggers = [
        CSVLogger(save_dir="logs_ver2", name="csv_logs"),
    ]

In [16]:
from rl4co.utils.trainer import RL4COTrainer

In [17]:
trainer = RL4COTrainer(
    max_epochs=3,
    accelerator=accelerator,
    devices=1,
    logger=loggers,
    log_every_n_steps=20
)

/home/daniil/programming/diplom/mtrlgnn/.venv/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/accelerator_connector.py:513: You passed `Trainer(accelerator='cpu', precision='16-mixed')` but AMP with fp16 is not supported on CPU. Using `precision='bf16-mixed'` instead.
Using bfloat16 Automatic Mixed Precision (AMP)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [18]:
trainer.fit(model)

val_file not set. Generating dataset instead
test_file not set. Generating dataset instead

  | Name     | Type           | Params | Mode 
----------------------------------------------------
0 | env      | FJSPEnvMOPM    | 0      | train
1 | policy   | L2DPolicy      | 81.2 K | train
2 | baseline | WarmupBaseline | 81.2 K | train
----------------------------------------------------
162 K     Trainable params
0         Non-trainable params
162 K     Total params
0.649     Total estimated model params size (MB)
74        Modules in train mode
70        Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/daniil/programming/diplom/mtrlgnn/.venv/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
/home/daniil/programming/diplom/mtrlgnn/.venv/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=3` reached.


In [19]:
import pandas as pd

import plotly.express as px

In [20]:
# metrics = pd.read_csv("./logs_ver2/csv_logs/version_2/metrics.csv")
metrics = pd.read_csv("./logs_ver2/csv_logs/version_6/metrics.csv")
metrics["val/reward"] = -metrics["val/reward"].shift(-1)
metrics["train/reward"] = -metrics["train/reward"]

In [21]:
px.line(data_frame=metrics.dropna(), x="step", y=["train/reward", "val/reward"])

In [22]:
import torch.nn as nn

class MultiEncoder(nn.Module):
    def __init__(self, encoder_1, encoder_2, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.encoder_1 = encoder_1
        self.encoder_2 = encoder_2

    def forward(self, td):

        hidden_1, _ = self.encoder_1(td)

        hidden_2, _ = self.encoder_2(td)

        hidden = (hidden_1[0] + hidden_2[0], hidden_1[1] + hidden_2[1])

        return hidden, None
        

In [23]:
class AdditionalMachineInfoInitEmbedding(nn.Module):
    def __init__(
        self,
        embed_dim,
        feature_name: str,
        feature_dim: int = 1,
        linear_bias: bool = True,
    ):
        super().__init__()
        self.feature_name = feature_name
        self.embed_dim = embed_dim
        self.init_embed = nn.Linear(feature_dim, embed_dim, linear_bias)

    def forward(self, td):
        bs, n_ops = td["is_ready"].shape
        ops_emb = torch.randn(size=(bs, n_ops, self.embed_dim), device=td.device)
        ma_emb = self.init_embed(td[self.feature_name].unsqueeze(2))
        n_machines = ma_emb.size(1)
        edge_emb = torch.randn(size=(bs, n_ops, n_machines, self.embed_dim), device=td.device)
        edges = td["ops_ma_adj"].transpose(1, 2).to(td.device)

        return ops_emb, ma_emb, edge_emb, edges

In [24]:
td_new = new_env.reset(batch_size=32).to("cpu")

encoder_1 = HetGNNEncoder(embed_dim=64, num_layers=2).to("cpu")
encoder_2 = HetGNNEncoder(embed_dim=64, num_layers=2, init_embedding=AdditionalMachineInfoInitEmbedding(64, "ma_ops_processed_left")).to("cpu")

enc = MultiEncoder(encoder_1, encoder_2)

In [25]:
enc(td_new)

((tensor([[[ 0.0256,  1.5537,  1.4121,  ...,  0.8951, -0.2414,  0.1412],
           [ 0.3411,  1.3600,  2.4849,  ..., -0.8661,  0.4525, -1.2002],
           [ 0.7006,  0.7042,  0.7436,  ..., -0.9510, -0.5729, -0.8725],
           ...,
           [-1.2652,  0.6114, -0.8459,  ..., -1.2724,  1.6238,  3.0824],
           [-0.2696,  1.1248, -1.8480,  ..., -0.0999,  0.5549,  3.4904],
           [-0.1094,  1.5792, -1.7397,  ..., -2.3945,  1.8959,  2.4251]],
  
          [[ 2.5619,  2.6316, -0.4933,  ..., -0.4808,  0.8992,  0.1865],
           [ 2.1542,  0.6454,  2.3940,  ..., -1.3039, -1.1708,  0.1922],
           [ 1.9572,  0.7328,  0.9534,  ..., -0.6537, -0.2140,  0.2837],
           ...,
           [-0.2552,  0.1444, -1.3306,  ..., -2.8001,  1.0467,  1.1371],
           [-0.3360, -1.3910, -2.1305,  ..., -1.0960,  0.9125,  1.4524],
           [ 0.5688,  2.4287, -1.8332,  ..., -2.2809, -0.0669,  3.0594]],
  
          [[ 0.9161, -1.2487,  0.2975,  ..., -2.7695,  0.0579,  1.6607],
           

In [26]:
model = L2DModel(new_env,
                 baseline="rollout",
                 batch_size=32,
                 train_data_size=1000,
                 val_data_size=1_000,
                 optimizer_kwargs={"lr": 1e-5}, 
                 policy_kwargs={"encoder": enc, "embed_dim": 64})


Attribute 'env' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['env'])`.


Attribute 'policy' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['policy'])`.



In [27]:
model_old = L2DModel(new_env,
                 baseline="rollout",
                 batch_size=32,
                 train_data_size=1000,
                 val_data_size=1_000,
                 optimizer_kwargs={"lr": 1e-5})

In [28]:
model_load_2 = L2DModel.load_from_checkpoint("./logs_ver2/csv_logs/version_2/checkpoints/epoch=19-step=1580.ckpt", map_location="cpu", load_baseline=False)


Found keys that are not in the model state dict but in the checkpoint: ['baseline.baseline.policy.encoder.init_embedding.init_ops_embed.weight', 'baseline.baseline.policy.encoder.init_embedding.pos_encoder.pe', 'baseline.baseline.policy.encoder.init_embedding.init_ma_embed.weight', 'baseline.baseline.policy.encoder.init_embedding.edge_embed.weight', 'baseline.baseline.policy.encoder.layers.0.hgnn1.self_attn', 'baseline.baseline.policy.encoder.layers.0.hgnn1.cross_attn', 'baseline.baseline.policy.encoder.layers.0.hgnn1.edge_attn', 'baseline.baseline.policy.encoder.layers.0.hgnn2.self_attn', 'baseline.baseline.policy.encoder.layers.0.hgnn2.cross_attn', 'baseline.baseline.policy.encoder.layers.0.hgnn2.edge_attn', 'baseline.baseline.policy.encoder.layers.0.ffn1.ops.norm1.normalizer.weight', 'baseline.baseline.policy.encoder.layers.0.ffn1.ops.norm1.normalizer.bias', 'baseline.baseline.policy.encoder.layers.0.ffn1.ops.norm1.normalizer.running_mean', 'baseline.baseline.policy.encoder.layers.

In [29]:
td_new = new_env.reset(batch_size=64)

res_new = model(td_new, env=new_env)

td_new = new_env.reset(td_new)

res_old = model_old(td_new, env=new_env)

td_new = new_env.reset(td_new)

res_load = model_load(td_new, env=new_env)

td_new = new_env.reset(td_new)

res_load_2 = model_load_2(td_new, env=new_env)

res_new["reward"].mean(), res_old["reward"].mean(), res_load["reward"].mean(), res_load_2["reward"].mean()

(tensor(-123.5312), tensor(-123.5469), tensor(-118.1875), tensor(-114.6094))

In [30]:
encoder_1 = HetGNNEncoder(embed_dim=16, num_layers=8)
encoder_2 = HetGNNEncoder(embed_dim=16, num_layers=4, init_embedding=AdditionalMachineInfoInitEmbedding(16, "ma_ops_processed_left"))

enc = MultiEncoder(encoder_1, encoder_2)

In [32]:
td_new = new_env.reset(batch_size=64)

res_new = model.to("cpu")(td_new.to("cpu"), env=new_env.to("cpu"))

res_new["reward"].mean()

tensor(-145.3438)

In [38]:
trainer = RL4COTrainer(
    max_epochs=3,
    accelerator=accelerator,
    devices=1,
    logger=loggers,
    log_every_n_steps=20
)

Using bfloat16 Automatic Mixed Precision (AMP)
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [39]:
model = L2DModel(new_env,
                 baseline="rollout",
                 batch_size=32,
                 train_data_size=1000,
                 val_data_size=1_000,
                 optimizer_kwargs={"lr": 1e-5}, 
                 policy_kwargs={"encoder": enc, "embed_dim": 16}).to("cpu")

In [40]:
trainer.fit(model)

val_file not set. Generating dataset instead
test_file not set. Generating dataset instead

  | Name     | Type           | Params | Mode 
----------------------------------------------------
0 | env      | FJSPEnvMOPM    | 0      | train
1 | policy   | L2DPolicy      | 29.4 K | train
2 | baseline | WarmupBaseline | 29.4 K | train
----------------------------------------------------
58.8 K    Trainable params
0         Non-trainable params
58.8 K    Total params
0.235     Total estimated model params size (MB)
329       Modules in train mode
325       Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=3` reached.


In [41]:
model

L2DModel(
  (env): FJSPEnvMOPM()
  (policy): L2DPolicy(
    (encoder): MultiEncoder(
      (encoder_1): HetGNNEncoder(
        (init_embedding): FJSPInitEmbedding(
          (init_ops_embed): Linear(in_features=5, out_features=16, bias=False)
          (pos_encoder): PositionalEncoding(
            (dropout): Dropout(p=0.0, inplace=False)
          )
          (init_ma_embed): Linear(in_features=1, out_features=16, bias=False)
          (edge_embed): Linear(in_features=1, out_features=16, bias=False)
        )
        (layers): ModuleList(
          (0-7): 8 x HetGNNBlock(
            (hgnn1): HetGNNLayer(
              (activation): ReLU()
            )
            (hgnn2): HetGNNLayer(
              (activation): ReLU()
            )
            (ffn1): TransformerFFN(
              (ops): ModuleDict(
                (norm1): Normalization(
                  (normalizer): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                )
               

In [42]:
td_new = new_env.reset(batch_size=64)

res_new = model.to("cpu")(td_new.to("cpu"), env=new_env.to("cpu"))

res_new["reward"].mean()

tensor(-129.5156)