In [1]:
from rl4co.envs import FJSPEnv
from rl4co.envs.scheduling.fjsp.generator import FJSPGenerator

from rl4co.models.nn.graph.hgnn import HetGNNEncoder

import torch

from pytorch_lightning.loggers import CSVLogger
from rl4co.utils.trainer import RL4COTrainer

In [2]:
import pandas as pd

import plotly.express as px

In [2]:
class FJSPGeneratorWithGPM(FJSPGenerator):
    """FJSP instance generator that appends one extra "general purpose" machine.

    Instances are produced by ``FJSPGenerator._generate``. Afterwards one more
    machine row is concatenated to ``proc_times``; every entry of that row is
    ``self.max_processing_time``.
    """

    def __init__(self, num_jobs = 10, num_machines = 5, min_ops_per_job = 4, max_ops_per_job = 6, min_processing_time = 1, max_processing_time = 20, min_eligible_ma_per_op = 1, max_eligible_ma_per_op = None, same_mean_per_op = True, **unused_kwargs):
        """Forward all arguments unchanged to ``FJSPGenerator.__init__``."""
        super().__init__(
            num_jobs,
            num_machines,
            min_ops_per_job,
            max_ops_per_job,
            min_processing_time,
            max_processing_time,
            min_eligible_ma_per_op,
            max_eligible_ma_per_op,
            same_mean_per_op,
            **unused_kwargs,
        )

    def _generate(self, batch_size):
        """Generate a batch and add the general-purpose machine to ``proc_times``.

        Returns the container produced by the parent generator with
        ``proc_times`` grown by one entry along dim 1 (the machine axis here).
        """
        instances = super()._generate(batch_size)
        proc_times = instances["proc_times"]

        # The unpacking also enforces that proc_times is 3-dimensional.
        bs, _, n_ops = proc_times.shape

        # One row per instance with the same dtype/device as proc_times.
        # NOTE(review): the row is filled for every operation column, including
        # any padding columns the base generator may emit — confirm intended.
        gpm_row = proc_times.new_ones((bs, 1, n_ops)) * self.max_processing_time

        instances["proc_times"] = torch.cat((proc_times, gpm_row), dim=1)
        return instances

In [58]:
class FJSPEnvMOPM(FJSPEnv):
    """FJSP environment in which every machine has a budget of operations left.

    Instances come from ``FJSPGeneratorWithGPM``, so the last machine is the
    extra general-purpose machine. On reset each machine receives a random
    budget stored under ``td["ma_ops_processed_left"]``; the last machine gets
    the fixed budget ``GPM_OPS_BUDGET``. Every scheduling action consumes one
    unit of the chosen machine's budget, and machines with no budget left are
    removed from the action mask.
    """

    # Budget given to the last (general-purpose) machine on every reset.
    GPM_OPS_BUDGET = 100

    def __init__(self, generator_params = ..., check_mask = False, stepwise_reward = False, **kwargs):
        """Build the environment.

        Args:
            generator_params: mapping with the ``FJSPGeneratorWithGPM`` keyword
                arguments plus the two required keys ``"max_ops_limit"`` and
                ``"min_ops_limit"``. The ``...`` default is only a placeholder;
                a mapping must be passed.
            check_mask: forwarded to ``FJSPEnv``.
            stepwise_reward: forwarded to ``FJSPEnv``.
            **kwargs: forwarded to ``FJSPEnv``.
        """
        # Work on a copy so popping the budget keys does not alter the caller's dict.
        generator_params = {**generator_params}
        self.max_ops_limit = generator_params.pop("max_ops_limit")
        self.min_ops_limit = generator_params.pop("min_ops_limit")

        generator = FJSPGeneratorWithGPM(**generator_params)

        super().__init__(generator=generator, mask_no_ops=True, check_mask=check_mask, stepwise_reward=stepwise_reward, **kwargs)

    def _reset(self, td = None, batch_size=None):
        """Reset the parent environment and attach the per-machine budgets."""
        td = super()._reset(td, batch_size)

        # The upper bound of randint_like is exclusive: budgets are drawn from
        # [min_ops_limit, max_ops_limit - 1]. Shape/dtype follow td["busy_until"].
        ma_ops_processed_left = torch.randint_like(
            td["busy_until"],
            low=self.min_ops_limit,
            high=self.max_ops_limit,
        )

        # The last machine gets the fixed budget instead of a random one.
        ma_ops_processed_left[:, -1] = self.GPM_OPS_BUDGET

        td["ma_ops_processed_left"] = ma_ops_processed_left

        return td.to(self.device)

    def _step(self, td):
        """Apply the action, consume machine budget and mask exhausted machines.

        Actions greater than 0 are treated as scheduling actions whose machine
        index is ``(action - 1) % n_machines``; action 0 consumes no budget.
        """
        td = td.to(self.device)

        n_batches, n_jobs = td["end_op_per_job"].shape

        # Decrement a copy: the incoming tensor may still be referenced by the
        # caller (e.g. a reset state reused for another rollout) and must not be
        # modified in place.
        ma_ops_processed_left = td["ma_ops_processed_left"].clone()

        n_machines = ma_ops_processed_left.size(1)

        action = td["action"]
        is_scheduling = action > 0
        machines = (action - 1) % n_machines

        batch_index = torch.arange(ma_ops_processed_left.size(0), device=self.device)
        ma_ops_processed_left[batch_index[is_scheduling], machines[is_scheduling]] -= 1

        # A negative budget means an already exhausted machine was selected.
        assert (ma_ops_processed_left >= 0).all()

        td = super()._step(td)

        td["ma_ops_processed_left"] = ma_ops_processed_left

        availible_machines_mask = (ma_ops_processed_left > 0)

        # Column 0 is always kept; the machine availability is tiled once per job.
        # assumes action_mask has 1 + n_jobs * n_machines columns — TODO confirm
        keep_first_column = torch.ones((n_batches, 1), dtype=torch.bool, device=self.device)
        new_mask = torch.concat([keep_first_column, availible_machines_mask.repeat(1, n_jobs)], dim=1)

        new_mask = td["action_mask"] * new_mask

        # Rows without any feasible action fall back to action 0.
        all_false_rows = ~new_mask.any(dim=1)
        new_mask[all_false_rows, 0] = True

        td["action_mask"] = new_mask

        return td

In [59]:
# Instance settings shared by all generators in this notebook.
generator_params = dict(
    num_jobs=20,  # total number of jobs
    num_machines=10,  # total number of machines that can process operations
    min_ops_per_job=1,  # minimum number of operations per job
    max_ops_per_job=5,  # maximum number of operations per job
    min_processing_time=1,  # minimum time a machine needs to process an operation
    max_processing_time=20,  # maximum time a machine needs to process an operation
    min_eligible_ma_per_op=1,  # minimum number of machines able to process an operation
    max_eligible_ma_per_op=5,  # maximum number of machines able to process an operation
)

# Same settings plus the bounds of the per-machine operation budget.
new_generator_params = {**generator_params, "max_ops_limit": 10, "min_ops_limit": 5}

In [60]:
# Environment with per-machine operation budgets (needs the two *_ops_limit keys).
new_env = FJSPEnvMOPM(generator_params=new_generator_params)

# Baseline environment: it gets only the plain generator settings. Passing the
# extended dict here makes the generator report the two budget keys as unused kwargs.
env = FJSPEnv(generator_params=generator_params)

Found 2 unused kwargs: {'max_ops_limit': 10, 'min_ops_limit': 5}


In [61]:
from rl4co.models.zoo.l2d import L2DPolicy, L2DModel

In [62]:
# Hardware-dependent run configuration: larger batches, more training data and a
# larger encoder-layer setting when a CUDA device is available.
embed_dim = 64
accelerator, batch_size, train_data_size, num_encoder_layers = (
    ("gpu", 64, 5_000, 8) if torch.cuda.is_available() else ("cpu", 32, 1_000, 2)
)

accelerator

'gpu'

In [16]:
# Baseline L2D model bound to the plain `env`, using the "rollout" baseline.
# Batch size and training-set size come from the hardware-dependent config cell.
model = L2DModel(env,
                 baseline="rollout",
                 batch_size=batch_size,
                 train_data_size=train_data_size,
                 val_data_size=1_000,
                 optimizer_kwargs={"lr": 1e-5})

In [17]:
# Untrained baseline model evaluated on one batch from each environment.
td_new = new_env.reset(batch_size=64)

td = env.reset(batch_size=64)

# Pure evaluation: gradients are not needed, so no autograd graph is built
# during the rollouts.
with torch.no_grad():
    res = model(td, env=env)

    res_new = model(td_new, env=new_env)

res["reward"].mean(), res_new["reward"].mean()

(tensor(-92.0156), tensor(-117.9375))

In [22]:
# Metrics of the baseline run are written by a CSV logger rooted at "logs_research_1".
loggers = [CSVLogger(save_dir="logs_research_1", name="csv_logs")]

In [23]:
# Trainer for the baseline run: 20 epochs on one device of the configured
# accelerator, logging to `loggers` every 20 steps.
trainer = RL4COTrainer(
    max_epochs=20,
    accelerator=accelerator,
    devices=1,
    logger=loggers,
    log_every_n_steps=20
)

Using 16bit Automatic Mixed Precision (AMP)
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [24]:
# Train the baseline model (bound to the plain `env`).
trainer.fit(model)

val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type           | Params | Mode 
----------------------------------------------------
0 | env      | FJSPEnv        | 0      | train
1 | policy   | L2DPolicy      | 81.2 K | train
2 | baseline | WarmupBaseline | 81.2 K | train
----------------------------------------------------
162 K     Trainable params
0         Non-trainable params
162 K     Total params
0.649     Total estimated model params size (MB)
74        Modules in train mode
70        Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/mlcore/conda/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.
/home/mlcore/conda/lib/python3.11/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


In [34]:
# Load the logged metrics of the baseline run. Both reward columns are sign-flipped;
# the validation column is additionally shifted up by one row.
metrics = pd.read_csv("./logs_research_1/csv_logs/version_0/metrics.csv").assign(
    **{
        "val/reward": lambda frame: -frame["val/reward"].shift(-1),
        "train/reward": lambda frame: -frame["train/reward"],
    }
)

In [None]:
# Training vs. validation curve of the baseline run. The plotted columns hold
# sign-flipped rewards, which the title states so the figure can be read alone.
px.line(
    data_frame=metrics.dropna(),
    x="step",
    y=["train/reward", "val/reward"],
    title="Baseline L2D on FJSPEnv: negated train/val reward per step",
)

In [93]:
# Trained baseline model evaluated on CPU on one batch from each environment.
td_new = new_env.reset(batch_size=64).to("cpu")

td = env.reset(batch_size=64).to("cpu")

# Pure evaluation: gradients are not needed, so no autograd graph is built
# during the rollouts.
with torch.no_grad():
    res = model.to("cpu")(td, env=env)

    res_new = model.to("cpu")(td_new, env=new_env.to("cpu"))

res["reward"].mean(), res_new["reward"].mean()

(tensor(-83.9219), tensor(-108.9531))

### New env: default L2D model trained on FJSPEnvMOPM

In [29]:
# Same L2D configuration as the baseline model, but bound to `new_env`
# (the environment with per-machine operation budgets).
old_model_new_env = L2DModel(new_env,
                             baseline="rollout",
                             batch_size=batch_size,
                             train_data_size=train_data_size,
                             val_data_size=1_000,
                             optimizer_kwargs={"lr": 1e-5})


Attribute 'env' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['env'])`.


Attribute 'policy' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['policy'])`.



In [30]:
# Mean reward of the still untrained model on one batch of the new environment.
td_new = new_env.reset(batch_size=64)

# Pure evaluation: gradients are not needed, so no autograd graph is built.
with torch.no_grad():
    res_new = old_model_new_env(td_new, env=new_env)

res_new["reward"].mean()

tensor(-122.8281)

In [36]:
# Metrics of this run are written by a CSV logger rooted at "old_model_new_env".
loggers = [CSVLogger(save_dir="old_model_new_env", name="csv_logs")]

In [37]:
# Fresh trainer for the default L2D model on the new environment; same settings
# as the baseline run (20 epochs, one device, logging every 20 steps).
trainer = RL4COTrainer(
    max_epochs=20,
    accelerator=accelerator,
    devices=1,
    logger=loggers,
    log_every_n_steps=20
)

Using 16bit Automatic Mixed Precision (AMP)
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [38]:
# Train the default L2D model on the new environment.
trainer.fit(old_model_new_env)

val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type           | Params | Mode 
----------------------------------------------------
0 | env      | FJSPEnvMOPM    | 0      | train
1 | policy   | L2DPolicy      | 81.2 K | train
2 | baseline | WarmupBaseline | 81.2 K | train
----------------------------------------------------
162 K     Trainable params
0         Non-trainable params
162 K     Total params
0.649     Total estimated model params size (MB)
74        Modules in train mode
70        Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]


The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.



Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


In [3]:
# Load the logged metrics of the "old_model_new_env" run. Both reward columns are
# sign-flipped; the validation column is additionally shifted up by one row.
metrics1 = pd.read_csv("./old_model_new_env/csv_logs/version_0/metrics.csv").assign(
    **{
        "val/reward": lambda frame: -frame["val/reward"].shift(-1),
        "train/reward": lambda frame: -frame["train/reward"],
    }
)

In [None]:
# Training vs. validation curve of the default L2D model on the new environment.
# The plotted columns hold sign-flipped rewards, as stated in the title.
px.line(
    data_frame=metrics1.dropna(),
    x="step",
    y=["train/reward", "val/reward"],
    title="Default L2D on FJSPEnvMOPM: negated train/val reward per step",
)

In [96]:
# Mean reward of the trained default model on one CPU batch of the new environment.
td_new = new_env.reset(batch_size=64).to("cpu")

# Pure evaluation: gradients are not needed, so no autograd graph is built.
with torch.no_grad():
    res_new = old_model_new_env.to("cpu")(td_new, env=new_env.to("cpu"))

res_new["reward"].mean()

tensor(-100.5469)

# Новая модель: два энкодера (стандартный + энкодер остатка операций на машинах)

In [72]:
import torch.nn as nn

class MultiEncoder(nn.Module):
    """Run two encoders on the same input and add their hidden states.

    Each wrapped encoder must return a pair ``(hidden, extra)`` where ``hidden``
    is indexable; only ``hidden[0]`` and ``hidden[1]`` are combined and the
    second element of each pair is discarded.
    """

    def __init__(self, encoder_1, encoder_2, *args, **kwargs):
        """Store both encoders as submodules; extra arguments go to ``nn.Module``."""
        super().__init__(*args, **kwargs)
        self.encoder_1 = encoder_1
        self.encoder_2 = encoder_2

    def forward(self, td):
        """Return ``((h1[0] + h2[0], h1[1] + h2[1]), None)`` for input ``td``."""
        first, _ = self.encoder_1(td)
        second, _ = self.encoder_2(td)

        summed = (first[0] + second[0], first[1] + second[1])
        return summed, None

In [79]:
class AdditionalMachineInfoInitEmbedding(nn.Module):
    """Initial embedding built from a single per-machine feature of ``td``.

    Only the machine embedding carries information: it is a linear projection of
    ``td[feature_name]``. Operation and edge embeddings are filled with standard
    normal noise.

    NOTE(review): the noise is re-sampled on every call, so the output is not
    deterministic even for identical inputs — confirm this is intended.

    Args:
        embed_dim: size of every returned embedding vector.
        feature_name: key of the per-machine feature in ``td``; the tensor is
            read as ``(batch, n_machines)`` and gets a trailing feature axis.
        feature_dim: input size of the linear projection (the forward pass adds
            exactly one trailing axis, which matches the default of 1).
        linear_bias: whether the projection has a bias term.
    """

    def __init__(
        self,
        embed_dim,
        feature_name: str,
        feature_dim: int = 1,
        linear_bias: bool = True,
    ):
        super().__init__()
        self.feature_name = feature_name
        self.embed_dim = embed_dim
        self.init_embed = nn.Linear(feature_dim, embed_dim, linear_bias)

    def forward(self, td):
        """Return ``(ops_emb, ma_emb, edge_emb, edges)`` for the batch in ``td``.

        Reads ``td["is_ready"]`` for the ``(batch, n_ops)`` shape,
        ``td[self.feature_name]`` for the machine feature and ``td["ops_ma_adj"]``
        for the adjacency, which is returned with its last two axes swapped.
        """
        bs, n_ops = td["is_ready"].shape

        feature = td[self.feature_name]
        # Take the device from the feature tensor itself: the container's own
        # `device` attribute can be unset, and a plain mapping has none at all.
        device = feature.device

        ops_emb = torch.randn(size=(bs, n_ops, self.embed_dim), device=device)

        # Cast so integer-valued counters can be fed to the linear layer as well.
        ma_emb = self.init_embed(feature.unsqueeze(2).to(self.init_embed.weight.dtype))
        n_machines = ma_emb.size(1)

        edge_emb = torch.randn(size=(bs, n_ops, n_machines, self.embed_dim), device=device)
        edges = td["ops_ma_adj"].transpose(1, 2).to(device)

        return ops_emb, ma_emb, edge_emb, edges

In [80]:
# NOTE(review): td_new is not used in this cell; later cells reassign it before use.
td_new = new_env.reset(batch_size=64)

# encoder_1 uses the encoder's default initial embedding; encoder_2 is built with
# an init embedding driven by the per-machine "ma_ops_processed_left" budget.
encoder_1 = HetGNNEncoder(embed_dim=64, num_layers=2)
encoder_2 = HetGNNEncoder(embed_dim=64, num_layers=2, init_embedding=AdditionalMachineInfoInitEmbedding(64, "ma_ops_processed_left"))

# The hidden states of both encoders are summed by MultiEncoder.
enc = MultiEncoder(encoder_1, encoder_2)

In [81]:
# L2D model on the new environment with the two-encoder policy. The batch size
# comes from the config cell so all compared runs share it (it was hard-coded to
# 64, which differs from the configured value on the CPU branch).
new_model = L2DModel(
    new_env,
    baseline="rollout",
    batch_size=batch_size,
    train_data_size=train_data_size,
    val_data_size=1_000,
    optimizer_kwargs={"lr": 1e-5},
    policy_kwargs={"encoder": enc, "embed_dim": 64},
)


Attribute 'env' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['env'])`.


Attribute 'policy' is an instance of `nn.Module` and is already saved during checkpointing. It is recommended to ignore them using `self.save_hyperparameters(ignore=['policy'])`.



In [82]:
# Metrics of the two-encoder run are written by a CSV logger rooted at "new_model".
loggers = [CSVLogger(save_dir="new_model", name="csv_logs")]

In [83]:
# Fresh trainer for the two-encoder model; same settings as the other runs
# (20 epochs, one device, logging every 20 steps).
trainer = RL4COTrainer(
    max_epochs=20,
    accelerator=accelerator,
    devices=1,
    logger=loggers,
    log_every_n_steps=20
)

Using 16bit Automatic Mixed Precision (AMP)
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [84]:
# Train the two-encoder model on the new environment.
trainer.fit(new_model)

val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type           | Params | Mode 
----------------------------------------------------
0 | env      | FJSPEnvMOPM    | 0      | eval 
1 | policy   | L2DPolicy      | 149 K  | train
2 | baseline | WarmupBaseline | 149 K  | train
----------------------------------------------------
298 K     Trainable params
0         Non-trainable params
298 K     Total params
1.195     Total estimated model params size (MB)
128       Modules in train mode
126       Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]


The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.



Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


In [4]:
# Load the logged metrics of the "new_model" run. Both reward columns are
# sign-flipped; the validation column is additionally shifted up by one row.
metrics2 = pd.read_csv("./new_model/csv_logs/version_0/metrics.csv").assign(
    **{
        "val/reward": lambda frame: -frame["val/reward"].shift(-1),
        "train/reward": lambda frame: -frame["train/reward"],
    }
)

In [None]:
# Training vs. validation curve of the two-encoder model on the new environment.
# The plotted columns hold sign-flipped rewards, as stated in the title.
px.line(
    data_frame=metrics2.dropna(),
    x="step",
    y=["train/reward", "val/reward"],
    title="Two-encoder L2D on FJSPEnvMOPM: negated train/val reward per step",
)

In [99]:
# Mean reward of the trained two-encoder model on one CPU batch of the new environment.
td_new = new_env.reset(batch_size=64).to("cpu")

# Pure evaluation: gradients are not needed, so no autograd graph is built.
with torch.no_grad():
    res_new = new_model.to("cpu")(td_new, env=new_env.to("cpu"))

res_new["reward"].mean()

tensor(-93.4219)

# Смесь: обученные (замороженные) энкодер и декодер + новый обучаемый энкодер

In [143]:
import copy

In [144]:
# Independent copies of the baseline model's encoder and decoder, so freezing
# them below does not touch `model` itself.
freezed_encoder = copy.deepcopy(model.policy.encoder)
freezed_decoder = copy.deepcopy(model.policy.decoder)

In [145]:
# Freeze the copied encoder: none of its parameters receive gradients.
# The trailing semicolon keeps the notebook from echoing the module repr.
freezed_encoder.requires_grad_(False);

In [146]:
# Freeze the copied decoder: none of its parameters receive gradients.
# The trailing semicolon keeps the notebook from echoing the module repr.
freezed_decoder.requires_grad_(False);

In [147]:
# New, trainable encoder whose machine embedding is driven by the
# "ma_ops_processed_left" budget feature.
learnable_encoder = HetGNNEncoder(embed_dim=64, num_layers=2, init_embedding=AdditionalMachineInfoInitEmbedding(64, "ma_ops_processed_left"))

In [148]:
# Sum the hidden states of the frozen pretrained encoder and the new trainable one.
enc_mix = MultiEncoder(freezed_encoder, learnable_encoder)

In [149]:
# L2D model on the new environment: frozen pretrained encoder + decoder combined
# with a new trainable encoder. The batch size comes from the config cell so all
# compared runs share it (it was hard-coded to 64, which differs from the
# configured value on the CPU branch).
new_mix_model = L2DModel(
    new_env,
    baseline="rollout",
    batch_size=batch_size,
    train_data_size=train_data_size,
    val_data_size=1_000,
    optimizer_kwargs={"lr": 1e-5},
    policy_kwargs={"encoder": enc_mix, "embed_dim": 64, "decoder": freezed_decoder},
)

In [150]:
# Metrics of the mixed run are written by a CSV logger rooted at "new_mix_model".
loggers = [CSVLogger(save_dir="new_mix_model", name="csv_logs")]

In [151]:
# Fresh trainer for the mixed (frozen + trainable) model; same settings as the
# other runs (20 epochs, one device, logging every 20 steps).
trainer = RL4COTrainer(
    max_epochs=20,
    accelerator=accelerator,
    devices=1,
    logger=loggers,
    log_every_n_steps=20
)

Using 16bit Automatic Mixed Precision (AMP)
Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [152]:
# Train the mixed model; only the parameters that still require gradients are updated.
trainer.fit(new_mix_model)

val_file not set. Generating dataset instead
test_file not set. Generating dataset instead
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name     | Type           | Params | Mode 
----------------------------------------------------
0 | env      | FJSPEnvMOPM    | 0      | eval 
1 | policy   | L2DPolicy      | 149 K  | train
2 | baseline | WarmupBaseline | 149 K  | train
----------------------------------------------------
136 K     Trainable params
162 K     Non-trainable params
298 K     Total params
1.195     Total estimated model params size (MB)
59        Modules in train mode
195       Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


In [5]:
# Load the logged metrics of the "new_mix_model" run. Both reward columns are
# sign-flipped; the validation column is additionally shifted up by one row.
metrics3 = pd.read_csv("./new_mix_model/csv_logs/version_0/metrics.csv").assign(
    **{
        "val/reward": lambda frame: -frame["val/reward"].shift(-1),
        "train/reward": lambda frame: -frame["train/reward"],
    }
)

In [None]:
# Training vs. validation curve of the mixed (frozen + trainable) model.
# The plotted columns hold sign-flipped rewards, as stated in the title.
px.line(
    data_frame=metrics3.dropna(),
    x="step",
    y=["train/reward", "val/reward"],
    title="Frozen + trainable L2D on FJSPEnvMOPM: negated train/val reward per step",
)