In [1]:
from torchrl.data.replay_buffers import (
    TensorDictReplayBuffer,
    LazyTensorStorage,
    PrioritizedSampler,
)
from tensordict import TensorDict

import torch
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import gymnasium as gym
import numpy as np

from rlarcworld.arc_dataset import ArcDataset, ArcSampleTransformer
from rlarcworld.enviroments.arc_batch_grid_env import ArcBatchGridEnv
from rlarcworld.enviroments.wrappers.rewards import PixelAwareRewardWrapper
from rlarcworld.agent.actor import ArcActorNetwork
from rlarcworld.agent.critic import ArcCriticNetwork

from rlarcworld.algorithms.d4pg import D4PG

2025-02-19 10:31:03 - arc_batch_grid_env.py - INFO - Registering gymnasium environment


In [2]:
from torch.utils.tensorboard import SummaryWriter

# Default-constructed TensorBoard writer (no explicit log_dir given).
# NOTE(review): no later cell visible in this notebook references `writer`;
# the D4PG trainer is handed its own SummaryWriter with an explicit log_dir.
# Confirm whether this instance is still needed.
writer = SummaryWriter()

In [3]:
# --- Hyperparameters ---------------------------------------------------------
grid_size = 30
color_values = 11
batch_size = 128
# Episode length and n-step horizon are re-drawn on every run; torch.randint's
# upper bound is exclusive, so max_steps is in [30, 100) and n_steps in [3, 10).
# NOTE(review): no seed is set, so runs are not reproducible.
max_steps = torch.randint(30, 100, size=(1,)).item()
n_steps = torch.randint(3, 20 // 2, size=(1,)).item()
gamma = 0.99

# --- Environment -------------------------------------------------------------
# The reward wrapper is given the same n_steps / gamma as the base environment.
env = ArcBatchGridEnv(grid_size, color_values, n_steps=n_steps, gamma=gamma)
env = PixelAwareRewardWrapper(env, n_steps=n_steps, gamma=gamma)

# --- Data --------------------------------------------------------------------
# Training split, served in batches of half the dataset.
dataset = ArcDataset(
    arc_dataset_dir="./dataset/training",
    keep_in_memory=True,
    transform=ArcSampleTransformer(
        (grid_size, grid_size), examples_stack_dim=10
    ),
)
train_samples = DataLoader(dataset=dataset, batch_size=len(dataset) // 2)

# Evaluation split. The validation loader must be built from `dataset_val`;
# it was previously built from the training `dataset`, so validation metrics
# were computed on training data and `dataset_val` was never used.
dataset_val = ArcDataset(
    arc_dataset_dir="./dataset/evaluation",
    keep_in_memory=True,
    transform=ArcSampleTransformer(
        (grid_size, grid_size), examples_stack_dim=10
    ),
)
val_samples = DataLoader(dataset=dataset_val, batch_size=len(dataset_val) // 2)

# --- Replay buffer -----------------------------------------------------------
# Storage size and sampler capacity are both set to `batch_size`.
replay_buffer = TensorDictReplayBuffer(
    storage=LazyTensorStorage(batch_size),
    sampler=PrioritizedSampler(
        max_capacity=batch_size,
        alpha=1.0,
        beta=1.0,
    ),
)

# --- Networks ----------------------------------------------------------------
# Per-reward-type settings for the critic's value distribution; the "n_reward"
# entries are the "pixel_wise" ones scaled by n_steps.
num_atoms = {"pixel_wise": 50, "binary": 3, "n_reward": 50 * n_steps}
v_min = {"pixel_wise": -40, "binary": 0, "n_reward": -40 * n_steps}
v_max = {"pixel_wise": 2, "binary": 1, "n_reward": 2 * n_steps}
critic = ArcCriticNetwork(
    size=grid_size,
    color_values=color_values,
    num_atoms=num_atoms,
    v_min=v_min,
    v_max=v_max,
)

actor = ArcActorNetwork(size=grid_size, color_values=color_values)

# --- Training ----------------------------------------------------------------
d4pg = D4PG(
    env=env,
    actor=actor,
    critic=critic,
    train_samples=train_samples,
    validation_samples=val_samples,
    batch_size=batch_size,
    replay_buffer=replay_buffer,
    target_update_frequency=5,
    n_steps=env.n_steps,
    gamma=env.gamma,
    # The assertions later in the notebook expect this directory to exist.
    tb_writer=SummaryWriter(log_dir="runs/test_validation_d4pg"),
)
d4pg.fit(
    max_steps=max_steps,
    validation_steps_frequency=10,
    validation_steps_per_train_step=10,
    validation_steps_per_episode=max_steps,
    logger_frequency=2,
)

In [12]:
from utils import get_nested_ref
import os

In [20]:
# The TensorBoard writer passed to D4PG should have created its log directory.
assert os.path.isdir(
    "./runs/test_validation_d4pg"
), "Directory 'runs/test_validation_d4pg' does not exist"

# Both reward histories must be dicts holding an np.ndarray under "n_reward".
# The same two checks run for the validation and the training history; after
# the loop, `ref` / `last_key` refer to the training entry, as before.
for history_key, phase in (
    ("Validation/Reward", "validation"),
    ("Train/Reward", "training"),
):
    ref, last_key = get_nested_ref(d4pg.history, history_key)
    rewards = ref[last_key]
    assert isinstance(
        rewards, dict
    ), "Invalid {} reward history format - expected dict, got {}".format(
        phase, type(rewards)
    )
    # .get() so that a missing key fails the assertion with the message below
    # instead of raising a bare KeyError. The message reports the type found
    # under "n_reward" (it previously looked up a different key, "n_step").
    n_reward = rewards.get("n_reward")
    assert isinstance(
        n_reward, np.ndarray
    ), "Invalid {} reward history format - expected np.ndarray for n_reward, got {}".format(
        phase, type(n_reward)
    )

In [1]:
import torch

In [10]:
# Draw a (10, 1) column of random integers from {0, 1}, show it, then show
# how many of the entries are equal to 1.
x = torch.randint(0, 2, (10, 1))
print(x)
print(torch.count_nonzero(x.eq(1)))

tensor([[0],
        [0],
        [0],
        [1],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0]])
tensor(1)


In [2]:
import torch

# Build a torch.nn.Transformer with 16 attention heads and 12 encoder layers
# (all other settings left at their defaults), then push random source and
# target tensors through it.
transformer_model = torch.nn.Transformer(num_encoder_layers=12, nhead=16)
src = torch.rand(10, 32, 512)
tgt = torch.rand(20, 32, 512)
out = transformer_model(src=src, tgt=tgt)



In [4]:
# Display the shape of `out`, the transformer output computed in the previous cell.
out.shape

torch.Size([20, 32, 512])

In [None]:
# Picking index 1 along dim 1 of a 5-D tensor removes that axis; the remaining
# axes are kept whole, so the displayed shape is 4-D.
torch.rand(20, 10, 2, 30, 30)[:, 1].shape

tensor([[[[4.1218e-01, 8.5536e-02, 2.4064e-01,  ..., 9.4041e-01,
           5.9719e-02, 8.5378e-01],
          [4.0239e-01, 6.3156e-01, 1.4517e-01,  ..., 6.5641e-01,
           3.0292e-02, 7.6186e-01],
          [8.6020e-01, 1.2266e-01, 4.5527e-01,  ..., 8.3679e-01,
           7.7640e-01, 5.6512e-01],
          ...,
          [9.6519e-01, 7.2172e-01, 4.1001e-01,  ..., 1.7890e-01,
           9.6387e-01, 4.8548e-01],
          [2.0319e-01, 5.1182e-01, 2.8926e-01,  ..., 1.8486e-01,
           6.9950e-01, 1.0392e-01],
          [2.0788e-01, 2.7696e-01, 2.8339e-01,  ..., 6.9943e-01,
           8.4486e-01, 7.6022e-02]],

         [[1.3255e-01, 6.1902e-01, 5.8546e-01,  ..., 9.4947e-01,
           6.6759e-01, 9.6977e-01],
          [4.5054e-02, 3.8966e-01, 3.4987e-01,  ..., 9.6151e-01,
           1.8600e-01, 2.2942e-01],
          [4.5122e-01, 8.7766e-01, 8.6763e-01,  ..., 7.2140e-01,
           2.0912e-01, 8.6579e-01],
          ...,
          [8.7999e-01, 1.3032e-01, 5.3818e-01,  ..., 6.7960