In [1]:
from torchrl.data.replay_buffers import (
    TensorDictReplayBuffer,
    LazyTensorStorage,
    PrioritizedSampler,
)
from tensordict import TensorDict

import torch
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import gymnasium as gym
import numpy as np

from rlarcworld.arc_dataset import ArcDataset, ArcSampleTransformer
from rlarcworld.enviroments.arc_batch_grid_env import ArcBatchGridEnv
from rlarcworld.enviroments.wrappers.rewards import PixelAwareRewardWrapper
from rlarcworld.agent.actor import ArcActorNetwork
from rlarcworld.agent.critic import ArcCriticNetwork

from rlarcworld.algorithms.d4pg import D4PG

In [2]:
from torch.utils.tensorboard import SummaryWriter

# TensorBoard writer created without an explicit log_dir, so the library's
# default run directory is used.
# NOTE(review): no other cell visible in this notebook references `writer`;
# the D4PG cell constructs its own SummaryWriter — confirm this one is needed.
writer = SummaryWriter()

In [None]:
# --- Environment configuration ------------------------------------------------
grid_size = 30
color_values = 11
batch_size = 128
# Episode length and n-step horizon are drawn at random on every run:
# max_steps from [30, 100) and n_steps from [3, 10) (torch.randint excludes `high`).
# NOTE(review): no seed is set in this cell, so both values differ between runs.
max_steps = torch.randint(30, 100, size=(1,)).item()
n_steps = torch.randint(3, 20 // 2, size=(1,)).item()
gamma = 0.99
env = ArcBatchGridEnv(grid_size, color_values, n_steps=n_steps, gamma=gamma)
env = PixelAwareRewardWrapper(env, n_steps=n_steps, gamma=gamma)

# --- Data ---------------------------------------------------------------------
# Training split: each loader batch holds half of the dataset.
dataset = ArcDataset(
    arc_dataset_dir="./dataset/training",
    keep_in_memory=False,
    transform=ArcSampleTransformer(
        (grid_size, grid_size), examples_stack_dim=10
    ),
)
train_samples = DataLoader(dataset=dataset, batch_size=len(dataset) // 2)

# Evaluation split. The validation loader must wrap `dataset_val`; wrapping
# `dataset` here would silently validate on the training data.
dataset_val = ArcDataset(
    arc_dataset_dir="./dataset/evaluation",
    keep_in_memory=False,
    transform=ArcSampleTransformer(
        (grid_size, grid_size), examples_stack_dim=10
    ),
)
val_samples = DataLoader(dataset=dataset_val, batch_size=len(dataset_val) // 2)

# --- Replay buffer ------------------------------------------------------------
# Storage size and sampler capacity are both set to `batch_size`.
replay_buffer = TensorDictReplayBuffer(
    storage=LazyTensorStorage(batch_size),
    sampler=PrioritizedSampler(
        max_capacity=batch_size,
        alpha=1.0,
        beta=1.0,
    ),
)

# --- Networks -----------------------------------------------------------------
# Per-key critic settings; the "n_reward" entries are the "pixel_wise" ones
# scaled by n_steps.
num_atoms = {"pixel_wise": 50, "binary": 3, "n_reward": 50 * n_steps}
v_min = {"pixel_wise": -40, "binary": 0, "n_reward": -40 * n_steps}
v_max = {"pixel_wise": 2, "binary": 1, "n_reward": 2 * n_steps}
critic = ArcCriticNetwork(
    size=grid_size,
    color_values=color_values,
    num_atoms=num_atoms,
    v_min=v_min,
    v_max=v_max,
)

actor = ArcActorNetwork(size=grid_size, color_values=color_values)

# --- Training -----------------------------------------------------------------
# n_steps / gamma are read back from the wrapped env so the algorithm and the
# environment use the same values.
d4pg = D4PG(
    env=env,
    actor=actor,
    critic=critic,
    train_samples=train_samples,
    validation_samples=val_samples,
    batch_size=batch_size,
    replay_buffer=replay_buffer,
    target_update_frequency=5,
    n_steps=env.n_steps,
    gamma=env.gamma,
    tb_writer=SummaryWriter(log_dir="runs/test_validation_d4pg"),
)
d4pg.fit(
    max_steps=max_steps,
    validation_steps_frequency=10,
    validation_steps_per_train_step=10,
    validation_steps_per_episode=max_steps,
    logger_frequency=2,
)

In [12]:
from utils import get_nested_ref
import os

In [20]:
# Sanity checks on the artefacts produced by the D4PG run above.

# The TensorBoard log directory passed to D4PG must have been created.
assert os.path.isdir(
    "./runs/test_validation_d4pg"
), "Directory 'runs/test_validation_d4pg' does not exist"

# The validation and training reward histories are checked the same way: the
# entry must be a dict whose "n_reward" value is a numpy array.
# Validation is checked first so that `ref` / `last_key` end up pointing at the
# training entry, as before.
for history_key, label in (
    ("Validation/Reward", "validation"),
    ("Train/Reward", "training"),
):
    # NOTE(review): get_nested_ref comes from the local `utils` module; it is
    # used here as returning a (container, key) pair — confirm.
    ref, last_key = get_nested_ref(d4pg.history, history_key)
    reward_history = ref[last_key]
    assert isinstance(
        reward_history, dict
    ), "Invalid {} reward history format - expected dict, got {}".format(
        label, type(reward_history)
    )
    # .get() so that a missing key fails with the descriptive message below
    # rather than a bare KeyError; the message reports the key actually checked.
    n_reward = reward_history.get("n_reward")
    assert isinstance(
        n_reward, np.ndarray
    ), "Invalid {} reward history format - expected np.ndarray for n_reward, got {}".format(
        label, type(n_reward)
    )

In [1]:
import torch

In [10]:
# Draw a (10, 1) column of random 0/1 integers, show it, then show how many
# entries are equal to 1.
x = torch.randint(0, 2, (10, 1))
print(x)
ones_mask = x == 1
print(torch.count_nonzero(ones_mask))

tensor([[0],
        [0],
        [0],
        [1],
        [0],
        [0],
        [0],
        [0],
        [0],
        [0]])
tensor(1)


In [2]:
import torch
# Shape experiment with torch.nn.Transformer: 16 attention heads and 12 encoder
# layers, every other constructor argument left at its default.
transformer_model = torch.nn.Transformer(nhead=16, num_encoder_layers=12)
# Random source / target inputs that differ only in their first dimension
# (10 vs 20); the result `out` is inspected in the next cell.
src = torch.rand(10, 32, 512)
tgt = torch.rand(20, 32, 512)
out = transformer_model(src, tgt)



In [4]:
# Display the shape of the transformer output `out` computed in the previous cell.
out.shape

torch.Size([20, 32, 512])

In [9]:
# Indexing position 0 along the third axis of a 5-D tensor drops that axis;
# the trailing axes are kept whole, so they need no explicit slices.
torch.rand(20, 10, 2, 30, 30)[:, :, 0].shape

torch.Size([20, 10, 30, 30])

In [3]:
# Training split: each loader batch holds half of the dataset.
dataset = ArcDataset(
    arc_dataset_dir="./dataset/training",
    keep_in_memory=False,
    transform=ArcSampleTransformer(
        (30, 30), examples_stack_dim=10
    ),
)
train_samples = DataLoader(dataset=dataset, batch_size=len(dataset) // 2)

# Evaluation split. The validation loader must wrap `dataset_val`; wrapping
# `dataset` here would silently reuse the training data.
dataset_val = ArcDataset(
    arc_dataset_dir="./dataset/evaluation",
    keep_in_memory=False,
    transform=ArcSampleTransformer(
        (30, 30), examples_stack_dim=10
    ),
)
val_samples = DataLoader(dataset=dataset_val, batch_size=len(dataset_val) // 2)

# Prioritized replay buffer used to inspect the sampler's attributes.
# NOTE(review): the storage holds 100 items while the sampler is sized for
# 1000; the training cell earlier in this notebook uses one value for both —
# confirm the mismatch is intended.
replay_buffer = TensorDictReplayBuffer(
    storage=LazyTensorStorage(100),
    sampler=PrioritizedSampler(
        max_capacity=1000,
        alpha=1.0,
        beta=1.0,
    ),
)


In [8]:
# Read back the `alpha` value configured on the buffer's PrioritizedSampler
# (constructed with alpha=1.0 in the previous cell).
replay_buffer.sampler.alpha

1.0

In [8]:
from google.cloud import storage

In [13]:
# List every object stored under the "dataset/testing/" prefix of the bucket.
gcs_bucket_name = "rl_arc_project"
storage_client = storage.Client()
bucket = storage_client.bucket(gcs_bucket_name)
blobs = list(bucket.list_blobs(prefix="dataset/testing/"))
print(blobs)
# Example of downloading and parsing a single sample (kept for reference;
# it would additionally need `import json`):
# blob = bucket.blob("dataset/training/0.json")
# file_contents = blob.download_as_string()
# sample = json.loads(file_contents)

[<Blob: rl_arc_project, dataset/testing/d017b73f.json, 1740323185131824>, <Blob: rl_arc_project, dataset/testing/d19f7514.json, 1740323183346837>, <Blob: rl_arc_project, dataset/testing/d282b262.json, 1740323201380632>, <Blob: rl_arc_project, dataset/testing/d304284e.json, 1740323181627699>, <Blob: rl_arc_project, dataset/testing/d37a1ef5.json, 1740323202993398>, <Blob: rl_arc_project, dataset/testing/d47aa2ff.json, 1740323196706985>, <Blob: rl_arc_project, dataset/testing/d492a647.json, 1740323173226719>, <Blob: rl_arc_project, dataset/testing/d56f2372.json, 1740323188471501>, <Blob: rl_arc_project, dataset/testing/d5c634a2.json, 1740323191956951>, <Blob: rl_arc_project, dataset/testing/d931c21c.json, 1740323175021665>, <Blob: rl_arc_project, dataset/testing/d94c3b52.json, 1740323177593331>, <Blob: rl_arc_project, dataset/testing/da2b0fe3.json, 1740323194450322>, <Blob: rl_arc_project, dataset/testing/da515329.json, 1740323194553274>, <Blob: rl_arc_project, dataset/testing/dc2aa30b.js

In [14]:
# Object name (path inside the bucket) of the first blob returned by the listing.
blobs[0].name

'dataset/testing/d017b73f.json'

In [4]:
from google.cloud import storage
import torch
import os
# Smallest possible linear layer (one input feature, one output feature); it
# only serves as a model whose state_dict is written out in the next cell.
model = torch.nn.Linear(in_features=1, out_features=1)

In [5]:
# Upload the model's state_dict to Google Cloud Storage.

# Split the gs:// URI into the bucket name (first path component) and the
# object-name prefix (everything after the first "/").
path = "gs://rl_arc_project/runs/saved_models/test/"
bucket_name, _, prefix = path.split("gs://")[-1].partition("/")

# GCS object names always use "/" as separator regardless of the local OS, so
# the name is joined explicitly instead of with os.path.join. Empty components
# are skipped so an empty prefix does not produce a leading "/".
blob_name = "/".join(part for part in (prefix.strip("/"), "actor.ptc") if part)

client = storage.Client()
bucket = client.get_bucket(bucket_name)
blob = bucket.blob(blob_name)
# NOTE(review): ignore_flush=True is documented to turn flush() on the writer
# into a no-op instead of an error; presumably required because torch.save
# flushes the file object it writes to — confirm.
with blob.open("wb", ignore_flush=True) as f:
    torch.save(model.state_dict(), f)