In [1]:
import gymnasium as gym
import gzip
import pickle

import numpy
import torch

from torch.nn import Linear, Module, MSELoss, Sequential, Tanh
from torch.optim import Adam
from torch.utils.data import DataLoader, Dataset

In [2]:
class MyDataset(Dataset):
    def __init__(self, quantity, validation=False):
        self._observation_data = self._load_data(
            self._build_data_path(
                "hopper_v2",
                quantity,
                "observations",
                validation
            )
        )

        self._action_data = self._load_data(
            self._build_data_path(
                "hopper_v2",
                quantity,
                "actions",
                validation
            )
        )

    def __len__(self):
        return self._action_data.shape[0]

    def __getitem__(self, idx):
        return self._observation_data[idx], self._action_data[idx]

    def _build_data_path(self, environment, quantity, type, validation):
        suffix = "validation" if validation else "training"
        path = f"./{environment}_expert_{quantity}k_{type}_{suffix}.pkl.gz"
    
        return path

    def _load_data(self, path):
        with gzip.GzipFile(path, "rb") as stream:
            array = numpy.load(stream, allow_pickle=True)
            return torch.from_numpy(array).float()

In [3]:
class MyModel(Module):
    def __init__(self):
        super().__init__()

        self._stack = Sequential(
            Linear(11, 64),
            Tanh(),
            Linear(64, 64),
            Tanh(),
            Linear(64, 64),
            Tanh(),
            Linear(64, 3),
        )

    def forward(self, x):
        return self._stack(x)

In [4]:
def training_loop(data_loader, model, calculate_loss, optimizer):
    size = len(data_loader.dataset)
    batches = len(data_loader)
    total_loss = 0

    model.train()

    for batch, (x, y) in enumerate(data_loader):
        prediction = model(x)
        loss = calculate_loss(prediction, y)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        total_loss += loss.item()

    return total_loss / batches


def validation_loop(data_loader, model, calculate_loss):
    size = len(data_loader.dataset)
    batches = len(data_loader)
    total_loss = 0

    model.eval()

    with torch.no_grad():
        for x, y in data_loader:
            prediction = model(x)
            total_loss += calculate_loss(prediction, y).item()

    return total_loss / batches

In [5]:
env = gym.make('Hopper-v2')

learning_rate = 1e-3
epochs = 40

validation_dataset = MyDataset(25, validation=True)
validation_data_loader = DataLoader(validation_dataset, batch_size=32, shuffle=True)

for quantity in [10, 25, 50, 100, 125]:
    training_dataset = MyDataset(quantity, validation=False)
    training_data_loader = DataLoader(training_dataset, batch_size=32, shuffle=True)

    model = MyModel().to("cpu")

    calculate_loss = MSELoss()
    optimizer = Adam(model.parameters(), lr=learning_rate)

    training_loss = 0
    validation_loss = 0

    for t in range(epochs):
        training_loss += training_loop(training_data_loader, model, calculate_loss, optimizer)
        validation_loss += validation_loop(validation_data_loader, model, calculate_loss)

    print(f"Dataset {quantity}k results")
    print("Avg training loss: ", training_loss / epochs)
    print("Avg validation loss: ", validation_loss / epochs)

  logger.deprecation(


Compiling C:\Users\hacksparr0w\AppData\Local\pypoetry\Cache\virtualenvs\deep-rl-gym-p8Sw4ZKV-py3.10\lib\site-packages\mujoco_py\cymj.pyx because it changed.
[1/1] Cythonizing C:\Users\hacksparr0w\AppData\Local\pypoetry\Cache\virtualenvs\deep-rl-gym-p8Sw4ZKV-py3.10\lib\site-packages\mujoco_py\cymj.pyx


Possible solutions:
	1. Declare the function as 'noexcept' if you control the definition and you're sure you don't want the function to raise exceptions.
	2. Use an 'int' return type on the function to allow an error code to be returned.
performance hint: C:\Users\hacksparr0w\AppData\Local\pypoetry\Cache\virtualenvs\deep-rl-gym-p8Sw4ZKV-py3.10\lib\site-packages\mujoco_py\cymj.pyx:104:5: Exception check on 'c_error_callback' will always require the GIL to be acquired.
Possible solutions:
	1. Declare the function as 'noexcept' if you control the definition and you're sure you don't want the function to raise exceptions.
	2. Use an 'int' return type on the function to allow an error code to be returned.

Error compiling Cython file:
------------------------------------------------------------
...
    '''
                       ^
------------------------------------------------------------

C:\Users\hacksparr0w\AppData\Local\pypoetry\Cache\virtualenvs\deep-rl-gym-p8Sw4ZKV-py3.10\lib\site-p

CompileError: C:\Users\hacksparr0w\AppData\Local\pypoetry\Cache\virtualenvs\deep-rl-gym-p8Sw4ZKV-py3.10\lib\site-packages\mujoco_py\cymj.pyx