In [16]:
# import NN necessities:
import torch
from torch import nn

# import plotting utilities:
import matplotlib.pyplot as plt

# import data preprocessing utilities:
from sklearn.model_selection import train_test_split
from pathlib import Path
import h5py
import numpy as np

In [47]:
class Network(nn.Module):
    """Fully connected regression network (23 inputs -> 18 outputs).

    Four hidden ``Linear`` layers of width 200 with ReLU activations. The
    module owns its optimizer (Adam, lr=1e-3) and its loss (mean squared
    error), so a training run only needs the data.
    """

    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(23, 200),      # in
            nn.ReLU(),
            nn.Linear(200, 200),    # hidden
            nn.ReLU(),
            nn.Linear(200, 200),
            nn.ReLU(),
            nn.Linear(200, 200),
            nn.ReLU(),
            nn.Linear(200, 18)       # out
        )
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-3)
        self.criterion = nn.MSELoss()    # using mean squared error as a loss metric

    def forward(self, x):
        """Return the network output for ``x`` (last dimension must be 23)."""
        return self.model(x)

    def train_epoch(self, x, y):
        """Run one full-batch optimisation step and return its loss as a float.

        Args:
            x: input tensor whose last dimension is 23.
            y: target tensor matching the network output (last dimension 18).
        """
        self.optimizer.zero_grad()

        y_hat = self.model(x)
        loss = self.criterion(y_hat, y)

        loss.backward()
        self.optimizer.step()
        return loss.item()

    def train(self, train_data=True, epochs=500, *, mode=None):
        """Fit the network, or switch training mode like ``nn.Module.train``.

        This method shares its name with ``nn.Module.train(mode)``, which
        ``nn.Module.eval()`` calls internally as ``self.train(False)``. To keep
        ``eval()`` / ``train()`` working, a boolean first argument (or an
        explicit ``mode=`` keyword) is forwarded to the base implementation.

        Args:
            train_data: an ``(x, y)`` pair of tensors to fit on, or a bool to
                set the module's training mode.
            epochs: number of full-batch steps to run when fitting.
            mode: keyword form of the base-class ``mode`` argument.

        Returns:
            A list with one loss value per epoch when fitting; ``self`` when
            only switching the training mode.
        """
        if mode is not None:
            return super().train(mode)
        if isinstance(train_data, bool):
            return super().train(train_data)

        x, y = train_data
        losses = []
        for epoch in range(epochs):
            epoch_loss = self.train_epoch(x, y)
            losses.append(epoch_loss)
            # report every 5th epoch, skipping epoch 0
            if epoch and epoch % 5 == 0:
                print(f"iteration {epoch}/{epochs}, loss = {epoch_loss}")
        return losses

    def validation_loss(self, test_data):
        """Return the loss on ``test_data`` (an ``(x, y)`` pair) as a float."""
        x, y = test_data
        # evaluation only: no autograd graph is needed for a forward pass
        with torch.no_grad():
            y_hat = self.model(x)
            loss = self.criterion(y_hat, y)
        return loss.item()

    def reset(self):
        """Re-initialise weights and optimizer, staying on the current device."""
        # __init__ builds fresh layers on the CPU, so remember where the
        # parameters lived and move the new ones back there afterwards
        device = next(self.parameters()).device
        self.__init__()
        self.to(device)

In [8]:
# load data
# Open explicitly read-only: older h5py releases default to append mode, which
# could modify the dataset file or create an empty one if the path is wrong.
# The handle is left open on purpose because later cells index into `data`.
data = h5py.File(Path("./halfcheetah_medium-v2.hdf5"), "r")
print(data.keys())


<KeysViewHDF5 ['actions', 'infos', 'metadata', 'next_observations', 'observations', 'rewards', 'terminals', 'timeouts']>


In [None]:
# pull out the four datasets used by the cells below
a, s_new, s, r = (
    data[key]
    for key in ("actions", "next_observations", "observations", "rewards")
)

# show the shape of each dataset
print(
    f"a shape = {a.shape}\n"
    f"s shape = {s.shape}\n"
    f"s_new shape = {s_new.shape}\n"
    f"r shape = {r.shape}\n"
)

a shape = (1000000, 6)
s shape = (1000000, 17)
s_new shape = (1000000, 17)
r shape = (1000000,)



In [None]:
# divide data
# inputs are actions stacked with observations; targets are next observations
# stacked with the reward as one extra column
x = np.hstack([a, s])                                # -> (N, 23)
y = np.hstack([s_new, np.array(r).reshape(-1, 1)])   # -> (N, 18)

# converting to tensors
x = torch.tensor(x, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32)

# fixed random_state so the train/test split is identical on every run
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, shuffle=True, random_state=42
)

# info
print(
    f"x_train shape = {x_train.shape}\n" \
    f"x_test shape = {x_test.shape}\n" \
    f"y_train shape = {y_train.shape}\n" \
    f"y_test shape = {y_test.shape}"
)

x_train shape = torch.Size([800000, 23])
x_test shape = torch.Size([200000, 23])
y_train shape = torch.Size([800000, 18])
y_test shape = torch.Size([200000, 18])


In [48]:
# train model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Tensor.to() returns a new tensor instead of moving in place, so each result
# has to be bound back to its name (y_test was previously left on the CPU)
x_train = x_train.to(device)
x_test = x_test.to(device)
y_train = y_train.to(device)
y_test = y_test.to(device)
train_data = (x_train, y_train)

model = Network()
model = model.to(device)

# keep the per-epoch losses for later inspection instead of letting the whole
# list be echoed as the cell output
losses = model.train(train_data, epochs=200)

iteration 5/200, loss = 19.004230499267578
iteration 10/200, loss = 16.140222549438477


KeyboardInterrupt: 