In [1]:
!pip install h5py

ERROR: ld.so: object '/usr/lib/x86_64-linux-gnu/libGLEW.so' from LD_PRELOAD cannot be preloaded (cannot open shared object file): ignored.
ERROR: ld.so: object '/usr/lib/x86_64-linux-gnu/libGLEW.so' from LD_PRELOAD cannot be preloaded (cannot open shared object file): ignored.
ERROR: ld.so: object '/usr/lib/x86_64-linux-gnu/libGLEW.so' from LD_PRELOAD cannot be preloaded (cannot open shared object file): ignored.
Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/cvmfs/hpc.rug.nl/versions/2023.01/rocky8/x86_64/intel/icelake/software/Python/3.10.4-GCCcore-11.3.0/bin/python -m pip install --upgrade pip' command.[0m[33m
[0m

In [1]:
# import NN necessities:
import torch
from torch import nn

# import plotting utilities:
import matplotlib.pyplot as plt

# import data preprocessing utilities:
from sklearn.model_selection import train_test_split
from pathlib import Path
import h5py
import numpy as np

In [2]:
class Network(nn.Module):   # class defining a basic nn

    def __init__(self, h_size=200, h_layers=4):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(23, h_size),      # in
            nn.ReLU(),
            
        )
        for i in range(h_layers):
            self.model.append(nn.Linear(h_size, h_size))
            self.model.append(nn.ReLU())
        self.mean_head = nn.Linear(h_size, 18)
        self.logvar_head = nn.Linear(h_size, 18)

        # bind log-variance to avoid numerical instability
        self.max_logvar = nn.Parameter(torch.ones(18) * 0.5)
        self.min_logvar = nn.Parameter(torch.ones(18) * -10)

        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=1e-3)
        self.criterion = nn.MSELoss()    # using mean squared error as a loss metric

    def forward(self, x):
        res = self.model(x)
        mean = self.mean_head(res)
        logvar = self.logvar_head(res)

        # clamp log-variance using soft constraints (see MBPO/PETS)
        logvar = self.max_logvar - torch.nn.functional.softplus(self.max_logvar - logvar)
        logvar = self.min_logvar + torch.nn.functional.softplus(logvar - self.min_logvar)
        return mean, logvar

    def nll_loss(self, x, y):
        """
        Negative log-likelihood of Gaussian:
            NLL = 0.5 * [ logσ² + (y - µ)² / σ² ]
        """
        mean, logvar = self.forward(x)
        var = torch.exp(logvar)

        nll = 0.5 * ((y - mean)**2 / var + logvar)
        return nll.mean()

    def train_epoch(self, x, y):
        self.optimizer.zero_grad()

        loss = self.nll_loss(x, y)

        loss.backward()
        self.optimizer.step()
        return loss.item()


    def train(self, train_data, epochs=500, cp=100, surpress=False):
        x, y = train_data

        # again split the data to optimize hyperparam on val set, not leak data.
        x_train, x_val, y_train, y_val = train_test_split(x, y, test_size=0.2, shuffle=True)
    
        test_losses = []
        losses = []
    
        for iter in range(epochs):
            # train
            iteration_loss = self.train_epoch(x_train, y_train)
            losses.append(iteration_loss)

            # validate
            val_loss = self.validation_loss((x_val, y_val))
            test_losses.append(val_loss)
    
            # print
            if not surpress:
                if iter and iter % cp == 0:    # update on iteration checkpoints
                    print(f"iteration {iter}/{epochs}, loss = train: {iteration_loss}, val: {val_loss}")

        return losses, test_losses


    def validation_loss(self, test_data):
        x, y = test_data
        loss = self.nll_loss(x, y)
        return loss.item()


    def reset(self):
        self.__init__()

In [19]:
class Ensemble:
    def __init__(self, n_networks, lmbda=0.5, hidden_size=10):
        self.n_networks = n_networks
        self.ensemble = [Network(h_size=hidden_size) for i in range(n_networks)]
        self.lmbda = lmbda

    def train(self, train_data):
        for model in self.ensemble:
            model.train(train_data, surpress=True)
    
    def predict(self, x):
        predictions = [ model.forward(x) for model in self.ensemble ]
        # 7 x 2 x N x 18 
        
        # separate means and variances
        means = []
        variances = []
        for m, v in predictions:
            print("sep")
            means.append(m)
            variances.append(v)
    
        for i in range(len(variances)):
            print(variances[i].shape)
            variances[i] = variances[i].square().sum(dim=1).sqrt()  # convert each to mean variance
        print(variances) 
        
#         max_var = max(variance)
#         random_mean = mean[random.randint(0, self.n_networks)]
        
#         random_mean = self.correct_reward(random_mean, max_variance)
#         return random_mean
    
    def correct_reward(self, mean, max_var):
        mean[-1] = mean[-1] - self.lmbda * max_var
        return mean
    
    def mse(self, y, y_hat):
        return torch.mean( (y - y_hat)**2 )

    def test(self, test_data):
        x_test, y_test = test_data
        prediction = self.predict(x_test)
        return self.mse(prediction, y_test)

    def to(self, device):
        for model in self.ensemble:
            model.to(device)

In [4]:
# load data
data = h5py.File(Path("./halfcheetah_medium-v2.hdf5"))
print(data.keys())


<KeysViewHDF5 ['actions', 'infos', 'metadata', 'next_observations', 'observations', 'rewards', 'terminals', 'timeouts']>


In [5]:
# extract relevant cols
a = data["actions"]
s_new = data["next_observations"]
s = data["observations"]
r = data["rewards"]

# info
print(
    f"a shape = {a.shape}\n" \
    f"s shape = {s.shape}\n" \
    f"s_new shape = {s_new.shape}\n" \
    f"r shape = {r.shape}\n"
)

a shape = (1000000, 6)
s shape = (1000000, 17)
s_new shape = (1000000, 17)
r shape = (1000000,)



In [9]:
# divide data
x = np.hstack([a, s])                                # -> (N, 23)
y = np.hstack([s_new, np.array(r).reshape(-1, 1)])   # -> (N, 18)

# set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"using device: {device}")

# converting to tensors
x = torch.tensor(x, dtype=torch.float32).to(device)   
y = torch.tensor(y, dtype=torch.float32).to(device)
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, shuffle=True
)

# info
print(
    f"x_train shape = {x_train.shape}\n" \
    f"x_test shape = {x_test.shape}\n" \
    f"y_train shape = {y_train.shape}\n" \
    f"y_test shape = {y_test.shape}"
)
# set_data
train_data = (x_train, y_train)
test_data = (x_test, y_test)

using device: cuda
x_train shape = torch.Size([800000, 23])
x_test shape = torch.Size([200000, 23])
y_train shape = torch.Size([800000, 18])
y_test shape = torch.Size([200000, 18])


In [None]:
# train model



# set params
epochs = 100

# for network_width in width_list:
model = Network( h_size=10 )
model = model.to(device)

# train
train_losses, test_losses = model.train(train_data, epochs=epochs, cp=10)
# test_results.append(test_losses)
test_loss = model.nll_loss(x_test, y_test)
print(f"final test loss = {test_loss.item()}")

In [None]:
# plot data
fig, ax = plt.subplots(1, 1)
x = np.arange(0, epochs)
ax.plot(x, train_losses, label='train', color='red')
ax.plot(x, test_losses, label='test', color='green')
ax.set_xlabel('epochs')
ax.set_ylabel('loss')
plt.legend()
plt.show()


In [None]:
# plot size/performance comparison
fig, ax = plt.subplots(1, 1)

x = width_list
ax.set_title("epochs = 1000")
for i, w in enumerate(x):
    ax.plot(np.arange(0, epochs), test_results[i], label=w)
ax.set_xlabel("epochs")
ax.set_ylabel("loss")
plt.show()

In [20]:
# train ensemble

ensemble = Ensemble(7, hidden_size=10)
ensemble.to(device)
# ensemble.train(train_data)

In [21]:
ensemble.test(test_data)

sep
sep
sep
sep
sep
sep
sep
torch.Size([200000, 18])
torch.Size([200000, 18])
torch.Size([200000, 18])
torch.Size([200000, 18])
torch.Size([200000, 18])
torch.Size([200000, 18])
torch.Size([200000, 18])
[tensor([2.2276, 2.2450, 2.2383,  ..., 2.2106, 2.2086, 2.2547], device='cuda:0',
       grad_fn=<SqrtBackward0>), tensor([2.0179, 2.0168, 2.0205,  ..., 2.0184, 2.0163, 2.0197], device='cuda:0',
       grad_fn=<SqrtBackward0>), tensor([2.1840, 2.1902, 2.1866,  ..., 2.1882, 2.1858, 2.1890], device='cuda:0',
       grad_fn=<SqrtBackward0>), tensor([1.9650, 1.9679, 1.9656,  ..., 1.9772, 1.9693, 1.9658], device='cuda:0',
       grad_fn=<SqrtBackward0>), tensor([2.0143, 2.0565, 2.0605,  ..., 2.0489, 2.0168, 2.0602], device='cuda:0',
       grad_fn=<SqrtBackward0>), tensor([2.1824, 2.1621, 2.1653,  ..., 2.1695, 2.1877, 2.1665], device='cuda:0',
       grad_fn=<SqrtBackward0>), tensor([2.1263, 2.1279, 2.1308,  ..., 2.1316, 2.1349, 2.1323], device='cuda:0',
       grad_fn=<SqrtBackward0>)]


TypeError: unsupported operand type(s) for -: 'NoneType' and 'Tensor'