In [1]:
import numpy as np
import torch
import torch.utils.data
from torch import nn, optim
from torch.nn import functional as F
from torch.autograd import Variable
from torchvision import datasets, transforms
from torchvision.utils import save_image
from torch.utils.data import TensorDataset, DataLoader

from vrae import VRAE

In [3]:
torch.manual_seed(1337)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
kwargs = {'num_workers': 1, 'pin_memory': True} if torch.cuda.is_available() else {}

In [4]:
s = np.load('./buffer_original_reward3/test_buffer_state.npy')
a = np.load('./buffer_original_reward3/test_buffer_action.npy')
r = np.load('./buffer_original_reward3/test_buffer_reward.npy')
s_ = np.load('./buffer_original_reward3/test_buffer_next_state.npy')
not_done = np.load('./buffer_original_reward3/test_buffer_not_done.npy')

In [8]:
idx_done = np.array(np.where(not_done.reshape(-1) != 1))
idx_epi_start = np.insert(idx_done + 1, 0, 0)
idx_epi_end = np.append(idx_done, not_done.size)
epi_idxes = zip(idx_epi_start, idx_epi_end)

min_sequence_len = np.min(np.diff(idx_done, 1))
max_sequence_len = np.max(np.diff(idx_done, 1))

print('episode length min: {}, max: {}'.format(min_sequence_len, max_sequence_len))

episode length min: 11, max: 301


In [9]:
# episodes = []
# for start, finish in epi_idxes:
#     current_episode = s[start:finish]
#     padded_current_episode = np.concatenate(
#         (current_episode, np.zeros((max_sequence_len - current_episode.shape[0], 29))), axis=0)
#     episodes.append(padded_current_episode)
# episodes = np.stack(episodes)

seq_len = 8
episodes = []
for start, finish in epi_idxes:
    current_episode = s[start:finish]
    frag = [np.array(current_episode[i:i + seq_len]) for i in range(len(current_episode) - seq_len)]
    episodes.append(np.stack(frag))
episodes = np.concatenate(episodes)

train_dataset = TensorDataset(torch.from_numpy(episodes))
test_dataset = TensorDataset(torch.from_numpy(episodes))

In [10]:
hidden_size = 90
hidden_layer_depth = 1
latent_length = 64
batch_size = 32
learning_rate = 0.0005
n_epochs = 50
dropout_rate = 0.2
optimizer = 'Adam'  # options: ADAM, SGD
cuda = True  # options: True, False
print_every = 200
clip = True  # options: True, False
max_grad_norm = 5
loss = 'MSELoss'  # options: SmoothL1Loss, MSELoss
block = 'LSTM'  # options: LSTM, GRU

In [11]:
sequence_length = episodes.shape[1]
number_of_features = episodes.shape[2]

In [12]:
model = VRAE(sequence_length=sequence_length,
             number_of_features=number_of_features,
             hidden_size=hidden_size,
             hidden_layer_depth=hidden_layer_depth,
             latent_length=latent_length,
             batch_size=batch_size,
             learning_rate=learning_rate,
             n_epochs=n_epochs,
             dropout_rate=dropout_rate,
             optimizer=optimizer,
             cuda=cuda,
             print_every=print_every,
             clip=clip,
             max_grad_norm=max_grad_norm,
             loss=loss,
             block=block)



In [13]:
model.fit(train_dataset)
model.save('SaveModel/vrae_s_model.pth')

Epoch: 0
Batch [ 200/2943], loss = 216.9197, recon_loss = 215.4236, kl_loss =   1.4961
Batch [ 400/2943], loss = 104.9424, recon_loss = 102.9449, kl_loss =   1.9975
Batch [ 600/2943], loss = 100.2061, recon_loss =  97.9249, kl_loss =   2.2812
Batch [ 800/2943], loss =  85.7791, recon_loss =  83.2913, kl_loss =   2.4878
Batch [1000/2943], loss = 105.5818, recon_loss = 103.0461, kl_loss =   2.5357
Batch [1200/2943], loss = 115.6962, recon_loss = 113.1481, kl_loss =   2.5481
Batch [1400/2943], loss = 127.0886, recon_loss = 124.3764, kl_loss =   2.7122
Batch [1600/2943], loss =  90.0298, recon_loss =  87.4566, kl_loss =   2.5732
Batch [1800/2943], loss =  86.3031, recon_loss =  83.6178, kl_loss =   2.6853
Batch [2000/2943], loss =  82.8210, recon_loss =  80.2015, kl_loss =   2.6195
Batch [2200/2943], loss =  75.3398, recon_loss =  72.6242, kl_loss =   2.7156
Batch [2400/2943], loss =  60.3483, recon_loss =  57.6662, kl_loss =   2.6821
Batch [2600/2943], loss =  98.1744, recon_loss =  95.44

In [42]:
x_decoded = model.reconstruct(test_dataset)
x_decoded = x_decoded.swapaxes(0, 1)

import os
os.makedirs('buffer_recon', exist_ok=True)
np.save('buffer_recon/vrae_recon_state.npy', x_decoded)

In [40]:
# np.set_printoptions(precision=4, suppress=True)
# for i in range(seq_len):
#     print('sequence id: {}'.format(i))
#     print('original')
#     print(episodes[0][i])
#     print('reconstruct')
#     print(x_decoded[0][i])
#     print()

In [46]:
np.set_printoptions(precision=4, suppress=True)
print('orig_0_1')
print(episodes[0][1])
print('recon_0_1')
print(x_decoded[0][1])
print()
print('orig_1_0')
print(episodes[1][0])
print('recon_1_0')
print(x_decoded[1][0])



orig_0_1
[-0.0002  0.0774  0.2997  0.2321  0.1885  0.1649  0.154   0.1485  0.1438
  0.1405  0.1373  0.1342  0.1311  0.127   0.1224  0.1143  0.1     0.0812
  0.0629  0.0344 -0.3318  0.0001  0.0002  0.      0.     -0.0055  0.013
 -0.001   0.0942]
recon_0_1
[ 0.006   0.0709  0.3052  0.2343  0.1937  0.1696  0.1556  0.152   0.1453
  0.1386  0.1364  0.1324  0.1276  0.1228  0.1224  0.111   0.1021  0.0762
  0.0654  0.0335 -0.3363 -0.0016  0.0055  0.     -0.0047 -0.0045  0.0153
  0.0107  0.0852]

orig_1_0
[-0.0002  0.0774  0.2997  0.2321  0.1885  0.1649  0.154   0.1485  0.1438
  0.1405  0.1373  0.1342  0.1311  0.127   0.1224  0.1143  0.1     0.0812
  0.0629  0.0344 -0.3318  0.0001  0.0002  0.      0.     -0.0055  0.013
 -0.001   0.0942]
recon_1_0
[ 0.0072  0.0772  0.2975  0.2363  0.1964  0.1779  0.1591  0.154   0.1471
  0.1411  0.1437  0.1382  0.1323  0.1264  0.122   0.1126  0.0953  0.0796
  0.0576  0.0375 -0.3409  0.0011  0.0055 -0.0001  0.0128  0.0163  0.018
  0.0133  0.0861]
