In [0]:
import os

# Set the TensorFlow native log level BEFORE importing tensorflow: the setting is
# only useful if it is already in the environment when TensorFlow starts logging,
# which can happen during the import itself. Child processes started with `!`
# inherit this environment variable as well.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

from google.colab import drive
import tensorflow as tf

In [0]:
# Mount Google Drive under /content/drive without forcing a remount.
drive.mount('/content/drive', force_remount=False)

Set this to your folder's location.

In [0]:
# Make the project folder the working directory; the relative paths used by the
# cells below (requirements.txt, ./data, ./vae, ./rnn, the *.py scripts) resolve against it.
os.chdir('/content/drive/My Drive/DL/WorldModels-master/')

Install requirements (only for generating rollouts, all other commands work without this)
*takes some time*

In [0]:
# Refresh the apt package index before installing system packages.
!sudo apt-get update

In [0]:
# Install the system packages needed for generating rollouts.
# NOTE(review): no `-y` flag is passed, so apt may stop and ask for confirmation.
!sudo apt-get install cmake swig python3-dev zlib1g-dev python-opengl mpich xvfb xserver-xephyr vnc4server

In [0]:
# Install the Python dependencies listed in the project's requirements.txt.
!pip install -r requirements.txt

Restart the runtime after installing requirements.txt, then re-run the first 3 cells.

In [0]:
# Install additional Python packages; pyglet is pinned to version 1.3.2.
!pip install box2d-py mpi4py pyglet==1.3.2

# Train new Model
Running the following cells will create a new model RNN (2) as described in the report. To train a model as RNN (1), uncomment line 74 in 02_train_vae.py and comment out line 73.<br><br>
**IMPORTANT**<br>
Some of the following commands must be run twice because of a timeout: with about 10'000 rollouts per game, fetching the file list takes long enough that Drive times out. On the second run, Drive takes the partially loaded list from its cache and completes it.<br>
<br>
**ALSO IMPORTANT**<br>
Set your model_name and alpha here, then replace any remaining YOUR_MODEL_NAME and SET_YOUR_ALPHA placeholders in the commands below with the literal values you filled in here (e.g. my_fancy_model, not the variable name model_name).

In [0]:
# set a new model name (the analysis cells build the ./vae/<name>/ and ./rnn/<name>/ weight paths from it)
model_name = "YOUR_MODEL_NAME"
# set an alpha (zoom-variance setting for data generation, valid range 0 < alpha <= 1)
alpha = .7

## Generate VAE training Data
Arguments: 
*  --model_name (name of the new model to train, should be unique)
*  --alpha (0 < alpha <= 1, amount of zooming variance. The smaller alpha, the bigger the variance, alpha = 1 means normal, non-augmenting vae)
*  --total_episodes (# of rollouts to produce)
*  --time_steps (length of one game (rollout))

Leave all other arguments unchanged<br>
The rollouts are saved in /data/rollout_YOUR_MODEL_NAME/

In [0]:
!xvfb-run -a -s "-screen 0 1400x900x24" python 01_generate_data.py car_racing --total_episodes 10000 --model_name YOUR_MODEL_NAME --alpha SET_YOUR_ALPHA

**Analyze rollouts**

In [0]:
import numpy as np
import matplotlib.pyplot as plt

# NOTE(review): the text above says rollouts are saved in /data/rollout_YOUR_MODEL_NAME/,
# but this cell reads ./data/rollout -- confirm which folder is meant to be inspected.
# The listing is sorted because os.listdir returns entries in arbitrary order; sorting
# makes a given index refer to the same rollout on every run.
filelist = sorted(x for x in os.listdir("./data/rollout") if x != '.DS_Store')
print("# of rollouts", len(filelist))
if not filelist:
    raise FileNotFoundError("no rollouts found in ./data/rollout")

# Inspect one sample rollout; the index is clamped so smaller datasets still work.
sample_rollout_idx = min(90, len(filelist) - 1)
data = np.load("./data/rollout/"+filelist[sample_rollout_idx])

# Each data[...] access re-reads the array from the archive, so fetch the frames once.
frames = data['obsS']
print("Frames shape:", frames.shape)
print("The sample rollout contains", frames.shape[0], "images with dimensions", frames.shape[1:])
print("Actions shape:", data['action'].shape)
print("Rewards shape:", data['reward'].shape)
print("Done shape:", data['done'].shape)

# Show one frame of the rollout (clamped for rollouts shorter than 291 frames).
plt.imshow(frames[min(290, len(frames) - 1)])

## Train the VAE
The weights of the model are saved regularly, so the training can be interrupted at any time without losing any progress.<br>
Arguments: 

*   --new_model (creates new folder in /vae/)
*   --model_name (to distinguish models)
*   --S (how many rollouts to skip)
*   --N (index of last rollout to use)

This call will create a folder /vae/YOUR_MODEL_NAME containing the saved weights and the log with tensorboard summaries inside



In [0]:
!python 02_train_vae.py --new_model --model_name YOUR_MODEL_NAME

**Analyze VAE**

In [0]:
from vae.arch import VAE

# Build the VAE and restore the weights saved for `model_name`.
vae = VAE()
vae.set_weights("./vae/"+model_name+"/"+model_name+"_weights.h5")

# Compare frame 290 of the rollout held in `data`: the input frame, its target
# frame, and the VAE's prediction for the input.
frame_in = data['obsS'][290]
frame_target = data['obsB'][290]
frame_pred = vae.full_model.predict(frame_in.reshape(1, 64, 64, 3))[0]

fig = plt.figure(figsize=(15, 8))
panels = [
    ("Input image", frame_in),
    ("Target image", frame_target),
    ("Predicted image", frame_pred),
]
for col, (title, image) in enumerate(panels, start=1):
    sub = fig.add_subplot(1, 3, col)
    sub.set_title(title)
    sub.imshow(image)

## Generate RNN training data
This call will create a folder /data/series_YOUR_MODEL_NAME with the rollouts converted into latent representations.

In [0]:
!python 03_generate_rnn_data.py --model_name YOUR_MODEL_NAME

## Train the RNN
The weights of the model are saved regularly, so the training can be interrupted at any time without losing any progress.<br>
This call will create a folder /rnn/YOUR_MODEL_NAME containing the saved weights and the tensorboard log.

In [0]:
!python 04_train_rnn.py --new_model --batch_size 1 --steps 12000 --model_name YOUR_MODEL_NAME

**Analyse RNN**

In [0]:
# Load all arrays of one rollout with a single pass over the archive. Previously the
# same file was opened five times and never closed. The index is clamped so the cell
# also works when fewer than 2001 rollouts are available.
rollout_idx = min(2000, len(filelist) - 1)
with np.load("data/rollout/"+filelist[rollout_idx]) as rollout:
    imgs = rollout['obsS']
    imgsB = rollout['obsB']
    actions = rollout['action']
    rewards = rollout['reward']
    done = rollout['done']

In [0]:
from rnn.arch import RNN

# Build the RNN and restore the weights saved for `model_name`.
rnn_weights_file = './rnn/'+model_name+'/'+model_name+'_weights.h5'
rnn = RNN()
rnn.set_weights(rnn_weights_file)

In [0]:
#@title ####_Functions to reconstruct images and compare results_
# Number of mixture components per latent dimension; sample_next_mdn_output reshapes
# the RNN output into rows of 3 * GAUSSIAN_MIXTURES values.
GAUSSIAN_MIXTURES = 5
# Length of the latent vector that is sampled below and passed to the VAE decoder.
Z_DIM = 32
# Size of one parameter group; the first 3*d values of the RNN output are treated as
# mixture parameters. sample_next_mdn_output recomputes the same value locally.
d = GAUSSIAN_MIXTURES * Z_DIM

def sample_z(mu, log_sigma):
    """Draw a random latent sample centred on ``mu``.

    Standard-normal noise with the shape of ``log_sigma`` is scaled by
    ``exp(log_sigma)`` and by a fixed factor of 0.5, then added to ``mu``.

    Args:
        mu: array of means.
        log_sigma: array of log standard deviations.

    Returns:
        The sampled array.
    """
    sigma = np.exp(log_sigma)
    noise = np.random.randn(*log_sigma.shape)
    return mu + sigma * noise * 0.5

def get_mixture_coef(z_pred):
    """Split raw mixture parameters and normalise the mixture log-weights.

    Args:
        z_pred: 2-D array whose second axis holds, in order, three equally sized
            groups: unnormalised log-weights, means and log standard deviations.

    Returns:
        Tuple ``(log_pi, mu, log_sigma)`` where ``log_pi`` has been normalised so
        that ``exp(log_pi)`` sums to 1 along axis 1; ``mu`` and ``log_sigma`` are
        returned as split.
    """
    log_pi, mu, log_sigma = np.split(z_pred, 3, 1)

    # Log-softmax with the row maximum subtracted first: exp() of large logits
    # would otherwise overflow to inf and turn the whole row into -inf / nan.
    shifted = log_pi - np.max(log_pi, axis=1, keepdims=True)
    log_pi = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))

    return log_pi, mu, log_sigma

def get_pi_idx(x, pdf):
    """Sample an index from a categorical distribution.

    Walks through ``pdf`` accumulating its entries and returns the first index
    at which the running total reaches ``x``.

    Args:
        x: threshold to reach, e.g. a uniform random number.
        pdf: array of per-index probabilities.

    Returns:
        The selected index. If the running total never reaches ``x``, a
        uniformly random index in ``[0, pdf.size)`` is returned instead.
    """
    n_choices = pdf.size
    running_total = 0
    for idx in range(n_choices):
        running_total += pdf[idx]
        if running_total >= x:
            return idx
    # The threshold was never reached: fall back to a uniformly random index.
    return np.random.randint(n_choices)

def sample_next_mdn_output(obs, h, c):
    """Run one RNN step and sample the next latent vector from its mixture output.

    Args:
        obs: input passed as the first element to ``rnn.forward.predict``.
        h: hidden state; wrapped in a length-1 batch before prediction.
        c: cell state; wrapped in a length-1 batch before prediction.

    Returns:
        Tuple ``(next_z, chosen_mu, chosen_log_sigma, chosen_log_pi, rew_pred,
        next_reward, new_h, new_c)``. ``chosen_log_pi`` holds the index of the
        mixture component picked for each latent dimension (despite its name),
        ``rew_pred`` is the last value of the RNN output, and ``next_reward`` is
        1 when ``rew_pred > 0`` and 0 otherwise.
    """
    n_mixture_params = 3 * GAUSSIAN_MIXTURES * Z_DIM

    forward_out = rnn.forward.predict([obs, np.array([h]), np.array([c])])
    y_pred = forward_out[0][0][0]
    new_h = forward_out[1][0]
    new_c = forward_out[2][0]

    # The leading values are the mixture parameters; the final value is the reward.
    rew_pred = y_pred[-1]
    mixture_params = np.reshape(y_pred[:n_mixture_params], [-1, GAUSSIAN_MIXTURES * 3])
    log_pi, mu, log_sigma = get_mixture_coef(mixture_params)

    # Mixture weights per latent dimension, renormalised so every row sums to 1.
    pi = np.exp(log_pi)
    pi /= pi.sum(axis=1).reshape(Z_DIM, 1)

    chosen_log_pi = np.zeros(Z_DIM)
    chosen_mu = np.zeros(Z_DIM)
    chosen_log_sigma = np.zeros(Z_DIM)

    # Pick one mixture component for every latent dimension.
    for j in range(Z_DIM):
        component = get_pi_idx(np.random.rand(), pi[j])
        chosen_log_pi[j] = component
        chosen_mu[j] = mu[j, component]
        chosen_log_sigma[j] = log_sigma[j, component]

    next_z = sample_z(chosen_mu, chosen_log_sigma)

    next_reward = 1 if rew_pred > 0 else 0

    return next_z, chosen_mu, chosen_log_sigma, chosen_log_pi, rew_pred, next_reward, new_h, new_c

In [0]:
from IPython import display

# Start from zeroed recurrent states.
next_h = np.zeros(256)
next_c = np.zeros(256)

# Encode the whole rollout ONCE and append action and reward to every latent vector.
# This used to be recomputed inside the loop for every frame, which made the cell
# quadratic in the rollout length.
rnn_inputs = np.hstack([vae.encoder.predict(imgs), actions, rewards.reshape(-1, 1)])
next_z = rnn_inputs[0].reshape(1, -1, 36)

for i in range(len(imgs)-1):
    # The RNN input at every step is built from the real frame i, not from the
    # RNN's own previous prediction.
    next_z = rnn_inputs[i].reshape(1, -1, 36)
    next_z, chosen_mu, chosen_log_sigma, chosen_pi, rew_pred, next_reward, next_h, next_c \
    = sample_next_mdn_output(next_z, next_h, next_c)

    next_z_decoded = vae.decoder.predict(np.array([next_z]))[0]
    # NOTE(review): this value is overwritten at the top of the next iteration, so it
    # only survives after the last step. Kept so `next_z` ends in the same state as before.
    next_z = np.concatenate([next_z, [-1,1,0], [next_reward]])

    fig = plt.figure(figsize=(15, 8))
    sub = fig.add_subplot(1, 4, 1)
    sub.set_title(str(i)+" Original (target) image")
    sub.imshow(imgsB[i+1])
    sub = fig.add_subplot(1, 4, 2)
    sub.set_title(str(i)+" Input image")
    sub.imshow(imgs[i+1])
    sub = fig.add_subplot(1, 4, 3)
    sub.set_title(str(i)+" Output image from VAE")
    sub.imshow(vae.full_model.predict(imgs[i+1].reshape(1, 64, 64, 3))[0])
    sub = fig.add_subplot(1, 4, 4)
    sub.set_title(str(i)+" Output image from RNN")
    sub.imshow(next_z_decoded)
    plt.show()
    # One figure is created per frame; close it explicitly so figures cannot pile up
    # in memory regardless of the plotting backend.
    plt.close(fig)