In [0]:
from google.colab import drive
import os

In [0]:
# Mount Google Drive at /content/drive; an existing mount is reused (force_remount=False).
drive.mount('/content/drive', force_remount=False)

Set this to your folder's location.

In [0]:
# Make the project folder the working directory: the relative paths used by the
# later cells (./data, ./vae, ./rnn and the numbered scripts) are resolved against it.
os.chdir('/content/drive/My Drive/DL/WorldModels-master/')

Install requirements (only for generating rollouts, all other commands work without this)
*takes some time*

In [0]:
# Refresh the apt package index before any system packages are installed.
!sudo apt-get update

In [0]:
%%bash
# Install the ViZDoom build dependencies listed at
# https://github.com/mwydmuch/ViZDoom/blob/master/doc/Building.md#-linux
#
# -y answers apt-get's confirmation prompt automatically. A %%bash cell has no
# interactive terminal, so without it the install aborts at the prompt.

apt-get install -y build-essential zlib1g-dev libsdl2-dev libjpeg-dev \
nasm tar libbz2-dev libgtk2.0-dev cmake git libfluidsynth-dev libgme-dev \
libopenal-dev timidity libwildmidi-dev unzip

# Boost libraries
apt-get install -y libboost-all-dev

# Lua binding dependencies
apt-get install -y liblua5.1-dev

In [0]:
# Python packages for rollout generation. Only scipy and vizdoom are version-pinned;
# cupy, box2d-py and gym are installed at whatever version pip resolves.
!pip install scipy==1.1.0 cupy box2d-py gym vizdoom==1.1.7

# Train new Model
Running the following cells will create a new model RNN (2) as described in the report. To train a model as RNN (1), uncomment line 93 in 02_train_doom_vae.py and comment line 92.<br><br>
**IMPORTANT**<br>
Some of the following commands must be run twice, because of a timeout. Since we have about 10'000 rollouts per game and need to fetch the filelist, Drive times out. If run twice, Drive will take the partially loaded list from cache and complete it.<br><br>
**ALSO IMPORTANT**<br>
Set a unique model_name and an alpha in the following cell and replace YOUR_MODEL_NAME and SET_YOUR_ALPHA by these values (by the strings, e.g. my_fancy_model, not the variable model_name).

In [0]:
# Unique name of the model. The analysis cells use it to locate the saved weights
# under ./vae/<model_name>/ and ./rnn/<model_name>/.
model_name = "YOUR_MODEL_NAME"
# Crop factor passed to crop() when preparing frames for the RNN analysis;
# 1 keeps the whole frame, smaller values keep a smaller centred window.
alpha = .75

## Generate VAE training Data
Arguments: 
*  --num_rollouts (number of rollouts to generate)
*  --game (only "DoomTakeCover" works)

The rollouts are saved in /data/rollout_doom

In [0]:
# Generate 10000 rollouts of the DoomTakeCover game with the data-generation script.
!python3 01_generate_doom_data_bs.py --game DoomTakeCover --num_rollouts 10000

**Analyze rollouts**<br>
The first time you run this cell, you will get a timeout. Just run it again and the filelist will stay in cache.

In [0]:
import numpy as np
import matplotlib.pyplot as plt

# os.listdir returns entries in arbitrary order; sorting makes every index into
# filelist refer to the same rollout on every run.
filelist = sorted(x for x in os.listdir("./data/rollout_doom") if x != '.DS_Store')
print("# of rollouts", len(filelist))

# `data` is kept as the opened archive because a later cell indexes data['frames'] again.
data = np.load("./data/rollout_doom/"+filelist[0])

# Arrays in an .npz archive are read from disk on every data[...] access,
# so fetch the frames once and reuse them.
first_rollout_frames = data['frames']
print("Frames shape:", first_rollout_frames.shape)
print("The first rollout contains", first_rollout_frames.shape[0], "images with dimensions", first_rollout_frames.shape[1:])
print("Actions shape:", data['actions'].shape)
print("Rewards shape:", data['rewards'].shape)

plt.imshow(first_rollout_frames[0])

## Train the VAE
The weights of the model are regularly saved, so the training can be interrupted at any time without losing any progress.<br>
Arguments: 

*   --new_model (creates new folder in /vae/)
*   --model_name (to distinguish models)
*   --S (how many rollouts to skip)
*   --N (index of last rollout to use)
*   --alpha (amount of zooming and variance, set alpha = 1 for simple, non-augmenting vae, alpha < 1 makes an augmenting VAE. The smaller alpha, the bigger the zooming effect)

This call will create a folder /vae/YOUR_MODEL_NAME containing the saved weights and a tensorboard log.



In [0]:
!python 02_train_doom_vae.py --new_model --model_name YOUR_MODEL_NAME --alpha SET_YOUR_ALPHA

**Analyze VAE**

In [0]:
#@title ####_Cropping & scaling functions (run this before the next cell)_
import cv2
def crop(image, scale):
  """Return a centred crop of `image` whose side is `scale` times len(image).

  The same window is applied to the first two axes, so the side length is
  derived from the number of rows only.
  """
  side = len(image)
  cropped_side = int(np.round(side * scale))
  # Margin left on each side so that the kept window is centred.
  margin = int(round((side - cropped_side) / 2))
  window = slice(margin, margin + cropped_side)
  return image[window, window]

def scale(image):
  """Resize `image` to 64x64 pixels using bicubic interpolation."""
  target_size = (64, 64)
  return cv2.resize(image, dsize=target_size, interpolation=cv2.INTER_CUBIC)

In [0]:
from vae.arch import VAE
# Build the VAE and restore the weights that were saved for `model_name`.
vae_weights_path = "./vae/"+model_name+"/"+model_name+"_weights.h5"
vae = VAE()
vae.set_weights(vae_weights_path)

# Reconstruct the first frame of the loaded rollout: rescale pixel values to [0, 1],
# resize to 64x64, add a batch axis, and show the model's output for that one image.
first_frame = scale(data['frames'][0]/255.)
reconstruction = vae.full_model.predict(first_frame[np.newaxis, :,:,:])[0]
plt.imshow(reconstruction)

## Generate RNN training data
Set the exact same settings as before.<br>
This call will create a folder /data/series_YOUR_MODEL_NAME containing the rollouts converted into the latent representation.

In [0]:
!python 03_generate_doom_rnn_data.py --model_name YOUR_MODEL_NAME --alpha SET_YOUR_ALPHA

## Train the RNN
The weights of the model are regularly saved, so the training can be interrupted at any time without losing any progress.<br>
Same arguments as before, except no --alpha.<br>
This call will create a folder /rnn/YOUR_MODEL_NAME containing the saved weights and a tensorboard log.

In [0]:
!python 04_train_doom_rnn.py --new_model --model_name YOUR_MODEL_NAME

**Analyze RNN**

In [0]:
import cv2
# Rollout used for the RNN analysis (index into filelist).
ROLLOUT_INDEX = 5000
rollout_path = "data/rollout_doom/"+filelist[ROLLOUT_INDEX]

# Open the archive once, read everything that is needed, and close it again.
with np.load(rollout_path) as rollout:
  data = rollout['frames']/255.       # frames rescaled to [0, 1]
  rollout_actions = rollout['actions']
  rewards = rollout['rewards']

# One action row per transition between consecutive frames. The stored actions
# fill the first two columns; the third column stays zero.
actions = np.zeros((len(data)-1, 3))
actions[:, :2] = rollout_actions

# Centre-crop every frame by `alpha` and resize it to 64x64 with the shared helpers.
imgs = np.asarray([scale(crop(frame, alpha)) for frame in data])

In [0]:
from rnn.arch import RNN
# Build the RNN and restore the weights that were saved for `model_name`.
# NOTE(review): this filename has no underscore before "weights.h5", unlike the
# VAE weights file ("_weights.h5") -- confirm this matches what the training script writes.
rnn_weights_path = './rnn/'+model_name+'/'+model_name+'weights.h5'
rnn = RNN()
rnn.set_weights(rnn_weights_path)

In [0]:
#@title ####_Functions to reconstruct images and compare results (run this before the next cell)_
# Number of mixture components per latent dimension in the RNN's mixture output.
# NOTE(review): presumably has to match the trained RNN -- confirm against rnn/arch.py.
GAUSSIAN_MIXTURES = 5
# Number of latent dimensions; sample_next_mdn_output picks one component per dimension.
Z_DIM = 32
# Mixture components summed over all latent dimensions. sample_next_mdn_output
# recomputes this locally and reads the first 3*d outputs as mixture parameters.
d = GAUSSIAN_MIXTURES * Z_DIM

def sample_z(mu, log_sigma):
    """Draw a sample around `mu` with standard deviation exp(log_sigma), damped by 0.5.

    Standard-normal noise of the same shape as `log_sigma` is taken from
    NumPy's global random state.
    """
    noise = np.random.randn(*log_sigma.shape)
    return mu + np.exp(log_sigma) * noise * 0.5

def get_mixture_coef(z_pred):
    """Split raw mixture outputs into normalised log-weights, means and log-sigmas.

    `z_pred` is split into three equal parts along axis 1, in the order
    (log_pi, mu, log_sigma). Each row of log_pi is normalised so that
    exp(log_pi) sums to 1 along axis 1.

    Returns:
        Tuple (log_pi, mu, log_sigma), each with a third of z_pred's columns.
    """
    log_pi, mu, log_sigma = np.split(z_pred, 3, 1)
    # logaddexp.reduce evaluates log(sum(exp(log_pi))) without overflowing for large
    # logits, where exp() would return inf and turn the row into -inf.
    log_pi = log_pi - np.logaddexp.reduce(log_pi, axis=1, keepdims=True)

    return log_pi, mu, log_sigma

def get_pi_idx(x, pdf):
    """Sample an index from a categorical distribution.

    Walks through `pdf` accumulating its entries and returns the first index
    at which the running total reaches `x`. If the total never reaches `x`,
    a uniformly random index is returned instead.
    """
    n_categories = pdf.size
    running_total = 0
    for candidate in range(0, n_categories):
        running_total += pdf[candidate]
        if running_total >= x:
            return candidate
    # The probabilities did not add up to x: fall back to a random category.
    fallback = np.random.randint(n_categories)
    return fallback

def sample_next_mdn_output(obs, h, c):
    """Run one step of the RNN and sample the next latent vector from its mixture output.

    Args:
        obs: input passed as-is to rnn.forward.predict.
        h, c: recurrent state vectors; each is wrapped in a batch of one.

    Returns:
        Tuple (next_z, chosen_mu, chosen_log_sigma, chosen_log_pi, rew_pred,
        next_reward, new_h, new_c). Note that chosen_log_pi holds the *index*
        of the mixture component picked for each latent dimension, not a
        log-probability. next_reward is 1 when rew_pred > 0, otherwise 0.
    """
    mdn_width = 3 * GAUSSIAN_MIXTURES * Z_DIM

    prediction = rnn.forward.predict([obs, np.array([h]), np.array([c])])
    y_pred = prediction[0][0][0]
    new_h, new_c = prediction[1][0], prediction[2][0]

    # The last output is the reward prediction; the first mdn_width outputs are
    # the mixture parameters, laid out as one row per latent dimension.
    rew_pred = y_pred[-1]
    mixture_params = np.reshape(y_pred[:mdn_width], [-1, GAUSSIAN_MIXTURES * 3])
    log_pi, mu, log_sigma = get_mixture_coef(mixture_params)

    # Mixture weights per latent dimension, renormalised to sum to one.
    pi = np.exp(log_pi)
    pi /= pi.sum(axis=1).reshape(Z_DIM, 1)

    chosen_log_pi = np.zeros(Z_DIM)
    chosen_mu = np.zeros(Z_DIM)
    chosen_log_sigma = np.zeros(Z_DIM)
    for dim in range(Z_DIM):
        # Pick one mixture component for this latent dimension.
        component = get_pi_idx(np.random.rand(), pi[dim])
        chosen_log_pi[dim] = component
        chosen_mu[dim] = mu[dim, component]
        chosen_log_sigma[dim] = log_sigma[dim, component]

    next_z = sample_z(chosen_mu, chosen_log_sigma)
    next_reward = 1 if rew_pred > 0 else 0

    return next_z, chosen_mu, chosen_log_sigma, chosen_log_pi, rew_pred, next_reward, new_h, new_c

In [0]:
from IPython import display

# Initial recurrent state passed to the RNN (two zero vectors of length 256).
next_h = np.zeros(256)
next_c = np.zeros(256)

# Encode all frames ONCE and build the RNN inputs up front: each row is
# 32 latent values + 3 action values + 1 reward = 36 values. Running the encoder
# inside the loop would re-encode the whole rollout for every single step.
rnn_inputs = np.hstack([vae.encoder.predict(imgs)[1:], actions, rewards.reshape(-1, 1)])

next_z = rnn_inputs[0].reshape(1, -1, 36)
for i in range(len(imgs)-1):
    # Every step is fed the encoding of a real frame, not the RNN's previous sample.
    next_z = rnn_inputs[i].reshape(1, -1, 36)
    next_z, chosen_mu, chosen_log_sigma, chosen_pi, rew_pred, next_reward, next_h, next_c \
    = sample_next_mdn_output(next_z, next_h, next_c)

    next_z_decoded = vae.decoder.predict(np.array([next_z]))[0]
    # Sampled latent extended with a fixed action and the predicted reward; it is
    # replaced by the next real input at the top of the following iteration.
    next_z = np.concatenate([next_z, [-1,1,0], [next_reward]])

    panels = [
        (" Original (target) image", data[i+1]),
        (" Input image", imgs[i+1]),
        (" Output image from VAE", vae.full_model.predict(imgs[i+1].reshape(1, 64, 64, 3))[0]),
        (" Output image from RNN", next_z_decoded),
    ]
    fig = plt.figure(figsize=(15, 8))
    for position, (label, image) in enumerate(panels, start=1):
        sub = fig.add_subplot(1, 4, position)
        sub.set_title(str(i)+label)
        sub.imshow(image)
    plt.show()
    # Release the figure so that memory does not grow with the number of frames.
    plt.close(fig)