# Generating NeuralDream Videos

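This IPython notebook walks through the process of creating Neural Dream Videos: a variational autoencoder (VAE) compresses every frame of a video into a small latent vector, and a recurrent neural network (RNN) learns to continue the sequence of latent vectors.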
The process is as follows:
1. Download a video in mp4 format, and resize it to 128x128.
2. Split video into individual frames.
3. Train VAE using individual frames.
4. Generate latent representation of all frames from trained VAE.
5. Train RNN using latent representations.
6. Generate new sequence of latent representations using RNN.
7. Decode new latent sequence using trained VAE to get new series of frames.
8. Combine new frame sequence into Neural Dream Video.

Feel free to adjust the hyperparameters of the models; the values used here are only heuristics that worked well for me.

### Loading dependencies

In [None]:
import tensorflow as tf
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plt

import argparse
import time
import os
import cPickle

from model_rnn import Model

from PIL import Image
from glob import glob
import os
from model_vae import *

### Setting up project name and folders

In [None]:
# Name of this run; it selects the sub-folder used inside each working directory.
project_name = 'video'

input_path = './inputs/'+project_name         # source frames extracted from the video
output_path = './outputs/'+project_name       # frames decoded from the generated sequence
vae_checkpoints = './vae_cp/'+project_name    # VAE checkpoints
lstm_checkpoints = './lstm_cp/'+project_name  # RNN checkpoints
# The latent sequences are pickled into ./data/ further down in the notebook;
# create that folder here as well, otherwise the first dump fails on a fresh
# checkout because open() does not create missing directories.
data_path = './data'
paths = [input_path,output_path,vae_checkpoints,lstm_checkpoints,data_path]
for path in paths:
    if not os.path.exists(path):
        os.makedirs(path)

## Processing source video

Resize the desired video to 128x128 using ffmpeg from the terminal or your preferred video processing software.

`ffmpeg -i input.mp4 -vf scale=128:128 output.mp4`

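Next we extract frames from the resized video at ten frames per second and save them all to the project's input folder. Replace `project_name` in the command below with the value of `project_name` set above (`video` by default).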

`ffmpeg -i output.mp4 -r 10 -f image2 ./inputs/project_name/%05d.png`

## Variational Autoencoder

### Define our VAE model

In [None]:
# Layer sizes handed to the VariationalAutoencoder constructor.
network_architecture = {
    'n_hidden_recog_1': 256,  # 1st layer encoder neurons
    'n_hidden_recog_2': 256,  # 2nd layer encoder neurons
    'n_hidden_gener_1': 256,  # 1st layer decoder neurons
    'n_hidden_gener_2': 256,  # 2nd layer decoder neurons
    'n_input': 49152,         # values per video frame (128 * 128 * 3)
    'n_z': 64,                # dimensionality of latent space
}

### Loading the frames into memory

In [None]:
def loadImages(data):
    """Load image files into one float32 array of flattened frames.

    Args:
        data: iterable of image file paths.

    Returns:
        A float32 array with one row of 49152 values (128 * 128 * 3) per
        file, scaled from the 0..255 byte range down to [0, 1).

    Raises:
        ValueError: if a decoded frame does not hold exactly 49152 values,
            i.e. the image is not 128x128.
    """
    images = []
    for myFile in data:
        # Close each file as soon as its pixels are copied out; a video can
        # contain thousands of frames and leaving them open leaks handles.
        with Image.open(myFile) as img:
            # Force three channels so PNGs carrying an alpha channel or a
            # single grey channel still flatten to 49152 values.
            pixels = np.array(img.convert('RGB'))
        images.append(np.reshape(pixels,[49152]))
    images = np.array(images, dtype='float32')
    return images / 256

In [None]:
# Collect the frame files in name order (the frames are written with
# zero-padded numeric names, so this is also their order in the video),
# then read them all into memory.
dataS = sorted(
    glob(os.path.join("./inputs/", project_name, "*.png"))
)
imagesS = loadImages(dataS)
n_samples = len(imagesS)
print(' '.join(['Frames loaded. There are ', str(n_samples), 'frames in project', project_name]))

### Train the VAE model

In [None]:
def train_vae(network_architecture, learning_rate=1e-4,
          batch_size=50, training_epochs=10, display_step=5,model_path='./vae_checkpoints'):
    """Train a VariationalAutoencoder on the frames in the global ``imagesS``.

    Args:
        network_architecture: layer-size dict passed to the VAE constructor.
        learning_rate: passed to the VAE constructor.
        batch_size: number of frames per training step.
        training_epochs: number of passes over the frames.
        display_step: the accumulated cost is printed every this many epochs.
        model_path: checkpoint folder given to the VAE and to ``save_model``.

    Returns:
        The trained VariationalAutoencoder instance.
    """
    vae = VariationalAutoencoder(network_architecture,
                                 learning_rate=learning_rate,
                                 batch_size=batch_size,load_model = False,checkpoint_folder=model_path)
    n_frames = len(imagesS)
    # Only whole batches are trained on; frames left over after the last full
    # batch are skipped for that epoch. Floor division keeps this an integer.
    batches_per_epoch = n_frames // batch_size
    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        # Shuffle frame indices rather than the frames themselves, which
        # avoids copying the whole image array on every epoch.
        order = np.random.permutation(n_frames)
        for i in range(batches_per_epoch):
            batch_xs = imagesS[order[i * batch_size:(i + 1) * batch_size]]

            # Fit training using batch data
            cost = vae.partial_fit(batch_xs)
            # Compute average loss
            avg_cost += cost / n_frames * batch_size

        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch: %04d cost= %s" % (epoch + 1, "{:.9f}".format(avg_cost)))
    # Save under the index of the last epoch. Computing it here instead of
    # reusing the loop variable keeps training_epochs=0 from raising NameError.
    vae.save_model(max(training_epochs - 1, 0),model_path)
    return vae

In [None]:
# Clear the default TensorFlow graph before the VAE is built, then train it
# and keep the trained model in `vae` for the cells below.
tf.reset_default_graph()
batch_size = 50
vae = train_vae(
    network_architecture,
    batch_size=batch_size,
    training_epochs=100,
    model_path='./vae_cp/'+project_name,
)

### Testing the reconstruction capacity of the model

In [None]:
# Run the first batch of frames through the VAE and show five of them,
# each original on the left with its reconstruction on the right.
x_sample = imagesS[:batch_size]
x_reconstruct = vae.reconstruct(x_sample)
plt.figure(figsize=(8, 12))
for i in range(5):
    panels = [(x_sample, "Test input"), (x_reconstruct, "Reconstruction")]
    for column, (frames, label) in enumerate(panels):
        plt.subplot(5, 2, 2*i + 1 + column)
        plt.imshow(frames[i].reshape(128, 128, 3), vmin=0, vmax=1)
        plt.title(label)
plt.tight_layout()

### Generate latent representation of each frame

In [None]:
# Run every loaded frame through vae.transform and store the result for the
# RNN stage.
x_z = vae.transform(imagesS)
# Pickle data is a byte stream, so the file is opened in binary mode; text
# mode can corrupt it on platforms that translate line endings.
with open('./data/'+project_name+'_Z.p','wb') as f:
    cPickle.dump(x_z,f)

## Recurrent Neural Network

### Defining the model and loading latent representation

In [None]:
class args():
    """Hyperparameter bundle handed to the RNN ``Model`` constructor."""

    def __init__(self):
        # Network
        self.model = 'lstm'
        self.num_layers = 2
        self.rnn_size = 256
        self.input_size = 64  # same value as n_z of the VAE

        # Batching
        self.seq_length = 50
        self.batch_size = 5

        # Optimisation; the training cell uses
        # learning_rate * decay_rate ** epoch as the rate of each epoch.
        self.num_epochs = 150
        self.learning_rate = 2e-2
        self.decay_rate = 0.97
        self.grad_clip = 5.

        # Checkpointing
        self.save_every = 1000
        self.save_dir = './lstm_cp/'+project_name


argsA = args()

In [None]:
# Read back the latent vectors written by the VAE stage. Pickle data is a
# byte stream, so the file is opened in binary mode.
with open('./data/'+project_name+'_Z.p','rb') as f:
    z_images = cPickle.load(f)

In [None]:
def create_batches():
    """Cut the global latent sequence ``z_images`` into RNN training batches.

    Reads ``argsA.batch_size`` and ``argsA.seq_length``. Frames that do not
    fill a whole batch are dropped from the end. The sequence is divided into
    ``batch_size`` contiguous rows and every row is cut into ``num_batches``
    windows of ``seq_length`` consecutive frames, so row ``r`` of batch
    ``b + 1`` continues exactly where row ``r`` of batch ``b`` stopped.

    Returns:
        (x_batches, y_batches, num_batches). Both lists hold ``num_batches``
        arrays of shape (batch_size, seq_length, features). Each target is
        the frame that follows the corresponding input frame; the target of
        the very last frame wraps around to the first frame.

    Raises:
        ValueError: if there are fewer than batch_size * seq_length frames.
    """
    frames_per_batch = argsA.batch_size * argsA.seq_length
    # Floor division: this is a count and is used as a slice bound below.
    num_batches = len(z_images) // frames_per_batch
    if num_batches == 0:
        raise ValueError(
            'Not enough frames: need at least %d, got %d'
            % (frames_per_batch, len(z_images)))
    tensor = np.asarray(z_images)[:num_batches * frames_per_batch]
    xdata = tensor
    ydata = np.copy(tensor)
    ydata[:-1] = xdata[1:]
    ydata[-1] = xdata[0]
    # Keep the feature axis last and split along the time axis. Folding the
    # batches into the feature axis and splitting there instead would place
    # consecutive time steps num_batches frames apart.
    steps_per_row = num_batches * argsA.seq_length
    x_batches = np.split(xdata.reshape(argsA.batch_size, steps_per_row, -1), num_batches, 1)
    y_batches = np.split(ydata.reshape(argsA.batch_size, steps_per_row, -1), num_batches, 1)
    return x_batches,y_batches,num_batches

### Training the RNN

In [None]:
# Clear the default TensorFlow graph (the VAE was built in it earlier) so the
# RNN is constructed in an empty one.
tf.reset_default_graph()
# Fresh hyperparameter object for the model built on the next line.
argsA = args()
model = Model(argsA)

In [None]:
# Cut the latent sequence into input/target batches using the batch_size and
# seq_length currently stored in argsA.
x_batches,y_batches,num_batches = create_batches()

In [None]:
# Train the RNN on the latent batches and write one checkpoint after the
# final epoch.
with tf.Session() as sess:
    tf.initialize_all_variables().run()
    saver = tf.train.Saver(tf.all_variables())
    for e in range(argsA.num_epochs):
        # Learning rate decays geometrically with the epoch index.
        sess.run(tf.assign(model.lr, argsA.learning_rate * (argsA.decay_rate ** e)))
        # Every epoch starts from the model's initial state; inside an epoch
        # the final state of one batch is fed in as the start of the next.
        state = model.initial_state.eval()
        for b in range(num_batches):
            feed = {model.input_data: x_batches[b],
                    model.targets: y_batches[b],
                    model.initial_state: state}
            # Only cost and state are fetched; the logits are not needed
            # during training.
            train_loss, state, _ = sess.run([model.cost, model.final_state, model.train_op], feed)
        # Loss of the last batch of the epoch.
        print('Loss at epoch %s : %s' % (e, train_loss))
        if e == argsA.num_epochs - 1:
            checkpoint_path = os.path.join(argsA.save_dir, 'model.ckpt')
            saver.save(sess, checkpoint_path, global_step = e * num_batches)
            print("model saved to {}".format(checkpoint_path))

### Generating new sequences from RNN

In [None]:
# Rebuild the RNN in an empty graph; the second constructor argument is True
# here, unlike the training model built from Model(argsA).
# NOTE(review): presumably True selects a sampling mode that feeds one frame
# at a time - confirm in model_rnn.
tf.reset_default_graph()
model_generate = Model(argsA,True)

# The batches are recreated after the model is built.
# NOTE(review): the generation cell reshapes its input to [1,1,64] and indexes
# x_batches per frame, so Model(argsA, True) presumably sets argsA.batch_size
# and argsA.seq_length to 1 - confirm in model_rnn.
x_batches,y_batches,num_batches = create_batches()

In [1]:
# These hyperparameters adjust the generation process
frames_to_generate = 1000 # Number of frames to generate
noise_to_add = 0.1 # Half-width of the uniform noise added to each generated latent vector before it is fed back in
reset = False # If True, every 100 frames the RNN state is reset and the input is replaced by a frame from the source sequence, which keeps the RNN from getting stuck

In [None]:
# Restore the trained RNN and let it run on its own output: each predicted
# latent vector, plus a little noise, becomes the next input.
with tf.Session() as sess:
    tf.initialize_all_variables().run()
    saver = tf.train.Saver(tf.all_variables())
    ckpt = tf.train.get_checkpoint_state(argsA.save_dir)
    # Fail with a clear message when the training cell has not produced a
    # checkpoint, instead of an AttributeError on None.
    if ckpt is None or not ckpt.model_checkpoint_path:
        raise IOError('No RNN checkpoint found in ' + argsA.save_dir)
    saver.restore(sess, ckpt.model_checkpoint_path)
    state = model_generate.initial_state.eval()
    # One frame per step: batch of 1, sequence of 1, input_size latent values.
    step_shape = [1, 1, argsA.input_size]
    x = x_batches[0]
    xs = []
    for i in range(frames_to_generate):
        feed = {model_generate.input_data: x, model_generate.initial_state: state}
        state,x1 = sess.run([model_generate.final_state,model_generate.logits], feed)
        xs.append(x1[0])
        x = x1.reshape(step_shape) + np.random.uniform(-noise_to_add,noise_to_add,step_shape)
        if reset and i % 100 == 0:
            state = model_generate.cell.zero_state(1, tf.float32).eval()
            # Wrap around so that generating more frames than there are
            # source batches does not index past the end of x_batches.
            x = x_batches[i % len(x_batches)]
In [None]:
# Stack the generated latent vectors into one array and store it for the
# decoding stage.
newXs = np.array(xs)
# Pickle data is a byte stream, so the file is opened in binary mode.
with open('./data/new'+project_name+'_Z.p','wb') as f:
    cPickle.dump(newXs,f)

## Generating video from new latent sequence

In [None]:
# Clear the default graph and rebuild the VAE with load_model=True and the
# checkpoint folder used during VAE training, this time with a batch size
# of 200.
tf.reset_default_graph()
vae = VariationalAutoencoder(
    network_architecture,
    learning_rate=1e-3,
    batch_size=200,
    load_model=True,
    checkpoint_folder='./vae_cp/'+project_name,
)

In [None]:
# Decode the generated latent sequence into images and write one PNG per frame.
# Pickle data is a byte stream, so the file is opened in binary mode.
with open('./data/new'+project_name+'_Z.p','rb') as f:
    dataNew = np.asarray(cPickle.load(f))

allFrames = []
for start in range(0, len(dataNew), vae.batch_size):
    batch = dataNew[start:start + vae.batch_size]
    valid = len(batch)
    if valid < vae.batch_size:
        # Pad the last, partial batch with zeros up to the VAE batch size and
        # discard the padded outputs afterwards, so trailing frames are no
        # longer dropped.
        # NOTE(review): assumes vae.generate decodes each row independently.
        padding = np.zeros((vae.batch_size - valid,) + batch.shape[1:], dtype=batch.dtype)
        batch = np.concatenate([batch, padding])
    allFrames.append(vae.generate(batch)[:valid])
allFrames = np.vstack(allFrames)
allFrames = np.reshape(allFrames,[len(allFrames),128,128,3])

for i, frame in enumerate(allFrames):
    # Clip before the cast: a decoder output of exactly 1.0 scales to 256,
    # which does not fit in uint8 and would wrap around to 0 (black).
    im = Image.fromarray(np.clip(frame * 256, 0, 255).astype('uint8'))
    im.save('./outputs/'+project_name+'/frame'+str(i)+'.png')

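Finally, we combine all the newly generated frames into a video again! Run the command below from inside the project's output folder (`./outputs/project_name`), where the generated `frame*.png` files were saved.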

`ffmpeg -framerate 10 -i frame%01d.png -c:v libx264 -r 30 -pix_fmt yuv420p out.mp4`