<a href="https://colab.research.google.com/github/blufzzz/two-stream-dyntex-synth/blob/spatio-temporal-statistics/experiments.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Here is experiments with approach described in 

http://arxiv.org/abs/1702.07006   


In [3]:
from google.colab import drive

drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [2]:
cd ./gdrive/My\ Drive/Colab Notebooks/two-stream-dyntex-synth

[Errno 2] No such file or directory: './gdrive/My Drive/Colab Notebooks/two-stream-dyntex-synth'
/content/gdrive/My Drive/Colab Notebooks/two-stream-dyntex-synth


In [0]:
import os
import sys
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
import scipy
from scipy.optimize import minimize
import tensorflow as tf
sys.path.append('/content/gdrive/My Drive/Colab Notebooks/two-stream-dyntex-synth/src') 
from utilities import load_image, load_images, vgg_process, vgg_deprocess
from appearance_descriptor import AppearanceDescriptor
from optimizer import Optimizer

In [0]:
config_proto = tf.ConfigProto()
config_proto.gpu_options.allow_growth = True
config_proto.allow_soft_placement = True
config_proto.log_device_placement = False
my_config = {}
my_config['batch_size'] = 1
my_config['iterations'] = 6000
my_config['snapshot_frequency'] = 1000
my_config['network_out_frequency'] = 100
my_config['log_frequency'] = 100
my_config['gpu'] = 0
my_config['dt'] = 3
my_config['run_id'] = "whoa_wood!"
my_config['dynamics_model'] = 'MSOEnet'

config = {'tf': config_proto, 'user': my_config}

In [0]:
import tensorflow as tf
import skimage.io
from utilities import check_snapshots, vgg_deprocess
import time
import datetime
import numpy as np
import os

class Optimizer(object):

    def __init__(self, graph, input_dimension, input_frame_count,
                 target_dynamic_path, target_static_path, config):
        self.graph = graph
        self.input_dimension = input_dimension
        self.input_frame_count = input_frame_count
        self.target_dynamic_path = target_dynamic_path
        self.target_static_path = target_static_path
        self.user_config = config['user']
        self.tf_config = config['tf']

    def print_info(self, losses):
        i = self.iterations_so_far
        iterations = self.user_config['iterations']
        run_id = self.user_config['run_id']

        time_diff = time.time() - self.last_print
        it_per_sec = 1 / time_diff
        remaining_it = iterations - i
        eta = remaining_it / it_per_sec
        eta_string = str(datetime.timedelta(seconds=eta))

        print_string = '(%s) Iteration %d: dynamic texture loss: %f ' \
                       'appearance loss: %f dynamics ' \
                       'loss: %f ' \
                       'iter per/s: %f ETA: %s' % (run_id, i + 1,
                                                   losses[0],
                                                   losses[1],
                                                   losses[2],
                                                   it_per_sec,
                                                   eta_string)
        print print_string
        self.last_print = time.time()

    def minimize_callback(self, dyntex_loss, appearance_loss,
                          dynamics_loss, output, summaries):
        # if hasattr(self, 'current_loss'):
        #     self.past_loss = self.current_loss
        # self.current_loss = dyntex_loss
        # for cleanliness
        i = self.iterations_so_far
        snapshot_frequency = self.user_config['snapshot_frequency']
        network_out_frequency = self.user_config['network_out_frequency']
        log_frequency = self.user_config['log_frequency']
        run_id = self.user_config['run_id']

        # print training information
        self.print_info([dyntex_loss, appearance_loss, dynamics_loss])

        if (i + 1) % snapshot_frequency == 0:
            print 'Saving snapshot...'
            try:
                os.makedirs('snapshots/' + run_id)
            except OSError:
                if not os.path.isdir('snapshots/' + run_id):
                    raise
            self.saver.save(self.sess, 'snapshots/' + run_id + '/iter',
                            global_step=i+1)
        if (i + 1) % log_frequency == 0:
            print 'Saving log file...'
            #self.summary_writer.add_summary(summaries, i + 1)
            #self.summary_writer.flush()

        if (i + 1) % network_out_frequency == 0:
            print 'Saving image(s)...'
            try:
                os.makedirs('data/out/' + run_id)
            except OSError:
                if not os.path.isdir('data/out/' + run_id):
                    raise
            network_out = output.reshape((-1,
                                          self.input_frame_count,
                                          self.input_dimension,
                                          self.input_dimension, 3))
            img_count = 1
            for out in network_out:
                frame_count = 1
                for frame in out:
                    img_out = vgg_deprocess(frame, no_clip=False,
                                            unit_scale=False)
                    filename = 'data/out/' + run_id + \
                        '/iter_%d_frame_%d_%d.png'
                    skimage.io.imsave(filename %
                                      (i + 1, frame_count,
                                       img_count),
                                      img_out)
                    frame_count += 1
                img_count += 1
        self.iterations_so_far += 1

    def step_callback(self, args):
        if hasattr(self, 'past_loss'):
            loss_diff = (self.past_loss - self.current_loss) / \
                np.amax([np.abs(self.past_loss), np.abs(self.current_loss), 1])
            print 'f diff = ' + str(loss_diff)

    def optimize(self):
        iterations = self.user_config['iterations']
        run_id = self.user_config['run_id']

        with self.graph.as_default():
            """
            Instantiate optimizer
            """
            with tf.device('/gpu:' + str(self.user_config['gpu'])):
              
                optimizer = tf.contrib.opt.ScipyOptimizerInterface(	
                    self.dyntex_loss, method='L-BFGS-B',
                    options={'maxfun': iterations,
                             'disp': True})
                             #'ftol': 1e-5})

            """
            Train over iterations, printing loss at each one
            """
            self.saver = tf.train.Saver(max_to_keep=0, pad_step_number=16)
            with tf.Session(config=self.tf_config) as self.sess:

                # TODO: change snapshot and log folders to be in a single
                # location
                # check snapshots
                resume, self.iterations_so_far = check_snapshots(run_id)
				
                print ('BEFORE')
                
                # start summary writer
                #self.summary_writer = tf.summary.FileWriter('logs/' + run_id,
                #                                           self.sess.graph)
                
                if resume:
                    self.saver.restore(self.sess, resume)
                else:
                    self.sess.run(tf.global_variables_initializer()) # ^C occurs here!
				
                print('AFTER')
                
                # initialize start time
                self.last_print = time.time()

                # start train loop
                print '-------OPTIMIZING USING L-BFGS-B-------'
                # scipy optimizer needs a callback for printing iter info
                optimizer.minimize(self.sess,
                                   fetches=[self.dyntex_loss,
                                            self.appearance_loss,
                                            self.dynamics_loss,
                                            self.output,
                                            self.summaries],
                                   loss_callback=self.minimize_callback)


In [0]:
class SpatialGramSynthesizer(Optimizer):
  
  def __init__(self, target_dynamic_path, generated_dynamic_path, dt, config, ):

    Optimizer.__init__(self, tf.Graph(), 256, 12, target_dynamic_path, '',config)

    with self.graph.as_default():
      with tf.device('/gpu:' + str(self.user_config['gpu'])):

        imgs = load_images(target_dynamic_path,
                       size=(input_frame_count,
                             input_dimension,
                             input_dimension))
        
        target_dynamic_texture = [tf.to_float(
            tf.constant(img.reshape(1, input_dimension,
                                  input_dimension, 3)))
                                  for img in imgs]

        initial_noise = tf.random_normal([1,
                                          input_dimension,
                                          input_dimension, 3])

        
        output = tf.Variable(initial_noise, name='output')
        
        is_generated = len(os.listdir(generated_dynamic_path))
        
        if is_generated:
        
          gen_imgs = load_images(generated_dynamic_path,
                         size=(input_frame_count,
                               input_dimension,
                               input_dimension))
          
          generated_dynamic_texture = [tf.to_float(
            tf.constant(img.reshape(1, input_dimension,
                                  input_dimension, 3)))
                                  for img in gen_imgs]
          
        else:
          
          generated_dynamic_texture = []

        # starting from the last (dt-1) frames we'll optimize new one (output)
        output_within_timeframe = (target_dynamic_texture + generated_dynamic_texture)[:-(dt-1)] + [output]

        self.dyntex_loss = build_spatial_gram_descriptors('spatial_gram_descriptors', 1e9)
        
        self.attach_summaries('summaries')

  def build_spatial_gram_descriptors(self, name, weight):

    with tf.get_default_graph().name_scope(name):
        # TODO: make this user-definable
        loss_layers = ['pool1', 'pool2',
                       'pool3', 'pool4']

        activations = []
        activations_output = []
        for i in range(input_frame_count):
            # texture target is in RGB [0,1], but VGG
            # accepts BGR [0-mean,255-mean] mean subtracted
            input = vgg_process(target_dynamic_texture[i])

            a = AppearanceDescriptor('spatial_gram_descriptor_' + str(i+1), name, input)

            activations.append([a.activations_for_layer(l) for l in loss_layers])

        for i in range(dt):

            a_output = AppearanceDescriptor('spatial_gram_descriptor_out', name, vgg_process(output_within_timeframe[i]))

            activations_output.append([a_output.activations_for_layer(l) for l in loss_layers])

        Gl = [] # Spatial Grammians of the target 
        Ol = [] # Spatial Grammians of the output 

        for l in range(len(loss_layers)):

            # gramians for this layer within different time-windows i:i+dt
            Gl_dt = []

            for i in range(opt.input_frame_count - dt + 1):

                Gl_dt.append(self.compute_timewindow_gram_l(activations[i:i+dt], l))

            Gl.append(tf.reduce_mean(tf.concat(Gl_dt, 0), 0))

            # we take [0] to get make (N x N) tensor instead of (1 x N x N)
            Ol.append(self.compute_timewindow_gram_l(activations_output,l)[0])

        spatial_gram_loss = tf.add_n([tf.reduce_sum(tf.square(tf.subtract(G,O))) for G,O in zip(Gl, Ol)])

    return tf.multiply(spatial_gram_loss, weight)


  def compute_timewindow_gram_l(activations, layer):
  
    '''
    Returns gram matrixes of activations (within time-window dt) for some layer
    '''

    # reshape activations

    shape = activations[0][layer].get_shape().as_list()

    # F with shape M_l x dt*N_l
    F = tf.concat([tf.reshape(act[layer], (shape[1]*shape[2], shape[3])) for act in activations], axis = -1)

    mult = tf.matmul(F,F,transpose_a = True)

    # make a tensor shape: 1 x N x N instead of N x N
    mult = tf.expand_dims(mult,0)

    normalize_scale = tf.div(1.0, (shape[1]*shape[2]))

    return tf.multiply(normalize_scale, mult)
 

In [0]:
# path_vgg19 = '/content/gdrive/My Drive/Colab Notebooks/two-stream-dyntex-synth/models/vgg19_normalized.tfmodel'

# with open(path_vgg19, mode='rb') as f:
#     file_content = f.read()

# graph_def = tf.GraphDef()
# graph_def.ParseFromString(file_content)

# tf.import_graph_def(graph_def, name='appearance_desrcriptor', input_map = {'images': input})

In [0]:
g_def = tf.get_default_graph()

g_def.get_tensor_by_name('appearance_desrcriptor/pool1:0')

In [0]:
for n in tf.get_default_graph().as_graph_def().node:
  
  print (n.name)

In [0]:
def read_model_data(model, filename):
    """Unpickles and loads parameters into a Lasagne model."""
    filename = os.path.join('./', '%s.%s' % (filename, 'params'))
    with open(filename, 'r') as f:
        data = pickle.load(f)
    lasagne.layers.set_all_param_values(model, data)

In [0]:
tex_dir = './'
img = plt.imread('./test.png')
img2 = preprocess('test')

In [0]:
def list2str(liste):
    '''
    This functions takes a list and returns a string containing the elements of the list
    '''
    a=''
    for x in range(len(liste)):
        a+='-'+str(liste[x])
    return a

def preprocess(im_code): 
    '''
    preprocessing of the images
    '''
    frame=io.imread(tex_dir+im_code+'.png').astype('uint8')
    frame = (frame[:,:,::-1]-[103.939, 116.779, 123.68]).transpose(2,0,1)[None,:]
    return frame.astype('float32') # 3,256,256
 

def deprocess(x):
    '''
    deprocessing of the images
    '''
    i=x.reshape(3,im_size[0],im_size[1])
    i=i.transpose(1,2,0)
    i=i+[103.939, 116.779, 123.68]  
    i=i[:,:,::-1] 
    i=np.uint8(i)
    return i

def resize_images(): 
    '''
    resize the images: im_size equals im_size[0]*im_size[1]=256**2
    the new images are saved as c_out1, ...
    '''
    for n in range(1,T_frames+1):
        frame=io.imread(tex_dir+'out'+str(n)+'.png').astype(uint8)

        if n==1: 
            im_size=[0,0]
            bn=256*np.sqrt(frame.shape[0]/float(frame.shape[1]))
            im_size[1]=int(round(256**2/bn))
            im_size[0]=int(round(bn))
            plt.imshow(frame)
            plt.show()

        frame = transform.resize(frame, (im_size[0], im_size[1]),
                                order=3, preserve_range=True).astype(uint8)
        io.imsave(tex_dir+'c_out'+str(n)+'.png', frame)
    return im_size


In [0]:
from lasagne.utils import floatX

def gram_matrix(x):
    '''
    calculates the gram matrix over the input
    '''
    x = x.flatten(ndim=3)
    g = T.tensordot(x, x, axes=([2], [2]))

    return g

def my_loss(X, layer, source_gram_s):
    '''
    loss function for the optimisation process
    '''
    x = X[layer]
    A=source_gram_s[layer]
    G = gram_matrix(x)
    
    N = x.shape[1]
    M = x.shape[2] * x.shape[3]
    
    loss = 1./(4 * N**2 * M**2) * ((G - A)**2).sum()
    return loss

def globalgram(A, layer):
    '''
    helper function
    '''
    a = A[layer]
    A = gram_matrix(a)
    return A

def get_statistics(frames):
    input_im_theano = T.tensor4()
        
    layers = {k: vggnet[k] for k in tex_layers}            
    outputs = lasagne.layers.get_output(layers.values(), input_im_theano)
    source_features = {k: theano.shared(output.eval({input_im_theano: frames}))
            for k, output in zip(layers.keys(), outputs)}
        
    source_gram={k: globalgram(source_features, k) for k in tex_layers}    
    return {k: source_gram[k].eval() for k in tex_layers}
    
def do_frames(source_gram_s, bounds, init, framenum):
    '''
    optimisation process of a new frame
    '''
    generated_image = theano.shared(floatX(np.random.uniform(-128, 128, (num, 3, im_size[0], im_size[1]))))
    input_im_theano = T.tensor4()
        
    layers = {k: vggnet[k] for k in tex_layers}            

    gen_features = lasagne.layers.get_output(layers.values(), generated_image)
    gen_features = {k: v for k, v in zip(layers.keys(), gen_features)}

    losses = []
    for tex_layer in tex_layers:  
        losses.append(1e9 * my_loss(gen_features, tex_layer, source_gram_s))
        

    total_loss = sum(losses) 
    grad = T.grad(total_loss, generated_image)

    f_loss_grad=theano.function([], [total_loss, grad])
    loss_list=[]        
    def eval_loss_grad(x0):
        x0 = floatX(x0.reshape((num, 3, im_size[0], im_size[1])))

        generated_image.set_value(x0)
        l,g=f_loss_grad() 
        
        if predict or framenum!=framenums[0]:                        
            for n in range(1,num):
                g[n,:,:,:]=0
            
        g=np.array(g).flatten().astype('float64')     
        return l.astype('float64'), g 
    

    #optimization  
    result = minimize(eval_loss_grad, init,
                      method='L-BFGS-B',
                      jac=True,
                      bounds=bounds,
                      options={'maxiter': 500,
                                'maxcor': 20,
                                'ftol': 0, 'gtol': 0})

    return result

In [0]:
def get_bounds_and_source_gram():    
    '''
    loops over the original images and calculates the bounds and the statistics of the dynamic texture 
    '''
    lowerbound,upperbound=[0,0,0],[0,0,0]
    for framenum in framenums: # range(3,4) , num = 3

        frames=np.ones([num,3,im_size[0],im_size[1]]).astype('float32')
        tmp=0
        for n in nums:               
            frames[tmp,:,:,:]=preprocess('c_out'+str(framenum-n))#orig frame 0
            tmp+=1

        source_gram=get_statistics(frames)
        if framenum==framenums[0]:
            source_gram_s=source_gram
        else:
            source_gram_s={k: source_gram_s[k]+source_gram[k] for k in tex_layers}

        #get bounds (== the minimum/maximum of the used frames)
        lowerbound[0]=min(lowerbound[0],frames[:,0,:,:].min())
        lowerbound[1]=min(lowerbound[1],frames[:,1,:,:].min())
        lowerbound[2]=min(lowerbound[2],frames[:,2,:,:].min())
        upperbound[0]=max(upperbound[0],frames[:,0,:,:].max())
        upperbound[1]=max(upperbound[1],frames[:,1,:,:].max())
        upperbound[2]=max(upperbound[2],frames[:,2,:,:].max())
    bounds = list()
    for tmp in range(num):
        for tmp2 in range(3):
            for tmp3 in range(im_size[0]*im_size[1]):
                bounds.append((lowerbound[tmp2],upperbound[tmp2]))  
    source_gram_s={k: source_gram_s[k]/float(len(framenums)) for k in tex_layers}
    return source_gram_s, bounds

def generate_frames(source_gram_s, bounds):
    '''
    generates the new frames
    '''
    for framenum in range(framenums[0],framenums[0]+number_of_generated_frames):
        print framenum
        init = (scipy.randn(num,3,im_size[0],im_size[1])).astype('float32')

        tmp=1

        for n in nums[1:]:
            if (framenum-n)<framenums[0]:
                if predict:
                    init[tmp,:,:,:]=preprocess('c_out'+str(framenum-n))
            else:
                init[tmp,:,:,:]=preprocess(name+str(framenum-n))
            tmp+=1

        result=do_frames(source_gram_s, bounds, init=init, framenum=framenum) 


        i=result.x.reshape(num,3,im_size[0],im_size[1])
        i0=i[0,:,:,:]

        erg0=deprocess(i0)
        if predict==False and framenum==framenums[0]:
            for n in nums[1:]:
                 io.imsave(tex_dir+name+str(framenum-n)+'.png', deprocess(i[n,:,:,:])) 

        imshow(erg0)
        plt.show()

        print '-----------------------------------------------------------------------'
        io.imsave(tex_dir+name+str(framenum)+'.png', erg0)  

change the parameters in the next block to choose delta t, T, ...    
the original frames have to be in a folder called 'test/' and have to be named as out1.png, out2.png, ...

**starting condition:**  
predict=true: frames of the original video are used   
predict=false: random initialisation

In [0]:
#---------------------------------------
tex_dir = './two-stream-dyntex-synth/data/dynamic_textures/escalator/'
nums=[0,1,2] # for delta t=2 use nums=[0,1], for delta t=3 use nums=[0,1,2],...
T_frames=3 # T number of frames of the original video

number_of_generated_frames=2 # number of generated frames
predict=False # defines the starting condition
            
#----------------------------------------------

num=len(nums)

if predict:
    name='predict'+list2str(nums)+'_'+str(T_frames-nums[-1])+'_'
else:
    name='random'+list2str(nums)+'_'+str(T_frames-nums[-1])+'_'
framenums=range(nums[-1]+1,T_frames+1) # range(3,4)

tex_layers=['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
im_size=resize_images()
vggnet = build_model(im_size)
read_model_data(vggnet['pool5'], 'normalized_vgg19_weights.weights')

source_gram_s, bounds=get_bounds_and_source_gram()
generate_frames(source_gram_s, bounds)   

