# Architecture 1 : "Learning Image Matching by Simply Watching Video"

In [1]:
import numpy as np
import tensorflow as tf
import os
import time
import datetime
import cv2
import sys

print(sys.version)

3.5.2 |Anaconda 4.2.0 (64-bit)| (default, Jul  2 2016, 17:53:06) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]


## Parameters

In [3]:
_conv_layers = [6,96,96,128,128,128]
_activation = 'relu'

_batch_size = 16 #16 in the paper but too expensive for the GPU
_learning_rate = 1e-5
_epochs = 20
_step_test = 50
_step_viz = 50

_dataset = "KITTI"
# _dataset = "SINTEL"

test_dataset = "SINTEL"

if test_dataset == "KITTI":
    _h,_w = (128,384)
if test_dataset == "SINTEL":
    _h,_w = (128,256)

_data_folder = "data/%s"%_dataset
_train_folder = "train/%s"%_dataset
_test_folder = "test/%s"%_dataset
_previously_trained = True


if not os.path.exists(_test_folder):
    os.makedirs(_test_folder)
    print("Directory created :", _test_folder)

In [4]:
def activation(x,name=None):
    if _activation == 'sigmoid':
        return tf.nn.sigmoid(x,name)
    if _activation == 'relu':
        return tf.nn.relu(x,name)
    
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial, name='weights')

def bias_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial, name='bias')

## Create training batch

In [5]:
def generate_full_sequence_batch(dataset, seq, train = 1, folder=0, quality ="clean"):    
    if dataset == "SINTEL":
        if train:
            folder = "train"
        else:
            folder = "test"
            
        l = len(os.listdir("data/"+dataset+"/"+folder+"/"+quality+"/"+seq))-2
        frames = np.ndarray([3,l,128,256, 3],np.float32)
        for i in range(l):
            frames[0,i,:,:,:] = cv2.imread("data/"+dataset+"/"+folder+"/"+quality+"/"+seq+"/frame_%s"%'{0:04}'.format(i)+".png")
            frames[1,i,:,:,:] = cv2.imread("data/"+dataset+"/"+folder+"/"+quality+"/"+seq+"/frame_%s"%'{0:04}'.format(i+1)+".png")
            frames[2,i,:,:,:] = cv2.imread("data/"+dataset+"/"+folder+"/"+quality+"/"+seq+"/frame_%s"%'{0:04}'.format(i+2)+".png")
    
    if dataset == "KITTI":
        l = len(os.listdir("data/"+_dataset+"/kitti_resized/"+folder+"/"+seq))-2
        frames = np.ndarray([3,l,128,384, 3],np.float32)
        
        for i in range(l):  
            frames[0,i,:,:,:] = cv2.imread("data/"+dataset+"/kitti_resized/"+folder+"/"+seq+"/%s"%'{0:010}'.format(i)+".png")
            frames[1,i,:,:,:] = cv2.imread("data/"+dataset+"/kitti_resized/"+folder+"/"+seq+"/%s"%'{0:010}'.format(i+1)+".png")
            frames[2,i,:,:,:] = cv2.imread("data/"+dataset+"/kitti_resized/"+folder+"/"+seq+"/%s"%'{0:010}'.format(i+2)+".png")
    
    
    return np.concatenate([frames[0],frames[2]],axis=3)/255.,frames[1]/255.


def generate_batch(dataset,batch_size, train = 1, quality ="clean"):
    
    if dataset == "SINTEL":
        frames = np.ndarray([3,batch_size,128,256, 3],np.float32)
        if train:
            folder = "train"
        else:
            folder = "test"
        
        seq = np.random.choice(os.listdir("data/"+dataset+"/"+folder+"/"+quality),batch_size)        
        for i in range(batch_size):
            index = np.random.randint(1,len(os.listdir("data/"+dataset+"/"+folder+"/"+quality+"/"+seq[i]))-2)            
            frames[0,i,:,:,:] = cv2.imread("data/"+dataset+"/"+folder+"/"+quality+"/"+seq[i]+"/frame_%s"%'{0:04}'.format(index)+".png")
            frames[1,i,:,:,:] = cv2.imread("data/"+dataset+"/"+folder+"/"+quality+"/"+seq[i]+"/frame_%s"%'{0:04}'.format(index+1)+".png")
            frames[2,i,:,:,:] = cv2.imread("data/"+dataset+"/"+folder+"/"+quality+"/"+seq[i]+"/frame_%s"%'{0:04}'.format(index+2)+".png")
    
    if dataset == "KITTI":
        frames = np.ndarray([3,batch_size,128,384, 3],np.float32)
        if train:
            folders = ["2011_09_26","2011_09_28","2011_09_30","2011_10_03"]
            folder = np.random.choice(folders,batch_size)
        else:
            folders = ["2011_09_29"]
            folder = np.random.choice(folders,batch_size)        
        
        for i in range(batch_size):
            seq = np.random.choice(os.listdir("data/"+dataset+"/kitti_resized/"+folder[i]))
            
            index = np.random.randint(1,len(os.listdir("data/"+dataset+"/kitti_resized/"+folder[i]+"/"+seq))-2)            
            frames[0,i,:,:,:] = cv2.imread("data/"+dataset+"/kitti_resized/"+folder[i]+"/"+seq+"/%s"%'{0:010}'.format(index)+".png")
            frames[1,i,:,:,:] = cv2.imread("data/"+dataset+"/kitti_resized/"+folder[i]+"/"+seq+"/%s"%'{0:010}'.format(index+1)+".png")
            frames[2,i,:,:,:] = cv2.imread("data/"+dataset+"/kitti_resized/"+folder[i]+"/"+seq+"/%s"%'{0:010}'.format(index+2)+".png")
    
    
    return np.concatenate([frames[0],frames[2]],axis=3)/255.,frames[1]/255.

def load_batch(dataset,batch_size, train = 0):
    ground_truth = cv2.imread(_train_folder+"/ground_truth.png")  
    
    ground_truth= np.split(ground_truth,batch_size,axis=0)
    
    if dataset == "KITTI":
        frames = np.ndarray([3,batch_size,128,384, 3],np.float32)
        if train:
            folders = ["2011_09_26","2011_09_28","2011_09_30","2011_10_03"]
            folder = np.random.choice(folders,batch_size)
        else:
            folders = ["2011_09_29"]
            folder = np.random.choice(folders,batch_size)
        
        for i in range(batch_size):
            for seq in os.listdir("data/"+dataset+"/kitti_resized/"+folder[i]):
                for index in range(len(os.listdir("data/"+dataset+"/kitti_resized/"+folder[i]+"/"+seq))-1):
                    if (cv2.imread("data/"+dataset+"/kitti_resized/"+folder[i]+"/"+seq+"/%s"%'{0:010}'.format(index+1)+".png") == ground_truth[i]).all():
                    
                        frames[0,i,:,:,:] = cv2.imread("data/"+dataset+"/kitti_resized/"+folder[i]+"/"+seq+"/%s"%'{0:010}'.format(index)+".png")
                        frames[1,i,:,:,:] = cv2.imread("data/"+dataset+"/kitti_resized/"+folder[i]+"/"+seq+"/%s"%'{0:010}'.format(index+1)+".png")
                        frames[2,i,:,:,:] = cv2.imread("data/"+dataset+"/kitti_resized/"+folder[i]+"/"+seq+"/%s"%'{0:010}'.format(index+2)+".png")
    
    return np.concatenate([frames[0],frames[2]],axis=3)/255.,frames[1]/255.


## Convolutional Network graph

In [6]:
sess = tf.InteractiveSession()

x = tf.placeholder("float32",[None,_h,_w,6],name='x_input')
x_gt = tf.placeholder("float32",[None,_h,_w,3],name='x_ground-truth')

#CONVOLUTIONAL ENCODER

with tf.variable_scope("CONV-BLOCK1") as scope:
    with tf.variable_scope('conv1') as scope:
        W1_1 = weight_variable([3,3,_conv_layers[0],_conv_layers[1]])
        b1_1 = bias_variable([_conv_layers[1]])             
        preacti1_1 = tf.nn.bias_add(tf.nn.conv2d(x,W1_1,[1,1,1,1],padding='SAME'),b1_1)
        conv1_1 = activation(preacti1_1,name=scope.name)

    with tf.variable_scope('conv2') as scope:
        W2_1 = weight_variable([3,3,_conv_layers[1],_conv_layers[1]])
        b2_1 = bias_variable([_conv_layers[1]])             
        preacti2_1 = tf.nn.bias_add(tf.nn.conv2d(conv1_1,W2_1,[1,1,1,1],padding='SAME'),b2_1)
        conv2_1 = activation(preacti2_1,name=scope.name)

    with tf.variable_scope('conv3') as scope:
        W3_1 = weight_variable([3,3,_conv_layers[1],_conv_layers[1]])
        b3_1 = bias_variable([_conv_layers[1]])             
        preacti3_1 = tf.nn.bias_add(tf.nn.conv2d(conv2_1,W3_1,[1,1,1,1],padding='SAME'),b3_1)
        conv3_1 = activation(preacti3_1,name=scope.name)
        
    pool_1 = tf.nn.max_pool(conv3_1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool1')
    

    
for i in range(2,6):    
    with tf.variable_scope("CONV-BLOCK%s"%i) as scope:
        with tf.variable_scope('conv1') as scope:
            globals()['W1_%s'%i] = weight_variable([3,3,_conv_layers[i-1],_conv_layers[i]])
            globals()['b1_%s'%i] = bias_variable([_conv_layers[i]])             
            globals()['preacti1_%s'%i] = tf.nn.bias_add(tf.nn.conv2d(globals()['pool_%s'%(i-1)],globals()['W1_%s'%i],[1,1,1,1],padding='SAME'),globals()['b1_%s'%i])
            globals()['conv1_%s'%i] = activation(globals()['preacti1_%s'%i],name=scope.name)

        with tf.variable_scope('conv2') as scope:
            globals()['W2_%s'%i] = weight_variable([3,3,_conv_layers[i],_conv_layers[i]])
            globals()['b2_%s'%i] = bias_variable([_conv_layers[i]])             
            globals()['preacti2_%s'%i] = tf.nn.bias_add(tf.nn.conv2d(globals()['conv1_%s'%i],globals()['W2_%s'%i],[1,1,1,1],padding='SAME'),globals()['b2_%s'%i])
            globals()['conv2_%s'%i] = activation(globals()['preacti2_%s'%i],name=scope.name)

        with tf.variable_scope('conv3') as scope:
            globals()['W3_%s'%i] = weight_variable([3,3,_conv_layers[i],_conv_layers[i]])
            globals()['b3_%s'%i] = bias_variable([_conv_layers[i]])             
            globals()['preacti3_%s'%i] = tf.nn.bias_add(tf.nn.conv2d(globals()['conv2_%s'%i],globals()['W3_%s'%i],[1,1,1,1],padding='SAME'),globals()['b3_%s'%i])
            globals()['conv3_%s'%i] = activation(globals()['preacti3_%s'%i],name=scope.name)

        globals()['pool_%s'%i] = tf.nn.max_pool(globals()['conv3_%s'%i], ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool1')

#DECONVOLUTIONAL DECODER

batch_size = tf.shape(x)[0]

with tf.variable_scope("DECONV-BLOCK5") as scope:
    with tf.variable_scope('deconv1') as scope:
        W1_d5 = weight_variable([4,4,_conv_layers[5],_conv_layers[4]])
        b1_d5 = bias_variable([_conv_layers[4]])
        convtr_d5 = tf.nn.conv2d_transpose(pool_5,W1_d5,[batch_size,int(_h/2**4),int(_w/2**4),_conv_layers[4]],[1,2,2,1],padding='SAME')
        preacti1_d5 = tf.nn.bias_add(convtr_d5,b1_d5)
        conv1_d5 = activation(preacti1_d5,name=scope.name)

    with tf.variable_scope('deconv2') as scope:
        W2_d5 = weight_variable([3,3,_conv_layers[4],_conv_layers[4]])
        b2_d5 = bias_variable([_conv_layers[4]])             
        preacti2_d5 = tf.nn.bias_add(tf.nn.conv2d(conv1_d5,W2_d5,[1,1,1,1],padding='SAME'),b2_d5)
        conv2_d5 = activation(preacti2_d5,name=scope.name)

    with tf.variable_scope('deconv3') as scope:
        W3_d5 = weight_variable([3,3,_conv_layers[4],_conv_layers[4]])
        b3_d5 = bias_variable([_conv_layers[4]])             
        preacti3_d5 = tf.nn.bias_add(tf.nn.conv2d(conv2_d5,W3_d5,[1,1,1,1],padding='SAME'),b3_d5)
        conv3_d5 = activation(preacti3_d5,name=scope.name)

        
for i in range(4,1,-1):
    with tf.variable_scope("DECONV-BLOCK%s"%i) as scope:
        
        globals()['concat_d%s'%i] = tf.concat(3,[globals()['conv3_d%s'%(i+1)],globals()['pool_%s'%(i)]])
        
        with tf.variable_scope('deconv1') as scope:
            globals()['W1_d%s'%i] = weight_variable([4,4,_conv_layers[i-1],2*_conv_layers[i]])
            globals()['b1_d%s'%i] = bias_variable([_conv_layers[i-1]])
            globals()['convtr_d%s'%i] = tf.nn.conv2d_transpose(globals()['concat_d%s'%i],globals()['W1_d%s'%i],[batch_size,int(_h/2**(i-1)),int(_w/2**(i-1)),_conv_layers[i-1]],[1,2,2,1],padding='SAME')
            globals()['preacti1_d%s'%i] = tf.nn.bias_add(globals()['convtr_d%s'%i],globals()['b1_d%s'%i])
            globals()['conv1_d%s'%i] = activation(globals()['preacti1_d%s'%i],name=scope.name)
            
        with tf.variable_scope('deconv2') as scope:
            globals()['W2_d%s'%i] = weight_variable([3,3,_conv_layers[i-1],_conv_layers[i-1]])
            globals()['b2_d%s'%i] = bias_variable([_conv_layers[i-1]])  
            globals()['preacti2_d%s'%i] = tf.nn.bias_add(tf.nn.conv2d(globals()['conv1_d%s'%i],globals()['W2_d%s'%i],[1,1,1,1],padding='SAME'),globals()['b2_d%s'%i])
            globals()['conv2_d%s'%i] = activation(globals()['preacti2_d%s'%i],name=scope.name)

        with tf.variable_scope('deconv3') as scope:
            globals()['W3_d%s'%i] = weight_variable([3,3,_conv_layers[i-1],_conv_layers[i-1]])
            globals()['b3_d%s'%i] = bias_variable([_conv_layers[i-1]])             
            globals()['preacti3_d%s'%i] = tf.nn.bias_add(tf.nn.conv2d(globals()['conv2_d%s'%i],globals()['W3_d%s'%i],[1,1,1,1],padding='SAME'),globals()['b3_d%s'%i])
            globals()['conv3_d%s'%i] = activation(globals()['preacti3_d%s'%i],name=scope.name)
            
            
with tf.variable_scope("DECONV-BLOCK1") as scope:
    with tf.variable_scope('deconv1') as scope:
        W1_d1 = weight_variable([4,4,3,_conv_layers[1]])
        b1_d1 = bias_variable([3])
        convtr_d1 = tf.nn.conv2d_transpose(conv3_d2,W1_d1,[batch_size,_h,_w,3],[1,2,2,1],padding='SAME')
        preacti1_d1 = tf.nn.bias_add(convtr_d1,b1_d1)
        conv1_d1 = activation(preacti1_d1,name=scope.name)

    with tf.variable_scope('deconv2') as scope:
        W2_d1 = weight_variable([3,3,3,3])
        b2_d1 = bias_variable([3])             
        preacti2_d1 = tf.nn.bias_add(tf.nn.conv2d(conv1_d1,W2_d1,[1,1,1,1],padding='SAME'),b2_d1)
        conv2_d1 = activation(preacti2_d1,name=scope.name)

    with tf.variable_scope('deconv3') as scope:
        W3_d1 = weight_variable([3,3,3,3])
        b3_d1 = bias_variable([3])             
        preacti3_d1 = tf.nn.bias_add(tf.nn.conv2d(conv2_d1,W3_d1,[1,1,1,1],padding='SAME'),b3_d1)
        conv3_d1 = activation(preacti3_d1,name=scope.name)


Potential improvements : add dropout and batch normalization (not in the paper)

## Loss and metrics

In [7]:
def charbonnier_loss(x):
    return tf.sqrt(0.1**2+tf.reduce_mean(tf.square(x)))
    
def interpol_error(x,gt):
    return tf.sqrt(tf.reduce_mean(tf.square(tf.sub(x,gt))))

def normalized_interpol_error(x,gt):
    norm_grad = cv2.Sobel(gt,cv2.CV_64F,1,0,ksize=5)**2+cv2.Sobel(gt,cv2.CV_64F,0,1,ksize=5)**2 
    return tf.sqrt(tf.reduce_mean(tf.div(tf.square(tf.sub(x,gt)),(norm_grad+1))))

In [8]:
with tf.name_scope('loss') as scope:
    cost = charbonnier_loss(tf.sub(conv3_d1,x_gt))
    tf.summary.scalar('loss',cost)

saver = tf.train.Saver()

#initialization
init = tf.global_variables_initializer()

#launch graph
sess.run(init)

# restore weights from model
saver.restore(sess, _train_folder+"/model.ckpt")
print("Model restored.")

Model restored.


## Launch Test

In [None]:
if test_dataset == "KITTI"
    #test on kitti
    mini_batch_size = 10

    start = time.time()
    folder = "2011_09_29"
    for seq in os.listdir("data/KITTI/kitti_resized/"+folder):
        batch = generate_full_sequence_batch("KITTI", seq, train = 0, folder = folder)

        index = 0
        while index < batch[0].shape[0]:
            mini_batch = [batch[0][index:index+mini_batch_size],batch[1][index:index+mini_batch_size]]

            feed = {x: mini_batch[0], x_gt: mini_batch[1]}        
            x_reconstruct,c =  sess.run([conv3_d1,cost], feed_dict=feed)

            if not os.path.exists(_test_folder+"/"+folder+"/"+seq):
                os.makedirs(_test_folder+"/"+folder+"/"+seq)
                print("Directory created :",_test_folder+"/"+folder+"/"+seq)

            for i in range(x_reconstruct.shape[0]):
                cv2.imwrite(_test_folder+"/"+folder+"/"+seq+"/%s"%'{0:010}'.format(index+i+1)+".png",255*x_reconstruct[i])

            print("Sequence %s : cost %s , time %s"%(seq,c,time.time()-start))

            index += mini_batch_size

    print("Test Finished")


In [9]:
if test_dataset == "SINTEL"
    #test on sintel
    mini_batch_size = 10

    start = time.time()
    for seq in os.listdir("data/SINTEL/test/clean/"):
        batch = generate_full_sequence_batch("SINTEL", seq, train = 0)
        index = 0
        while index < batch[0].shape[0]:
            mini_batch = [batch[0][index:index+mini_batch_size],batch[1][index:index+mini_batch_size]]

            feed = {x: mini_batch[0], x_gt: mini_batch[1]}        
            x_reconstruct,c =  sess.run([conv3_d1,cost], feed_dict=feed)

            if not os.path.exists(_test_folder+"/"+seq):
                os.makedirs(_test_folder+"/"+seq)
                print("Directory created :",_test_folder+"/"+seq)

            for i in range(x_reconstruct.shape[0]):
                cv2.imwrite("test/SINTEL/"+seq+"/%s"%'{0:010}'.format(index+i+1)+".png",255*x_reconstruct[i])

            print("Sequence %s : cost %s , time %s"%(seq,c,time.time()-start))

            index += mini_batch_size

    print("Test Finished")


Directory created : test/KITTI/ambush_1
Sequence ambush_1 : cost 0.203717 , time 3.5373122692108154
Sequence ambush_1 : cost 0.157576 , time 3.965120315551758
Sequence ambush_1 : cost 0.125225 , time 5.579365015029907
Directory created : test/KITTI/ambush_3
Sequence ambush_3 : cost 0.175527 , time 6.178162336349487
Sequence ambush_3 : cost 0.133414 , time 6.618901252746582
Sequence ambush_3 : cost 0.153471 , time 7.063467741012573
Sequence ambush_3 : cost 0.140891 , time 10.005988836288452
Directory created : test/KITTI/bamboo_3
Sequence bamboo_3 : cost 0.176403 , time 10.660010814666748
Sequence bamboo_3 : cost 0.145766 , time 11.093683004379272
Sequence bamboo_3 : cost 0.157063 , time 11.522085905075073
Sequence bamboo_3 : cost 0.154091 , time 11.992233514785767
Sequence bamboo_3 : cost 0.15722 , time 14.497493982315063
Directory created : test/KITTI/cave_3
Sequence cave_3 : cost 0.12234 , time 15.137131214141846
Sequence cave_3 : cost 0.119831 , time 15.562030553817749
Sequence cave