# Achitecture 1 : "Learning Image Matching by Simply Watching Video"

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.contrib.layers import batch_norm
import os
import time
import datetime
import cv2
import sys

print(sys.version)

3.5.2 |Anaconda 4.2.0 (64-bit)| (default, Jul  2 2016, 17:53:06) 
[GCC 4.4.7 20120313 (Red Hat 4.4.7-1)]


## Parameters

In [2]:
_conv_layers = [6,96,96,128,128,128]
_activation = 'relu'

_batch_size = 4 #16 in the paper but too expensive for the GPU
_learning_rate = 5e-4
_epochs = 20
_step_test = 50
_step_viz = 50

_dataset = "KITTI"
# _dataset = "SINTEL"

if _dataset == "KITTI":
    _h,_w = (128,384)
if _dataset == "SINTEL":
    _h,_w = (128,256)

_data_folder = "data/%s"%_dataset
_train_folder = "train/%s"%_dataset
_previously_trained = False

if not os.path.exists(_train_folder):
    os.makedirs(_train_folder)
    print("Directory created :", _train_folder)

In [3]:
def activation(x,name=None):
    if _activation == 'sigmoid':
        return tf.nn.sigmoid(x,name)
    if _activation == 'relu':
        return tf.nn.relu(x,name)
    
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial, name='weights')

def bias_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial, name='bias')

## Create training batch

In [4]:
def generate_batch(dataset,batch_size, train = 1, quality ="clean"):
    
    if dataset == "SINTEL":
        frames = np.ndarray([3,batch_size,128,256, 3],np.float32)
        if train:
            folder = "train"
        else:
            folder = "test"
        
        seq = np.random.choice(os.listdir("data/"+dataset+"/"+folder+"/"+quality),batch_size)        
        for i in range(batch_size):
            index = np.random.randint(1,len(os.listdir("data/"+dataset+"/"+folder+"/"+quality+"/"+seq[i]))-2)            
            frames[0,i,:,:,:] = cv2.imread("data/"+dataset+"/"+folder+"/"+quality+"/"+seq[i]+"/frame_%s"%'{0:04}'.format(index)+".png")
            frames[1,i,:,:,:] = cv2.imread("data/"+dataset+"/"+folder+"/"+quality+"/"+seq[i]+"/frame_%s"%'{0:04}'.format(index+1)+".png")
            frames[2,i,:,:,:] = cv2.imread("data/"+dataset+"/"+folder+"/"+quality+"/"+seq[i]+"/frame_%s"%'{0:04}'.format(index+2)+".png")
    
    if dataset == "KITTI":
        frames = np.ndarray([3,batch_size,128,384, 3],np.float32)
        if train:
            folders = ["2011_09_26","2011_09_28","2011_09_30","2011_10_03"]
            folder = np.random.choice(folders,batch_size)
        else:
            folders = ["2011_09_29"]
            folder = np.random.choice(folders,batch_size)        
        
        for i in range(batch_size):
            seq = np.random.choice(os.listdir("data/"+dataset+"/kitti_resized/"+folder[i]))
            
            index = np.random.randint(1,len(os.listdir("data/"+dataset+"/kitti_resized/"+folder[i]+"/"+seq))-2)            
            frames[0,i,:,:,:] = cv2.imread("data/"+dataset+"/kitti_resized/"+folder[i]+"/"+seq+"/%s"%'{0:010}'.format(index)+".png")
            frames[1,i,:,:,:] = cv2.imread("data/"+dataset+"/kitti_resized/"+folder[i]+"/"+seq+"/%s"%'{0:010}'.format(index+1)+".png")
            frames[2,i,:,:,:] = cv2.imread("data/"+dataset+"/kitti_resized/"+folder[i]+"/"+seq+"/%s"%'{0:010}'.format(index+2)+".png")
    
    
    return np.concatenate([frames[0],frames[2]],axis=3)/255.,frames[1]/255.

def load_batch(dataset,batch_size, train = 0):
    ground_truth = cv2.imread(_train_folder+"/ground_truth.png")  
    
    ground_truth= np.split(ground_truth,batch_size,axis=0)
    
    if dataset == "KITTI":
        frames = np.ndarray([3,batch_size,128,384, 3],np.float32)
        if train:
            folders = ["2011_09_26","2011_09_28","2011_09_30","2011_10_03"]
            folder = np.random.choice(folders,batch_size)
        else:
            folders = ["2011_09_29"]
            folder = np.random.choice(folders,batch_size)
        
        for i in range(batch_size):
            for seq in os.listdir("data/"+dataset+"/kitti_resized/"+folder[i]):
                for index in range(len(os.listdir("data/"+dataset+"/kitti_resized/"+folder[i]+"/"+seq))-1):
                    if (cv2.imread("data/"+dataset+"/kitti_resized/"+folder[i]+"/"+seq+"/%s"%'{0:010}'.format(index+1)+".png") == ground_truth[i]).all():
                    
                        frames[0,i,:,:,:] = cv2.imread("data/"+dataset+"/kitti_resized/"+folder[i]+"/"+seq+"/%s"%'{0:010}'.format(index)+".png")
                        frames[1,i,:,:,:] = cv2.imread("data/"+dataset+"/kitti_resized/"+folder[i]+"/"+seq+"/%s"%'{0:010}'.format(index+1)+".png")
                        frames[2,i,:,:,:] = cv2.imread("data/"+dataset+"/kitti_resized/"+folder[i]+"/"+seq+"/%s"%'{0:010}'.format(index+2)+".png")
    
    return np.concatenate([frames[0],frames[2]],axis=3)/255.,frames[1]/255.


## Convolutional Network graph

In [5]:
sess = tf.InteractiveSession()

x = tf.placeholder("float32",[None,_h,_w,6],name='x_input')
x_gt = tf.placeholder("float32",[None,_h,_w,3],name='x_ground-truth')

train = tf.constant(True)
drop = tf.placeholder("float")

#CONVOLUTIONAL ENCODER

with tf.variable_scope("CONV-BLOCK1") as scope:
    with tf.variable_scope('conv1') as scope:
        W1_1 = weight_variable([3,3,_conv_layers[0],_conv_layers[1]])
        b1_1 = bias_variable([_conv_layers[1]])             
        preacti1_1 = tf.nn.bias_add(tf.nn.conv2d(x,W1_1,[1,1,1,1],padding='SAME'),b1_1)        
        bn1_1 = batch_norm(preacti1_1,decay = 0.9, center = True, scale = True, is_training = train)
        conv1_1 = activation(bn1_1,name=scope.name)
        drop1_1 = tf.nn.dropout(conv1_1,1-drop)

    with tf.variable_scope('conv2') as scope:
        W2_1 = weight_variable([3,3,_conv_layers[1],_conv_layers[1]])
        b2_1 = bias_variable([_conv_layers[1]])             
        preacti2_1 = tf.nn.bias_add(tf.nn.conv2d(drop1_1,W2_1,[1,1,1,1],padding='SAME'),b2_1)
        bn2_1 = batch_norm(preacti2_1,decay = 0.9, center = True, scale = True, is_training = train)
        conv2_1 = activation(bn2_1,name=scope.name)
        drop2_1 = tf.nn.dropout(conv2_1,1-drop)
        
    with tf.variable_scope('conv3') as scope:
        W3_1 = weight_variable([3,3,_conv_layers[1],_conv_layers[1]])
        b3_1 = bias_variable([_conv_layers[1]])             
        preacti3_1 = tf.nn.bias_add(tf.nn.conv2d(drop2_1,W3_1,[1,1,1,1],padding='SAME'),b3_1)
        bn3_1 = batch_norm(preacti3_1,decay = 0.9, center = True, scale = True, is_training = train)
        conv3_1 = activation(bn3_1,name=scope.name)
        drop3_1 = tf.nn.dropout(conv3_1,1-drop)
        
    pool_1 = tf.nn.max_pool(drop3_1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool1')
    

    
for i in range(2,6):    
    with tf.variable_scope("CONV-BLOCK%s"%i) as scope:
        with tf.variable_scope('conv1') as scope:
            globals()['W1_%s'%i] = weight_variable([3,3,_conv_layers[i-1],_conv_layers[i]])
            globals()['b1_%s'%i] = bias_variable([_conv_layers[i]])             
            globals()['preacti1_%s'%i] = tf.nn.bias_add(tf.nn.conv2d(globals()['pool_%s'%(i-1)],globals()['W1_%s'%i],[1,1,1,1],padding='SAME'),globals()['b1_%s'%i])
            globals()['bn1_%s'%i] = batch_norm(globals()['preacti1_%s'%i],decay = 0.9, center = True, scale = True, is_training = train)
            globals()['conv1_%s'%i] = activation(globals()['bn1_%s'%i],name=scope.name)
            globals()['drop1_%s'%i] = tf.nn.dropout(globals()['conv1_%s'%i],1-drop)
            
        with tf.variable_scope('conv2') as scope:
            globals()['W2_%s'%i] = weight_variable([3,3,_conv_layers[i],_conv_layers[i]])
            globals()['b2_%s'%i] = bias_variable([_conv_layers[i]])             
            globals()['preacti2_%s'%i] = tf.nn.bias_add(tf.nn.conv2d(globals()['drop1_%s'%i],globals()['W2_%s'%i],[1,1,1,1],padding='SAME'),globals()['b2_%s'%i])
            globals()['bn2_%s'%i] = batch_norm(globals()['preacti2_%s'%i],decay = 0.9, center = True, scale = True, is_training = train)
            globals()['conv2_%s'%i] = activation(globals()['bn2_%s'%i],name=scope.name)
            globals()['drop2_%s'%i] = tf.nn.dropout(globals()['conv2_%s'%i],1-drop)

        with tf.variable_scope('conv3') as scope:
            globals()['W3_%s'%i] = weight_variable([3,3,_conv_layers[i],_conv_layers[i]])
            globals()['b3_%s'%i] = bias_variable([_conv_layers[i]])             
            globals()['preacti3_%s'%i] = tf.nn.bias_add(tf.nn.conv2d(globals()['drop2_%s'%i],globals()['W3_%s'%i],[1,1,1,1],padding='SAME'),globals()['b3_%s'%i])
            globals()['bn3_%s'%i] = batch_norm(globals()['preacti3_%s'%i],decay = 0.9, center = True, scale = True, is_training = train)
            globals()['conv3_%s'%i] = activation(globals()['bn3_%s'%i],name=scope.name)
            globals()['drop3_%s'%i] = tf.nn.dropout(globals()['conv3_%s'%i],1-drop)

        globals()['pool_%s'%i] = tf.nn.max_pool(globals()['drop3_%s'%i], ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool1')

#DECONVOLUTIONAL DECODER

batch_size = tf.shape(x)[0]

with tf.variable_scope("DECONV-BLOCK5") as scope:
    with tf.variable_scope('deconv1') as scope:
        W1_d5 = weight_variable([4,4,_conv_layers[5],_conv_layers[4]])
        b1_d5 = bias_variable([_conv_layers[4]])
        convtr_d5 = tf.nn.conv2d_transpose(pool_5,W1_d5,[batch_size,int(_h/2**4),int(_w/2**4),_conv_layers[4]],[1,2,2,1],padding='SAME')
        preacti1_d5 = tf.nn.bias_add(convtr_d5,b1_d5)
        bn1_d5 = batch_norm(preacti1_d5,decay = 0.9, center = True, scale = True, is_training = train)
        conv1_d5 = activation(bn1_d5,name=scope.name)
        drop1_d5 = tf.nn.dropout(conv1_d5,1-drop)

    with tf.variable_scope('deconv2') as scope:
        W2_d5 = weight_variable([3,3,_conv_layers[4],_conv_layers[4]])
        b2_d5 = bias_variable([_conv_layers[4]])             
        preacti2_d5 = tf.nn.bias_add(tf.nn.conv2d(drop1_d5,W2_d5,[1,1,1,1],padding='SAME'),b2_d5)
        bn2_d5 = batch_norm(preacti2_d5,decay = 0.9, center = True, scale = True, is_training = train)
        conv2_d5 = activation(bn2_d5,name=scope.name)
        drop2_d5 = tf.nn.dropout(conv2_d5,1-drop)

    with tf.variable_scope('deconv3') as scope:
        W3_d5 = weight_variable([3,3,_conv_layers[4],_conv_layers[4]])
        b3_d5 = bias_variable([_conv_layers[4]])             
        preacti3_d5 = tf.nn.bias_add(tf.nn.conv2d(drop2_d5,W3_d5,[1,1,1,1],padding='SAME'),b3_d5)
        bn3_d5 = batch_norm(preacti3_d5,decay = 0.9, center = True, scale = True, is_training = train)
        conv3_d5 = activation(bn3_d5,name=scope.name)
        drop3_d5 = tf.nn.dropout(conv3_d5,1-drop)

        
for i in range(4,1,-1):
    with tf.variable_scope("DECONV-BLOCK%s"%i) as scope:
        
        globals()['concat_d%s'%i] = tf.concat(3,[globals()['drop3_d%s'%(i+1)],globals()['pool_%s'%(i)]])
        
        with tf.variable_scope('deconv1') as scope:
            globals()['W1_d%s'%i] = weight_variable([4,4,_conv_layers[i-1],2*_conv_layers[i]])
            globals()['b1_d%s'%i] = bias_variable([_conv_layers[i-1]])
            globals()['convtr_d%s'%i] = tf.nn.conv2d_transpose(globals()['concat_d%s'%i],globals()['W1_d%s'%i],[batch_size,int(_h/2**(i-1)),int(_w/2**(i-1)),_conv_layers[i-1]],[1,2,2,1],padding='SAME')
            globals()['preacti1_d%s'%i] = tf.nn.bias_add(globals()['convtr_d%s'%i],globals()['b1_d%s'%i])
            globals()['bn1_d%s'%i] = batch_norm(globals()['preacti1_d%s'%i],decay = 0.9, center = True, scale = True, is_training = train)
            globals()['conv1_d%s'%i] = activation(globals()['bn1_d%s'%i],name=scope.name)
            globals()['drop1_d%s'%i] = tf.nn.dropout(globals()['conv1_d%s'%i],1-drop)
            
        with tf.variable_scope('deconv2') as scope:
            globals()['W2_d%s'%i] = weight_variable([3,3,_conv_layers[i-1],_conv_layers[i-1]])
            globals()['b2_d%s'%i] = bias_variable([_conv_layers[i-1]])  
            globals()['preacti2_d%s'%i] = tf.nn.bias_add(tf.nn.conv2d(globals()['drop1_d%s'%i],globals()['W2_d%s'%i],[1,1,1,1],padding='SAME'),globals()['b2_d%s'%i])
            globals()['bn2_d%s'%i] = batch_norm(globals()['preacti2_d%s'%i],decay = 0.9, center = True, scale = True, is_training = train)
            globals()['conv2_d%s'%i] = activation(globals()['bn2_d%s'%i],name=scope.name)
            globals()['drop2_d%s'%i] = tf.nn.dropout(globals()['conv2_d%s'%i],1-drop)
            
        with tf.variable_scope('deconv3') as scope:
            globals()['W3_d%s'%i] = weight_variable([3,3,_conv_layers[i-1],_conv_layers[i-1]])
            globals()['b3_d%s'%i] = bias_variable([_conv_layers[i-1]])             
            globals()['preacti3_d%s'%i] = tf.nn.bias_add(tf.nn.conv2d(globals()['drop2_d%s'%i],globals()['W3_d%s'%i],[1,1,1,1],padding='SAME'),globals()['b3_d%s'%i])
            globals()['bn3_d%s'%i] = batch_norm(globals()['preacti3_d%s'%i],decay = 0.9, center = True, scale = True, is_training = train)
            globals()['conv3_d%s'%i] = activation(globals()['bn3_d%s'%i],name=scope.name)
            globals()['drop3_d%s'%i] = tf.nn.dropout(globals()['conv3_d%s'%i],1-drop)
            
with tf.variable_scope("DECONV-BLOCK1") as scope:
    with tf.variable_scope('deconv1') as scope:
        W1_d1 = weight_variable([4,4,3,_conv_layers[1]])
        b1_d1 = bias_variable([3])
        convtr_d1 = tf.nn.conv2d_transpose(conv3_d2,W1_d1,[batch_size,_h,_w,3],[1,2,2,1],padding='SAME')
        preacti1_d1 = tf.nn.bias_add(convtr_d1,b1_d1)
        bn1_d1 = batch_norm(preacti1_d1,decay = 0.9, center = True, scale = True, is_training = train)
        conv1_d1 = activation(bn1_d1,name=scope.name)
        drop1_d1 = tf.nn.dropout(conv1_d1,1-drop)
        
    with tf.variable_scope('deconv2') as scope:
        W2_d1 = weight_variable([3,3,3,3])
        b2_d1 = bias_variable([3])             
        preacti2_d1 = tf.nn.bias_add(tf.nn.conv2d(conv1_d1,W2_d1,[1,1,1,1],padding='SAME'),b2_d1)
        bn2_d1 = batch_norm(preacti2_d1,decay = 0.9, center = True, scale = True, is_training = train)
        conv2_d1 = activation(bn2_d1,name=scope.name)
        drop2_d1 = tf.nn.dropout(conv2_d1,1-drop)

    with tf.variable_scope('deconv3') as scope:
        W3_d1 = weight_variable([3,3,3,3])
        b3_d1 = bias_variable([3])             
        preacti3_d1 = tf.nn.bias_add(tf.nn.conv2d(conv2_d1,W3_d1,[1,1,1,1],padding='SAME'),b3_d1)
        bn3_d1 = batch_norm(preacti3_d1,decay = 0.9, center = True, scale = True, is_training = train)
        conv3_d1 = activation(bn3_d1,name=scope.name)
        drop3_d1 = tf.nn.dropout(conv3_d1,1-drop)


Added batch norm and dropout

## Loss and metrics

In [6]:
def charbonnier_loss(x):
    return tf.sqrt(0.1**2+tf.reduce_mean(tf.square(x)))
    
def interpol_error(x,gt):
    return tf.sqrt(tf.reduce_mean(tf.square(tf.sub(x,gt))))

def normalized_interpol_error(x,gt):
    norm_grad = cv2.Sobel(gt,cv2.CV_64F,1,0,ksize=5)**2+cv2.Sobel(gt,cv2.CV_64F,0,1,ksize=5)**2 
    return tf.sqrt(tf.reduce_mean(tf.div(tf.square(tf.sub(x,gt)),(norm_grad+1))))

In [7]:
with tf.name_scope('loss') as scope:
    cost = charbonnier_loss(tf.sub(conv3_d1,x_gt))
    tf.summary.scalar('loss',cost)

global_step = tf.Variable(0, trainable=False)
learning_rate = tf.train.exponential_decay(_learning_rate, global_step,10000, 0.99, staircase=True)
tf.summary.scalar('learning_rate',learning_rate)

optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost,global_step=global_step)

merged = tf.summary.merge_all()
writer = tf.summary.FileWriter(_train_folder+'/cnn', sess.graph)
saver = tf.train.Saver()

#initialization
init = tf.global_variables_initializer()

#launch graph
sess.run(init)

# restore weights from model
if _previously_trained :
    saver.restore(sess, _train_folder+"/model.ckpt")
    print("Model restored.")

## Visualization tools

In [None]:
np.random.seed(0)
test_frames = generate_batch(_dataset,4,train = 0)
cv2.imwrite(_train_folder+"/ground_truth.png",np.concatenate(255*test_frames[1],0))

# test_frames = load_batch(_dataset,8,train=0)
# cv2.imwrite(_train_folder+"/ground_truth2.png",np.concatenate(255*test_frames[1],0))

def test(i):
    x_reconstruct =  sess.run(conv3_d1, feed_dict={x: test_frames[0], x_gt:test_frames[1], drop:0})   
    
#     cv2.namedWindow('reconstruction',flags= cv2.WINDOW_NORMAL)
#     cv2.imshow('reconstruction',np.concatenate(x_reconstruct,0))
        
#     cv2.namedWindow('ground-truth',flags= cv2.WINDOW_NORMAL)
#     cv2.imshow('ground-truth',np.concatenate(test_frames[1],0))
    
    cv2.imwrite(_train_folder+"/reconstr_%s"%'{0:04}'.format(int(i))+".png",255*np.concatenate(x_reconstruct,0))
    cv2.imwrite(_train_folder+"/reconstr.png",255*np.concatenate(x_reconstruct,0))       
#     cv2.waitKey(10)

## Launch Training

In [None]:
start = time.time()
c = np.inf

for i in range(50000):
    batch = generate_batch(_dataset,_batch_size)
    feed = {x: batch[0], x_gt:batch[1],drop :0.2}
    summary,_ = sess.run([merged,optimizer], feed_dict=feed)
    if i % _step_test == 0 and i>0:
        
        c_old = c
        c = sess.run(cost, feed_dict=feed)
        print("STEP %d: LOSS_BATCH = %f, TIME %f"%(i,c,time.time()-start))
        save_path = saver.save(sess, _train_folder+"/model.ckpt")
        writer.add_summary(summary, i)
    
    if i % _step_viz == 0 and i>0:
        test(i/_step_viz)
    
print("Optimization Finished")


STEP 50: LOSS_BATCH = 0.285415, TIME 105.607278
STEP 100: LOSS_BATCH = 0.182938, TIME 218.396536
STEP 150: LOSS_BATCH = 0.169087, TIME 331.675244
STEP 200: LOSS_BATCH = 0.335788, TIME 450.604728
STEP 250: LOSS_BATCH = 0.335377, TIME 560.960822
STEP 300: LOSS_BATCH = 0.321129, TIME 674.635555
STEP 350: LOSS_BATCH = 0.321896, TIME 792.970636
STEP 400: LOSS_BATCH = 0.326711, TIME 908.120253
STEP 450: LOSS_BATCH = 0.318600, TIME 1025.490101
STEP 500: LOSS_BATCH = 0.335214, TIME 1139.635706
STEP 550: LOSS_BATCH = 0.337364, TIME 1260.703253
STEP 600: LOSS_BATCH = 0.337279, TIME 1373.209949
STEP 650: LOSS_BATCH = 0.322505, TIME 1491.016626
STEP 700: LOSS_BATCH = 0.335379, TIME 1617.137093
