In [None]:
import tensorflow as tf

In [None]:
# Initializing all the required libraries
import re
import os
import sys
import cv2
import math
import time
import imageio
import numpy as np
import pandas as pd
from PIL import Image
from scipy import misc
from tensorflow import keras
import matplotlib.colors as cl
import matplotlib.pyplot as plt
import tensorflow_addons as tfa
from urllib.request import urlopen
from matplotlib.image import imread
import tensorflow.keras.backend as bk
from tensorflow.keras.models import Model
from tensorflow.keras.layers import LeakyReLU
from tensorflow_addons.optimizers import AdamW
from tensorflow_addons.layers import CorrelationCost
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, InputLayer, Concatenate

In [None]:
# code to read/write the flow file 

def read(file):
    """Load `file` with the reader that matches its filename suffix.

    Dispatch table (suffix match is case-sensitive):
      * ``.float3``                        -> ``readFloat``
      * ``.flo``                           -> ``readFlow``
      * ``.ppm`` / ``.pgm`` / ``.png`` / ``.jpg`` -> ``readImage``
      * ``.pfm``                           -> first element returned by ``readPFM``

    Raises:
        Exception: if the suffix is not one of the above.

    NOTE(review): ``readFloat`` is not defined in the visible part of this
    notebook; the ``.float3`` branch presumably needs it from elsewhere.
    """
    image_suffixes = ('.ppm', '.pgm', '.png', '.jpg')

    if file.endswith('.float3'):
        return readFloat(file)
    if file.endswith('.flo'):
        return readFlow(file)
    if file.endswith(image_suffixes):
        return readImage(file)
    if file.endswith('.pfm'):
        return readPFM(file)[0]

    raise Exception('don\'t know how to read %s' % file)

def write(file, data):
    """Store `data` in `file` with the writer that matches the filename suffix.

    Dispatch table (suffix match is case-sensitive):
      * ``.float3``                        -> ``writeFloat``
      * ``.flo``                           -> ``writeFlow``
      * ``.ppm`` / ``.pgm`` / ``.png`` / ``.jpg`` -> ``writeImage``
      * ``.pfm``                           -> ``writePFM``

    Raises:
        Exception: if the suffix is not one of the above.

    NOTE(review): none of the ``write*`` helpers are defined in the visible
    part of this notebook; they presumably have to come from elsewhere.
    """
    image_suffixes = ('.ppm', '.pgm', '.png', '.jpg')

    if file.endswith('.float3'):
        return writeFloat(file, data)
    if file.endswith('.flo'):
        return writeFlow(file, data)
    if file.endswith(image_suffixes):
        return writeImage(file, data)
    if file.endswith('.pfm'):
        return writePFM(file, data)

    raise Exception('don\'t know how to write %s' % file)

def readPFM(file):
    """Read a PFM (portable float map) file.

    Args:
        file: path of the PFM file.

    Returns:
        ``(data, scale)`` where ``data`` is a float array of shape
        ``(height, width, 3)`` for a ``PF`` (colour) file or
        ``(height, width)`` for a ``Pf`` (single-channel) file, flipped
        vertically relative to the order stored on disk, and ``scale`` is the
        absolute value of the scale field in the header.

    Raises:
        Exception: if the magic line is neither ``PF`` nor ``Pf`` or the
            dimension line does not match ``<width> <height>``.
    """
    # The context manager guarantees the handle is released on every exit
    # path, including the header-validation errors below.
    with open(file, 'rb') as stream:
        header = stream.readline().rstrip().decode("ascii")
        if header == 'PF':
            color = True
        elif header == 'Pf':
            color = False
        else:
            raise Exception('Not a PFM file.')

        dim_match = re.match(r'^(\d+)\s(\d+)\s$', stream.readline().decode("ascii"))
        if not dim_match:
            raise Exception('Malformed PFM header.')
        width, height = map(int, dim_match.groups())

        # The sign of the scale field encodes the byte order of the payload.
        scale = float(stream.readline().decode("ascii").rstrip())
        if scale < 0:  # little-endian
            endian = '<'
            scale = -scale
        else:  # big-endian
            endian = '>'

        data = np.fromfile(stream, endian + 'f')

    shape = (height, width, 3) if color else (height, width)
    data = np.reshape(data, shape)
    # Rows are flipped so that the first stored row ends up last.
    data = np.flipud(data)
    return data, scale

def readImage(name):
    """Read an image file and return its pixel data.

    ``.pfm`` / ``.PFM`` files go through ``readPFM``; for a 3-D result only the
    first three channels are kept, a 2-D result is returned unchanged.
    Every other file is handed to ``imageio.imread``.
    """
    if name.endswith(('.pfm', '.PFM')):
        pixels = readPFM(name)[0]
        return pixels[:, :, 0:3] if len(pixels.shape) == 3 else pixels

    return imageio.imread(name)

In [None]:
# Load the CSV index of the dataset. Later cells read the columns
# 'img1', 'img2' and 'flow' from it.
# NOTE: the path is an empty placeholder and must be filled in before running.
df= pd.read_csv('')

# Directory prefix prepended to every image name in load_image()
# (empty placeholder; fill in before running).
images_dir = ""


# Function to load the image
def load_image(img_1,img_2):
    """Read one image pair from disk.

    Both arguments are UTF-8 encoded byte strings holding file names relative
    to the module-level ``images_dir``. Returns ``(img1, img2)`` as produced by
    ``readImage``.
    """
    first, second = (
        readImage(images_dir + raw_name.decode("utf-8"))
        for raw_name in (img_1, img_2)
    )
    return first, second
   

In [None]:
print("Total number of image pairs",len(df))

# Hold out a fixed random fraction of the rows as the test set; the seed makes
# the split reproducible.
test_set = df.sample(frac = 0.0279, random_state=42)
print("Total number of test image pairs",len(test_set))

# Test columns as plain lists
test_img_1_list = test_set['img1'].tolist()
test_img_2_list = test_set['img2'].tolist()
test_flow_list = test_set['flow'].tolist()

# Drop every row whose 'flow' entry belongs to the test set. A single isin()
# mask replaces one full DataFrame filter per test row.
# NOTE: this rebinds `df`, so re-running the cell shrinks the training set
# again; reload the CSV first when re-running.
df = df[~df['flow'].isin(test_flow_list)]
print("Total number of training image pairs",len(df))

# Training columns as plain lists, consumed by tf.data.Dataset.from_tensor_slices
img_1_list = df['img1'].tolist()
img_2_list = df['img2'].tolist()
flow_list = df['flow'].tolist()

In [None]:
# Persist the train split (df) and the held-out split (test_set) as CSV files.
# NOTE: both output paths are empty placeholders and must be filled in.
df.to_csv('')
test_set.to_csv('')

In [None]:
#function to map images and flow 
def map_func1(img_1,img_2):
    """Load an image pair via ``load_image`` and cast both frames to float32.

    Used as the Python callback of ``tf.numpy_function`` in the input pipeline.
    """
    frames = load_image(img_1,img_2)
    first, second = (tf.dtypes.cast(frame, tf.float32) for frame in frames)
    return first, second

In [None]:
# Build the tf.data input pipeline that yields batches of (img1, img2) pairs.
batch_size = 16
# One dataset element per (img1 name, img2 name) pair
dataset1 = tf.data.Dataset.from_tensor_slices((img_1_list,img_2_list))


# Decode the image files in parallel; map_func1 runs as plain Python through
# tf.numpy_function and returns two float32 arrays per element.
dataset1 = dataset1.map(lambda item1, item2: tf.numpy_function(
          map_func1, [item1, item2], [tf.float32, tf.float32]),num_parallel_calls=tf.data.experimental.AUTOTUNE)

# Shuffle (64-element buffer), batch, and prefetch
#dataset1 = dataset1.cache()
dataset1 = dataset1.shuffle(buffer_size=64)
dataset1 = dataset1.batch(batch_size)
dataset1 = dataset1.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)


In [None]:
# Number of elements in dataset1; since the dataset is batched, this is the
# number of batches per epoch.
num_elements = tf.data.experimental.cardinality(dataset1).numpy()
print("Total number of batches:",num_elements)

In [None]:
def crop_like(input, target): #shape adjustment
    """Crop `input` to the spatial extent (axes 1 and 2) of `target`.

    When axes 1 and 2 already agree, `input` is returned untouched; otherwise
    the leading ``target.shape[1]`` x ``target.shape[2]`` window is returned.
    Axes 0 and 3 are never changed.
    """
    same_spatial_size = input.shape[1:3] == target.shape[1:3]
    if same_spatial_size:
        return input

    rows, cols = target.shape[1], target.shape[2]
    return input[:, :rows, :cols, :]

In [None]:
class UFCFlow(tf.keras.Model):        #UFCFlow Model
  """Two-stream correlation network that predicts optical flow at several scales.

  Structure, as wired in ``call``:
    * two convolutional encoders with separate weights (``conva_*`` for the
      first frame, ``convb_*`` for the second), each ending after three
      stride-2 convolutions;
    * a ``CorrelationCost`` layer between the two encoder outputs, concatenated
      with a 1x1 "redirect" convolution of the first-frame features;
    * a single contracting stream (``conv3_1`` .. ``conv6_1``) with three more
      stride-2 convolutions;
    * a refinement decoder that, at every level, predicts a 2-channel flow,
      upsamples it with a transposed convolution and concatenates it with the
      upsampled features and the matching encoder features.

  ``call`` returns a dict with the six intermediate predictions and a final
  ``'flow'`` that is ``predict_flow1`` bilinearly resized to 384x512.
  """

  def __init__(self):
    """Create all layers; no arguments, every hyper-parameter is hard-coded."""
    super(UFCFlow, self).__init__()
    self.i1= InputLayer(input_shape=(384,512,3))                                            # Input Layer for first image frame
    self.i2= InputLayer(input_shape=(384,512,3))                                            # Input Layer for second image frame
    # Encoder for the first frame (input 1 path)
    self.conva_00= Conv2D(16,7,strides=1,padding='same')
    self.conva_00_act = LeakyReLU(alpha=0.1)
    self.conva_01= Conv2D(32,7,strides=1,padding='same')
    self.conva_01_act = LeakyReLU(alpha=0.1)
    self.conva_1= Conv2D(64,7,strides=2,padding='same')
    self.conva_1_act = LeakyReLU(alpha=0.1)
    self.conva_2= Conv2D(128,5,strides=2,padding='same')
    self.conva_2_act = LeakyReLU(alpha=0.1)
    self.conva_3= Conv2D(256,5,strides=2,padding='same')
    self.conva_3_act = LeakyReLU(alpha=0.1)

    # Encoder for the second frame (input 2 path); same layout, separate weights
    self.convb_00= Conv2D(16,7,strides=1,padding='same')
    self.convb_00_act = LeakyReLU(alpha=0.1)
    self.convb_01= Conv2D(32,7,strides=1,padding='same')
    self.convb_01_act = LeakyReLU(alpha=0.1)
    self.convb_1= Conv2D(64,7,strides=2,padding='same')
    self.convb_1_act =LeakyReLU(alpha=0.1)
    self.convb_2= Conv2D(128,5,strides=2,padding='same')
    self.convb_2_act =LeakyReLU(alpha=0.1)
    self.convb_3= Conv2D(256,5,strides=2,padding='same')
    self.convb_3_act =LeakyReLU(alpha=0.1)

    # Correlation layer
    # NOTE(review): the positional arguments presumably map to
    # (kernel_size, max_displacement, stride_1, stride_2, pad) -- confirm
    # against the tensorflow_addons CorrelationCost signature.
    self.cc = CorrelationCost(1,20,1,2,20,data_format='channels_last')
    self.cr_1_act= LeakyReLU(alpha=0.1)
    # 1x1 convolution of the first-frame features, concatenated with the correlation volume
    self.conva_redir= Conv2D(32,1,strides=1)
    self.conva_redir_act =LeakyReLU(alpha=0.1)
    self.vol_1= Concatenate(axis=3)

    # Single contracting stream on top of the concatenated volume
    self.conv3_1= Conv2D(256,3,strides=1,padding='same')
    self.conv3_1_act= LeakyReLU(alpha=0.1)
    self.conv4= Conv2D(512,3,strides=2,padding='same')
    self.conv4_act= LeakyReLU(alpha=0.1)
    self.conv4_1= Conv2D(512,3,strides=1,padding='same')
    self.conv4_1_act= LeakyReLU(alpha=0.1)
    self.conv5= Conv2D(512,3,strides=2,padding='same')
    self.conv5_act= LeakyReLU(alpha=0.1)
    self.conv5_1= Conv2D(512,3,strides=1,padding='same')
    self.conv5_1_act= LeakyReLU(alpha=0.1)
    self.conv6= Conv2D(1024,3,strides=2,padding='same')
    self.conv6_act= LeakyReLU(alpha=0.1)
    self.conv6_1= Conv2D(1024,3,strides=1,padding='same')
    self.conv6_1_act= LeakyReLU(alpha=0.1)

    # Refinement network. Naming per level N:
    #   pfN       - 2-channel flow prediction
    #   dcN       - transposed convolution that upsamples the features x2
    #   up_XtoY   - transposed convolution that upsamples the predicted flow x2
    #   con_N     - channel-wise concatenation of skip features, dcN and upsampled flow
    self.pf6= Conv2D(2,3,strides=1,padding='same')
    self.dc5= Conv2DTranspose(512,4,strides=2,padding='same')
    self.dc5_act= LeakyReLU(alpha=0.1)
    self.up_6to5= Conv2DTranspose(2,4,strides=2,padding='same')
    self.con_5= Concatenate(axis=3)

    self.pf5= Conv2D(2,3,strides=1,padding='same')
    self.dc4= Conv2DTranspose(256,4,strides=2,padding='same')
    self.dc4_act= LeakyReLU(alpha=0.1)
    self.up_5to4= Conv2DTranspose(2,4,strides=2,padding='same')
    self.con_4= Concatenate(axis=3)

    self.pf4= Conv2D(2,3,strides=1,padding='same')
    self.dc3= Conv2DTranspose(128,4,strides=2,padding='same')
    self.dc3_act= LeakyReLU(alpha=0.1)
    self.up_4to3= Conv2DTranspose(2,4,strides=2,padding='same')
    self.con_3= Concatenate(axis=3)

    self.pf3= Conv2D(2,3,strides=1,padding='same')
    self.dc2= Conv2DTranspose(64,4,strides=2,padding='same')
    self.dc2_act= LeakyReLU(alpha=0.1)
    self.up_3to2= Conv2DTranspose(2,4,strides=2,padding='same')
    self.con_2=Concatenate(axis=3)

    self.pf2= Conv2D(2,3,strides=1,padding='same')
    self.dc1= Conv2DTranspose(32,4,strides=2,padding='same')
    self.dc1_act= LeakyReLU(alpha=0.1)
    self.up_2to1= Conv2DTranspose(2,4,strides=2,padding='same')
    self.con_1=Concatenate(axis=3)
    self.pf1= Conv2D(2,3,strides=1,padding='same')

  def call(self, input1,input2,training=False):
    """Run the network on one batch of frame pairs.

    Args:
      input1: batch of first frames.
      input2: batch of second frames.
      training: accepted for Keras compatibility; not used in this method.

    Returns:
      dict with keys ``'flow'`` (``predict_flow1`` resized to 384x512) and
      ``'predict_flow6'`` .. ``'predict_flow1'`` (coarsest to finest).
    """

    # First-frame encoder
    i_1=self.i1(input1)
    cona00=self.conva_00(i_1)
    cona00_act=self.conva_00_act(cona00)
    cona01=self.conva_01(cona00_act)
    cona01_act=self.conva_01_act(cona01)
    cona1=self.conva_1(cona01_act)
    cona1_act=self.conva_1_act(cona1)
    cona2=self.conva_2(cona1_act)
    cona2_act=self.conva_2_act(cona2)
    cona3=self.conva_3(cona2_act)
    cona3_act=self.conva_3_act(cona3)

    # Second-frame encoder
    i_2=self.i2(input2)
    conb00=self.convb_00(i_2)
    conb00_act=self.convb_00_act(conb00)
    conb01=self.convb_01(conb00_act)
    conb01_act=self.convb_01_act(conb01)
    conb1=self.convb_1(conb01_act)
    conb1_act=self.convb_1_act(conb1)
    conb2=self.convb_2(conb1_act)
    conb2_act=self.convb_2_act(conb2)
    conb3=self.convb_3(conb2_act)
    conb3_act=self.convb_3_act(conb3)

    # Correlate the two feature maps and append the redirected first-frame features
    cc1=self.cc([cona3_act,conb3_act])
    cc1_act=self.cr_1_act(cc1)
    cona_r=self.conva_redir(cona3_act)
    cona_r_act=self.conva_redir_act(cona_r)
    v1=self.vol_1([cc1_act,cona_r_act])

    # Contracting stream
    con3_1=self.conv3_1(v1)
    con3_1_act=self.conv3_1_act(con3_1)
    con4=self.conv4(con3_1_act)
    con4_act=self.conv4_act(con4)
    con4_1=self.conv4_1(con4_act)
    con4_1_act=self.conv4_1_act(con4_1)
    con5=self.conv5(con4_1_act)
    con5_act=self.conv5_act(con5)
    con5_1=self.conv5_1(con5_act)
    con5_1_act=self.conv5_1_act(con5_1)
    con6=self.conv6(con5_1_act)
    con6_act=self.conv6_act(con6)
    con6_1=self.conv6_1(con6_act)
    con6_1_act=self.conv6_1_act(con6_1)

    # Refinement level 6 -> 5
    pf_6=self.pf6(con6_1_act)
    dc_5=self.dc5(con6_1_act)
    dc_5_act=self.dc5_act(dc_5)
    ups_6to5=self.up_6to5(pf_6)
    concat5=self.con_5([con5_1_act,dc_5_act,ups_6to5])

    # Refinement level 5 -> 4
    pf_5=self.pf5(concat5)
    dc_4=self.dc4(concat5)
    dc_4_act=self.dc4_act(dc_4)
    ups_5to4=self.up_5to4(pf_5)
    concat4=self.con_4([con4_1_act,dc_4_act,ups_5to4])

    # Refinement level 4 -> 3; upsampled tensors are cropped to the skip
    # features because x2 upsampling can overshoot when the encoder size was odd
    pf_4=self.pf4(concat4)
    dc_3=self.dc3(concat4)
    dc_3_act=self.dc3_act(dc_3)
    dc_3_crop=crop_like(dc_3_act,con3_1_act)
    ups_4to3=self.up_4to3(pf_4)
    ups_4to3_crop=crop_like(ups_4to3,con3_1_act)
    concat3=self.con_3([con3_1_act,dc_3_crop,ups_4to3_crop])

    # Refinement level 3 -> 2 (skip connection from the first-frame encoder)
    pf_3=self.pf3(concat3)
    dc_2=self.dc2(concat3)
    dc_2_act=self.dc2_act(dc_2)
    dc_2_crop=crop_like(dc_2_act,cona2_act)
    ups_3to2=self.up_3to2(pf_3)
    ups_3to2_crop=crop_like(ups_3to2,cona2_act)
    concat2=self.con_2([cona2_act,dc_2_crop,ups_3to2_crop])

    # Refinement level 2 -> 1 (skip connection from the first-frame encoder)
    pf_2=self.pf2(concat2)
    dc_1=self.dc1(concat2)
    dc_1_act=self.dc1_act(dc_1)
    ups_2to1=self.up_2to1(pf_2)
    concat1=self.con_1([cona1_act,dc_1_act,ups_2to1])
    pf_1=self.pf1(concat1)
    # Final output is always resized to 384x512, whatever the input size was
    flow=tf.image.resize(pf_1,tf.stack([384,512]),method='bilinear')

    return  {'flow': flow ,'predict_flow6': pf_6, 'predict_flow5': pf_5, 'predict_flow4': pf_4, 'predict_flow3': pf_3, 'predict_flow2': pf_2,'predict_flow1': pf_1}

In [None]:
# Single global model instance; the training and inference cells below use it.
model = UFCFlow()     # Model initialization

In [None]:
# Charbonnier Penalty Function
def charbonnier_loss(delta, alpha=0.50, epsilon=1e-3):
    """Mean generalised Charbonnier penalty of `delta`.

    Computes ``mean((delta**2 + epsilon**2) ** alpha)`` over all elements.

    Args:
        delta: tensor of residuals.
        alpha: exponent applied to the regularised squared residual.
        epsilon: small constant added (squared) under the power.
    """
    regularised_square = tf.square(delta) + tf.square(epsilon)
    return tf.reduce_mean(tf.pow(regularised_square, alpha))

In [None]:
# Photometric Loss Function
def PG_loss(y,x):
  """Photometric loss between two image batches.

  Charbonnier penalty (alpha=0.5, epsilon=1e-3) of ``y - x``, averaged over all
  elements and then divided by ``len(df)``.

  NOTE(review): ``df`` is the module-level DataFrame at call time; a later cell
  rebinds it, which changes this scaling.
  """
  residual = tf.subtract(y,x)
  penalty = charbonnier_loss(residual, alpha=0.50, epsilon=1e-3)
  return penalty/len(df)


In [None]:
# First-Order Smoothness Loss Function
def conv2d(x, weights):
    """Apply ``tf.nn.conv2d`` with unit strides and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, weights, strides=unit_strides, padding='SAME')

def smoothness_deltas(flow):
    """First-order differences of both flow channels.

    The flow is split along axis 3 into ``u`` and ``v``; each is filtered with
    two 3x3 kernels (one differencing along x, one along y), so each returned
    tensor has two channels.

    Returns:
        ``(delta_u, delta_v)``
    """
    x_kernel = [[0, 0, 0], [0, 1, -1], [0, 0, 0]]
    y_kernel = [[0, 0, 0], [0, 1, 0], [0, -1, 0]]

    # Stack to (3, 3, 2) and insert the single input channel -> (3, 3, 1, 2)
    stacked = np.stack([x_kernel, y_kernel], axis=-1).astype(np.float64)
    weights = tf.constant(stacked[:, :, np.newaxis, :], dtype=tf.float32)

    flow_u, flow_v = tf.split(flow, 2, axis=3)
    return conv2d(flow_u, weights), conv2d(flow_v, weights)

def smoothness_loss(flow):
    """First-order smoothness loss of a flow field.

    Sum of the Charbonnier penalties of the ``u`` and ``v`` differences from
    ``smoothness_deltas``, divided by ``len(df)`` (module-level DataFrame).
    """
    penalty_u, penalty_v = (
        charbonnier_loss(delta) for delta in smoothness_deltas(flow)
    )
    return (penalty_u + penalty_v)/len(df)

In [None]:
# Structural Similarity Index Loss Function
def SSIM(y,x): 
    """Structural dissimilarity map between two image batches.

    Both inputs are zero-padded and local statistics are taken with a 3x3
    average pool (stride 1). The value ``(1 - SSIM) / 2`` is clipped to [0, 1]
    and divided by ``len(df)`` (module-level DataFrame).

    NOTE(review): the result is a per-element tensor, not a scalar; no
    reduction is applied in this function.
    """
    C1 = 0.01**2
    C2 = 0.03**2

    # Both layers are parameter-free, so one instance of each can be reused.
    pad = tf.keras.layers.ZeroPadding2D()
    local_mean = tf.keras.layers.AveragePooling2D(pool_size=(3, 3), strides=1)

    x = pad(x)
    y = pad(y)
    mean_x = local_mean(x)
    mean_y = local_mean(y)

    var_x = local_mean(x**2) - mean_x**2
    var_y = local_mean(y**2) - mean_y**2
    cov_xy = local_mean(x*y) - mean_x* mean_y

    numerator = (2*mean_x*mean_y + C1)*(2*cov_xy + C2)
    denominator = (mean_x**2 + mean_y**2 + C1)*(var_x + var_y + C2)
    dissimilarity = tf.clip_by_value((1- numerator/denominator)/2,0,1)
    return dissimilarity/len(df)

In [None]:
def image_warp(im, flow):
    """Performs a backward warp of an image using the predicted flow.

    Each output pixel (x, y) is sampled from ``im`` at
    ``(x + flow[..., 0], y + flow[..., 1])`` with bilinear interpolation.
    Sample coordinates that fall outside the image are clamped to the border.

    Args:
        im: Batch of images. [num_batch, height, width, channels]
        flow: Batch of flow vectors. [num_batch, height, width, 2]
    Returns:
        warped: transformed image of the same shape as the input image.
    """
   

    num_batch, height, width, channels = tf.unstack(tf.shape(im))
    max_x = tf.cast(width - 1, 'int32')
    max_y = tf.cast(height - 1, 'int32')
    zero = tf.zeros([], dtype='int32')

    # We have to flatten our tensors to vectorize the interpolation
    im_flat = tf.reshape(im, [-1, channels])
    flow_flat = tf.reshape(flow, [-1, 2])

    # Floor the flow, as the final indices are integers
    # The fractional part is used to control the bilinear interpolation.
    flow_floor = tf.cast(tf.floor(flow_flat),dtype=tf.int32)
    bilinear_weights = flow_flat - tf.floor(flow_flat)

    # Construct base indices which are displaced with the flow:
    # pos_x / pos_y hold the column / row of every pixel in flattened order
    pos_x = tf.tile(tf.range(width), [height * num_batch])
    grid_y = tf.tile(tf.expand_dims(tf.range(height), 1), [1, width])
    pos_y = tf.tile(tf.reshape(grid_y, [-1]), [num_batch])

    # Integer displacement (x, y) and fractional part (xw, yw) per pixel
    x = flow_floor[:, 0]
    y = flow_floor[:, 1]
    xw = bilinear_weights[:, 0]
    yw = bilinear_weights[:, 1]

    # Compute interpolation weights for 4 adjacent pixels
    # expand to num_batch * height * width x 1 for broadcasting in add_n below
    wa = tf.expand_dims((1 - xw) * (1 - yw), 1) # top left pixel
    wb = tf.expand_dims((1 - xw) * yw, 1) # bottom left pixel
    wc = tf.expand_dims(xw * (1 - yw), 1) # top right pixel
    wd = tf.expand_dims(xw * yw, 1) # bottom right pixel

    # Coordinates of the four neighbours of the sampling position
    x0 = pos_x + x
    x1 = x0 + 1
    y0 = pos_y + y
    y1 = y0 + 1

    # Clamp to the image so every gather index below is valid
    x0 = tf.clip_by_value(x0, zero, max_x)
    x1 = tf.clip_by_value(x1, zero, max_x)
    y0 = tf.clip_by_value(y0, zero, max_y)
    y1 = tf.clip_by_value(y1, zero, max_y)

    # Offset of each batch item inside the flattened image
    dim1 = width * height
    batch_offsets = tf.range(num_batch) * dim1
    base_grid = tf.tile(tf.expand_dims(batch_offsets, 1), [1, dim1])
    base = tf.reshape(base_grid, [-1])

    # Flat indices of the four neighbours (a: top left, b: bottom left,
    # c: top right, d: bottom right)
    base_y0 = base + y0 * width
    base_y1 = base + y1 * width
    idx_a = base_y0 + x0
    idx_b = base_y1 + x0
    idx_c = base_y0 + x1
    idx_d = base_y1 + x1

    Ia = tf.gather(im_flat, idx_a)
    Ib = tf.gather(im_flat, idx_b)
    Ic = tf.gather(im_flat, idx_c)
    Id = tf.gather(im_flat, idx_d)

    # Weighted sum of the four neighbours, then restore the batch layout
    warped_flat = tf.add_n([wa * Ia, wb * Ib, wc * Ic, wd * Id])
    warped = tf.reshape(warped_flat, [num_batch, height, width, channels])

    return warped

In [None]:
# Multi-Scale Loss Function for Photometric Loss
def loss_function(img1, img2, pred):
  """Multi-scale photometric loss.

  For every predicted flow scale, both images are resized to the resolution of
  that prediction, the second image is backward-warped with the flow, and the
  photometric loss against the first image is weighted and accumulated.

  Args:
    img1: batch of first frames.
    img2: batch of second frames.
    pred: dict returned by the model; keys ``'predict_flow6'`` ..
      ``'predict_flow1'`` are read.

  Returns:
    Weighted sum of the per-scale ``PG_loss`` values.
  """
  # (prediction key, loss weight), coarsest to finest
  scale_weights = (
      ('predict_flow6', 0.32),
      ('predict_flow5', 0.32),
      ('predict_flow4', 0.32),
      ('predict_flow3', 0.32),
      ('predict_flow2', 0.32),
      ('predict_flow1', 0.64),
  )

  scale_losses = []
  for key, weight in scale_weights:
    flow = pred[key]
    size = tf.stack([flow.shape[1], flow.shape[2]])
    img2_warped = image_warp(tf.image.resize(img2, size), flow)
    img1_scaled = tf.image.resize(img1, size)
    scale_losses.append(weight * PG_loss(img1_scaled, img2_warped))

  return tf.math.add_n(scale_losses)


In [None]:
# Multi-Scale Loss Function for Smoothness Loss
def loss_function1(pred):
    """Multi-scale first-order smoothness loss.

    Args:
        pred: dict returned by the model; keys ``'predict_flow6'`` ..
            ``'predict_flow1'`` are read.

    Returns:
        Weighted sum of ``smoothness_loss`` over all predicted flow scales.
    """
    # (prediction key, loss weight), coarsest to finest
    scale_weights = (
        ('predict_flow6', 0.32),
        ('predict_flow5', 0.32),
        ('predict_flow4', 0.32),
        ('predict_flow3', 0.32),
        ('predict_flow2', 0.32),
        ('predict_flow1', 0.64),
    )
    scale_losses = [
        weight*(smoothness_loss(pred[key])) for key, weight in scale_weights
    ]
    return tf.math.add_n(scale_losses)


In [None]:
# SSIM Loss computation
def loss_function2(img1, img2, pred):
  """SSIM loss at the finest predicted scale.

  Both frames are resized to the resolution of ``pred['predict_flow1']``, the
  second frame is backward-warped with that flow and compared with the first
  frame through ``SSIM``.
  """
  finest_flow = pred['predict_flow1']
  target_size = tf.stack([finest_flow.shape[1], finest_flow.shape[2]])

  img2_warped = image_warp(tf.image.resize(img2, target_size), finest_flow)
  img1_scaled = tf.image.resize(img1, target_size)

  ssim_term = SSIM(img1_scaled, img2_warped)
  return tf.math.add_n([ssim_term])


In [None]:
# AdamW (Adam with decoupled weight decay) from tensorflow_addons.
# The optimizer state is stored in the checkpoint together with the model.
optimizer= AdamW(weight_decay=0.0004, learning_rate=0.0001, beta_1=0.9, beta_2=0.999)

In [None]:
# Model checkpoint creation and loading.
# NOTE: checkpoint_path is an empty placeholder and must be filled in.
checkpoint_path = ""
# Track both the model weights and the optimizer state
ckpt = tf.train.Checkpoint(model=model,optimizer = optimizer)
# Resume from the newest checkpoint found under checkpoint_path
# (presumably a no-op when none exists yet -- verify on a fresh directory)
ckpt.restore(tf.train.latest_checkpoint(checkpoint_path))
# Keep only the three most recent checkpoints on disk
ckpt_manager = tf.train.CheckpointManager(ckpt, checkpoint_path, max_to_keep=3)

In [None]:
# Resume the epoch counter from the numeric suffix of the latest checkpoint
# name (the text after the last '-'). The training loop saves once per epoch,
# so this suffix is used as the number of completed epochs.
start_epoch = 0
if ckpt_manager.latest_checkpoint:
    start_epoch = int(ckpt_manager.latest_checkpoint.split('-')[-1])
print(start_epoch)

In [None]:
# Code for computing gradients 
def train_step(input1,input2):
    """Run one optimisation step on a batch of frame pairs.

    The objective is ``photometric + 1 * smoothness + 2 * SSIM``; gradients of
    it are applied to all trainable variables of the global ``model`` with the
    global ``optimizer``.

    Returns:
        The photometric loss only (not the combined objective).
    """
    smoothness_weight = 1
    ssim_weight = 2

    with tf.GradientTape() as tape:
        predict = model(input1,input2)
        photometric = loss_function(input1,input2,predict)
        smoothness = loss_function1(predict)
        structural = loss_function2(input1,input2,predict)
        final_loss = photometric + smoothness_weight*smoothness + ssim_weight*structural

    weights = model.trainable_variables
    grads = tape.gradient(final_loss, weights)
    optimizer.apply_gradients(zip(grads, weights))

    return photometric

In [None]:
# Training loop: set EPOCHS to the total number of epochs to train up to.
# Training resumes at start_epoch, so nothing runs while EPOCHS <= start_epoch
# (EPOCHS = 0 is a placeholder).
EPOCHS = 0
loss_list =[]          # one accumulated loss value per epoch
for epoch in range(start_epoch, EPOCHS):
    start = time.time()
    total_loss = 0     # sum of the per-batch losses returned by train_step

    for (batch, (img1,img2)) in enumerate(dataset1):
        batch_loss= train_step(img1,img2)
        total_loss += batch_loss
        
        
        # Progress report every 100 batches
        if batch % 100 == 0:
            print ('Epoch {} Batch {} Loss {:.4f}'.format(epoch + 1, batch, batch_loss.numpy()))
    loss_list.append(total_loss)

    # `epoch % 1` is always 0, so a checkpoint is written after every epoch
    if epoch % 1 == 0:
      print("Saving model\n")
      ckpt_manager.save()
    print ('Epoch {} Loss {:.6f}'.format(epoch + 1,total_loss))
    print ('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

In [None]:
# Plot the accumulated loss per epoch and save the figure.
plt.plot(loss_list)
plt.xlabel("Epoch")
plt.ylabel("Accumulated training loss")
# Save before showing: show() may release the current figure, which would
# leave savefig() with an empty canvas.
plt.savefig("plot.png",dpi=400)
plt.show()

In [None]:
# Save the per-epoch loss values to a text file.
# NumPy is imported as `np` in this notebook; the bare name `numpy` is never
# bound. float() turns both plain numbers and scalar tensors into Python floats.
numpy_loss_history = np.array([float(value) for value in loss_list])
np.savetxt("train.txt", numpy_loss_history, delimiter=",")

In [None]:
# Flow components with an absolute value above this threshold are treated as
# "unknown" and zeroed by flow_error() and flow_to_image().
UNKNOWN_FLOW_THRESH = 1e7
def visualize_flow_train(flow,i, mode='Y'):
    """Show a flow field as a colour image and save it for training sample `i`.

    Only ``mode == 'Y'`` does anything: the flow is colour-coded with
    ``flow_to_image``, displayed, and written to
    ``/FlyingChairs/Train/<i>.png``. Always returns ``None``.
    """
    if mode != 'Y':
        return None

    # Colour-wheel rendering of the flow
    img = flow_to_image(flow)
    plt.imshow(img)
    plt.show()
    # Location to save the visualized UFCFlow model predictions
    plt.imsave("/FlyingChairs/Train/"+str(i)+".png",img)
    return None

def visualize_flow_test(flow,i, mode='Y'):
    """Show a flow field as a colour image and save it for test sample `i`.

    Only ``mode == 'Y'`` does anything: the flow is colour-coded with
    ``flow_to_image``, displayed, and written to
    ``/FlyingChairs/Test/<i>.png``. Always returns ``None``.
    """
    if mode != 'Y':
        return None

    # Colour-wheel rendering of the flow
    img = flow_to_image(flow)
    plt.imshow(img)
    plt.show()
    plt.imsave("/FlyingChairs/Test/"+str(i)+".png",img)
    return None


def readFlow(name):
    """Read an optical-flow file.

    ``.pfm`` / ``.PFM`` files are read through ``readPFM`` and the first two
    channels are returned. Any other file is parsed as a ``.flo`` file: a
    4-byte ``PIEH`` tag, int32 width, int32 height, then
    ``height * width * 2`` float32 values.

    Args:
        name: path of the flow file.

    Returns:
        float32 array of shape ``(height, width, 2)`` for ``.flo`` input.

    Raises:
        Exception: if the ``PIEH`` tag is missing or the header is truncated.
    """
    if name.endswith('.pfm') or name.endswith('.PFM'):
        return readPFM(name)[0][:,:,0:2]

    # The context manager closes the handle on every exit path.
    with open(name, 'rb') as f:
        # Compare raw bytes: decoding arbitrary binary data as UTF-8 could
        # raise UnicodeDecodeError instead of the intended error below.
        header = f.read(4)
        if header != b'PIEH':
            raise Exception('Flow file header does not contain PIEH')

        dims = np.fromfile(f, np.int32, 2)  # stored as width, then height
        if dims.size != 2:
            raise Exception('Flow file header is truncated')
        width, height = int(dims[0]), int(dims[1])

        flow = np.fromfile(f, np.float32, width * height * 2).reshape((height, width, 2))

    return flow.astype(np.float32)


def flow_error(tu, tv, u, v, unknown_thresh=None):
    """
    Calculate average end point error
    :param tu: ground-truth horizontal flow map
    :param tv: ground-truth vertical flow map
    :param u:  estimated horizontal flow map
    :param v:  estimated vertical flow map
    :param unknown_thresh: ground-truth magnitudes above this value mark a
        pixel as unknown; defaults to the module-level UNKNOWN_FLOW_THRESH
    :return: End point error of the estimated flow, averaged over the pixels
        whose ground-truth flow is non-zero after unknown pixels were zeroed
        (NaN if there is no such pixel)
    """
    if unknown_thresh is None:
        unknown_thresh = UNKNOWN_FLOW_THRESH
    smallflow = 0.0

    # Work on copies: `x[:]` on an ndarray is a view, so zeroing unknown
    # pixels through it would silently modify the caller's arrays.
    stu = np.array(tu)
    stv = np.array(tv)
    su = np.array(u)
    sv = np.array(v)

    # Pixels with unknown ground truth contribute nothing
    idxUnknow = (np.abs(stu) > unknown_thresh) | (np.abs(stv) > unknown_thresh)
    for component in (stu, stv, su, sv):
        component[idxUnknow] = 0

    # Plain boolean mask; wrapping it in a list is treated as an extra index
    # dimension by current NumPy and fails.
    valid = (np.absolute(stu) > smallflow) | (np.absolute(stv) > smallflow)

    epe = np.sqrt((stu - su) ** 2 + (stv - sv) ** 2)
    mepe = np.mean(epe[valid])
    return mepe


def flow_to_image(flow):
    """
    Convert flow into middlebury color code image
    :param flow: optical flow map, indexed as flow[row, col, channel] with
        channel 0 = horizontal and channel 1 = vertical component
    :return: optical flow image in middlebury color (uint8, 3 channels)

    Prints the maximum flow magnitude and the u/v value ranges as a side effect.
    The input array is left unmodified.
    """
    # Copy the channels: slicing yields views, and the zeroing below would
    # otherwise overwrite the caller's flow array.
    u = np.array(flow[:, :, 0])
    v = np.array(flow[:, :, 1])

    # Pixels with an implausibly large component are treated as unknown
    idxUnknow = (abs(u) > UNKNOWN_FLOW_THRESH) | (abs(v) > UNKNOWN_FLOW_THRESH)
    u[idxUnknow] = 0
    v[idxUnknow] = 0

    # Reported ranges are clamped at +/-999 from the inside
    maxu = max(-999., np.max(u))
    minu = min(999., np.min(u))

    maxv = max(-999., np.max(v))
    minv = min(999., np.min(v))

    rad = np.sqrt(u ** 2 + v ** 2)
    maxrad = max(-1, np.max(rad))

    print ("max flow: %.4f\nflow range:\nu = %.3f .. %.3f\nv = %.3f .. %.3f" % (maxrad, minu,maxu, minv, maxv))

    # Normalise by the largest magnitude; eps avoids division by zero
    u = u/(maxrad + np.finfo(float).eps)
    v = v/(maxrad + np.finfo(float).eps)

    img = compute_color(u, v)

    # Unknown pixels are rendered black
    idx = np.repeat(idxUnknow[:, :, np.newaxis], 3, axis=2)
    img[idx] = 0

    return np.uint8(img)


def evaluate_flow(gt_flow, pred_flow):
    """
    Average end point error between two flow fields.
    gt_flow: ground-truth flow, indexed as [row, col, channel]
    pred_flow: estimated flow, same layout
    Channel 0 is passed to flow_error as the horizontal component and
    channel 1 as the vertical component.
    """
    gt_u, gt_v = gt_flow[:, :, 0], gt_flow[:, :, 1]
    pred_u, pred_v = pred_flow[:, :, 0], pred_flow[:, :, 1]
    return flow_error(gt_u, gt_v, pred_u, pred_v)


def compute_color(u, v):
    """
    compute optical flow color map
    :param u: optical flow horizontal map (2-D array)
    :param v: optical flow vertical map (2-D array, same shape)
    :return: optical flow in color code, float array of shape (h, w, 3)
        holding integer values in 0..255

    The direction of each vector selects a (linearly interpolated) entry of
    the colour wheel; the magnitude controls saturation. NaN pixels come out
    black. The input arrays are left unmodified.
    """
    # Copy first: the NaN clean-up below writes into u and v.
    u = np.array(u)
    v = np.array(v)

    [h, w] = u.shape
    img = np.zeros([h, w, 3])
    nanIdx = np.isnan(u) | np.isnan(v)
    u[nanIdx] = 0
    v[nanIdx] = 0

    colorwheel = make_color_wheel()
    ncols = np.size(colorwheel, 0)

    rad = np.sqrt(u**2+v**2)

    # Angle mapped to [-1, 1], then to a fractional 1-based wheel index
    a = np.arctan2(-v, -u) / np.pi
    fk = (a+1) / 2 * (ncols - 1) + 1

    k0 = np.floor(fk).astype(int)

    k1 = k0 + 1
    k1[k1 == ncols+1] = 1   # wrap around the wheel
    f = fk - k0

    # Magnitude masks do not depend on the colour channel
    inside = rad <= 1
    outside = np.logical_not(inside)

    for i in range(0, np.size(colorwheel,1)):
        tmp = colorwheel[:, i]
        col0 = tmp[k0-1] / 255
        col1 = tmp[k1-1] / 255
        col = (1-f) * col0 + f * col1

        # Inside the unit circle: blend towards white as magnitude decreases
        col[inside] = 1-rad[inside]*(1-col[inside])
        # Outside the unit circle: darken
        col[outside] *= 0.75

        img[:, :, i] = np.uint8(np.floor(255 * col*(1-nanIdx)))

    return img


def make_color_wheel():
    """
    Generate color wheel according Middlebury color code
    :return: Color wheel, array of shape (55, 3) with values in 0..255

    The wheel is built from six consecutive segments. Within a segment one
    channel is held at 255 while a second channel ramps linearly, either up
    from 0 or down from 255; the third channel stays 0.
    """
    # (segment length, channel held at 255, ramping channel, ramp descends?)
    segments = (
        (15, 0, 1, False),  # RY: red -> yellow
        (6, 1, 0, True),    # YG: yellow -> green
        (4, 1, 2, False),   # GC: green -> cyan
        (11, 2, 1, True),   # CB: cyan -> blue
        (13, 2, 0, False),  # BM: blue -> magenta
        (6, 0, 2, True),    # MR: magenta -> red
    )

    ncols = sum(length for length, _, _, _ in segments)
    colorwheel = np.zeros([ncols, 3])

    start = 0
    for length, full_channel, ramp_channel, descending in segments:
        ramp = np.floor(255*np.arange(0, length) / length)
        rows = slice(start, start + length)
        colorwheel[rows, full_channel] = 255
        colorwheel[rows, ramp_channel] = 255 - ramp if descending else ramp
        start += length

    return colorwheel

In [None]:
# Load the CSV that lists the image pairs to evaluate.
# NOTE(review): the variables are named train_* but the file read here is
# 'FC-test.csv' -- confirm which split is intended.
train_set = pd.read_csv('FC-test.csv') # Loading CSV for train and test image pairs
train_img_1_list = train_set['img1'].tolist() 
train_img_2_list = train_set['img2'].tolist()
train_flow_list = train_set['flow'].tolist()

In [None]:
# UFCFlow Inference Function for Flying Chairs
def inference():
    """Evaluate the global ``model`` on every pair listed in ``train_set``.

    For each entry: both frames are read and shaped to (1, 384, 512, 3)
    float32, the model is run (and timed), the predicted ``'flow'`` is
    visualised and saved via ``visualize_flow_test``, and the end point error
    against the ground truth under ``/flow/`` is printed. Finally the mean
    error over all entries is printed.
    """
    imgpath = ""   # directory prefix of the image files (placeholder)
    epe_sum = 0.0
    for i in range(len(train_set)):
        first_path = imgpath+train_img_1_list[i]
        img1 = tf.reshape(tf.cast(readImage(first_path),tf.float32),(1,384,512,3))
        second_path = imgpath+train_img_2_list[i]
        img2 = tf.cast(tf.reshape(readImage(second_path),(1,384,512,3)),tf.float32)

        start=time.time()
        flow_out=model(img1,img2)
        print ('Time taken for 1 prediction {} sec\n'.format(time.time() - start))

        predicted = flow_out['flow'][0].numpy()
        # A separate array is handed to the visualiser on purpose
        visualize_flow_test(flow_out['flow'][0].numpy(),i,mode = 'Y')
        gt_flow = readFlow("/flow/"+train_flow_list[i])

        entry_epe = evaluate_flow(gt_flow,predicted)
        print("Entry",[i],":",entry_epe)
        epe_sum += entry_epe
    mean_epe = epe_sum/len(train_set)
    print("Test AEPE:", mean_epe)
        

In [None]:
# Run the Flying Chairs evaluation defined in the previous cell
inference()

In [None]:
# NOTE: this rebinds the global `df`, which the loss functions read through
# len(df); the path is an empty placeholder and must be filled in.
df= pd.read_csv('') # Loading CSV for MPI Sintel Dataset
img1_list = df['img1'].to_list()
img2_list = df['img2'].to_list()
flow_list = df['flow'].to_list()

In [None]:
# UFCFlow Inference Function for MPI Sintel
def inference():
    """Evaluate the global ``model`` on every pair listed in ``df`` (MPI Sintel).

    For each entry: both PNG frames are decoded and shaped to
    (1, 436, 1024, 3) float32, the model is run, the predicted ``'flow'`` is
    visualised and saved via ``visualize_flow_test``, and the ground-truth flow
    is resized to 512x384 (width x height) before the end point error is
    computed and printed. Finally the mean error over all entries is printed.

    NOTE: this definition replaces the earlier ``inference`` of the same name.
    NOTE(review): the ground-truth flow is resized without rescaling the flow
    vectors themselves -- confirm this matches the units of the prediction.
    """
    imgpath = ""   # directory prefix of the image files (placeholder)
    epe_sum = 0.0
    for i in range(len(df)):
        img1 = tf.image.decode_png(tf.io.read_file(imgpath+img1_list[i]), channels=3)
        img1 = tf.reshape(tf.cast(img1,tf.float32),(1,436,1024,3))

        img2 = tf.image.decode_png(tf.io.read_file(imgpath+img2_list[i]), channels=3)
        img2 = tf.cast(tf.reshape(img2,(1,436,1024,3)),tf.float32)

        flow_out=model(img1,img2)

        predicted = flow_out['flow'][0].numpy()
        # A separate array is handed to the visualiser on purpose
        visualize_flow_test(flow_out['flow'][0].numpy(),i,mode = 'Y')

        gt_flow = readFlow("/flow/"+flow_list[i])
        target_size = (512,384)   # cv2 expects (width, height)
        gt_flow = cv2.resize(gt_flow,target_size,interpolation = cv2.INTER_LINEAR )

        entry_epe = evaluate_flow(gt_flow,predicted)
        print("Entry",[i],":",entry_epe)
        epe_sum += entry_epe
    mean_epe = epe_sum/len(df)
    print("Test AEPE:", mean_epe)

In [None]:
# Run the MPI Sintel evaluation defined in the previous cell
inference()

References:
1. Structural Similarity Index Loss Function - Taken from Repository of **nianticlabs** (https://github.com/nianticlabs/monodepth2/blob/master/layers.py)

2. First-Order Smoothness Loss Function, Second-Order Smoothness Loss Function, and Image warping Function - Taken from Repository of **Simon Meister** 
(https://github.com/simonmeister/UnFlow/blob/master/src/e2eflow/core/losses.py)
(https://github.com/simonmeister/UnFlow/blob/master/src/e2eflow/core/image_warp.py)

3. Visualization Functions - Taken from Repository of **Sam Pepose**
(https://github.com/sampepose/flownet2-tf/blob/master/src/flowlib.py) 
