<a href="https://colab.research.google.com/github/drakesvoboda/EECS-504-PWC-Net/blob/master/PWC_Net_KITTI_Eval.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup

## Mount Google Drive

In [0]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Setup Environment

In [0]:
import sys
import os

In [0]:
!pip install pypng
!pip install flow_vis
!pip install spatial-correlation-sampler
#!git clone https://github.com/NVlabs/PWC-Net.git
#!git clone https://github.com/NVIDIA/flownet2-pytorch.git
!git clone https://github.com/drakesvoboda/EECS-504-PWC-Net

fatal: destination path 'EECS-504-PWC-Net' already exists and is not an empty directory.


In [0]:
sys.path.append('/content/EECS-504-PWC-Net')

In [0]:
from train import *
from util import *

import os
import glob
import random
from pathlib import Path

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset, RandomSampler
import torchvision.transforms as transforms
import torchvision.transforms.functional as TF

import cv2
import png
from PIL import Image
import flow_vis

import numpy as np
import matplotlib.pyplot as plt

from spatial_correlation_sampler import spatial_correlation_sample

#from networks.correlation_package.correlation import Correlation
#import correlation_cuda

In [0]:
class Correlation(nn.Module):
  def __init__(self, *args, **kwargs):
    super(Correlation, self).__init__()

  def forward(self, input1, input2):

    #     For a correlation with max displacement of 4, the patch size to search is 9 by 9
    #     Below is a 9 by 9 patch (with max displacement of 4)
    #
    #     4   x x x x | x x x x 
    #     3   x x x x | x x x x 
    #     2   x x x x | x x x x 
    #     1   x x x x | x x x x 
    #     0   - - - - O - - - - 
    #     1   x x x x | x x x x 
    #     2   x x x x | x x x x 
    #     3   x x x x | x x x x 
    #     4   x x x x | x x x x 
    #
    #         4 3 2 1 0 1 2 3 4

    # https://github.com/NVlabs/PWC-Net/issues/60#issuecomment-496055396
    # input1 = F.normalize(input1, p=2, dim=2)
    # input2 = F.normalize(input2, p=2, dim=2)

    out_corr = spatial_correlation_sample(input1,
                                          input2,
                                          kernel_size=1,
                                          patch_size=9,
                                          stride=1,
                                          padding=0,
                                          dilation_patch=1)
    

    # collate dimensions 1 and 2 in order to be treated as a
    # regular 4D tensor
    b, ph, pw, h, w = out_corr.size()
    out_corr = out_corr.view(b, ph * pw, h, w)/input1.size(1)
    return out_corr

# Dataloader

##Transform

In [0]:
def resample_mask(mask, size):
    """
    mask: mask map to be resampled
    size: new mask map size. Must be [height,weight]
    """
    mask = cv2.resize(mask,dsize=(size[1],size[0]),interpolation=cv2.INTER_NEAREST)
    valid_idx = (mask != 0)
    mask[valid_idx] = 1
    mask = np.expand_dims(mask, axis=2)
    return mask

In [0]:
class FlowDatasetTransform():
    def __init__(self, norm, size=(375, 1242), crop=(320, 896), flip=0.5):
        self.norm = norm
        self.size = size
        self.crop = crop
        self.flip = flip

        mean = np.array(self.norm.mean)
        std = np.array(self.norm.std)

        self.inv_norm = transforms.Normalize(mean=-mean/std, std=1/std)

    def __call__(self, im_1, im_2, target):
        resize = transforms.Resize(size=self.size)
        
        im_1 = resize(im_1)
        im_2 = resize(im_2)
        valid_mask = target[:,:,2:3]
        target = resample_flow(target, self.size)
        valid_mask = resample_mask(valid_mask, self.size)

        i, j, h, w = transforms.RandomCrop.get_params(im_1, output_size=self.crop)
        # i, j, h, w = 0, 0, self.crop[0], self.crop[1]
        im_1 = TF.crop(im_1, i, j, h, w)
        im_2 = TF.crop(im_2, i, j, h, w)
        target = target[i:i+h,j:j+w,:]
        valid_mask = valid_mask[i:i+h,j:j+w,:]

        if random.random() > self.flip:
            im_1 = TF.hflip(im_1)
            im_2 = TF.hflip(im_2)
            target = cv2.flip(target, 1)
            valid_mask = cv2.flip(valid_mask, 1)
            valid_mask = np.expand_dims(valid_mask, axis=2)
            target[:,:,0] = -target[:,:,0] # negate u when flipped

        target = target.transpose((2, 0, 1))
        valid_mask = valid_mask.transpose((2, 0, 1))

        im_1 = TF.to_tensor(im_1)
        im_2 = TF.to_tensor(im_2)
        target = torch.Tensor(target)
        valid_mask = torch.Tensor(valid_mask)

        im_1 = norm(im_1)
        im_2 = norm(im_2)

        scale_factor = 0.5

        if scale_factor != 1:
            im_1 = im_1.unsqueeze(0)
            im_2 = im_2.unsqueeze(0)
            target = target.unsqueeze(0)
            valid_mask = valid_mask.unsqueeze(0)

            im_1 = F.interpolate(im_1, scale_factor=scale_factor, mode='bilinear', align_corners=False)
            im_2 = F.interpolate(im_2, scale_factor=scale_factor, mode='bilinear', align_corners=False)
            target = F.interpolate(target, scale_factor=scale_factor, mode='bilinear', align_corners=False) * scale_factor
            valid_mask = F.interpolate(valid_mask, scale_factor=scale_factor, mode='bilinear', align_corners=False)
            
            im_1 = im_1.squeeze(0)
            im_2 = im_2.squeeze(0)
            target = target.squeeze(0)
            valid_mask = valid_mask.squeeze(0)

        valid_idx = (valid_mask != 0)
        valid_mask[valid_idx] = 1

        return im_1, im_2, target, valid_mask

    def denorm(self, im):
        return self.inv_norm(im)

## Dataset

In [0]:
class KITTIFlowDataset(Dataset):
    def __init__(self, root, tfm_function, image_folder="image_2", flow_folder="flow_occ"):
        super(KITTIFlowDataset, self).__init__()
        self.root = Path(root)
        self.image_dir = self.root/image_folder
        self.flow_dir = self.root/flow_folder
        self.im_1 = sorted([os.path.basename(x) for x in self.image_dir.glob("*_10.png")])
        self.im_2 = sorted([os.path.basename(x) for x in self.image_dir.glob("*_11.png")])
        self.tfm_function = tfm_function

    def __len__(self):
        return len(self.im_1)

    def __getitem__(self, idx):
        im_1 = Image.open(self.image_dir/self.im_1[idx]).convert('RGB')
        im_2 = Image.open(self.image_dir/self.im_2[idx]).convert('RGB')
        target = read_png_flow(self.flow_dir/self.im_1[idx])
        
        im_1, im_2, target, valid_mask = self.tfm_function(im_1, im_2, target)

        return im_1, im_2, target, valid_mask # The third channel of target is all zeros. I think the first two channels are the x and y components of the vector for that pixel 

In [0]:
mean = np.array([.5, .5, .5])
std = np.array([.5, .5, .5])
norm = transforms.Normalize(mean=mean, std=std)
tfms = FlowDatasetTransform(norm)
dataset = KITTIFlowDataset("data_scene_flow/training", tfms)
len(dataset)

0

In [0]:
trainset = Subset(dataset, np.arange(100))
valset = Subset(dataset, np.arange(101, 200))
sampler = RandomSampler(trainset)
trainloader = DataLoader(trainset, sampler=sampler,  num_workers=4, pin_memory=True, batch_size=4)#, collate_fn=H5MiniBatchDataset.collate, batch_size=2)
valloader = DataLoader(valset, batch_size=4)

In [0]:
backwarp_tenGrid = {}
backwarp_tenPartial = {}

def warp(im, flow, device='cuda'):
    if str(flow.size()) not in backwarp_tenGrid:
        tenHorizontal = torch.linspace(-1.0, 1.0, flow.shape[3]).view(1, 1, 1, flow.shape[3]).expand(flow.shape[0], -1, flow.shape[2], -1)
        tenVertical = torch.linspace(-1.0, 1.0, flow.shape[2]).view(1, 1, flow.shape[2], 1).expand(flow.shape[0], -1, -1, flow.shape[3])
        backwarp_tenGrid[str(flow.size())] = torch.cat([ tenHorizontal, tenVertical ], 1).to(device)

    if str(flow.size()) not in backwarp_tenPartial:
        backwarp_tenPartial[str(flow.size())] = flow.new_ones([ flow.shape[0], 1, flow.shape[2], flow.shape[3] ])

    flow = torch.cat([ flow[:, 0:1, :, :] / ((im.shape[3] - 1.0) / 2.0), flow[:, 1:2, :, :] / ((im.shape[2] - 1.0) / 2.0) ], 1)
    im = torch.cat([ im, backwarp_tenPartial[str(flow.size())] ], 1)

    grid = (backwarp_tenGrid[str(flow.size())] + flow).permute(0, 2, 3, 1)

    out = F.grid_sample(input=im, grid=grid, mode='bilinear', padding_mode='zeros', align_corners=False)

    mask = out[:, -1:, :, :]
    mask[mask > 0.999] = 1.0
    mask[mask < 1.0] = 0.0

    return (out[:, :-1, :, :] * mask).contiguous()

# Trainer

## Model

In [0]:
class PyramidLevel(nn.Module):
    def __init__(self, in_channels, hidden_channels):
        super(PyramidLevel, self).__init__()

        self.model = nn.Sequential(nn.Conv2d(in_channels=in_channels, out_channels=hidden_channels, kernel_size=3, stride=2, padding=1),
                                   nn.LeakyReLU(inplace=False, negative_slope=0.1),
                                   nn.Conv2d(in_channels=hidden_channels, out_channels=hidden_channels, kernel_size=3, stride=1, padding=1),
                                   nn.LeakyReLU(inplace=False, negative_slope=0.1),
                                   nn.Conv2d(in_channels=hidden_channels, out_channels=hidden_channels, kernel_size=3, stride=1, padding=1),
                                   nn.LeakyReLU(inplace=False, negative_slope=0.1))

    def forward(self, x):
        return self.model(x)

class FeaturePyramid(nn.Module):
  def __init__(self, in_channels=3, hidden_channels=[16, 32, 64, 96, 128, 196]):
    super(FeaturePyramid, self).__init__()

    self.l1 = PyramidLevel(3, hidden_channels[0])
    self.levels = nn.ModuleList([PyramidLevel(hidden_channels[idx], c) for idx, c in enumerate(hidden_channels[1:])])
      
  def forward(self, x):
    features = [self.l1(x)]

    for lvl in self.levels:
      features.append(lvl(features[-1]))

    return features

class DenseFlowEstimator(nn.Module):
    def __init__(self, in_channels):
        super(DenseFlowEstimator, self).__init__()
        self.netOne = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(inplace=False, negative_slope=0.1)
        )

        self.netTwo = nn.Sequential(
            nn.Conv2d(in_channels=in_channels + 128, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(inplace=False, negative_slope=0.1)
        )

        self.netThr = nn.Sequential(
            nn.Conv2d(in_channels=in_channels + 128 + 128, out_channels=96, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(inplace=False, negative_slope=0.1)
        )

        self.netFou = nn.Sequential(
            nn.Conv2d(in_channels=in_channels + 128 + 128 + 96, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(inplace=False, negative_slope=0.1)
        )

        self.netFiv = nn.Sequential(
            nn.Conv2d(in_channels=in_channels + 128 + 128 + 96 + 64, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(inplace=False, negative_slope=0.1)
        )

        self.predict_flow = nn.Conv2d(in_channels=in_channels + 128 + 128 + 96 + 64 + 32, out_channels=2, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        x = torch.cat([self.netOne(x), x], 1)
        x = torch.cat([self.netTwo(x), x], 1)
        x = torch.cat([self.netThr(x), x], 1)
        x = torch.cat([self.netFou(x), x], 1)
        x = torch.cat([self.netFiv(x), x], 1)

        flow = self.predict_flow(x)

        return x, flow

class FlowEstimator(nn.Module):
    def __init__(self, in_channels):
        super(FlowEstimator, self).__init__()
        self.netOne = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(inplace=False, negative_slope=0.1)
        )

        self.netTwo = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(inplace=False, negative_slope=0.1)
        )

        self.netThr = nn.Sequential(
            nn.Conv2d(in_channels=128, out_channels=96, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(inplace=False, negative_slope=0.1)
        )

        self.netFou = nn.Sequential(
            nn.Conv2d(in_channels=96, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(inplace=False, negative_slope=0.1)
        )

        self.netFiv = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.LeakyReLU(inplace=False, negative_slope=0.1)
        )

        self.predict_flow = nn.Conv2d(in_channels=32, out_channels=2, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        x = self.netOne(x)
        x = self.netTwo(x)
        x = self.netThr(x)
        x = self.netFou(x)
        x = self.netFiv(x)

        flow = self.predict_flow(x)

        return x, flow

class WarpAndUpsample(nn.Module):
  def __init__(self, in_channels, corr, flow_scale_factor=1):
    super(WarpAndUpsample, self).__init__()
    self.corr = corr
    self.upflow = nn.ConvTranspose2d(in_channels=2, out_channels=2, kernel_size=4, stride=2, padding=1)
    self.upfeat = nn.ConvTranspose2d(in_channels=in_channels, out_channels=2, kernel_size=4, stride=2, padding=1)
    self.flow_scale_factor = flow_scale_factor

  def forward(self, feat, im1, im2, flow, gt_flow=None, weights=[1, 0]):
    upflow = self.upflow(flow)
    warpflow = upflow  

    if gt_flow is not None and weights[1] != 0:
      gt_flow = F.interpolate(gt_flow, size=(upflow.shape[2], upflow.shape[3]), mode='bilinear', align_corners=False) * 0.05
      warpflow = (upflow * weights[0] + gt_flow * weights[1]) / (weights[0] + weights[1])

    im2_warp = warp(im2, warpflow*self.flow_scale_factor)
    corr = F.leaky_relu(self.corr(im1, im2_warp), negative_slope=0.1)
    feat = self.upfeat(feat) 
    feat = torch.cat([feat, im1, corr, upflow], dim=1)
    return feat

class ContextNetwork(nn.Module):
    def __init__(self, in_channels):
        super(ContextNetwork, self).__init__()

        self.model = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=128, kernel_size=3, stride=1, padding=1, dilation=1),
            nn.LeakyReLU(inplace=False, negative_slope=0.1),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=2, dilation=2),
            nn.LeakyReLU(inplace=False, negative_slope=0.1),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=4, dilation=4),
            nn.LeakyReLU(inplace=False, negative_slope=0.1),
            nn.Conv2d(in_channels=128, out_channels=96, kernel_size=3, stride=1, padding=8, dilation=8),
            nn.LeakyReLU(inplace=False, negative_slope=0.1),
            nn.Conv2d(in_channels=96, out_channels=64, kernel_size=3, stride=1, padding=16, dilation=16),
            nn.LeakyReLU(inplace=False, negative_slope=0.1),
            nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, stride=1, padding=1, dilation=1),
            nn.LeakyReLU(inplace=False, negative_slope=0.1),
            nn.Conv2d(in_channels=32, out_channels=2, kernel_size=3, stride=1, padding=1, dilation=1))

    def forward(self, x):
        return self.model(x)

def init_weights(m):
    if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
        nn.init.kaiming_normal_(m.weight.data, mode='fan_in')
        if m.bias is not None:
            m.bias.data.zero_()

class PWCNet(nn.Module):
  def __init__(self):
    super(PWCNet, self).__init__()

    pyr_channels = [16, 32, 64, 96, 128]

    self.feature_pyramid = FeaturePyramid(3, pyr_channels)

    md = 4
    nd = (2*md+1)**2

    self.corr = Correlation(pad_size=md, kernel_size=1, max_displacement=md, stride1=1, stride2=1, corr_multiply=1)

    #self.flow_estimator_6 = FlowEstimator(nd)
    #self.warp6 = WarpAndUpsample(32, self.corr, 0.0625)

    self.flow_estimator_5 = FlowEstimator(nd)
    self.warp5 = WarpAndUpsample(32, self.corr, 1.25)

    self.flow_estimator_4 = FlowEstimator(nd+2+2+pyr_channels[-2])
    self.warp4 = WarpAndUpsample(32, self.corr, 2.5)

    self.flow_estimator_3 = FlowEstimator(nd+2+2+pyr_channels[-3])
    self.warp3 = WarpAndUpsample(32, self.corr, 5)

    self.flow_estimator_2 = FlowEstimator(nd+2+2+pyr_channels[-4])
    self.context_net = ContextNetwork(32+2)

    self.apply(init_weights)


  def forward(self, im1, im2, gt_flow=None):
    #l1_1, l2_1, l3_1, l4_1, l5_1, l6_1 = self.feature_pyramid(im1)
    #l1_2, l2_2, l3_2, l4_2, l5_2, l6_2 = self.feature_pyramid(im2)

    l1_1, l2_1, l3_1, l4_1, l5_1 = self.feature_pyramid(im1)
    l1_2, l2_2, l3_2, l4_2, l5_2 = self.feature_pyramid(im2)

    #feat = F.leaky_relu(self.corr(l6_1, l6_2), negative_slope=0.1)
    feat = F.leaky_relu(self.corr(l5_1, l5_2), negative_slope=0.1)

    #feat, flow6 = self.flow_estimator_6(feat)  
    #feat = self.warp6(feat, l5_1, l5_2, flow6)

    feat, flow5 = self.flow_estimator_5(feat) 
    feat = self.warp5(feat, l4_1, l4_2, flow5, gt_flow)

    feat, flow4 = self.flow_estimator_4(feat)    
    feat = self.warp4(feat, l3_1, l3_2, flow4, gt_flow)

    feat, flow3 = self.flow_estimator_3(feat)
    feat = self.warp3(feat, l2_1, l2_2, flow3, gt_flow)

    feat, flow2 = self.flow_estimator_2(feat)

    feat = torch.cat([feat, flow2], dim=1)
    flow2 = flow2 + self.context_net(feat)  

    if self.training:
      return [flow2,flow3,flow4,flow5]
      # return [flow2,flow3,flow4,flow5,flow6]
        
    else:
      return [flow2]

## Loss

In [0]:
import torch.nn.functional as F

In [0]:
def L2(output, target):
    return torch.norm(output-target,p=2,dim=1).mean()

In [0]:
class PWCNetLoss(nn.Module):
    def __init__(self, alphas=[0.32, 0.08, 0.02, 0.01, 0.005]):
        super(PWCNetLoss, self).__init__()
        self.alphas = alphas

    def forward(self, output, target):
        loss = 0
        target = target * 0.05 # From the paper: "We scale the ground truth flow by 20..." this is a bit confusing.

        for pred_flow, alpha in zip(output, self.alphas):
            scaled_target = F.interpolate(target, size=(pred_flow.shape[2], pred_flow.shape[3]), mode='bilinear', align_corners=False)
            loss += alpha * L2(pred_flow, scaled_target)

        return loss

In [0]:
def L1(output, target, q, epsilon, mask):
    # print("output: ",output.shape)
    # print("target: ",target.shape)
    # print("mask: ",target.shape)
    # epsilon = 0
    return (mask*(torch.abs(output-target)+epsilon)**q).mean()

In [0]:
class PWCNetLoss_Kitti_finetune(nn.Module):
    def __init__(self, alphas=[0.32, 0.08, 0.02, 0.01, 0.005],q=0.4,epsilon=0.01):
        super(PWCNetLoss_Kitti_finetune, self).__init__()
        self.alphas = alphas
        self.q = q
        self.epsilon = epsilon

    def forward(self, output, target, mask):
        loss = 0
        target = target * 0.05 # From the paper: "We scale the ground truth flow by 20..." this is a bit confusing.

        #exclued invalid pixels

        for pred_flow, alpha in zip(output, self.alphas):
            scaled_target = F.interpolate(target, size=(pred_flow.shape[2], pred_flow.shape[3]), mode='bilinear', align_corners=False)
            scaled_mask = F.interpolate(mask, size=(pred_flow.shape[2], pred_flow.shape[3]), mode='bilinear', align_corners=False)
            valid_idx = (scaled_mask != 0)
            scaled_mask[valid_idx] = 1
            loss += alpha * L1(pred_flow, scaled_target, self.q, self.epsilon, scaled_mask)

        return loss

In [0]:
def train_custom(model, optimizer, objective, schedule):
    cb_dict = {}

    with TrainModel(model):
        schedule.on_train_begin(model, optimizer)

        for epoch in schedule:

            schedule.on_epoch_begin(model, optimizer)

            for im1, im2, target in schedule.data():
                schedule.on_batch_begin(model, optimizer)

                im1, im2, target = Variable(im1.to('cuda')), Variable(im2.to('cuda')), Variable(target.to('cuda'))

                out = model(im1, im2, target)
                loss = objective(out, target)
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

                schedule.on_batch_end(model, optimizer, loss.detach().item())

                del loss, im1, im2, target

            schedule.on_epoch_end(model, optimizer)

        schedule.on_train_end(model, optimizer)

## Load Model and Predict

In [0]:
from baseline import *

img_root = '/content/drive/Shared drives/EECS 504 PWC Net/Test Images'
save_root = '/content/drive/Shared drives/EECS 504 PWC Net/Results/Baseline_Results/Dense_GF/'
img1_name = 'car1.jpg'
img2_name = 'car2.jpg'
save_name = 'car.png'

os.chdir(img_root)
img1 = cv2.imread(img1_name)
img2 = cv2.imread(img2_name)

params = dict(pyr_scale = 0.5,
                levels = 5,
                winsize = 15, 
                iterations = 5, 
                poly_n = 7, 
                poly_sigma = 1.5,
                flags = 0)
    
flow = dense_GF(img1, img2, params, True)
flow_color = flow_vis.flow_to_color(flow[:,:,0:2])

plot_flow(img1, img2, flow_color)
# write_png_flow(flow, save_root+save_name)
# cv2.imwrite(save_root+'color_'+save_name, cv2.cvtColor(flow_color, cv2.COLOR_BGR2RGB))

In [0]:
# root = '/content/drive/Shared drives/EECS 504 PWC Net/Models/0419-1109PM_Test-Kitti-model.ckpt.tar'
root = '/content/drive/Shared drives/EECS 504 PWC Net/Models/Test-Kitti-model.ckpt.tar'
checkpoint = torch.load(root, map_location=None)
model = PWCNet().to('cuda')
model.load_state_dict(checkpoint['model'])
model.eval();

In [0]:
import datetime

x = datetime.datetime.now()
date = x.strftime("%Y")+x.strftime("%m")+x.strftime("%d")
root_date = '/content/drive/Shared drives/EECS 504 PWC Net/Results/PWC-Net/'+date+'/'

if os.path.exists(root_date):
    print('Path Exists')
    time = x.strftime("%H")+x.strftime("%M")
    root_date = root_date + date + time

root_image_2 = root_date+'/image_2/'
root_image_3 = root_date+'/image_3/'

os.makedirs(root_image_2, exist_ok=True)
os.makedirs(root_image_3, exist_ok=True)

Path Exists


In [0]:
os.chdir('/content/')
mean = np.array([.5, .5, .5])
std = np.array([.5, .5, .5])
norm = transforms.Normalize(mean=mean, std=std)
tfms = FlowDatasetTransform(norm, flip=1)
dataset = KITTIFlowDataset('/content/drive/Shared drives/EECS 504 PWC Net/Data/data_scene_flow/training', tfms)

trainset = Subset(dataset, np.arange(100))
valset = Subset(dataset, np.arange(100, 200))
sampler = RandomSampler(trainset)
trainloader = DataLoader(trainset, sampler=sampler,  num_workers=4, pin_memory=True, batch_size=4)#, collate_fn=H5MiniBatchDataset.collate, batch_size=2)
valloader = DataLoader(valset, batch_size=1)

In [0]:
from evaluation import *
import time

# img_root = '/content/drive/Shared drives/EECS 504 PWC Net/Data/data_scene_flow/training/image_2/'
# imgs_1 = sorted([os.path.basename(x) for x in image_dir.glob('*_10.png')])
# imgs_2 = sorted([os.path.basename(x) for x in image_dir.glob('*_11.png')])

save_root = root_image_2
GT_root = '/content/drive/Shared drives/EECS 504 PWC Net/Data/data_scene_flow/training/flow_occ/'
save_name = 0

# FL_up_est = np.asarray([])
# AEPE_up_est = np.asarray([])

FL_down_gt_pwc = np.asarray([])
AEPE_down_gt_pwc = np.asarray([])
FL_down_gt_GF = np.asarray([])
AEPE_down_gt_GF = np.asarray([])
FL_down_gt_LK = np.asarray([])
AEPE_down_gt_LK = np.asarray([])

time_list_pwc = []
time_list_GF = []
time_list_LK = []

params_GF = dict(pyr_scale = 0.5,
                levels = 5,
                winsize = 15, 
                iterations = 5, 
                poly_n = 7, 
                poly_sigma = 1.5,
                flags = 0)

In [0]:
import cv2
from baseline import *
from evaluation import *
count = 100
tau = np.asarray([5.0, 0.5])
for img1, img2, target, mask in valloader:
    # img1, img2, target, mask = next(iter(valloader)) 
    im1, im2 = Variable(img1.to('cuda')), Variable(img2.to('cuda'))

    ## PWC-NET Esimation
    flow_target = np.moveaxis(target.squeeze().numpy(), 0, -1)

    t = time.time()
    flow_pwc = model(im1, im2)
    elapsed_pwc = time.time() - t
    time_list_pwc.append(elapsed_pwc)

    flow_pwc = flow_pwc[0].squeeze().to('cpu').detach().numpy()
    flow_pwc = np.moveaxis(flow_pwc, 0, -1)

    img1 = img1.squeeze()
    img2 = img2.squeeze()
    img1 = tfms.denorm(img1)*255
    img2 = tfms.denorm(img2)*255
    img1 = np.moveaxis(img1.numpy(), 0, -1)
    img2 = np.moveaxis(img2.numpy(), 0, -1)
    
    ## Dense GF Estimation    
    prev_img = cv2.cvtColor(img1,cv2.COLOR_RGB2GRAY)
    next_img = cv2.cvtColor(img2,cv2.COLOR_RGB2GRAY)
    t = time.time()
    flow_GF = cv2.calcOpticalFlowFarneback(prev_img, next_img, None, **params_GF)
    elapsed_GF = time.time() - t
    time_list_GF.append(elapsed_GF)

    ## Dense LK Estimation
    t = time.time()
    flow_LK = dense_LK(img1, img2)
    elapsed_LK = time.time() - t
    time_list_LK.append(elapsed_LK)

    ## Evaluation   

    flow_mask = mask.view(mask.shape[2], mask.shape[3], -1).numpy()
    F_gt = np.concatenate((flow_target, flow_mask), axis=2)

    # fl_up_est, aepe_up_est = flow_error(F_gt, cv2.resize(flow, (F_gt.shape[1], F_gt.shape[0])))
    fl_down_gt_pwc, aepe_down_gt_pwc = flow_error(cv2.resize(F_gt, (flow_pwc.shape[1], flow_pwc.shape[0])), flow_pwc, tau)
    fl_down_gt_GF, aepe_down_gt_GF = flow_error(F_gt, flow_GF, tau)
    fl_down_gt_LK, aepe_down_gt_LK = flow_error(F_gt, flow_LK, tau)

    # FL_up_est = np.append(FL_up_est, fl_up_est)
    # AEPE_up_est = np.append(AEPE_up_est, aepe_up_est)

    FL_down_gt_pwc = np.append(FL_down_gt_pwc, fl_down_gt_pwc)
    AEPE_down_gt_pwc = np.append(AEPE_down_gt_pwc, aepe_down_gt_pwc)
    FL_down_gt_GF = np.append(FL_down_gt_GF, fl_down_gt_GF)
    AEPE_down_gt_GF = np.append(AEPE_down_gt_GF, aepe_down_gt_GF)
    FL_down_gt_LK = np.append(FL_down_gt_LK, fl_down_gt_LK)
    AEPE_down_gt_LK = np.append(AEPE_down_gt_LK, aepe_down_gt_LK)

    # ## Plot
    # print(count)
    
    # # flow_target_color = flow_vis.flow_to_color(flow_target, convert_to_bgr=False)
    # flow_target_color = flow_vis.flow_to_color(F_gt[:,:,0:2], convert_to_bgr=False)

    # flow_pwc_color  = flow_vis.flow_to_color(flow_pwc)
    # flow_GF_color  = flow_vis.flow_to_color(flow_GF)
    # flow_LK_color  = flow_vis.flow_to_color(flow_LK)

    # # titles = ['Prev Image', 'Next Image', 'Est Flow', 'GT Flow']
    # fig, ax = plt.subplots(2, 3, figsize=(30, 5))
    
    # # fig, ax = plt.subplots(221)
    # # fig.suptitle(img1_name, y=0.62)
    # for i, a in enumerate(ax.flatten()):
    #   a.set_axis_off()
    # #   a.set_title(titles[i])
    # ax[0, 0].imshow(img1/255)
    # ax[0, 1].imshow(img2/255)
    # ax[0, 2].imshow(flow_target_color)
    # ax[1, 0].imshow(flow_GF_color)
    # ax[1, 1].imshow(flow_LK_color)
    # ax[1, 2].imshow(flow_pwc_color)
    
    # # write_png_flow(flow, save_root+str(save_name)+'.png');
    # cv2.imwrite(save_root+str(count)+'_img1.png', cv2.cvtColor(img1, cv2.COLOR_BGR2RGB))
    # cv2.imwrite(save_root+str(count)+'_img2.png', cv2.cvtColor(img2, cv2.COLOR_BGR2RGB))
    # cv2.imwrite(save_root+str(count)+'_target.png', cv2.cvtColor(flow_target_color, cv2.COLOR_BGR2RGB))
    # cv2.imwrite(save_root+str(count)+'_GF.png', cv2.cvtColor(flow_GF_color, cv2.COLOR_BGR2RGB))
    # cv2.imwrite(save_root+str(count)+'_LK.png', cv2.cvtColor(flow_LK_color, cv2.COLOR_BGR2RGB))
    # cv2.imwrite(save_root+str(count)+'_pwc.png', cv2.cvtColor(flow_pwc_color, cv2.COLOR_BGR2RGB))
    # count = count+1
    # plt.show()

In [0]:
def average_error(FL_down_gt, AEPE_down_gt, time_list):
    # FL_avg_up_est = np.mean(FL_up_est)
    # AEPE_avg_up_est = np.mean(AEPE_up_est)

    FL_avg_down_gt = np.mean(FL_down_gt)
    AEPE_avg_down_gt = np.mean(AEPE_down_gt)

    t_mean = np.mean(np.asarray(time_list))

    print("Mean running time", t_mean)
    # print('Fl-All Average (Upsample Estimation):', FL_avg_up_est)
    # print('AEPE Average(Upsample Estimation): ', AEPE_avg_up_est)
    print('Fl-All Average (Downsample GT):', FL_avg_down_gt)
    print('AEPE Average(Downsample GT): ', AEPE_avg_down_gt)

In [0]:
print("PWC-Net")
average_error(FL_down_gt_pwc, AEPE_down_gt_pwc, time_list_pwc)
print("\nGF")
average_error(FL_down_gt_GF, AEPE_down_gt_GF, time_list_GF)
print("\nLK")
average_error(FL_down_gt_LK, AEPE_down_gt_LK, time_list_LK)

PWC-Net
Mean running time 0.010814585685729981
Fl-All Average (Downsample GT): 0.449372474641487
AEPE Average(Downsample GT):  5.817433965334745

GF
Mean running time 0.030926355838775635
Fl-All Average (Downsample GT): 0.4723166590624772
AEPE Average(Downsample GT):  6.667518096961297

LK
Mean running time 2.182913969039917
Fl-All Average (Downsample GT): 0.42085547587860095
AEPE Average(Downsample GT):  5.981375852854
