In [1]:
import cv2
import argparse
import os
import time
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import csv
import os
import datetime
import sys
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors

import flow_transforms
import models
import datasets
import balancedsampler
from multiscaleloss import multiscaleloss
from flow_algo import flow_to_color

%matplotlib inline
%load_ext line_profiler

Model preparation
==

In [2]:
model_names = sorted(name for name in models.__dict__
    if name.islower() and not name.startswith("__"))

dataset_names = sorted(name for name in datasets.__all__)

parser = argparse.ArgumentParser(description='PyTorch FlowNet Training on several datasets')
parser.add_argument('data', metavar='DIR',
                    help='path to dataset')
parser.add_argument('--dataset', metavar='DATASET', default='flying_chairs',
                    choices=dataset_names,
                    help='dataset type : ' +
                        ' | '.join(dataset_names) +
                        ' (default: flying_chairs)')
parser.add_argument('-s', '--split', default=80, type=float, metavar='%',
                    help='split percentage of train samples vs test (default: 80)')
parser.add_argument('--arch', '-a', metavar='ARCH', default='flownets',
                    choices=model_names,
                    help='model architecture: ' +
                        ' | '.join(model_names) +
                        ' (default: flownets)')
parser.add_argument('--solver', default = 'adam',choices=['adam','sgd'],
                    help='solvers: adam | sgd')
parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
                    help='number of data loading workers (default: 4)')
parser.add_argument('--epochs', default=90, type=int, metavar='N',
                    help='number of total epochs to run (default: 90')
parser.add_argument('--start-epoch', default=0, type=int, metavar='N',
                    help='manual epoch number (useful on restarts)')
parser.add_argument('--epoch-size', default=0, type=int, metavar='N',
                    help='manual epoch size (will match dataset size if not set)')
parser.add_argument('-b', '--batch-size', default=16, type=int,
                    metavar='N', help='mini-batch size (default: 16)')
parser.add_argument('--lr', '--learning-rate', default=0.0001, type=float,
                    metavar='LR', help='initial learning rate')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
                    help='momentum for sgd, alpha parameter for adam')
parser.add_argument('--beta', default=0.999, type=float, metavar='M',
                    help='beta parameters for adam')
parser.add_argument('--weight-decay', '--wd', default=4e-4, type=float,
                    metavar='W', help='weight decay (default: 4e-4)')
parser.add_argument('--print-freq', '-p', default=10, type=int,
                    metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
                    help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true',
                    help='evaluate model on validation set')
parser.add_argument('--pretrained', dest='pretrained', default = None,
                    help='path to pre-trained model')
parser.add_argument('--log-summary', default = 'progress_log_summary.csv',
                    help='csv where to save per-epoch train and test stats')
parser.add_argument('--log-full', default = 'progress_log_full.csv',
                    help='csv where to save per-gradient descent train stats')
parser.add_argument('--no-date', action='store_true',
                    help='don\'t append date timestamp to folder' )
parser.add_argument('--loss', default='L1', help='loss function to apply to multiScaleCriterion : L1 (default)| SmoothL1| MSE')
parser.add_argument('--div-flow', default = 20,
                    help='value by which flow will be divided. Original value is 20 but 1 with batchNorm gives good results')


_StoreAction(option_strings=['--div-flow'], dest='div_flow', nargs=None, const=None, default=20, type=None, choices=None, help='value by which flow will be divided. Original value is 20 but 1 with batchNorm gives good results', metavar=None)

In [3]:
args = parser.parse_args(['--pretrained', 'flownets_pytorch.pth', '-e', '-s', '0', '-b', '1',
                          '--arch' , 'flownets', 'data'])
# evaluate, split at 0%, batchsize 1
# without argv[0]

In [4]:
args

Namespace(arch='flownets', batch_size=1, beta=0.999, data='data', dataset='flying_chairs', div_flow=20, epoch_size=0, epochs=90, evaluate=True, log_full='progress_log_full.csv', log_summary='progress_log_summary.csv', loss='L1', lr=0.0001, momentum=0.9, no_date=False, pretrained='flownets_pytorch.pth', print_freq=10, resume='', solver='adam', split=0.0, start_epoch=0, weight_decay=0.0004, workers=4)

In [5]:
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

input_transform = transforms.Compose([
        flow_transforms.ArrayToTensor(),
        transforms.Normalize(mean=[0,0,0], std=[255,255,255]),
        normalize
])

In [6]:
model = models.__dict__[args.arch](args.pretrained).cuda()
model = torch.nn.DataParallel(model).cuda()
model = model.eval()
cudnn.benchmark = True

Video capture
==

In [7]:
cap = cv2.VideoCapture(0)

In [8]:
cap.isOpened()

True

In [9]:
data_size = (512, 384) # here it's (width, height) order for cv2.resize()

In [16]:
def loop():
    prev = None
    while True:
        ret, frame = cap.read()
        frame = cv2.resize(frame, (data_size), interpolation = cv2.INTER_AREA) # 480 * 640 -> 384 * 512
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    #     cv2.imshow('frame', frame)

        # dims: height, width, channels -> batchsize = 1, channels, height, width
        if prev is not None:
            inp = [prev, frame]
            inp = map(input_transform, inp)

            input_var = torch.autograd.Variable(torch.cat(inp, 0).cuda(), volatile=True)
            input_var = input_var.view(1, *input_var.size())
            output = model(input_var) # performs 160ms => 6 fps; 80% of loop time on my laptop card

            # dims: batchsize = 1, channels, height, width -> height, width, channels
            flow = output.data.cpu().numpy()
            flow = flow.reshape(flow.shape[1:])
            flow = np.moveaxis(flow, 0, 2)

            img = flow_to_color(flow, norm = False)

            img = np.flip(img, 2) # RGB to BGR. v2.cvtColor(img, cv2.COLOR_RGB2BGR) throws an error for some reason
            img = cv2.resize(img, data_size) # bilinear interpolation
            cv2.imshow('flow', img)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        prev = frame

    cv2.destroyAllWindows()

In [17]:
loop()
# %lprun -f loop loop()

In [79]:
cv2.waitKey(0)
cv2.destroyAllWindows()