In [28]:
import numpy as np 
import cv2
import torch
import torchvision.transforms as transforms 
import torch.nn as nn
from torch.nn.init import kaiming_normal_, constant_
import torch.nn.functional as F

In [29]:
def conv(batchNorm, input_planes, output_planes, kernel_size=3, stride=1):
    if batchNorm:
        return nn.Sequential(
            nn.Conv2d(input_planes, output_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=False),
            nn.BatchNorm2d(output_planes),
            nn.LeakyReLU(0.1,inplace=True)
        )
    else:
        return nn.Sequential(
            nn.Conv2d(input_planes, output_planes, kernel_size=kernel_size, stride=stride, padding=(kernel_size-1)//2, bias=True),
            nn.LeakyReLU(0.1,inplace=True)
        )


def predict_flow(input_planes):
    return nn.Conv2d(input_planes,2,kernel_size=3,stride=1,padding=1,bias=False)


def deconv(input_planes, output_planes):
    return nn.Sequential(
        nn.ConvTranspose2d(input_planes, output_planes, kernel_size=4, stride=2, padding=1, bias=False),
        nn.LeakyReLU(0.1,inplace=True)
    )


In [30]:
def correlate(inp1, inp2):
    out_corr = spatial_correlation_sample(inp1,
                                          inp2,
                                          kernel_size=1,
                                          patch_size=21,
                                          stride=1,
                                          padding=0,
                                          dilation_patch=2)
    # collate dimensions 1 and 2 in order to be treated as a
    # regular 4D tensor
    b, ph, pw, h, w = out_corr.size()
    out_corr = out_corr.view(b, ph * pw, h, w)/inp1.size(1)
    return F.leaky_relu_(out_corr, 0.1)


def crop_like(input, target):
    if input.size()[2:] == target.size()[2:]:
        return input
    else:
        return input[:, :, :target.size(2), :target.size(3)]

In [31]:
class FlowNetS(nn.Module):
    expansion = 1

    def __init__(self,batchNorm=True):
        super(FlowNetS,self).__init__()

        self.batchNorm = batchNorm
        self.conv1   = conv(self.batchNorm,   6,   64, kernel_size=7, stride=2)
        self.conv2   = conv(self.batchNorm,  64,  128, kernel_size=5, stride=2)
        self.conv3   = conv(self.batchNorm, 128,  256, kernel_size=5, stride=2)
        self.conv3_1 = conv(self.batchNorm, 256,  256)
        self.conv4   = conv(self.batchNorm, 256,  512, stride=2)
        self.conv4_1 = conv(self.batchNorm, 512,  512)
        self.conv5   = conv(self.batchNorm, 512,  512, stride=2)
        self.conv5_1 = conv(self.batchNorm, 512,  512)
        self.conv6   = conv(self.batchNorm, 512, 1024, stride=2)
        self.conv6_1 = conv(self.batchNorm,1024, 1024)

        self.deconv5 = deconv(1024,512)
        self.deconv4 = deconv(1026,256)
        self.deconv3 = deconv(770,128)
        self.deconv2 = deconv(386,64)

        self.predict_flow6 = predict_flow(1024)
        self.predict_flow5 = predict_flow(1026)
        self.predict_flow4 = predict_flow(770)
        self.predict_flow3 = predict_flow(386)
        self.predict_flow2 = predict_flow(194)
        self.upsampled_flow6_to_5 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
        self.upsampled_flow5_to_4 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
        self.upsampled_flow4_to_3 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)
        self.upsampled_flow3_to_2 = nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False)

        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d):
                kaiming_normal_(m.weight, 0.1)
                if m.bias is not None:
                    constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                constant_(m.weight, 1)
                constant_(m.bias, 0)
    def forward(self, x):
        out_conv2 = self.conv2(self.conv1(x))
        out_conv3 = self.conv3_1(self.conv3(out_conv2))
        out_conv4 = self.conv4_1(self.conv4(out_conv3))
        out_conv5 = self.conv5_1(self.conv5(out_conv4))
        out_conv6 = self.conv6_1(self.conv6(out_conv5))

        flow6       = self.predict_flow6(out_conv6)
        flow6_up    = crop_like(self.upsampled_flow6_to_5(flow6), out_conv5)
        out_deconv5 = crop_like(self.deconv5(out_conv6), out_conv5)

        concat5 = torch.cat((out_conv5,out_deconv5,flow6_up),1)
        flow5       = self.predict_flow5(concat5)
        flow5_up    = crop_like(self.upsampled_flow5_to_4(flow5), out_conv4)
        out_deconv4 = crop_like(self.deconv4(concat5), out_conv4)

        concat4 = torch.cat((out_conv4,out_deconv4,flow5_up),1)
        flow4       = self.predict_flow4(concat4)
        flow4_up    = crop_like(self.upsampled_flow4_to_3(flow4), out_conv3)
        out_deconv3 = crop_like(self.deconv3(concat4), out_conv3)

        concat3 = torch.cat((out_conv3,out_deconv3,flow4_up),1)
        flow3       = self.predict_flow3(concat3)
        flow3_up    = crop_like(self.upsampled_flow3_to_2(flow3), out_conv2)
        out_deconv2 = crop_like(self.deconv2(concat3), out_conv2)

        concat2 = torch.cat((out_conv2,out_deconv2,flow3_up),1)
        flow2 = self.predict_flow2(concat2)

        if self.training:
            return flow2,flow3,flow4,flow5,flow6
        else:
            return flow2
  

In [32]:
def flownets(data=None):
    """FlowNetS model architecture from the
    "Learning Optical Flow with Convolutional Networks" paper (https://arxiv.org/abs/1504.06852)

    Args:
        data : pretrained weights of the network. will create a new one if not set
    """
    model = FlowNetS(batchNorm=False)
    if data is not None:
        model.load_state_dict(data['state_dict'])
    return model


def flownets_bn(data=None):
    """FlowNetS model architecture from the
    "Learning Optical Flow with Convolutional Networks" paper (https://arxiv.org/abs/1504.06852)

    Args:
        data : pretrained weights of the network. will create a new one if not set
    """
    model = FlowNetS(batchNorm=True)
    if data is not None:
        model.load_state_dict(data['state_dict'])
    return model

In [38]:
# Load pre-trained weights of FlowNetS
model_dict = torch.load('/content/flownets_EPE1.951.pth.tar',map_location=torch.device('cpu'))
model = flownets(model_dict)
# model.flownet
# model.load_state_dict(model_dict['state_dict'])

frame1 = cv2.imread('img1.jpg',cv2.IMREAD_COLOR)
frame2 = cv2.imread('img2.jpg',cv2.IMREAD_COLOR)
frame1_gray = cv2.cvtColor(frame1, cv2.COLOR_BGR2GRAY)
frame2_gray = cv2.cvtColor(frame2, cv2.COLOR_BGR2GRAY)
img1 = torch.from_numpy(frame1.T)
img2 = torch.from_numpy(frame2.T)

model_input = torch.cat((img1,img2), dim=0)
model_input = model_input.to(torch.float32)
model_input = model_input.unsqueeze(0)
with torch.no_grad():
    flow = model(model_input)
flow_np = flow[0].detach().cpu().numpy().T

flow_np = flow_np[:,:,:,0]

#flow_lk = cv2.calcOpticalFlowFarneback(frame1_gray, frame2_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
flow_lk = cv2.calcOpticalFlowFarneback(frame1_gray, frame2_gray, None, 0.5, 3, 15, 3, 5, 1.2, 0)
# Resize the optical flow to match the original frame size
flow_lk = cv2.resize(flow_lk,(flow_np.shape[1],flow_np.shape[0]),interpolation=cv2.INTER_LINEAR)

mse = np.mean((flow_np-flow_lk)**2)
print(mse)


18.799227


In [40]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [41]:
!unzip /content/gdrive/MyDrive/video_data.zip
# /content/gdrive/MyDrive/video_data.zip

Archive:  /content/gdrive/MyDrive/video_data.zip
  inflating: vid13.mp4               
  inflating: vid3.mp4                
  inflating: vid2.mp4                
  inflating: vid11.mp4               
  inflating: vid1.mp4                
  inflating: vid5.mp4                
  inflating: vid14.mp4               
  inflating: vid8.mp4                
  inflating: vid10.mp4               
  inflating: vid12.mp4               
  inflating: vid9.mp4                
  inflating: vid4.mp4                
  inflating: vid15.mp4               
  inflating: vid7.mp4                
  inflating: vid6.mp4                


In [45]:
! pip install ptlflow

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ptlflow
  Downloading ptlflow-0.2.7-py3-none-any.whl (416 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m416.1/416.1 KB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting torchmetrics<=0.9.*,>=0.2
  Downloading torchmetrics-0.8.2-py3-none-any.whl (409 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m409.8/409.8 KB[0m [31m32.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting timm~=0.6.3
  Downloading timm-0.6.13-py3-none-any.whl (549 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m549.1/549.1 KB[0m [31m23.2 MB/s[0m eta [36m0:00:00[0m
Collecting pypng~=0.0.16
  Downloading pypng-0.0.21-py3-none-any.whl (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.1/48.1 KB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting einops<=0.4.*,>=0.3.0
  Downloading einops-0.3.2-py3-none-any.whl (2

In [52]:
model

FlowNetS(
  (conv1): Sequential(
    (0): Conv2d(6, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (conv2): Sequential(
    (0): Conv2d(64, 128, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (conv3): Sequential(
    (0): Conv2d(128, 256, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    (1): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (conv3_1): Sequential(
    (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (conv4): Sequential(
    (0): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (conv4_1): Sequential(
    (0): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): LeakyReLU(negative_slope=0.1, inplace=True)
  )
  (conv5): Sequential(
    (0): Conv2d(512, 512,

In [57]:
for i in range(5):
    for param in model.conv1.parameters():
        param.requires_grad = False

In [58]:
import cv2
import torch
import numpy as np
import tensorflow as tf
# from tensorflow.keras.applications import FlowNetS
# from ptlflow import flownets

# Load pre-trained FlowNetS model
# flownet_model = flownets(weights='imagenet', include_top=False, input_shape=(384, 512, 6))
flownet_model = model

# Freeze all layers except for the last few layers
for i in range(5):
    for param in model.conv1.parameters():
        param.requires_grad = False

for i in range(5):
    for param in model.conv2.parameters():
        param.requires_grad = False

# Add new layers for motion estimation
x = flownet_model.output
x = tf.keras.layers.Conv2D(filters=2, kernel_size=3, padding='same')(x)
motion_model = tf.keras.Model(inputs=flownet_model.input, outputs=x)

# Compile motion model with mean squared error loss
motion_model.compile(loss='mean_squared_error', optimizer='adam')

# Collect dataset of videos
videos = ['vid1.mp4', 'vid2.mp4', 'vid3.mp4', 'vid4.mp4', 'vid5.mp4', 'vid6.mp4', 'vid7.mp4',
          'vid8.mp4', 'vid9.mp4', 'vid10.mp4', 'vid11.mp4', 'vid12.mp4', 'vid13.mp4', 'vid14.mp4',
          'vid15.mp4']

# Split dataset into training and testing sets
train_videos = videos[:12]
test_videos = videos[12:]

# Preprocess dataset
def preprocess_frame(frame):
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    frame = cv2.resize(frame, (512, 384))
    return frame

train_frames = []
for video in train_videos:
    cap = cv2.VideoCapture(video)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = preprocess_frame(frame)
        train_frames.append(frame)
    cap.release()

test_frames = []
for video in test_videos:
    cap = cv2.VideoCapture(video)
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame = preprocess_frame(frame)
        test_frames.append(frame)
    cap.release()

# Train motion model
train_input = np.zeros((len(train_frames) - 1, 384, 512, 6), dtype=np.float32)
train_output = np.zeros((len(train_frames) - 1, 384, 512, 2), dtype=np.float32)
for i in range(len(train_frames) - 1):
    prev_frame = train_frames[i]
    next_frame = train_frames[i+1]
    train_input[i] = np.dstack((prev_frame, next_frame, np.zeros_like(prev_frame)))
    train_output[i] = motion_vectors

motion_model.fit(train_input, train_output, batch_size=4, epochs=10)

# Test motion model
def estimate_motion(frame1, frame2, model):
    input_data = np.dstack((frame1, frame2, np.zeros_like(frame1)))
    output_data = model.predict(np.array([input_data]))[0]
    return output_data

def mse(image1, image2):
    return np.mean((image1 - image2) ** 2)

mse_lk = 0.0
mse_bm = 0.0
mse_nn = 0.0
count = 0
for video in test_videos:
    cap = cv2.VideoCapture(video)
    ret, prev_frame = cap.read()
    prev_frame = preprocess_frame(prev_frame)
    while True:
        ret, next_frame = cap.read
        if not ret:
            break
        next_frame = preprocess_frame(next_frame)

    # Compute Lucas-Kanade motion vectors
    lk_params = dict(winSize=(15, 15), maxLevel=2, criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
    motion_vectors_lk, status, err = cv2.calcOpticalFlowPyrLK(prev_frame, next_frame, None, **lk_params)

    # Compute block-matching motion vectors
    motion_vectors_bm = cv2.calcOpticalFlowFarneback(prev_frame, next_frame, None, 0.5, 3, 15, 3, 5, 1.2, 0)

    # Compute deep network motion vectors
    motion_vectors_nn = estimate_motion(prev_frame, next_frame, motion_model)

    # Compute MSE between motion vectors and ground truth
    mse_lk += mse(motion_vectors_lk, motion_vectors)
    mse_bm += mse(motion_vectors_bm, motion_vectors)
    mse_nn += mse(motion_vectors_nn, motion_vectors)
    count += 1

    prev_frame = next_frame

cap.release()
mse_lk /= count
mse_bm /= count
mse_nn /= count

print('MSE Lucas-Kanade: %.4f' % mse_lk)
print('MSE Block-Matching: %.4f' % mse_bm)
print('MSE Deep Network: %.4f' % mse_nn)

AttributeError: ignored