In [1]:
import numpy as np
import cv2
import os
import sys

In [2]:
import cv2  
import numpy as np
import os 
from tqdm import tqdm

In [3]:
def getOpticalFlow(video):
    """Calculate dense optical flow of input video
    Args:
        video: the input video with shape of [frames,height,width,channel]. dtype=np.array
    Returns:
        flows_x: the optical flow at x-axis, with the shape of [frames,height,width,channel]
        flows_y: the optical flow at y-axis, with the shape of [frames,height,width,channel]
    """
    # initialize the list of optical flows
    gray_video = []
    for i in range(len(video)):
        img = cv2.cvtColor(video[i], cv2.COLOR_RGB2GRAY)
        gray_video.append(np.reshape(img,(224,224,1)))

    flows = []
    for i in range(0,len(video)-1):
        # calculate optical flow between each pair of frames
        flow = cv2.calcOpticalFlowFarneback(gray_video[i], gray_video[i+1], None, 0.5, 3, 15, 3, 5, 1.2, cv2.OPTFLOW_FARNEBACK_GAUSSIAN)
        # subtract the mean in order to eliminate the movement of camera
        flow[..., 0] -= np.mean(flow[..., 0])
        flow[..., 1] -= np.mean(flow[..., 1])
        # normalize each component in optical flow
        flow[..., 0] = cv2.normalize(flow[..., 0],None,0,255,cv2.NORM_MINMAX)
        flow[..., 1] = cv2.normalize(flow[..., 1],None,0,255,cv2.NORM_MINMAX)
        # Add into list 
        flows.append(flow)
        
    # Padding the last frame as empty array
    flows.append(np.zeros((224,224,2)))
      
    return np.array(flows, dtype=np.float32)

In [4]:
def Video2Npy(file_path, resize=(224,224)):
    """Load video and tansfer it into .npy format
    Args:
        file_path: the path of video file
        resize: the target resolution of output video
    Returns:
        frames: gray-scale video
        flows: magnitude video of optical flows 
    """
    # Load video
    cap = cv2.VideoCapture(file_path)
    # Get number of frames
    len_frames = int(cap.get(7))
    # Extract frames from video
    try:
        frames = []
        for i in range(len_frames-1):
            _, frame = cap.read()
            frame = cv2.resize(frame,resize, interpolation=cv2.INTER_AREA)
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = np.reshape(frame, (224,224,3))
            frames.append(frame)   
    except:
        print("Error: ", file_path, len_frames,i)
    finally:
        frames = np.array(frames)
        cap.release()
            
    # Get the optical flow of video
    flows = getOpticalFlow(frames)
    
    result = np.zeros((len(flows),224,224,5))
    result[...,:3] = frames
    result[...,3:] = flows
    
    return result

In [5]:
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

In [6]:
import numpy as np
import os
from time import time
import cv2

In [7]:
from keras.models import Sequential, Input, Model
from keras.layers import Dense, Flatten, Conv3D, MaxPooling3D, Dropout, BatchNormalization, Activation, LeakyReLU, Add, Multiply
from keras.regularizers import l2
from keras.layers.core import Lambda
from keras.layers.core import Lambda

In [8]:
# extract the rgb images 
def get_rgb(input_x):
    rgb = input_x[...,:3]
    return rgb

# extract the optical flows
def get_opt(input_x):
    opt= input_x[...,3:5]
    return opt

In [9]:
inputs = Input(shape=(64,224,224,5))

rgb = Lambda(get_rgb,output_shape=None)(inputs)
opt = Lambda(get_opt,output_shape=None)(inputs)

##################################################### RGB channel
rgb = Conv3D(
    16, kernel_size=(1,3,3), strides=(1,1,1), kernel_initializer='he_normal', activation='relu', padding='same')(rgb)
rgb = Conv3D(
    16, kernel_size=(3,1,1), strides=(1,1,1), kernel_initializer='he_normal', activation='relu', padding='same')(rgb)
rgb = MaxPooling3D(pool_size=(1,2,2))(rgb)

rgb = Conv3D(
    16, kernel_size=(1,3,3), strides=(1,1,1), kernel_initializer='he_normal', activation='relu', padding='same')(rgb)
rgb = Conv3D(
    16, kernel_size=(3,1,1), strides=(1,1,1), kernel_initializer='he_normal', activation='relu', padding='same')(rgb)
rgb = MaxPooling3D(pool_size=(1,2,2))(rgb)

rgb = Conv3D(
    32, kernel_size=(1,3,3), strides=(1,1,1), kernel_initializer='he_normal', activation='relu', padding='same')(rgb)
rgb = Conv3D(
    32, kernel_size=(3,1,1), strides=(1,1,1), kernel_initializer='he_normal', activation='relu', padding='same')(rgb)
rgb = MaxPooling3D(pool_size=(1,2,2))(rgb)

rgb = Conv3D(
    32, kernel_size=(1,3,3), strides=(1,1,1), kernel_initializer='he_normal', activation='relu', padding='same')(rgb)
rgb = Conv3D(
    32, kernel_size=(3,1,1), strides=(1,1,1), kernel_initializer='he_normal', activation='relu', padding='same')(rgb)
rgb = MaxPooling3D(pool_size=(1,2,2))(rgb)

##################################################### Optical Flow channel
opt = Conv3D(
    16, kernel_size=(1,3,3), strides=(1,1,1), kernel_initializer='he_normal', activation='relu', padding='same')(opt)
opt = Conv3D(
    16, kernel_size=(3,1,1), strides=(1,1,1), kernel_initializer='he_normal', activation='relu', padding='same')(opt)
opt = MaxPooling3D(pool_size=(1,2,2))(opt)

opt = Conv3D(
    16, kernel_size=(1,3,3), strides=(1,1,1), kernel_initializer='he_normal', activation='relu', padding='same')(opt)
opt = Conv3D(
    16, kernel_size=(3,1,1), strides=(1,1,1), kernel_initializer='he_normal', activation='relu', padding='same')(opt)
opt = MaxPooling3D(pool_size=(1,2,2))(opt)

opt = Conv3D(
    32, kernel_size=(1,3,3), strides=(1,1,1), kernel_initializer='he_normal', activation='relu', padding='same')(opt)
opt = Conv3D(
    32, kernel_size=(3,1,1), strides=(1,1,1), kernel_initializer='he_normal', activation='relu', padding='same')(opt)
opt = MaxPooling3D(pool_size=(1,2,2))(opt)

opt = Conv3D(
    32, kernel_size=(1,3,3), strides=(1,1,1), kernel_initializer='he_normal', activation='sigmoid', padding='same')(opt)
opt = Conv3D(
    32, kernel_size=(3,1,1), strides=(1,1,1), kernel_initializer='he_normal', activation='sigmoid', padding='same')(opt)
opt = MaxPooling3D(pool_size=(1,2,2))(opt)


##################################################### Fusion and Pooling
x = Multiply()([rgb,opt])
x = MaxPooling3D(pool_size=(8,1,1))(x)

##################################################### Merging Block
x = Conv3D(
    64, kernel_size=(1,3,3), strides=(1,1,1), kernel_initializer='he_normal', activation='relu', padding='same')(x)
x = Conv3D(
    64, kernel_size=(3,1,1), strides=(1,1,1), kernel_initializer='he_normal', activation='relu', padding='same')(x)
x = MaxPooling3D(pool_size=(2,2,2))(x)

x = Conv3D(
    64, kernel_size=(1,3,3), strides=(1,1,1), kernel_initializer='he_normal', activation='relu', padding='same')(x)
x = Conv3D(
    64, kernel_size=(3,1,1), strides=(1,1,1), kernel_initializer='he_normal', activation='relu', padding='same')(x)
x = MaxPooling3D(pool_size=(2,2,2))(x)

x = Conv3D(
    128, kernel_size=(1,3,3), strides=(1,1,1), kernel_initializer='he_normal', activation='relu', padding='same')(x)
x = Conv3D(
    128, kernel_size=(3,1,1), strides=(1,1,1), kernel_initializer='he_normal', activation='relu', padding='same')(x)
x = MaxPooling3D(pool_size=(2,3,3))(x)

##################################################### FC Layers
x = Flatten()(x)
x = Dense(128,activation='relu')(x)
x = Dropout(0.2)(x)
x = Dense(32, activation='relu')(x)

# Build the model
pred = Dense(2, activation='softmax')(x)
model = Model(inputs=inputs, outputs=pred)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 64, 224, 224 0                                            
__________________________________________________________________________________________________
lambda (Lambda)                 (None, 64, 224, 224, 0           input_1[0][0]                    
__________________________________________________________________________________________________
lambda_1 (Lambda)               (None, 64, 224, 224, 0           input_1[0][0]                    
__________________________________________________________________________________________________
conv3d (Conv3D)                 (None, 64, 224, 224, 448         lambda[0][0]                     
______________________________________________________________________________________________

In [10]:
from keras.optimizers import Adam, SGD

sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

In [11]:
from keras.models import load_model
from keras.optimizers import SGD

model.load_weights('./Models/keras_model.h5')


In [12]:
v1 = Video2Npy('./data/5.mpeg')
v1.shape

(1250, 224, 224, 5)

In [13]:
# v2 = Video2Npy('./data/2.mpeg')
# v2.shape

In [14]:
def normalize(data):
    mean = np.mean(data)
    std = np.std(data)
    return (data-mean) / std

In [15]:
def sliding_window(np_video, w=64, stride = 32):
    np_video[..., :3]= normalize(np_video[..., :3])
    np_video[..., 3:]= normalize(np_video[..., 3:])
    if np_video.shape[0] < w:
        return np_video[:, ...]
    ret = []
    for i in range(w, np_video.shape[0], stride):
        print(i)
        ret.append(np_video[i-w: i, ...])
        
    return np.array(ret)

In [23]:
def inference(np_video):
    pred = model.predict(sliding_window(np_video)[5:14])
    print(pred)
    pred = pred[:, 1] < pred[:, 0]
    return pred

In [24]:
inference(v1)

64
96
128
160
192
224
256
288
320
352
384
416
448
480
512
544
576
608
640
672
704
736
768
800
832
864
896
928
960
992
1024
1056
1088
1120
1152
1184
1216
1248
[[0.12164606 0.87835395]
 [0.04828709 0.9517129 ]
 [0.23664582 0.76335424]
 [0.7583628  0.24163716]
 [0.84651035 0.15348966]
 [0.7435627  0.25643733]
 [0.72628754 0.27371243]
 [0.75406015 0.24593982]
 [0.6953702  0.3046298 ]]


array([False, False, False,  True,  True,  True,  True,  True,  True])

In [18]:
def visualize(np_video):
    clips = sliding_window(np_video)
    pred = model.predict(clips)
    pred_bool = pred[:, 1] > pred[:, 0]
    for frame in clips[:, -1, :, :, 0:3]:
        cv2.imshow('visualize', frame)

In [19]:
# visualize(v1)

In [20]:
# cv2.imshow('abc', v1[0, :, :, 0])

In [21]:
v1[0]

array([[[-1.90219141, -1.90219141, -1.90219141,  0.79470148,
          2.17354236],
        [-1.90219141, -1.90219141, -1.90219141,  0.79470676,
          2.17354726],
        [-1.90219141, -1.90219141, -1.90219141,  0.79471053,
          2.17355028],
        ...,
        [-1.78584605, -1.90219141, -1.90219141,  0.80715646,
          2.17494991],
        [-1.75260451, -1.90219141, -1.90219141,  0.80523776,
          2.17543166],
        [-1.66950068, -1.86894988, -1.90219141,  0.80010551,
          2.17645698]],

       [[-1.90219141, -1.90219141, -1.90219141,  0.79470186,
          2.17354802],
        [-1.90219141, -1.90219141, -1.90219141,  0.79470487,
          2.17355141],
        [-1.90219141, -1.90219141, -1.88557064,  0.79470676,
          2.17355292],
        ...,
        [-1.86894988, -1.90219141, -1.78584605,  0.7993124 ,
          2.17780271],
        [-1.86894988, -1.90219141, -1.83570834,  0.79812235,
          2.1777767 ],
        [-1.76922528, -1.81908758, -1.85232911, 

In [22]:
print(np.sum(v1[64, :, :, 0]))
v1[64, :, :, 0]

-19963.40736949229


array([[-1.90219141, -1.90219141, -1.90219141, ..., -1.90219141,
        -1.90219141, -1.66950068],
       [-1.90219141, -1.90219141, -1.90219141, ..., -1.90219141,
        -1.90219141, -1.70274222],
       [-1.90219141, -1.90219141, -1.90219141, ..., -1.47005149,
        -1.61963839, -1.90219141],
       ...,
       [-1.90219141, -1.90219141, -1.90219141, ..., -1.3204646 ,
        -1.50329302, -1.88557064],
       [-1.90219141, -1.90219141, -1.90219141, ..., -1.85232911,
        -1.80246681, -1.76922528],
       [-1.90219141, -1.90219141, -1.90219141, ..., -1.88557064,
        -1.85232911, -1.68612145]])