In [1]:
import os
import pickle
import sys
import time
from os.path import abspath, dirname
from time import time

import cv2
import numpy as np
import pandas as pd
import tensorflow as tf
from dlib import get_frontal_face_detector
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical

# Seed NumPy's global RNG up front; the random start-frame choice in DataPrep
# draws from it.
np.random.seed(666)
# Short alias for the tf.data generator-backed dataset constructor.
from_generator = tf.data.Dataset.from_generator



In [2]:


class DataPrep():
    """Cut a short snippet out of a video and turn it into model inputs.

    For one video the class reads ``segment_size`` consecutive frames,
    computes dense optical flow between neighbouring frames, and returns both
    as arrays resized to ``rsz``; ``prepVid`` crops to detected faces first,
    ``prepFullFrames`` uses the whole frame.

    Attributes:
        fd: face detector returned by ``get_frontal_face_detector()``.
        segment_size: number of consecutive frames read per snippet.
        frames: last snippet read, ``(segment_size, H, W, 3)`` uint8, or None.
        flows: optical flow of the last snippet,
            ``(segment_size - 1, H, W, 2)``, or None.
        rsz: output size; ``prepVid``/``prepFullFrames`` return arrays shaped
            ``(n, rsz[0], rsz[1], channels)``.
    """

    def __init__(self, segment_size=5, rsz=(128, 128)):
        self.fd = get_frontal_face_detector()
        self.segment_size = segment_size
        self.frames = None
        self.flows = None
        self.rsz = rsz

    def getFrameSnippet(self, filepath, start_frame=None):
        """Read ``segment_size`` consecutive frames into ``self.frames``.

        Args:
            filepath: path of the video file.
            start_frame: index of the first frame to read. ``None`` picks a
                random frame; an explicit ``0`` is honoured. If fewer than
                ``segment_size`` frames remain after ``start_frame`` the
                snippet starts at frame 0 instead.

        Raises:
            IOError: if the video reports no frames or a frame cannot be read.
        """
        cap = cv2.VideoCapture(filepath)
        try:
            frameCount = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            frameWidth = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            frameHeight = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            if frameCount < 1:
                raise IOError(
                    'could not read any frames from {!r}'.format(filepath))
            # `is None` rather than truthiness, so start_frame=0 is not
            # silently replaced by a random index.
            if start_frame is None:
                start_frame = np.random.choice(range(frameCount), size=1)[0]
            if frameCount - start_frame < self.segment_size:
                start_frame = 0
            frames = np.empty(
                (self.segment_size, frameHeight, frameWidth, 3),
                dtype=np.uint8)
            cap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
            for j in range(self.segment_size):
                ret, frame = cap.read()
                # A failed read yields no image; fail with a clear message
                # instead of trying to store it in the array.
                if not ret or frame is None:
                    raise IOError(
                        'failed to read frame {} of {!r}'.format(
                            start_frame + j, filepath))
                frames[j] = frame
            self.frames = frames
        finally:
            # Release the capture even when reading fails.
            cap.release()

    def getOpticalFlows(self):
        """Compute Farneback optical flow between consecutive frames.

        Fills ``self.flows`` with one 2-channel flow field per pair of
        neighbouring frames in ``self.frames``. Does nothing when no snippet
        has been loaded yet.
        """
        if self.frames is None:
            return
        n_frames, height, width = self.frames.shape[:3]
        self.flows = np.empty((n_frames - 1, height, width, 2))
        prvs = cv2.cvtColor(
            self.frames[0].astype(np.uint8), cv2.COLOR_BGR2GRAY)
        for i in range(1, int(n_frames)):
            frame = cv2.cvtColor(
                self.frames[i].astype(np.uint8), cv2.COLOR_BGR2GRAY)
            # Positional arguments after the two images: flow, pyr_scale,
            # levels, winsize, iterations, poly_n, poly_sigma, flags.
            self.flows[i - 1] = cv2.calcOpticalFlowFarneback(
                prvs, frame, None, 0.5, 3, 15, 3, 5, 1.2, 0)
            prvs = frame

    def resize(self, frame):
        """Resize ``frame`` so its leading two dimensions equal ``self.rsz``.

        ``cv2.resize`` takes its target size as ``(width, height)``, so the
        two entries of ``rsz`` are swapped here; the result is shaped
        ``(rsz[0], rsz[1], ...)``, matching the buffers allocated in
        ``prepVid``/``prepFullFrames``.
        """
        # TODO: will want to test different sizes here as a hyperparameter
        height, width = self.rsz
        return cv2.resize(frame, (width, height))

    def getFaces(self, frame, grayscale=True):
        """Detect faces in ``frame``.

        Detection is retried on a histogram-equalised grayscale image when
        the first pass finds nothing.

        Args:
            frame: BGR image.
            grayscale: convert to grayscale before the first detection pass.

        Returns:
            The detector's result, or the original ``frame`` (an ndarray) as
            a "no face found" marker that ``getFaceRois`` understands.
        """
        orig_frame = frame
        if grayscale:
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = self.fd(frame, 0)
        if len(faces) < 1:
            # equalizeHist needs a single-channel image, which is not the
            # case yet when grayscale=False.
            if frame.ndim == 3:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            frame = cv2.equalizeHist(frame)
            faces = self.fd(frame, 0)
        if len(faces) < 1:
            faces = orig_frame
        return faces

    def getFaceRois(self, frame, faces):
        """Crop every detected face out of ``frame`` and resize the result.

        The crops are stacked side by side before resizing. All crops use the
        height of the first face so they can be stacked; boxes are clamped to
        the frame because detector rectangles may extend past its borders.

        Args:
            frame: ``(H, W, C)`` array to crop (an image or a flow field).
            faces: value returned by ``getFaces`` - a sequence of rectangles
                exposing ``left()``, ``top()``, ``right()``, ``bottom()``, or
                an ndarray meaning "no face found".

        Returns:
            The resized crop, or the resized full frame when there is no
            usable face box.
        """
        if isinstance(faces, np.ndarray) or len(faces) < 1:
            return self.resize(frame)
        frame_h, frame_w = frame.shape[:2]
        first = faces[0]
        h = min(first.bottom() - first.top(), frame_h)
        if h < 1:
            return self.resize(frame)
        rois = []
        for face in faces:
            # Use this face's own box (not the first one's) and shift it up
            # if needed so a full-height crop still fits inside the frame.
            top = min(max(face.top(), 0), frame_h - h)
            left = max(face.left(), 0)
            right = min(face.right(), frame_w)
            if right <= left:
                continue
            rois.append(frame[top:top + h, left:right, :])
        if not rois:
            return self.resize(frame)
        return self.resize(np.hstack(rois))

    def _prepSnippet(self, filepath, start_frame, crop_faces):
        """Shared implementation of ``prepVid`` and ``prepFullFrames``."""
        self.getFrameSnippet(filepath, start_frame)
        self.getOpticalFlows()
        height, width = self.rsz
        # uint8, not int8: pixel values run 0..255 and would wrap to
        # negative numbers in a signed 8-bit buffer.
        rgb_rois = np.empty(
            (self.segment_size, height, width, 3), dtype=np.uint8)
        flow_rois = np.empty(
            (self.segment_size - 1, height, width, 2), dtype=np.float32)
        for i, frame in enumerate(self.frames):
            if crop_faces:
                faces = self.getFaces(frame)
                rgb_rois[i] = self.getFaceRois(frame, faces)
            else:
                rgb_rois[i] = self.resize(frame)
            if i == 0:
                # There is no flow field leading into the first frame.
                continue
            flow = self.flows[i - 1]
            if crop_faces:
                # The flow is cropped with the boxes found in frame i.
                flow_rois[i - 1] = self.getFaceRois(flow, faces)
            else:
                flow_rois[i - 1] = self.resize(flow)
        return rgb_rois, flow_rois

    def prepVid(self, filepath, start_frame=None):
        """Load a snippet and return face-cropped RGB and flow arrays.

        Args:
            filepath: path of the video file.
            start_frame: first frame of the snippet (``None`` = random).

        Returns:
            ``(rgb_rois, flow_rois)``: a uint8 array shaped
            ``(segment_size, rsz[0], rsz[1], 3)`` and a float32 array shaped
            ``(segment_size - 1, rsz[0], rsz[1], 2)``.
        """
        return self._prepSnippet(filepath, start_frame, crop_faces=True)

    def prepFullFrames(self, filepath, start_frame=None):
        """Like ``prepVid`` but resizes whole frames without face cropping."""
        return self._prepSnippet(filepath, start_frame, crop_faces=False)



In [3]:

def input_fn(files, labels, segment_size=5, batch_size=1, rsz=(128, 128)):
    """Build a batched ``tf.data.Dataset`` of video snippets and labels.

    Args:
        files: iterable of video file paths.
        labels: iterable of label vectors of length 2, aligned with ``files``.
        segment_size: number of frames per snippet.
        batch_size: number of snippets per batch.
        rsz: spatial size of every frame / flow field.

    Returns:
        A dataset yielding ``({'rgb_input': ..., 'flow_input': ...}, label)``
        batches, where ``rgb_input`` is
        ``(segment_size, rsz[0], rsz[1], 3)`` and ``flow_input`` is
        ``(segment_size - 1, rsz[0], rsz[1], 2)`` per sample.
    """
    def dataGenerator():
        # One DataPrep (and therefore one face detector) per pass over the
        # data instead of one per file; prepVid returns freshly allocated
        # arrays on every call, so reusing the instance is safe.
        dp = DataPrep(segment_size=segment_size, rsz=rsz)
        for f, label in zip(files, labels):
            frames, flows = dp.prepVid(filepath=f)
            yield {'rgb_input': frames, 'flow_input': flows}, label

    dataset = from_generator(
        dataGenerator,
        output_types=(
            {
                # Unsigned 8-bit: pixel values run 0..255, which a signed
                # int8 tensor cannot represent.
                "rgb_input": tf.uint8,
                "flow_input": tf.float32
            },
            tf.int8),
        output_shapes=(
            {
                "rgb_input": (segment_size, rsz[0], rsz[1], 3),
                "flow_input": (segment_size - 1, rsz[0], rsz[1], 2)
            },
            (2,))
    )
    dataset = dataset.batch(batch_size)
    return dataset



In [4]:
# Locate the sample videos and load their metadata (one column per video in
# the JSON file, hence the transpose).
filepath = 'data/train_sample_videos'
segment_size = 5
datapath = os.path.join(filepath, 'metadata.json')
data = pd.read_json(datapath).T
files = [os.path.join(filepath, f) for f in data.index]
labels = data.label.values
x_train, x_test, y_train, y_test = train_test_split(
    files, labels, test_size=0.1)


def _label_to_index(label):
    """Encode a string label as a class index: 'REAL' -> 0, otherwise 1."""
    return 0 if label == 'REAL' else 1


# Balanced class weights, computed on the string labels. Keyword arguments
# are used because newer scikit-learn releases no longer accept them
# positionally.
class_names = np.unique(y_train)
balanced_weights = compute_class_weight(
    class_weight='balanced', classes=class_names, y=y_train)
for k, v in zip(class_names, balanced_weights):
    print(k, v)
# Keras documents `class_weight` as a dict keyed by class index. The weights
# come back in np.unique order (alphabetical), which is NOT the REAL=0/FAKE=1
# encoding used for the targets, so map each weight to its class index
# explicitly.
class_weights = {
    _label_to_index(name): float(weight)
    for name, weight in zip(class_names, balanced_weights)
}

# Integer-encode, then one-hot encode the targets.
y_train = [_label_to_index(label) for label in y_train]
y_test = [_label_to_index(label) for label in y_test]
y_train = to_categorical(y_train, num_classes=2)
y_test = to_categorical(y_test, num_classes=2)
print(len(x_train), len(y_train), len(x_test), len(y_test))


FAKE 0.6228373702422145
REAL 2.535211267605634
360 360 40 40


In [5]:
# Pipeline hyperparameters shared by the training and the test dataset.
rsz = (128, 128)
segment_size = 10
batch_size = 4

# Build both datasets with identical settings: training split first, then the
# held-out split.
train_data, test_data = (
    input_fn(
        split_files,
        split_labels,
        segment_size=segment_size,
        batch_size=batch_size,
        rsz=rsz)
    for split_files, split_labels in ((x_train, y_train), (x_test, y_test))
)


In [6]:
# Symbolic inputs of the two streams. The names must match the dictionary
# keys yielded by the data generator.
# RGB stream: `segment_size` frames with 3 colour channels each.
rgb_input = tf.keras.Input(
    name='rgb_input',
    shape=(segment_size, rsz[0], rsz[1], 3))
# Flow stream: one 2-channel flow field per pair of neighbouring frames.
flow_input = tf.keras.Input(
    name='flow_input',
    shape=(segment_size - 1, rsz[0], rsz[1], 2))


In [7]:
def _relu_conv3d(kernel_size):
    """Return a new 8-filter, stride-1, 'same'-padded Conv3D layer with ReLU."""
    return layers.Conv3D(
        filters=8,
        kernel_size=kernel_size,
        strides=(1, 1, 1),
        padding='same',
        data_format='channels_last',
        activation='relu',
    )


def _conv_pair(tensor):
    """Apply a kernel-3 convolution followed by a kernel-4 convolution."""
    # Layers are created in the order they are applied, so Keras assigns the
    # same automatic layer names as fully written-out code would.
    tensor = _relu_conv3d(3)(tensor)
    return _relu_conv3d(4)(tensor)


# block 1: two convolutions, then halve every dimension
x = _conv_pair(rgb_input)
block1_output = layers.MaxPool3D(
    pool_size=(2, 2, 2),
    strides=(2, 2, 2),
    padding='same'
)(x)

# block 2: two convolutions with a residual connection around them
x = _conv_pair(block1_output)
block2_output = layers.add([x, block1_output])

# block 3: same residual pattern as block 2
x = _conv_pair(block2_output)
block3_output = layers.add([x, block2_output])

# classification head
x = _relu_conv3d(3)(block3_output)
x = layers.GlobalAveragePooling3D()(x)
x = layers.Dense(64, activation='relu')(x)
x = layers.Dropout(0.5)(x)
rgb_outputs = layers.Dense(2, activation='softmax')(x)

rgb_model = Model(inputs=rgb_input, outputs=rgb_outputs)
rgb_model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
rgb_input (InputLayer)          [(None, 10, 128, 128 0                                            
__________________________________________________________________________________________________
conv3d (Conv3D)                 (None, 10, 128, 128, 656         rgb_input[0][0]                  
__________________________________________________________________________________________________
conv3d_1 (Conv3D)               (None, 10, 128, 128, 4104        conv3d[0][0]                     
__________________________________________________________________________________________________
max_pooling3d (MaxPooling3D)    (None, 5, 64, 64, 8) 0           conv3d_1[0][0]                   
______________________________________________________________________________________________

In [8]:
def _flow_conv_lstm(return_sequences):
    """Return a new 8-filter, kernel-3 ConvLSTM2D layer with 0.5 dropout."""
    return layers.ConvLSTM2D(
        filters=8,
        kernel_size=3,
        strides=1,
        padding='same',
        data_format='channels_last',
        return_sequences=return_sequences,
        dropout=0.5
    )


# Three ConvLSTM layers, each followed by batch normalisation. Only the last
# one collapses the time dimension (return_sequences=False).
x = _flow_conv_lstm(True)(flow_input)
x = layers.BatchNormalization()(x)
x = _flow_conv_lstm(True)(x)
x = layers.BatchNormalization()(x)
x = _flow_conv_lstm(False)(x)
x = layers.BatchNormalization()(x)

# Fully connected head.
x = layers.Flatten()(x)
x = layers.Dense(128, activation='relu')(x)
x = layers.Dense(128, activation='relu')(x)
x = layers.Dense(128, activation='relu')(x)
x = layers.Dropout(0.5)(x)
# Softmax here so this stream emits class probabilities like the RGB stream
# does. The two outputs are averaged when the streams are fused, and averaging
# unbounded logits with probabilities would let this stream dominate the mean.
flow_output = layers.Dense(2, activation='softmax')(x)
flow_model = Model(inputs=flow_input, outputs=flow_output)
flow_model.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flow_input (InputLayer)      [(None, 9, 128, 128, 2)]  0         
_________________________________________________________________
conv_lst_m2d (ConvLSTM2D)    (None, 9, 128, 128, 8)    2912      
_________________________________________________________________
batch_normalization (BatchNo (None, 9, 128, 128, 8)    32        
_________________________________________________________________
conv_lst_m2d_1 (ConvLSTM2D)  (None, 9, 128, 128, 8)    4640      
_________________________________________________________________
batch_normalization_1 (Batch (None, 9, 128, 128, 8)    32        
_________________________________________________________________
conv_lst_m2d_2 (ConvLSTM2D)  (None, 128, 128, 8)       4640      
_________________________________________________________________
batch_normalization_2 (Batch (None, 128, 128, 8)       32  

In [9]:
# Fuse the two streams: average their 2-unit outputs, then let a final
# softmax layer produce the prediction.
final_average = layers.Average()([rgb_outputs, flow_output])
x = layers.Flatten()(final_average)
final_output = layers.Dense(
    2,
    name='final_output',
    activation='softmax',
)(x)

# The input dictionary keys match the keys produced by the data pipeline.
model = Model(
    name='my_model',
    inputs={"rgb_input": rgb_input, "flow_input": flow_input},
    outputs=final_output,
)
model.summary()

Model: "my_model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
rgb_input (InputLayer)          [(None, 10, 128, 128 0                                            
__________________________________________________________________________________________________
conv3d (Conv3D)                 (None, 10, 128, 128, 656         rgb_input[0][0]                  
__________________________________________________________________________________________________
conv3d_1 (Conv3D)               (None, 10, 128, 128, 4104        conv3d[0][0]                     
__________________________________________________________________________________________________
flow_input (InputLayer)         [(None, 9, 128, 128, 0                                            
___________________________________________________________________________________________

In [10]:
# Render the model graph to model.png. This needs the optional pydot and
# graphviz packages; without them Keras only reports that the import failed.
tf.keras.utils.plot_model(
    model,
    show_layer_names=True,
    show_shapes=True,
    to_file='model.png',
)

Failed to import pydot. You must install pydot and graphviz for `pydotprint` to work.


In [11]:
# Compile with Adam at its default settings and train for five epochs,
# validating on the held-out dataset after each one.
opt = tf.keras.optimizers.Adam()
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['acc'],
)
model.fit(
    train_data,
    epochs=5,
    validation_data=test_data,
    class_weight=class_weights,
    verbose=1,
)

Epoch 1/5
Epoch 2/5

In [None]:
# Report loss and accuracy on the held-out dataset.
model.evaluate(test_data)
