## Imports

In [0]:
import glob
import os
import math

from time import time

import keras
from keras.applications.inception_resnet_v2 import InceptionResNetV2
from keras.applications.imagenet_utils import preprocess_input

import keras.backend as K

from keras import Model
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.layers import Input
from keras.models import load_model
from keras.optimizers import Adam
from skimage import color
from multiprocessing import Pool
from keras.layers import Conv2D, Dense, GlobalAveragePooling2D, LSTM, TimeDistributed, UpSampling2D, Dropout
from keras.engine import Layer
from keras.utils import plot_model

from collections import namedtuple
from datetime import datetime
from os import listdir
from os.path import join, isfile

import re
import cv2
import numpy as np
import tensorflow as tf

## Mount Drive

In [0]:
# Colab-only: mount Google Drive at /content/drive. Every directory in the
# configuration cell (dataset, records, checkpoints) lives under this mount.
from google.colab import drive
drive.mount('/content/drive')

## Tensorboard

In [0]:
# Download and unpack the ngrok client; it is used further down to tunnel the
# TensorBoard port out of the Colab machine.
! wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
! unzip ngrok-stable-linux-amd64.zip

In [0]:
# Kill whatever process currently holds port 6006 (the port TensorBoard is
# started on below), so the cell after this one can be re-run.
!kill $(lsof -t -i:6006)

In [0]:
# Launch TensorBoard in the background on port 6006.
# The backslash escapes the space for the shell that runs the command. A raw
# string keeps it literal instead of relying on Python's handling of the
# unrecognised escape sequence "\ " (the resulting value is unchanged).
# NOTE(review): train() writes its TensorBoard logs under ".../new-logs-3/",
# not ".../new-logs/" - confirm this is the directory you want to inspect.
LOG_DIR = r'/content/drive/My\ Drive/new-logs/'
get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
    .format(LOG_DIR)
)

In [0]:
# Start an ngrok HTTP tunnel to local port 6006 (TensorBoard) in the background.
get_ipython().system_raw('./ngrok http 6006 &')

In [0]:
# Query the local ngrok API (port 4040) and print the public URL of the first
# tunnel it reports.
! curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

## Sampling functions

In [0]:
class Sample:
    """Cut a short fixed-length clip out of every video in a directory.

    For each input video, ``length`` frames are taken starting from the middle
    of the video (keeping only frames whose position is a multiple of ``skip``)
    and written to ``dest_dir`` as ``video_XXXXX.avi``.
    """

    def __init__(self, source_dir, dest_dir, length=3, skip=1):
        """
        :param source_dir: directory holding the original videos
        :param dest_dir: directory the sampled clips are written to
        :param length: number of frames every sampled clip must contain
        :param skip: keep only frames whose position index is a multiple of this
        """
        self.source_dir = source_dir
        self.dest_dir = dest_dir
        self.length = length
        self.skip = skip

    def save_sliced_video(self, input_file, output_file):
        """
        Sample ``self.length`` frames from ``input_file`` into ``output_file``.

        The output file is only created when enough frames could be read, so
        videos that are too short no longer leave an empty stub behind.

        :param input_file: path of the video to sample
        :param output_file: path of the clip to write
        :return: True when the clip was written, False otherwise (including
                 when reading or writing raised an error)
        """
        video = None
        writer = None
        try:
            video = cv2.VideoCapture(input_file)
            frames_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
            # start sampling from the middle of the video
            video.set(cv2.CAP_PROP_POS_FRAMES, int(frames_count / 2))

            frames = []
            while len(frames) < self.length:
                ret, frame = video.read()
                if not ret:
                    break
                if int(video.get(cv2.CAP_PROP_POS_FRAMES)) % self.skip != 0:
                    continue
                frames.append(frame)

            if len(frames) != self.length:
                # video too short to provide the required number of frames
                return False

            fps = video.get(cv2.CAP_PROP_FPS)
            size = (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)))
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            writer = cv2.VideoWriter(output_file, fourcc, fps, size)
            for f in frames:
                writer.write(f)
            return True
        except Exception as error:
            # Best effort: one broken file must not abort the whole run, but
            # say why it was skipped (and let KeyboardInterrupt through).
            print("Skipping %s: %s" % (input_file, error))
            return False
        finally:
            if video is not None:
                video.release()
            if writer is not None:
                writer.release()

    def slice_all(self):
        """
        Sample every regular file in ``source_dir``.

        Successful clips are numbered consecutively; entries that are not
        files, or that cannot be sampled, do not consume a number.
        """
        file_list = listdir(self.source_dir)
        total = len(file_list)
        print("Start sampling %d files" % total)
        count = 0
        for file_name in file_list:
            input_file = join(self.source_dir, file_name)
            if not isfile(input_file):
                continue
            new_file_name = "video_" + format(count, '05d') + ".avi"
            output_file = join(self.dest_dir, new_file_name)
            if self.save_sliced_video(input_file, output_file):
                count += 1
                # report only when the counter actually advanced, otherwise the
                # same line is printed again for every skipped entry
                if count % 10 == 0:
                    print("Processed %d out of %d" % (count, total))
        print("Sampling completed %d out of %d" % (count, total))

## Video Preprocessing functions

In [0]:
def get_video(file):
    '''
    Read every frame of a video file.

    Parameters
    ----------
    file - path to video file

    Returns
    -------
    frames - array of the frames in reading order; an empty array when the
             file cannot be opened or holds no frames
    '''
    video = cv2.VideoCapture(file)
    frames = []
    try:
        while video.isOpened():
            ret, frame = video.read()
            if not ret:
                break
            frames.append(frame)
    finally:
        # always give the decoder handle back, even if reading raised
        video.release()
    return np.asarray(frames)


def get_lab_layer(frames):
    '''
    Convert frames to LAB twice: once as they are, once after removing colour.

    Parameters
    -----------
    frames - color/gray video frames with 3 channels

    Returns
    -------
    (rgb2lab, gray2lab) - LAB version of the resized frames, and LAB version
                          of the same frames after a round trip through grayscale

    NOTE(review): frames are handed to skimage's rgb2lab/rgb2gray as-is; if
    they come from cv2 they are presumably in BGR order - confirm the rest of
    the pipeline relies on the same convention before changing it.
    '''
    color_lab = []
    gray_lab = []

    for raw_frame in frames:
        fitted = resize_pad_frame(raw_frame, (default_nn_input_height, default_nn_input_width), equal_padding=True)

        color_lab.append(color.rgb2lab(fitted))

        # drop the colour information, then go back to 3 channels so the
        # frame can be converted to LAB like the coloured one
        gray = color.rgb2gray(fitted)
        gray_as_rgb = color.gray2rgb(gray)
        gray_lab.append(color.rgb2lab(gray_as_rgb))

    return np.asarray(color_lab), np.asarray(gray_lab)


def preprocess_frames(gray2lab_frames):
    '''
    Normalise the L channel of LAB frames.

    Only the L channel is returned, so only the L channel is computed; the
    a/b channels of the input are ignored (and may be absent).

    Parameters
    ---------
    gray2lab_frames - LAB frames of Grayscale video, indexed as
                      (frame, row, column, channel) with L in channel 0

    Returns
    -------
    processed_l_layer - float64 array (frame, row, column, 1) holding L/50 - 1
    '''
    # slicing with :1 keeps the trailing channel axis
    l_channel = np.asarray(gray2lab_frames)[:, :, :, :1]
    return (np.divide(l_channel, 50) - 1).astype(np.float64, copy=False)


def get_resnet_records(frames):
    '''
    Run the Inception-ResNet feature extractor on grayscale versions of frames.

    Parameters
    ----------
    frames - original frames without color conversion or resizing

    Details
    -------
    Implementation adopted from Deep Kolorization implementation.
    Every frame is resized/padded to the resnet input size, reduced to
    grayscale and expanded back to 3 channels before prediction.

    Returns
    -------
    predictions - output of inception_resnet_v2_predict for the whole batch
    '''

    def _to_gray_rgb(frame):
        fitted = resize_pad_frame(frame, (resnet_input_height, resnet_input_width))
        gray = cv2.cvtColor(fitted, cv2.COLOR_RGB2GRAY)
        return cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)

    batch = np.asarray([_to_gray_rgb(frame) for frame in frames])
    return inception_resnet_v2_predict(batch)


def getInputRange(frames_count, time_steps, current_frame):
    '''
    Frame indexes of the moving window that ends at ``current_frame``.

    The window holds ``time_steps`` consecutive indexes in ascending order.
    Indexes that would fall before the first frame are replaced by the
    earliest index that is still valid, so the window is left-padded with it.
    ``frames_count`` is accepted but not used.
    '''
    # earliest index the window may contain; a negative current_frame is
    # simply repeated, exactly as the window has nothing valid to fall back on
    lowest = 0 if current_frame >= 0 else current_frame
    window_start = current_frame - time_steps + 1
    return [max(index, lowest) for index in range(window_start, current_frame + 1)]


def get_nn_input(l_layer, resnet_out):
    '''
    Build the two flowchroma inputs: one moving window per frame.

    For every frame, the window of frame indexes given by getInputRange
    (``frames_per_video`` steps long) is used to gather the matching L layers
    and resnet outputs.

    Returns
    -------
    [X, Y] - X stacks the L-layer windows, Y stacks the resnet-output windows
    '''
    total = l_layer.shape[0]
    windows = [getInputRange(total, frames_per_video, position) for position in range(total)]

    l_windows = np.asarray([[l_layer[j] for j in window] for window in windows])
    resnet_windows = np.asarray([[resnet_out[j] for j in window] for window in windows])
    return [l_windows, resnet_windows]


def post_process_predictions(original_l_layers, predicted_AB_layers):
    '''
    Combine original L layer and predicted AB Layers into LAB frames.

    For every frame, the a/b prediction of the LAST time step of its window
    (index ``frames_per_video - 1``) is scaled by 128 and stacked behind the
    original L layer. The frame size is taken from the inputs instead of
    being fixed to 240x320.

    Parameters
    ----------
    original_l_layers - L layers, indexed as (frame, row, column)
    predicted_AB_layers - predictions, indexed as (frame, time step, row, column, a/b)

    Returns
    -------
    float64 array (frame, row, column, 3) holding L, a and b
    '''
    time_steps = frames_per_video
    l_layers = np.asarray(original_l_layers, dtype=np.float64)
    total_frames = l_layers.shape[0]

    last_step_ab = np.asarray(predicted_AB_layers)[:total_frames, time_steps - 1, :, :, :2]
    ab_layers = np.multiply(last_step_ab, 128).astype(np.float64)

    return np.concatenate([l_layers[..., np.newaxis], ab_layers], axis=-1)


def save_output_video(frames, output_file, fps=20, size=None):
    '''
    Save LAB frames as an XVID video.

    Parameters
    ----------
    frames - LAB frames, indexed as (frame, row, column, channel)
    output_file - path of the video to write
    fps - frame rate of the written video
    size - (width, height) of the written video; taken from the first frame
           when omitted, falling back to (320, 240) when there are no frames
    '''
    if size is None:
        if len(frames) > 0:
            height, width = np.shape(frames[0])[:2]
            size = (int(width), int(height))
        else:
            size = (320, 240)

    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_file, fourcc, fps, size)
    try:
        for frame in frames:
            final_out = color.lab2rgb(frame)
            # color.lab2rgb results values in [0,1]; the writer wants bytes
            out.write((final_out * 255).astype(np.uint8))
    finally:
        out.release()


def process_test_file(file):
    '''
    Colourise one video from ``dir_test`` with the most recent checkpoint.

    The result is written to ``dir_test_results`` under the same base name
    with an ``.avi`` extension.

    Parameters
    ----------
    file - name of the video file inside ``dir_test``

    Raises
    ------
    ValueError - when no frame can be read from the video, or when the
                 checkpoint directory holds no ``.hdf5`` file
    '''
    # Pre-processing
    input_path = join(dir_test, file)
    frames = get_video(input_path)
    if len(frames) == 0:
        raise ValueError("no frames could be read from " + input_path)
    (rgb2lab_frames, gray2lab_frames) = get_lab_layer(frames)
    processed_l_layer = preprocess_frames(gray2lab_frames)
    print('running resnet model')
    predictions = get_resnet_records(frames)
    print('Combining L layer and resnet out')
    X = get_nn_input(processed_l_layer, predictions)

    # Predicting
    ckpts = glob.glob("/content/drive/My Drive/new-checkpoints-3/*.hdf5")
    if not ckpts:
        raise ValueError("no checkpoint (*.hdf5) found in /content/drive/My Drive/new-checkpoints-3")
    latest_ckpt = max(ckpts, key=os.path.getctime)
    print("loading from checkpoint:", latest_ckpt)
    model = load_model(latest_ckpt, custom_objects={'FusionLayer': FusionLayer})
    predictions = []
    # one window at a time
    for i in range(X[0].shape[0]):
        predictions.append(model.predict([X[0][i:i + 1], X[1][i:i + 1]])[0])
    predictions = np.asarray(predictions)
    print("Flowchroma model predictions calculated")

    # Post processing
    frame_predictions = post_process_predictions(gray2lab_frames[:, :, :, 0], predictions)

    # splitext keeps dots that are part of the name ("my.clip.mp4" -> "my.clip")
    base_name = os.path.splitext(os.path.basename(file))[0]
    save_output_video(frame_predictions, join(dir_test_results, base_name + '.avi'))

In [0]:
def resize_pad_frame(img, size, pad_color=255, equal_padding=True):
    """
    Resize a frame to ``size`` while keeping its aspect ratio, padding the rest.

    A horizontal frame is first matched on the horizontal axis, a vertical
    frame on the vertical axis; if the other axis then overflows, the frame is
    matched on that axis instead. The remaining space is filled with the
    padding colour.

    :param img: frame to be resized
    :param size: final frame size as (height, width)
    :param pad_color: color of the padding; a single value is repeated for
                      the 3 channels of a colour frame
    :param equal_padding: True to split the padding evenly between both
                          sides, False to put all of it at the bottom/right
    :return: re-sized frame
    """
    h, w = float(img.shape[0]), float(img.shape[1])
    expected_height, expected_width = size

    # interpolation method
    if h > expected_height or w > expected_width:  # shrinking image
        interp = cv2.INTER_AREA
    else:  # stretching image
        interp = cv2.INTER_CUBIC

    # aspect ratio of image
    aspect = w / h

    # Both helpers return (new_h, new_w). The scaled side is kept at 1 pixel
    # or more so that extreme aspect ratios do not produce an empty resize.
    def _fit_to_width():
        return max(1, int(np.round(expected_width / aspect))), expected_width

    def _fit_to_height():
        return expected_height, max(1, int(np.round(expected_height * aspect)))

    if aspect >= 1:  # horizontal image
        new_h, new_w = _fit_to_width()
        if new_h > expected_height:
            new_h, new_w = _fit_to_height()
    else:  # vertical image
        new_h, new_w = _fit_to_height()
        if new_w > expected_width:
            new_h, new_w = _fit_to_width()

    # whichever axis was matched has nothing left to pad
    pad_vert = int(expected_height - new_h)
    pad_horz = int(expected_width - new_w)
    if equal_padding:
        pad_top, pad_left = pad_vert // 2, pad_horz // 2
    else:
        pad_top, pad_left = 0, 0
    pad_bot, pad_right = pad_vert - pad_top, pad_horz - pad_left

    # set pad color: color image but only one color provided
    if len(img.shape) == 3 and not isinstance(pad_color, (list, tuple, np.ndarray)):
        pad_color = [pad_color] * 3

    # scale and pad
    scaled_img = cv2.resize(img, (new_w, new_h), interpolation=interp)
    scaled_img = cv2.copyMakeBorder(scaled_img, pad_top, pad_bot, pad_left, pad_right,
                                    borderType=cv2.BORDER_CONSTANT, value=pad_color)

    return scaled_img


In [0]:
class FrameExtractor:
    """Dump the frames of every ``.avi`` video in a directory as image files."""

    def __init__(self, input_dir, output_dir, type):
        """
        :param input_dir: directory holding the ``.avi`` videos
        :param output_dir: directory the images are written to
        :param type: image file extension without the dot (e.g. 'jpeg')
        """
        self.input_dir = input_dir
        self.output_dir = output_dir
        self.type = type

    @staticmethod
    def write_frame_to_image(self, file):
        """
        Read content from AVI file and save frames as images.

        Kept as a static method that receives the extractor explicitly, so
        existing ``write_frame_to_image(extractor, file)`` calls keep working.
        Images are named ``<video name>_img_<frame index>.<type>``.

            :param self: extractor providing ``output_dir`` and ``type``
            :param file: video file
        """
        # splitext/basename keep dots that belong to directories or the name
        video_name = os.path.splitext(os.path.basename(file))[0]
        video = cv2.VideoCapture(file)
        try:
            index = 0
            # write frames as they are decoded instead of buffering the video
            while video.isOpened():
                ret, frame = video.read()
                if not ret:
                    break
                frame_name = join(self.output_dir, "{}_img_{:05d}.{}".format(video_name, index, self.type))
                cv2.imwrite(frame_name, frame)
                index += 1
        finally:
            video.release()

    def write_all(self):
        """Extract the frames of every ``.avi`` file in ``input_dir``, in name order."""
        os.makedirs(self.output_dir, exist_ok=True)

        file_list = sorted(
            join(self.input_dir, file_name)
            for file_name in listdir(self.input_dir)
            if file_name.endswith(".avi")
        )

        print("Start processing %d files" % len(file_list))
        for i, file_path in enumerate(file_list):
            self.write_frame_to_image(self, file_path)
            if i % 10 == 0:
                print("Processed %d out of %d" % (i, len(file_list)))
        print("Saving images completed.")

## Data Generator

In [0]:
class DataGenerator(keras.utils.Sequence):
    """
    Keras sequence that serves batches built from saved ``.npy`` records.

    Every sample is identified by a file id; its LAB record provides the
    network input (channel 0) and target (remaining channels), its resnet
    record provides the second input.
    """

    def __init__(self,
                 resnet_path,
                 lab_path,
                 file_ids,
                 batch_size=2,
                 time_steps=3,
                 h=240,
                 w=320,
                 shuffle=True):
        # where the records live and which of them belong to this generator
        self.resnet_path, self.lab_path = resnet_path, lab_path
        self.file_ids = file_ids

        # shape of one batch
        self.batch_size, self.time_steps = batch_size, time_steps
        self.h, self.w = h, w

        self.shuffle = shuffle
        # builds the first sample order
        self.on_epoch_end()

    def __len__(self):
        """
        Number of full batches in one epoch
        """
        return int(len(self.file_ids) // self.batch_size)

    def __getitem__(self, index):
        """
        Build the batch with the given position in the epoch
        """
        first = index * self.batch_size
        chosen = self.indexes[first:first + self.batch_size]
        return self.__data_generation([self.file_ids[k] for k in chosen])

    def on_epoch_end(self):
        """
        Reset the sample order, reshuffling it when shuffling is enabled
        """
        order = np.arange(len(self.file_ids))
        if self.shuffle:
            np.random.shuffle(order)
        self.indexes = order

    def __data_generation(self, batch_file_ids):
        """
        Load the records of the given file ids into batch-sized arrays
        """
        lead = (self.batch_size, self.time_steps)
        l_input = np.empty(lead + (self.h, self.w, 1))
        resnet_input = np.empty(lead + (1000,))
        ab_target = np.empty(lead + (self.h, self.w, 2))

        for row, file_id in enumerate(batch_file_ids):
            lab_record = np.load('{0}/lab_record_{1}.npy'.format(self.lab_path, file_id))
            resnet_record = np.load('{0}/resnet_record_{1}.npy'.format(self.resnet_path, file_id))

            # channel 0 feeds the network, the remaining channels are the target
            l_input[row, :, :, :, 0] = lab_record[:, :, :, 0]
            resnet_input[row, :, :] = resnet_record
            ab_target[row, :, :, :, :] = lab_record[:, :, :, 1:]

        return [l_input, resnet_input], ab_target


In [0]:
class ImageRecord:
    """Convert every video of a directory into a normalised LAB ``.npy`` record."""

    def __init__(self, input_dir, output_dir, size, equal_padding):
        """
        :param input_dir: directory holding the videos
        :param output_dir: directory the LAB records are written to
        :param size: frame size passed to resize_pad_frame, (height, width)
        :param equal_padding: True/False, passed to resize_pad_frame
        """
        self.input_dir = input_dir
        self.output_dir = output_dir
        self.size = size
        self.equal_padding = equal_padding

    @staticmethod
    def write_to_csv(file, output_file, size, equal_padding):
        """
        Read content from AVI file and resize frames and convert frames to LAB color space.

        Despite its name, the record is saved with ``np.save`` (NumPy appends
        ``.npy`` to ``output_file``), not as CSV.

        :param file: video file
        :param output_file: LAB output file
        :param size: (height, width), as expected by resize_pad_frame
        :param equal_padding: True/False
        """
        video = cv2.VideoCapture(file)
        frames = []
        try:
            while video.isOpened():
                ret, frame = video.read()
                if not ret:
                    break
                frame = resize_pad_frame(frame, size, equal_padding=equal_padding)
                frame = color.rgb2lab(frame)
                frames.append(frame)
        finally:
            # release the decoder even when a frame fails to convert
            video.release()
        frames = np.asarray(frames)

        # LAB layers should be brought to [-1, +1] region
        frames[:, :, :, 0] = np.divide(frames[:, :, :, 0], 50) - 1
        frames[:, :, :, 1] = np.divide(frames[:, :, :, 1], 128)
        frames[:, :, :, 2] = np.divide(frames[:, :, :, 2], 128)

        np.save(output_file, frames)

    def write_all(self):
        """
        Convert every entry of ``input_dir``, in name order.

        Records are numbered by position in that order: the i-th entry is
        written as ``lab_record_<i>``.
        """
        file_list = sorted(join(self.input_dir, file_name) for file_name in listdir(self.input_dir))

        print("Start processing %d files" % len(file_list))
        for i, file_path in enumerate(file_list):
            self.write_to_csv(file_path, join(self.output_dir, "lab_record_" + format(i, '05d')), self.size,
                              self.equal_padding)
            if i % 10 == 0:
                print("Processed %d out of %d" % (i, len(file_list)))
        print("LAB conversion completed")

In [0]:
class ResnetRecordCreator:
    """Create one resnet-prediction ``.npy`` record per sampled video."""

    # sampled videos are named video_<number>.avi; the number is the record index
    _VIDEO_NAME = re.compile(r"video_(\d+)\.avi$")

    def __init__(self, video_dir, image_dir, record_dir):
        """
        :param video_dir: directory holding the sampled videos
        :param image_dir: stored on the instance; not used by the methods of this class
        :param record_dir: directory the resnet records are written to
        """
        self.video_dir = video_dir
        self.image_dir = image_dir
        self.record_dir = record_dir

    def convert_all(self, file_list):
        """
        Convert the given videos of the source directory to resnet-sized frames.

        :param file_list: video file names (``video_<number>.avi``)
        :return: [video indexes, array with the frames of all videos back to back]
        :raises ValueError: when a file name does not follow the naming scheme
        """
        video_frames = []
        video_indexes = []
        for file_name in file_list:
            name_match = self._VIDEO_NAME.search(file_name)
            if name_match is None:
                raise ValueError("unexpected video file name: " + file_name)
            sample_index = int(name_match.group(1))

            resized_frames = self.convert_video_to_images(join(self.video_dir, file_name))

            # write_files slices the predictions in frames_per_video steps
            assert len(resized_frames) == frames_per_video

            video_indexes.append(sample_index)
            video_frames += resized_frames
            print("Video %d converted to frames" % sample_index)
        return [video_indexes, np.asarray(video_frames)]

    def convert_video_to_images(self, input_file):
        """
        Convert a single video file to a list of resnet-sized grayscale frames.

        Every frame is resized/padded, reduced to grayscale and expanded back
        to 3 channels.

        :param input_file: path of the video
        :return: list of frames
        """
        video = cv2.VideoCapture(input_file)
        frames = []
        try:
            while video.isOpened():
                ret, frame = video.read()
                if not ret:
                    break
                frame = resize_pad_frame(frame, (resnet_input_height, resnet_input_width))
                gray_scale_frame = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
                gray_scale_frame_colored = cv2.cvtColor(gray_scale_frame, cv2.COLOR_GRAY2RGB)
                frames.append(gray_scale_frame_colored)
        finally:
            video.release()
        return frames

    def predict_all(self, video_details):
        """
        Run the feature extractor on the frames of a chunk and save the records.

        :param video_details: [video indexes, frames], as returned by convert_all
        """
        video_indexes, video_frames = video_details
        predictions = inception_resnet_v2_predict(video_frames)
        self.write_files(video_indexes, predictions)

    def write_files(self, video_indexes, predictions):
        """
        Split the predictions per video and save each slice as ``resnet_record_<index>``.

        :param video_indexes: record index of every video, in prediction order
        :param predictions: predictions of all frames, frames_per_video per video
        """
        for position, file_index in enumerate(video_indexes):
            frames_start = position * frames_per_video
            frames_end = frames_start + frames_per_video
            output_file = join(self.record_dir, "resnet_record_" + format(file_index, "05d"))
            np.save(output_file, predictions[frames_start:frames_end])

    def process_all(self):
        """
        Process every sampled video of ``video_dir`` in chunks.

        Entries that do not follow the ``video_<number>.avi`` naming scheme
        are ignored instead of aborting the run.
        """
        file_list = sorted(
            file_name for file_name in listdir(self.video_dir)
            if self._VIDEO_NAME.search(file_name)
        )

        for chunk in self.chunks(file_list, resnet_video_chunk_size):
            # convert to images of the resnet input size
            video_details = self.convert_all(chunk)
            # pass those images to ResNet
            self.predict_all(video_details)

            print("Chunk completed")

    @staticmethod
    def chunks(l, n):
        """Yield successive n-sized chunks from l."""
        for i in range(0, len(l), n):
            yield l[i:i + n]

In [0]:
def process_file(index):
    """
    Split the records of one video into one record per frame.

    The LAB and resnet records with the given index are loaded, and each of
    their first ``frames_per_video`` entries is saved on its own, numbered
    ``index * frames_per_video + position``.
    """
    video_tag = format(index, '05d')
    lab_record = np.load(join(dir_lab_records, "lab_record_" + video_tag + ".npy"))
    resnet_record = np.load(join(dir_resnet_csv, "resnet_record_" + format(index, "05d") + ".npy"))

    first_frame = index * frames_per_video
    for position in range(frames_per_video):
        frame_tag = format(first_frame + position, '08d')
        np.save(join(dir_frame_lab_records, "lab_record_" + frame_tag), lab_record[position])
        np.save(join(dir_frame_resnet_records, "resnet_record_" + frame_tag), resnet_record[position])
    print('Index ' + str(index) + ' done')

def prepare_image_data():
    """
    Split every per-video record into per-frame records, two at a time.

    Requires the same number of entries in the LAB and resnet record
    directories; record indexes are assumed to run from 0 to that number - 1.
    """
    lab_records = listdir(dir_lab_records)
    resnet_records = listdir(dir_resnet_csv)

    assert len(lab_records) == len(resnet_records)
    n = len(lab_records)

    # the context manager shuts the worker processes down once map returns
    with Pool(2) as pool:
        pool.map(process_file, range(n))

## Model

In [0]:
class FusionLayer(Layer):
    """
    Fuse the encoder feature map with two per-time-step feature vectors.

    Each vector is repeated over every spatial position of the encoder output
    and the three tensors are concatenated along the channel axis.
    """

    def __init__(self, **kwargs):
        super(FusionLayer, self).__init__(**kwargs)

    def call(self, inputs, mask=None):
        enc_out, incep_out, rnn_out = inputs
        # unknown dimensions are handed to reshape as -1
        dims = [-1 if dim is None else dim for dim in enc_out.shape.as_list()]
        batch_size, time_steps, h, w, _ = dims

        def _spread(vectors):
            # batch and time axes stay as they are; the feature axis is
            # repeated h*w times and then folded into (h, w, features)
            tiled = K.tile(vectors, [1, 1, h * w])
            return K.reshape(tiled, (batch_size, time_steps, h, w, vectors.shape[2]))

        return K.concatenate([enc_out, _spread(incep_out), _spread(rnn_out)], axis=4)

    def compute_output_shape(self, input_shapes):
        enc_shape, incep_shape, rnn_shape = input_shapes

        # exactly three input tensors
        assert input_shapes and len(input_shapes) == 3

        # same batch size on all three
        assert enc_shape[0] == incep_shape[0] == rnn_shape[0]

        # same number of time steps on all three
        assert enc_shape[1] == incep_shape[1] == rnn_shape[1]

        batch_size, time_steps, h, w, enc_depth = enc_shape
        # channel depths add up, everything else follows the encoder output
        return batch_size, time_steps, h, w, enc_depth + incep_shape[2] + rnn_shape[2]


In [0]:
class FlowChroma:
    """
    Builder for the colourisation network.

    The network has two inputs (the encoder input and the Inception-ResNet
    output) and is assembled from an encoder, an LSTM branch fed by the pooled
    encoder output, a FusionLayer joining both with the inception input, and
    a decoder.
    """

    def __init__(self, inputs):
        # provide encoder input and inception ResNet's output as model inputs
        self.enc_input, self.incep_out = inputs

    @staticmethod
    def _encoder(encoder_input):
        """
        Eight time-distributed 3x3 convolutions; three of them use stride 2,
        and the last one ('encoder_conv8') has 256 filters.
        """
        x = TimeDistributed(Conv2D(64, (3, 3), activation='relu', padding='same', strides=2), name='encoder_conv1')(
            encoder_input)
        x = TimeDistributed(Conv2D(128, (3, 3), activation='relu', padding='same'), name='encoder_conv2')(x)
        x = TimeDistributed(Conv2D(128, (3, 3), activation='relu', padding='same', strides=2), name='encoder_conv3')(x)
        x = TimeDistributed(Conv2D(256, (3, 3), activation='relu', padding='same'), name='encoder_conv4')(x)
        x = TimeDistributed(Conv2D(256, (3, 3), activation='relu', padding='same', strides=2), name='encoder_conv5')(x)
        x = TimeDistributed(Conv2D(512, (3, 3), activation='relu', padding='same'), name='encoder_conv6')(x)
        x = TimeDistributed(Conv2D(512, (3, 3), activation='relu', padding='same'), name='encoder_conv7')(x)
        x = TimeDistributed(Conv2D(256, (3, 3), activation='relu', padding='same'), name='encoder_conv8')(x)
        return x

    @staticmethod
    def _rnn(rnn_input):
        """
        Two stacked LSTMs that return the whole sequence, followed by a
        per-step dense layer and dropout ('rnn_dropout' is the branch output).
        """
        x = LSTM(256, return_sequences=True, name='rnn_lstm1')(rnn_input)
        x = LSTM(256, return_sequences=True, name='rnn_lstm2')(x)
        x = TimeDistributed(Dense(256, activation='relu'), name='rnn_dense1')(x)
        x = TimeDistributed(Dropout(0.2), name='rnn_dropout')(x)
        return x

    @staticmethod
    def _decoder(decoder_input):
        """
        Time-distributed convolutions with three 2x up-samplings; the last
        convolution has 2 filters and a tanh activation.
        """
        # the only layer without an explicit name: Keras assigns one
        x = TimeDistributed(Conv2D(256, (1, 1), activation='relu'))(decoder_input)
        x = TimeDistributed(Conv2D(128, (3, 3), activation='relu', padding='same'), name='decoder_conv1')(x)
        x = TimeDistributed(UpSampling2D((2, 2)), name='decoder_upsamp1')(x)
        x = TimeDistributed(Conv2D(64, (3, 3), activation='relu', padding='same'), name='decoder_conv2')(x)
        x = TimeDistributed(Conv2D(64, (3, 3), activation='relu', padding='same'), name='decoder_conv3')(x)
        x = TimeDistributed(UpSampling2D((2, 2)), name='decoder_upsamp2')(x)
        x = TimeDistributed(Conv2D(32, (3, 3), activation='relu', padding='same'), name='decoder_conv4')(x)
        x = TimeDistributed(Conv2D(2, (3, 3), activation='tanh', padding='same'), name='decoder_conv5')(x)
        x = TimeDistributed(UpSampling2D((2, 2)), name='decoder_upsamp3')(x)
        return x

    def build(self):
        """
        Assemble and return the (uncompiled) two-input Keras model.
        """
        x = self._encoder(self.enc_input)
        # one feature vector per time step feeds the LSTM branch
        x = TimeDistributed(GlobalAveragePooling2D())(x)
        x = self._rnn(x)
        # intermediate model, used only to look the two branch outputs up by layer name
        x = Model(inputs=self.enc_input, outputs=x)

        rnn_out = x.get_layer(name='rnn_dropout').output
        enc_out = x.get_layer(name='encoder_conv8').output

        x = FusionLayer()([enc_out, self.incep_out, rnn_out])
        x = self._decoder(x)

        model = Model(inputs=[self.enc_input, self.incep_out], outputs=x)
        return model

    @staticmethod
    def generate_model_summaries(model):
        """Print the model summary and write its diagram to 'flowchroma.png'."""
        model.summary(line_length=150)
        plot_model(model, to_file='flowchroma.png')


## Configurations

In [0]:
# --- Directories (all inside the mounted Google Drive) ---
dir_root = '/content/drive/My Drive'
dir_originals = join(dir_root, 'original')  # input of sampling()
dir_sampled = join(dir_root, 'sampled')  # output of sampling(); input of the record builders
dir_resnet_images = join(dir_root, 'resized_resnet_images')
dir_resnet_csv = join(dir_root, 'resnet_csv_records')  # resnet_record_XXXXX.npy files
dir_lab_records = join(dir_root, 'lab_records')  # lab_record_XXXXX.npy files
dir_tfrecord = join(dir_root, 'tfrecords')
dir_test = join(dir_root, 'test')  # videos read by process_test_file()
dir_test_results = join(dir_root, 'test_results')  # videos written by process_test_file()
dir_frame_lab_records = join(dir_root, 'frame_lab_records')  # per-frame records from process_file()
dir_frame_resnet_records = join(dir_root, 'frame_resnet_records')  # per-frame records from process_file()
checkpoint_url = join(dir_root,"updated_resnet_v2.h5")

# --- Shapes ---
frames_per_video = 3  # frames per sampled clip, and time steps of the model
default_nn_input_height = 240
default_nn_input_width = 320
resnet_input_height = 299
resnet_input_width = 299
resnet_video_chunk_size = 100  # videos handled per chunk in ResnetRecordCreator.process_all()
resnet_batch_size = 100

# NOTE(review): the three set sizes below and resnet_output are not read by
# any code visible in this notebook - confirm whether they are still needed.
training_set_size = 2000
test_set_size = 239
validation_set_size = 200

resnet_output = 1000


def progressive_filename_generator(pattern='file_{}.ext'):
    """
    Endlessly yield file names built from ``pattern`` with a running counter.

    :param pattern: format string with one placeholder for the counter,
                    which starts at 0
    """
    # itertools is not imported at file level, so bring it in here
    import itertools

    for i in itertools.count():
        yield pattern.format(i)

# !mkdir $dir_originals
# !mkdir $dir_sampled
# !mkdir $dir_resnet_images
# !mkdir $dir_resnet_csv
# !mkdir $dir_lab_records
# !mkdir $dir_tfrecord
# !mkdir $dir_test
# !mkdir $dir_test_results
# !mkdir $dir_frame_lab_records
# !mkdir $dir_frame_resnet_records

In [0]:
def frame_extract(source, output):
    """Save the frames of the ``.avi`` videos in ``source`` as 'jpeg' images in ``output``."""
    FrameExtractor(source, output, 'jpeg').write_all()

## Inception Feature Extractor Init

In [0]:
# Start from a clean Keras session before building the feature extractor.
K.clear_session()

# ImageNet-pretrained Inception-ResNet-v2, classification head included
# (include_top=True), wrapped in a model that maps its input to its output.
x = InceptionResNetV2(include_top=True, weights='imagenet')
# load_model(checkpoint_url)
y = x.output
# Module-level `model` is the one inception_resnet_v2_predict() uses.
# NOTE(review): train() calls K.clear_session() when it finishes, which
# presumably invalidates this model too - re-run this cell before predicting.
model = Model(inputs=x.input, outputs=y)


def inception_resnet_v2_predict(images):
    """
    Predict with the module-level Inception-ResNet model.

    The images are cast to float32 and passed through ``preprocess_input``
    before prediction.

    NOTE(review): ``preprocess_input`` is imported from
    keras.applications.imagenet_utils rather than from
    keras.applications.inception_resnet_v2; the two may normalise differently.
    Confirm this matches how the saved records and checkpoints were produced
    before changing it.
    """
    prepared = preprocess_input(images.astype(np.float32))
    return model.predict(prepared)

## Calling Functions

In [0]:
def lab_image_record():
    """Build the LAB records of the sampled videos at the default network input size."""
    target_size = (default_nn_input_height, default_nn_input_width)
    ImageRecord(dir_sampled, dir_lab_records, target_size, True).write_all()

In [0]:
def inference():
    """Run process_test_file on every entry of the test directory."""
    for test_file in listdir(dir_test):
        process_test_file(test_file)

In [0]:
def sampling():
    """Sample a frames_per_video-long clip (keeping every 3rd frame position) from each original video."""
    Sample(dir_originals, dir_sampled, frames_per_video, 3).slice_all()


In [0]:
def resnet_record():
    """Build the resnet records of the sampled videos."""
    creator = ResnetRecordCreator(dir_sampled, dir_resnet_images, dir_resnet_csv)
    creator.process_all()

## Model Train

In [0]:
from keras.callbacks import ReduceLROnPlateau, EarlyStopping

def train():
    """
    Train the flowchroma model, resuming from the newest checkpoint if any.

    Records are split by index: the first 30% are used for validation, the
    rest for training. A checkpoint is written every 3 epochs; its file name
    carries the epoch number, which is how the starting epoch is recovered
    when resuming.

    When resuming, the model comes back from ``load_model`` already compiled
    with its saved optimizer, so it is NOT compiled again: recompiling would
    replace that optimizer with a fresh one and lose its state.
    """
    resnet_path = dir_resnet_csv
    lab_path = dir_lab_records
    val_split_ratio = 0.3
    lr = 0.0001
    train_batch_size = 4
    val_batch_size = 4
    n_epochs_to_train = 250
    ckpt_period = 3
    ckpt_dir = "/content/drive/My Drive/new-checkpoints-3"

    time_steps, h, w = frames_per_video, default_nn_input_height, default_nn_input_width

    initial_epoch = 0
    ckpts = glob.glob(ckpt_dir + "/*.hdf5")
    if len(ckpts) != 0:
        # there are ckpts
        latest_ckpt = max(ckpts, key=os.path.getctime)
        print("loading from checkpoint:", latest_ckpt)
        # the epoch number sits between "-epoch-" and "-lr-" in the file name
        initial_epoch = int(latest_ckpt[latest_ckpt.find("-epoch-") + len("-epoch-"):latest_ckpt.rfind("-lr-")])
        model = load_model(latest_ckpt, custom_objects={'FusionLayer': FusionLayer})

    else:
        # no ckpts
        enc_input = Input(shape=(time_steps, h, w, 1), name='encoder_input')
        incep_out = Input(shape=(time_steps, 1000), name='inception_input')

        model = FlowChroma([enc_input, incep_out]).build()

    # compile only a model that has no optimizer yet (freshly built, or a
    # checkpoint that was saved without one)
    if getattr(model, 'optimizer', None) is None:
        opt = Adam(lr=lr)
        model.compile(optimizer=opt, loss='mse', metrics=['accuracy'])
    # generate_model_summaries(model)

    n_lab_records = len(glob.glob('{0}/*.npy'.format(dir_lab_records)))
    n_resnet_records = len(glob.glob('{0}/*.npy'.format(dir_resnet_csv)))

    assert n_lab_records == n_resnet_records

    val_split = int(math.floor(n_lab_records * val_split_ratio))

    # file ids are the zero-padded record indexes
    dataset = {
        "validation": ['{0:05}'.format(i) for i in range(val_split)],
        "train": ['{0:05}'.format(i) for i in range(val_split, n_lab_records)]
    }

    basic_generator_params = {
        "resnet_path": resnet_path,
        "lab_path": lab_path,
        "time_steps": time_steps,
        "h": h,
        "w": w
    }
    # generators
    training_generator = DataGenerator(**basic_generator_params,
                                       file_ids=dataset['train'],
                                       batch_size=train_batch_size)

    validation_generator = DataGenerator(**basic_generator_params,
                                         file_ids=dataset['validation'],
                                         batch_size=val_batch_size)

    os.makedirs(ckpt_dir, exist_ok=True)
    file_path = ckpt_dir + "/flowchroma-epoch-{epoch:05d}-lr-" + str(
        lr) + "-train_loss-{loss:.4f}-val_loss-{val_loss:.4f}.hdf5"

    # NOTE(review): newer Keras releases name the `epsilon` argument `min_delta`.
    reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=7, verbose=1, epsilon=1e-4, mode='min')

    # monitor is a metric name, not a list
    checkpoint = ModelCheckpoint(file_path,
                                 monitor='loss',
                                 verbose=1,
                                 save_best_only=False,
                                 save_weights_only=False,
                                 mode='min',
                                 period=ckpt_period)

    tensorboard = TensorBoard(log_dir="/content/drive/My Drive/new-logs-3/{}".format(time()), histogram_freq=0)

    # already past the target: train that many epochs on top of what is done
    if n_epochs_to_train <= initial_epoch:
        n_epochs_to_train += initial_epoch

    model.fit_generator(generator=training_generator,
                        validation_data=validation_generator,
                        use_multiprocessing=True,
                        epochs=n_epochs_to_train,
                        initial_epoch=initial_epoch,
                        callbacks=[checkpoint, tensorboard, reduce_lr_loss],
                        workers=6)
    # NOTE(review): this presumably also discards the module-level inception
    # model - rebuild it before calling inception_resnet_v2_predict again.
    K.clear_session()


In [0]:
# sampling()

In [0]:
# lab_image_record()

In [0]:
# resnet_record()

In [0]:
!ls '/content/drive/My Drive/sampled' | wc -l

3130


In [0]:
!ls '/content/drive/My Drive/lab_records' | wc -l

3130


In [0]:
!ls '/content/drive/My Drive/resnet_csv_records' | wc -l

3130


In [0]:
train()

loading from checkpoint: /content/drive/My Drive/new-checkpoints-3/flowchroma-epoch-00123-lr-0.0001-train_loss-0.0020-val_loss-0.0017.hdf5




Epoch 124/250
Epoch 125/250
Epoch 126/250

Epoch 00126: saving model to /content/drive/My Drive/new-checkpoints-3/flowchroma-epoch-00126-lr-0.0001-train_loss-0.0024-val_loss-0.0021.hdf5
Epoch 127/250
Epoch 128/250
Epoch 129/250

Epoch 00129: saving model to /content/drive/My Drive/new-checkpoints-3/flowchroma-epoch-00129-lr-0.0001-train_loss-0.0022-val_loss-0.0018.hdf5
Epoch 130/250
Epoch 131/250
Epoch 131/250Epoch 132/250

Epoch 00132: saving model to /content/drive/My Drive/new-checkpoints-3/flowchroma-epoch-00132-lr-0.0001-train_loss-0.0020-val_loss-0.0017.hdf5
Epoch 133/250
Epoch 134/250
Epoch 135/250

Epoch 00135: saving model to /content/drive/My Drive/new-checkpoints-3/flowchroma-epoch-00135-lr-0.0001-train_loss-0.0019-val_loss-0.0017.hdf5
Epoch 136/250
Epoch 137/250
Epoch 138/250

Epoch 00138: saving model to /content/drive/My Drive/new-checkpoints-3/flowchroma-epoch-00138-lr-0.0001-train_loss-0.0017-val_loss-0.0018.hdf5
Epoch 139/250
Epoch 140/250
Epoch 141/250

Epoch 00141: s

## Testing

In [0]:
# inference()
process_test_file("squirrel.mp4")

running resnet model
Combining L layer and resnet out
loading from checkpoint: /content/drive/My Drive/new-checkpoints-3/flowchroma-epoch-00249-lr-0.0001-train_loss-0.0012-val_loss-0.0013.hdf5
Flowchroma model predictions calculated
