In [0]:
# %tensorflow_version 1.x
import keras

In [0]:
# !pip install git+https://www.github.com/keras-team/keras-contrib.git

In [0]:
# import keras_contrib

# 3D_ResNet50 : Source (https://github.com/JihongJu/keras-resnet3d)

In [0]:
"""A vanilla 3D resnet implementation.
Based on Raghavendra Kotikalapudi's 2D implementation
keras-resnet (See https://github.com/raghakot/keras-resnet.)
"""
from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals
)
import six
from math import ceil
from keras.models import Model
from keras.layers import (
    Input,
    Activation,
    Dense,
    Flatten
)
from keras.layers.convolutional import (
    Conv3D,
    AveragePooling3D,
    MaxPooling3D
)
from keras.layers.merge import add
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2
from keras import backend as K


def _bn_relu(input):
    """Apply batch normalisation followed by a ReLU activation.

    The normalisation axis is the module-level ``CHANNEL_AXIS``, which
    ``_handle_data_format`` assigns; that function therefore has to run
    before this helper is called.

    # Arguments
        input: tensor to normalise and activate.
    # Returns
        The activated tensor.
    """
    normalized = BatchNormalization(axis=CHANNEL_AXIS)(input)
    relu = Activation("relu")
    return relu(normalized)


def _conv_bn_relu3D(**conv_params):
    """Helper to build a conv3d -> BN -> relu block.

    # Arguments
        conv_params: keyword arguments for the convolution. ``filters`` and
            ``kernel_size`` are required; ``strides`` defaults to (1, 1, 1),
            ``kernel_initializer`` to "he_normal", ``padding`` to "same" and
            ``kernel_regularizer`` to ``l2(1e-4)``.
    # Returns
        A function mapping an input tensor to the block's output tensor.
    """
    layer_kwargs = dict(
        filters=conv_params["filters"],
        kernel_size=conv_params["kernel_size"],
        strides=conv_params.get("strides", (1, 1, 1)),
        kernel_initializer=conv_params.get("kernel_initializer", "he_normal"),
        padding=conv_params.get("padding", "same"),
        kernel_regularizer=conv_params.get("kernel_regularizer", l2(1e-4)),
    )

    def apply(tensor):
        # Layers are only instantiated once the block is applied to a tensor.
        convolved = Conv3D(**layer_kwargs)(tensor)
        return _bn_relu(convolved)

    return apply


def _bn_relu_conv3d(**conv_params):
    """Helper to build a BN -> relu -> conv3d (pre-activation) block.

    # Arguments
        conv_params: keyword arguments for the convolution. ``filters`` and
            ``kernel_size`` are required; ``strides`` defaults to (1, 1, 1),
            ``kernel_initializer`` to "he_normal", ``padding`` to "same" and
            ``kernel_regularizer`` to ``l2(1e-4)``.
    # Returns
        A function mapping an input tensor to the block's output tensor.
    """
    layer_kwargs = dict(
        filters=conv_params["filters"],
        kernel_size=conv_params["kernel_size"],
        strides=conv_params.get("strides", (1, 1, 1)),
        kernel_initializer=conv_params.get("kernel_initializer", "he_normal"),
        padding=conv_params.get("padding", "same"),
        kernel_regularizer=conv_params.get("kernel_regularizer", l2(1e-4)),
    )

    def apply(tensor):
        # Normalise and activate first, then convolve.
        activated = _bn_relu(tensor)
        return Conv3D(**layer_kwargs)(activated)

    return apply


def _shortcut3d(input, residual):
    """3D shortcut to match input and residual and merge them with "sum".

    When the residual branch has a smaller spatial size or a different
    number of channels than ``input``, the input is projected through a
    1x1x1 convolution whose strides are the per-axis size ratio (rounded up)
    so that both tensors can be added.

    Axis positions come from the module-level ``DIM*_AXIS`` / ``CHANNEL_AXIS``
    values assigned by ``_handle_data_format``.

    # Arguments
        input: tensor entering the residual block.
        residual: tensor produced by the block's convolution branch.
    # Returns
        The element-wise sum of the (possibly projected) input and residual.
    """
    # K.int_shape is the public accessor for the static shape; the private
    # `_keras_shape` attribute is an implementation detail of Keras tensors.
    input_shape = K.int_shape(input)
    residual_shape = K.int_shape(residual)

    # Integer ceil-division: avoids float rounding and always yields ints,
    # which is what Conv3D expects for `strides`.
    strides = tuple(
        -(-input_shape[axis] // residual_shape[axis])
        for axis in (DIM1_AXIS, DIM2_AXIS, DIM3_AXIS)
    )
    equal_channels = \
        residual_shape[CHANNEL_AXIS] == input_shape[CHANNEL_AXIS]

    shortcut = input
    if any(stride > 1 for stride in strides) or not equal_channels:
        shortcut = Conv3D(
            filters=residual_shape[CHANNEL_AXIS],
            kernel_size=(1, 1, 1),
            strides=strides,
            kernel_initializer="he_normal", padding="valid",
            kernel_regularizer=l2(1e-4)
            )(input)
    return add([shortcut, residual])


def _residual_block3d(block_function, filters, kernel_regularizer, repetitions,
                      is_first_layer=False):
    def f(input):
        for i in range(repetitions):
            strides = (1, 1, 1)
            if i == 0 and not is_first_layer:
                strides = (2, 2, 2)
            input = block_function(filters=filters, strides=strides,
                                   kernel_regularizer=kernel_regularizer,
                                   is_first_block_of_first_layer=(
                                       is_first_layer and i == 0)
                                   )(input)
        return input

    return f


def basic_block(filters, strides=(1, 1, 1), kernel_regularizer=l2(1e-4),
                is_first_block_of_first_layer=False):
    """Residual unit made of two 3 x 3 x 3 convolutions.

    Extended from raghakot's 2D implementation.

    # Arguments
        filters: number of filters of both convolutions.
        strides: strides of the first convolution.
        kernel_regularizer: regularizer applied to both convolutions.
        is_first_block_of_first_layer: when True the first convolution is
            applied directly, without a preceding BN -> relu.
    # Returns
        A function mapping an input tensor to the unit's output tensor
        (shortcut merged with the residual branch).
    """
    def apply(tensor):
        if not is_first_block_of_first_layer:
            first = _bn_relu_conv3d(filters=filters,
                                    kernel_size=(3, 3, 3),
                                    strides=strides,
                                    kernel_regularizer=kernel_regularizer
                                    )(tensor)
        else:
            # The stem already ended with bn -> relu -> maxpool, so the
            # pre-activation is not repeated here.
            first = Conv3D(filters=filters, kernel_size=(3, 3, 3),
                           strides=strides, padding="same",
                           kernel_initializer="he_normal",
                           kernel_regularizer=kernel_regularizer
                           )(tensor)

        second_conv = _bn_relu_conv3d(filters=filters, kernel_size=(3, 3, 3),
                                      kernel_regularizer=kernel_regularizer)
        return _shortcut3d(tensor, second_conv(first))

    return apply


def bottleneck(filters, strides=(1, 1, 1), kernel_regularizer=l2(1e-4),
               is_first_block_of_first_layer=False):
    """Bottleneck residual unit: 1x1x1 -> 3x3x3 -> 1x1x1 convolutions.

    The last convolution has ``filters * 4`` output filters. Extended from
    raghakot's 2D implementation.

    # Arguments
        filters: number of filters of the first two convolutions.
        strides: strides of the first (1x1x1) convolution.
        kernel_regularizer: regularizer applied to all three convolutions.
        is_first_block_of_first_layer: when True the first convolution is
            applied directly, without a preceding BN -> relu.
    # Returns
        A function mapping an input tensor to the unit's output tensor
        (shortcut merged with the residual branch).
    """
    def apply(tensor):
        if not is_first_block_of_first_layer:
            reduced = _bn_relu_conv3d(filters=filters, kernel_size=(1, 1, 1),
                                      strides=strides,
                                      kernel_regularizer=kernel_regularizer
                                      )(tensor)
        else:
            # The stem already ended with bn -> relu -> maxpool, so the
            # pre-activation is not repeated here.
            reduced = Conv3D(filters=filters, kernel_size=(1, 1, 1),
                             strides=strides, padding="same",
                             kernel_initializer="he_normal",
                             kernel_regularizer=kernel_regularizer
                             )(tensor)

        spatial_conv = _bn_relu_conv3d(filters=filters, kernel_size=(3, 3, 3),
                                       kernel_regularizer=kernel_regularizer)
        spatial = spatial_conv(reduced)

        expand_conv = _bn_relu_conv3d(filters=filters * 4,
                                      kernel_size=(1, 1, 1),
                                      kernel_regularizer=kernel_regularizer)
        return _shortcut3d(tensor, expand_conv(spatial))

    return apply


def _handle_data_format():
    """Set the module-level axis indices for the active image data format.

    Assigns the globals ``DIM1_AXIS``, ``DIM2_AXIS``, ``DIM3_AXIS`` and
    ``CHANNEL_AXIS``: channels sit on axis 4 when the backend reports
    'channels_last', otherwise on axis 1 with the three convolution
    dimensions following.
    """
    global DIM1_AXIS, DIM2_AXIS, DIM3_AXIS, CHANNEL_AXIS
    channels_last = K.image_data_format() == 'channels_last'
    if channels_last:
        DIM1_AXIS, DIM2_AXIS, DIM3_AXIS, CHANNEL_AXIS = 1, 2, 3, 4
    else:
        CHANNEL_AXIS, DIM1_AXIS, DIM2_AXIS, DIM3_AXIS = 1, 2, 3, 4


def _get_block(identifier):
    """Resolve a unit-block identifier to the block object.

    # Arguments
        identifier: either a string, looked up by name in this module's
            globals, or any other object, returned unchanged.
    # Returns
        The resolved block.
    # Raises
        ValueError: if a string identifier resolves to nothing (or to a
            falsy value).
    """
    if not isinstance(identifier, six.string_types):
        return identifier

    block = globals().get(identifier)
    if block:
        return block
    raise ValueError('Invalid {}'.format(identifier))


class Resnet3DBuilder(object):
    """Factory of 3D ResNet Keras models (18/34/50/101/152-layer variants)."""

    @staticmethod
    def build(input_shape, num_outputs, block_fn, repetitions, reg_factor):
        """Instantiate a vanilla ResNet3D keras model.

        # Arguments
            input_shape: Tuple of input shape in the format
                (conv_dim1, conv_dim2, conv_dim3, channels) for
                'channels_last' or
                (channels, conv_dim1, conv_dim2, conv_dim3) otherwise.
            num_outputs: Number of units of the final dense layer. A softmax
                activation is used when it is greater than 1, a sigmoid
                otherwise.
            block_fn: Unit block to use: the ``basic_block`` / ``bottleneck``
                functions, or their names as strings (resolved by
                ``_get_block``).
            repetitions: Iterable with the number of unit blocks per stage.
                The filter count starts at 64 and doubles after each stage.
            reg_factor: L2 regularisation factor used for the stem
                convolution, the residual stages and the dense layer.
        # Returns
            model: a 3D ResNet model that takes a 5D tensor (volumetric images
            in batch) as input and returns a 1D vector (prediction) as output.
        # Raises
            ValueError: if ``input_shape`` does not have exactly 4 entries.
        """
        _handle_data_format()
        if len(input_shape) != 4:
            raise ValueError("Input shape should be a tuple "
                             "(conv_dim1, conv_dim2, conv_dim3, channels) "
                             "for tensorflow as backend or "
                             "(channels, conv_dim1, conv_dim2, conv_dim3) "
                             "for theano as backend")

        block_fn = _get_block(block_fn)
        input = Input(shape=input_shape)
        # first conv
        conv1 = _conv_bn_relu3D(filters=64, kernel_size=(7, 7, 7),
                                strides=(2, 2, 2),
                                kernel_regularizer=l2(reg_factor)
                                )(input)
        pool1 = MaxPooling3D(pool_size=(3, 3, 3), strides=(2, 2, 2),
                             padding="same")(conv1)

        # repeat blocks
        block = pool1
        filters = 64
        for i, r in enumerate(repetitions):
            block = _residual_block3d(block_fn, filters=filters,
                                      kernel_regularizer=l2(reg_factor),
                                      repetitions=r, is_first_layer=(i == 0)
                                      )(block)
            filters *= 2

        # last activation
        block_output = _bn_relu(block)

        # average pool and classification: the pooling window spans the whole
        # remaining volume. K.int_shape is the public accessor for the
        # static shape (instead of the private `_keras_shape` attribute).
        feature_shape = K.int_shape(block_output)
        pool2 = AveragePooling3D(pool_size=(feature_shape[DIM1_AXIS],
                                            feature_shape[DIM2_AXIS],
                                            feature_shape[DIM3_AXIS]),
                                 strides=(1, 1, 1))(block_output)
        flatten1 = Flatten()(pool2)

        # The two output heads differ only in their activation.
        activation = "softmax" if num_outputs > 1 else "sigmoid"
        dense = Dense(units=num_outputs,
                      kernel_initializer="he_normal",
                      activation=activation,
                      kernel_regularizer=l2(reg_factor))(flatten1)

        model = Model(inputs=input, outputs=dense)
        return model

    @staticmethod
    def build_resnet_18(input_shape, num_outputs, reg_factor=1e-4):
        """Build resnet 18 (basic blocks, repetitions [2, 2, 2, 2])."""
        return Resnet3DBuilder.build(input_shape, num_outputs, basic_block,
                                     [2, 2, 2, 2], reg_factor=reg_factor)

    @staticmethod
    def build_resnet_34(input_shape, num_outputs, reg_factor=1e-4):
        """Build resnet 34 (basic blocks, repetitions [3, 4, 6, 3])."""
        return Resnet3DBuilder.build(input_shape, num_outputs, basic_block,
                                     [3, 4, 6, 3], reg_factor=reg_factor)

    @staticmethod
    def build_resnet_50(input_shape, num_outputs, reg_factor=1e-4):
        """Build resnet 50 (bottleneck blocks, repetitions [3, 4, 6, 3])."""
        return Resnet3DBuilder.build(input_shape, num_outputs, bottleneck,
                                     [3, 4, 6, 3], reg_factor=reg_factor)

    @staticmethod
    def build_resnet_101(input_shape, num_outputs, reg_factor=1e-4):
        """Build resnet 101 (bottleneck blocks, repetitions [3, 4, 23, 3])."""
        return Resnet3DBuilder.build(input_shape, num_outputs, bottleneck,
                                     [3, 4, 23, 3], reg_factor=reg_factor)

    @staticmethod
    def build_resnet_152(input_shape, num_outputs, reg_factor=1e-4):
        """Build resnet 152 (bottleneck blocks, repetitions [3, 8, 36, 3])."""
        return Resnet3DBuilder.build(input_shape, num_outputs, bottleneck,
                                     [3, 8, 36, 3], reg_factor=reg_factor)

## install video generator library

In [0]:
# Shell install of the video generator package; presumably this is what
# provides the `keras_video` module imported by the generator cells below.
# TODO(review): no version is pinned, so re-runs may pick up a different release.
!pip install keras-video-generators

## Call 3D_Resnet50 

In [0]:
# Build the 3D ResNet-50: input shape (16, 224, 224, 3) and a 3-unit output layer.
# 16 matches NBFRAME and 224x224 matches the crop size used by the generators below.
i3d = Resnet3DBuilder.build_resnet_50(input_shape = (16, 224, 224, 3), num_outputs=3)

In [0]:
# Print the layer-by-layer summary of the model built above.
i3d.summary()

## Preprocess method for train

In [0]:
def get_random_crop(image, crop_x, crop_y, crop_height, crop_width):
    """Cut a rectangular window out of ``image``.

    Despite the name, no randomness happens here: the caller supplies the
    window position.

    # Arguments
        image: array indexed as [row, column, ...].
        crop_x: first column of the window.
        crop_y: first row of the window.
        crop_height: number of rows of the window.
        crop_width: number of columns of the window.
    # Returns
        ``image[crop_y:crop_y + crop_height, crop_x:crop_x + crop_width]``.
    """
    rows = slice(crop_y, crop_y + crop_height)
    cols = slice(crop_x, crop_x + crop_width)
    return image[rows, cols]

## Preprocess method for val

In [0]:
def crop_center(img, new_size):
    """Return the centred window of ``img`` with the requested size.

    # Arguments
        img: array indexed as [row, column, ...]; both 2-D (grayscale) and
            3-D (with a channel axis) inputs are accepted.
        new_size: ``(crop_width, crop_height)`` of the window.
    # Returns
        The centred crop. When the image is smaller than the requested
        window along an axis, the whole extent of that axis is returned.
    """
    # Only the two leading axes matter, so grayscale input works as well.
    y, x = img.shape[:2]
    (cropx, cropy) = new_size
    # Clamp at 0: a negative start would wrap around and select a wrong
    # (much smaller) slice when the crop exceeds the image.
    startx = max(0, x // 2 - (cropx // 2))
    starty = max(0, y // 2 - (cropy // 2))
    return img[starty:starty + cropy, startx:startx + cropx]

## define custom video generator for train

In [0]:
from keras_video import VideoFrameGenerator
from math import floor
import cv2
import numpy as np
from keras.preprocessing.image import ImageDataGenerator, img_to_array

class VideoFrameGenerator_RGB(VideoFrameGenerator):
    """Training generator yielding standardised, randomly cropped RGB clips.

    ``_get_frames`` reads ``nbframe`` consecutive frames from a random start
    position, resizes them, applies one random 224 x 224 crop shared by the
    whole clip and standardises each frame. ``__getitem__`` assembles the
    clips and their one-hot labels into a batch.
    """

    def _get_frames(self, video, nbframe, shape, force_no_headers=False):
        """Read one clip of ``nbframe`` consecutive frames from ``video``.

        # Arguments
            video: path of the video file, handed to ``cv2.VideoCapture``.
            nbframe: number of consecutive frames to extract.
            shape: size given to ``cv2.resize`` as ``dsize``, i.e.
                (width, height); must be at least 224 x 224.
            force_no_headers: forwarded to ``self.count_frames``; True also
                marks the retry pass described below.
        # Returns
            Array of ``nbframe`` frames (each 224 x 224, RGB, standardised),
            or ``None`` when the video does not hold enough frames even
            after the retry with ``force_no_headers=True``.
        # Raises
            ValueError: if ``shape`` is smaller than the 224 x 224 crop.
        """
        # Local import: `log` was never defined in this notebook, so the
        # error branch below used to raise NameError.
        import logging

        crop_width = 224
        crop_height = 224
        # cv2.resize takes (width, height), so the resized frame has
        # shape[1] rows and shape[0] columns.
        max_x = shape[0] - crop_width
        max_y = shape[1] - crop_height
        if max_x < 0 or max_y < 0:
            raise ValueError(
                "target shape %r is smaller than the %dx%d crop"
                % (shape, crop_width, crop_height))

        cap = cv2.VideoCapture(video)
        try:
            total_frames = int(
                self.count_frames(cap, video, force_no_headers))
            # Random 1-based start frame. The bound is clamped to 1 so a
            # video with `nbframe` frames or fewer no longer makes randint
            # raise; such a clip simply starts at the first frame.
            start_frame = max(1, int(np.random.randint(
                max(1, total_frames - nbframe))))

            # One crop offset for the whole clip keeps frames aligned.
            # `+ 1` because randint's upper bound is exclusive: it makes the
            # last valid offset reachable and supports shape == crop size.
            crop_x = np.random.randint(0, max_x + 1)
            crop_y = np.random.randint(0, max_y + 1)

            frames = []
            frame_i = 0
            while len(frames) < nbframe:
                grabbed, frame = cap.read()
                if not grabbed:
                    break

                frame_i += 1
                if frame_i < start_frame:
                    # still before the randomly chosen start frame
                    continue

                # resize to the target shape (256 x 256 in this notebook)
                frame = cv2.resize(frame, shape)
                # OpenCV decodes as BGR; the model is fed RGB
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                # to np
                frame = img_to_array(frame)
                # random crop for data augmentation (keywords avoid mixing
                # up the height/width argument order)
                frame = get_random_crop(frame, crop_x, crop_y,
                                        crop_height=crop_height,
                                        crop_width=crop_width)
                # per-frame standardisation; a uniform frame has std 0, so
                # guard the division to avoid NaN/inf
                std = frame.std()
                frame = (frame - frame.mean()) / (std if std > 0 else 1.0)

                frames.append(frame)
        finally:
            # release the capture even if decoding/resizing raised
            cap.release()

        if len(frames) != nbframe:
            if not force_no_headers:
                # There is a problem here
                # That means that frame count in header is wrong or broken,
                # so we need to force the full read of video to get the
                # right frame counter
                return self._get_frames(
                        video,
                        nbframe,
                        shape,
                        force_no_headers=True)

            # and if we really couldn't find the real frame counter
            # so we return None. Sorry, nothing can be done...
            logging.getLogger(__name__).error(
                "Frame count is not OK for video %s, "
                "%d total, %d extracted",
                video, total_frames, len(frames))
            return None
        return np.array(frames)

    def __getitem__(self, index):
        """Build batch number ``index``.

        Frames are already standardised by ``_get_frames``, so no further
        rescaling (such as dividing by 255) is applied here.

        # Arguments
            index: position of the batch within the epoch.
        # Returns
            Tuple ``(images, labels)`` of arrays; ``labels`` holds one-hot
            rows over ``self.classes``. Videos for which ``_get_frames``
            returns ``None`` are skipped, so a batch may hold fewer than
            ``self.batch_size`` samples.
        """
        classes = self.classes
        shape = self.target_shape
        nbframe = self.nbframe

        labels = []
        images = []

        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        transformation = None

        for i in indexes:
            # prepare a transformation if provided
            if self.transformation is not None:
                transformation = self._random_trans[i]

            video = self.files[i]
            classname = self._get_classname(video)

            # create a label array and set 1 to the right column
            label = np.zeros(len(classes))
            col = classes.index(classname)
            label[col] = 1.

            frames = self._get_frames(
                video,
                nbframe,
                shape,
                force_no_headers=not self.use_video_header)
            if frames is None:
                # avoid failure, nevermind that video...
                continue

            # apply the same transformation to every frame of the clip
            if transformation is not None:
                frames = [self.transformation.apply_transform(
                    frame, transformation) for frame in frames]

            # add the sequence in batch
            images.append(frames)
            labels.append(label)

        return np.array(images), np.array(labels)


## define custom video generator for val

In [0]:
from keras_video import VideoFrameGenerator
from math import floor
import cv2
import numpy as np
class VideoFrameGenerator_RGB_val(VideoFrameGenerator):
    """Validation generator yielding standardised, centre-cropped RGB clips.

    Only ``_get_frames`` is overridden; batching is inherited from
    ``VideoFrameGenerator``.
    """

    def _get_frames(self, video, nbframe, shape, force_no_headers=False):
        """Read one clip of ``nbframe`` consecutive frames from ``video``.

        The start frame is still drawn at random, so validation clips differ
        between calls.

        # Arguments
            video: path of the video file, handed to ``cv2.VideoCapture``.
            nbframe: number of consecutive frames to extract.
            shape: size given to ``cv2.resize`` as ``dsize``.
            force_no_headers: forwarded to ``self.count_frames``; True also
                marks the retry pass described below.
        # Returns
            Array of ``nbframe`` frames (centre-cropped to 224 x 224, RGB,
            standardised), or ``None`` when the video does not hold enough
            frames even after the retry with ``force_no_headers=True``.
        """
        # Local import: `log` was never defined in this notebook, so the
        # error branch below used to raise NameError.
        import logging

        cap = cv2.VideoCapture(video)
        try:
            total_frames = int(
                self.count_frames(cap, video, force_no_headers))
            # Random 1-based start frame. The bound is clamped to 1 so a
            # video with `nbframe` frames or fewer no longer makes randint
            # raise; such a clip simply starts at the first frame.
            start_frame = max(1, int(np.random.randint(
                max(1, total_frames - nbframe))))

            frames = []
            frame_i = 0
            while len(frames) < nbframe:
                grabbed, frame = cap.read()
                if not grabbed:
                    break

                frame_i += 1
                if frame_i < start_frame:
                    # still before the randomly chosen start frame
                    continue

                # resize to the target shape, then keep the central
                # 224 x 224 window
                frame = cv2.resize(frame, shape)
                frame = crop_center(frame, (224, 224))
                # OpenCV decodes as BGR; the model is fed RGB
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # to np (img_to_array is imported by the training-generator
                # cell above)
                frame = img_to_array(frame)
                # per-frame standardisation; a uniform frame has std 0, so
                # guard the division to avoid NaN/inf
                std = frame.std()
                frame = (frame - frame.mean()) / (std if std > 0 else 1.0)

                frames.append(frame)
        finally:
            # release the capture even if decoding/resizing raised
            cap.release()

        if len(frames) != nbframe:
            if not force_no_headers:
                # There is a problem here
                # That means that frame count in header is wrong or broken,
                # so we need to force the full read of video to get the
                # right frame counter
                return self._get_frames(
                        video,
                        nbframe,
                        shape,
                        force_no_headers=True)

            # and if we really couldn't find the real frame counter
            # so we return None. Sorry, nothing can be done...
            logging.getLogger(__name__).error(
                "Frame count is not OK for video %s, "
                "%d total, %d extracted",
                video, total_frames, len(frames))
            return None

        return np.array(frames)

## call custom video generator and image augmentation for train

In [0]:
from keras_video import VideoFrameGenerator
import os
import glob
import keras
# Frames per clip; matches the first dimension of the model input (16, 224, 224, 3).
NBFRAME = 16
# Class names are the entry names directly under the training folder,
# sorted so the class order does not depend on glob's listing order.
classes = [i.split(os.path.sep)[-1] for i in glob.glob('drive/My Drive/Dataset/final_project/Image/*')]
classes.sort()
# some global params
# Frames are resized to SIZE before the generator crops them to 224x224.
SIZE = (256, 256)
# NOTE(review): VideoFrameGenerator_RGB._get_frames always converts to RGB and
# never reads nb_channel, so 1 looks inconsistent with the 3-channel model
# input — confirm how the base class uses this value.
CHANNELS = 1
# Batch size of the training generator.
BS = 32
# pattern to get videos and classes
glob_pattern='./drive/My Drive/Dataset/final_project/Image/{classname}/*.mp4'

# for data augmentation: random horizontal flips plus width/height shifts
# (range 0.2); zoom, brightness and rotation are currently disabled
data_aug = keras.preprocessing.image.ImageDataGenerator(
    # zoom_range=.2,
    # brightness_range=[0.7, 1.3],
    # rotation_range = 20,
    horizontal_flip=True,
    width_shift_range=.2,
    height_shift_range=.2
    )
# Create video frame generator
# NOTE(review): the overridden __getitem__ of VideoFrameGenerator_RGB never
# consults a frame cache, so use_frame_cache=True presumably has no effect
# here — confirm against the base class.
train = VideoFrameGenerator_RGB(
    classes=classes, 
    glob_pattern=glob_pattern,
    nb_frames=NBFRAME,
    shuffle=True,
    batch_size=BS,
    target_shape=SIZE,
    nb_channel=CHANNELS,
    transformation=data_aug,
    use_frame_cache=True)

## call custom video generator for val

In [0]:
# Validation videos live in a sibling "Val" folder with the same per-class layout.
# This rebinds `glob_pattern`, which previously pointed at the training folder.
glob_pattern='./drive/My Drive/Dataset/final_project/Val/{classname}/*.mp4'

# Validation generator: no augmentation, batch size 16, frames resized to
# 256x256 and then centre-cropped to 224x224 by VideoFrameGenerator_RGB_val.
# NOTE(review): VideoFrameGenerator_RGB_val._get_frames standardises frames
# and never reads self.rescale, so rescale=1./255 looks unused — confirm
# whether the base class applies it elsewhere.
valid = VideoFrameGenerator_RGB_val(
    rescale=1./255,
    classes=classes, 
    glob_pattern=glob_pattern,
    nb_frames=NBFRAME, 
    batch_size=16,
    target_shape=(256, 256),
    nb_channel=CHANNELS,
    use_frame_cache=True)

## define recall method for evaluating model

In [0]:
def single_class_recall(interesting_class_id):
    """Create a Keras metric computing the recall of a single class.

    # Arguments
        interesting_class_id: index of the class whose recall is measured.
    # Returns
        A metric function ``recall(y_true, y_pred)``. Class ids are taken as
        the argmax over the last axis of both tensors; the result is the
        number of samples of the class that were predicted correctly divided
        by the number of samples of the class (the denominator is at least
        1, so the metric is 0 when the class is absent from the batch).
    """
    def recall(y_true, y_pred):
        backend = keras.backend
        true_ids = backend.argmax(y_true, axis=-1)
        pred_ids = backend.argmax(y_pred, axis=-1)

        # 1 where the sample belongs to the class of interest, else 0
        is_target = backend.cast(
            backend.equal(true_ids, interesting_class_id), 'int32')
        # 1 where the prediction is correct, restricted to the target class
        is_hit = backend.cast(
            backend.equal(true_ids, pred_ids), 'int32') * is_target

        hits = backend.cast(backend.sum(is_hit), 'float32')
        support = backend.cast(
            backend.maximum(backend.sum(is_target), 1), 'float32')
        return hits / support

    return recall

## model compile

In [0]:
# Compile with categorical cross-entropy and SGD with momentum; metrics are
# overall accuracy plus the recall of each of the three classes.
# NOTE(review): decay=0.9 looks very aggressive next to lr=0.1 — confirm it is
# intended (see the Keras SGD documentation for how `decay` is applied).
i3d.compile(loss="categorical_crossentropy",
                optimizer=keras.optimizers.SGD(lr=0.1, momentum=0.9, decay=0.9)
            , metrics=['accuracy', single_class_recall(0), single_class_recall(1), single_class_recall(2)])

# Using TensorBoard (for Colab)

In [0]:
# Start TensorBoard in the background on port 6006, reading event files from
# LOG_DIR (the same path the TensorBoard callback below writes to).
LOG_DIR = '/tmp/log'
get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
    .format(LOG_DIR)
)

In [0]:
# Download and unpack the ngrok binary into the working directory; it is
# launched as ./ngrok in the next cell.
! wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
! unzip ngrok-stable-linux-amd64.zip

In [0]:
# Run ngrok in the background, tunnelling HTTP traffic to port 6006 (TensorBoard).
get_ipython().system_raw('./ngrok http 6006 &')

In [0]:
# Query the local tunnels endpoint on port 4040 and print the public URL of the first tunnel.
! curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

## define callback method for training

In [0]:
# Epoch count passed to fit_generator; EarlyStopping is commented out below.
EPOCHS = 20000
# Active callbacks: weight checkpointing and TensorBoard logging.
callbacks = [
    # (disabled) lower the learning rate when the training loss plateaus
    # keras.callbacks.ReduceLROnPlateau(verbose=1, 
    #                                   monitor='loss',# quantity to monitor (loss) 
    #                                   patience=10, # if the loss has not decreased for 10 epochs
    #                                   factor=0.9, # set the learning rate to learning_rate*factor 
    #                                   min_lr=1e-4 # lower bound (original note said 0.00001, the value is 0.0001)
    #                                   ),
    # Save weights only, and only when the monitored training loss improves.
    # NOTE(review): monitor='loss' is the training loss although validation
    # data is passed to fit_generator — confirm 'val_loss' is not intended.
    keras.callbacks.ModelCheckpoint('drive/My Drive/Dataset/final_project/i3d_resnet_RGB.hdf5', 
                                              monitor='loss', 
                                              verbose=1, save_best_only=True, save_weights_only=True, 
                                              mode='auto', period=1),
    # keras.callbacks.TensorBoard(log_dir='./graph', histogram_freq=0, write_graph=True, update_freq='epoch', write_images=True),
#     keras.callbacks.EarlyStopping(monitor='val_loss',  # quantity to monitor
#                               patience=100,         # stop if there is no improvement for 100 epochs
#                              ),
    # Write TensorBoard logs to /tmp/log, the directory TensorBoard was started on.
    keras.callbacks.TensorBoard(log_dir='/tmp/log', histogram_freq=0, write_graph=True, update_freq='epoch', write_images=True),
]


## model training

In [0]:
# Train from the `train` generator for EPOCHS epochs, passing the `valid`
# generator as validation data and the callbacks defined above.
i3d.fit_generator(
    train,
    validation_data=valid,
    verbose=1,
    epochs=EPOCHS,
    callbacks=callbacks
)