In [0]:
%tensorflow_version 1.x
import keras

In [0]:
def get_random_crop(image, crop_height, crop_width):
    """Return a randomly positioned ``crop_height`` x ``crop_width`` window of ``image``.

    Args:
        image: Array indexed as ``image[row, column, ...]``; only ``shape[0]``
            (rows) and ``shape[1]`` (columns) are read.
        crop_height: Number of rows in the returned crop.
        crop_width: Number of columns in the returned crop.

    Returns:
        The slice ``image[y:y + crop_height, x:x + crop_width]`` for a top-left
        corner ``(x, y)`` drawn uniformly from every position where the crop fits.

    Raises:
        ValueError: If the requested crop is larger than the image.
    """
    max_x = image.shape[1] - crop_width
    max_y = image.shape[0] - crop_height

    if max_x < 0 or max_y < 0:
        raise ValueError(
            "Crop of size ({}, {}) does not fit in image of size ({}, {})".format(
                crop_height, crop_width, image.shape[0], image.shape[1]))

    # np.random.randint excludes its upper bound, so use max + 1: this makes the
    # last valid offset reachable and allows a crop equal to the image size
    # (max == 0), which previously raised "low >= high".
    x = np.random.randint(0, max_x + 1)
    y = np.random.randint(0, max_y + 1)

    return image[y: y + crop_height, x: x + crop_width]

In [0]:
import tensorflow as tf
from tensorflow.keras.utils import Sequence
from tensorflow.keras.utils import to_categorical
import numpy as np
from keras.preprocessing.image import ImageDataGenerator, img_to_array
import random
import cv2
import time
class DataGenerator(Sequence):
    """Batch generator that reads clips of consecutive grayscale frames from disk.

    Every entry of ``X`` is a global index into ``total_file_names`` marking the
    first frame of a clip; the clip is the ``nbframe`` file names starting there.
    ``y`` maps each class name to the global index of that class's first file,
    which is how the class folder and the integer label of a clip are recovered.

    Args:
        total_file_names: Flat list of frame file names for all classes, in the
            same order used to compute the indices in ``X`` and ``y``.
        classes: Class names; the position in this list is the integer label and
            ``x_path + name + '/'`` is the folder the frames are read from.
        nbframe: Number of frames per clip.
        x_path: Root folder prefix (expected to end with a path separator).
        X: Clip start indices.
        y: Dict ``class name -> first global file index``, or ``None`` for
            unlabeled data.
        batch_size: Number of clips per batch.
        dim: Size passed to ``cv2.resize`` before cropping.
        n_classes: Number of classes for the one-hot labels.
        n_channels: Size of the trailing channel axis of the batch array.
        transformation: Optional ``ImageDataGenerator`` whose random transform is
            drawn once per clip and epoch and applied to every frame of the clip.
        target_shape: ``(height, width)`` of the random crop / network input.
        shuffle: Whether to reshuffle the clip order at the end of every epoch.

    NOTE(review): this class extends ``tensorflow.keras.utils.Sequence`` while
    the model in this notebook is built with the standalone ``keras`` package --
    confirm the installed Keras version accepts it as a Sequence.
    """

    def __init__(self, total_file_names, classes, nbframe, x_path, X, y, batch_size, dim, n_classes, n_channels=1,  transformation: ImageDataGenerator = None, target_shape: tuple = (224, 224), shuffle = True):
        self.X = X
        self.total_file_names = total_file_names
        self.classes = classes
        self.files_count = len(X)
        self.nbframe = nbframe
        self.target_shape = target_shape
        self.x_path = x_path
        self.transformation = transformation
        self._random_trans = []
        self.y = y
        self.batch_size = batch_size
        self.dim = dim
        self.n_channels = n_channels
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def on_epoch_end(self):
        """Reset the clip order and draw fresh random transforms for the next epoch."""
        self.indexes = np.arange(len(self.X))

        if self.transformation is not None:
            # One transform per clip so that all frames of a clip share it.
            self._random_trans = [
                self.transformation.get_random_transform(self.target_shape)
                for _ in range(self.files_count)
            ]

        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __len__(self):
        """Number of full batches per epoch (a trailing partial batch is dropped)."""
        return int(np.floor(len(self.X) / self.batch_size))

    def _locate_class(self, start_index):
        """Return ``(folder, label)`` of the class whose file range holds ``start_index``."""
        num_class = len(self.classes)
        for j in range(num_class):
            if self.y[self.classes[j]] <= start_index:
                is_last = j + 1 == num_class
                if is_last or start_index < self.y[self.classes[j + 1]]:
                    return self.x_path + self.classes[j] + '/', j
        # Previously this case left the lookup result unbound (or stale from the
        # previous clip), which silently produced a wrong label.
        raise ValueError(
            'No class range contains start index {}'.format(start_index))

    def __data_generation(self, X_list):
        """Build the batch array (and one-hot labels when ``self.y`` is set).

        Returns:
            ``(X, y_one_hot)`` when labels are available, otherwise only ``X``.
            ``X`` has shape ``(len(X_list), nbframe, height, width, n_channels)``.
        """
        height, width = self.target_shape[:2]
        X = np.empty((len(X_list), self.nbframe, height, width, self.n_channels))

        # The label check must look at self.y: a freshly allocated local label
        # array is never None, which made this branch unreachable before.
        if self.y is None:
            for i, img in enumerate(X_list):
                X[i] = img
            return X

        y = np.empty(len(X_list), dtype=int)

        # With custom image data, this is where the frames of each clip in the
        # batch are loaded from disk.
        for i, start_index in enumerate(X_list):
            file_path, y[i] = self._locate_class(start_index)

            for offset in range(self.nbframe):
                frame_name = file_path + self.total_file_names[start_index + offset]

                frame = cv2.imread(frame_name, cv2.IMREAD_GRAYSCALE)
                if frame is None:
                    # cv2.imread signals a missing/unreadable file by returning None.
                    raise IOError('Could not read frame: {}'.format(frame_name))

                frame = cv2.resize(frame, self.dim)

                # Random crop for data augmentation.
                # NOTE(review): the crop position is drawn per frame, not per clip.
                frame = get_random_crop(frame, height, width)

                # Per-frame standardization; a constant frame has zero std, so
                # fall back to 1.0 instead of producing NaN/inf.
                std = frame.std()
                frame = (frame - frame.mean()) / (std if std > 0 else 1.0)

                X[i, offset] = frame.reshape(height, width, 1)

        return X, to_categorical(y, num_classes = self.n_classes)

    def __getitem__(self, index):
        """Return batch number ``index``: ``(X, y)`` with labels, else ``X`` only."""
        indexes = self.indexes[index * self.batch_size : (index + 1) * self.batch_size]
        X_list = [self.X[i] for i in indexes]

        if self.y is None:
            return self.__data_generation(X_list)

        X, y = self.__data_generation(X_list)

        if self.transformation is not None:
            # Apply the clip's pre-drawn transform to each of its frames.
            for pos, i in enumerate(indexes):
                X[pos] = [self.transformation.apply_transform(
                    frame, self._random_trans[i]) for frame in X[pos]]

        return X, y

In [0]:
# Mount Google Drive in the Colab runtime at /content/drive; the dataset path
# (x_path) used later in this notebook points below that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [0]:
from os import listdir
from os.path import isfile, join, isdir

def getFileList(mypath):
    """Return the names of the regular files directly inside ``mypath``, sorted.

    ``os.listdir`` returns entries in arbitrary order. The list is sorted here
    because callers in this notebook treat consecutive list positions as
    consecutive video frames, so the order must be deterministic.

    NOTE(review): sorting is lexicographic; frame names are assumed to be
    zero-padded (``0001.jpg`` ...) -- confirm against the dataset.

    Args:
        mypath: Directory to list.

    Returns:
        Sorted list of file names (not full paths); sub-directories are skipped.
    """
    onlyFiles = sorted(f for f in listdir(mypath) if isfile(join(mypath, f)))
    return onlyFiles

def getDirList(mypath):
    """Return the names of the sub-directories directly inside ``mypath``, sorted.

    ``os.listdir`` returns entries in arbitrary order; sorting keeps the result
    (and any class-to-label mapping derived from it) stable between runs.

    Args:
        mypath: Directory to list.

    Returns:
        Sorted list of directory names (not full paths); regular files are skipped.
    """
    onlyDirs = sorted(d for d in listdir(mypath) if isdir(join(mypath, d)))
    return onlyDirs

In [0]:
import random

# --- Dataset configuration and train/test split of clip start indices ---

# No ImageDataGenerator augmentation is configured here.
transformation=None
# Root folder; each class name below is appended to it to find that class's frames.
x_path = "/content/drive/My Drive/data/image/"
#classes = getDirList(x_path)
classes = ["kicking","punching"]
# Number of consecutive frames that form one clip.
nbframe=25
# Frame file names of all classes, concatenated in class order.
total_file_names = []
# Number of frame files found per class.
class_len_list = []
# How many clip start indices to sample per class for training / testing.
num_input_per_class = 1500
test_input_per_class = 500

# Clip start indices (global positions in total_file_names).
X_train = []
X_test = []
# Maps class name -> global index of that class's first file.
y = {}
start_file_index = 0
for temp_class in classes:
    y[temp_class] = start_file_index
    temp_file_list = getFileList(x_path + temp_class)
    # Training clips start in the first two thirds of the class's files, leaving
    # nbframe files of room so a clip never runs past that part.
    end_file_index = start_file_index + int(len(temp_file_list)*2/3) - nbframe
    # NOTE(review): random.sample raises ValueError if a range holds fewer
    # indices than requested, and no random seed is set, so the split differs
    # between runs.
    X_train.extend(list(random.sample(range(start_file_index, end_file_index), num_input_per_class)))
    # Test clips start after the training part and stop nbframe files before the
    # end of the class so every clip stays inside the class.
    X_test.extend(list(random.sample(range(end_file_index + nbframe, start_file_index + len(temp_file_list) - nbframe), test_input_per_class)))
    class_len_list.append(len(temp_file_list))
    total_file_names.extend(temp_file_list)
    start_file_index += len(temp_file_list)

batch_size=8
# Size frames are resized to (passed to cv2.resize) before the random crop.
dim=(256,256)
n_channels=1
n_classes=len(classes)
print(n_classes)
shuffle = True

In [0]:
# Sanity check: total number of sampled test clip start indices over all classes.
len(X_test)

In [0]:
# Augmentation settings (zoom, brightness, rotation, horizontal flip, shifts).
# NOTE(review): data_aug is not passed to the generator below
# (transformation=None), so these augmentations are currently unused.
data_aug = keras.preprocessing.image.ImageDataGenerator(
    zoom_range=.2,
    brightness_range=[0.7, 1.3],
    rotation_range = 20,
    horizontal_flip=True,
    width_shift_range=.2,
    height_shift_range=.2)

# Training batch generator over the sampled training clip start indices.
train = DataGenerator(total_file_names = total_file_names, classes = classes, transformation=None, x_path=x_path, nbframe=nbframe, X=X_train, y=y, batch_size=batch_size, dim=dim, n_channels=n_channels, n_classes=n_classes, shuffle = shuffle)

In [0]:
# Install keras-contrib straight from its GitHub repository (no version is pinned).
!pip install git+https://www.github.com/keras-team/keras-contrib.git

In [0]:
import keras_contrib

In [0]:
"""A vanilla 3D resnet implementation.
Based on Raghavendra Kotikalapudi's 2D implementation
keras-resnet (See https://github.com/raghakot/keras-resnet.)
"""
from __future__ import (
    absolute_import,
    division,
    print_function,
    unicode_literals
)
import six
from math import ceil
from keras.models import Model
from keras.layers import (
    Input,
    Activation,
    Dense,
    Flatten
)
from keras.layers.convolutional import (
    Conv3D,
    AveragePooling3D,
    MaxPooling3D
)
from keras.layers.merge import add
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2
from keras import backend as K


def _bn_relu(input):
    """Batch-normalize ``input`` along the channel axis, then apply ReLU (by @raghakot)."""
    normalized = BatchNormalization(axis=CHANNEL_AXIS)(input)
    relu = Activation("relu")
    return relu(normalized)


def _conv_bn_relu3D(**conv_params):
    """Helper to build a conv3d -> BN -> relu block.

    Required keys: ``filters`` and ``kernel_size``. Optional keys and their
    defaults: ``strides`` (1, 1, 1), ``kernel_initializer`` "he_normal",
    ``padding`` "same", ``kernel_regularizer`` l2(1e-4).

    Returns a function mapping an input tensor to the block's output tensor.
    """
    conv_kwargs = {
        "filters": conv_params["filters"],
        "kernel_size": conv_params["kernel_size"],
        "strides": conv_params.get("strides", (1, 1, 1)),
        "kernel_initializer": conv_params.get("kernel_initializer",
                                              "he_normal"),
        "padding": conv_params.get("padding", "same"),
        "kernel_regularizer": conv_params.get("kernel_regularizer", l2(1e-4)),
    }

    def f(input):
        convolved = Conv3D(**conv_kwargs)(input)
        return _bn_relu(convolved)

    return f


def _bn_relu_conv3d(**conv_params):
    """Helper to build a BN -> relu -> conv3d block (pre-activation ordering).

    Required keys: ``filters`` and ``kernel_size``. Optional keys and their
    defaults: ``strides`` (1, 1, 1), ``kernel_initializer`` "he_normal",
    ``padding`` "same", ``kernel_regularizer`` l2(1e-4).

    Returns a function mapping an input tensor to the block's output tensor.
    """
    conv_kwargs = {
        "filters": conv_params["filters"],
        "kernel_size": conv_params["kernel_size"],
        "strides": conv_params.get("strides", (1, 1, 1)),
        "kernel_initializer": conv_params.get("kernel_initializer",
                                              "he_normal"),
        "padding": conv_params.get("padding", "same"),
        "kernel_regularizer": conv_params.get("kernel_regularizer", l2(1e-4)),
    }

    def f(input):
        activated = _bn_relu(input)
        return Conv3D(**conv_kwargs)(activated)

    return f


def _shortcut3d(input, residual):
    """3D shortcut to match input and residual and merges them with "sum".

    When the residual branch changed the spatial/temporal size or the channel
    count, the input is projected with a strided 1x1x1 convolution so that both
    tensors have the same shape before they are added.

    Args:
        input: Tensor entering the residual block.
        residual: Tensor produced by the block's convolution branch.

    Returns:
        The element-wise sum of the (possibly projected) input and ``residual``.
    """
    # K.int_shape is the public way to read static shapes; it avoids relying on
    # the private ``_keras_shape`` attribute.
    input_shape = K.int_shape(input)
    residual_shape = K.int_shape(residual)

    # int(...) keeps the strides integral even where math.ceil returns a float.
    stride_dim1 = int(ceil(input_shape[DIM1_AXIS] / residual_shape[DIM1_AXIS]))
    stride_dim2 = int(ceil(input_shape[DIM2_AXIS] / residual_shape[DIM2_AXIS]))
    stride_dim3 = int(ceil(input_shape[DIM3_AXIS] / residual_shape[DIM3_AXIS]))
    equal_channels = residual_shape[CHANNEL_AXIS] == input_shape[CHANNEL_AXIS]

    shortcut = input
    if stride_dim1 > 1 or stride_dim2 > 1 or stride_dim3 > 1 \
            or not equal_channels:
        shortcut = Conv3D(
            filters=residual_shape[CHANNEL_AXIS],
            kernel_size=(1, 1, 1),
            strides=(stride_dim1, stride_dim2, stride_dim3),
            kernel_initializer="he_normal", padding="valid",
            kernel_regularizer=l2(1e-4)
            )(input)
    return add([shortcut, residual])


def _residual_block3d(block_function, filters, kernel_regularizer, repetitions,
                      is_first_layer=False):
    def f(input):
        for i in range(repetitions):
            strides = (1, 1, 1)
            if i == 0 and not is_first_layer:
                strides = (2, 2, 2)
            input = block_function(filters=filters, strides=strides,
                                   kernel_regularizer=kernel_regularizer,
                                   is_first_block_of_first_layer=(
                                       is_first_layer and i == 0)
                                   )(input)
        return input

    return f


def basic_block(filters, strides=(1, 1, 1), kernel_regularizer=l2(1e-4),
                is_first_block_of_first_layer=False):
    """Two stacked 3 X 3 X 3 convolutions with a shortcut connection.

    Extended from raghakot's 2D implementation. Returns a function mapping an
    input tensor to the block's output tensor.
    """
    def f(input):
        if not is_first_block_of_first_layer:
            first = _bn_relu_conv3d(filters=filters,
                                    kernel_size=(3, 3, 3),
                                    strides=strides,
                                    kernel_regularizer=kernel_regularizer
                                    )(input)
        else:
            # The stem already ended with bn -> relu -> maxpool, so the very
            # first block starts with a plain convolution.
            first = Conv3D(filters=filters, kernel_size=(3, 3, 3),
                           strides=strides, padding="same",
                           kernel_initializer="he_normal",
                           kernel_regularizer=kernel_regularizer
                           )(input)

        second = _bn_relu_conv3d(filters=filters, kernel_size=(3, 3, 3),
                                 kernel_regularizer=kernel_regularizer
                                 )(first)
        return _shortcut3d(input, second)

    return f


def bottleneck(filters, strides=(1, 1, 1), kernel_regularizer=l2(1e-4),
               is_first_block_of_first_layer=False):
    """Bottleneck block: 1x1x1 -> 3x3x3 -> 1x1x1 convolutions plus shortcut.

    The last convolution outputs ``filters * 4`` channels. Extended from
    raghakot's 2D implementation. Returns a function mapping an input tensor to
    the block's output tensor.
    """
    def f(input):
        if not is_first_block_of_first_layer:
            reduced = _bn_relu_conv3d(filters=filters, kernel_size=(1, 1, 1),
                                      strides=strides,
                                      kernel_regularizer=kernel_regularizer
                                      )(input)
        else:
            # The stem already ended with bn -> relu -> maxpool, so the very
            # first block starts with a plain convolution.
            reduced = Conv3D(filters=filters, kernel_size=(1, 1, 1),
                             strides=strides, padding="same",
                             kernel_initializer="he_normal",
                             kernel_regularizer=kernel_regularizer
                             )(input)

        middle = _bn_relu_conv3d(filters=filters, kernel_size=(3, 3, 3),
                                 kernel_regularizer=kernel_regularizer
                                 )(reduced)
        expanded = _bn_relu_conv3d(filters=filters * 4, kernel_size=(1, 1, 1),
                                   kernel_regularizer=kernel_regularizer
                                   )(middle)

        return _shortcut3d(input, expanded)

    return f


def _handle_data_format():
    """Set the module-level axis indices to match the backend's image data format.

    For 'channels_last' the three convolution dimensions are axes 1-3 and the
    channels are axis 4; otherwise the channels are axis 1 and the convolution
    dimensions are axes 2-4.
    """
    global DIM1_AXIS, DIM2_AXIS, DIM3_AXIS, CHANNEL_AXIS
    if K.image_data_format() != 'channels_last':
        CHANNEL_AXIS, DIM1_AXIS, DIM2_AXIS, DIM3_AXIS = 1, 2, 3, 4
    else:
        DIM1_AXIS, DIM2_AXIS, DIM3_AXIS, CHANNEL_AXIS = 1, 2, 3, 4


def _get_block(identifier):
    """Resolve a block function given either the function itself or its name.

    A string is looked up among this module's globals; anything else is
    returned unchanged.

    Raises:
        ValueError: If a string does not name a (truthy) module-level object.
    """
    if not isinstance(identifier, six.string_types):
        return identifier
    resolved = globals().get(identifier)
    if resolved:
        return resolved
    raise ValueError('Invalid {}'.format(identifier))


class Resnet3DBuilder(object):
    """Factory for 3D ResNet Keras models (ResNet-18/34/50/101/152 variants)."""

    @staticmethod
    def build(input_shape, num_outputs, block_fn, repetitions, reg_factor):
        """Instantiate a vanilla ResNet3D keras model.

        # Arguments
            input_shape: Tuple of input shape in the format
                (conv_dim1, conv_dim2, conv_dim3, channels) for 'channels_last'
                or (channels, conv_dim1, conv_dim2, conv_dim3) otherwise.
            num_outputs: The number of outputs of the final dense layer
                (softmax when > 1, sigmoid otherwise).
            block_fn: Unit block to use, either the function itself or its
                name: {'basic_block', 'bottleneck'}.
            repetitions: Number of unit blocks in each stage.
            reg_factor: L2 regularization factor applied to the stem
                convolution, the residual blocks and the final dense layer.

        # Returns
            model: a 3D ResNet model that takes a 5D tensor (volumetric images
            in batch) as input and returns a 1D vector (prediction) as output.

        # Raises
            ValueError: If ``input_shape`` does not have exactly 4 entries, or
                ``block_fn`` is a string that does not name a known block.
        """
        _handle_data_format()
        if len(input_shape) != 4:
            raise ValueError("Input shape should be a tuple "
                             "(conv_dim1, conv_dim2, conv_dim3, channels) "
                             "for tensorflow as backend or "
                             "(channels, conv_dim1, conv_dim2, conv_dim3) "
                             "for theano as backend")

        block_fn = _get_block(block_fn)
        input = Input(shape=input_shape)
        # first conv
        conv1 = _conv_bn_relu3D(filters=64, kernel_size=(7, 7, 7),
                                strides=(2, 2, 2),
                                kernel_regularizer=l2(reg_factor)
                                )(input)
        pool1 = MaxPooling3D(pool_size=(3, 3, 3), strides=(2, 2, 2),
                             padding="same")(conv1)

        # repeat blocks; the filter count doubles after every stage
        block = pool1
        filters = 64
        for i, r in enumerate(repetitions):
            block = _residual_block3d(block_fn, filters=filters,
                                      kernel_regularizer=l2(reg_factor),
                                      repetitions=r, is_first_layer=(i == 0)
                                      )(block)
            filters *= 2

        # last activation
        block_output = _bn_relu(block)

        # average pool over the whole remaining volume, then classify.
        # K.int_shape is the public way to read the static shape; it avoids
        # relying on the private ``_keras_shape`` attribute.
        block_shape = K.int_shape(block)
        pool2 = AveragePooling3D(pool_size=(block_shape[DIM1_AXIS],
                                            block_shape[DIM2_AXIS],
                                            block_shape[DIM3_AXIS]),
                                 strides=(1, 1, 1))(block_output)
        flatten1 = Flatten()(pool2)
        final_activation = "softmax" if num_outputs > 1 else "sigmoid"
        dense = Dense(units=num_outputs,
                      kernel_initializer="he_normal",
                      activation=final_activation,
                      kernel_regularizer=l2(reg_factor))(flatten1)

        model = Model(inputs=input, outputs=dense)
        return model

    @staticmethod
    def build_resnet_18(input_shape, num_outputs, reg_factor=1e-4):
        """Build resnet 18."""
        return Resnet3DBuilder.build(input_shape, num_outputs, basic_block,
                                     [2, 2, 2, 2], reg_factor=reg_factor)

    @staticmethod
    def build_resnet_34(input_shape, num_outputs, reg_factor=1e-4):
        """Build resnet 34."""
        return Resnet3DBuilder.build(input_shape, num_outputs, basic_block,
                                     [3, 4, 6, 3], reg_factor=reg_factor)

    @staticmethod
    def build_resnet_50(input_shape, num_outputs, reg_factor=1e-4):
        """Build resnet 50."""
        return Resnet3DBuilder.build(input_shape, num_outputs, bottleneck,
                                     [3, 4, 6, 3], reg_factor=reg_factor)

    @staticmethod
    def build_resnet_101(input_shape, num_outputs, reg_factor=1e-4):
        """Build resnet 101."""
        return Resnet3DBuilder.build(input_shape, num_outputs, bottleneck,
                                     [3, 4, 23, 3], reg_factor=reg_factor)

    @staticmethod
    def build_resnet_152(input_shape, num_outputs, reg_factor=1e-4):
        """Build resnet 152."""
        return Resnet3DBuilder.build(input_shape, num_outputs, bottleneck,
                                     [3, 8, 36, 3], reg_factor=reg_factor)

In [0]:
# from keras_contrib.layers.normalization.groupnormalization import GroupNormalization

In [0]:
# Build a 3D ResNet-50 for clips of 25 grayscale 224x224 frames with 2 output classes.
i3d = Resnet3DBuilder.build_resnet_50(input_shape = (25, 224, 224, 1), num_outputs=2)

In [0]:
# Print the layer-by-layer summary of the model.
i3d.summary()

In [0]:
# glob_pattern='./drive/My Drive/Dataset/final_project/Val/{classname}/*.mp4'

# valid = VideoFrameGenerator_RGB_val(
#     classes=classes, 
#     glob_pattern=glob_pattern,
#     nb_frames=NBFRAME, 
#     batch_size=16,
#     target_shape=(256, 256),
#     nb_channel=CHANNELS,
#     use_frame_cache=True)

In [0]:
def single_class_recall(interesting_class_id):
    """Build a Keras metric computing the batch recall of one class.

    The returned ``recall(y_true, y_pred)`` takes the argmax of both tensors
    along the last axis and divides the number of samples of class
    ``interesting_class_id`` that were predicted correctly by the number of
    samples of that class (at least 1, to avoid dividing by zero).
    """
    def recall(y_true, y_pred):
        backend = keras.backend
        true_ids = backend.argmax(y_true, axis=-1)
        predicted_ids = backend.argmax(y_pred, axis=-1)
        # 1 where the sample belongs to the class of interest, else 0.
        in_class = backend.cast(
            backend.equal(true_ids, interesting_class_id), 'int32')
        # 1 where such a sample was also predicted correctly.
        hits = backend.cast(
            backend.equal(true_ids, predicted_ids), 'int32') * in_class
        hit_count = backend.cast(backend.sum(hits), 'float32')
        class_count = backend.cast(
            backend.maximum(backend.sum(in_class), 1), 'float32')
        return hit_count / class_count
    return recall

In [0]:
# Compile with categorical cross-entropy, SGD with momentum, accuracy and one
# recall metric per output class. The class ids are derived from the model's
# output width, so no recall metric is registered for a class the model does
# not have (the previous list included class id 2 for a 2-class model).
# NOTE(review): decay=0.9 is a very strong learning-rate decay setting for
# Keras SGD -- confirm this value is intended.
i3d.compile(loss="categorical_crossentropy",
            optimizer=keras.optimizers.SGD(lr=0.1, momentum=0.9, decay=0.9),
            metrics=['accuracy'] + [single_class_recall(class_id)
                                    for class_id in range(i3d.output_shape[-1])])

In [0]:
# Number of training epochs and the callbacks used during training. Only the
# checkpoint callback is active; the others are kept commented out.
EPOCHS = 20000
callbacks = [
    # keras.callbacks.ReduceLROnPlateau(verbose=1, 
    #                                   monitor='loss',# quantity to monitor (loss)
    #                                   patience=10, # if the loss does not decrease for 10 epochs
    #                                   factor=0.9, # set the learning rate to learning_rate*factor
    #                                   min_lr=1e-4 # lower bound of the learning rate
    #                                   ),
    # Save weights only, whenever the monitored training loss improves.
    keras.callbacks.ModelCheckpoint('drive/My Drive/data/i3d_resnet_GRAY.hdf5', 
                                              monitor='loss', 
                                              verbose=1, save_best_only=True, save_weights_only=True, 
                                              mode='auto', period=1),
    # keras.callbacks.TensorBoard(log_dir='./graph', histogram_freq=0, write_graph=True, update_freq='epoch', write_images=True),
#     keras.callbacks.EarlyStopping(monitor='val_loss',  # quantity to monitor (val_loss)
#                               patience=100,         # stop if there is no improvement for 100 epochs
#                              ),
    # keras.callbacks.TensorBoard(log_dir='./graph', histogram_freq=0, write_graph=True, update_freq='epoch', write_images=True),
]


In [0]:
# Resume from previously saved weights.
# NOTE(review): this expects the checkpoint file to exist already, so it will
# presumably fail on a first run before any weights were saved -- confirm.
i3d.load_weights('drive/My Drive/data/i3d_resnet_GRAY.hdf5')

In [0]:
# Train on the batches produced by the `train` generator for EPOCHS epochs;
# no validation data is passed (the argument is commented out).
i3d.fit_generator(
    train,
    #validation_data=valid,
    verbose=1,
    epochs=EPOCHS,
    callbacks=callbacks
)

In [0]:
# Save the current weights to the same file the checkpoint callback writes to.
i3d.save_weights('drive/My Drive/data/i3d_resnet_GRAY.hdf5')

In [0]:
# Evaluate on the held-out test clip start indices (X_test).
# NOTE(review): the test generator reuses shuffle=True and the generator applies
# a random crop to every frame, so evaluation results are not exactly
# reproducible between runs.
test = DataGenerator(total_file_names = total_file_names, classes = classes, transformation=None, x_path=x_path, nbframe=nbframe, X=X_test, y=y, batch_size=batch_size, dim=dim, n_channels=n_channels, n_classes=n_classes, shuffle = shuffle)
# Returns the loss followed by the metrics configured in compile().
loss_and_metrics = i3d.evaluate(test)
print('## evaluation loss and_metrics ##')
print(loss_and_metrics)

In [0]:
# Display the training generator object (notebook cell output).
train