In [0]:
# %tensorflow_version 1.x
import keras

## ResNet50 + LSTM

In [0]:
from keras.applications.vgg16 import VGG16
from keras.applications.resnet50 import ResNet50
from keras.models import Model
from keras.layers import Dense, Input, Flatten, MaxPooling2D, BatchNormalization, ReLU, Conv2D
from keras.layers.pooling import GlobalAveragePooling2D
from keras.layers.recurrent import LSTM
from keras.layers.wrappers import TimeDistributed
from keras.optimizers import Nadam
from keras.backend import flatten
from keras.activations import relu

IMG_WIDTH = 224
IMG_HEIGHT = 224
frame_len = 16

video = Input(shape=(frame_len,
                     IMG_WIDTH,
                     IMG_HEIGHT,
                     3))
# cnn_base = VGG16(include_top=False,
#                  input_shape=(224,
#                               224,
#                               3),
#                  weights="imagenet",
#                  )
cnn_base = ResNet50(include_top = False,
                    input_shape = (IMG_WIDTH,
                                IMG_HEIGHT, 3),
                    weights="imagenet")
cnn_base.trainable = True
cnn_out = Conv2D(1024, kernel_size=(1,1), strides=(1,1), activation=None)(cnn_base.output)
cnn_out = BatchNormalization()(cnn_out)
cnn_out = ReLU()(cnn_out)
cnn_out = Conv2D(512, kernel_size=(2,2), strides=(2,2), activation=None)(cnn_out)
cnn_out = BatchNormalization()(cnn_out)
cnn_out = ReLU()(cnn_out)
cnn_out = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(cnn_out)
cnn_out = Flatten()(cnn_out)

# cnn_out = GlobalAveragePooling2D()(cnn_base.output)
cnn_out = Dense(output_dim=1024, activation="tanh")(cnn_out)
cnn = Model(input=cnn_base.input, output=cnn_out)
cnn.trainable = True
encoded_frames = TimeDistributed(cnn)(video)
encoded_sequence = LSTM(512, activation='tanh', dropout=0.5, return_sequences = True, name="lstm")(encoded_frames)
encoded_sequence = Flatten()(encoded_sequence)
hidden_layer = Dense(output_dim=1024, activation="relu")(encoded_sequence)
outputs = Dense(output_dim=3, activation="softmax")(hidden_layer)
model = Model([video], outputs)


In [0]:
model.summary()

## video generator install

In [0]:
!pip install keras-video-generators

## Preprocessing method for train

In [0]:
def get_random_crop(image, crop_x, crop_y, crop_height, crop_width):



    crop = image[crop_y: crop_y + crop_height, crop_x: crop_x + crop_width]

    return crop

## Preprocessing method for validation

In [0]:
def crop_center(img, new_size):
    y, x, c = img.shape
    (cropx, cropy) = new_size
    startx = x // 2 - (cropx // 2)
    starty = y // 2 - (cropy // 2)
    return img[starty:starty + cropy, startx:startx + cropx]

## Custom Video generator class for train

In [0]:
from keras_video import VideoFrameGenerator
from math import floor
import cv2
import numpy as np
from keras.preprocessing.image import ImageDataGenerator, img_to_array

class VideoFrameGenerator_RGB(VideoFrameGenerator):
    def _get_frames(self, video, nbframe, shape, force_no_headers=False):
        cap = cv2.VideoCapture(video)
        total_frames = self.count_frames(cap, video, force_no_headers)
        frame_step = int(np.random.randint(total_frames-nbframe, size=1)) # 시작 프레임 랜덤으로 정의
        # TODO: fix that, a tiny video can have a frame_step that is
        # under 1
        frame_step = max(1, frame_step)
        frames = []
        frame_i = 0
        crop_width = 224
        crop_height = 224
        max_x = shape[1] - crop_width
        max_y = shape[0] - crop_height

        crop_x = np.random.randint(0, max_x)
        crop_y = np.random.randint(0, max_y)
        while True:
            grabbed, frame = cap.read()
            if not grabbed:
                break

            frame_i += 1
            if frame_i >= frame_step: # 랜덤으로 정의된 값보다 크면
                # resize
                frame = cv2.resize(frame, shape)  ## 256, 256으로 리사이징
                                
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) ## 모델에 넣기 위해 RGB 채널로 변경
                
                #to np
                frame = img_to_array(frame) 
                
                frame = get_random_crop(frame, crop_x, crop_y,  crop_width, crop_height) ## 데이터 증강을 위한 랜덤 크롭
                frame = (frame - frame.mean()) / frame.std()
                
                frames.append(frame)
            if len(frames) == nbframe:
                break
        cap.release()

        if not force_no_headers and len(frames) != nbframe:
            # There is a problem here
            # That means that frame count in header is wrong or broken,
            # so we need to force the full read of video to get the right
            # frame counter
            return self._get_frames(
                    video,
                    nbframe,
                    shape,
                    force_no_headers=True)

        if force_no_headers and len(frames) != nbframe:
            # and if we really couldn't find the real frame counter
            # so we return None. Sorry, nothing can be done...
            log.error("Frame count is not OK for video %s, "
                      "%d total, %d extracted" % (
                        video, total_frames, len(frames)))
            return None
        return np.array(frames)
    
    def __getitem__(self, index): ## 데이터 증강 후 0~255 사이의 값이기 때문에 255로 나눠줌
        classes = self.classes
        shape = self.target_shape
        nbframe = self.nbframe

        labels = []
        images = []

        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        transformation = None

        for i in indexes:
            # prepare a transformation if provided
            if self.transformation is not None:
                transformation = self._random_trans[i]

            video = self.files[i]
            classname = self._get_classname(video)

            # create a label array and set 1 to the right column
            label = np.zeros(len(classes))
            col = classes.index(classname)
            label[col] = 1.


            frames = self._get_frames(
                video,
                nbframe,
                shape,
                force_no_headers=not self.use_video_header)
            if frames is None:
                # avoid failure, nevermind that video...
                continue

            # apply transformation
            if transformation is not None:
                frames = [self.transformation.apply_transform(
                    frame, transformation) for frame in frames]

            # add the sequence in batch
            images.append(frames)
            labels.append(label)

        return np.array(images), np.array(labels)


## Custom Video generator class for Val

In [0]:
from keras_video import VideoFrameGenerator
from math import floor
import cv2
import numpy as np
class VideoFrameGenerator_RGB_val(VideoFrameGenerator):
    def _get_frames(self, video, nbframe, shape, force_no_headers=False):
        cap = cv2.VideoCapture(video)
        total_frames = self.count_frames(cap, video, force_no_headers)
        frame_step = int(np.random.randint(total_frames-nbframe, size=1))  # 시작 프레임 랜덤으로 정의
        # TODO: fix that, a tiny video can have a frame_step that is
        # under 1
        frame_step = max(1, frame_step)
        frames = []
        frame_i = 0

        while True:
            grabbed, frame = cap.read()
            if not grabbed:
                break

            frame_i += 1
            if frame_i >= frame_step:  # 랜덤으로 정의된 값보다 크면
                # resize
                frame = cv2.resize(frame, shape)  ## 224, 224으로 리사이징
                frame = crop_center(frame, (224, 224))
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  ## 모델에 넣기 위해 RGB 채널로 변경
                
                #to np
                frame = img_to_array(frame)
                frame = (frame - frame.mean()) / frame.std()
                
                frames.append(frame)
            if len(frames) == nbframe:
                break

        cap.release()

        if not force_no_headers and len(frames) != nbframe:
            # There is a problem here
            # That means that frame count in header is wrong or broken,
            # so we need to force the full read of video to get the right
            # frame counter
            return self._get_frames(
                    video,
                    nbframe,
                    shape,
                    force_no_headers=True)

        if force_no_headers and len(frames) != nbframe:
            # and if we really couldn't find the real frame counter
            # so we return None. Sorry, nothing can be done...
            log.error("Frame count is not OK for video %s, "
                      "%d total, %d extracted" % (
                        video, total_frames, len(frames)))
            return None

        return np.array(frames)

## call Generator and data augmentation for train

In [0]:
from keras_video import VideoFrameGenerator
import os
import glob
import keras
NBFRAME = 16
classes = [i.split(os.path.sep)[-1] for i in glob.glob('drive/My Drive/Dataset/final_project/Image/*')]
classes.sort()
# some global params
SIZE = (256, 256)
CHANNELS = 1
BS = 4
# pattern to get videos and classes
glob_pattern='./drive/My Drive/Dataset/final_project/Image/{classname}/*.mp4'

# for data augmentation
data_aug = keras.preprocessing.image.ImageDataGenerator(
    # zoom_range=.2,
    # brightness_range=[0.7, 1.3],
    # rotation_range = 20,
    horizontal_flip=True,
    width_shift_range=.2,
    height_shift_range=.2
    )
# Create video frame generator
train = VideoFrameGenerator_RGB(
    classes=classes, 
    glob_pattern=glob_pattern,
    nb_frames=NBFRAME,
    shuffle=True,
    batch_size=BS,
    target_shape=SIZE,
    nb_channel=CHANNELS,
    transformation=data_aug,
    use_frame_cache=True)

## call Generator for val

In [0]:
glob_pattern='./drive/My Drive/Dataset/final_project/Val/{classname}/*.mp4'

valid = VideoFrameGenerator_RGB_val(
    rescale=1./255,
    classes=classes, 
    glob_pattern=glob_pattern,
    nb_frames=NBFRAME, 
    batch_size=16,
    target_shape=(256, 256),
    nb_channel=CHANNELS,
    use_frame_cache=True)

## define recall method

In [0]:
def single_class_recall(interesting_class_id):
    def recall(y_true, y_pred):
        class_id_true = keras.backend.argmax(y_true, axis=-1)
        class_id_pred = keras.backend.argmax(y_pred, axis=-1)
        recall_mask = keras.backend.cast(keras.backend.equal(class_id_true, interesting_class_id), 'int32')
        class_recall_tensor = keras.backend.cast(keras.backend.equal(class_id_true, class_id_pred), 'int32') * recall_mask
        class_recall = keras.backend.cast(keras.backend.sum(class_recall_tensor), 'float32') / keras.backend.cast(keras.backend.maximum(keras.backend.sum(recall_mask), 1), 'float32')
        return class_recall
    return recall

## model compile

In [0]:
model.compile(loss="categorical_crossentropy",
                optimizer=keras.optimizers.SGD(lr=0.1, momentum=0.9, decay=0.9)
            , metrics=['accuracy', single_class_recall(0), single_class_recall(1), single_class_recall(2)])

# Tensorboard 사용 (For colab)

In [0]:
LOG_DIR = '/tmp/log'
get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
    .format(LOG_DIR)
)

In [0]:
! wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
! unzip ngrok-stable-linux-amd64.zip

In [0]:
get_ipython().system_raw('./ngrok http 6006 &')

In [21]:
! curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

http://6352c9bd.ngrok.io


## define callback methods for train

In [0]:
EPOCHS = 20000
callbacks = [

    keras.callbacks.ModelCheckpoint('drive/My Drive/Dataset/final_project/LSTM_resnet_RGB.hdf5', 
                                              monitor='loss', 
                                              verbose=1, save_best_only=True, save_weights_only=True, 
                                              mode='auto', period=1),
    keras.callbacks.TensorBoard(log_dir='/tmp/log', histogram_freq=0, write_graph=True, update_freq='epoch', write_images=True),
]


## model train

In [19]:
model.fit_generator(
    train,
    validation_data=valid,
    verbose=1,
    epochs=EPOCHS,
    callbacks=callbacks
)




Epoch 1/20000

Epoch 00001: loss improved from inf to 1.40615, saving model to drive/My Drive/Dataset/final_project/LSTM_resnet_RGB.hdf5

Epoch 2/20000

Epoch 00002: loss improved from 1.40615 to 1.09730, saving model to drive/My Drive/Dataset/final_project/LSTM_resnet_RGB.hdf5
Epoch 3/20000

Epoch 00003: loss improved from 1.09730 to 1.09610, saving model to drive/My Drive/Dataset/final_project/LSTM_resnet_RGB.hdf5
Epoch 4/20000

Epoch 00004: loss improved from 1.09610 to 1.09591, saving model to drive/My Drive/Dataset/final_project/LSTM_resnet_RGB.hdf5
Epoch 5/20000

Epoch 00005: loss improved from 1.09591 to 1.09533, saving model to drive/My Drive/Dataset/final_project/LSTM_resnet_RGB.hdf5
Epoch 6/20000

Epoch 00006: loss improved from 1.09533 to 1.09438, saving model to drive/My Drive/Dataset/final_project/LSTM_resnet_RGB.hdf5
Epoch 7/20000

Epoch 00007: loss improved from 1.09438 to 1.09412, saving model to drive/My Drive/Dataset/final_project/LSTM_resnet_RGB.hdf5
Epoch 8/20000