In [None]:

import tensorflow as tf
import os
# os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 
#os.environ["CUDA_VISIBLE_DEVICES"]="0,1,2"
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import optimizers, regularizers
from tensorflow.keras.models import Sequential, Model ,load_model
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping, ReduceLROnPlateau
from tensorflow import keras

from tensorflow.keras.applications.efficientnet import EfficientNetB3
import efficientnet.tfkeras as efn 
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import metrics 
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.metrics import classification_report

from sklearn.model_selection import cross_val_score, train_test_split, KFold, StratifiedKFold, cross_validate

import tensorflow.keras.backend as K
import keras_video.utils
from keras_video import VideoFrameGenerator,SlidingFrameGenerator
import glob
import tensorflow_addons as tfa
import pandas as pd
from PIL import ImageOps

In [None]:
# tf.test.is_gpu_available() is deprecated; list the physical GPU devices
# instead and report whether at least one is visible to TensorFlow.
len(tf.config.list_physical_devices('GPU')) > 0

In [None]:
dataset = 'UCF101'  # dataset folder name: 'UCF101' or 'HMDB51'

# Read one class name per line. Surrounding whitespace (including the trailing
# newline) is stripped, and blank lines are skipped so that an empty line in
# the file cannot turn into an empty class name.
with open(dataset+'/classInd.txt') as f:
    classes = sorted(line.strip() for line in f if line.strip())
len(classes)

In [None]:
# Create video frame generator
def frame_generator(video_path, classes, NBFRAME, BS, CHANNELS, SIZE, sliding_time):
    """Build a shuffled ``SlidingFrameGenerator`` for the videos matching ``video_path``.

    Args:
        video_path: glob pattern forwarded as ``glob_pattern``.
        classes: class names forwarded as ``classes``.
        NBFRAME: forwarded as ``nb_frames``.
        BS: forwarded as ``batch_size``.
        CHANNELS: forwarded as ``nb_channel``.
        SIZE: forwarded as ``target_shape``.
        sliding_time: forwarded as ``sequence_time``.

    Returns:
        The constructed ``SlidingFrameGenerator`` (frame cache disabled).
    """
    # No augmentation option is configured: the ImageDataGenerator is built
    # with its defaults and handed to the generator as the transformation.
    augmenter = ImageDataGenerator()

    generator_options = dict(
        sequence_time=sliding_time,
        classes=classes,
        glob_pattern=video_path,
        nb_frames=NBFRAME,
        shuffle=True,
        batch_size=BS,
        target_shape=SIZE,
        nb_channel=CHANNELS,
        transformation=augmenter,
        use_frame_cache=False,
    )
    return SlidingFrameGenerator(**generator_options)

In [None]:
def spatial_attention(input_feature, kernel_size=3):
    """Re-weight a 4-D feature map with a single-channel spatial attention mask.

    The channel axis is reduced by mean and by max, the two maps are
    concatenated and passed through one sigmoid convolution, and the resulting
    one-channel mask is multiplied with the input.

    Args:
        input_feature: Keras tensor in the backend's image data format.
        kernel_size: kernel size of the mask convolution (default 3, the value
            previously hard-coded in this function).

    Returns:
        ``input_feature`` multiplied element-wise by the attention mask.
    """
    channels_first = K.image_data_format() == "channels_first"

    # Work in channels-last layout so that axis 3 is always the channel axis.
    if channels_first:
        cbam_feature = Permute((2,3,1))(input_feature)
    else:
        cbam_feature = input_feature

    avg_pool = Lambda(lambda x: K.mean(x, axis=3, keepdims=True))(cbam_feature)
    assert avg_pool.shape[-1] == 1
    max_pool = Lambda(lambda x: K.max(x, axis=3, keepdims=True))(cbam_feature)
    assert max_pool.shape[-1] == 1
    concat = Concatenate(axis=3)([avg_pool, max_pool])
    assert concat.shape[-1] == 2
    cbam_feature = Conv2D(filters = 1,
                    kernel_size=kernel_size,
                    strides=1,
                    padding='same',
                    activation='sigmoid',
                    kernel_initializer='he_normal',
                    use_bias=False)(concat)
    assert cbam_feature.shape[-1] == 1

    # Restore the original layout before multiplying with the input.
    if channels_first:
        cbam_feature = Permute((3, 1, 2))(cbam_feature)

    return multiply([input_feature, cbam_feature])



# Adaptive LR Scheduler

In [None]:
class LossLearningRateScheduler(tf.keras.callbacks.History):
    """
    A learning rate scheduler that relies on changes in loss function
    value to dictate whether learning rate is decayed or not.

    The callback subclasses ``History`` so that the per-epoch values of the
    monitored metric are available in ``self.history``.

    LossLearningRateScheduler has the following properties:
    base_lr: the starting learning rate; it is (re)applied at the start of
        every epoch until more than ``lookback_epochs`` epochs are recorded.
    lookback_epochs: the number of epochs in the past to compare with the most
        recently recorded loss to determine if progress is being made.
    decay_threshold / decay_multiple: if the loss has not improved by more than
        ``abs(latest_loss) * decay_threshold * lookback_epochs``, the learning
        rate is multiplied by ``decay_multiple``.
    spike_epochs: list of the epoch counts where the learning rate is spiked
        (checked only once the lookback window is filled).
    spike_multiple: the multiple applied to the current learning rate for a spike.
    loss_type: key of the monitored metric in ``self.history``; a ``KeyError``
        is raised if that metric was never logged.
    """

    def __init__(self, base_lr, lookback_epochs, spike_epochs = None, spike_multiple = 10, decay_threshold = 0.002, decay_multiple = 0.50, loss_type = 'val_loss'):

        super(LossLearningRateScheduler, self).__init__()

        self.base_lr = base_lr
        self.lookback_epochs = lookback_epochs
        self.spike_epochs = spike_epochs
        self.spike_multiple = spike_multiple
        self.decay_threshold = decay_threshold
        self.decay_multiple = decay_multiple
        self.loss_type = loss_type


    def on_epoch_begin(self, epoch, logs=None):
        """Decay, spike or reset the optimizer learning rate before an epoch.

        Returns the learning rate in effect after the adjustment.
        """

        if len(self.epoch) > self.lookback_epochs:

            current_lr = tf.keras.backend.get_value(self.model.optimizer.lr)

            target_loss = self.history[self.loss_type]

            # The latest recorded loss is target_loss[-1]; the value recorded
            # `lookback_epochs` epochs before it sits at -(lookback_epochs + 1).
            # Indexing with -lookback_epochs would look back one epoch too few
            # (and compare the loss with itself when lookback_epochs == 1).
            # The guard above guarantees this index exists.
            reference_loss = target_loss[-(int(self.lookback_epochs) + 1)]
            loss_diff = reference_loss - target_loss[-1]

            if loss_diff <= np.abs(target_loss[-1]) * (self.decay_threshold * self.lookback_epochs):

                print(' '.join(('Changing learning rate from', str(current_lr), 'to', str(current_lr * self.decay_multiple))))
                tf.keras.backend.set_value(self.model.optimizer.lr, current_lr * self.decay_multiple)
                current_lr = current_lr * self.decay_multiple

            else:

                print(' '.join(('Learning rate:', str(current_lr))))

            # A spike is applied on top of whatever rate resulted from the decay check.
            if self.spike_epochs is not None and len(self.epoch) in self.spike_epochs:
                print(' '.join(('Spiking learning rate from', str(current_lr), 'to', str(current_lr * self.spike_multiple))))
                tf.keras.backend.set_value(self.model.optimizer.lr, current_lr * self.spike_multiple)

        else:

            # Not enough history yet: keep training at the base learning rate.
            print(' '.join(('Setting learning rate to', str(self.base_lr))))
            tf.keras.backend.set_value(self.model.optimizer.lr, self.base_lr)


        return tf.keras.backend.get_value(self.model.optimizer.lr)

# Backbone Layers

In [None]:
# eff_model = efn.EfficientNetB3(weights='imagenet', include_top = False)
# model_backbone = Model(eff_model.input,eff_model.output)
# pd.set_option('max_colwidth', -1)
# layers = [(layer, layer.name, layer.trainable) for layer in model_backbone.layers]
# dt = pd.DataFrame(layers, columns=['Layer Type', 'Layer Name', 'Layer Trainable'])
# dt.to_csv("layers.csv", index=False)


## LIGHTWEIGHT ONE-IN-TWO STREAM ATTENTION-BASED DNN

In [None]:
from keras_self_attention import SeqSelfAttention
def fusion_attention_lstm(image_input_shape, n_class, height, width):
    """Build the shared backbone plus the temporal and spatial branches and fuse them.

    Args:
        image_input_shape: shape of one input clip (without the batch axis).
        n_class: number of classes; only used to size the auxiliary input ``y``.
        height: frame height given to the EfficientNetB3 backbone.
        width: frame width given to the EfficientNetB3 backbone.

    Returns:
        Tuple ``(f, y, input_image)``: the fused feature tensor, the auxiliary
        ``Input`` of shape ``(n_class,)`` and the clip ``Input``.
    """
    y = Input(shape=(n_class,))
    input_image = Input(shape=image_input_shape)

    # Backbone: EfficientNetB3 truncated at 'block7a_project_bn', applied to every frame.
    backbone_full = efn.EfficientNetB3(input_shape=(height, width, 3),
                                       include_top=False,
                                       weights='noisy-student')
    frame_encoder = Model(backbone_full.input,
                          backbone_full.get_layer('block7a_project_bn').output)
    frame_features = tf.keras.layers.TimeDistributed(frame_encoder)(input_image)
    print("TimeDistributed", frame_features.shape)

    # Temporal branch: per-frame pooling -> LSTM -> self-attention -> avg/max pooling over time.
    temporal = tf.keras.layers.TimeDistributed(GlobalAveragePooling2D())(frame_features)
    temporal = LSTM(256,
                    return_sequences=True,
                    input_shape=(temporal.shape[1], temporal.shape[2]),
                    name="lstm_layer_in")(temporal)
    temporal = SeqSelfAttention(attention_activation='sigmoid')(temporal)
    time_avg = GlobalAveragePooling1D()(temporal)
    time_max = GlobalMaxPooling1D()(temporal)
    temporal = concatenate([time_avg, time_max])
    temporal = Dropout(0.3)(temporal)
    print("Temporal: ", temporal.shape)

    # Spatial branch: frame-averaged feature map -> two (separable conv + spatial attention) stages.
    spatial = tf.math.reduce_mean(frame_features, axis=1)
    for _ in range(2):
        spatial = SeparableConv2D(filters=512, kernel_size=(3, 3), padding='same')(spatial)
        spatial = spatial_attention(spatial)
    spatial = BatchNormalization()(spatial)
    spatial_vector = GlobalAveragePooling2D()(spatial)
    spatial_vector = Dropout(0.3)(spatial_vector)
    print("Spatial: ", spatial.shape)

    # Fusion: concatenate the spatial vector with the temporal vector.
    f = tf.keras.layers.Concatenate()([spatial_vector, temporal])
    f = Dropout(0.3)(f)
    print("Fusion: ", f.shape)

    return f, y, input_image

def fc_action(x, n_class, y):
    """Apply the classification head to the fused features ``x``.

    The head is Dense(1024) -> PReLU -> BatchNormalization -> Dropout(0.5)
    -> Dense(n_class, softmax). ``y`` is accepted but not used.

    Returns:
        The softmax output tensor with ``n_class`` units.
    """
    head_layers = (
        Dense(1024, name="fusion_dense1"),
        PReLU(),
        BatchNormalization(),
        Dropout(0.5),
        Dense(n_class, activation='softmax', name="action_output"),
    )
    for layer in head_layers:
        x = layer(x)
    return x

# Model Initialization

In [None]:
lr_init = 1e-4
def create_model_fusion(image_input_shape,n_class,height,width,lr_init):
    """Assemble and compile the fused spatial/temporal action-recognition model.

    Args:
        image_input_shape: shape of one input clip (without the batch axis).
        n_class: number of output classes.
        height: frame height given to the backbone.
        width: frame width given to the backbone.
        lr_init: learning rate for the LazyAdam optimizer.

    Returns:
        The compiled ``tf.keras`` model (categorical cross-entropy loss,
        accuracy metric).
    """
    fused_features, y, input_image = fusion_attention_lstm(image_input_shape,n_class,height,width)
    softmax_action = fc_action(fused_features,n_class,y)
    model = tf.keras.models.Model(inputs=input_image, outputs=softmax_action)
    # `lr` is a deprecated alias of `learning_rate` in Keras optimizers.
    opt = tfa.optimizers.LazyAdam(learning_rate=lr_init)
#     model.load_weights("ucf_model/UCF_MTDNN_2.h5")
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=["accuracy"])
    return model



# Callback & Compile

In [None]:
def scheduler(epoch, lr =lr_init):
    """Return a fixed learning rate chosen by the epoch index.

    Epochs up to 5 -> 0.0, up to 10 -> 0.00005, up to 20 -> 0.000005,
    above 20 -> 0.00001. ``lr`` is accepted but not used.
    """
    # (inclusive upper epoch bound, learning rate), checked in ascending order.
    stages = ((5, 0.000), (10, 0.00005), (20, 0.000005))
    for last_epoch, rate in stages:
        if epoch <= last_epoch:
            return rate
    if epoch > 20:
        return 0.00001


def run_model_generator(Model_input_size,img_height,img_width,data_train,data_test,epoch,n_split,lr_init):
    """Build, train and checkpoint the fusion model for one dataset split.

    Reads the module-level names ``classes`` (number of classes), ``dataset``
    (checkpoint file prefix) and ``BS`` (batch size used for the step count).

    Args:
        Model_input_size: shape of one input clip (without the batch axis).
        img_height: frame height given to the backbone.
        img_width: frame width given to the backbone.
        data_train: generator yielding training batches.
        data_test: generator yielding validation batches.
        epoch: maximum number of training epochs.
        n_split: split index, used in the checkpoint file name.
        lr_init: initial learning rate (optimizer and adaptive scheduler base).

    Returns:
        A one-element list holding the best ``val_accuracy`` seen in training.
    """
    n_class = len(classes)
    model = create_model_fusion(Model_input_size,n_class,img_height, img_width,lr_init)
    model.summary()

    model_path = dataset+"_model_"+str(n_split)+".h5"

    callback_adapt = LossLearningRateScheduler(base_lr=lr_init, lookback_epochs=3)

    checkpoint = ModelCheckpoint(filepath=model_path,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True)
    stop = EarlyStopping(monitor='val_loss', patience = 10,
                          verbose=0, mode='auto', baseline=None,
                          restore_best_weights=False)
    callbacks = [checkpoint, stop,callback_adapt]

    # NOTE(review): 9537 is presumably the number of training videos in the
    # split - confirm. The step count is cast to int because `//` on a float
    # yields a float.
    steps_per_epoch = int((9537 * 0.7) // BS)
    eval_per_epoch = 100

    # Model.fit accepts generators directly; fit_generator is deprecated.
    history = model.fit(data_train,
                        epochs=epoch,
                        shuffle=True,
                        steps_per_epoch=steps_per_epoch,
                        validation_data = data_test,
                        validation_steps=eval_per_epoch,
                        callbacks=callbacks)

    return [np.max(history.history['val_accuracy'])]
    


In [None]:
# Training length and input geometry shared by the cells below.
epoch = 200
img_height = img_width = 299
SIZE = (img_height, img_width)
CHANNELS = 3
NBFRAME = 5
sliding_time = 4
# (frames, height, width, channels)
Model_input_size = (NBFRAME,) + SIZE + (CHANNELS,)
BS = 4
seq_len = NBFRAME
stride = 1

# Split 1 Train

In [None]:
print("***Load split 1***")
# Arguments shared by the training and test generators.
generator_args = (classes, NBFRAME, BS, CHANNELS, SIZE, sliding_time)

train_files = dataset + '/train1/{classname}/*.avi'
train_data = frame_generator(train_files, *generator_args)

test_files = dataset + '/test1/{classname}/*.avi'
test_data = frame_generator(test_files, *generator_args)

In [None]:
%%time

# Train on split 1; the result is the list returned by run_model_generator.
split1_acc = run_model_generator(
    Model_input_size=Model_input_size,
    img_height=img_height,
    img_width=img_width,
    data_train=train_data,
    data_test=test_data,
    epoch=epoch,
    n_split=1,
    lr_init=lr_init,
)
print("Split 1 Accuracy : ", split1_acc)

# Split 2 Train

In [None]:
print("***Load split 2***")
# Arguments shared by the training and test generators.
generator_args = (classes, NBFRAME, BS, CHANNELS, SIZE, sliding_time)

train_files = dataset + '/train2/{classname}/*.avi'
train_data = frame_generator(train_files, *generator_args)

test_files = dataset + '/test2/{classname}/*.avi'
test_data = frame_generator(test_files, *generator_args)

In [None]:
%%time   
# Train on split 2; the result is the list returned by run_model_generator.
split2_acc = run_model_generator(
    Model_input_size=Model_input_size,
    img_height=img_height,
    img_width=img_width,
    data_train=train_data,
    data_test=test_data,
    epoch=epoch,
    n_split=2,
    lr_init=lr_init,
)
print("Split 2 Accuracy : ", split2_acc)

# Split 3 Train

In [None]:
print("***Load split 3***")
# Arguments shared by the training and test generators.
generator_args = (classes, NBFRAME, BS, CHANNELS, SIZE, sliding_time)

train_files = dataset + '/train3/{classname}/*.avi'
train_data = frame_generator(train_files, *generator_args)

test_files = dataset + '/test3/{classname}/*.avi'
test_data = frame_generator(test_files, *generator_args)

In [None]:
%%time
# Train on split 3; the result is the list returned by run_model_generator.
split3_acc = run_model_generator(
    Model_input_size=Model_input_size,
    img_height=img_height,
    img_width=img_width,
    data_train=train_data,
    data_test=test_data,
    epoch=epoch,
    n_split=3,
    lr_init=lr_init,
)
print("Split 3 Accuracy : ", split3_acc)

In [None]:
# Mean of three hard-coded values, summed in the same order as written.
sum([90.25, 91.75, 90]) / 3

# 3 Split Accuracy (Mean)

In [None]:
# Best accuracy recorded for each split, then their arithmetic mean.
best_split1 = np.max(split1_acc)
best_split2 = np.max(split2_acc)
best_split3 = np.max(split3_acc)

print("Split 1 Accuracy : ", best_split1)
print("Split 2 Accuracy : ", best_split2)
print("Split 3 Accuracy : ", best_split3)
print("3 Split Accuracy (Mean): ", (best_split1 + best_split2 + best_split3) / 3)

# Inference Testing

In [None]:
# create_model_fusion takes (image_input_shape, n_class, height, width, lr_init);
# the extra positional argument that used to follow the input shape raised a
# TypeError and has been removed.
model = create_model_fusion((5,299,299,3), 101, 299, 299, 1e-4)


In [None]:
def frames_extraction(video_path, c, X, Y, Xf, Yf, img_width, img_height,sscnt,stride,seq_len, isTraining):
    """Append the first ``seq_len``-frame clip of a video (and its label) to ``X``/``Y``.

    Frames are read with OpenCV, converted to float32 and resized to
    ``(img_width, img_height)``. Reading stops as soon as one clip has been
    collected or the video yields no more frames. ``X`` and ``Y`` are modified
    in place; ``Xf`` and ``Yf`` are returned untouched.

    Args:
        video_path: path of the video file to read.
        c: class name of the video; must be an element of the global ``classes``.
        X: list receiving clips (each a list of frames).
        Y: list receiving one-hot label lists of length ``len(classes)``.
        Xf, Yf: passed through unchanged.
        img_width, img_height: target frame size.
        sscnt: running clip counter, incremented when a clip is collected.
        stride: a frame is kept when the running frame counter is a multiple of it.
        seq_len: number of frames per clip.
        isTraining: when truthy, a horizontally flipped copy and a zoomed copy
            of the clip are appended as well. The zoomed copy relies on
            ``clipped_zoom``, which is not defined in the visible part of this
            notebook and must be provided elsewhere.

    Returns:
        Tuple ``(X, Y, Xf, Yf, sscnt)``.
    """
    vidObj = cv2.VideoCapture(video_path)
    # Used as counter variable
    count = 1

    tmp_frames = []
    zoom_frames = []

    try:
        while True:
            success, image = vidObj.read()
            if not success:
                # End of stream or unreadable frame.
                break

            count += 1
            if count % stride == 0:
                image = image.astype(np.float32)
                image = cv2.resize(image, (img_width, img_height))
                tmp_frames.append(image)

                if isTraining:
                    zoom_frames.append(clipped_zoom(image, 1.2))

            if len(tmp_frames) == seq_len:
                sscnt += 1
                X.append(tmp_frames)

                y = [0]*len(classes)
                y[classes.index(c)] = 1
                Y.append(y)

                if isTraining:
                    X.append([cv2.flip(t, 1) for t in tmp_frames])
                    Y.append(y)
                    X.append(zoom_frames)
                    Y.append(y)

                # Only the first clip of each video is used.
                break
    finally:
        # Always free the capture handle, even if frame processing fails.
        vidObj.release()

    return X, Y, Xf, Yf, sscnt
 
def create_data(input_dir,stride,seq_len,img_width, img_height, isTraining):
    """Load one clip per video from ``input_dir/<class>/`` for every class in ``classes``.

    Args:
        input_dir: directory containing one sub-directory per class name.
        stride: frame stride forwarded to ``frames_extraction``.
        seq_len: number of frames per clip.
        img_width, img_height: target frame size.
        isTraining: forwarded to ``frames_extraction`` (enables its extra
            flipped/zoomed clips when truthy).

    Returns:
        Tuple ``(X, Y)`` of NumPy arrays built from the collected clips and
        their one-hot labels.
    """
    X, Y, Xf, Yf = [], [], [], []

    for c in classes:
        print(c)
        class_dir = os.path.join(input_dir, c)
        sscnt = 0  # clip counter restarts for every class
        # os.listdir returns entries in arbitrary order; sort them so that the
        # sample order is reproducible between runs.
        for f in sorted(os.listdir(class_dir)):
            X, Y, Xf, Yf, sscnt = frames_extraction(os.path.join(class_dir, f), c, X, Y, Xf, Yf, img_width, img_height,sscnt,stride,seq_len, isTraining)

    X = np.asarray(X)
    Y = np.asarray(Y)
    print(X.shape)
    return X, Y


In [None]:

# Directory handed to create_data; augmentation is off (isTraining=0).
test3 = "test1"
_X, _Y = create_data(
    input_dir=test3,
    stride=stride,
    seq_len=seq_len,
    img_width=img_width,
    img_height=img_height,
    isTraining=0,
)

In [None]:
# Shape of the clip array returned by create_data.
_X.shape

In [None]:

# Prepend a batch axis of size 1 to the first clip.
x = np.asarray(_X[0])[np.newaxis, ...]

In [None]:
# Shape of the single-clip batch (first clip with a leading axis of size 1).
x.shape

In [None]:
# create_model_fusion takes (image_input_shape, n_class, height, width, lr_init);
# the extra positional argument that used to follow the input shape raised a
# TypeError and has been removed.
model = create_model_fusion(_X[0].shape, 101, img_height, img_width, lr_init)

In [None]:
%%time
# Forward pass on the single-clip batch.
z = model.predict(x=x)