In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import optimizers, regularizers
from tensorflow.keras.models import Sequential, Model ,load_model
from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping, ReduceLROnPlateau
from tensorflow import keras
import os

from tensorflow.keras.applications.efficientnet import EfficientNetB3
import efficientnet.tfkeras as efn 
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import metrics 
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.metrics import classification_report

from sklearn.model_selection import cross_val_score, train_test_split, KFold, StratifiedKFold, cross_validate

import tensorflow.keras.backend as K
import keras_video.utils
from keras_video import VideoFrameGenerator,SlidingFrameGenerator
import glob
import tensorflow_addons as tfa
import pandas as pd
from PIL import Image


###  Swimming Dataset Preparation

In [None]:
from os import listdir
from os.path import isfile, join
# Folder holding every clip of the swimming dataset.
mypath = 'Swimming Dataset/30hz/ALL'
# Names (not full paths) of the regular files found directly inside `mypath`.
onlyfiles = list(filter(lambda name: isfile(join(mypath, name)), listdir(mypath)))

In [None]:
# Lookup tables shared by the whole notebook.
#   '<task>_id'    : integer class index -> label
#   '<task>_alias' : label -> integer class index (inverse of the above)
# 'Backstroke' is currently disabled, so the action indices start at 'Breaststroke'.
dataset_dict = {
    'swimmer_id': dict(enumerate(['Farhan', 'Ivan', 'Steven', 'Dust'])),
    'action_id': dict(enumerate(['Breaststroke', 'Drown', 'Freestyle', 'Safe'])),
}
dataset_dict['action_alias'] = {
    label: index for index, label in dataset_dict['action_id'].items()
}
dataset_dict['swimmer_alias'] = {
    name: index for index, name in dataset_dict['swimmer_id'].items()
}

In [None]:
def parse_dataset(dataset_path, ext='avi'):
    """
    Build a DataFrame describing every ``*.<ext>`` file found directly in ``dataset_path``.

    The file stem (spaces are treated as underscores) must split on ``_`` into exactly
    three parts ``<action index>_<swimmer index>_<anything>``; the two indices are
    translated to labels through ``dataset_dict``.

    Args:
        dataset_path: directory that is globbed for files.
        ext: file extension to look for (without the dot).

    Returns:
        DataFrame with columns ``action``, ``id`` and ``file``. Files whose name cannot be
        parsed are dropped. An empty directory yields an empty frame with those columns.
    """
    def parse_info_from_file(path):
        """
        Return ``(action label, swimmer label)`` for one path, or ``(None, None)`` when
        the file name does not follow the expected pattern.
        """
        try:
            filename = os.path.splitext(os.path.basename(path))[0]
            filename = filename.replace(" ", "_")
            action, identity, _ = filename.split('_')
            return dataset_dict['action_id'][int(action)], dataset_dict['swimmer_id'][int(identity)]
        except (ValueError, KeyError):
            # ValueError: wrong number of '_' parts or a non-integer index.
            # KeyError: an index that dataset_dict does not know about.
            return None, None

    # Sorted so that row positions (and any index-based split) are stable across runs.
    files = sorted(glob.glob(os.path.join(dataset_path, "*.%s" % ext)))
    records = [parse_info_from_file(file) for file in files]

    # Explicit column names keep the frame well-formed even when `files` is empty.
    df = pd.DataFrame(records, columns=['action', 'id'])
    df['file'] = files
    df = df.dropna()

    return df
df = parse_dataset(mypath)
df.head()

In [None]:
import plotly.graph_objects as go
def plot_distribution(pd_series):
    """Display a donut chart of how often each distinct value occurs in ``pd_series``."""
    frequencies = pd_series.value_counts()

    donut = go.Pie(
        labels=frequencies.index.tolist(),
        values=frequencies.values.tolist(),
        hole=.3,
    )
    figure = go.Figure(data=[donut])
    figure.update_layout(
        font=dict(family="'Times New Roman'", size=25),
        title_text='Distribution for %s' % pd_series.name,
    )

    figure.show()
plot_distribution(df['action'])
plot_distribution(df['id'])

In [None]:
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from PIL import Image
from keras.preprocessing.image import ImageDataGenerator, img_to_array
import logging
log = logging.getLogger()
from math import floor


"""
Data generator for the multi-labeled dataset. This class should be used when training our Keras multi-output model.
"""

'''Collect images from the first part of the video clip'''
# Clips that do not satisfy the sequence-length requirement are meant to be ignored.
class SwimmerDataGenerator():
    """
    Batch generator that samples the first ``seq_len`` frames of every clip.

    Relies on module-level configuration defined elsewhere in the notebook:
    ``TRAIN_TEST_SPLIT``, ``seq_len``, ``img_width``, ``img_height`` and ``dataset_dict``.
    """

    def __init__(self, df):
        # DataFrame with at least the columns 'action', 'id' and 'file'.
        self.df = df

    def generate_split_indexes(self):
        """
        Randomly split the row positions of ``self.df``.

        Side effect: adds the integer columns ``action_id`` and ``swimmer_id`` to ``self.df``.

        Returns:
            ``(train_idx, valid_idx, test_idx)``. ``train_idx`` holds the first
            ``TRAIN_TEST_SPLIT`` share of a random permutation; ``valid_idx`` and
            ``test_idx`` are both the remaining rows (there is no separate hold-out set).
        """
        p = np.random.permutation(len(self.df))
        train_up_to = int(len(self.df) * TRAIN_TEST_SPLIT)
        train_idx = p[:train_up_to]
        test_idx = p[train_up_to:]
        valid_idx = p[train_up_to:]

        # converts alias (label text) to integer id
        self.df['action_id'] = self.df['action'].map(lambda label: dataset_dict['action_alias'][label])
        self.df['swimmer_id'] = self.df['id'].map(lambda name: dataset_dict['swimmer_alias'][name])

        return train_idx, valid_idx, test_idx

    def preprocess_image(self, video_path):
        """
        Read the first ``seq_len`` frames of a video.

        Each frame is cast to float32 and resized with ``cv2.resize`` to
        ``(img_width, img_height)``; pixel values are not rescaled.

        Returns:
            Array with ``seq_len`` frames stacked on the first axis, or ``None`` when the
            video ends (or a frame cannot be decoded) before ``seq_len`` frames were read,
            so that callers can skip the clip.
        """
        vidObj = cv2.VideoCapture(video_path)
        frames = []
        try:
            while len(frames) < seq_len:
                success, image = vidObj.read()
                if not success:
                    print("Video has defected frame")
                    return None
                image = image.astype(np.float32)
                frames.append(cv2.resize(image, (img_width, img_height)))
        finally:
            # Always hand the decoder/file handle back, even on early return.
            vidObj.release()

        return np.array(frames)

    def generate_images(self, image_idx, is_training, batch_size=16):
        """
        Yield batches for training/validating/testing the Keras model.

        Args:
            image_idx: iterable of row positions into ``self.df``.
            is_training: when true, loop over ``image_idx`` forever; otherwise stop after
                one pass (a trailing, incomplete batch is not yielded).
            batch_size: number of clips per yielded batch.

        Yields:
            ``(clips, [actions, identities])`` where ``clips`` is a float32 array and the
            two label arrays are one-hot encoded, in that order.

        Raises:
            ValueError: in training mode, when a full pass found no usable clip
                (otherwise the generator would loop forever without yielding).
        """
        # arrays to store our batched data
        images, actions, identities = [], [], []
        while True:
            usable = 0
            for idx in image_idx:
                person = self.df.iloc[idx]

                X = self.preprocess_image(person['file'])
                if X is None:
                    # clip shorter than seq_len: skip it
                    continue
                usable += 1

                actions.append(to_categorical(person['action_id'], len(dataset_dict['action_id'])))
                identities.append(to_categorical(person['swimmer_id'], len(dataset_dict['swimmer_id'])))
                images.append(X)

                # yielding condition
                if len(images) == batch_size:
                    yield np.array(images).astype('float32'), [np.array(actions), np.array(identities)]
                    images, actions, identities = [], [], []

            if not is_training:
                break
            if usable == 0:
                raise ValueError("No usable video clip found for the given indexes")

'''Collect images from entire video clip'''
class SwimmerDataGeneratorAll():
    """
    Batch generator that samples ``seq_len`` frames spread over the whole clip.

    Relies on module-level configuration defined elsewhere in the notebook:
    ``TRAIN_TEST_SPLIT``, ``seq_len``, ``CHANNELS``, ``img_width``, ``img_height``,
    ``dataset_dict`` and the logger ``log``.
    """

    def __init__(self, df):
        # DataFrame with at least the columns 'action', 'id' and 'file'.
        self.df = df
        # video path -> frame count, filled by count_frames()
        self._framecounters = {}

    def generate_split_indexes(self):
        """
        Randomly split the row positions of ``self.df``.

        Side effect: adds the integer columns ``action_id`` and ``swimmer_id`` to ``self.df``.

        Returns:
            ``(train_idx, valid_idx, test_idx)``. ``train_idx`` holds the first
            ``TRAIN_TEST_SPLIT`` share of a random permutation; ``valid_idx`` and
            ``test_idx`` are both the remaining rows.
        """
        p = np.random.permutation(len(self.df))
        train_up_to = int(len(self.df) * TRAIN_TEST_SPLIT)
        train_idx = p[:train_up_to]
        test_idx = p[train_up_to:]
        valid_idx = p[train_up_to:]

        # converts alias (label text) to integer id
        self.df['action_id'] = self.df['action'].map(lambda label: dataset_dict['action_alias'][label])
        self.df['swimmer_id'] = self.df['id'].map(lambda name: dataset_dict['swimmer_alias'][name])

        return train_idx, valid_idx, test_idx

    def count_frames(self, cap, name, force_no_headers=False):
        """
        Count the frames of a video.

        Args:
            cap: open capture for the video; only its header frame count is queried.
            name: path of the video (cache key; re-opened for a full read when needed).
            force_no_headers: ignore header and cache and count by decoding every frame.

        Returns:
            The frame count. The result is remembered per ``name`` and reused by later
            calls made with ``force_no_headers=False``.
        """
        if not force_no_headers and name in self._framecounters:
            return self._framecounters[name]

        total = cap.get(cv2.CAP_PROP_FRAME_COUNT)

        if force_no_headers or total < 0:
            # headers not ok
            total = 0
            # A second capture is opened so that the read position of "cap"
            # is left untouched.
            c = cv2.VideoCapture(name)
            try:
                while True:
                    grabbed, _ = c.read()
                    if not grabbed:
                        break
                    total += 1
            finally:
                c.release()

        # keep the result
        self._framecounters[name] = total

        return total

    def get_frames(self, video, nbframe, nb_channel, force_no_headers=False):
        """
        Pick ``nbframe`` frames spread across ``video``.

        The first frame, every ``frame_step``-th frame and the last frame are kept until
        ``nbframe`` frames were collected. Frames are resized to
        ``(img_width, img_height)`` and converted to RGB (``nb_channel == 3``) or
        grayscale (any other value).

        Returns:
            Array of the selected frames, or ``None`` when ``nbframe`` frames could not be
            collected even after recounting the frames by a full read.
        """
        cap = cv2.VideoCapture(video)
        try:
            total_frames = self.count_frames(cap, video, force_no_headers)
            orig_total = total_frames
            if total_frames % 2 != 0:
                total_frames += 1
            # max(1, ...) on the divisor: nbframe == 1 must not divide by zero.
            frame_step = floor(total_frames / max(1, nbframe - 1))
            # a tiny video can have a frame_step that is under 1
            frame_step = max(1, frame_step)
            frames = []
            frame_i = 0

            while True:
                grabbed, frame = cap.read()
                if not grabbed:
                    break

                frame_i += 1
                if frame_i == 1 or frame_i % frame_step == 0 or frame_i == orig_total:
                    # resize
                    frame = cv2.resize(frame, (img_width, img_height))

                    # OpenCV decodes frames in BGR channel order, so both
                    # conversions start from BGR.
                    if nb_channel == 3:
                        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    else:
                        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

                    # to np
                    frame = img_to_array(frame)

                    # keep frame
                    frames.append(frame)

                if len(frames) == nbframe:
                    break
        finally:
            cap.release()

        if not force_no_headers and len(frames) != nbframe:
            # The frame count in the header is wrong or broken, so force a
            # full read of the video to get the right frame counter.
            return self.get_frames(
                    video,
                    nbframe,
                    nb_channel,
                    force_no_headers=True)

        if force_no_headers and len(frames) != nbframe:
            # Even the full read did not give enough frames: give up on this clip.
            log.error("Frame count is not OK for video %s, "
                      "%d total, %d extracted" % (
                        video, total_frames, len(frames)))
            return None

        return np.array(frames)

    def generate_images(self, image_idx, is_training, batch_size=16):
        """
        Yield batches for training/validating/testing the Keras model.

        Args:
            image_idx: iterable of row positions into ``self.df``.
            is_training: when true, loop over ``image_idx`` forever; otherwise stop after
                one pass (a trailing, incomplete batch is not yielded).
            batch_size: number of clips per yielded batch.

        Yields:
            ``(clips, [actions, identities])`` where ``clips`` is a float32 array and the
            two label arrays are one-hot encoded, in that order.

        Raises:
            ValueError: in training mode, when a full pass found no usable clip
                (otherwise the generator would loop forever without yielding).
        """
        # arrays to store our batched data
        images, actions, identities = [], [], []
        while True:
            usable = 0
            for idx in image_idx:
                person = self.df.iloc[idx]

                X = self.get_frames(person['file'], seq_len, CHANNELS, force_no_headers=False)
                if X is None:
                    # not enough frames could be extracted: skip the clip
                    continue
                usable += 1

                actions.append(to_categorical(person['action_id'], len(dataset_dict['action_id'])))
                identities.append(to_categorical(person['swimmer_id'], len(dataset_dict['swimmer_id'])))
                images.append(X)

                # yielding condition
                if len(images) == batch_size:
                    yield np.array(images).astype('float32'), [np.array(actions), np.array(identities)]
                    images, actions, identities = [], [], []

            if not is_training:
                break
            if usable == 0:
                raise ValueError("No usable video clip found for the given indexes")
             

### Attention Spatial Module

In [None]:
def channel_attention(input_feature, ratio=2):
    """
    Re-weight the channels of ``input_feature``.

    Global average- and max-pooled descriptors are passed through the same two Dense
    layers (bottleneck of ``channel // ratio`` units), added, squashed with a sigmoid and
    multiplied onto ``input_feature``.
    """
    channels_first = K.image_data_format() == "channels_first"
    channel = input_feature.shape[1 if channels_first else -1]

    # The two Dense layers are shared by the average- and max-pooled branches.
    dense_options = dict(kernel_initializer='he_normal',
                         use_bias=True,
                         bias_initializer='zeros')
    shared_layer_one = Dense(channel//ratio, activation='relu', **dense_options)
    shared_layer_two = Dense(channel, **dense_options)

    def pooled_branch(pooling_layer):
        """Pool ``input_feature`` globally and run the result through the shared layers."""
        descriptor = pooling_layer(input_feature)
        descriptor = tf.keras.layers.Reshape((1,1,channel))(descriptor)
        assert descriptor.shape[1:] == (1,1,channel)
        descriptor = shared_layer_one(descriptor)
        assert descriptor.shape[1:] == (1,1,channel//ratio)
        descriptor = shared_layer_two(descriptor)
        assert descriptor.shape[1:] == (1,1,channel)
        return descriptor

    avg_pool = pooled_branch(GlobalAveragePooling2D())
    max_pool = pooled_branch(tf.keras.layers.GlobalMaxPooling2D())

    cbam_feature = tf.keras.layers.Add()([avg_pool,max_pool])
    cbam_feature = Activation('sigmoid')(cbam_feature)

    if channels_first:
        cbam_feature = Permute((3, 1, 2))(cbam_feature)

    return multiply([input_feature, cbam_feature])

def spatial_attention(input_feature):
    """
    Re-weight the spatial positions of ``input_feature``.

    The mean and the max over axis 3 are concatenated, reduced to one sigmoid-activated
    map by a 3x3 convolution and multiplied onto ``input_feature``.
    """
    #kernel_size = 7
    kernel_size = 3
    channels_first = K.image_data_format() == "channels_first"

    # Work on a channels-last view so that axis 3 is always the channel axis.
    cbam_feature = Permute((2,3,1))(input_feature) if channels_first else input_feature

    channel_mean = Lambda(lambda x: K.mean(x, axis=3, keepdims=True))(cbam_feature)
    assert channel_mean.shape[-1] == 1
    channel_max = Lambda(lambda x: K.max(x, axis=3, keepdims=True))(cbam_feature)
    assert channel_max.shape[-1] == 1
    stacked = Concatenate(axis=3)([channel_mean, channel_max])
    assert stacked.shape[-1] == 2

    attention_conv = Conv2D(filters = 1,
                            kernel_size=kernel_size,
                            strides=1,
                            padding='same',
                            activation='sigmoid',
                            kernel_initializer='he_normal',
                            use_bias=False)
    cbam_feature = attention_conv(stacked)
    assert cbam_feature.shape[-1] == 1

    if channels_first:
        cbam_feature = Permute((3, 1, 2))(cbam_feature)

    return multiply([input_feature, cbam_feature])



# LR Scheduler

In [None]:
class LossLearningRateScheduler(tf.keras.callbacks.History):
    """
    Learning-rate scheduler driven by the recorded loss history.

    Attributes:
        base_lr: learning rate applied while no more than ``lookback_epochs`` epochs
            have been recorded.
        lookback_epochs: how far back in the loss history the comparison value is taken.
        decay_threshold / decay_multiple: when the loss improved by no more than
            ``|latest loss| * decay_threshold * lookback_epochs``, the learning rate is
            multiplied by ``decay_multiple``.
        spike_epochs: optional collection of epoch counts at which the learning rate
            is spiked.
        spike_multiple: factor applied to the current learning rate for a spike.
        loss_type: key of the history entry that is watched.
    """

    def __init__(self, base_lr, lookback_epochs, spike_epochs = None, spike_multiple = 10, decay_threshold = 0.002, decay_multiple = 0.40, loss_type = 'val_loss'):

        super().__init__()

        self.base_lr, self.lookback_epochs = base_lr, lookback_epochs
        self.spike_epochs, self.spike_multiple = spike_epochs, spike_multiple
        self.decay_threshold, self.decay_multiple = decay_threshold, decay_multiple
        self.loss_type = loss_type

    def on_epoch_begin(self, epoch, logs=None):
        """Adjust the optimizer's learning rate and return the value now in effect."""
        backend = tf.keras.backend

        # Warm-up: not enough history yet, pin the learning rate to base_lr.
        if len(self.epoch) <= self.lookback_epochs:
            print('Setting learning rate to', self.base_lr)
            backend.set_value(self.model.optimizer.lr, self.base_lr)
            return backend.get_value(self.model.optimizer.lr)

        current_lr = backend.get_value(self.model.optimizer.lr)
        losses = self.history[self.loss_type]

        improvement = losses[-int(self.lookback_epochs)] - losses[-1]
        required = np.abs(losses[-1]) * (self.decay_threshold * self.lookback_epochs)

        if improvement <= required:
            decayed_lr = current_lr * self.decay_multiple
            print('Changing learning rate from', current_lr, 'to', decayed_lr)
            backend.set_value(self.model.optimizer.lr, decayed_lr)
            current_lr = decayed_lr
        else:
            print('Learning rate:', current_lr)

        # A spike is applied on top of whatever the decay step decided.
        if self.spike_epochs is not None and len(self.epoch) in self.spike_epochs:
            spiked_lr = current_lr * self.spike_multiple
            print('Spiking learning rate from', current_lr, 'to', spiked_lr)
            backend.set_value(self.model.optimizer.lr, spiked_lr)

        return backend.get_value(self.model.optimizer.lr)

### Multi-Task Model

In [None]:

# Frame geometry: every frame is resized to img_height x img_width with CHANNELS channels.
img_height = img_width = 100
SIZE = (img_height, img_width)
CHANNELS = 3
# Number of frames per clip.
seq_len = 15
embedding_size = 128
# Number of classes of the action head and of the re-identification head.
act_class = id_class = 4
# (frames, height, width, channels)
Model_input_size = (seq_len,) + SIZE + (CHANNELS,)

In [None]:

# Fixed seed so that the train/validation split is the same on every run.
SPLIT_SEED = 42
np.random.seed(SPLIT_SEED)

TRAIN_TEST_SPLIT = 0.3
data_generator = SwimmerDataGenerator(df)
# generate_split_indexes() returns (first TRAIN_TEST_SPLIT share, rest, rest).
# The first slot is deliberately unpacked into valid_idx, so validation gets the
# 30 % share and training the remaining 70 %.
valid_idx, train_idx, test_idx = data_generator.generate_split_indexes()

# The whole-clip sampler reuses the SAME split. Drawing a second, independent
# permutation would let one video end up in training (through one sampler) and in
# validation (through the other). Both generators share `df`, which already carries
# the 'action_id' / 'swimmer_id' columns added by the call above.
data_generator_add = SwimmerDataGeneratorAll(df)
valid_idx_add, train_idx_add, test_idx_add = valid_idx, train_idx, test_idx

# One batch == the whole subset, so a single next() materialises it in memory.
batch_size = len(train_idx)
valid_batch_size = len(valid_idx)
train_gen = data_generator.generate_images(train_idx, is_training=True, batch_size=batch_size)
valid_gen = data_generator.generate_images(valid_idx, is_training=True, batch_size=valid_batch_size)
X_train , y_train = next(train_gen)
X_val , y_val = next(valid_gen)
train_gen_all = data_generator_add.generate_images(train_idx_add, is_training=True, batch_size=batch_size)
valid_gen_all = data_generator_add.generate_images(valid_idx_add, is_training=True, batch_size=valid_batch_size)
X_train_all , y_train_all = next(train_gen_all)
X_val_all , y_val_all = next(valid_gen_all)

# Stack the "first frames" samples and the "whole clip" samples.
X_all = np.concatenate((X_train, X_train_all), axis=0)
X_valid_all = np.concatenate((X_val, X_val_all), axis=0)

# The generators yield labels as [actions, identities] ...
y_train_all1 = np.concatenate((y_train[0], y_train_all[0]))   # actions
y_train_all2 = np.concatenate((y_train[1], y_train_all[1]))   # identities
y_valid_all1 = np.concatenate((y_val[0], y_val_all[0]))       # actions
y_valid_all2 = np.concatenate((y_val[1], y_val_all[1]))       # identities

# ... but the model is built with outputs=[reid_output, action_output] and Keras
# matches a list of targets to the outputs by position. The targets are therefore
# ordered [identities, actions]; index 0 is re-identification, index 1 is action.
y_train_all = [y_train_all2, y_train_all1]
y_valid_all = [y_valid_all2, y_valid_all1]


print('Number of training data: ',len(y_train_all[0]))
print('Number of validation data: ',len(y_valid_all[0]))

In [None]:
from keras_self_attention import SeqSelfAttention
def fusion_attention_lstm(image_input_shape,height,width):
    """
    Build the shared feature extractor: a per-frame EfficientNetB3 backbone followed by
    a temporal branch and a spatial branch whose outputs are concatenated.

    Args:
        image_input_shape: not used by this function (the input shape is built from
            the module-level ``seq_len`` and the ``height``/``width`` arguments).
        height, width: frame size fed to the backbone; 3 channels are hard-coded.

    Returns:
        ``(f, y_act, y_id, input_image)``: the fused feature tensor, two label
        ``Input`` placeholders (created here but not connected to any layer in this
        function) and the video ``Input``.
    """
    y_act= Input(shape=(act_class,),name = 'input_action')
    y_id= Input(shape=(id_class,),name = 'input_reid')
#     input_image = Input(shape=image_input_shape)
    input_image = Input(batch_shape=(None, seq_len,height, width, 3))
    eff_model=efn.EfficientNetB3(input_shape=(height, width, 3),
                                 include_top=False,
                                 weights='noisy-student')
    # Backbone is cut at the 'block7a_project_bn' layer and applied to every frame.
    model_backbone = Model(eff_model.input,eff_model.get_layer('block7a_project_bn').output)
    timeDistributed_layer = tf.keras.layers.TimeDistributed(model_backbone)(input_image)
    print("TimeDistributed", timeDistributed_layer.shape)
    
#     '''Temporal'''
    # Temporal branch: one pooled vector per frame -> LSTM -> self-attention ->
    # average- and max-pooling over the sequence, concatenated.
    t = tf.keras.layers.TimeDistributed(GlobalAveragePooling2D())(timeDistributed_layer)
    t = LSTM(256, return_sequences=True, input_shape=(t.shape[1],t.shape[2]), name="lstm_layer_in")(t)
    t = SeqSelfAttention(attention_activation='sigmoid')(t)
    avg_pool = GlobalAveragePooling1D()(t)
    max_pool = GlobalMaxPooling1D()(t)
    t = concatenate([avg_pool, max_pool])
    
    t = Dropout(0.3)(t)
    print("Temporal: ", t.shape)
    
    '''Spatial'''
    # Spatial branch: feature maps averaged over axis 1 (the frame axis of the
    # TimeDistributed output), then two SeparableConv2D + spatial_attention stages.
    s = tf.math.reduce_mean(timeDistributed_layer, axis=1)  
    s = SeparableConv2D(filters = 512, kernel_size = (3, 3), padding = 'same')(s)
    s = spatial_attention(s)
    s = SeparableConv2D(filters = 512, kernel_size = (3, 3), padding = 'same')(s)
    s = spatial_attention(s)
    s = BatchNormalization()(s)
    a = GlobalAveragePooling2D()(s)
    c = Dropout(0.3)(a)
    # Prints the shape before global pooling (s), not the pooled vector (c).
    print("Spatial: ", s.shape)
        
    '''Fusion'''
    # Fusion: pooled spatial vector and temporal vector side by side.
    f = tf.keras.layers.Concatenate()([c, t])
    f = Dropout(0.3)(f)
    print("Fusion: ", f.shape)
    
    return f,y_act,y_id,input_image

def fc_action(x,y):
    """
    Action-recognition head: Dense(1024) -> PReLU -> BatchNormalization ->
    Dropout(0.5) -> softmax over ``act_class`` classes. ``y`` is not used.
    """
    head_layers = (
        Dense(1024, name="fusion_dense1"),
        PReLU(),
        BatchNormalization(),
        Dropout(0.5),
        Dense(act_class, activation='softmax',name="action_output"),
    )
    features = x
    for layer in head_layers:
        features = layer(features)
    return features

def fc_reid(x,y):
    """
    Re-identification head: Dense(1024, relu) -> PReLU -> BatchNormalization ->
    Dropout(0.5) -> softmax over ``id_class`` classes. ``y`` is not used.
    """
    head_layers = (
        Dense(1024, activation='relu'),
        PReLU(),
        BatchNormalization(),
        Dropout(0.5),
        Dense(id_class, activation='softmax',name='reid_output'),
    )
    features = x
    for layer in head_layers:
        features = layer(features)
    return features

def classification_reid(x):
    """
    Project ``x`` onto ``id_class`` units with a Dense layer named 'softmax_id'.
    No activation is passed to the layer, despite its name.
    """
    id_projection = Dense(id_class, name='softmax_id')
    return id_projection(x)

def get_lr_metric(optimizer):
    """Return a metric-style function named ``lr`` that reports ``optimizer.lr``."""
    def lr(y_true, y_pred):
        # Both arguments are ignored; only the optimizer's attribute is read.
        current_rate = optimizer.lr
        return current_rate

    return lr

In [None]:
lr_init = 5e-4
def create_model_fusion(image_input_shape,height,width,lr_init,compile_model):
    """
    Assemble the two-headed model and print its summary.

    Args:
        image_input_shape, height, width: forwarded to ``fusion_attention_lstm``.
        lr_init: initial learning rate of the LazyAdam optimizer.
        compile_model: when equal to 1 (``True`` qualifies) the model is compiled with
            categorical cross-entropy and accuracy on both heads; otherwise it is
            returned uncompiled.

    Returns:
        Keras model with outputs ``[reid_output, action_output]``, in that order.
    """
    fused, y_act, y_id, input_image = fusion_attention_lstm(image_input_shape,height,width)
    softmax_id = fc_reid(fused, y_id)
    softmax_action = fc_action(fused, y_act)
    optimizer = tfa.optimizers.LazyAdam(lr_init)
    # Built as in the original flow; it is not passed to compile().
    lr_metric = get_lr_metric(optimizer)

    model = tf.keras.models.Model(inputs=input_image, outputs=[softmax_id,softmax_action])

    if compile_model == 1:
        head_names = ('reid_output', 'action_output')
        # No loss_weights are set: both heads contribute equally.
        model.compile(
            loss={head: tf.keras.losses.CategoricalCrossentropy() for head in head_names},
            optimizer=optimizer,
            metrics={head: 'accuracy' for head in head_names},
        )

    model.summary()

    return model
model = create_model_fusion(Model_input_size,img_height, img_width,lr_init,compile_model=True)


In [None]:
%%time
# File that ModelCheckpoint (below) writes the best model to.
model_path = "MODEL_MTDNN_Swim.hdf5"
# NOTE(review): checkpoint_path / checkpoint_dir are not referenced again in this
# cell — confirm whether they are still needed.
checkpoint_path = "multi_model"
checkpoint_dir = os.path.dirname(checkpoint_path)

# Loss-driven learning-rate schedule watching the 'loss' history entry with a
# lookback of 6 epochs.
callback_adapt = LossLearningRateScheduler(base_lr=lr_init, lookback_epochs=6,loss_type = 'loss')

# NOTE(review): checkpointing and early stopping both monitor 'loss' rather than
# 'val_loss', although validation data is passed to fit() — confirm this is intended.
checkpoint = ModelCheckpoint(filepath=model_path,
                             monitor='loss',
                             verbose=1,
                             save_best_only=True)

# Stops after 10 epochs without improvement of the monitored value; the weights of
# the last epoch are kept (restore_best_weights=False).
stop = EarlyStopping(monitor='loss', patience = 10,
                      verbose=0, mode='auto', baseline=None, 
                      restore_best_weights=False)
callbacks = [stop, callback_adapt,checkpoint]

# y_train_all / y_valid_all are lists with one target array per model output.
history = model.fit(x = X_all, y = y_train_all, class_weight=None,epochs=150, batch_size = 4 , 
                    shuffle=True, validation_data=(X_valid_all,y_valid_all),verbose=1,callbacks=callbacks)

# Training Curve

In [None]:
height_plot,width_plot=400,800
import matplotlib.pyplot as plt
plt.rcParams["font.family"] = "Times New Roman"


def plot_training_curve(history_dict, metric_key, title, yaxis_title):
    """
    Show the training and validation series of one metric as a plotly line chart.

    Args:
        history_dict: mapping of metric name to per-epoch values (``History.history``);
            must contain both ``metric_key`` and ``'val_' + metric_key``.
        metric_key: name of the training metric, e.g. ``'action_output_loss'``.
        title: figure title.
        yaxis_title: label of the y axis.

    Returns:
        The plotly figure that was shown.
    """
    fig = go.Figure()
    fig.add_trace(go.Scatter(y=history_dict[metric_key], name='Train'))
    fig.add_trace(go.Scatter(y=history_dict['val_' + metric_key], name='Valid'))
    fig.update_layout(height=height_plot,
                      width=width_plot,
                      font=dict(
                        family="'Times New Roman'",size=18),
                      title=title,
                      xaxis_title='Epoch',
                      yaxis_title=yaxis_title)
    fig.show()
    return fig


# One figure per (head, metric) pair. The figures are plotly figures, so no
# matplotlib figure has to be cleared between them.
for metric_key, title, yaxis_title in (
        ('action_output_accuracy', 'Accuracy for Action Recognition', 'Accuracy'),
        ('action_output_loss', 'Loss for Action Recognition', 'Loss'),
        ('reid_output_accuracy', 'Accuracy for Re-identification ', 'Accuracy'),
        ('reid_output_loss', 'Loss for Re-identification', 'Loss'),
):
    fig = plot_training_curve(history.history, metric_key, title, yaxis_title)

In [None]:
from tensorflow.keras.utils import plot_model
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

In [None]:
# model.save_weights('multi_model')

In [None]:
# model.save('multi_model')

# Testing

In [None]:

model = create_model_fusion(Model_input_size,img_height, img_width,lr_init,compile_model = False)
model.load_weights("MODEL_MTDNN_3.hdf5")

In [None]:
model.layers[-1].output

In [None]:
model = Model(inputs=[model.input],outputs=[model.layers[-2].output,model.layers[-1].output])
# model = Model(inputs=[model.input],outputs=[model.layers[-1].output])

In [None]:
def preprocess_TEST(video_path,seq_len):
    """
    Read the first ``seq_len`` frames of a video for inference.

    Each frame is cast to float32 and resized with ``cv2.resize`` to the module-level
    ``(img_width, img_height)``; pixel values are not rescaled.

    Args:
        video_path: path of the video file.
        seq_len: number of frames to read.

    Returns:
        The stacked frames with all length-1 axes squeezed out, or an empty array when
        the video ends (or a frame cannot be decoded) before ``seq_len`` frames were
        read; in that case "Video has defected frame" is printed.
    """
    vidObj = cv2.VideoCapture(video_path)
    frames = []
    try:
        while len(frames) < seq_len:
            success, image = vidObj.read()
            if not success:
                # Report before leaving the loop (the message used to be unreachable).
                print("Video has defected frame")
                break
            image = image.astype(np.float32)
            frames.append(cv2.resize(image, (img_width, img_height)))
    finally:
        # Always hand the decoder/file handle back.
        vidObj.release()

    # Only a complete sequence is returned; a short clip yields an empty array.
    X = [frames] if len(frames) == seq_len else []
    return np.squeeze(np.array(X))

In [None]:
path_test = "Multi-label-dataset/30hz/TEST"


In [None]:
vidObj = cv2.VideoCapture("2_0 (42).avi")

In [None]:
success, image = vidObj.read()

In [None]:
image.shape

In [None]:
np.set_printoptions(suppress=True)
# file = "0_1 (6).avi"
file="test-multi/1_2 (10).avi"
test_len = 15
X = preprocess_TEST(file,test_len)
# X.extend(X)
X = np.expand_dims(X, axis=0)
X.shape

In [None]:
%%time

embedded_features = model.predict(X, verbose=1)
# embedded_features /= np.linalg.norm(embedded_features, axis=1, keepdims=True)

In [None]:
embedded_features

In [None]:
np.argmax(embedded_features[0],axis=-1)

In [None]:
y_pred_test=np.argmax(embedded_features,axis=-1)

In [None]:
print(*y_pred_test[0])

# Evaluation

In [None]:

model = create_model_fusion(Model_input_size,img_height, img_width,lr_init,compile_model = False)
model.load_weights("MODEL_MTDNN_new.hdf5")

In [None]:
from sklearn.metrics import *

In [None]:
model = Model(inputs=[model.input],outputs=[model.layers[-2].output,model.layers[-1].output])

In [None]:
X_valid_all.shape

In [None]:
yhat_probs = model.predict(X_valid_all, verbose=0)

## re-identification

In [None]:
list_id = list(dataset_dict['swimmer_alias'].keys())
print(list_id[::-1])

In [None]:
# Predicted swimmer class per validation clip (argmax over the class axis).
# NOTE(review): assumes index 0 of both yhat_probs and y_valid_all belongs to the
# re-identification task — confirm against the order in which the targets are built.
yhat_classes = np.argmax(yhat_probs[0],axis=1)
yhat_probs1 = yhat_probs[0][:, 0]
y_valid_all1=np.argmax(y_valid_all[0],axis=1)

# Class names in class-index order (0 = Farhan ... 3 = Dust), NOT alphabetical:
# classification_report pairs target_names with the labels position by position.
swimmer_labels = sorted(dataset_dict['swimmer_id'])
swimmer_names = [dataset_dict['swimmer_id'][i] for i in swimmer_labels]
cr_race = classification_report(y_valid_all1, yhat_classes,
                                labels=swimmer_labels,
                                target_names=swimmer_names)
print(cr_race)

In [None]:
matrix = confusion_matrix(y_valid_all1, yhat_classes)
print(matrix)

### Confusion Matrix

In [None]:
from sklearn.metrics import confusion_matrix
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

# Axis labels in class-index order (0 = Farhan ... 3 = Dust). Sorting them
# alphabetically (as np.unique does) would attach the wrong name to each row/column.
y_true = [dataset_dict['swimmer_id'][i] for i in sorted(dataset_dict['swimmer_id'])]
y_pred = list(y_true)
# sklearn's confusion_matrix has the true classes on the rows and the predicted
# classes on the columns.
df_cm = pd.DataFrame(matrix, index=y_true, columns=y_pred)
df_cm.index.name = 'Actual'
df_cm.columns.name = 'Predicted'
font = {'family': 'Times New Roman', 'size': 25, }

plt.figure(figsize = (10,7))
plt.title('Identification confusion matrix',fontdict=font)
sn.set(font_scale=1.4)#for label size
sn.heatmap(df_cm, cmap="Blues", annot=True,annot_kws={"size": 16})# font size

## action recognition

In [None]:
# predict crisp classes for test set
# Predicted action class per validation clip (argmax over the class axis).
# NOTE(review): assumes index 1 of both yhat_probs and y_valid_all belongs to the
# action task — confirm against the order in which the targets are built.
yhat_classes2 = np.argmax(yhat_probs[1],axis=1)
# Stored under its own name: overwriting yhat_probs would make this cell (and the
# re-identification cell) fail when run a second time.
yhat_probs2 = yhat_probs[1][:, 0]
y_valid_all2=np.argmax(y_valid_all[1],axis=1)

# Class names in class-index order, paired position by position with the labels.
action_labels = sorted(dataset_dict['action_id'])
action_names = [dataset_dict['action_id'][i] for i in action_labels]
cr_race = classification_report(y_valid_all2, yhat_classes2,
                                labels=action_labels,
                                target_names=action_names)
print(cr_race)

In [None]:
matrix = confusion_matrix(y_valid_all2, yhat_classes2)
print(matrix)

In [None]:
dataset_dict['action_alias'].keys()

### Confusion Matrix

In [None]:
# Confusion matrix for action recognition
# from sklearn.metrics import confusion_matrix
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
font = dict(family='Times New Roman', size=25)
# Action names used as tick labels; np.unique returns them in sorted order.
y_true = ['Breaststroke', 'Drown', 'Freestyle', 'Safe']
y_pred = list(y_true)
df_cm = pd.DataFrame(
    matrix,
    index=np.unique(y_pred),
    columns=np.unique(y_true),
)
# df_cm.index.name = 'Actual'
# df_cm.columns.name = 'Predicted'
plt.figure(figsize=(10, 8))
plt.title('Action recognition confusion matrix', fontdict=font)
sn.set(font_scale=1.4)  # label size
sn.heatmap(df_cm, annot=True, annot_kws={"size": 16}, cmap="Blues")  # "size" is the annotation font size

# Grad-CAM Visualization

In [None]:
from gradcamplusplus.gradcam import grad_cam_plus, grad_cam

In [None]:

# model = create_model_fusion(Model_input_size,img_height, img_width,lr_init,compile_model = False)

# Load weights from "model.hdf5" (path relative to the working directory) into the `model`
# object that already exists in the kernel; the constructor call above is disabled.
model.load_weights("model.hdf5")

# model.load_weights('multi_model/variables/variables')
# model = Model(inputs=model.input[0], outputs=model.layers[-1].output)

In [None]:
# NOTE(review): within this part of the notebook `X` is first assigned further down
# (X = next(test_data)); unless it is also defined earlier, this cell fails on a fresh
# top-to-bottom run.
X[0][0].shape

In [None]:

from keras_video import VideoFrameGenerator,SlidingFrameGenerator

def frame_generator(video_path,classes,NBFRAME,BS,CHANNELS,SIZE):
    """Build a keras_video VideoFrameGenerator over the given clips.

    Args:
        video_path: glob pattern of the video files, forwarded as ``glob_pattern``.
        classes:    class names, forwarded as ``classes``.
        NBFRAME:    forwarded as ``nb_frames``.
        BS:         forwarded as ``batch_size``.
        CHANNELS:   forwarded as ``nb_channel``.
        SIZE:       forwarded as ``target_shape``.
    Return:
        The configured VideoFrameGenerator (shuffling on, frame cache off,
        a default ImageDataGenerator as the transformation).
    """
    generator_options = dict(
        classes=classes,
        glob_pattern=video_path,
        nb_frames=NBFRAME,
        shuffle=True,
        batch_size=BS,
        target_shape=SIZE,
        nb_channel=CHANNELS,
        transformation=ImageDataGenerator(),
        use_frame_cache=False,
    )
    return VideoFrameGenerator(**generator_options)

In [None]:
# Print the model architecture; presumably consulted to pick the layer_name values
# passed to grad_cam_plus below.
model.summary()

In [None]:
def show_imgwithheat(img_path, heatmap, alpha=0.4, return_array=False):
    """Overlay a heat map on an image array and display the result.

    Args:
        img_path: image array. Despite the name it is used directly as pixel
                  data and is not read from disk.
        heatmap:  heat map array; it is resized to the image's width/height and
                  multiplied by 255, so values are presumably expected in
                  [0, 1] — confirm against grad_cam_plus().
        alpha:    float, weight of the colour-mapped heat map added to the image.
        return_array: bool, return the superimposed image array or not.
    Return:
        The superimposed uint8 image array when return_array is True,
        otherwise None.
    """
    base = img_path
    rows, cols = base.shape[0], base.shape[1]

    # Bring the heat map to the image size, then colour-map it with JET.
    scaled = (cv2.resize(heatmap, (cols, rows)) * 255).astype("uint8")
    coloured = cv2.applyColorMap(scaled, cv2.COLORMAP_JET)

    # Weighted sum, clipped back into the 8-bit range.
    blended = np.clip(coloured * alpha + base, 0, 255).astype("uint8")
    # Swap the first and third channels before handing the array to PIL.
    blended = cv2.cvtColor(blended, cv2.COLOR_BGR2RGB)

    display(Image.fromarray(blended))

    return blended if return_array else None

In [None]:

# Frame geometry and clip length used for the test generator and the model input shape.
img_height = img_width = 150
SIZE = (img_height, img_width)
CHANNELS = 3
seq_len = 15
# Embedding width and number of action / identity classes (as the names suggest).
embedding_size = 128
act_class = id_class = 4
# Per-sample input shape: (frames, height, width, channels).
Model_input_size = (seq_len, *SIZE, CHANNELS)

In [None]:
# test_files ='test3/{classname}/*.avi'
# Root folder with one sub-folder per class. Declared once so the class listing and the
# generator pattern cannot drift apart.
# NOTE(review): machine-specific absolute path — point this at the local copy of the data.
TEST_DATA_ROOT = 'C:/Users/farha/Documents/GitHub/ConvLSTM-action-recognition/our_data/30hz'
# Class name = last path component of every entry under the root. basename does not depend on
# how many OS separators the matched path contains, unlike the former split(os.path.sep)[1],
# which only gave the folder name when the path held exactly one such separator.
classes = sorted(os.path.basename(entry) for entry in glob.glob(TEST_DATA_ROOT + '/*'))
test_files = TEST_DATA_ROOT + '/{classname}/*.avi'
# Batches of 64 clips, each seq_len frames long.
test_data = frame_generator(test_files,classes,seq_len,64,CHANNELS,SIZE)

In [None]:
# Pull one batch from the generator; X[0] is what the cells below index and feed to Grad-CAM++.
X = next(test_data)

In [None]:
# Shape of the first element of the batch (presumably the stacked input clips — confirm).
X[0].shape

In [None]:
# Shape of a single entry (index 30) of that first batch element.
X[0][30].shape

In [None]:
# The two identifiers were left in this cell as bare names, which raises NameError when the
# cell is executed. They are kept as strings instead — presumably layer names noted from
# model.summary() as candidates for grad_cam_plus's layer_name argument (confirm).
GRADCAM_LAYER_CANDIDATES = ("multiply_3", "batch_normalization_3")
GRADCAM_LAYER_CANDIDATES

In [None]:
import random
# Pick a random sample from the batch. The bound comes from the batch itself: the former
# hard-coded randint(0, 62) could never select the last sample of the 64-item batch and
# would index out of range on a smaller one.
data = random.randrange(len(X[0]))
# data = 25
print(data)
# Rescale the chosen clip by 255 before passing it to Grad-CAM++.
x=X[0][data]*255
# One heat map per output branch of the model, both taken at the same layer.
heatmap_id = grad_cam_plus(model, x,branch = 'id',layer_name="batch_normalization", label_name=classes,category_id=None)
heatmap_act = grad_cam_plus(model, x,branch = 'action',layer_name="batch_normalization", label_name=classes,category_id=None)


In [None]:

# Index of the frame shown under the heat maps (the last one if the clip has seq_len = 15 frames).
_frame =14
# Same *255 rescaling as applied to the Grad-CAM++ input.
img = X[0][data][_frame]*255
# Despite its name this is only the channel-swapped frame; no heat map has been added to it.
superimposed_img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# The overlays are displayed only: return_array keeps its default (False), so nothing is returned.
show_imgwithheat(superimposed_img, heatmap_id,alpha=0.7)
show_imgwithheat(superimposed_img, heatmap_act,alpha=0.7)

In [None]:
# NOTE(review): at this point `superimposed_img` is the channel-swapped frame without any heat
# map (the overlay built inside show_imgwithheat is local to that function and its return value
# was not kept) — confirm this is the image meant to be saved.
cv2.imwrite('drown-ivan.png',superimposed_img)

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing import image
class ReshapeLayer(tf.keras.layers.Layer):
    """Layer that prepends a batch axis of length 1 to its input."""

    def call(self,inputs):
        # The previous `(1) + inputs.shape[0:]` added the integer 1 (not the tuple `(1,)`) to
        # the shape, which only works where the shape object coerces the int, and it needed
        # every dimension to be statically known for tf.reshape. expand_dims states the intent
        # directly and yields the same (1, *inputs.shape) result.
        return tf.expand_dims(inputs, axis=0)
def preprocess_img(img_path, target_size=(224,224)):
    """Load an image file and return it with a leading batch axis.

    No pixel scaling is applied.

    Args:
        img_path:  A string, path of the image file.
        target_size: A tuple, size passed to the image loader.
    Return:
        The result of applying ReshapeLayer to the loaded image array.
    """
    pixels = image.img_to_array(image.load_img(img_path, target_size=target_size))
    return ReshapeLayer()(pixels)
# Single still image used for the Grad-CAM++ call below; `img` now refers to this tensor
# (it previously held a video frame).
# NOTE(review): resized to 100x100 here, whereas the constants above describe 150x150 frames in
# 15-frame clips — confirm this matches the input the loaded model expects.
img_path = 'test-multi/frame_14_4 (69).png'
img = preprocess_img(img_path,target_size=(100,100))

In [None]:
# Shape of the image once the leading axis added by preprocess_img is indexed away.
img[0].shape

In [None]:
# "acation" was a misspelling; the other Grad-CAM++ call for this head passes branch='action'.
heatmap = grad_cam_plus(model, img, branch ="action",layer_name="batch_normalization_3", label_name=None,category_id=None)


In [None]:
def show_imgwithheat1(img_path, heatmap, alpha=0.4, return_array=False):
    """Read an image from disk, overlay a heat map on it and display the result.

    Args:
        img_path: string, path of the image file (read with cv2.imread).
        heatmap:  heat map array, e.g. the result of grad_cam_plus().
        alpha:    float, weight of the colour-mapped heat map added to the image.
        return_array: bool, return a superimposed image array or not.
    Return:
        None or image array.
    Raises:
        FileNotFoundError: if cv2.imread cannot load img_path.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None rather than raising;
        # fail here with a clear message instead of an AttributeError on img.shape.
        raise FileNotFoundError(f"Could not read image: {img_path}")
    # From here on the steps are the same as in show_imgwithheat, which takes the pixel array
    # directly, so reuse it instead of keeping a second copy of the overlay code.
    return show_imgwithheat(img, heatmap, alpha=alpha, return_array=return_array)
# Display the heat map computed above over the still image at img_path.
show_imgwithheat1(img_path, heatmap,alpha=0.6)