# Setup

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import random
import cv2
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Sequential, load_model, Model
from keras.layers import Input, average, concatenate, GlobalAveragePooling2D
from keras.layers import TimeDistributed, GlobalAveragePooling1D
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.models import Sequential, load_model, Model
from pathlib import Path
import os

from keras.layers.core import Dense,Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

In [2]:
# Dataset locations (relative to the notebook's working directory)
path = './data/hmdb51'
path_rowframes = './data/hmdb51/rawframes/'
path_annotations = './data/hmdb51/annotations/'

# Shared settings
num_classes = 51
batch_size = 256
img_height = img_width = 224
image_shape = (img_height, img_width)

# Temporal batch generator settings
opt_flow_len = 10  # optical-flow frames per stacked optical flow (snippet)
num_of_snip = 1    # snippets sampled per video

# Evaluation settings
fuse_method = 'average'


# Batch Generation Setting

Tentativo di creare un generator che invii simultaneamente, per ogni video, 1 frame e 1 stacked optical flow, rispettivamente allo stream spaziale e allo stream temporale.

In [82]:
class DataSet():
    """Batch source feeding a two-stream (spatial + temporal) network.

    For every video listed in the annotation file the class loads RGB frames
    (spatial input) and stacked x/y optical-flow images (temporal input) and
    groups them into batches of ``[spatial, temporal], labels``.

    Only ``partition='val'`` is supported.
    """

    def __init__(self,
                 num_of_snip=1,
                 opt_flow_len=10,
                 image_shape=(224, 224),
                 partition='val',
                 batch_size=16,
                 rawframes_path=None,
                 annotations_path=None):
        """Store the sampling settings and load the annotation table.

        Args:
            num_of_snip: number of snippets sampled per video.
            opt_flow_len: number of optical-flow frames per stacked optical
                flow (snippet); each frame contributes an x and a y image.
            image_shape: crop size, also passed to ``cv2.resize`` as the
                output size.
            partition: dataset split; only ``'val'`` is accepted.
            batch_size: number of videos per batch (must be >= 1).
            rawframes_path: optional override for the raw-frames directory;
                defaults to the notebook-level ``path_rowframes``.
            annotations_path: optional override for the annotations
                directory; defaults to the notebook-level
                ``path_annotations``.

        Raises:
            ValueError: if ``batch_size`` is smaller than 1 or the partition
                is not supported.
        """
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        self.opt_flow_len = opt_flow_len
        self.num_of_snip = num_of_snip
        self.image_shape = image_shape
        # Fall back to the notebook-level configuration when no override is given.
        self.opt_flow_path = os.path.join(
            path_rowframes if rawframes_path is None else rawframes_path)
        self.path_annotations = (
            path_annotations if annotations_path is None else annotations_path)
        self.partition = partition
        self.batch_size = batch_size

        # Get data
        self.video_list = self.find_videos_and_metadata()
        # Ceiling division: the trailing partial batch is kept, so every video
        # is visited once per epoch and small datasets still yield one batch.
        self.n_batch = -(-len(self.video_list) // self.batch_size)

    def find_videos_and_metadata(self):
        """Read the annotation file of the selected partition.

        Returns:
            pandas.DataFrame with columns ``path``, ``num_frames_tot`` and
            ``class`` (one row per video).

        Raises:
            ValueError: if the partition is not ``'val'``.
        """
        if self.partition != 'val':
            raise ValueError("invalid partition")
        annotation_file = os.path.join(self.path_annotations,
                                       'hmdb51_val_split_1_rawframes.txt')
        video_list = pd.read_csv(annotation_file, sep=" ", header=None)
        video_list.columns = ["path", "num_frames_tot", "class"]
        return video_list

    def _build_batch(self, batch_idx):
        """Load the inputs and labels of batch number ``batch_idx``."""
        first_row = batch_idx * self.batch_size
        batch_rows = self.video_list.iloc[first_row:first_row + self.batch_size]

        X_spatial_batch = []
        X_temporal_batch = []
        y_batch = []
        for _, row in batch_rows.iterrows():
            X_spatial, X_temporal = self.find_frame_and_stacked_optical_flows(row)
            X_spatial_batch.append(X_spatial)
            X_temporal_batch.append(X_temporal)
            y_batch.append(np.squeeze(np.array(row['class'])))

        X_batch = [np.array(X_spatial_batch), np.array(X_temporal_batch)]
        return X_batch, np.array(y_batch)

    def val_generator(self):
        """Yield ``(X_batch, y_batch)`` forever, cycling over all batches.

        Batches are produced in annotation order starting from the first
        one; after ``n_batch`` batches the sequence restarts.

        Raises:
            ValueError: on first iteration if the annotation table is empty.
        """
        if self.n_batch == 0:
            raise ValueError("no videos available: cannot generate batches")
        print(f"Creating validation generator with {len(self.video_list)} samples.")
        idx = 0
        while True:
            yield self._build_batch(idx)
            idx = (idx + 1) % self.n_batch

    def val_generator1(self):
        """Return a single batch (the first one ``val_generator`` yields).

        Handy for inspecting shapes without driving the generator.

        Raises:
            ValueError: if the annotation table is empty.
        """
        if self.n_batch == 0:
            raise ValueError("no videos available: cannot generate batches")
        return self._build_batch(0)

    @staticmethod
    def _read_image(video_dir, prefix, frame_idx, grayscale=False):
        """Read ``./<video_dir>/<prefix>_<frame_idx:05d>.jpg`` with OpenCV.

        Raises:
            FileNotFoundError: if OpenCV cannot read the file
                (``cv2.imread`` signals that by returning ``None``).
        """
        # os.path.join keeps the original layout on Windows and also works
        # on POSIX systems, unlike hard-coded backslashes.
        file_path = os.path.join('.', str(video_dir),
                                 f'{prefix}_{int(frame_idx):05d}.jpg')
        # Flag 0 makes OpenCV load the image as single-channel grayscale.
        image = cv2.imread(file_path, 0) if grayscale else cv2.imread(file_path)
        if image is None:
            raise FileNotFoundError(f"could not read image: {file_path}")
        return image

    def _load_flow_frame(self, video_dir, axis, frame_idx, crop):
        """Load one optical-flow image and apply the flow preprocessing.

        Steps, in order: subtract the mean of the full image, crop to
        ``crop`` (top, bottom, left, right), divide by 255 and resize to
        ``self.image_shape``.
        """
        flow = self._read_image(video_dir, f'flow_{axis}', frame_idx, grayscale=True)
        flow = flow - np.mean(flow)  # mean subtraction over the whole frame
        top, bottom, left, right = crop
        flow = flow[top:bottom, left:right]
        flow = flow / 255.  # normalize pixels
        return cv2.resize(flow, self.image_shape)

    def find_frame_and_stacked_optical_flows(self, row):
        """Load the spatial frames and the stacked optical flow of one video.

        Args:
            row: annotation row providing ``path`` and ``num_frames_tot``.

        Returns:
            Tuple ``(static_frames, opt_flow_stack)``: the array of RGB
            frames (one per snippet, scaled to [0, 1] and resized) and the
            flow stack with the 2 * ``opt_flow_len`` flow images on the last
            axis.

        Raises:
            ValueError: if the video has fewer frames than ``opt_flow_len``.
            FileNotFoundError: if a frame or flow image cannot be read.

        NOTE(review): only the flow stack of the LAST snippet is returned,
        which is fine for ``num_of_snip == 1`` - confirm for larger values.
        """
        video_dir = row['path']

        # Temporal parameters
        total_frames = row['num_frames_tot']
        n_start_positions = total_frames - self.opt_flow_len + 1
        if n_start_positions < 1:
            raise ValueError(
                f"video '{video_dir}' has {total_frames} frames, fewer than "
                f"opt_flow_len={self.opt_flow_len}")
        if n_start_positions < self.num_of_snip:
            # Not enough distinct start positions: reuse them cyclically.
            loop = True
            start_frame_window_len = 1
        else:
            loop = False
            # starting frame selection window length
            start_frame_window_len = n_start_positions // self.num_of_snip

        # Spatial parameters: one crop window per video, sized from the first
        # RGB frame (assumes the flow images share that size - TODO confirm).
        # NOTE(review): the crop position is random even though this is the
        # validation partition, so repeated evaluations are not reproducible.
        img_test = self._read_image(video_dir, 'img', 1, grayscale=True)
        crop_height, crop_width = self.image_shape[0], self.image_shape[1]
        # max(..., 0) keeps the offsets non-negative when a frame is smaller
        # than the crop; the slice then covers the whole frame.
        top = int(max(img_test.shape[0] - crop_height, 0) * random.random())
        left = int(max(img_test.shape[1] - crop_width, 0) * random.random())
        crop = (top, top + crop_height, left, left + crop_width)

        static_frames = []
        opt_flow_stack = []
        for i_snip in range(self.num_of_snip):
            if loop:
                start_frame = i_snip % n_start_positions + 1
            else:
                # Centre of the i-th selection window (frame numbers are 1-based).
                start_frame = (int(0.5 * start_frame_window_len + 0.5)
                               + start_frame_window_len * i_snip)

            # Get the static frame
            static_frame = self._read_image(video_dir, 'img', start_frame)
            static_frame = cv2.resize(static_frame / 255.0, self.image_shape)
            static_frames.append(static_frame)

            # Get the optical flow stack: x and y flow interleaved per frame
            flow_images = []
            for i_frame in range(start_frame, start_frame + self.opt_flow_len):
                flow_images.append(self._load_flow_frame(video_dir, 'x', i_frame, crop))
                flow_images.append(self._load_flow_frame(video_dir, 'y', i_frame, crop))

            # Move the flow-image axis last.
            # NOTE(review): swapaxes(0, 2) also exchanges the two spatial
            # axes (each flow image ends up transposed). Left unchanged
            # because the pretrained temporal model may have been trained
            # with this layout - confirm before switching to np.moveaxis.
            opt_flow_stack = np.swapaxes(np.array(flow_images), 0, 2)

        return np.array(static_frames), np.array(opt_flow_stack)


# Two Stream Model: Fusion

In [12]:
def two_stream_fuse(spatial_model, temporal_model):
    """Build a two-input model that averages the outputs of both streams.

    Args:
        spatial_model: model of the spatial (RGB frame) stream.
        temporal_model: model of the temporal (stacked optical flow) stream.

    Returns:
        A model taking ``[spatial_input, temporal_input]`` and returning the
        element-wise average of the two stream outputs.

    NOTE(review): the streams are used exactly as passed in; nothing here
    marks their layers as non-trainable, so freeze them beforehand if the
    fused model is ever trained.
    """
    streams = (spatial_model, temporal_model)

    # Late fusion: element-wise mean of the per-stream predictions.
    fused_output = average([stream.output for stream in streams])

    return Model([stream.input for stream in streams], fused_output)

In [24]:
# Validation dataset: reuses the notebook-level settings, including the shared
# `batch_size` constant instead of repeating its value here.
data_val = DataSet(
    num_of_snip=num_of_snip,
    opt_flow_len=opt_flow_len,
    image_shape=image_shape,
    partition='val',
    batch_size=batch_size,
)

In [15]:
# Number of batches per pass over the validation list, as computed by DataSet
steps = data_val.n_batch
# Endless generator of ([spatial, temporal], labels) batches
validation_generator = data_val.val_generator()

In [7]:
# Load the two pretrained streams from disk
temporal_model = load_model('./Models/model_mot.h5')
spatial_model = load_model('./Models/spatial_spat_resnet.hdf5')

In [16]:
# Fuse the two pretrained streams into a single two-input model
two_stream_model = two_stream_fuse(spatial_model=spatial_model,
                                   temporal_model=temporal_model)

In [17]:
# Compile with integer-label loss and accuracy / top-k accuracy metrics
optimizer = Adam()
two_stream_model.compile(
    optimizer=optimizer,
    loss=keras.losses.sparse_categorical_crossentropy,
    metrics=[
        'sparse_categorical_accuracy',
        'sparse_top_k_categorical_accuracy',
    ],
)

In [9]:
# Print the layer-by-layer structure of the fused model
two_stream_model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 conv2d_5_input (InputLayer)    [(None, 224, 224, 2  0           []                               
                                0)]                                                               
                                                                                                  
 conv2d_5 (Conv2D)              (None, 109, 109, 96  94176       ['conv2d_5_input[0][0]']         
                                )                                                                 
                                                                                                  
 batch_normalization_2 (BatchNo  (None, 109, 109, 96  384        ['conv2d_5[0][0]']               
 rmalization)                   )                                                             

In [None]:
# Evaluate the fused model on the validation batches. The previous
# `fit_generator` call trained (updated the weights of) both pretrained
# streams on the validation split; `evaluate` only measures loss and metrics.
eval_scores = two_stream_model.evaluate(validation_generator, steps=steps)
dict(zip(two_stream_model.metrics_names, eval_scores))