In [1]:
### TODO:
# * check mobilenetv2_1.00_224 sequences bug

In [213]:
# whether to log each feature and sequence status
verbose = 1

In [214]:
import os
import pandas as pd
import numpy as np
import json
from PIL import Image
import cv2
from sklearn.utils import shuffle
import sys
sys.path.append('..')

import h5py
import random

In [None]:
import keras

In [215]:
# import pretrained model functions
from deepvideoclassification.models import precompute_CNN_features
from deepvideoclassification.models import load_pretrained_model_preprocessor
from deepvideoclassification.models import load_pretrained_model

# import pretrained model properties
from deepvideoclassification.models import pretrained_model_len_features
from deepvideoclassification.models import pretrained_model_sizes
from deepvideoclassification.models import pretrained_model_names, poolings

In [None]:
load_pretrained_model_preprocessor()

In [216]:
# setup paths
pwd = os.getcwd().replace("deepvideoclassification","")
path_cache = pwd + 'cache/'
path_data = pwd + 'data/'

In [217]:
# setup logging
import logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s]  %(message)s",
    handlers=[
        logging.FileHandler("{0}/{1}.log".format(pwd, "logs")),
        logging.StreamHandler()
    ])
logger = logging.getLogger()

In [218]:
# read vid folders
def get_video_paths():
    """
    Return list of video paths 

    Videos should be in /data/video_1/, /data/video_2/ style folders 
    with sequentially numbered frame images e.g. /data/video_1/frame00001.jpg

    There should be at least 3 videos, 1 for each of train/test/valid splits
    Split assignment is given in /data/labels.csv (also to be provided by user)

    Functionality to use different parts of a video as train/valid/test 
    not currently implemented.
    """
    path_videos = []
    for filename in os.listdir(path_data):
        if os.path.isdir(os.path.join(path_data, filename)):
            path_videos.append(filename)

    path_videos = [path_data + v + '/' for v in path_videos]

    # make sure that there is video data in /data/ and give instructions if not done correctly
    assert len(path_videos)>0, "There need to be at least 3 video folders (at least 1 for each of train, valid, \
    and test splits) in /data/ - each video should be its own folder of frame images with ascending time-ordered \
    filenames e.g. /data/vid1/frame00001.jpg ... videos assignment to train/valid/test split should be given in \
    /data/labels.csv ... cross-validation or train/valid/test splits within a single long video not currently implemented"

    return path_videos

In [219]:
def resize_frames(target_size):
    """
    Resize the frames of all videos and save them to /cache/ 
    to make model fitting faster .

    We resize once upfront rather than each we use a pretrained model or architecture.

    Our models require inputs resized to:
    * 224 x 224 VGG16, ResNet50, DenseNet, MobileNet
    * 299 x 299 XCeption, InceptionV3, InceptionResNetV2
    * 112 x 112 3D CNN 
    """

    if not os.path.exists(path_cache + 'frames/' + str(target_size[0]) + "_" + str(target_size[1]) + '/'):
        
        os.makedirs(path_cache + 'frames/' + str(target_size[0]) + "_" + str(target_size[1]) + '/')

        # read vid paths
        path_videos = get_video_paths()

        # loop over all vids and resize frames, saving to new folder in /cache/frames/
        for c,path_video in enumerate(path_videos):

            logger.info("resizing vid {}/{} to {}x{}".format(c+1,len(path_videos),target_size[0], target_size[1]))

            # get vid name from path
            video_name = path_video.split("/")[-2]

            # create directory for resized frames - just storing arrays now so commented out
            # e.g. path_vid_resized = /cache/frames/224_224/s23-4847/
            # path_vid_resized = path_cache + 'frames/'
            # path_vid_resized += str(target_size[0]) + "_" + str(target_size[1]) + '/' 
            # path_vid_resized += video_name + '/'

            # load frame paths for vid
            path_frames = os.listdir(path_video)
            path_frames = [path_video + f for f in path_frames if f != '.DS_Store']
            path_frames.sort()

            # load frames
            frames = []
            for path_frame in path_frames:

                # open image and resize
                filename = path_frame.split("/").pop()
                img_frame = Image.open(path_frame)
                img_frame = img_frame.resize(target_size)
                # img_frame.save(path_vid_resized + filename, "JPEG", quality = 100)

                # convert to array and append to list
                img_frame = np.array(img_frame)
                frames.append(img_frame)

            # save array of resized frames
            np.save(path_cache + "frames/" + str(target_size[0]) + "_" + str(target_size[1]) + "/" + video_name, np.array(frames))

In [220]:
def get_labels():
    # read labels - should be CSV with columns "video","frame","label","split"
    # e.g. "s1-218", "s1-218-00001.jpeg", "noseal", "train"
    labels = None
    try:
        labels = pd.read_csv(path_data + 'labels.csv', usecols=['video','frame','label','split'])
    except ValueError as e:
        raise Exception("Labels file must contain columns ['video','frame','label','split'] - if you only have ['video','frame','label'], use the helper function add_splits_to_labels_file to add train/valid/test splits to your labels file")
    except FileNotFoundError as e:
        raise Exception("No labels found - please save labels file to /data/labels.csv") from e

    return labels.sort_values(["video","frame"])

In [221]:
def create_video_label_arrays():
    """
    Create numpy array with labels for each vid and a label_map.json file
    in /cache/labels/
    """

    # create folder for labels
    if not os.path.exists(path_cache + 'labels/'):
        os.makedirs(path_cache + 'labels/')

    # load labels
    labels = get_labels()

    # build label_map
    label_dummies = pd.get_dummies(labels, columns = ['label'])

    # get label columns list and build label map dict
    label_columns = []
    label_map = {}
    label_map_idx = 0
    for i, col in enumerate(label_dummies.columns):
        if col[:6] == 'label_':
            label_columns.append(col)
            label_map[label_map_idx] = col
            label_map_idx+=1

    # save label map to json
    with open(path_cache + 'labels/label_map.json', 'w') as fp:
        json.dump(label_map, fp)

    # get video paths
    path_videos = get_video_paths()

    # save numpy array of labels for each vid
    for path_video in path_videos:

        # get vid name from path
        video_name = path_video.split("/")[-2]

        vid_labels = np.array(label_dummies[label_dummies['video'] == video_name][label_columns])

        # save labels array for this vid
        np.save(path_cache + "/labels/" + video_name, np.array(vid_labels))

In [222]:
def load_label_map():
    """
    Returns label map - read from disk
    """

    # load label map from disk
    label_map = None
    try:
        if os.path.exists(path_cache + 'labels/label_map.json'):
            with open(path_cache + 'labels/label_map.json', 'r') as fp:
                label_map = json.load(fp)
        else:
            # build labels and label map
            create_video_label_arrays()
            if os.path.exists(path_cache + 'labels/label_map.json'):
                with open(path_cache + 'labels/label_map.json', 'r') as fp:
                    label_map = json.load(fp)
    except Exception as e:
        logger.error ('label map not found - make sure /data/labels.csv exists and data cache has been built')

    return label_map

In [249]:
class DataGenerator(keras.utils.Sequence):
    """
    Generator (used in Data) that generates data for Keras fit_generator method because full dataset too big to load into memory
    
    > inherits from keras.utils.Sequence
    
    """
    def __init__(self, batch_size, h5_path, h5_row_count):
        """
        Initialization DataGenerator class
        
        :batch_size: number of samples to return in batch 
        :h5_path: path to h5 dataset (where we stored the generated sequence data via save_frame_sequences_to_h5())
        :h5_row_count: number of rows in h5 dataset
        
        """
        self.batch_size = batch_size
        self.h5_path = h5_path
        self.h5_row_count = h5_row_count
        
        # init (shuffle dataset)
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(h5_row_count / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        
        # Generate indexes of the batch
        batch_indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Generate data
        x, y = self.__data_generation(batch_indexes)

        return x, y

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(h5_row_count)
        
        # shuffle indexes -> shuffle samples returned in each batch
        np.random.shuffle(self.indexes)

    def __data_generation(self, batch_indexes):
        """
        Generates data containing batch_size samples
        
        :batch_indexes: list (of size batch_size) with indexes into h5 file
        """ 
        x, y = None, None

        # slices into h5 file need to be sorted
        batch_indexes.sort()

        # read sample from h5 file
        with h5py.File(h5_path, 'r') as h5:
            ### read sample indexes from h5 file
            # sample sequences
            x = h5['sequences'][batch_indexes,:]
            # sample labels
            y = h5['labels'][batch_indexes,:]

        return x, y 

In [None]:
class Data(object):
    
    def __init__(self, sequence_length, 
                    return_CNN_features = False, pretrained_model_name = None, pooling = None, 
                    frame_size = None, augmentation = False, oversampling = False,
                    model_weights_path = None, custom_model_name = None,
                    return_generator = False, batch_size = None,
                    verbose = False):
        """
        Data object constructor
        
        
        :sequence_length: number of frames in sequence to be returned by Data object
        :return_CNN_features: whether to return precomputed features or return frames (or sequences of features/frames if sequence_length>1)

        :return_features: if True then return features (or sequences of feature) from pretrained model, if False then return frames (or sequences of frames)        
        :pretrained_model_name: name of pretrained model (or None if not using pretrained model e.g. for 3D-CNN)
        :pooling: name of pooling variant (or None if not using pretrained model e.g. for 3D-CNN)
        :frame_size: size that frames are resized to (this is looked up for pretrained models)
        :augmentation: whether to apply data augmentation (horizontal flips)
        :oversampling: whether to apply oversampling to create class balance
        
        :model_weights_path: path to custom model weights if we want to load CNN model we've fine-tuned to produce features (e.g. for LRCNN)
        :custom_model_name: custom output name to append to pretrained model name
        
        :return_generator: if True and sequence_length > 1 and return_CNN_features == False, then do not return dataset, instead construct h5 file with sequences for each split and return generator that samples from that (dataset of sequecne frames too big to load into memory)
        :batch_size: size of batches that generator must return
        
        :verbose: whether to log details
        
        Notes: 
        * if pretrained_model_name != None and return_CNN_features=False then will first apply preprocessor to frames (or frame sequences)
        * if return_generator = True and sequence_length > 1 and return_CNN_features == False, large h5 files will be created in cache before returning generator
        """
    
        # required params
        self.sequence_length = sequence_length
        self.frame_size = frame_size
        
        # optional params
        self.pretrained_model_name = pretrained_model_name
        self.pooling = pooling
        self.return_CNN_features = return_CNN_features
        self.augmentation = augmentation
        self.oversampling = oversampling
        
        # init model data
        self.x_train = []
        self.y_train = []
        #
        self.x_valid = []
        self.y_valid = []
        # 
        self.x_test = []
        self.y_test = []
        
        # fix case sensitivity
        if type(self.pretrained_model_name) == str:
            self.pretrained_model_name = self.pretrained_model_name.lower()
        if type(self.pooling) == str:
            self.pooling = self.pooling.lower()
        
        ################
        ### Prepare data
        ################
        
        # get video paths
        self.path_videos = get_video_paths()
        
        # create label array for each video and load label map
        create_video_label_arrays()
        self.label_map = load_label_map()
        
        # get labels
        self.labels = get_labels()
        
        # pull number of classes from labels shape
        self.num_classes = self.labels['label'].nunique()
        
        # create dict mapping video to train/valid/test split assignment
        video_splits = self.labels[['video','split']].drop_duplicates()
        video_splits.set_index("video", inplace=True)
        video_splits = video_splits.to_dict()['split']
        self.video_splits = video_splits
        
        # look up target size for pretrained model
        if pretrained_model_name is not None:
            self.frame_size = pretrained_model_sizes[pretrained_model_name]
            print(self.frame_size)
        
        # precompute resized frames (won't recompute if already resized)
        resize_frames(self.frame_size)

        # pre compute CNN features (won't recompute if already computed)
        if return_CNN_features and pretrained_model_name is not None:
            if model_weights_path is not None and custom_model_name is not None:
                # precompute with custom weights input and name
                precompute_CNN_features(self.pretrained_model_name, self.pooling, self.model_weights_path, self.custom_model_name)
            else:
                precompute_CNN_features(self.pretrained_model_name, self.pooling)
            
            
            
        # get preprocessor given pretrained if we will need to apply preprocessor 
        # (i.e. if return_CNN_features == False and pretrained_model_name != None)
        if not return_CNN_features and pretrained_model_name is not None:
            self.preprocess_input = load_pretrained_model_preprocessor(self.pretrained_model_name)
        
        
        self.verbose = verbose
        
        self.return_generator = return_generator
        self.batch_size = batch_size
        
        # do some checks
        if self.return_generator:
            assert self.batch_size != None, "batch size required to construct generator"
        if self.return_generator:
            assert self.return_CNN_features == False, "generator only implemented for frame sequences - features usually large enough to load into memory"
        
        ###################################
        ### load features / build sequences
        ###################################
        
        
        # load features/frames from all videos and concat into big array for each of train, valid and test
        if self.sequence_length > 1:
            
            ### sequences
            
            if return_CNN_features:
                
                if verbose:
                    logging.info("Loading features sequence data into memory")
                
                #####################
                ### feature sequences
                #####################
                
                path_features = path_cache + 'features/' + pretrained_model_name + "/" + pooling + '/'
                if not return_CNN_features and pretrained_model_name is not None:
                    path_features = path_cache + 'features/' + pretrained_model_name + "__" + custom_model_name + "/" + pooling + '/'
                path_labels = path_cache + 'labels/'
                
                # read vid paths
                path_videos = get_video_paths()

                # loop over all vids and resize frames, saving to new folder in /cache/frames/
                for c, path_video in enumerate(path_videos):

                    # get vid name from path
                    video_name = path_video.split("/")[-2]
                    
                    
                    ### create sequence: features
                    # load precomputed features
                    features = np.load(path_features + video_name + '.npy')
                    # build sequences
                    x = []
                    for i in range(sequence_length, len(features) + 1):
                        x.append(features[i-sequence_length:i])
                    x = np.array(x)
                    
                    
                    ### create sequence: labels
                    # load precomputed labels
                    labels = np.load(path_labels + video_name + '.npy')     

                    # temp lists to store sequences
                    y = []
                    for i in range(sequence_length, len(labels) + 1):
                        y.append(labels[i-1])
                    y = (np.array(y))

                    
                    ### build output
                    if self.video_splits[video_name] == "train":
                        self.x_train.append(x)
                        self.y_train.append(y)
                    if self.video_splits[video_name] == "valid":
                        self.x_valid.append(x)
                        self.y_valid.append(y)
                    if self.video_splits[video_name] == "test":
                        self.x_test.append(x)
                        self.y_test.append(y)
                        
            else:

                ###################
                ### frame sequences
                ###################
                
                # load full frame sequecne dataset into memory and return
                if not return_generator:
                    
                    ##############################################################################
                    ### load full sequence dataset into memory (will likely run into memory error)
                    ##############################################################################
                    
                    if verbose:
                        logging.info("Loading frame sequence data into memory")

                    # load resized numpy array
                    path_vid_resized = path_cache + 'frames/'
                    path_vid_resized += str(self.frame_size[0]) + "_" + str(self.frame_size[1]) + '/' 

                    path_labels = path_cache + 'labels/'

                    # read vid paths
                    path_videos = get_video_paths()

                    # loop over all vids and resize frames, saving to new folder in /cache/frames/
                    for c, path_video in enumerate(path_videos):

                        # get vid name from path
                        video_name = path_video.split("/")[-2]

                        ### create sequence: features
                        # load precomputed frames
                        frames = np.load(path_vid_resized  + video_name + '.npy')

                        # first apply preprocessing if pretrained model given
                        if pretrained_model_name != None:
                            frames = self.preprocess_input(frames)

                        # build sequences
                        x = []
                        for i in range(sequence_length, len(frames) + 1):
                            x.append(frames[i-sequence_length:i])
                        x = np.array(x)


                        ### create sequence: labels
                        # load precomputed labels
                        labels = np.load(path_labels + video_name + '.npy')     

                        # temp lists to store sequences
                        y = []
                        for i in range(sequence_length, len(labels) + 1):
                            y.append(labels[i-1])
                        y = (np.array(y))

                        ### build output
                        if self.video_splits[video_name] == "train":
                            self.x_train.append(x)
                            self.y_train.append(y)
                        if self.video_splits[video_name] == "valid":
                            self.x_valid.append(x)
                            self.y_valid.append(y)
                        if self.video_splits[video_name] == "test":
                            self.x_test.append(x)
                            self.y_test.append(y)
                else:
                    #############################
                    ### Build sequences generator
                    #############################
            
                    # compute and save h5 sequence files
                    self.save_frame_sequences_to_h5()
                    
                    # set generator
                    self.train_generator = DataGenerator(self.batch_size, self.path_h5_train, self.total_rows_train)
                    self.valid_generator = DataGenerator(self.batch_size, self.path_h5_valid, self.total_rows_valid)
                    self.test_generator = DataGenerator(self.batch_size, self.path_h5_test, self.total_rows_test)
                
        else:

            ### not sequence
            
            if return_CNN_features:
                
                if verbose:
                    logging.info("Loading features data into memory")
                
                ###################
                ### feature vectors
                ###################
                
                path_features = path_cache + 'features/' + pretrained_model_name + "/" + pooling + '/'
                if not return_CNN_features and pretrained_model_name is not None:
                    path_features = path_cache + 'features/' + pretrained_model_name + "__" + custom_model_name + "/" + pooling + '/'
                
                path_labels = path_cache + 'labels/'
                
                # read vid paths
                path_videos = get_video_paths()

                # loop over all vids and resize frames, saving to new folder in /cache/frames/
                for c, path_video in enumerate(path_videos):

                    # get vid name from path
                    video_name = path_video.split("/")[-2]
                    
                    ### load precomputed features
                    x = np.load(path_features + video_name + '.npy')
                    y = np.load(path_labels + video_name + '.npy')
                    
                    ### build output
                    if self.video_splits[video_name] == "train":
                        self.x_train.append(x)
                        self.y_train.append(y)
                    if self.video_splits[video_name] == "valid":
                        self.x_valid.append(x)
                        self.y_valid.append(y)
                    if self.video_splits[video_name] == "test":
                        self.x_test.append(x)
                        self.y_test.append(y)
            else:
                
                if verbose:
                    logging.info("Loading frames into memory")
                
                #################
                ### single frames
                #################
                
                # load resized numpy array
                path_vid_resized = path_cache + 'frames/'
                path_vid_resized += str(self.frame_size[0]) + "_" + str(self.frame_size[1]) + '/' 
                
                path_labels = path_cache + 'labels/'
                
                # read vid paths
                path_videos = get_video_paths()

                # loop over all vids and resize frames, saving to new folder in /cache/frames/
                for c, path_video in enumerate(path_videos):

                    # get vid name from path
                    video_name = path_video.split("/")[-2]
                    
                    # load precomputed numpy arrays for frames and labels
                    x = np.load(path_vid_resized  + video_name + '.npy')
                    y = np.load(path_labels + video_name + '.npy')
                    
                    # apply preprocessing if pretrained model given
                    if pretrained_model_name != None:
                        x = self.preprocess_input(x)
                
                    ### build output
                    if self.video_splits[video_name] == "train":
                        self.x_train.append(x)
                        self.y_train.append(y)
                    if self.video_splits[video_name] == "valid":
                        self.x_valid.append(x)
                        self.y_valid.append(y)
                    if self.video_splits[video_name] == "test":
                        self.x_test.append(x)
                        self.y_test.append(y)
            
            
        #################################
        ### get file paths for each split
        #################################
        #
        # Note: only makes sense for sequence_length = 1
        
        # get file paths: train
        dflab = self.labels[self.labels['split'] == 'train']
        self.paths_train = list(path_data + dflab['video'] + "/" + dflab['frame'])

        # get file paths: valid
        dflab = self.labels[self.labels['split'] == 'valid']
        self.paths_valid = list(path_data + dflab['video'] + "/" + dflab['frame'])

        # get file paths: test
        dflab = self.labels[self.labels['split'] == 'test']
        self.paths_test = list(path_data + dflab['video'] + "/" + dflab['frame'])
        
        
        
        #################################################
        ### reshape list outputs (if not using generator)
        #################################################
        
        if not return_generator:
            ## e.g. (9846, 224, 224, 3) for frames [return_CNN_features=True]
            ## or  (9846, 512) for features [return_CNN_features=False]
            self.x_train = np.concatenate(self.x_train, axis=0)
            self.y_train = np.concatenate(self.y_train, axis=0)
            self.x_valid = np.concatenate(self.x_valid, axis=0)
            self.y_valid = np.concatenate(self.y_valid, axis=0)
            self.x_test = np.concatenate(self.x_test, axis=0)
            self.y_test = np.concatenate(self.y_test, axis=0)
            
            # shuffle train and validation set
            self.shuffle()
            

    def __str__(self):
        return "x_train: {}, y_train: {} ... x_valid: {}, y_valid: {} ... x_test: {}, y_test: {}".format(self.x_train.shape,self.y_train.shape,self.x_valid.shape,self.y_valid.shape,self.x_test.shape,self.y_test.shape)
            
    def shuffle(self):
        """
        Randomize the order of samples in train and valid splits
        """
        ###########
        ### shuffle
        ###########
        # paths will no longer be correct (they're for debugging anyway)
        self.x_train, self.y_train, self.paths_train = shuffle(self.x_train, self.y_train, self.paths_train)
        self.x_valid, self.y_valid, self.paths_valid = shuffle(self.x_valid, self.y_valid, self.paths_valid)
        

    # Even at small sequence lengths, loading the full dataset as 
    # a sequence into memory is not feasible so we need to use generators
    # that iterate over the dataset without loading it all into memory
    # 
    # For now, we will assume that we will load the features datasets into memory
    # because this is more feasible but for large datasets, we'd want to use generators for that too. 
    # An implementation for a feature generator  can be done by pattern matching the implementation for frames 
    # 
    # we first precompute a sequences h5 file (it's too big to fit in memory but we never have more than 1
    # video's sequences in memory) ...then we will initialize a generator that samples sequences from the 
    # h5 file and returns batches that will be passed to our model's fit_generator method

    def save_frame_sequences_to_h5(self):
        """
        Save sequence of frames to h5 files (1 for each split) in cache 
        because dataset too big to load into memory
        
        Will create generator that reads random rows from these h5 files
        
        Inspired by: https://stackoverflow.com/questions/41849649/write-to-hdf5-and-shuffle-big-arrays-of-data
        """
    
        #######################
        ### setup h5 file paths
        #######################
        
        if not os.path.exists(path_cache + 'sequences/'):
            os.makedirs(path_cache + 'sequences/')

        path_h5_base = path_cache + 'sequences/'

        # store h5 files in subfolder in cache/sequences/ either with pretrained model name or resize name
        # since we need to run preprocessing for pretrained models but not for vanilla resizing (3DCNN)
        if pretrained_model_name is not None:
            path_h5_base += pretrained_model_name + '/'
        else:
            path_h5_base += str(self.frame_size[0]) + "_" + str(self.frame_size[1]) + '/' 

        if not os.path.exists(path_h5_base):
            os.makedirs(path_h5_base)

        self.path_h5_train = path_h5_base + '/h5_' + str(self.sequence_length) + 'train.h5'
        self.path_h5_valid = path_h5_base + '/h5_' + str(self.sequence_length) + 'valid.h5'
        self.path_h5_test = path_h5_base + '/h5_' + str(self.sequence_length) + 'test.h5'
    
        # build h5 file if doesn't exists()
        if not os.path.exists(self.path_h5_train) or not os.path.exists(self.path_h5_valid) or not os.path.exists(self.path_h5_test):
            
            if verbose:
                logging.info("Computing frame sequence h5 files: {}".format(path_h5_base))

            ##################################################
            ### get size of train/valid/test sequence datasets
            ##################################################

            # total number of rows of sequence data we have for each split
            # this is not the same as the number of frames since we exclude
            # the first (self.sequence_length-1) frames
            total_rows_train = 0
            total_rows_valid = 0
            total_rows_test = 0

            # load resized numpy array
            path_vid_resized = path_cache + 'frames/'
            path_vid_resized += str(self.frame_size[0]) + "_" + str(self.frame_size[1]) + '/' 
            
            print(path_vid_resized)

            path_labels = path_cache + 'labels/'

            # read vid paths
            path_videos = get_video_paths()

            # loop over all vids and resize frames, saving to new folder in /cache/frames/
            for c, path_video in enumerate(path_videos):

                # get vid name from path
                video_name = path_video.split("/")[-2]

                # load resized frames
                frames = np.load(path_vid_resized  + video_name + '.npy')

                # build sequences
                x = []
                for i in range(self.sequence_length, len(frames) + 1):
                    x.append(frames[i-self.sequence_length:i])
                x = np.array(x)

                if self.video_splits[video_name] == "train":
                    total_rows_train += len(x)
                if self.video_splits[video_name] == "valid":
                    total_rows_valid += len(x)
                if self.video_splits[video_name] == "test":
                    total_rows_test += len(x)

            # calc shapes required for full sequence dataset
            h5_shape_train_x = (total_rows_train, self.sequence_length, self.frame_size[0], self.frame_size[1], 3)
            h5_shape_train_y = (total_rows_train, self.num_classes)

            h5_shape_valid_x = (total_rows_valid, self.sequence_length, self.frame_size[0], self.frame_size[1], 3)
            h5_shape_valid_y = (total_rows_valid, self.num_classes)

            h5_shape_test_x = (total_rows_test, self.sequence_length, self.frame_size[0], self.frame_size[1], 3)
            h5_shape_test_y = (total_rows_test, self.num_classes)


            ################################
            ### Initialize and open h5 files
            ################################

            # open h5 file to store big sequence dataset feature and label arrays
            # path_h5file = MODEL -> SEQUENCE LENGTH
            f_train = h5py.File(self.path_h5_train, 'a')
            f_valid = h5py.File(self.path_h5_valid, 'a')
            f_test = h5py.File(self.path_h5_test, 'a')

            # initialize h5 datasets
            h5_train_x = f_train.create_dataset('sequences', shape= h5_shape_train_x, dtype='uint8')
            h5_train_y = f_train.create_dataset('labels', shape= h5_shape_train_y, dtype='uint8')

            h5_valid_x = f_valid.create_dataset('sequences', shape= h5_shape_valid_x, dtype='uint8')
            h5_valid_y = f_valid.create_dataset('labels', shape= h5_shape_valid_y, dtype='uint8')

            h5_test_x = f_test.create_dataset('sequences', shape= h5_shape_test_x, dtype='uint8')
            h5_test_y = f_test.create_dataset('labels', shape= h5_shape_test_y, dtype='uint8')

            ##################################################
            ### Build h5 files for this sequence / model combo
            ##################################################

            # load resized numpy array
            path_vid_resized = path_cache + 'frames/'
            path_vid_resized += str(self.frame_size[0]) + "_" + str(self.frame_size[1]) + '/' 

            path_labels = path_cache + 'labels/'

            # read vid paths
            path_videos = get_video_paths()

            # keep track of where we are in the h5 file
            h5_cursor_train = 0
            h5_cursor_valid = 0
            h5_cursor_test = 0

            # loop over all vids and resize frames, saving to new folder in /cache/frames/
            for c, path_video in enumerate(path_videos):

                # get vid name from path
                video_name = path_video.split("/")[-2]

                print(c, len(path_videos), video_name)

                ### create sequence: features
                # load precomputed frames
                frames = np.load(path_vid_resized  + video_name + '.npy')
                
                # first apply preprocessing if pretrained model given
                if pretrained_model_name != None:
                    frames = self.preprocess_input(frames)
                    
                # build sequences
                x = []
                for i in range(self.sequence_length, len(frames) + 1):
                    x.append(frames[i-self.sequence_length:i])
                x = np.array(x)

                ### create sequence: labels
                # load precomputed labels
                labels = np.load(path_labels + video_name + '.npy')     

                # temp lists to store sequences
                y = []
                for i in range(self.sequence_length, len(labels) + 1):
                    y.append(labels[i-1])
                y = (np.array(y))

                ### write this vid's data to relevant h5 dataset
                if video_splits[video_name] == "train":
                    h5_train_x[h5_cursor_train:h5_cursor_train + x.shape[0], :, :, :, :] = x
                    h5_train_y[h5_cursor_train:h5_cursor_train + y.shape[0], :] = y
                    h5_cursor_train += len(x)
                if video_splits[video_name] == "valid":
                    h5_valid_x[h5_cursor_valid:h5_cursor_valid + x.shape[0], :, :, :, :] = x
                    h5_valid_y[h5_cursor_valid:h5_cursor_valid + y.shape[0], :] = y
                    h5_cursor_valid += len(x)
                if video_splits[video_name] == "test":
                    h5_test_x[h5_cursor_test:h5_cursor_test + x.shape[0], :, :, :, :] = x
                    h5_test_y[h5_cursor_test:h5_cursor_test + y.shape[0], :] = y
                    h5_cursor_test += len(x)

            # store total samples for each split so we can pass them to our DataGenerator
            self.total_rows_train = total_rows_train
            self.total_rows_valid = total_rows_valid
            self.total_rows_test = total_rows_test
                    
            # close h5 files
            f_train.close()
            f_valid.close()
            f_test.close()

# TEST GENERATOR

In [225]:
from deepvideoclassification.models import Architecture
from deepvideoclassification.models import train

In [227]:
pretrained_model_name = "vgg16"
pooling="max"
sequence_length = 3

layer_1_size = 128
layer_2_size = 64
layer_3_size = 32
dropout=0.20

In [228]:
data = Data(sequence_length = sequence_length, 
            return_CNN_features = False, 
            pretrained_model_name=pretrained_model_name,
            pooling = pooling)

num_classes = data.num_classes 
frame_size = data.frame_size
num_features = pretrained_model_len_features[pretrained_model_name]

(224, 224)


In [229]:
architecture = Architecture(architecture="video_LRCNN_trainable", 
                            sequence_model = 'LSTM',
                            sequence_model_layers = 1,
                            sequence_length = sequence_length,
                            num_classes = data.num_classes, 
                            frame_size = data.frame_size, 
                            pretrained_model_name='vgg16', 
                            pooling='max',
                            layer_1_size=64, 
                            layer_2_size=32, 
                            layer_3_size=8, 
                            dropout=0.2,
                            convolution_kernel_size=3)

## train with no generator

In [230]:
# train model with no data generator
train(architecture.model, data, path_model = pwd+'models/', learning_rate = 0.001, epochs = 10)

Train on 10697 samples, validate on 1370 samples
Epoch 1/10

Epoch 00001: val_acc improved from -inf to 0.74380, saving model to /mnt/seals/models/model.h5
Epoch 2/10

Epoch 00002: val_acc improved from 0.74380 to 0.85255, saving model to /mnt/seals/models/model.h5
Epoch 3/10

Epoch 00003: val_acc did not improve from 0.85255
Epoch 4/10

Epoch 00004: val_acc did not improve from 0.85255
Epoch 5/10

Epoch 00005: val_acc improved from 0.85255 to 0.86971, saving model to /mnt/seals/models/model.h5
Epoch 6/10

KeyboardInterrupt: 

## train with generator

> need to store generator params in json with sequences file

In [None]:
data = Data(sequence_length = sequence_length, 
            return_CNN_features = False, 
            pretrained_model_name=pretrained_model_name,
            pooling = pooling,
            batch_size=32,
            return_generator=True)

## build generator dataset

In [158]:
h5_path = 'train_sequences' + str(sequence_length) + '.h5'
h5_file_len = h5_shape_train_x[0]
sample_size = 32

In [159]:
x,y = get_sample_from_h5(h5_path, h5_file_len, sample_size)

In [162]:
x.shape, y.shape

((32, 5, 224, 224, 3), (32, 2))

# Build cache

In [212]:
# build h5 cache
for sequence_length in [2,3,5,10,20]:
    for pretrained_model_name in pretrained_model_names:

        data = Data(sequence_length=sequence_length, 
                    return_CNN_features=False, 
                    pretrained_model_name = pretrained_model_name, 
                    return_generator = True,
                    verbose=True)

2019-01-11 00:44:16,106 [MainThread  ] [INFO ]  Computing frame sequence h5 files


(299, 299)
path_h5_base /mnt/seals/cache/sequences/inception_resnet_v2/


2019-01-11 00:44:16,308 [MainThread  ] [INFO ]  Computing frame sequence h5 files


(299, 299)
path_h5_base /mnt/seals/cache/sequences/inception_v3/
/mnt/seals/cache/frames/299_299/
0 46 s23-4847
1 46 s43-5211
2 46 s2-1133
3 46 s21-919
4 46 s20-842
5 46 s37-3930
6 46 s5-1102
7 46 s19-672
8 46 s26-8164
9 46 s41-4712
10 46 s18-630
11 46 s25-5886
12 46 s35-3664
13 46 s33-3405
14 46 s45-6301
15 46 s16-0
16 46 s39-4336
17 46 s29-316
18 46 s12-3465
19 46 s46-8087
20 46 s31-784
21 46 s28-20
22 46 s3-1993
23 46 s9-5491
24 46 s11-7363
25 46 s22-3733
26 46 s13-14
27 46 s15-2589
28 46 s40-4508
29 46 s17-2973
30 46 s6-1247
31 46 s42-4950
32 46 s30-516
33 46 s34-3590
34 46 s36-3838
35 46 s1-218
36 46 s38-4060
37 46 s7-2029
38 46 s8-2244
39 46 s10-6558
40 46 s32-3110
41 46 s14-1705
42 46 s27-8212
43 46 s24-5851
44 46 s44-5304
45 46 s4-6975


2019-01-11 00:45:17,088 [MainThread  ] [INFO ]  Computing frame sequence h5 files


(224, 224)
path_h5_base /mnt/seals/cache/sequences/mobilenetv2_1.00_224/
/mnt/seals/cache/frames/224_224/
0 46 s23-4847


TypeError: ufunc 'true_divide' output (typecode 'd') could not be coerced to provided output parameter (typecode 'B') according to the casting rule ''same_kind''

In [199]:
# build h5 caches
# build feature cache in advance by running python3 data.py
for pretrained_model_name in pretrained_model_names:
    for pooling in poolings:
        data = Data(sequence_length=1, 
                    return_CNN_features=True,
                    pretrained_model_name = pretrained_model_name,
                    pooling=pooling)

2019-01-11 00:37:33,620 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/max/


(299, 299)


2019-01-11 00:37:34,692 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/avg/


(299, 299)


2019-01-11 00:37:35,157 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_v3/max/


(299, 299)


2019-01-11 00:37:35,720 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_v3/avg/


(299, 299)


2019-01-11 00:37:36,287 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/mobilenetv2_1.00_224/max/


(224, 224)


2019-01-11 00:37:36,702 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/mobilenetv2_1.00_224/avg/


(224, 224)


2019-01-11 00:37:37,114 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/resnet50/max/


(224, 224)


2019-01-11 00:37:37,676 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/resnet50/avg/


(224, 224)


2019-01-11 00:37:38,237 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/vgg16/max/


(224, 224)


2019-01-11 00:37:38,563 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/vgg16/avg/


(224, 224)


2019-01-11 00:37:38,890 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/xception/max/


(299, 299)


2019-01-11 00:37:39,454 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/xception/avg/


(299, 299)


In [18]:
# if __name__ == "__main__":
#     # build feature cache in advance by running python3 data.py
#     for pretrained_model_name in pretrained_model_names:
#         for pooling in poolings:
#             data = Data(sequence_length=1, 
#                         return_CNN_features=True,
#                         pretrained_model_name = pretrained_model_name,
#                         pooling=pooling)