In [1]:
### TODO
# * fit_models create_architectures_list (append mode)
# * fit_models worker if experiment id last digit in os environment var

# * collect garbage between fitting models

# * refactor custom_model_name and model_weights_path to instead use trained model id

# * delete cached sequences after each experiment

# train c3d and c3dsmall separately
# save model summary

In [2]:
# notebook-wide switch: whether to log each feature and sequence status
# (read by precompute_CNN_features to log per-video progress and cache hits)
verbose = True

In [3]:
# to get number of CPUs to parallelize fit generator batch loading
import multiprocessing

In [4]:
from contextlib import redirect_stdout

In [5]:
import gc
import os
import pandas as pd
import numpy as np
from PIL import Image
import json
import cv2
import matplotlib.pyplot as plt
import itertools
import sys
sys.path.append('..')

In [14]:
from keras import backend as K
from keras.callbacks import EarlyStopping, ModelCheckpoint, CSVLogger, TensorBoard
from keras.layers import Dense, Flatten, Dropout, ZeroPadding3D, Input
from keras.layers.recurrent import SimpleRNN, GRU, LSTM
from keras.layers.wrappers import TimeDistributed
from keras.layers.convolutional import Conv2D, MaxPooling3D, Conv3D, MaxPooling2D, Convolution1D, Convolution3D, MaxPooling3D, ZeroPadding3D
from keras.models import Sequential, Model, load_model
from keras.optimizers import Adam, RMSprop
from keras.preprocessing.image import img_to_array
import datetime

Using TensorFlow backend.


In [15]:
from sklearn.metrics import confusion_matrix

In [16]:
# setup paths
# pwd is the working directory with every occurrence of "deepvideoclassification" removed;
# the cache and data folders are addressed relative to it
pwd = "".join(os.getcwd().split("deepvideoclassification"))
path_cache = '{}cache/'.format(pwd)
path_data = '{}data/'.format(pwd)

In [17]:
# setup logging
# explicit log messages are written both to <pwd>/logs.log and to a console
# stream handler (StreamHandler writes to stderr by default)
import logging

logging.basicConfig(
    handlers=[
        logging.FileHandler("{0}/{1}.log".format(pwd, "logs")),
        logging.StreamHandler(),
    ],
    format="%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s]  %(message)s",
    level=logging.INFO,
)

# root logger used by the rest of the notebook
logger = logging.getLogger()


def handle_exception(exc_type, exc_value, exc_traceback):
    """
    Exception hook that sends uncaught exceptions to the root logger.

    KeyboardInterrupt is passed to the interpreter's original hook
    (sys.__excepthook__) instead of being logged.
    """
    if not issubclass(exc_type, KeyboardInterrupt):
        logger.error("Uncaught exception", exc_info=(exc_type, exc_value, exc_traceback))
    else:
        sys.__excepthook__(exc_type, exc_value, exc_traceback)


# make the logger aware of any uncaught exceptions
sys.excepthook = handle_exception

In [18]:
from deepvideoclassification.data import Data

In [21]:
# get worker id for this instance from the WORKERID environment variable
# (stays None when the variable is unset or is not an integer)
try:
    WORKERID = int(os.environ['WORKERID'])
except KeyError:
    # variable not set: run without a worker id
    WORKERID = None
except ValueError:
    # variable set but not an integer: keep the best-effort behaviour, but say so
    logging.warning("Ignoring non-integer WORKERID={!r}".format(os.environ['WORKERID']))
    WORKERID = None

logging.info("WORKER ID={}".format(WORKERID))

2019-01-20 01:28:17,926 [MainThread  ] [INFO ]  WORKER ID=1


# Pretrained_CNNs

In [22]:
# length of the feature vector produced by each pretrained model
# (keys are the lower-case model names used throughout this notebook)
pretrained_model_len_features = {
    'vgg16': 512,
    'mobilenetv2_1.00_224': 1280,
    'inception_resnet_v2': 1536,
    'resnet50': 2048,
    'xception': 2048,
    'inception_v3': 2048,
}

In [23]:
# frame size that inputs are resized to for each pretrained model
pretrained_model_sizes = {
    # 224 x 224 models
    'vgg16': (224, 224),
    'resnet50': (224, 224),
    'mobilenetv2_1.00_224': (224, 224),
    # 299 x 299 models
    'xception': (299, 299),
    'inception_v3': (299, 299),
    'inception_resnet_v2': (299, 299),
}

In [24]:
# names of the supported pretrained models (alphabetical) and pooling variants
pretrained_model_names = [
    "inception_resnet_v2",
    "inception_v3",
    "mobilenetv2_1.00_224",
    "resnet50",
    "vgg16",
    "xception",
]
poolings = ['max', 'avg']

In [25]:
def load_pretrained_model(pretrained_model_name, pooling, model_weights_path = None):
    """ Load pretrained model with given pooling applied
    
    Args:
        pretrained_model_name: name of pretrained model (case-insensitive), one of
            ["inception_resnet_v2", "inception_v3", "mobilenetv2_1.00_224", "resnet50", "vgg16", "xception"]
        pooling: pooling strategy for final pretrained model layer ["max","avg"]
        model_weights_path: path to custom model weights if we want to load CNN model we've fine-tuned to produce features (e.g. for LRCNN)
    
    Returns:
        Pretrained model object built with ImageNet weights and include_top=False
        (i.e. excluding dense softmax 1000 ImageNet classes layer), with the custom
        weights loaded on top when `model_weights_path` is given
    
    Raises:
        NameError: if the model name is not supported, or if `model_weights_path`
            is given but does not exist
    """
    
    # local import: only needed here, to import the selected keras.applications module lazily
    import importlib
    
    # accepted (lower-case) name -> (keras.applications submodule, constructor name)
    model_constructors = {
        "xception": ("xception", "Xception"),
        "vgg16": ("vgg16", "VGG16"),
        "resnet50": ("resnet50", "ResNet50"),
        "inception_v3": ("inception_v3", "InceptionV3"),
        "inception_resnet_v2": ("inception_resnet_v2", "InceptionResNetV2"),
        "mobilenetv2_1.00_224": ("mobilenet_v2", "MobileNetV2"),
    }
    
    pretrained_model_name = pretrained_model_name.lower()
    
    # report the names that are actually accepted (the previous message listed
    # class names such as "MobileNetV2" that this function rejects)
    if pretrained_model_name not in model_constructors:
        raise NameError('Invalid pretrained model name - must be one of {}'.format(sorted(model_constructors)))
    
    # fail fast on a missing weights file, before the pretrained model is built
    if model_weights_path is not None and not os.path.exists(model_weights_path):
        raise NameError('pretrained model weights not found')
    
    ###########################
    ### import pretrained model
    ###########################
    module_name, constructor_name = model_constructors[pretrained_model_name]
    applications_module = importlib.import_module("keras.applications." + module_name)
    model = getattr(applications_module, constructor_name)(include_top=False, weights='imagenet', pooling=pooling)
    
    if model_weights_path is not None:
        model.load_weights(model_weights_path)
    
    return model

In [26]:
def load_pretrained_model_preprocessor(pretrained_model_name):
    """
    Return preprocessing function for a given pretrained model

    :pretrained_model_name: name of pretrained model (case-insensitive), one of
        ["inception_resnet_v2", "inception_v3", "mobilenetv2_1.00_224", "resnet50", "vgg16", "xception"]

    Returns the `preprocess_input` function of the matching keras.applications module.
    Raises NameError if the model name is not supported.
    """

    # local import: only needed here, to import the selected keras.applications module lazily
    import importlib

    # accepted (lower-case) name -> keras.applications submodule providing preprocess_input
    preprocessor_modules = {
        "xception": "xception",
        "vgg16": "vgg16",
        "resnet50": "resnet50",
        "inception_v3": "inception_v3",
        "inception_resnet_v2": "inception_resnet_v2",
        "mobilenetv2_1.00_224": "mobilenet_v2",
    }

    pretrained_model_name = pretrained_model_name.lower()

    # report the names that are actually accepted (the previous message listed
    # class names such as "MobileNetV2" that this function rejects)
    if pretrained_model_name not in preprocessor_modules:
        raise NameError('Invalid pretrained model name - must be one of {}'.format(sorted(preprocessor_modules)))

    applications_module = importlib.import_module("keras.applications." + preprocessor_modules[pretrained_model_name])

    return applications_module.preprocess_input

In [27]:
def precompute_CNN_features(pretrained_model_name, pooling, model_weights_path = None, custom_model_name = None):
    """ 
    Save pretrained features array computed over all frames of each video 
    using given pretrained model and pooling method
    
    One `.npy` file per video is written to
    `path_cache/features/<pretrained_model_name>/<pooling>/`, or to
    `path_cache/features/<pretrained_model_name>__<custom_model_name>/<pooling>/`
    when both `custom_model_name` and `model_weights_path` are given.
    If that folder already exists nothing is computed.
    
    :pretrained_model_name: name of the pretrained model (case-insensitive), passed to `load_pretrained_model`
    :pooling: pooling method used with pretrained model
    :model_weights_path: path to custom model weights if we want to load CNN model we've fine-tuned to produce features (e.g. for LRCNN)
    :custom_model_name: custom output name to append to pretrained model name

    Errors raised while processing a single video are logged and that video is skipped.
    """
    
    assert (pretrained_model_name is not None or custom_model_name is not None), "need to specify a pretrained_model_name in ['Xception', 'VGG16', 'ResNet50', 'InceptionV3', 'InceptionResNetV2', 'MobileNetV2'] or a custom_model_name"
    
    # the model name selects the CNN, its preprocessing and the output folder, so it
    # is required even when a custom_model_name is given (the assert above lets None
    # through in that case, which used to crash on .lower() below)
    if pretrained_model_name is None:
        raise ValueError("pretrained_model_name is required; custom_model_name only changes the output folder")
    
    pretrained_model_name = pretrained_model_name.lower()
    
    # setup path to save features
    path_features = path_cache + 'features/' + pretrained_model_name + "/" + pooling + '/'
    
    # store in custom directory if custom model name given (for when loading weights from fine-tuned CNN and precomputing features from that model)
    if custom_model_name is not None and model_weights_path is not None:
        path_features = path_cache + 'features/' + pretrained_model_name + "__" + custom_model_name + "/" + pooling + '/'
    
    # NOTE(review): an existing folder is treated as "everything cached", so a run
    # that was interrupted part-way is not resumed unless the folder is removed
    if os.path.exists(path_features):
        if verbose:
            logger.info("Features already cached: {}".format(path_features))
        return
    
    os.makedirs(path_features)
    
    # load pretrained model
    pretrained_model = load_pretrained_model(pretrained_model_name, pooling, model_weights_path)

    # load preprocessing function
    preprocess_input = load_pretrained_model_preprocessor(pretrained_model_name)

    # lookup pretrained model input shape
    model_input_size = pretrained_model_sizes[pretrained_model_name]
    
    # precompute features for each video using pretrained model
    from deepvideoclassification.data import get_video_paths
    path_videos = get_video_paths()

    for c, path_video in enumerate(path_videos):

        if verbose:
            logging.info("Computing pretrained model features for video {}/{} using pretrained model: {}, pooling: {}".format(c+1,len(path_videos),pretrained_model_name, pooling))

        # get video name from video path (second-to-last "/"-separated component)
        video_name = path_video.split("/")[-2]

        # build output path (np.save appends the .npy extension)
        path_output = path_features + video_name

        try:
            if os.path.exists(path_output + '.npy'):
                if verbose:
                    logger.info("Features already cached: {}".format(path_output))
                continue

            path_frames = path_data + video_name + "/"

            # sort paths in sequence (they were created with incrementing filenames through time)
            frame_paths = sorted(path_frames + f for f in os.listdir(path_frames) if f != '.DS_Store')

            # load each frame in vid and get features
            features = []
            for frame_path in frame_paths:

                # load image & preprocess
                # NOTE(review): cv2.COLOR_BGR2RGB is a cvtColor conversion code rather than
                # an imread flag, so the frame is presumably still in BGR channel order here.
                # Left unchanged because switching it would alter the cached features -
                # confirm against how frames are loaded in deepvideoclassification.data
                image = cv2.imread(frame_path, cv2.COLOR_BGR2RGB)
                img = cv2.resize(image, model_input_size, interpolation=cv2.INTER_AREA)
                img = img_to_array(img)
                img = np.expand_dims(img, axis=0)
                img = preprocess_input(img)

                # get features from pretrained model
                features.append(pretrained_model.predict(img).ravel())

            # convert to array and cache to disk
            np.save(path_output, np.array(features))

        except Exception as e:
            # best-effort: log the failure and carry on with the next video
            # (the missing comma between the first two format arguments used to raise NameError here)
            logging.error("Error precomputing features {} / {},{}".format(video_name, pretrained_model_name, pooling))
            logging.fatal(e, exc_info=True)

# Confusion Matrix

In [28]:
def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):
    """
    This function plots the confusion matrix as an annotated heatmap using pyplot.
    Normalization can be applied by setting `normalize=True`.

    :cm: 2-D confusion matrix (rows are plotted as true labels, columns as predicted labels)
    :classes: tick labels, one per row / column
    :normalize: if True, each row is divided by its sum and cells are printed with
        2 decimals; otherwise cells are printed as integers
    :title: plot title
    :cmap: matplotlib colormap used for the heatmap
    """
    cm = np.asarray(cm)

    if normalize:
        cm = cm.astype('float')
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # rows without any samples stay at 0 instead of turning into NaN
        cm = np.divide(cm, row_totals, out=np.zeros_like(cm), where=row_totals != 0)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    # cells above half of the maximum get white text so they stay readable
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

# Architecture class (contains keras model object and train/evaluate method, writes training results to /models/)

In [78]:
class Architecture(object):
    
    def __init__(self, model_id, architecture, sequence_length, 
                frame_size = None, 
                pretrained_model_name = None, pooling = None,
                sequence_model = None, sequence_model_layers = None,
                layer_1_size = 0, layer_2_size = 0, layer_3_size = 0, 
                dropout = 0, convolution_kernel_size = 3, 
                model_weights_path = None, 
                batch_size = 32, 
                verbose = False):
        """
        Model object constructor. Contains Keras model object and training/evaluation methods. Writes model results to /models/_id_ folder
        
        Architecture can be one of: 
        image_MLP_frozen, image_MLP_trainable, video_MLP_concat, video_LRCNN_frozen, video_LRCNN_trainable, C3D, C3Dsmall
        
        :model_id: integer identifier for this model e.g. 1337
        
        :architecture: architecture of model in [image_MLP_frozen, image_MLP_trainable, video_MLP_concat, video_LRCNN_frozen, video_LRCNN_trainable, C3D, C3Dsmall]
        
        :sequence_length: number of frames in sequence to be returned by Data object
        
        :frame_size: size that frames are resized to (different models / architectures accept different input sizes - will be inferred if pretrained_model_name is given since they have fixed sizes)
        :pretrained_model_name: name of pretrained model (or None if not using pretrained model e.g. for 3D-CNN)
        :pooling: name of pooling variant (or None if not using pretrained model e.g. for 3D-CNN or if fitting more non-dense layers on top of pretrained model base)
        
        :sequence_model: sequence model in [LSTM, SimpleRNN, GRU, Convolution1D]
        :sequence_model_layers: default to 1, can be stacked 2 or 3 (but less than 4) layer sequence model (assume always stacking the same sequence model, not mixing LSTM and GRU, for example)
        
        :layer_1_size: number of neurons in layer 1
        :layer_2_size: number of neurons in layer 2
        :layer_3_size: number of neurons in layer 3 
        
        :dropout: amount of dropout to add (same applied throughout model - good default is 0.2) 
        
        :convolution_kernel_size: size of 1-D convolutional kernel for 1-d conv sequence models (good default is 3)
        
        :model_weights_path: path to .h5 weights file to be loaded for pretrained CNN in LRCNN-trainable 
        
        :batch_size: batch size used to fit model (default to 32)
        
        :verbose: whether to log progress updates
        """
    
        # required params
        self.model_id = model_id
        
        # create path based on model id
        self.path_model = pwd + 'models/' + str(model_id) + '/'
        if not os.path.exists(self.path_model):
            os.makedirs(self.path_model)
        else:
            if not os.path.exists(self.path_model + 'results.json'):
                logging.info("Model folder exists but no results found - potential error in previous model training")
        
        self.architecture = architecture
        self.sequence_length = sequence_length
        
        # model architecture params
        self.frame_size = frame_size
        self.pretrained_model_name = pretrained_model_name
        self.pooling = pooling
        self.sequence_model = sequence_model
        self.sequence_model_layers = sequence_model_layers
        #
        self.layer_1_size = layer_1_size
        self.layer_2_size = layer_2_size
        self.layer_3_size = layer_3_size
        #
        self.dropout = dropout
        #
        self.convolution_kernel_size = convolution_kernel_size
        #
        self.model_weights_path = model_weights_path
        #
        self.batch_size = batch_size
        #
        self.verbose = verbose
        
        # fix case sensitivity
        if type(self.architecture) == str:
            self.architecture = self.architecture.lower()
        if type(self.pretrained_model_name) == str:
            self.pretrained_model_name = self.pretrained_model_name.lower()
        if type(self.pooling) == str:
            self.pooling = self.pooling.lower()
        
        # read num features from pretrained model
        if pretrained_model_name is not None:
            self.num_features = pretrained_model_len_features[pretrained_model_name]
            self.frame_size = pretrained_model_sizes[pretrained_model_name]
        
        # check one of pretrained model and frame size is specified
        assert (self.pretrained_model_name is not None or self.frame_size is not None), "Must specify one of pretrained_model_name or frame_size"
            
            
        # init model and data objects for this architecture
        self.model = None
        self.data = None
        
        
        #############################################################
        ### Build model architecture and init appropriate data object
        #############################################################
        
        if architecture == "image_MLP_frozen":
            
            ####################
            ### image_MLP_frozen
            ####################
            # image classification (single frame)
            # train MLP on top of weights extracted from pretrained CNN with no fine-tuning
            
            # check inputs
            assert self.sequence_length == 1, "image_MLP_frozen requires sequence length of 1"
            assert self.pretrained_model_name is not None, "image_MLP_frozen requires a pretrained_model_name input" 
            assert self.pooling is not None, "image_MLP_frozen requires a pooling input" 
            
            
            ### create data object for this architecture
            if self.verbose:
                logging.info("Loading data")
            self.data = Data(sequence_length = 1, 
                                return_CNN_features = True, 
                                pretrained_model_name= self.pretrained_model_name,
                                pooling = self.pooling)
            
            # init model
            model = Sequential()

            # 1st layer group
            if self.layer_1_size > 0:
                model.add(Dense(self.layer_1_size, activation='relu', input_shape=(self.num_features,)))
                if self.dropout > 0:
                    model.add(Dropout(self.dropout))
                
            # 2nd layer group
            if self.layer_2_size > 0 and self.layer_1_size > 0:
                model.add(Dense(self.layer_2_size, activation='relu'))
                if self.dropout > 0:
                    model.add(Dropout(self.dropout))

            # 3rd layer group
            if self.layer_3_size > 0 and self.layer_2_size > 0 and self.layer_1_size > 0:
                model.add(Dense(self.layer_3_size, activation='relu'))
                if dropout > 0:
                    model.add(Dropout(self.dropout))

            # classifier layer
            model.add(Dense(self.data.num_classes, activation='softmax'))
            

        elif architecture == "image_MLP_trainable":
            
            #######################
            ### image_MLP_trainable
            #######################
            # image classification (single frame)
            # fine-tune pretrained CNN with MLP on top
            #
            # start off freezing base CNN layers then will unfreeze 
            # after each training round
            #
            # we will ultimately want to compare our best fine-tuned 
            # CNN as a feature extractor vs fixed ImageNet pretrained CNN features
            
            # check inputs
            assert self.sequence_length == 1, "image_MLP_trainable requires sequence length of 1"
            assert self.pretrained_model_name is not None, "image_MLP_trainable requires a pretrained_model_name input" 
            assert self.pooling is not None, "image_MLP_trainable requires a pooling input" 
            
            ### create data object for this architecture
            if self.verbose:
                logging.info("Loading data")
            self.data = Data(sequence_length = 1, 
                                return_CNN_features = False, 
                                pretrained_model_name = self.pretrained_model_name,
                                pooling = self.pooling,
                                return_generator = True,
                                batch_size = self.batch_size)
            
            # create the base pre-trained model
            model_base = load_pretrained_model(self.pretrained_model_name, pooling=self.pooling)
            

            # freeze base model layers (will unfreeze after train top)
            for l in model_base.layers:
                l.trainable=False

            # use Keras functional API
            model_top = model_base.output

            # note layer names are there so we can exclude those layers 
            # when setting base model layers to trainable

            # 1st layer group
            if self.layer_1_size > 0:
                model_top = Dense(self.layer_1_size, activation="relu", name='top_a')(model_top)
                if self.dropout > 0:
                    model_top = Dropout(self.dropout, name='top_b')(model_top)

            # 2nd layer group
            if self.layer_2_size > 0 and self.layer_1_size > 0:
                model_top = Dense(self.layer_2_size, activation="relu", name='top_c')(model_top)
                if self.dropout > 0:
                    model_top = Dropout(self.dropout, name='top_d')(model_top)

            # 3rd layer group
            if self.layer_3_size > 0 and self.layer_2_size > 0 and self.layer_1_size > 0:
                model_top = Dense(self.layer_3_size, activation="relu", name='top_e')(model_top)
                if self.dropout > 0:
                    model_top = Dropout(self.dropout, name='top_f')(model_top)

            # classifier layer
            model_predictions = Dense(self.data.num_classes, activation="softmax", name='top_g')(model_top)

            # combine base and top models into single model object
            model = Model(inputs=model_base.input, outputs=model_predictions)
                
        elif architecture == "video_MLP_concat":

            ####################
            ### video_MLP_concat
            ####################
            
            # concatenate all frames in sequence and train MLP on top of concatenated frame input
            
            assert self.sequence_length > 1, "video_MLP_concat requires sequence length > 1"
            assert self.pretrained_model_name is not None, "video_MLP_concat requires a pretrained_model_name input"
            assert self.pooling is not None, "video_MLP_concat requires a pooling input"
            
            ### create data object for this architecture
            if self.verbose:
                logging.info("Loading data")
            self.data = Data(sequence_length = self.sequence_length, 
                                return_CNN_features = True, 
                                pretrained_model_name=self.pretrained_model_name,
                                pooling = self.pooling)

            # init model
            model = Sequential()

            model.add(Flatten(input_shape=(self.sequence_length, self.num_features)))

            # 1st layer group
            if self.layer_1_size > 0:
                model.add(Dense(self.layer_1_size, activation='relu', input_shape=(self.num_features,)))
                if self.dropout > 0:
                    model.add(Dropout(self.dropout))

            # 2nd layer group
            if self.layer_2_size > 0 and self.layer_1_size > 0:
                model.add(Dense(self.layer_2_size, activation='relu'))
                if self.dropout > 0:
                    model.add(Dropout(self.dropout))

            # 3rd layer group
            if self.layer_3_size > 0 and self.layer_2_size > 0 and self.layer_1_size > 0:
                model.add(Dense(self.layer_3_size, activation='relu'))
                if self.dropout > 0:
                    model.add(Dropout(self.dropout))

            # classifier layer
            model.add(Dense(self.data.num_classes, activation='softmax'))
            
        elif architecture == "video_LRCNN_frozen":

            ######################
            ### video_LRCNN_frozen
            ######################
            
            # Implement:
            # “Long-Term Recurrent Convolutional Networks for Visual Recognition and Description.”
            # Donahue, Jeff, Lisa Anne Hendricks, Marcus Rohrbach, Subhashini Venugopalan, 
            # Sergio Guadarrama, Kate Saenko, and Trevor Darrell.  
            # Proceedings of the IEEE Computer Society Conference on Computer Vision and 
            # Pattern Recognition, 2015, 2625–34.
            #
            # Essentially they extract features with fine-tuned CNN then fit recurrent models on top
            # in the paper they only use LSTM but we will also try RNN, GRU and 1-D CNN
            # 
            # note: no fine-tuning of CNN in this frozen LRCNN architecture
            # 
            # implementation inspired by:
            # https://github.com/sagarvegad/Video-Classification-CNN-and-LSTM-/blob/master/train_CNN_RNN.py

            
            # check inputs
            assert self.sequence_length > 1, "video_LRCNN_frozen requires sequence length > 1"
            assert self.layer_1_size > 0, "video_LRCNN_frozen requires a layer_1_size > 0" 
            assert self.pretrained_model_name is not None, "video_LRCNN_frozen requires a pretrained_model_name input" 
            assert self.pooling is not None, "video_LRCNN_frozen requires a pooling input" 
            assert self.sequence_model_layers is not None, "video_LRCNN_frozen requires sequence_model_layers >= 1" 
            assert self.sequence_model_layers >= 1, "video_LRCNN_frozen requires sequence_model_layers >= 1" 
            assert self.sequence_model_layers < 4, "video_LRCNN_frozen requires sequence_model_layers <= 3" 
            assert self.sequence_model is not None, "video_LRCNN_frozen requires a sequence_model" 
            if self.sequence_model == 'Convolution1D':
                assert self.convolution_kernel_size > 0, "Convolution1D sequence model requires convolution_kernel_size parameter > 0"
                assert self.convolution_kernel_size < self.sequence_length, "convolution_kernel_size must be less than sequence_length"

                
            ### create data object for this architecture
            if self.verbose:
                logging.info("Loading data")
            self.data = Data(sequence_length = self.sequence_length, 
                                return_CNN_features = True, 
                                pretrained_model_name = self.pretrained_model_name,
                                pooling = self.pooling)
            
                
            # set whether to return sequences for stacked sequence models
            return_sequences_1, return_sequences_2 = False, False
            if sequence_model_layers > 1 and layer_2_size > 0:
                return_sequences_1 = True
            if sequence_model_layers >= 2 and layer_3_size > 0 and layer_2_size > 0:
                return_sequences_2 = True
            
            # init model
            model = Sequential()

            # layer 1 (sequence layer)
            if sequence_model == "LSTM":
                model.add(LSTM(self.layer_1_size, return_sequences=return_sequences_1, dropout=self.dropout, 
                               input_shape=(self.sequence_length, self.num_features)))
            elif sequence_model == "SimpleRNN":
                model.add(SimpleRNN(self.layer_1_size, return_sequences=return_sequences_1, dropout=self.dropout, 
                               input_shape=(self.sequence_length, self.num_features)))
            elif sequence_model == "GRU":
                model.add(GRU(self.layer_1_size, return_sequences=return_sequences_1, dropout=self.dropout, 
                               input_shape=(self.sequence_length, self.num_features)))
            elif sequence_model == "Convolution1D":
                model.add(Convolution1D(self.layer_1_size, kernel_size = self.convolution_kernel_size, padding = 'valid', 
                               input_shape=(self.sequence_length, self.num_features)))
                if layer_2_size == 0 or sequence_model_layers == 1:
                    model.add(Flatten())
            else:
                raise NameError('Invalid sequence_model - must be one of [LSTM, SimpleRNN, GRU, Convolution1D]')    

            # layer 2 (sequential or dense)
            if layer_2_size > 0:
                if return_sequences_1 == False:
                    model.add(Dense(self.layer_2_size, activation='relu'))
                    model.add(Dropout(self.dropout))
                else:
                    if sequence_model == "LSTM":
                        model.add(LSTM(self.layer_2_size, return_sequences=return_sequences_2, dropout=self.dropout))
                    elif sequence_model == "SimpleRNN":
                        model.add(SimpleRNN(self.layer_2_size, return_sequences=return_sequences_2, dropout=self.dropout))
                    elif sequence_model == "GRU":
                        model.add(GRU(self.layer_2_size, return_sequences=return_sequences_2, dropout=self.dropout))
                    elif sequence_model == "Convolution1D":
                        model.add(Convolution1D(self.layer_2_size, kernel_size = self.convolution_kernel_size, padding = 'valid'))
                    else:
                        raise NameError('Invalid sequence_model - must be one of [LSTM, SimpleRNN, GRU, Convolution1D]') 

            # layer 3 (sequential or dense)
            if layer_3_size > 0:
                if sequence_model_layers < 3:
                    if sequence_model_layers == 2:
                        model.add(Flatten())
                    model.add(Dense(self.layer_3_size, activation='relu'))
                    model.add(Dropout(self.dropout))
                else:
                    if sequence_model == "LSTM":
                        model.add(LSTM(self.layer_3_size, return_sequences=False, dropout=self.dropout))
                        model.add(Flatten())
                    elif sequence_model == "SimpleRNN":
                        model.add(SimpleRNN(self.layer_3_size, return_sequences=False, dropout=self.dropout))
                        model.add(Flatten())
                    elif sequence_model == "GRU":
                        model.add(GRU(self.layer_3_size, return_sequences=False, dropout=self.dropout))
                        model.add(Flatten())
                    elif sequence_model == "Convolution1D":
                        model.add(Convolution1D(self.layer_3_size, kernel_size = self.convolution_kernel_size, padding = 'valid'))
                        model.add(Flatten())
                    else:
                        raise NameError('Invalid sequence_model - must be one of [LSTM, SimpleRNN, GRU, Convolution1D]') 
            else:
                if return_sequences_2 == True: 
                    model.add(Flatten())

            # classifier layer
            if self.dropout > 0:
                model.add(Dropout(self.dropout))
            model.add(Dense(self.data.num_classes, activation='softmax'))

        elif architecture == "video_LRCNN_trainable":
            
            #########################
            ### video_LRCNN_trainable
            #########################
            
            # Same as above:
            # “Long-Term Recurrent Convolutional Networks for Visual Recognition and Description.”
            # Donahue, Jeff, Lisa Anne Hendricks, Marcus Rohrbach, Subhashini Venugopalan, 
            # Sergio Guadarrama, Kate Saenko, and Trevor Darrell.  
            # Proceedings of the IEEE Computer Society Conference on Computer Vision and 
            # Pattern Recognition, 2015, 2625–34.
            #
            # But with fine-tuning of the CNNs that are input into the recurrent models
            # 
            # note: will take long because not precomputing the CNN part so re-computed 
            # on each training pass

            # implementation inspired by https://stackoverflow.com/questions/49535488/lstm-on-top-of-a-pre-trained-cnn
            
            # check inputs
            assert self.sequence_length > 1, "video_LRCNN_trainable requires sequence length > 1"
            assert self.layer_1_size > 0, "video_LRCNN_trainable requires a layer_1_size > 0" 
            assert self.pretrained_model_name is not None, "video_LRCNN_trainable requires a pretrained_model_name input" 
            assert self.pooling is not None, "video_LRCNN_trainable requires a pooling input" 
            assert self.sequence_model_layers >= 1, "video_LRCNN_trainable requires sequence_model_layers >= 1" 
            assert self.sequence_model_layers < 4, "video_LRCNN_trainable requires sequence_model_layers <= 3" 
            assert self.sequence_model is not None, "video_LRCNN_trainable requires a sequence_model" 
            if self.sequence_model == 'Convolution1D':
                assert self.convolution_kernel_size > 0, "Convolution1D sequence model requires convolution_kernel_size parameter > 0"
                assert self.convolution_kernel_size < self.sequence_length, "convolution_kernel_size must be less than sequence_length"
                
                
            ### create data object for this architecture
            if self.verbose:
                logging.info("Loading data")
            self.data = Data(sequence_length = self.sequence_length, 
                                return_CNN_features = False, 
                                return_generator=True,
                                pretrained_model_name = self.pretrained_model_name,
                                pooling = self.pooling,
                                batch_size=self.batch_size)
            
                
            # set whether to return sequences for stacked sequence models
            return_sequences_1, return_sequences_2 = False, False
            if sequence_model_layers > 1 and layer_2_size > 0:
                return_sequences_1 = True
            if sequence_model_layers >= 2 and layer_3_size > 0 and layer_2_size > 0:
                return_sequences_2 = True

            # load pretrained model weights - will train from there
            model_cnn = load_pretrained_model(self.pretrained_model_name, pooling=self.pooling)

            # optionally load weights for pretrained architecture
            # (will likely be better to first train CNN then load weights in LRCNN vs. use pretrained ImageNet CNN)
            if self.model_weights_path is not None:
                model_base.load_weights(self.model_weights_path)
            
            # freeze model_cnn layers but make final 3 layers of pretrained CNN trainable
            for i, l in enumerate(model_cnn.layers):
                if i < len(model_cnn.layers)-3:
                    l.trainable = False
                else:
                    l.trainable = True

            # sequential component on top of CNN
            frames = Input(shape=(self.sequence_length, self.frame_size[0], self.frame_size[1], 3))
            x = TimeDistributed(model_cnn)(frames)
            x = TimeDistributed(Flatten())(x)
            

            # layer 1 (sequence layer)
            if sequence_model == "LSTM":
                x = LSTM(self.layer_1_size, return_sequences=return_sequences_1, dropout=self.dropout)(x)
            elif sequence_model == "SimpleRNN":
                x = SimpleRNN(self.layer_1_size, return_sequences=return_sequences_1, dropout=self.dropout)(x)
            elif sequence_model == "GRU":
                x = GRU(self.layer_1_size, return_sequences=return_sequences_1, dropout=self.dropout)(x)
            elif sequence_model == "Convolution1D":
                x = Convolution1D(self.layer_1_size, kernel_size = self.convolution_kernel_size, padding = 'valid')(x)
                if layer_2_size == 0 or sequence_model_layers == 1:
                    x = Flatten()(x)
            else:
                raise NameError('Invalid sequence_model - must be one of [LSTM, SimpleRNN, GRU, Convolution1D]')    

            # layer 2 (sequential or dense)
            if layer_2_size > 0:
                if return_sequences_1 == False:
                    x = Dense(self.layer_2_size, activation='relu')(x)
                    x = Dropout(self.dropout)(x)
                else:
                    if sequence_model == "LSTM":
                        x = LSTM(self.layer_2_size, return_sequences=return_sequences_2, dropout=self.dropout)(x)
                    elif sequence_model == "SimpleRNN":
                        x = SimpleRNN(self.layer_2_size, return_sequences=return_sequences_2, dropout=self.dropout)(x)
                    elif sequence_model == "GRU":
                        x = GRU(self.layer_2_size, return_sequences=return_sequences_2, dropout=self.dropout)(x)
                    elif sequence_model == "Convolution1D":
                        x = Convolution1D(self.layer_2_size, kernel_size = self.convolution_kernel_size, padding = 'valid')(x)
                    else:
                        raise NameError('Invalid sequence_model - must be one of [LSTM, SimpleRNN, GRU, Convolution1D]') 

            # layer 3 (sequential or dense)
            if layer_3_size > 0:
                if sequence_model_layers < 3:
                    if sequence_model_layers == 2:
                        x = Flatten()(x)
                    x = Dense(self.layer_3_size, activation='relu')(x)
                    x = Dropout(self.dropout)(x)
                else:
                    if sequence_model == "LSTM":
                        x = LSTM(self.layer_3_size, return_sequences=False, dropout=self.dropout)(x)
                        x = Flatten()(x)
                    elif sequence_model == "SimpleRNN":
                        x = SimpleRNN(self.layer_3_size, return_sequences=False, dropout=self.dropout)(x)
                        x = Flatten()(x)
                    elif sequence_model == "GRU":
                        x = GRU(self.layer_3_size, return_sequences=False, dropout=self.dropout)(x)
                        x = Flatten()(x)
                    elif sequence_model == "Convolution1D":
                        x = Convolution1D(self.layer_3_size, kernel_size = self.convolution_kernel_size, padding = 'valid')(x)
                        x = Flatten()(x)
                    else:
                        raise NameError('Invalid sequence_model - must be one of [LSTM, SimpleRNN, GRU, Convolution1D]') 
            else:
                if return_sequences_2 == True: 
                    x = Flatten()(x)

            # classifier layer
            if self.dropout > 0:
                x = Dropout(self.dropout)(x)
            out = Dense(self.data.num_classes, activation='softmax')(x)
                        

            # join cnn frame model and LSTM top
            model = Model(inputs=frames, outputs=out)
         
        elif architecture == "C3D":
            
            #########
            ### C3D
            #########
            
            # Implement:
            # Learning Spatiotemporal Features with 3D Convolutional Networks
            # Tran et al 2015
            # https://arxiv.org/abs/1412.0767
            #
            # Implementation inspired by https://gist.github.com/albertomontesg/d8b21a179c1e6cca0480ebdf292c34d2
            
            assert self.sequence_length == 16, "C3D requires sequence length 16"
            assert self.frame_size == (112,112), "C3D requires frame size 112x112"
            assert self.layer_1_size == 0, "C3D does not accept layer size inputs since it's a predefined architecture"
            assert self.layer_2_size == 0, "C3D does not accept layer size inputs since it's a predefined architecture"
            assert self.layer_3_size == 0, "C3D does not accept layer size inputs since it's a predefined architecture"
            assert self.dropout == 0, "C3D does not accept layer size inputs since it's a predefined architecture"
            assert self.sequence_model == None, "C3D does not accept a sequence_model parameter"
            assert self.sequence_model_layers == None, "C3D does not accept a sequence_model_layers parameter"
            assert self.pretrained_model_name == None, "C3D does not accept a pretrained_model_name parameter"            
            assert self.pooling == None, "C3D does not accept a pooling parameter"                            
            
            
            ### create data object for this architecture
            if self.verbose:
                logging.info("Loading data")
            self.data = Data(sequence_length = 16, 
                                return_CNN_features = False, 
                                return_generator = True,
                                frame_size = (112,112),
                                batch_size=self.batch_size,
                                verbose = False)
            
            # C3D
            model = Sequential()
            # 1st layer group
            model.add(Conv3D(64, (3, 3, 3), activation='relu', padding='same', name='conv1', input_shape=(16, 112, 112, 3)))
            model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), padding='valid', name='pool1'))
            # 2nd layer group
            model.add(Conv3D(128, (3, 3, 3), activation='relu',padding='same', name='conv2'))
            model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),padding='valid', name='pool2'))
            # 3rd layer group
            model.add(Conv3D(256, (3, 3, 3), activation='relu',padding='same', name='conv3a'))
            model.add(Conv3D(256, (3, 3, 3), activation='relu',padding='same', name='conv3b'))
            model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2), padding='valid', name='pool3'))
            # 4th layer group
            model.add(Conv3D(512, (3, 3, 3), activation='relu',padding='same', name='conv4a'))
            model.add(Conv3D(512, (3, 3, 3), activation='relu',padding='same', name='conv4b'))
            model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),padding='valid', name='pool4'))
            # 5th layer group
            model.add(Conv3D(512, (3, 3, 3), activation='relu',padding='same', name='conv5a'))
            model.add(Conv3D(512, (3, 3, 3), activation='relu',padding='same', name='conv5b'))
            model.add(ZeroPadding3D(padding=(0, 1, 1)))
            model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2),padding='valid', name='pool5'))
            model.add(Flatten())
            # FC layers group
            model.add(Dense(4096, activation='relu', name='fc6'))
            model.add(Dropout(.5))
            model.add(Dense(4096, activation='relu', name='fc7'))
            model.add(Dropout(.5))
            model.add(Dense(self.data.num_classes, activation='softmax', name='fc8'))
            
        elif architecture == "C3Dsmall":
            
            #########################
            ### C3D - small variation
            #########################
            
            # Custom small version of C3D from paper:
            # Learning Spatiotemporal Features with 3D Convolutional Networks
            # Tran et al 2015
            # https://arxiv.org/abs/1412.0767
            #
            # Implementation inspired by https://gist.github.com/albertomontesg/d8b21a179c1e6cca0480ebdf292c34d2
            
            assert self.sequence_length == 16, "C3Dsmall requires sequence length 16"
            assert self.frame_size == (112,112), "C3Dsmall requires frame size 112x112"
            assert self.layer_1_size == 0, "C3Dsmall does not accept layer size inputs since it's a predefined architecture"
            assert self.layer_2_size == 0, "C3Dsmall does not accept layer size inputs since it's a predefined architecture"
            assert self.layer_3_size == 0, "C3Dsmall does not accept layer size inputs since it's a predefined architecture"
            assert self.dropout == 0, "C3Dsmall does not accept layer size inputs since it's a predefined architecture"
            assert self.sequence_model == None, "C3Dsmall does not accept a sequence_model parameter"
            assert self.sequence_model_layers == None, "C3Dsmall does not accept a sequence_model_layers parameter"
            assert self.pretrained_model_name == None, "C3Dsmall does not accept a pretrained_model_name parameter"            
            assert self.pooling == None, "C3Dsmall does not accept a pooling parameter"      
            
            
            ### create data object for this architecture
            if self.verbose:
                logging.info("Loading data")
            self.data = Data(sequence_length = 16, 
                                return_CNN_features = False, 
                                return_generator = True,
                                frame_size = (112,112),
                                batch_size=self.batch_size,
                                verbose = False)
            
            # C3Dsmall
            model = Sequential()
            # 1st layer group
            model.add(Conv3D(32, (3,3,3), activation='relu', input_shape=(data.sequence_length, 112, 112, 3)))
            model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2)))
            # 2nd layer group
            model.add(Conv3D(64, (3,3,3), activation='relu'))
            model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2)))
            # 3rd layer group
            model.add(Conv3D(128, (3,3,3), activation='relu'))
            model.add(Conv3D(128, (3,3,3), activation='relu'))
            model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2)))
            # 4th layer group
            model.add(Conv3D(256, (2,2,2), activation='relu'))
            model.add(Conv3D(256, (2,2,2), activation='relu'))
            model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2)))
            # FC layers group
            model.add(Flatten())
            model.add(Dense(256))
            model.add(Dropout(0.5))
            model.add(Dense(128))
            model.add(Dropout(0.5))
            model.add(Dense(self.data.num_classes, activation='softmax'))
            
        else:
            raise NameError('Invalid architecture - must be one of [image_MLP_frozen, image_MLP_trainable, video_MLP_concat, video_LRCNN_frozen, video_LRCNN_trainable, C3D, C3Dsmall]')    
        
        
        
        
        ###############
        ### Finish init
        ###############
        
        # set class model to constructed model
        self.model = model
        
        # load weights of model if they exist
        if os.path.exists(self.path_model + 'model.h5'):
            if self.verbose:
                logging.info("Loading saved model weights")
            model.load_weights(self.path_model + 'model.h5')            
        
        # save model summary to model folder
        with open(self.path_model + 'model_summary.txt', 'w') as f:
            with redirect_stdout(f):
                self.model.summary()
        
        # save architecture params to model folder
        params = self.__dict__.copy()
        params['data_shape'] = str(self.data)
        del params['model']
        del params['data']
        with open(self.path_model + 'params.json', 'w') as fp:
            json.dump(params, fp, indent=4)
    
    
    def train_model(self):
        """
        Train the model over three fit rounds with a decreasing learning rate, then score it on the test set
        
        Each round calls self.fit (learning rates 0.001, 0.0001, 0.00001) and after every round the 
        best checkpoint is reloaded from *path_model*/model.h5, so the next round (and the final 
        test-set evaluation) starts from the best weights rather than the last-epoch weights
        
        Files written to the model's path e.g. /models/1/
            fit_history.csv                   combined per-epoch history of the three rounds
            test_predictions.npy              predicted class probabilities for the test set
            confusion_matrix.csv              raw confusion matrix
            confusion_matrix.png              plot of the raw confusion matrix
            confusion_matrix_normalized.png   plot of the normalized confusion matrix
            results.json                      architecture params plus fit and test metrics
        
        Also sets self.fit_history (dataframe) and self.results (dict)
        """
        
        # init results with architecture params
        # drop attributes that can't be written to json - 'fit_history' and 'results' only exist
        # if train_model already ran on this object (e.g. the notebook cell was re-run)
        results = self.__dict__.copy()
        for key in ('model', 'data', 'fit_history', 'results'):
            results.pop(key, None)
        results['data_total_rows_train'] = self.data.total_rows_train
        # NOTE(review): assumes Data exposes total_rows_valid / total_rows_test next to 
        # total_rows_train (previously all three values were copied from total_rows_train) - confirm
        results['data_total_rows_valid'] = self.data.total_rows_valid
        results['data_total_rows_test'] = self.data.total_rows_test
        results['model_param_count'] = self.model.count_params()
        
        
        ###############
        ### Train model
        ###############
        
        # start training timer
        start = datetime.datetime.now()
        results['fit_dt_train_start'] = start.strftime("%Y-%m-%d %H:%M:%S")
        
        # do first round of fitting
        history1, stopped_epoch1 = self.fit(learning_rate = 0.001)
        
        # load best model weights so far
        self.model.load_weights(self.path_model + 'model.h5')
        
        # reduce learning rate and fit some more
        history2, stopped_epoch2 = self.fit(learning_rate = 0.0001)
        
        # load best model weights so far
        self.model.load_weights(self.path_model + 'model.h5')
        
        # reduce learning rate and fit some more
        history3, stopped_epoch3 = self.fit(learning_rate = 0.00001)
        
        # load best weights of the final round so the test set is scored with the 
        # checkpointed model and not with the weights of the last (post-peak) epoch
        self.model.load_weights(self.path_model + 'model.h5')
        
        # end time training
        end = datetime.datetime.now()    
        results['fit_dt_train_end']  = end.strftime("%Y-%m-%d %H:%M:%S")
        results['fit_dt_train_duration_seconds']  = str((end - start).total_seconds()).split(".")[0]
        
        
        #################
        ### build results
        #################
        # combine fit histories into big dataframe and write to model folder
        # only keep history until accuracy declined (where early stopping made checkpoint)
        # NOTE(review): head(stopped_epoch) keeps rows 0..stopped_epoch-1; whether that includes the 
        # checkpointed epoch depends on how the installed Keras EarlyStopping counts patience - confirm

        # parse history dicts to dataframes
        history1 = pd.DataFrame(history1.history).head(stopped_epoch1)
        history1['fit_round'] = 1
        history2 = pd.DataFrame(history2.history).head(stopped_epoch2)
        history2['fit_round'] = 2
        history3 = pd.DataFrame(history3.history).head(stopped_epoch3)
        history3['fit_round'] = 3
        
        # combine and save csv
        fit_history = pd.concat([history1, history2, history3], axis=0)
        fit_history = fit_history.reset_index(drop=True)
        fit_history['epoch'] = fit_history.index+1
        fit_history.to_csv(self.path_model + 'fit_history.csv')
        self.fit_history = fit_history
        
        results['fit_stopped_epoch1'] = stopped_epoch1
        results['fit_stopped_epoch2'] = stopped_epoch2
        results['fit_stopped_epoch3'] = stopped_epoch3
        
        # add 3 = 1 for each training round because stopped_epoch is 0 indexed
        results['fit_num_epochs'] = stopped_epoch1 + stopped_epoch2 + stopped_epoch3 + 3
        
        # metrics of the last kept epoch
        last_epoch = fit_history.tail(1)
        results['fit_val_acc'] = list(last_epoch['val_acc'])[0]
        results['fit_train_acc'] = list(last_epoch['acc'])[0]
        results['fit_val_loss'] = list(last_epoch['val_loss'])[0]
        results['fit_train_loss'] = list(last_epoch['loss'])[0]

        #######################
        ### Predict on test set
        #######################
        
        # start test timer
        start = datetime.datetime.now()
        results['fit_dt_test_start'] = start.strftime("%Y-%m-%d %H:%M:%S")
        
        if self.data.return_generator:
            # predict on test set via generator
            y_prob = self.model.predict_generator(self.data.generator_test,verbose=self.verbose)

            # get truth labels from generator
            y_test = []
            for _, label in self.data.generator_test:
                y_test.extend(label)
            y_test = np.argmax(np.array(y_test), axis = 1)
            
        else:
            # predict on test data loaded into memory
            y_prob = self.model.predict(self.data.x_test, verbose=self.verbose)

            # get truth labels from memory
            y_test = np.argmax(self.data.y_test,axis=1)
        
        # save predicted class probabilities
        np.save(self.path_model + 'test_predictions', y_prob)
        
        # take argmax to get predicted class
        y_pred = np.argmax(y_prob, axis = 1)
        
        # end time testing
        end = datetime.datetime.now()    
        results['fit_dt_test_end']  = end.strftime("%Y-%m-%d %H:%M:%S")
        results['fit_dt_test_duration_seconds']  = str((end - start).total_seconds()).split(".")[0]
        
        ############################
        ### Compute confusion matrix
        ############################
        
        # Compute and store confusion matrix
        cnf_matrix = confusion_matrix(y_test, y_pred)
        pd.DataFrame(cnf_matrix).to_csv(self.path_model + "confusion_matrix.csv")

        # get class names from label map for plot
        class_names = list(self.data.label_map.values())

        # Plot non-normalized confusion matrix
        plt.figure(figsize=(8,8))
        plot_confusion_matrix(cnf_matrix, classes=class_names, title='Confusion matrix, without normalization')
        plt.savefig(self.path_model + 'confusion_matrix.png', bbox_inches='tight')

        # Plot normalized confusion matrix
        plt.figure(figsize=(8,8))
        plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True, title='Normalized confusion matrix')
        plt.savefig(self.path_model + 'confusion_matrix_normalized.png', bbox_inches='tight')
        
        
        ##########################
        ### Compute raw error rate
        ##########################
        
        # build dataframe and calculate test error (assuming we classify using majority rule, not ROC cutoff approach)
        # label_map is keyed by the class index as a string
        pdf = pd.DataFrame(y_pred, columns = ['pred'])
        pdf['prediction'] = pdf['pred'].apply(lambda x: self.data.label_map[str(x)])

        truth = pd.DataFrame(y_test, columns = ['truth'])
        truth['label'] = truth['truth'].apply(lambda x: self.data.label_map[str(x)])
        truth = truth[['label']]

        pdf = pd.concat([pdf, truth], axis=1)
        pdf['error'] = (pdf['prediction'] != pdf['label']).astype(int)
        test_acc = 1 - pdf['error'].mean()
        
        results['fit_test_acc'] = test_acc
        
        if self.verbose:
            logger.info(str(results))
            logger.info("model {} test acc: {}".format(self.model_id, test_acc))
        
        
        ##################
        ### Output results
        ##################
        self.results = results
        with open(self.path_model + 'results.json', 'w') as fp:
            json.dump(results, fp, indent=4)

        
    def fit(self, learning_rate = 0.001, epochs = 30, patience=5, loss='categorical_crossentropy'):
        """
        Compile the model and run one round of fitting at a fixed learning rate

        Fitting will stop if val_acc does not improve for at least patience epochs

        Only the best weights (by val_acc) are checkpointed, to *path_model*/model.h5, 
        and per-epoch metrics are logged to *path_model*/training.log

        Good practice is to decrease the learning rate by a factor of 10 after each plateau and train some more 
        (after first re-loading best weights from previous training round)...

        for example (this is what train_model does):
            history1, stopped_epoch1 = self.fit(learning_rate = 0.001)
            self.model.load_weights(self.path_model + 'model.h5')
            history2, stopped_epoch2 = self.fit(learning_rate = 0.0001)

        Whether training runs from a generator or from arrays in memory is decided by 
        self.data.return_generator; batch size (in-memory case) and progress verbosity 
        come from self.batch_size and self.verbose

        :learning_rate: learning rate parameter for Adam optimizer (default is 0.001)
        :epochs: number of training epochs per fit round (subject to patience)
        :patience: how many epochs without val_acc improvement before stopping fit round
        :loss: loss passed to model.compile. Defaults to categorical_crossentropy because the 
               models end in a softmax over one-hot labels; with binary_crossentropy Keras 
               resolves the 'accuracy' metric to element-wise binary accuracy, which inflates 
               the val_acc that early stopping and checkpointing monitor. 
               Pass loss='binary_crossentropy' to reproduce runs made with the previous setting

        :returns: tuple (history, epochs_trained) where history is the Keras History object and 
                  epochs_trained is the early stopper's stopped_epoch minus patience if fitting 
                  stopped early, else *epochs*
        """

        # get number of processors for multiprocessing fit generators
        num_workers = multiprocessing.cpu_count()

        # create optimizer with given learning rate 
        opt = Adam(lr = learning_rate)

        # compile model
        self.model.compile(optimizer=opt, loss=loss, metrics=['accuracy'])

        # setup training callbacks
        # (checkpointer verbosity follows self.verbose rather than a notebook-level global)
        callback_stopper = EarlyStopping(monitor='val_acc', patience=patience, verbose=0)
        callback_csvlogger = CSVLogger(self.path_model + 'training.log')
        callback_checkpointer = ModelCheckpoint(self.path_model +  'model.h5', monitor='val_acc', save_best_only=True, verbose=self.verbose)
        callbacks = [callback_stopper, callback_checkpointer, callback_csvlogger]

        # fit model
        if self.data.return_generator == True:
            # train using generator
            history = self.model.fit_generator(generator=self.data.generator_train,
                validation_data=self.data.generator_valid,
                use_multiprocessing=True,
                workers=num_workers,
                epochs=epochs,
                callbacks=callbacks,
                shuffle=True,
                verbose=self.verbose)
        else:
            # train using full dataset
            history = self.model.fit(self.data.x_train, self.data.y_train, 
                validation_data=(self.data.x_valid, self.data.y_valid),
                batch_size=self.batch_size,
                epochs=epochs,
                callbacks=callbacks,
                shuffle=True,
                verbose=self.verbose)

        # get number of epochs actually trained (might have early stopped)
        # stopped_epoch stays 0 when the early stopper never fired
        epochs_trained = callback_stopper.stopped_epoch
        
        if epochs_trained == 0:
            # trained but didn't stop early
            if len(history.history) > 0:
                epochs_trained = epochs
        else:
            # subtract patience from stop point to get actual peak epoch for this fitting round
            epochs_trained -= patience 
        
        # return fit history and the epoch that the early stopper completed on
        return history, epochs_trained
        
        
    def make_last_layers_trainable(self, num_layers):
        """
        Set the last *num_layers* layers of the base model to trainable  

        NB to be used with model_base and assumes name = "top_xxx" added to each top layer to know 
        to ignore that layer when looping through layers from top backwards. A layer counts as a 
        top layer when its name contains "top"; the trainable flag itself is not inspected

        If *num_layers* is larger than the number of layers up to the last base layer, only 
        those layers are changed (the top layers are never touched); num_layers <= 0 changes nothing

        :num_layers: number of layers from end of base model to be set as trainable
        """

        layers = self.model.layers

        # find index of the last layer that is not one of the top layers we added
        # (stays 0 if every layer name contains "top")
        idx_not_trainable = 0
        for i, l in enumerate(layers):
            if "top" not in l.name:
                idx_not_trainable = i

        # clamp the start at 0: a negative start would wrap around to the end of the 
        # layer list and flip the top layers instead of the base layers
        idx_first = max(0, idx_not_trainable - num_layers + 1)

        # set last base layer and the num_layers-1 layers before it to trainable=True
        for layer in layers[idx_first:idx_not_trainable + 1]:
            layer.trainable = True
        
        if self.verbose:
            logging.info("last {} layers of CNN set to trainable".format(num_layers))

# Create experiment lists and run experiments

* batch 1 = run frozen MLPs and LRCNNs and all concats

* batch 2 = run trainable MLP and LRCNN on best performing frozen variants

* batch 3 = run trainable but initializing with best CNN weights

* batch 4 = run C3D models

## Experiment batch 1

In [79]:
# name of this experiment batch (also used as the filename of the saved experiment list)
experiment_batch_name = "experiment_batch_1"

In [80]:
# start with an empty experiment list - the cells below append one parameter dict per experiment
experiments = list()

In [81]:
# hyper-parameter grid for this batch
pooling = "max"

# candidate sizes for each of the 3 layers
layer_sizes = [512, 256, 128, 0]
dropouts = [0.2]

# number of frames per sequence
sequence_lengths = [1, 3, 5, 10, 20]

# sequence model types and how many sequence layers to stack
sequence_models = [
    "LSTM",
    "SimpleRNN",
    "GRU",
    "Convolution1D",
]
sequence_model_layer_counts = list(range(1, 4))

In [82]:
####################
### image_MLP_frozen 
####################

# one experiment per (pretrained model, layer 1 size, layer 2 size, layer 3 size, dropout) 
# combination - product iterates in the same order as the equivalent nested loops
for pretrained_model_name, layer_1_size, layer_2_size, layer_3_size, dropout in itertools.product(
        pretrained_model_names, layer_sizes, layer_sizes, layer_sizes, dropouts):

    # build experiment parameters (single-frame model so sequence length is always 1)
    experiment = {
        'architecture': 'image_MLP_frozen',
        'sequence_length': 1,
        'pretrained_model_name': pretrained_model_name,
        'layer_1_size': layer_1_size,
        'layer_2_size': layer_2_size,
        'layer_3_size': layer_3_size,
        'dropout': dropout,
        'pooling': 'max', # outperforms avg across all parameters
    }

    # add to list of experiments
    experiments.append(experiment)

In [83]:
####################
### video_MLP_concat
####################

# one experiment per (sequence length, pretrained model, layer 1 size, layer 2 size, layer 3 size, dropout) 
# combination - product iterates in the same order as the equivalent nested loops
for sequence_length, pretrained_model_name, layer_1_size, layer_2_size, layer_3_size, dropout in itertools.product(
        sequence_lengths, pretrained_model_names, layer_sizes, layer_sizes, layer_sizes, dropouts):

    # build experiment parameters
    # sequence_length must be stored on the experiment: without it the sequence_lengths loop 
    # only produced identical copies of each experiment with no sequence length set
    # NOTE(review): sequence_lengths includes 1 - confirm video_MLP_concat accepts single-frame sequences
    experiment = {
        'architecture': 'video_MLP_concat',
        'sequence_length': sequence_length,
        'pretrained_model_name': pretrained_model_name,
        'layer_1_size': layer_1_size,
        'layer_2_size': layer_2_size,
        'layer_3_size': layer_3_size,
        'dropout': dropout,
        'pooling': 'max', # outperforms avg across all parameters
    }

    # add to list of experiments
    experiments.append(experiment)

In [84]:
######################
### video_LRCNN_frozen
######################

# one experiment per (sequence length, pretrained model, layer sizes, dropout, sequence model, 
# number of sequence layers) combination - product iterates in the same order as the equivalent nested loops
lrcnn_grid = itertools.product(sequence_lengths, pretrained_model_names, 
                               layer_sizes, layer_sizes, layer_sizes, 
                               dropouts, sequence_models, sequence_model_layer_counts)

for (sequence_length, pretrained_model_name, layer_1_size, layer_2_size, layer_3_size, 
     dropout, sequence_model, sequence_model_layers) in lrcnn_grid:

    # build experiment parameters
    # sequence_length must be stored on the experiment: without it the sequence_lengths loop 
    # only produced identical copies of each experiment with no sequence length set
    # NOTE(review): sequence_lengths includes 1 - confirm video_LRCNN_frozen accepts single-frame sequences
    experiment = {
        'architecture': 'video_LRCNN_frozen',
        'sequence_length': sequence_length,
        'pretrained_model_name': pretrained_model_name,
        'layer_1_size': layer_1_size,
        'layer_2_size': layer_2_size,
        'layer_3_size': layer_3_size,
        'dropout': dropout,
        'pooling': 'max', # outperforms avg across all parameters
        'sequence_model': sequence_model,
        'sequence_model_layers': sequence_model_layers,
    }

    # add to list of experiments
    experiments.append(experiment)

In [85]:
########################
### convert to dataframe
########################

# one row per experiment dict; keys missing from a dict become NaN in that row
experiments = pd.DataFrame(experiments)
# use the row index as the model identifier
experiments = experiments.assign(model_id = experiments.index)

In [86]:
# write the experiment table to <pwd>experiments/<experiment_batch_name>.csv
experiments.to_csv(''.join([pwd, "experiments/", experiment_batch_name, '.csv']))

In [None]:
###################
### Run experiments
###################

# Architecture keyword arguments that are taken verbatim from each experiment row
architecture_arg_names = ['model_id', 'architecture', 'sequence_length',
                          'pretrained_model_name', 'pooling',
                          'sequence_model', 'sequence_model_layers',
                          'layer_1_size', 'layer_2_size', 'layer_3_size',
                          'dropout']

for row in experiments.values:

    # pair every column name with this row's value to get the experiment params
    experiment = {column: value for column, value in zip(experiments.columns, row)}
    print(experiment)

    logging.info("Begin experiment for model_id={}".format(experiment['model_id']))

    # NOTE(review): parameters an architecture does not use arrive here as NaN
    # (see the logged row below this cell) - confirm Architecture tolerates that
    architecture_args = {name: experiment[name] for name in architecture_arg_names}
    architecture = Architecture(verbose=True, **architecture_args)

    # fit the model that was just built
    architecture.train_model()

2019-01-20 01:41:08,918 [MainThread  ] [INFO ]  Begin experiment for model_id=0
2019-01-20 01:41:08,920 [MainThread  ] [INFO ]  Loading data


{'sequence_length': 1.0, 'dropout': 0.2, 'model_id': 0, 'layer_3_size': 512, 'architecture': 'image_MLP_frozen', 'layer_1_size': 512, 'sequence_model': nan, 'sequence_model_layers': nan, 'pooling': 'max', 'layer_2_size': 512, 'pretrained_model_name': 'inception_resnet_v2'}


## batch 2

In [None]:
#######################
### image_MLP_trainable
#######################

# Placeholder for the batch-2 image_MLP_trainable experiment grid (not written yet).
# The architecture name has to match this section's banner; it used to be a
# copy of the 'video_LRCNN_trainable' assignment from the following cell.
architecture = 'image_MLP_trainable'

In [None]:
#########################
### video_LRCNN_trainable
#########################

# placeholder: only the architecture name is set, no experiment grid is built yet
architecture = "video_LRCNN_trainable"

## batch 3

In [None]:
# TODO - custom weights MLP loaded into trainable and LRCNN

## batch 4

In [None]:
#######
### C3D
#######

# placeholder: only the architecture name is set, no experiment grid is built yet
architecture = "C3D"

############
### C3Dsmall
############

# placeholder: this assignment replaces the 'C3D' value set just above
architecture = "C3Dsmall"

# Run experiments

In [None]:
# architecture = Architecture(model_id = 1221, 
#                             architecture = 'image_MLP_trainable',
#                             sequence_length = 1, 
#                             pretrained_model_name = "vgg16", 
#                             pooling = 'max', 
#                             layer_1_size=32,
#                             layer_2_size=0, 
#                             layer_3_size=0,
#                             dropout=0.2,
#                             verbose=True)

In [None]:
# build a single image_MLP_frozen model by hand (instead of looping over the experiment table)
architecture = Architecture(
    model_id=55,
    architecture='image_MLP_frozen',
    sequence_length=1,
    pretrained_model_name="vgg16",
    pooling='max',
    layer_1_size=256,
    layer_2_size=128,
    layer_3_size=0,
    dropout=0.2,
    verbose=True,
)

In [None]:
# call train_model() on whichever `architecture` object is currently bound in the kernel
architecture.train_model()

# Debug

## image_MLP_trainable

In [None]:
# Debug setup for image_MLP_trainable: hyper-parameters, data and derived sizes.
pretrained_model_name = "vgg16"
pooling = "max"
# this section builds its data and model with a sequence length of 1
# (the variable used to be set to 2 but was never used)
sequence_length = 1

layer_1_size = 128
layer_2_size = 64
layer_3_size = 32
dropout = 0.20

# return_CNN_features=False together with return_generator=True and batch_size=32
# NOTE(review): presumably this yields batches of raw frames for the trainable CNN - confirm against Data
data = Data(sequence_length = sequence_length,
            return_CNN_features = False,
            # was misspelled `pretrained_modesl_name`, which raised NameError
            pretrained_model_name = pretrained_model_name,
            pooling = pooling,
            return_generator = True,
            batch_size = 32)

# values derived from the loaded data / chosen pretrained model
num_classes = data.num_classes
frame_size = data.frame_size
num_features = pretrained_model_len_features[pretrained_model_name]

In [None]:
# image_MLP_trainable debug model: one hidden layer of 128 units (layers 2 and 3 set to 0)
architecture = Architecture(
    model_id=1,
    architecture='image_MLP_trainable',
    sequence_length=1,
    num_classes=num_classes,
    pretrained_model_name=pretrained_model_name,
    pooling='max',
    layer_1_size=128,
    layer_2_size=0,
    layer_3_size=0,
    dropout=0.2,
)

In [None]:
# fit the model held by `architecture` on `data`; the return value of fit() is not kept here
fit(model_id=1337, model = architecture.model, data = data)

## image_MLP_frozen

In [None]:
# hyper-parameters for the image_MLP_frozen debug run
pretrained_model_name, pooling = "vgg16", "max"
layer_1_size, layer_2_size, layer_3_size = 128, 64, 32
dropout = 0.20

In [None]:
# load data for single frames with return_CNN_features enabled
data = Data(
    sequence_length=1,
    return_CNN_features=True,
    pretrained_model_name=pretrained_model_name,
    pooling=pooling,
)

In [None]:
# sizes read from the loaded data
num_classes, frame_size = data.num_classes, data.frame_size
# feature-vector length looked up for the chosen pretrained model
num_features = pretrained_model_len_features[pretrained_model_name]

In [None]:
# image_MLP_frozen debug model: one hidden layer of 128 units (layers 2 and 3 set to 0)
architecture = Architecture(
    model_id=1,
    architecture='image_MLP_frozen',
    sequence_length=1,
    num_classes=num_classes,
    pretrained_model_name=pretrained_model_name,
    pooling='max',
    layer_1_size=128,
    layer_2_size=0,
    layer_3_size=0,
    dropout=0.2,
)

In [None]:
# fit the image_MLP_frozen debug model and keep what fit() returns
fit_history = fit(
    model_id=1,
    model=architecture.model,
    data=data,
    learning_rate=0.001,
    epochs=10,
)

## video_MLP_concat

In [None]:
# hyper-parameters for the video_MLP_concat debug run
pretrained_model_name, pooling = "vgg16", "max"
sequence_length = 3
layer_1_size, layer_2_size, layer_3_size = 128, 64, 32
dropout = 0.20

In [None]:
# load data for the configured sequence length with return_CNN_features enabled
data = Data(
    sequence_length=sequence_length,
    return_CNN_features=True,
    pretrained_model_name=pretrained_model_name,
    pooling=pooling,
)

In [None]:
# sizes read from the loaded data
num_classes, frame_size = data.num_classes, data.frame_size
# feature-vector length looked up for the chosen pretrained model
num_features = pretrained_model_len_features[pretrained_model_name]

In [None]:
# video_MLP_concat debug model over 3-frame sequences, one hidden layer of 128 units
architecture = Architecture(
    model_id=1,
    architecture='video_MLP_concat',
    sequence_length=3,
    num_classes=num_classes,
    pretrained_model_name=pretrained_model_name,
    pooling='max',
    layer_1_size=128,
    layer_2_size=0,
    layer_3_size=0,
    dropout=0.2,
)

In [None]:
# fit the video_MLP_concat debug model and keep what fit() returns
fit_history = fit(
    model_id=1,
    model=architecture.model,
    data=data,
    learning_rate=0.001,
    epochs=10,
)

## video_LRCNN_frozen

In [None]:
# hyper-parameters for the video_LRCNN_frozen debug run
pretrained_model_name = "vgg16"
pooling = "max"
# the assignment had no right-hand side (SyntaxError); 5 matches the value used
# by the video_LRCNN_trainable debug section - TODO confirm the intended length
sequence_length = 5
layer_1_size = 128
layer_2_size = 64
layer_3_size = 32
dropout = 0.20

In [None]:
# load data for the configured sequence length with return_CNN_features enabled
data = Data(
    sequence_length=sequence_length,
    return_CNN_features=True,
    pretrained_model_name=pretrained_model_name,
    pooling=pooling,
)

In [None]:
# video_LRCNN_frozen debug model: a single LSTM layer with hidden sizes 64 / 32 / 8
architecture = Architecture(
    model_id=1,
    architecture="video_LRCNN_frozen",
    sequence_model='LSTM',
    sequence_model_layers=1,
    sequence_length=sequence_length,
    num_classes=data.num_classes,
    frame_size=data.frame_size,
    pretrained_model_name='vgg16',
    pooling='max',
    layer_1_size=64,
    layer_2_size=32,
    layer_3_size=8,
    dropout=0.2,
    convolution_kernel_size=3,
)

In [None]:
# fit the video_LRCNN_frozen debug model for up to 50 epochs and keep what fit() returns
fit_history = fit(
    model_id=1,
    model=architecture.model,
    data=data,
    learning_rate=0.001,
    epochs=50,
)

## video_LRCNN_trainable

In [None]:
# number of frames per sequence for the video_LRCNN_trainable debug run
sequence_length = 5

In [None]:
# load data as a generator with batch size 32 and return_CNN_features disabled
data = Data(
    sequence_length=sequence_length,
    return_CNN_features=False,
    pretrained_model_name="vgg16",
    pooling="max",
    batch_size=32,
    return_generator=True,
)

In [None]:
# video_LRCNN_trainable debug model: a single LSTM layer with hidden sizes 64 / 32 / 8
architecture = Architecture(
    model_id=4,
    architecture="video_LRCNN_trainable",
    sequence_model='LSTM',
    sequence_model_layers=1,
    sequence_length=sequence_length,
    num_classes=data.num_classes,
    frame_size=data.frame_size,
    pretrained_model_name='vgg16',
    pooling='max',
    layer_1_size=64,
    layer_2_size=32,
    layer_3_size=8,
    dropout=0.2,
    convolution_kernel_size=3,
)

In [None]:
# fit the video_LRCNN_trainable debug model for up to 50 epochs and keep what fit() returns
fit_history = fit(
    model_id=1,
    model=architecture.model,
    data=data,
    learning_rate=0.001,
    epochs=50,
)

## C3D

In [None]:
# Build the C3D debug model for 16-frame sequences.
# NOTE(review): this section has no Data(...) cell of its own, so `data` is whatever
# an earlier cell left in the kernel; the closest Data(...) above is built with
# sequence_length=5 while this model is given sequence_length=16 - confirm that a
# matching 16-frame dataset is loaded before running this.
architecture = Architecture(model_id = 4,
                            architecture="C3D", 
                            sequence_length = 16,
                            num_classes = data.num_classes, 
                            frame_size = data.frame_size)

In [None]:
# fit the C3D debug model and keep what fit() returns
fit_history = fit(
    model_id=122,
    model=architecture.model,
    data=data,
    learning_rate=0.001,
    epochs=10,
)

## C3Dsmall

In [None]:
# load 16-frame sequences at 112x112 as a generator with batch size 32, logging disabled
data = Data(
    sequence_length=16,
    return_CNN_features=False,
    frame_size=(112, 112),
    return_generator=True,
    batch_size=32,
    verbose=False,
)

In [None]:
# C3Dsmall debug model for 16-frame sequences, sized from the data loaded above
architecture = Architecture(
    model_id=4,
    architecture="C3Dsmall",
    sequence_length=16,
    num_classes=data.num_classes,
    frame_size=data.frame_size,
)

In [None]:
# train model
# Pass the model held by `architecture`, as every other fit call in this debug
# section does; the bare name `model` is not assigned by any cell in this section,
# so it would raise NameError or train a stale model left over in the kernel.
fit_history = fit(model_id = 122, model = architecture.model, data = data, learning_rate = 0.001, epochs = 10)

# analyze results

In [None]:
# Gather every results file found under the models folder into one table,
# best validation accuracy first.
path_models = pwd + 'models/'

results = []

# walk the whole tree: results files are searched for in every sub-folder
for folder, subs, files in os.walk(path_models):
    for filename in files:
        if 'results.json' in filename:
            with open(os.path.abspath(os.path.join(folder, filename))) as f:
                # dedicated name so the `data` object used by the cells above
                # is not overwritten by a plain dict
                model_results = json.load(f)
            results.append(model_results)

results = pd.DataFrame(results)
# with no results files the frame has no columns and sorting would raise KeyError
if "fit_val_acc" in results.columns:
    results = results.sort_values("fit_val_acc", ascending=False)

In [None]:
# display the first 10 rows of the results table
results.head(10)