In [None]:
# Standard Library Imports
import itertools
import os
import pickle
import random
import sys
from os import listdir
from os.path import join
from random import randint
from random import shuffle

# Third-Party Imports
import keras.backend as K
from keras import optimizers
from keras import regularizers
from keras.models import model_from_json
from keras.models import load_model
from keras.models import Sequential
from keras.models import Model
from keras.layers.core import Dense
from keras.layers.core import Dropout
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import animation
import cv2
import numpy as np
from sklearn.metrics import confusion_matrix
from IPython.display import display
from JSAnimation import IPython_display
from IPython.display import HTML

# Local Imports
import c3d_model
import clip_dataset
from clip_dataset import DataGenerator
import config_clips

# Record the image dimension ordering reported by Keras; `backend` is the
# name later read by load_model() to pick the matching weight files.
backend = K.image_dim_ordering()
dim_ordering = backend
print("[Info] image_dim_order (from default ~/.keras/keras.json)={}".format(
        backend))

def play_video(vid, size):
    """
    Return an HTML <video> snippet that embeds `vid`.

    `size[0]` is written as the width attribute and `size[1]` as the
    height attribute; `vid` is inserted verbatim as the source.
    """
    width, height = size[0], size[1]
    template = (
        '\n'
        '    <video width="{}" height="{}" controls>\n'
        '      <source src={} type="video/mp4">\n'
        '    </video>'
    )
    return template.format(width, height, vid)

def get_partitions(PATH):
    """
    Return a dictionary mapping each dataset directory found in PATH
    (e.g. train/valid/test) to a shuffled list of its file paths.

    PATH is expected to be laid out as PATH/<dataset>/<class>/<file>.
    Every entry of PATH becomes a key, and every file found two levels
    below it is stored as join(PATH, dataset, class, file).
    """
    partitions = {}

    for d in listdir(PATH):
        files = []
        for c in listdir(join(PATH, d)):
            class_dir = join(PATH, d, c)
            files.extend(join(class_dir, f) for f in listdir(class_dir))
        # Randomize order
        shuffle(files)
        partitions[d] = files

    return partitions

def get_best_model(model_dir, metric='acc'):
    """
    Return path to model weights with either lowest
    loss or highest accuracy

    Only files in `model_dir` that end in '.hdf5' and contain `metric`
    in their name are considered. The score of a file is the number
    between the last '-' and the '.hdf5' suffix. The highest score wins
    when `metric` is 'acc'; the lowest wins for any other metric.

    Raises ValueError if no matching weight file exists or a matching
    file name does not end in a parseable number.
    """
    weights = [p for p in listdir(model_dir)
               if p.endswith('.hdf5') and metric in p]
    if not weights:
        raise ValueError(
            "No '.hdf5' weight files for metric {!r} found in {!r}".format(
                metric, model_dir))

    def score(name):
        # 'weights-acc-improvement-012-0.9321.hdf5' -> 0.9321
        return float(name.rsplit('.hdf5', 1)[0].rsplit('-', 1)[-1])

    # max()/min() return the first best entry, like list.index() would
    pick = max if metric == 'acc' else min
    return join(model_dir, pick(weights, key=score))

def get_labels(PATH, classes_to_nums):
    """
    Return a dictionary mapping every file path under PATH to the
    numeric label of its class.

    PATH is expected to be laid out as PATH/<dataset>/<class>/<file>.
    Each key is join(PATH, dataset, class, file) and each value is
    classes_to_nums[class]. The dataset names found are printed.

    Raises KeyError if a class directory is missing from
    `classes_to_nums`.
    """
    datasets = listdir(PATH)
    print('datasets are: {}'.format(datasets))
    labels = {}

    for d in datasets:
        for c in listdir(join(PATH, d)):
            class_dir = join(PATH, d, c)
            files = listdir(class_dir)
            num = classes_to_nums[c]
            # Paths are unique per (dataset, class, file), so updating
            # in place is equivalent to rebuilding the dict each time.
            labels.update({join(class_dir, f): num for f in files})

    return labels

def make_predictions(model, data_gen):
    """
    Pick one random batch from data_gen and run the model on it.

    Returns the batch inputs, the batch labels and the model output
    for that batch, in that order.

    Raises ValueError if `data_gen` has no batches.
    """
    # randint() includes both end points, so the upper bound must be the
    # last valid batch index rather than the number of batches.
    batch = randint(0, len(data_gen) - 1)
    data_x, data_y = data_gen[batch]
    prediction_results = model.predict_on_batch(data_x)

    return data_x, data_y, prediction_results

def plot_predictions(r, c, data_x, data_y, probs, classes):
    """
    Creates a grid of plots of predictions and groundtruth labels of
    input images

    The first r*c samples are shown, filling the r-by-c grid column by
    column. For each sample the first frame (`data_x[i, 0]`) is drawn
    with the first frame of the stored mean cube added back, and the
    title shows the rounded model output, the predicted class and the
    true class. Class `classes[0]` is chosen when the first output /
    label entry is above 0.5, otherwise `classes[1]`.
    """
    # squeeze=False keeps `axes` two-dimensional even for a single row
    # or column, so axes[row, col] always works.
    fig, axes = plt.subplots(r, c, squeeze=False)
    fig.set_figheight(r * 6)
    fig.set_figwidth(c * 10)

    # The mean cube does not depend on the sample, so load it once.
    mean_cube = np.load('models/train01_16_128_171_mean.npy')
    mean_cube = np.transpose(mean_cube, (1, 2, 3, 0))
    mean_cube = mean_cube[:, 8:120, 30:142, :]
    mean_frame = mean_cube[0, :, :, :]

    for i in range(r * c):
        img = np.asarray(data_x[i, 0, :, :, :])

        # get predicted and groundtruth labels
        pred = [round(p * 1000) / 1000 for p in probs[i]]
        cls = classes[0] if probs[i][0] > 0.5 else classes[1]
        lbl = classes[0] if data_y[i][0] > 0.5 else classes[1]

        # Add to subplot
        ax = axes[i % r, i // r]
        ax.imshow((img + mean_frame) / 256)
        ax.set_title('out:{}, pred:{}, truth:{}'.format(
            pred, cls, lbl))
    plt.show()

def plot_movie_array_js(r, c, data_x, data_y, probs, classes):
    """
    Plots an array of movies with their predictions and
    ground truth labels as the titles

    The first r*c clips of `data_x` fill the r-by-c grid column by
    column. The stored mean cube is added back to each clip before
    display. Class `classes[0]` is chosen when the first output / label
    entry is above 0.5, otherwise `classes[1]`. The animation steps
    through 16 frames.
    """
    # squeeze=False keeps `axes` two-dimensional even for a single row
    # or column, so axes[row, col] always works.
    fig, axes = plt.subplots(r, c, squeeze=False)
    fig.set_figheight(r * 10)
    fig.set_figwidth(c * 10)
    mean_cube = np.load('models/train01_16_128_171_mean.npy')
    mean_cube = np.transpose(mean_cube, (1, 2, 3, 0))
    mean_cube = mean_cube[:, 8:120, 30:142, :]

    # Titles and clips do not change between frames, so prepare them and
    # one image artist per subplot up front.
    clips = []
    images = []
    for i in range(r * c):
        # get predicted and groundtruth labels
        pred = [round(p * 1000) / 1000 for p in probs[i]]
        cls = classes[0] if probs[i][0] > 0.5 else classes[1]
        lbl = classes[0] if data_y[i][0] > 0.5 else classes[1]

        clip = (data_x[i, :, :, :, :] + mean_cube) / 256
        ax = axes[i % r, i // r]
        ax.set_title(
                    'out:{}, pred:{}, truth:{}'.format(pred, cls, lbl),
                    fontsize=20)
        clips.append(clip)
        images.append(ax.imshow(clip[0]))

    def animate(z):
        # Swap frame z into every subplot and report the updated artists.
        for im, clip in zip(images, clips):
            im.set_array(clip[z])
        return tuple(images)

    anim = animation.FuncAnimation(fig, animate, frames=16)
    display(IPython_display.display_animation(anim))

def plot_movie_js(image_array):
    """
    Display a sequence of images as an inline JavaScript animation.

    The figure is sized from the first two entries of the first
    image's shape at 72 dpi, and one animation frame is produced per
    element of `image_array`.
    """
    first_frame = image_array[0]
    n_rows, n_cols = first_frame.shape[0], first_frame.shape[1]
    dpi = 72.0
    fig = plt.figure(figsize=(n_cols/dpi, n_rows/dpi), dpi=dpi)
    frame_artist = plt.figimage(first_frame)
    plt.axis('off')

    def animate(frame_idx):
        # Swap the requested frame into the single figure image.
        frame_artist.set_array(image_array[frame_idx])
        return (frame_artist,)

    n_frames = len(image_array)
    anim = animation.FuncAnimation(fig, animate, frames=n_frames)
    display(IPython_display.display_animation(anim))

def predictions_over_entire_gen(model, data_gen):
    """
    Run the model over every batch of data_gen.

    Only the first sample of each batch is used: its label entry at
    index 1 is rounded to an int for `truth`, and the model output
    entry at index 1 is stored unrounded in `predictions`. `imgs`
    holds the batch indices. Returns (truth, predictions, imgs).
    """
    truth, predictions, imgs = [], [], []
    for batch_idx in range(len(data_gen)):
        batch_x, batch_y = data_gen[batch_idx]
        first_label = batch_y[0][1]
        truth.append(int(round(first_label)))
        first_output = model.predict_on_batch(batch_x)[0]
        predictions.append(first_output[1])
        imgs.append(batch_idx)

    return truth, predictions, imgs

def predict_class_over_entire_gen(model, data_gen):
    """
    Run the model over every batch of data_gen and return hard labels.

    Only the first sample of each batch is used: the entry at index 1
    of its label and of the model output are each rounded to an int.
    Returns (truth, predictions).
    """
    def as_class(value):
        return int(round(value))

    truth, predictions = [], []
    for batch_idx in range(len(data_gen)):
        batch_x, batch_y = data_gen[batch_idx]
        truth.append(as_class(batch_y[0][1]))
        first_output = model.predict_on_batch(batch_x)[0]
        predictions.append(as_class(first_output[1]))
    return truth, predictions

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print the confusion matrix `cm` and draw it as an annotated image.

    With `normalize=True` every row is divided by its row sum before
    printing and plotting. `classes` supplies the tick labels for both
    axes. Cell text is white on cells above half the matrix maximum
    and black otherwise.
    """
    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        cm = cm.astype('float') / row_totals
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_positions = np.arange(len(classes))
    plt.xticks(tick_positions, classes, rotation=45)
    plt.yticks(tick_positions, classes)

    cell_format = '.2f' if normalize else 'd'
    half_max = cm.max() / 2.
    n_rows, n_cols = cm.shape[0], cm.shape[1]
    for row in range(n_rows):
        for col in range(n_cols):
            text_color = "white" if cm[row, col] > half_max else "black"
            plt.text(col, row, format(cm[row, col], cell_format),
                     horizontalalignment="center",
                     color=text_color)

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

def load_model(dense_activation='relu'):
    """
    Build the pretrained network with a new two-class head.

    The architecture JSON and weights are read from './models', using
    the '_th' files when the module-level `backend` is 'th' and the
    '_tf' files otherwise. The last five layers of the pretrained
    model are removed and replaced by Dense(128) -> Dropout(0.5) ->
    Dense(128) -> Dropout(0.5) -> Dense(2, softmax), named fc7,
    dropout_1, fc8, dropout_2 and fc9.

    `dense_activation` is the activation of the two 128-unit layers.
    The returned model is not compiled.

    NOTE: this definition shadows `load_model` imported from
    keras.models at the top of the file.
    """
    pretrained_model_dir = './models'

    print("[Info] Using backend={}".format(backend))

    if backend == 'th':
        model_weight_filename = join(pretrained_model_dir, 'sports1M_weights_th.h5')
        model_json_filename = join(pretrained_model_dir, 'sports1M_weights_th.json')
    else:
        model_weight_filename = join(pretrained_model_dir, 'sports1M_weights_tf.h5')
        model_json_filename = join(pretrained_model_dir, 'sports1M_weights_tf.json')

    print("[Info] Reading model architecture...")
    # Close the JSON file as soon as its contents have been read.
    with open(model_json_filename, 'r') as json_file:
        model_pretrained = model_from_json(json_file.read())

    # Load pretrained weights
    print("[Info] Loading model weights...")
    model_pretrained.load_weights(model_weight_filename)
    print("[Info] Loading model weights -- DONE!")
    model_pretrained.compile(loss='mean_squared_error', optimizer='sgd')

    # Drop the last five layers so the new head attaches to what remains.
    for _ in range(5):
        model_pretrained.layers.pop()

    x = model_pretrained.layers[-1].output
    x = Dense(128, activation=dense_activation, name='fc7')(x)
    x = Dropout(0.5, name='dropout_1')(x)
    x = Dense(128, activation=dense_activation, name='fc8')(x)
    x = Dropout(0.5, name='dropout_2')(x)
    x = Dense(2, activation='softmax', name='fc9')(x)
    # `outputs` is the Keras 2 keyword; `output` is the deprecated spelling.
    return Model(model_pretrained.input, outputs=[x])

def run_and_save_inference_results(model, dataset_generator, path, trials=3):
    """
    Evaluate the model on a generator several times and pickle the results.

    `model.evaluate_generator` is called `trials` times on
    `dataset_generator`. The list of per-trial results is written to
    `path` with pickle and also returned.
    """
    inference_results = [
        model.evaluate_generator(generator=dataset_generator)
        for _ in range(trials)
    ]
    # The context manager closes (and flushes) the file even on error.
    with open(path, "wb") as results_file:
        pickle.dump(inference_results, results_file)
    return inference_results

def run_verbose_inference(model, model_dir, training_generator, validation_generator, testing_generator):
    """
    Evaluate three sets of weights on the train/valid/test generators.

    The model is evaluated as passed in ('final'), then with the
    checkpoint chosen by get_best_model for 'acc', then for 'loss'.
    The later two load their weights into `model` in place. Each
    result list is pickled to model_dir as
    '<metric>_<split>_results.pkl'. Training is evaluated once,
    validation and testing five times each.
    """
    # (file suffix, generator, number of trials), in evaluation order
    splits = (
        ('_training_results.pkl', training_generator, 1),
        ('_validation_results.pkl', validation_generator, 5),
        ('_testing_results.pkl', testing_generator, 5),
    )

    for metric in ('final', 'acc', 'loss'):
        # 'final' evaluates the weights the model currently holds.
        if metric != 'final':
            best_model = get_best_model(model_dir, metric=metric)
            model.load_weights(best_model)
        for suffix, generator, n_trials in splits:
            results_path = join(model_dir, metric + suffix)
            run_and_save_inference_results(model, generator, results_path, trials=n_trials)

In [None]:
# Dataset locations taken from the config module
PATH = config_clips.dataset_dir
PATH_trn = config_clips.train_data_dir
PATH_val = config_clips.valid_data_dir
PATH_test = config_clips.test_data_dir

# Class-name -> number mapping and size settings
classes_to_nums = config_clips.classes_to_nums
sizes = config_clips.sizes

# Keyword arguments passed to DataGenerator for each split
train_params = config_clips.train_params
valid_params = config_clips.valid_params
test_params = config_clips.test_params

In [None]:
# Checkpoints are written into the model directory of this training
# session; define it here so this cell does not rely on a later one.
model_dir = config_clips.models_dir

# Save the weights whenever validation accuracy improves
filepath = join(
            model_dir,
            "weights-acc-improvement-{epoch:03d}-{val_acc:.4f}.hdf5")

checkpoint_acc = ModelCheckpoint(
                            filepath,
                            monitor='val_acc',
                            verbose=1,
                            save_best_only=True,
                            mode='max')

# Save the weights whenever validation loss improves
filepath = join(
            model_dir,
            "weights-loss-improvement-{epoch:03d}-{val_loss:.4f}.hdf5")

checkpoint_loss = ModelCheckpoint(
                            filepath,
                            monitor='val_loss',
                            verbose=1,
                            save_best_only=True,
                            mode='min')

# NOTE(review): patience exceeds the 400 epochs used by the training
# cells, so early stopping cannot trigger in those runs -- confirm intent.
patience = 500
early_stopping = EarlyStopping(
                            monitor='val_loss',
                            min_delta=0,
                            patience=patience,
                            verbose=0,
                            mode='auto')

callbacks_list = [checkpoint_acc, checkpoint_loss, early_stopping]

In [None]:
# File lists per dataset and the label of every file; the generators
# below need both.
# NOTE(review): assumes PATH contains 'train', 'valid' and 'test'
# sub-directories -- confirm against config_clips.
partition = get_partitions(PATH)
labels = get_labels(PATH, classes_to_nums)

# Generators
training_generator = DataGenerator(partition['train'], labels, **train_params)
validation_generator = DataGenerator(partition['valid'], labels, **valid_params)
testing_generator = DataGenerator(partition['test'], labels, **test_params)

In [None]:
# Name of this training iteration; it names both the directory and the files
model_iteration = 'model_c3d_025'

# Everything for this iteration lives in ../models/<model_iteration>/
models_dir = join("..", "models", model_iteration)
history_name = join(models_dir, model_iteration + '_history.pkl')
model_name = join(models_dir, model_iteration + '.h5')

In [None]:
# loads unique names for specific training session
model_dir = config_clips.models_dir
model_name = config_clips.model_name
history_name = config_clips.history_name

# makes new directory to place all saved files
os.makedirs(model_dir, exist_ok=True)

# loads a model and leaves only the new dense head trainable
model = load_model(dense_activation='relu')
layers_to_train = ['fc7', 'fc8', 'fc9']
for layer in model.layers:
    layer.trainable = layer.name in layers_to_train

# compiles a model
adam = optimizers.adam()
model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=['accuracy'])
# trains a model
history = model.fit_generator(
                    generator=training_generator,
                    steps_per_epoch=20,
                    callbacks=callbacks_list,
                    validation_data=validation_generator,
                    use_multiprocessing=True,
                    epochs=400,
                    initial_epoch=0,
                    workers=6)

# Saves final model and training results
model.save(model_name)
# The context manager closes the history file once it is written.
with open(history_name, "wb") as history_file:
    pickle.dump(history.history, history_file)

# Runs inference verbosely over datasets
run_verbose_inference(model, model_dir, training_generator, validation_generator, testing_generator)

In [None]:
# loads unique names for specific training session
model_dir = config_clips.models_dir
model_name = config_clips.model_name
history_name = config_clips.history_name

# makes new directory to place all saved files
os.makedirs(model_dir, exist_ok=True)

# Stage 1: train the new head with softmax activations in fc7/fc8
model = load_model(dense_activation='softmax')
layers_to_train = ['fc7', 'fc8', 'fc9']
for layer in model.layers:
    layer.trainable = layer.name in layers_to_train

# compiles a model
adam = optimizers.adam()
model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=['accuracy'])
# trains a model
history = model.fit_generator(
                    generator=training_generator,
                    steps_per_epoch=20,
                    callbacks=callbacks_list,
                    validation_data=validation_generator,
                    use_multiprocessing=True,
                    epochs=200,
                    initial_epoch=0,
                    workers=6)

# Replace the last two characters of model_name with 'hdf5'
# (presumably turning a trailing '.h5' into '.hdf5' -- confirm).
weights_name = model_name[:-2] + 'hdf5'
model.save_weights(weights_name)

# Stage 2: retrain model with new activation, starting from stage-1 weights
model = load_model(dense_activation='relu')
model.load_weights(weights_name)
# load_model() returns a fresh, uncompiled model, so the layer freezing
# and compile step must be repeated before training can continue.
for layer in model.layers:
    layer.trainable = layer.name in layers_to_train
adam = optimizers.adam()
model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=['accuracy'])
# NOTE: `history` is overwritten here, so only stage 2 is saved below.
history = model.fit_generator(
                    generator=training_generator,
                    steps_per_epoch=20,
                    callbacks=callbacks_list,
                    validation_data=validation_generator,
                    use_multiprocessing=True,
                    epochs=400,
                    initial_epoch=200,
                    workers=6)

# Saves final model and training results
model.save(model_name)
# The context manager closes the history file once it is written.
with open(history_name, "wb") as history_file:
    pickle.dump(history.history, history_file)

# Runs inference verbosely over datasets
run_verbose_inference(model, model_dir, training_generator, validation_generator, testing_generator)

In [None]:
PATH_trn = config_clips.train_data_dir
PATH_val = config_clips.valid_data_dir
PATH_test =  config_clips.test_data_dir
PATH = config_clips.dataset_dir
sizes = config_clips.sizes
classes_to_nums = config_clips.classes_to_nums
train_params = config_clips.train_params
valid_params = config_clips.valid_params
test_params = config_clips.test_params

# File lists per dataset and the label of every file; the generators
# below need both.
# NOTE(review): assumes PATH contains 'train', 'valid' and 'test'
# sub-directories -- confirm against config_clips.
partition = get_partitions(PATH)
labels = get_labels(PATH, classes_to_nums)

# Generators
training_generator = DataGenerator(partition['train'], labels, **train_params)
validation_generator = DataGenerator(partition['valid'], labels, **valid_params)
testing_generator = DataGenerator(partition['test'], labels, **test_params)

# Default names for this iteration (replaced by the config values
# further down in this cell)
model_iteration = 'model_c3d_025'
models_dir = join("..", "models", model_iteration)
model_name = join(models_dir, model_iteration + '.h5')
history_name = join(models_dir, model_iteration + '_history.pkl')



# Checkpoints are written into the model directory of this training
# session; define it before it is first used.
model_dir = config_clips.models_dir

# Save the weights whenever validation accuracy / validation loss improves
filepath = join(model_dir, "weights-acc-improvement-{epoch:03d}-{val_acc:.4f}.hdf5")
checkpoint_acc = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
filepath = join(model_dir, "weights-loss-improvement-{epoch:03d}-{val_loss:.4f}.hdf5")
checkpoint_loss = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
callbacks_list = [checkpoint_acc, checkpoint_loss]

# loads unique names for specific training session
model_dir = config_clips.models_dir
model_name = config_clips.model_name
history_name = config_clips.history_name

# makes new directory to place all saved files
os.makedirs(model_dir, exist_ok=True)

# loads a model and leaves only the new dense head trainable
model = load_model(dense_activation='relu')
layers_to_train = ['fc7', 'fc8', 'fc9']
for layer in model.layers:
    layer.trainable = layer.name in layers_to_train

# compiles a model
adam = optimizers.adam()
model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=['accuracy'])
# trains a model
history = model.fit_generator(
                    generator=training_generator,
                    steps_per_epoch=20,
                    callbacks=callbacks_list,
                    validation_data=validation_generator,
                    use_multiprocessing=True,
                    epochs=400,
                    initial_epoch=0,
                    workers=6)

# Saves final model and training results
model.save(model_name)
# The context manager closes the history file once it is written.
with open(history_name, "wb") as history_file:
    pickle.dump(history.history, history_file)

# Runs inference verbosely over datasets
run_verbose_inference(model, model_dir, training_generator, validation_generator, testing_generator)

# loads unique names for specific training session
model_dir = config_clips.models_dir
model_name = config_clips.model_name
history_name = config_clips.history_name

# makes new directory to place all saved files
os.makedirs(model_dir, exist_ok=True)

# Stage 1: train the new head with softmax activations in fc7/fc8
model = load_model(dense_activation='softmax')
layers_to_train = ['fc7', 'fc8', 'fc9']
for layer in model.layers:
    layer.trainable = layer.name in layers_to_train

# compiles a model
adam = optimizers.adam()
model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=['accuracy'])
# trains a model
history = model.fit_generator(
                    generator=training_generator,
                    steps_per_epoch=20,
                    callbacks=callbacks_list,
                    validation_data=validation_generator,
                    use_multiprocessing=True,
                    epochs=200,
                    initial_epoch=0,
                    workers=6)

# Replace the last two characters of model_name with 'hdf5'
# (presumably turning a trailing '.h5' into '.hdf5' -- confirm).
weights_name = model_name[:-2] + 'hdf5'
model.save_weights(weights_name)

# Stage 2: retrain model with new activation, starting from stage-1 weights
model = load_model(dense_activation='relu')
model.load_weights(weights_name)
# load_model() returns a fresh, uncompiled model, so the layer freezing
# and compile step must be repeated before training can continue.
for layer in model.layers:
    layer.trainable = layer.name in layers_to_train
adam = optimizers.adam()
model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=['accuracy'])
# NOTE: `history` is overwritten here, so only stage 2 is saved below.
history = model.fit_generator(
                    generator=training_generator,
                    steps_per_epoch=20,
                    callbacks=callbacks_list,
                    validation_data=validation_generator,
                    use_multiprocessing=True,
                    epochs=400,
                    initial_epoch=200,
                    workers=6)

# Saves final model and training results
model.save(model_name)
# The context manager closes the history file once it is written.
with open(history_name, "wb") as history_file:
    pickle.dump(history.history, history_file)

# Runs inference verbosely over datasets
run_verbose_inference(model, model_dir, training_generator, validation_generator, testing_generator)