In [1]:
# Standard Library Imports
import os
from os import listdir
from os.path import join
import sys
import random
from random import shuffle
from random import randint
import pickle

# Third-Party Imports
import keras.backend as K
from keras import optimizers
from keras import regularizers
from keras.models import model_from_json
from keras.models import load_model
from keras.models import Sequential
from keras.models import Model
from keras.layers.core import Dense
from keras.layers.core import Dropout
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import animation
import cv2
import numpy as np
from sklearn.metrics import confusion_matrix
import itertools
from IPython.display import display
from JSAnimation import IPython_display
from IPython.display import HTML

# Local Imports
import c3d_model
import clip_dataset
from clip_dataset import DataGenerator
import config_clips

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# NOTE(review): these imports live outside the notebook's import cell, and `k`
# is a second alias for the module already imported there as `K`
# (`import keras.backend as K`).
import tensorflow as tf
from keras import backend as k
 
###################################
# Configure the TensorFlow session that Keras will use.
config = tf.ConfigProto()
 
# Don't pre-allocate memory; allocate as-needed
config.gpu_options.allow_growth = True

# Register a session built from this config as the Keras backend session.
k.tensorflow_backend.set_session(tf.Session(config=config))

In [3]:
# Read the image dimension ordering Keras is configured with. This only
# queries the setting; it does not select or change the backend.
dim_ordering = K.image_dim_ordering()
print("[Info] image_dim_order (from default ~/.keras/keras.json)={}".format(
        dim_ordering))
# `backend` is the module-level name load_model() reads to choose between the
# '_th' and '_tf' pretrained weight/architecture files.
backend = dim_ordering

[Info] image_dim_order (from default ~/.keras/keras.json)=tf


In [4]:
def play_video(vid, size):
    """
    Build an HTML5 <video> snippet that embeds an MP4 file.

    vid:  path or URL of the video; it is HTML-escaped and quoted before
          being placed in the `src` attribute
    size: indexable pair whose first item is the player width and whose
          second item is the player height

    Returns the markup as a string; rendering is left to the caller.
    """
    # Local import: this helper is the only user of the stdlib html module.
    from html import escape

    # An unquoted attribute value ends at the first space, so the source is
    # quoted and escaped to keep paths with spaces, quotes or '&' intact.
    return """
    <video width="{}" height="{}" controls>
      <source src="{}" type="video/mp4">
    </video>""".format(size[0], size[1], escape(str(vid), quote=True))

In [5]:
def get_partitions(PATH):
    """
    Return dictionary that maps each dataset folder found directly under
    PATH (e.g. train/valid/test) to a shuffled list of its file paths.

    Expected layout: PATH/<dataset>/<class>/<file>

    Entries that are not directories at the dataset or class level are
    skipped, since calling listdir() on them would raise. Every entry inside
    a class folder is included as-is. Each list is shuffled with
    random.shuffle, so the order differs between calls unless the caller
    seeds the random module.
    """
    partitions = {}

    for dataset in listdir(PATH):
        dataset_dir = join(PATH, dataset)
        # Stray files beside the split folders (.DS_Store, notes, ...) are
        # not datasets.
        if not os.path.isdir(dataset_dir):
            continue

        files = []
        for class_name in listdir(dataset_dir):
            class_dir = join(dataset_dir, class_name)
            if not os.path.isdir(class_dir):
                continue
            files.extend(join(class_dir, f) for f in listdir(class_dir))

        # Randomize order
        shuffle(files)
        partitions[dataset] = files

    return partitions

In [6]:
def get_best_model(model_dir, metric='acc'):
    """
    Return path to the weight file in model_dir with the best score.

    A candidate is a file whose name ends in '.hdf5', contains `metric`,
    and whose text between the last '-' and the '.hdf5' suffix parses as a
    float (e.g. 'weights-acc-improvement-001-0.6875.hdf5' -> 0.6875).
    Matching files with a non-numeric tail are ignored.

    metric: 'acc' selects the highest score; any other value selects the
            lowest score (as used for 'loss')

    Raises ValueError when model_dir holds no candidate for `metric`.
    """
    suffix = '.hdf5'
    candidates = []

    # Sorted listing makes tie-breaking independent of filesystem order:
    # among equal scores the alphabetically first file name wins.
    for name in sorted(listdir(model_dir)):
        if not name.endswith(suffix) or metric not in name:
            continue
        score_text = name[:-len(suffix)].rsplit('-', 1)[-1]
        try:
            score = float(score_text)
        except ValueError:
            # Name mentions the metric but carries no score; not a
            # checkpoint written in the expected format.
            continue
        candidates.append((score, name))

    if not candidates:
        raise ValueError(
            'No "{}" weight files (*{}) found in {}'.format(metric, suffix, model_dir))

    scores = [score for score, _ in candidates]
    best_val = max(scores) if metric == 'acc' else min(scores)
    best_model = candidates[scores.index(best_val)][1]
    return join(model_dir, best_model)

In [7]:
def get_labels(PATH, classes_to_nums):
    """
    Return dictionary that maps every file path under PATH to the number
    of the class folder it sits in.

    Expected layout: PATH/<dataset>/<class>/<file>

    PATH:            root folder holding one sub-folder per dataset
    classes_to_nums: dictionary from class folder name to class number

    Entries that are not directories at the dataset or class level are
    skipped. Raises KeyError when a class folder name is missing from
    classes_to_nums.
    """
    datasets = [d for d in listdir(PATH) if os.path.isdir(join(PATH, d))]
    print('datasets are: {}'.format(datasets))
    labels = {}

    for d in datasets:
        for c in listdir(join(PATH, d)):
            class_dir = join(PATH, d, c)
            # Stray files beside the class folders carry no label.
            if not os.path.isdir(class_dir):
                continue
            num = classes_to_nums[c]
            # Paths are unique per (dataset, class, file), so updating in
            # place gives the same mapping as rebuilding the dict each time.
            labels.update((join(class_dir, f), num) for f in listdir(class_dir))

    return labels

In [8]:
def load_model(dense_activation='relu'):
    """
    Build the network used for fine-tuning: the pretrained model stored
    under ./models with its last five layers replaced by a new head.

    The new head is Dense(128) 'fc7' -> Dropout(0.5) -> Dense(128) 'fc8'
    -> Dropout(0.5) -> Dense(2, softmax) 'fc9'.

    dense_activation: activation used by the two new 128-unit Dense layers

    Returns an uncompiled keras Model that shares the pretrained model's
    input. Reads the module-level `backend` value to choose between the
    '_th' and '_tf' weight/architecture files.

    NOTE(review): this definition shadows `keras.models.load_model`, which
    the notebook's import cell also brings in under the same name.
    """
    pretrained_model_dir = './models'
    # The new head is attached to the output of the layer that sits this
    # many positions before the end of the pretrained model.
    layers_to_drop = 5

    print("[Info] Using backend={}".format(backend))

    file_tag = 'th' if backend == 'th' else 'tf'
    model_weight_filename = join(
        pretrained_model_dir, 'sports1M_weights_{}.h5'.format(file_tag))
    model_json_filename = join(
        pretrained_model_dir, 'sports1M_weights_{}.json'.format(file_tag))

    print("[Info] Reading model architecture...")
    # Context manager so the architecture file is closed right after reading.
    with open(model_json_filename, 'r') as json_file:
        model_pretrained = model_from_json(json_file.read())

    # Load pretrained weights
    print("[Info] Loading model weights...")
    model_pretrained.load_weights(model_weight_filename)
    print("[Info] Loading model weights -- DONE!")
    # Kept from the original flow; the Model returned below is not compiled
    # here.
    model_pretrained.compile(loss='mean_squared_error', optimizer='sgd')

    # Change output layer: index the cut point directly instead of calling
    # layers.pop() repeatedly, which only works when `layers` is a mutable
    # list owned by the model.
    x = model_pretrained.layers[-(layers_to_drop + 1)].output
    x = Dense(128, activation=dense_activation, name='fc7')(x)
    x = Dropout(0.5, name='dropout_1')(x)
    x = Dense(128, activation=dense_activation, name='fc8')(x)
    x = Dropout(0.5, name='dropout_2')(x)
    x = Dense(2, activation='softmax', name='fc9')(x)

    # Keras 2 keyword names; `output=` is the legacy Keras 1 spelling.
    return Model(inputs=model_pretrained.input, outputs=[x])

In [9]:
def run_and_save_inference_results(model, dataset_generator, path, trials=3):
    """
    Evaluate `model` on `dataset_generator` several times and pickle the
    collected results.

    model:             object exposing evaluate_generator(generator=...)
    dataset_generator: generator passed unchanged to every evaluation
    path:              file the list of results is pickled to (overwritten)
    trials:            number of evaluation passes

    Returns the list holding one evaluate_generator result per trial.
    """
    inference_results = [
        model.evaluate_generator(generator=dataset_generator)
        for _ in range(trials)
    ]
    # Context manager guarantees the file is flushed and closed, even if
    # pickling fails part-way.
    with open(path, "wb") as results_file:
        pickle.dump(inference_results, results_file)
    return inference_results

In [10]:
def run_verbose_inference(model, model_dir, training_generator, validation_generator, testing_generator):
    """
    Evaluate the model on all three datasets for three weight states and
    pickle each set of results into model_dir.

    The weight states are visited in this order: the weights the model
    currently holds ('final'), the best-accuracy checkpoint ('acc') and the
    best-loss checkpoint ('loss'). Checkpoints are located with
    get_best_model and loaded into `model` in place, so the model holds the
    best-loss weights when this function returns.
    """
    # (generator, result-file suffix, number of evaluation passes)
    evaluation_plan = (
        (training_generator, '_training_results.pkl', 1),
        (validation_generator, '_validation_results.pkl', 5),
        (testing_generator, '_testing_results.pkl', 5),
    )

    for metric in ('final', 'acc', 'loss'):
        # 'final' evaluates the model as handed in; the other two states
        # come from checkpoints on disk.
        if metric != 'final':
            model.load_weights(get_best_model(model_dir, metric=metric))

        for generator, suffix, trials in evaluation_plan:
            results_path = join(model_dir, metric + suffix)
            run_and_save_inference_results(model, generator, results_path, trials=trials)

## Start of special over-the-weekend run

In [11]:
# Pull the run configuration out of the local config_clips module.
# Dataset root folder; passed to get_partitions() and get_labels().
PATH = config_clips.dataset_dir
# Lookup indexed by class folder name inside get_labels().
classes_to_nums = config_clips.classes_to_nums
# Keyword arguments unpacked into DataGenerator for each dataset.
train_params = config_clips.train_params
valid_params = config_clips.valid_params
test_params = config_clips.test_params

In [12]:
# Generators
partition = get_partitions(PATH)
labels = get_labels(PATH, classes_to_nums)
training_generator = DataGenerator(partition['train'], labels, **train_params)
validation_generator = DataGenerator(partition['valid'], labels, **valid_params)
testing_generator = DataGenerator(partition['test'], labels, **test_params)

datasets are: ['test', 'valid', 'train']


In [13]:
# Imported once for the whole cell instead of on every loop iteration.
import time

# Trains one model per run index; the index selects both the output folder
# and the activation used in the new dense layers.
for i in range(4, 16):
    # loads unique names for specific training session
    # (zero-padded formatting yields the same names as before for the
    # current range and stays three digits wide for larger numbers)
    model_iteration = 'model_c3d_{:03d}'.format(26 + i)
    model_dir = join("..", "models", model_iteration)
    model_name = join(model_dir, model_iteration + '.h5')
    history_name = join(model_dir, model_iteration + '_history.pkl')

    # makes new directory to place all saved files
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    # Callbacks: one checkpoint stream per monitored quantity. The score is
    # embedded in the file name, which get_best_model() parses later.
    filepath = join(model_dir, "weights-acc-improvement-{epoch:03d}-{val_acc:.4f}.hdf5")
    checkpoint_acc = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
    filepath = join(model_dir, "weights-loss-improvement-{epoch:03d}-{val_loss:.4f}.hdf5")
    checkpoint_loss = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
    callbacks_list = [checkpoint_acc, checkpoint_loss]

    # picks an activation: 5-9 -> sigmoid, 10-14 -> softmax, otherwise relu
    if 5 <= i < 10:
        activation = 'sigmoid'
    elif 10 <= i < 15:
        activation = 'softmax'
    else:
        activation = 'relu'

    # loads a model and leaves only the new head trainable
    model = load_model(dense_activation=activation)
    layers_to_train = ['fc7', 'fc8', 'fc9']
    for layer in model.layers:
        layer.trainable = layer.name in layers_to_train

    # compiles a model (Adam is the canonical class name; the lowercase
    # `adam` spelling is only an alias)
    adam = optimizers.Adam()
    model.compile(loss='categorical_crossentropy',
                  optimizer=adam,
                  metrics=['accuracy'])

    # trains a model
    history = model.fit_generator(
        generator=training_generator,
        steps_per_epoch=20,
        callbacks=callbacks_list,
        validation_data=validation_generator,
        use_multiprocessing=True,
        epochs=1,
        initial_epoch=0,
        workers=6)

    # Saving and verbose inference were disabled in this cell (previously
    # parked inside a string literal). Kept as comments for re-enabling:
    #
    # # Saves final model and training results
    # model.save(model_name)
    # with open(history_name, "wb") as f:
    #     pickle.dump(history.history, f)
    #
    # # Runs inference verbosely over datasets
    # run_verbose_inference(model, model_dir, training_generator,
    #                       validation_generator, testing_generator)

    # Drop references and reset the Keras session before the next model is
    # built, then pause briefly.
    del history
    del model
    K.clear_session()
    time.sleep(5)
    print("Slept - allocating memory for next model")

[Info] Using backend=tf
[Info] Reading model architecture...
[Info] Loading model weights...
[Info] Loading model weights -- DONE!




Epoch 1/1

Epoch 00001: val_acc improved from -inf to 0.68750, saving model to ../models/model_c3d_030/weights-acc-improvement-001-0.6875.hdf5

Epoch 00001: val_loss improved from inf to 1.20974, saving model to ../models/model_c3d_030/weights-loss-improvement-001-1.2097.hdf5
Slept - allocating memory for next model
[Info] Using backend=tf
[Info] Reading model architecture...
[Info] Loading model weights...
[Info] Loading model weights -- DONE!
Epoch 1/1

Epoch 00001: val_acc improved from -inf to 0.66346, saving model to ../models/model_c3d_031/weights-acc-improvement-001-0.6635.hdf5

Epoch 00001: val_loss improved from inf to 0.62338, saving model to ../models/model_c3d_031/weights-loss-improvement-001-0.6234.hdf5
Slept - allocating memory for next model
[Info] Using backend=tf
[Info] Reading model architecture...
[Info] Loading model weights...
[Info] Loading model weights -- DONE!
Epoch 1/1

Epoch 00001: val_acc improved from -inf to 0.79327, saving model to ../models/model_c3d_032

Process ForkPoolWorker-97:
Process ForkPoolWorker-100:
Process ForkPoolWorker-101:
Process ForkPoolWorker-94:
Process ForkPoolWorker-98:
Process ForkPoolWorker-91:
Process ForkPoolWorker-95:
Process ForkPoolWorker-102:
Process ForkPoolWorker-96:
Process ForkPoolWorker-99:
Process ForkPoolWorker-93:
Process ForkPoolWorker-92:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/usr/lib/python3.5/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
Tra

KeyboardInterrupt: 

In [None]:
import keras

# A cell displays only its final expression, so both versions are reported
# together; as separate statements the Keras version was computed and then
# discarded.
{'keras': keras.__version__, 'tensorflow': tf.__version__}

In [None]:
# Epoch budget: phase 1 covers epochs [0, INITIAL_EPOCHS) and phase 2 resumes
# there and stops at TOTAL_EPOCHS.
INITIAL_EPOCHS = 300
TOTAL_EPOCHS = 400


def _build_frozen_model(dense_activation):
    """Return load_model(dense_activation) with only fc7/fc8/fc9 trainable, compiled with Adam."""
    new_model = load_model(dense_activation=dense_activation)
    layers_to_train = ['fc7', 'fc8', 'fc9']
    for layer in new_model.layers:
        layer.trainable = layer.name in layers_to_train
    new_model.compile(loss='categorical_crossentropy',
                      optimizer=optimizers.Adam(),
                      metrics=['accuracy'])
    return new_model


for i in range(6, 10):
    # loads unique names for specific training session
    # (zero-padded formatting yields the same names as before for the
    # current range and stays three digits wide for larger numbers)
    model_iteration = 'model_c3d_{:03d}'.format(41 + i)
    model_dir = join("..", "models", model_iteration)
    model_name = join(model_dir, model_iteration + '.h5')
    history_name1 = join(model_dir, model_iteration + '_history1.pkl')
    history_name2 = join(model_dir, model_iteration + '_history2.pkl')
    weights_name = join(model_dir, model_iteration + '.hdf5')

    # makes new directory to place all saved files
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    # Callbacks: the same two checkpoint callbacks are passed to both
    # training phases. The score is embedded in the file name, which
    # get_best_model() parses later.
    filepath = join(model_dir, "weights-acc-improvement-{epoch:03d}-{val_acc:.4f}.hdf5")
    checkpoint_acc = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
    filepath = join(model_dir, "weights-loss-improvement-{epoch:03d}-{val_loss:.4f}.hdf5")
    checkpoint_loss = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='min')
    callbacks_list = [checkpoint_acc, checkpoint_loss]

    # Arguments shared by both fit_generator calls.
    fit_kwargs = dict(
        generator=training_generator,
        steps_per_epoch=20,
        callbacks=callbacks_list,
        validation_data=validation_generator,
        use_multiprocessing=True,
        workers=6)

    # INITIAL training: dense layers use softmax activations
    model = _build_frozen_model('softmax')
    history = model.fit_generator(
        epochs=INITIAL_EPOCHS, initial_epoch=0, **fit_kwargs)

    # Only the weights are stored here; they are reloaded into the
    # phase-2 model below.
    model.save_weights(weights_name)
    with open(history_name1, "wb") as f1:
        pickle.dump(history.history, f1)

    # RETRAINING
    # picks an activation: 5-9 -> sigmoid, otherwise relu
    activation = 'sigmoid' if 5 <= i < 10 else 'relu'

    # Rebuilds the model with the new activation and continues from the
    # phase-1 weights.
    model = _build_frozen_model(activation)
    model.load_weights(weights_name)
    history = model.fit_generator(
        epochs=TOTAL_EPOCHS, initial_epoch=INITIAL_EPOCHS, **fit_kwargs)

    # Saves final model and training results
    model.save(model_name)
    with open(history_name2, "wb") as f2:
        pickle.dump(history.history, f2)

    # Runs inference verbosely over datasets
    run_verbose_inference(model, model_dir, training_generator, validation_generator, testing_generator)

    # Same per-run cleanup as the earlier training cell: drop references and
    # reset the Keras session so models do not pile up across iterations.
    del history
    del model
    K.clear_session()

## End of special over-the-weekend run