In [1]:
# Worker index for this notebook instance: it is written to CUDA_VISIBLE_DEVICES
# below and compared against the 'WORKER' column when selecting experiments to run.
WORKER_ID = 3

In [2]:
import os

# Restrict this process to one CUDA device, selected by the worker id;
# devices are ordered by PCI bus id.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": str(WORKER_ID),
})

In [3]:
# Verbosity flag: whether to log each feature and sequence status
verbose = True

In [4]:
import gc
import os
import pandas as pd
import numpy as np
import json
import datetime
import matplotlib.pyplot as plt
import itertools
import sys
sys.path.append('..')

In [5]:
# setup paths
def project_root(cwd):
    """Return the project root directory for a given working directory.

    If ``cwd`` is a folder named ``notebooks`` the root is its parent,
    otherwise ``cwd`` itself is treated as the root. Only the final path
    component is inspected, so a "notebooks" substring elsewhere in the path
    is left alone.

    Parameters
    ----------
    cwd : str
        Working directory path (as returned by ``os.getcwd()``).

    Returns
    -------
    str
        Root path that always ends with a path separator, so it can be
        prefixed to relative paths by plain string concatenation.
    """
    root = os.path.dirname(cwd) if os.path.basename(cwd) == "notebooks" else cwd
    # joining with "" appends a trailing separator only when one is missing
    return os.path.join(root, "")


pwd = project_root(os.getcwd())
path_cache = pwd + 'cache/'
path_data = pwd + 'data/'

In [6]:
# setup logging
# explicit log messages go both to the file logs.log under pwd and to the
# console stream (StreamHandler's default stream, sys.stderr)
import logging

logging.basicConfig(
    handlers=[
        logging.FileHandler("{0}/{1}.log".format(pwd, "logs")),
        logging.StreamHandler(),
    ],
    format="%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s]  %(message)s",
    level=logging.INFO,
)

# init logger (the root logger)
logger = logging.getLogger()


def handle_exception(exc_type, exc_value, exc_traceback):
    """Exception hook that reports uncaught exceptions through ``logger``.

    ``KeyboardInterrupt`` (and subclasses) is handed to the default
    ``sys.__excepthook__`` and is not logged; every other exception is
    logged at ERROR level together with its traceback.
    """
    if not issubclass(exc_type, KeyboardInterrupt):
        logger.error("Uncaught exception", exc_info=(exc_type, exc_value, exc_traceback))
    else:
        sys.__excepthook__(exc_type, exc_value, exc_traceback)


# make logger aware of any uncaught exceptions
# NOTE(review): IPython/Jupyter handles exceptions raised inside cells itself,
# so this hook may not be invoked for them - confirm.
sys.excepthook = handle_exception

In [7]:
from deepvideoclassification.architectures import Architecture

Using TensorFlow backend.


# Run experiments

In [8]:
# Name of the experiment batch to run; the experiment list is read from
# experiments/<experiment_batch_name>.csv under pwd.
experiment_batch_name = 'experiment_batch_1'

In [9]:
# read the experiment table for this batch (each row is one experiment)
experiments = pd.read_csv("{}experiments/{}.csv".format(pwd, experiment_batch_name))

In [10]:
# sanity check: (number of experiment rows, number of columns)
print(experiments.shape)

(5376, 12)


In [None]:
###################
### Run experiments
###################

# Train every experiment that is assigned to this worker and has no
# models/<model_id>/results.json yet, so re-running the cell skips the
# experiments whose results already exist.
for row in experiments.values:

    # get experiment params from dataframe row
    experiment = dict(zip(experiments.columns, row))

    # only run experiment if matches this worker id
    # (checked first: it is a cheap comparison, while the results check below
    # touches the filesystem)
    if experiment['WORKER'] != WORKER_ID:
        continue

    # only run experiment if not already run
    if os.path.exists(pwd + 'models/' + str(experiment["model_id"]) + '/results.json'):
        continue

    print(str(experiment["model_id"]) + "   " + "X"*60)
    logging.info("Begin experiment for model_id={} on GPU:{} ".format(experiment['model_id'], os.environ["CUDA_VISIBLE_DEVICES"]))
    print(experiment)

    # Drop the reference to the previous experiment's Architecture before the
    # next one is built; while the name is still bound the old object (and
    # whatever it holds) stays reachable and gc.collect() cannot reclaim it.
    architecture = None
    gc.collect()

    architecture = Architecture(model_id = experiment['model_id'],
                                architecture = experiment['architecture'],
                                sequence_length = experiment['sequence_length'],
                                pretrained_model_name = experiment['pretrained_model_name'],
                                pooling = experiment['pooling'],
                                sequence_model = experiment['sequence_model'],
                                sequence_model_layers = experiment['sequence_model_layers'],
                                layer_1_size = experiment['layer_1_size'],
                                layer_2_size = experiment['layer_2_size'],
                                layer_3_size = experiment['layer_3_size'],
                                dropout = experiment['dropout'],
                                # honour the notebook-level verbosity flag
                                verbose=verbose)

    architecture.train_model()

    # collect garbage left over from training; the last Architecture stays
    # bound to `architecture` after the loop
    gc.collect()

2019-01-20 15:05:19,488 [MainThread  ] [INFO ]  Begin experiment for model_id=195 on GPU:3 
2019-01-20 15:05:19,489 [MainThread  ] [INFO ]  Model folder exists but no results found - potential error in previous model training
2019-01-20 15:05:19,490 [MainThread  ] [INFO ]  Loading data


195   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'layer_2_size': 512, 'sequence_model': nan, 'layer_1_size': 512, 'layer_3_size': 0, 'architecture': 'video_MLP_concat', 'dropout': 0.2, 'pooling': 'max', 'pretrained_model_name': 'inception_resnet_v2', 'model_id': 195, 'WORKER': 3, 'sequence_length': 3, 'sequence_model_layers': nan}


2019-01-20 15:05:20,436 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/max/
2019-01-20 15:05:20,437 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=60553, valid=6412, test=3137
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.72360, saving model to /mnt/seals/models/195/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc did not improve from 0.72360
Epoch 3/20

Epoch 00003: val_acc did not improve from 0.72360
Epoch 4/20

Epoch 00004: val_acc did not improve from 0.72360
H1 {'loss': [4.212159820298539, 4.22041256158883, 4.220412566085288, 4.220412560974603], 'val_loss': [4.430780214736853, 4.430780214736853, 4.430780214736853, 4.430780214736853], 'val_acc': [0.7235986128570285, 0.7235986128570285, 0.7235986128570285, 0.7235986128570285], 'acc': [0.7369624300030456, 0.7367217910438794, 0.7367217903784665, 0.7367217914041472]}
stopped_epoch1 1
4
0.7235986128570285
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.72360, saving model to /mnt/seals/models/195/model_round_2.h5
Epoch 

2019-01-20 15:09:00,541 [MainThread  ] [INFO ]  {
    "architecture": "video_mlp_concat",
    "batch_size": 32,
    "convolution_kernel_size": 3,
    "data_total_rows_test": 3137,
    "data_total_rows_train": 60553,
    "data_total_rows_valid": 6412,
    "dropout": 0.2,
    "fit_best_round": 1,
    "fit_dt_test_duration_seconds": "0",
    "fit_dt_test_end": "2019-01-20 15:08:58",
    "fit_dt_test_start": "2019-01-20 15:08:58",
    "fit_dt_train_duration_seconds": "214",
    "fit_dt_train_end": "2019-01-20 15:08:57",
    "fit_dt_train_start": "2019-01-20 15:05:23",
    "fit_num_epochs": 6,
    "fit_stopped_epoch1": 1,
    "fit_stopped_epoch2": 1,
    "fit_stopped_epoch3": 1,
    "fit_test_acc": 0.11794708320051006,
    "fit_train_acc": 0.7367217910438794,
    "fit_train_loss": 4.22041256158883,
    "fit_val_acc": 0.7235986128570285,
    "fit_val_loss": 4.430780214736853,
    "frame_size": [
        299,
        299
    ],
    "layer_1_size": 512,
    "layer_2_size": 512,
    "layer_3_si

203   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'layer_2_size': 128, 'sequence_model': nan, 'layer_1_size': 512, 'layer_3_size': 0, 'architecture': 'video_MLP_concat', 'dropout': 0.2, 'pooling': 'max', 'pretrained_model_name': 'inception_resnet_v2', 'model_id': 203, 'WORKER': 3, 'sequence_length': 3, 'sequence_model_layers': nan}


2019-01-20 15:09:04,544 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/max/
2019-01-20 15:09:04,547 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=60553, valid=6412, test=3137
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.89555, saving model to /mnt/seals/models/203/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc improved from 0.89555 to 0.90594, saving model to /mnt/seals/models/203/model_round_1.h5
Epoch 3/20

Epoch 00003: val_acc did not improve from 0.90594
Epoch 4/20

Epoch 00004: val_acc did not improve from 0.90594
Epoch 5/20

Epoch 00005: val_acc did not improve from 0.90594
H1 {'loss': [1.8159246559025675, 0.19889709713690215, 0.170300129238424, 0.1584465566402675, 0.15375541910983656], 'val_loss': [0.2852161432234704, 0.22536646345594266, 0.20489545479760793, 0.19699724153362805, 0.23888744974389198], 'val_acc': [0.8955530010754067, 0.9059353261460978, 0.9000312126359564, 0.9054006093454152, 0.8963327877832367], 'acc': [0.867320495047656, 0.9220234701706806, 0.9307265885283413, 0.9341922636093369, 0.93663404314803]}

2019-01-20 15:15:27,258 [MainThread  ] [INFO ]  {
    "architecture": "video_mlp_concat",
    "batch_size": 32,
    "convolution_kernel_size": 3,
    "data_total_rows_test": 3137,
    "data_total_rows_train": 60553,
    "data_total_rows_valid": 6412,
    "dropout": 0.2,
    "fit_best_round": 3,
    "fit_dt_test_duration_seconds": "0",
    "fit_dt_test_end": "2019-01-20 15:15:25",
    "fit_dt_test_start": "2019-01-20 15:15:25",
    "fit_dt_train_duration_seconds": "376",
    "fit_dt_train_end": "2019-01-20 15:15:24",
    "fit_dt_train_start": "2019-01-20 15:09:07",
    "fit_num_epochs": 15,
    "fit_stopped_epoch1": 2,
    "fit_stopped_epoch2": 4,
    "fit_stopped_epoch3": 6,
    "fit_test_acc": 0.5913292955052598,
    "fit_train_acc": 0.9502702639612548,
    "fit_train_loss": 0.12130598592922169,
    "fit_val_acc": 0.9120399397161701,
    "fit_val_loss": 0.2179687001410679,
    "frame_size": [
        299,
        299
    ],
    "layer_1_size": 512,
    "layer_2_size": 128,
    "layer_

211   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'layer_2_size': 512, 'sequence_model': nan, 'layer_1_size': 256, 'layer_3_size': 0, 'architecture': 'video_MLP_concat', 'dropout': 0.2, 'pooling': 'max', 'pretrained_model_name': 'inception_resnet_v2', 'model_id': 211, 'WORKER': 3, 'sequence_length': 3, 'sequence_model_layers': nan}


2019-01-20 15:15:31,891 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/max/
2019-01-20 15:15:31,892 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=60553, valid=6412, test=3137
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.91122, saving model to /mnt/seals/models/211/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc improved from 0.91122 to 0.91369, saving model to /mnt/seals/models/211/model_round_1.h5
Epoch 3/20

Epoch 00003: val_acc did not improve from 0.91369
Epoch 4/20

Epoch 00004: val_acc improved from 0.91369 to 0.91433, saving model to /mnt/seals/models/211/model_round_1.h5
Epoch 5/20

Epoch 00005: val_acc did not improve from 0.91433
Epoch 6/20

Epoch 00006: val_acc improved from 0.91433 to 0.91514, saving model to /mnt/seals/models/211/model_round_1.h5
Epoch 7/20

Epoch 00007: val_acc did not improve from 0.91514
Epoch 8/20

Epoch 00008: val_acc did not improve from 0.91514
Epoch 9/20

Epoch 00009: val_acc did not improve from 0.91514
H1 {'loss': [0.3561396125900364, 0.21518645189081023, 0.21218417022420502, 0.209905

2019-01-20 15:20:15,629 [MainThread  ] [INFO ]  {
    "architecture": "video_mlp_concat",
    "batch_size": 32,
    "convolution_kernel_size": 3,
    "data_total_rows_test": 3137,
    "data_total_rows_train": 60553,
    "data_total_rows_valid": 6412,
    "dropout": 0.2,
    "fit_best_round": 2,
    "fit_dt_test_duration_seconds": "0",
    "fit_dt_test_end": "2019-01-20 15:20:13",
    "fit_dt_test_start": "2019-01-20 15:20:13",
    "fit_dt_train_duration_seconds": "277",
    "fit_dt_train_end": "2019-01-20 15:20:12",
    "fit_dt_train_start": "2019-01-20 15:15:35",
    "fit_num_epochs": 12,
    "fit_stopped_epoch1": 6,
    "fit_stopped_epoch2": 2,
    "fit_stopped_epoch3": 1,
    "fit_test_acc": 0.43544788014026137,
    "fit_train_acc": 0.917191817350274,
    "fit_train_loss": 0.2002676573504154,
    "fit_val_acc": 0.913644091568362,
    "fit_val_loss": 0.22409575920103494,
    "frame_size": [
        299,
        299
    ],
    "layer_1_size": 256,
    "layer_2_size": 512,
    "layer_3

219   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'layer_2_size': 128, 'sequence_model': nan, 'layer_1_size': 256, 'layer_3_size': 0, 'architecture': 'video_MLP_concat', 'dropout': 0.2, 'pooling': 'max', 'pretrained_model_name': 'inception_resnet_v2', 'model_id': 219, 'WORKER': 3, 'sequence_length': 3, 'sequence_model_layers': nan}


2019-01-20 15:20:19,395 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/max/
2019-01-20 15:20:19,397 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=60553, valid=6412, test=3137
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.89558, saving model to /mnt/seals/models/219/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc improved from 0.89558 to 0.89578, saving model to /mnt/seals/models/219/model_round_1.h5
Epoch 3/20

Epoch 00003: val_acc improved from 0.89578 to 0.90121, saving model to /mnt/seals/models/219/model_round_1.h5
Epoch 4/20

Epoch 00004: val_acc improved from 0.90121 to 0.90319, saving model to /mnt/seals/models/219/model_round_1.h5
Epoch 5/20

Epoch 00005: val_acc did not improve from 0.90319
Epoch 6/20

Epoch 00006: val_acc did not improve from 0.90319
Epoch 7/20

Epoch 00007: val_acc improved from 0.90319 to 0.91200, saving model to /mnt/seals/models/219/model_round_1.h5
Epoch 8/20

Epoch 00008: val_acc improved from 0.91200 to 0.91474, saving model to /mnt/seals/models/219/model_round_1.h5
Epoch 9/20

Epoch 00009: 

Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.92048, saving model to /mnt/seals/models/219/model_round_3.h5
Epoch 2/20

Epoch 00002: val_acc improved from 0.92048 to 0.92314, saving model to /mnt/seals/models/219/model_round_3.h5
Epoch 3/20

Epoch 00003: val_acc did not improve from 0.92314
Epoch 4/20

Epoch 00004: val_acc did not improve from 0.92314
Epoch 5/20

Epoch 00005: val_acc improved from 0.92314 to 0.92367, saving model to /mnt/seals/models/219/model_round_3.h5
Epoch 6/20

Epoch 00006: val_acc did not improve from 0.92367
Epoch 7/20

Epoch 00007: val_acc improved from 0.92367 to 0.92376, saving model to /mnt/seals/models/219/model_round_3.h5
Epoch 8/20

Epoch 00008: val_acc did not improve from 0.92376
Epoch 9/20

Epoch 00009: val_acc did not improve from 0.92376
Epoch 10/20

Epoch 00010: val_acc did not improve from 0.92376
H3 {'loss': [0.1321820444115903, 0.13100903315593188, 0.13242581805184664, 0.1317591584437956

2019-01-20 15:30:28,761 [MainThread  ] [INFO ]  {
    "architecture": "video_mlp_concat",
    "batch_size": 32,
    "convolution_kernel_size": 3,
    "data_total_rows_test": 3137,
    "data_total_rows_train": 60553,
    "data_total_rows_valid": 6412,
    "dropout": 0.2,
    "fit_best_round": 2,
    "fit_dt_test_duration_seconds": "0",
    "fit_dt_test_end": "2019-01-20 15:30:27",
    "fit_dt_test_start": "2019-01-20 15:30:26",
    "fit_dt_train_duration_seconds": "603",
    "fit_dt_train_end": "2019-01-20 15:30:25",
    "fit_dt_train_start": "2019-01-20 15:20:22",
    "fit_num_epochs": 33,
    "fit_stopped_epoch1": 8,
    "fit_stopped_epoch2": 15,
    "fit_stopped_epoch3": 7,
    "fit_test_acc": 0.5992986930188078,
    "fit_train_acc": 0.9480195795025217,
    "fit_train_loss": 0.1331617473981854,
    "fit_val_acc": 0.9238258818500279,
    "fit_val_loss": 0.1816392701191227,
    "frame_size": [
        299,
        299
    ],
    "layer_1_size": 256,
    "layer_2_size": 128,
    "layer_

227   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'layer_2_size': 512, 'sequence_model': nan, 'layer_1_size': 128, 'layer_3_size': 0, 'architecture': 'video_MLP_concat', 'dropout': 0.2, 'pooling': 'max', 'pretrained_model_name': 'inception_resnet_v2', 'model_id': 227, 'WORKER': 3, 'sequence_length': 3, 'sequence_model_layers': nan}


2019-01-20 15:30:33,143 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/max/
2019-01-20 15:30:33,151 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=60553, valid=6412, test=3137
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.91311, saving model to /mnt/seals/models/227/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc did not improve from 0.91311
Epoch 3/20

Epoch 00003: val_acc improved from 0.91311 to 0.91494, saving model to /mnt/seals/models/227/model_round_1.h5
Epoch 4/20

Epoch 00004: val_acc did not improve from 0.91494
Epoch 5/20

Epoch 00005: val_acc did not improve from 0.91494
Epoch 6/20

Epoch 00006: val_acc did not improve from 0.91494
H1 {'loss': [0.23255856877312114, 0.22029947115365314, 0.2144760295052045, 0.21232214172126662, 0.2109907280512278, 0.2090156845732552], 'val_loss': [0.23813047702990692, 0.2439771846764101, 0.22173411056575965, 0.22790465359381415, 0.22401591218863884, 0.22917213121281516], 'val_acc': [0.9131093748420458, 0.9094555162669566, 0.9149363073273443, 0.9142233611119961, 0.9143793212943574, 0

2019-01-20 15:34:48,525 [MainThread  ] [INFO ]  {
    "architecture": "video_mlp_concat",
    "batch_size": 32,
    "convolution_kernel_size": 3,
    "data_total_rows_test": 3137,
    "data_total_rows_train": 60553,
    "data_total_rows_valid": 6412,
    "dropout": 0.2,
    "fit_best_round": 1,
    "fit_dt_test_duration_seconds": "0",
    "fit_dt_test_end": "2019-01-20 15:34:46",
    "fit_dt_test_start": "2019-01-20 15:34:45",
    "fit_dt_train_duration_seconds": "248",
    "fit_dt_train_end": "2019-01-20 15:34:44",
    "fit_dt_train_start": "2019-01-20 15:30:36",
    "fit_num_epochs": 10,
    "fit_stopped_epoch1": 3,
    "fit_stopped_epoch2": 2,
    "fit_stopped_epoch3": 2,
    "fit_test_acc": 0.4430985017532675,
    "fit_train_acc": 0.9143253774528209,
    "fit_train_loss": 0.21232214172126662,
    "fit_val_acc": 0.9142233611119961,
    "fit_val_loss": 0.22790465359381415,
    "frame_size": [
        299,
        299
    ],
    "layer_1_size": 128,
    "layer_2_size": 512,
    "layer

235   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'layer_2_size': 128, 'sequence_model': nan, 'layer_1_size': 128, 'layer_3_size': 0, 'architecture': 'video_MLP_concat', 'dropout': 0.2, 'pooling': 'max', 'pretrained_model_name': 'inception_resnet_v2', 'model_id': 235, 'WORKER': 3, 'sequence_length': 3, 'sequence_model_layers': nan}


2019-01-20 15:34:51,892 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/max/
2019-01-20 15:34:51,894 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=60553, valid=6412, test=3137
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.91442, saving model to /mnt/seals/models/235/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc improved from 0.91442 to 0.91460, saving model to /mnt/seals/models/235/model_round_1.h5
Epoch 3/20

Epoch 00003: val_acc did not improve from 0.91460
Epoch 4/20

Epoch 00004: val_acc did not improve from 0.91460
Epoch 5/20

Epoch 00005: val_acc improved from 0.91460 to 0.91478, saving model to /mnt/seals/models/235/model_round_1.h5
Epoch 6/20