In [1]:
# ID of this worker. It is exported as CUDA_VISIBLE_DEVICES in the next cell and
# compared against the WORKER column of the experiments table in the run loop,
# so this notebook only trains the experiments assigned to that worker.
WORKER_ID = 5

In [2]:
import os

# Restrict CUDA to the single GPU whose index equals WORKER_ID.
# PCI_BUS_ID ordering makes CUDA enumerate devices by PCI bus id.
# NOTE(review): these are set ahead of the Architecture import (which loads
# TensorFlow) -- presumably so the backend only ever sees this one GPU.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": str(WORKER_ID),
})

In [3]:
# Verbosity switch: when True, the status of each feature and each sequence
# is logged.
verbose = True

In [4]:
import gc
import os
import pandas as pd
import numpy as np
import json
import datetime
import matplotlib.pyplot as plt
import itertools
import sys
sys.path.append('..')

In [5]:
# setup paths
# `pwd` is the project root and always ends with a path separator, because the
# rest of the notebook builds paths by plain concatenation (pwd + 'cache/').
_cwd = os.path.normpath(os.getcwd())
# Strip only a trailing "notebooks" directory. A blanket
# str.replace("notebooks", "") would also rewrite any parent directory whose
# name merely contains that word, and would leave `pwd` without a trailing
# separator when the notebook is started from the project root itself.
if os.path.basename(_cwd) == "notebooks":
    _cwd = os.path.dirname(_cwd)
# joining with an empty component appends exactly one trailing separator
pwd = os.path.join(_cwd, "")
path_cache = pwd + 'cache/'
path_data = pwd + 'data/'

In [6]:
# setup logging
# every record at INFO level or above is written both to <pwd>/logs.log and to
# the console (a bare StreamHandler writes to stderr)
import logging

log_file_path = "{0}/{1}.log".format(pwd, "logs")
logging.basicConfig(
    handlers=[logging.FileHandler(log_file_path), logging.StreamHandler()],
    format="%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s]  %(message)s",
    level=logging.INFO,
)

# root logger, used by the rest of the notebook
logger = logging.getLogger()
# route uncaught exceptions through the logger
def handle_exception(exc_type, exc_value, exc_traceback):
    """Log an uncaught exception via `logger`; used as `sys.excepthook`.

    KeyboardInterrupt is not logged: it is handed to Python's default hook
    (`sys.__excepthook__`) so that interrupting a run behaves as usual.

    NOTE(review): IPython/Jupyter kernels display exceptions raised inside a
    cell themselves, so this hook may never fire for cell errors -- confirm.
    """
    exc_info = (exc_type, exc_value, exc_traceback)
    if not issubclass(exc_type, KeyboardInterrupt):
        logger.error("Uncaught exception", exc_info=exc_info)
    else:
        sys.__excepthook__(*exc_info)


sys.excepthook = handle_exception

In [7]:
from deepvideoclassification.architectures import Architecture

Using TensorFlow backend.


# Run experiments

In [8]:
# Name of the experiment batch to run: the file name (without ".csv") of the
# experiments table that the next cell reads from the "experiments/" folder.
experiment_batch_name = 'experiment_batch_1'

In [9]:
# read the experiment definitions for this batch into a DataFrame
experiments_csv_path = "{}experiments/{}.csv".format(pwd, experiment_batch_name)
experiments = pd.read_csv(experiments_csv_path)

In [10]:
# sanity check: (rows, columns) of the loaded experiments table
print(experiments.shape)

(5376, 12)


In [None]:
###################
### Run experiments
###################

# Train every experiment in the batch that is assigned to this worker and has
# no results.json yet. Experiments run one after another.
for row in experiments.values:

    # get experiment params from dataframe row
    experiment = dict(zip(experiments.columns, row))

    # only run experiment if matches this worker id
    # (checked first: it is a cheap comparison, whereas the results check
    # below hits the filesystem)
    if experiment['WORKER'] != WORKER_ID:
        continue

    # only run experiment if not already run
    results_path = pwd + 'models/' + str(experiment["model_id"]) + '/results.json'
    if os.path.exists(results_path):
        continue

    print(str(experiment["model_id"]) + "   " + "X"*60)
    logger.info("Begin experiment for model_id={} on GPU:{} ".format(experiment['model_id'], os.environ["CUDA_VISIBLE_DEVICES"]))
    print(experiment)

    try:
        architecture = Architecture(model_id = experiment['model_id'],
                                    architecture = experiment['architecture'],
                                    sequence_length = experiment['sequence_length'],
                                    pretrained_model_name = experiment['pretrained_model_name'],
                                    pooling = experiment['pooling'],
                                    sequence_model = experiment['sequence_model'],
                                    sequence_model_layers = experiment['sequence_model_layers'],
                                    layer_1_size = experiment['layer_1_size'],
                                    layer_2_size = experiment['layer_2_size'],
                                    layer_3_size = experiment['layer_3_size'],
                                    dropout = experiment['dropout'],
                                    # honour the notebook-level verbosity flag
                                    verbose = verbose)

        architecture.train_model()
    except Exception:
        # IPython shows cell exceptions itself instead of calling
        # sys.excepthook, so record the failure in the log file explicitly.
        # The exception is re-raised: the run still stops at the first failing
        # experiment, exactly as before. KeyboardInterrupt is not an Exception
        # subclass and is therefore not logged here.
        logger.exception("Experiment for model_id={} failed".format(experiment['model_id']))
        raise

    # NOTE: `architecture` is still bound at this point, so this pass cannot
    # free the model that was just trained; that only becomes reclaimable once
    # the name is rebound on the next iteration.
    gc.collect()

2019-01-20 15:05:15,376 [MainThread  ] [INFO ]  Begin experiment for model_id=197 on GPU:5 
2019-01-20 15:05:15,377 [MainThread  ] [INFO ]  Model folder exists but no results found - potential error in previous model training
2019-01-20 15:05:15,378 [MainThread  ] [INFO ]  Loading data


197   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'pretrained_model_name': 'inception_resnet_v2', 'dropout': 0.2, 'sequence_model': nan, 'layer_1_size': 512, 'sequence_length': 3, 'pooling': 'max', 'WORKER': 5, 'architecture': 'video_MLP_concat', 'layer_3_size': 256, 'layer_2_size': 256, 'sequence_model_layers': nan, 'model_id': 197}


2019-01-20 15:05:16,433 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/max/
2019-01-20 15:05:16,434 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=60553, valid=6412, test=3137
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.91271, saving model to /mnt/seals/models/197/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc did not improve from 0.91271
Epoch 3/20

Epoch 00003: val_acc did not improve from 0.91271
Epoch 4/20

Epoch 00004: val_acc did not improve from 0.91271
H1 {'loss': [0.24320616213922533, 0.17120334999237619, 0.16235863433394812, 0.15658455586477904], 'acc': [0.9189093126726294, 0.9308563434164004, 0.9333358716831245, 0.9355535266137668], 'val_loss': [0.18247627474818465, 0.19427804593101414, 0.2097525395550582, 0.21832335474003275], 'val_acc': [0.9127083379108313, 0.912663780185333, 0.9012120328913312, 0.9048213343358531]}
stopped_epoch1 1
4
0.912663780185333
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.92420, saving model to /mnt/seals/models/197/model_rou

2019-01-20 15:11:03,959 [MainThread  ] [INFO ]  {
    "architecture": "video_mlp_concat",
    "batch_size": 32,
    "convolution_kernel_size": 3,
    "data_total_rows_test": 3137,
    "data_total_rows_train": 60553,
    "data_total_rows_valid": 6412,
    "dropout": 0.2,
    "fit_best_round": 2,
    "fit_dt_test_duration_seconds": "0",
    "fit_dt_test_end": "2019-01-20 15:11:02",
    "fit_dt_test_start": "2019-01-20 15:11:02",
    "fit_dt_train_duration_seconds": "342",
    "fit_dt_train_end": "2019-01-20 15:11:01",
    "fit_dt_train_start": "2019-01-20 15:05:19",
    "fit_num_epochs": 11,
    "fit_stopped_epoch1": 1,
    "fit_stopped_epoch2": 3,
    "fit_stopped_epoch3": 4,
    "fit_test_acc": 0.5871852087982148,
    "fit_train_acc": 0.9438791695701128,
    "fit_train_loss": 0.13414381149817092,
    "fit_val_acc": 0.9236253649089311,
    "fit_val_loss": 0.17784037077084824,
    "frame_size": [
        299,
        299
    ],
    "layer_1_size": 512,
    "layer_2_size": 256,
    "layer

205   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'pretrained_model_name': 'inception_resnet_v2', 'dropout': 0.2, 'sequence_model': nan, 'layer_1_size': 512, 'sequence_length': 3, 'pooling': 'max', 'WORKER': 5, 'architecture': 'video_MLP_concat', 'layer_3_size': 256, 'layer_2_size': 0, 'sequence_model_layers': nan, 'model_id': 205}


2019-01-20 15:11:08,337 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/max/
2019-01-20 15:11:08,338 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=60553, valid=6412, test=3137
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.85768, saving model to /mnt/seals/models/205/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc improved from 0.85768 to 0.86160, saving model to /mnt/seals/models/205/model_round_1.h5
Epoch 3/20

Epoch 00003: val_acc did not improve from 0.86160
Epoch 4/20

Epoch 00004: val_acc did not improve from 0.86160
Epoch 5/20

Epoch 00005: val_acc did not improve from 0.86160
H1 {'loss': [2.0957306539356986, 1.9002683508329805, 1.8414565497337851, 1.839819560489507, 1.9285695064554695], 'acc': [0.8666316043938239, 0.8799823690830559, 0.8838467531932004, 0.884210071831382, 0.8790481223588836], 'val_loss': [2.2640017080663966, 2.200602017487129, 2.221466517864877, 2.2310699450545806, 2.309807247395673], 'val_acc': [0.857677578442705, 0.8615988014789946, 0.8606630545144369, 0.8598609843703262, 0.8554050559485918]}
stopped

2019-01-20 15:17:34,671 [MainThread  ] [INFO ]  {
    "architecture": "video_mlp_concat",
    "batch_size": 32,
    "convolution_kernel_size": 3,
    "data_total_rows_test": 3137,
    "data_total_rows_train": 60553,
    "data_total_rows_valid": 6412,
    "dropout": 0.2,
    "fit_best_round": 3,
    "fit_dt_test_duration_seconds": "0",
    "fit_dt_test_end": "2019-01-20 15:17:33",
    "fit_dt_test_start": "2019-01-20 15:17:32",
    "fit_dt_train_duration_seconds": "380",
    "fit_dt_train_end": "2019-01-20 15:17:31",
    "fit_dt_train_start": "2019-01-20 15:11:11",
    "fit_num_epochs": 17,
    "fit_stopped_epoch1": 2,
    "fit_stopped_epoch2": 1,
    "fit_stopped_epoch3": 11,
    "fit_test_acc": 0.45553076187440233,
    "fit_train_acc": 0.8912310785218266,
    "fit_train_loss": 1.7338469354505475,
    "fit_val_acc": 0.861286883940194,
    "fit_val_loss": 2.215449474858851,
    "frame_size": [
        299,
        299
    ],
    "layer_1_size": 512,
    "layer_2_size": 0,
    "layer_3_s

213   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'pretrained_model_name': 'inception_resnet_v2', 'dropout': 0.2, 'sequence_model': nan, 'layer_1_size': 256, 'sequence_length': 3, 'pooling': 'max', 'WORKER': 5, 'architecture': 'video_MLP_concat', 'layer_3_size': 256, 'layer_2_size': 256, 'sequence_model_layers': nan, 'model_id': 213}


2019-01-20 15:17:38,897 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/max/
2019-01-20 15:17:38,898 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=60553, valid=6412, test=3137
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.91157, saving model to /mnt/seals/models/213/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc did not improve from 0.91157
Epoch 3/20

Epoch 00003: val_acc did not improve from 0.91157
Epoch 4/20

Epoch 00004: val_acc improved from 0.91157 to 0.91351, saving model to /mnt/seals/models/213/model_round_1.h5
Epoch 5/20

Epoch 00005: val_acc improved from 0.91351 to 0.93207, saving model to /mnt/seals/models/213/model_round_1.h5
Epoch 6/20

Epoch 00006: val_acc did not improve from 0.93207
Epoch 7/20

Epoch 00007: val_acc did not improve from 0.93207
Epoch 8/20

Epoch 00008: val_acc did not improve from 0.93207
H1 {'loss': [0.22294157847055598, 0.17307225064700923, 0.16624755583430487, 0.16423824418635086, 0.1585081812052944, 0.15753874932914644, 0.16047105853383123, 0.18628197102745095], 'acc': [0.91644629994905

2019-01-20 15:23:18,954 [MainThread  ] [INFO ]  {
    "architecture": "video_mlp_concat",
    "batch_size": 32,
    "convolution_kernel_size": 3,
    "data_total_rows_test": 3137,
    "data_total_rows_train": 60553,
    "data_total_rows_valid": 6412,
    "dropout": 0.2,
    "fit_best_round": 2,
    "fit_dt_test_duration_seconds": "0",
    "fit_dt_test_end": "2019-01-20 15:23:17",
    "fit_dt_test_start": "2019-01-20 15:23:16",
    "fit_dt_train_duration_seconds": "333",
    "fit_dt_train_end": "2019-01-20 15:23:15",
    "fit_dt_train_start": "2019-01-20 15:17:42",
    "fit_num_epochs": 13,
    "fit_stopped_epoch1": 5,
    "fit_stopped_epoch2": 2,
    "fit_stopped_epoch3": 3,
    "fit_test_acc": 0.5495696525342684,
    "fit_train_acc": 0.9406659293081822,
    "fit_train_loss": 0.14580339691345534,
    "fit_val_acc": 0.9319356777336325,
    "fit_val_loss": 0.17260039899398294,
    "frame_size": [
        299,
        299
    ],
    "layer_1_size": 256,
    "layer_2_size": 256,
    "layer

221   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'pretrained_model_name': 'inception_resnet_v2', 'dropout': 0.2, 'sequence_model': nan, 'layer_1_size': 256, 'sequence_length': 3, 'pooling': 'max', 'WORKER': 5, 'architecture': 'video_MLP_concat', 'layer_3_size': 256, 'layer_2_size': 0, 'sequence_model_layers': nan, 'model_id': 221}


2019-01-20 15:23:22,734 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/max/
2019-01-20 15:23:22,735 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=60553, valid=6412, test=3137
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.85229, saving model to /mnt/seals/models/221/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc improved from 0.85229 to 0.90469, saving model to /mnt/seals/models/221/model_round_1.h5
Epoch 3/20

Epoch 00003: val_acc improved from 0.90469 to 0.90614, saving model to /mnt/seals/models/221/model_round_1.h5
Epoch 4/20

Epoch 00004: val_acc improved from 0.90614 to 0.90712, saving model to /mnt/seals/models/221/model_round_1.h5
Epoch 5/20

Epoch 00005: val_acc did not improve from 0.90712
Epoch 6/20

Epoch 00006: val_acc did not improve from 0.90712
Epoch 7/20

Epoch 00007: val_acc did not improve from 0.90712
H1 {'loss': [2.2888611167722943, 0.29816718907000367, 0.16090902080504862, 0.15187223632463667, 0.14309141511321827, 0.143738609264366, 0.13890177233655052], 'acc': [0.8542103688628259, 0.9216625085761423, 0

2019-01-20 15:27:57,860 [MainThread  ] [INFO ]  {
    "architecture": "video_mlp_concat",
    "batch_size": 32,
    "convolution_kernel_size": 3,
    "data_total_rows_test": 3137,
    "data_total_rows_train": 60553,
    "data_total_rows_valid": 6412,
    "dropout": 0.2,
    "fit_best_round": 3,
    "fit_dt_test_duration_seconds": "0",
    "fit_dt_test_end": "2019-01-20 15:27:56",
    "fit_dt_test_start": "2019-01-20 15:27:55",
    "fit_dt_train_duration_seconds": "268",
    "fit_dt_train_end": "2019-01-20 15:27:54",
    "fit_dt_train_start": "2019-01-20 15:23:25",
    "fit_num_epochs": 13,
    "fit_stopped_epoch1": 4,
    "fit_stopped_epoch2": 5,
    "fit_stopped_epoch3": 1,
    "fit_test_acc": 0.5852725533949634,
    "fit_train_acc": 0.9534811446497923,
    "fit_train_loss": 0.11637138327914316,
    "fit_val_acc": 0.916696389652533,
    "fit_val_loss": 0.22465775934927032,
    "frame_size": [
        299,
        299
    ],
    "layer_1_size": 256,
    "layer_2_size": 0,
    "layer_3_

229   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'pretrained_model_name': 'inception_resnet_v2', 'dropout': 0.2, 'sequence_model': nan, 'layer_1_size': 128, 'sequence_length': 3, 'pooling': 'max', 'WORKER': 5, 'architecture': 'video_MLP_concat', 'layer_3_size': 256, 'layer_2_size': 256, 'sequence_model_layers': nan, 'model_id': 229}


2019-01-20 15:28:01,545 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/max/
2019-01-20 15:28:01,547 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=60553, valid=6412, test=3137
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.89812, saving model to /mnt/seals/models/229/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc improved from 0.89812 to 0.90627, saving model to /mnt/seals/models/229/model_round_1.h5
Epoch 3/20

Epoch 00003: val_acc improved from 0.90627 to 0.91690, saving model to /mnt/seals/models/229/model_round_1.h5
Epoch 4/20

Epoch 00004: val_acc did not improve from 0.91690
Epoch 5/20

Epoch 00005: val_acc did not improve from 0.91690
Epoch 6/20

Epoch 00006: val_acc did not improve from 0.91690
H1 {'loss': [0.21117068086585236, 0.17992981256605792, 0.17229136807935616, 0.16212958797642418, 0.15760850869951548, 0.15889413861835208], 'acc': [0.9151581706901196, 0.9271193553749254, 0.9307832074984708, 0.9347042106191025, 0.9368274982600846, 0.9374102195797593], 'val_loss': [0.2161090716728923, 0.2122091553542886, 0.18150

2019-01-20 15:35:02,689 [MainThread  ] [INFO ]  {
    "architecture": "video_mlp_concat",
    "batch_size": 32,
    "convolution_kernel_size": 3,
    "data_total_rows_test": 3137,
    "data_total_rows_train": 60553,
    "data_total_rows_valid": 6412,
    "dropout": 0.2,
    "fit_best_round": 3,
    "fit_dt_test_duration_seconds": "0",
    "fit_dt_test_end": "2019-01-20 15:35:00",
    "fit_dt_test_start": "2019-01-20 15:34:59",
    "fit_dt_train_duration_seconds": "413",
    "fit_dt_train_end": "2019-01-20 15:34:58",
    "fit_dt_train_start": "2019-01-20 15:28:04",
    "fit_num_epochs": 18,
    "fit_stopped_epoch1": 3,
    "fit_stopped_epoch2": 8,
    "fit_stopped_epoch3": 4,
    "fit_test_acc": 0.5671023270640739,
    "fit_train_acc": 0.9509426382667486,
    "fit_train_loss": 0.1258159916665657,
    "fit_val_acc": 0.9187683952427327,
    "fit_val_loss": 0.1753938285841915,
    "frame_size": [
        299,
        299
    ],
    "layer_1_size": 128,
    "layer_2_size": 256,
    "layer_3

237   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'pretrained_model_name': 'inception_resnet_v2', 'dropout': 0.2, 'sequence_model': nan, 'layer_1_size': 128, 'sequence_length': 3, 'pooling': 'max', 'WORKER': 5, 'architecture': 'video_MLP_concat', 'layer_3_size': 256, 'layer_2_size': 0, 'sequence_model_layers': nan, 'model_id': 237}


2019-01-20 15:35:06,147 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/max/
2019-01-20 15:35:06,149 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=60553, valid=6412, test=3137
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.91208, saving model to /mnt/seals/models/237/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc improved from 0.91208 to 0.91269, saving model to /mnt/seals/models/237/model_round_1.h5
Epoch 3/20

Epoch 00003: val_acc did not improve from 0.91269
Epoch 4/20

Epoch 00004: val_acc did not improve from 0.91269
Epoch 5/20

Epoch 00005: val_acc did not improve from 0.91269
H1 {'loss': [0.41436027126030894, 0.23815329042243566, 0.2307074539880285, 0.22647193371791885, 0.22420895207351124], 'acc': [0.9002951648230084, 0.907651179058653, 0.9082173891652832, 0.909057266260023, 0.910465713546642], 'val_loss': [0.24612288129604837, 0.23361274934677653, 0.27088298317200676, 0.2319204617647253, 0.23855892069177634], 'val_acc': [0.91208451287269, 0.9126860651089412, 0.9106808990816436, 0.9011897555902758, 0.9048436226803143]

2019-01-20 15:39:55,328 [MainThread  ] [INFO ]  {
    "architecture": "video_mlp_concat",
    "batch_size": 32,
    "convolution_kernel_size": 3,
    "data_total_rows_test": 3137,
    "data_total_rows_train": 60553,
    "data_total_rows_valid": 6412,
    "dropout": 0.2,
    "fit_best_round": 1,
    "fit_dt_test_duration_seconds": "0",
    "fit_dt_test_end": "2019-01-20 15:39:53",
    "fit_dt_test_start": "2019-01-20 15:39:52",
    "fit_dt_train_duration_seconds": "281",
    "fit_dt_train_end": "2019-01-20 15:39:50",
    "fit_dt_train_start": "2019-01-20 15:35:09",
    "fit_num_epochs": 14,
    "fit_stopped_epoch1": 2,
    "fit_stopped_epoch2": 4,
    "fit_stopped_epoch3": 5,
    "fit_test_acc": 0.3726490277335034,
    "fit_train_acc": 0.9082173891652832,
    "fit_train_loss": 0.2307074539880285,
    "fit_val_acc": 0.9106808990816436,
    "fit_val_loss": 0.27088298317200676,
    "frame_size": [
        299,
        299
    ],
    "layer_1_size": 128,
    "layer_2_size": 0,
    "layer_3_

245   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'pretrained_model_name': 'inception_resnet_v2', 'dropout': 0.2, 'sequence_model': nan, 'layer_1_size': 0, 'sequence_length': 3, 'pooling': 'max', 'WORKER': 5, 'architecture': 'video_MLP_concat', 'layer_3_size': 256, 'layer_2_size': 256, 'sequence_model_layers': nan, 'model_id': 245}


2019-01-20 15:39:58,686 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/max/
2019-01-20 15:39:58,688 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=60553, valid=6412, test=3137
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.86955, saving model to /mnt/seals/models/245/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc did not improve from 0.86955
Epoch 3/20