In [1]:
# Index of this worker: used as the CUDA device exposed to this process and to select
# which rows of the experiment list (column `WORKER`) this notebook instance runs.
WORKER_ID = 2

In [2]:
import os

# Restrict this process to the single GPU whose index equals WORKER_ID.
# NOTE(review): presumably this must run before the deep-learning backend is imported
# for the setting to take effect - keep this cell ahead of the Architecture import.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": str(WORKER_ID),
})

In [3]:
# Verbosity flag: whether to log each feature and sequence status.
verbose = True

In [4]:
import gc
import os
import pandas as pd
import numpy as np
import json
import datetime
import matplotlib.pyplot as plt
import itertools
import sys
sys.path.append('..')

In [5]:
# setup paths
# Project root: the parent of the `notebooks/` folder when the notebook is launched
# from there, otherwise the current working directory itself. Only a trailing
# `notebooks` path component is stripped, so directories that merely contain that
# word elsewhere in their path are left intact.
_cwd = os.getcwd()
if os.path.basename(_cwd) == "notebooks":
    _cwd = os.path.dirname(_cwd)

# `pwd` always ends with a path separator because other cells build paths by
# plain string concatenation (`pwd + 'models/'`, `pwd + 'experiments/'`, ...).
pwd = os.path.join(_cwd, "")
path_cache = pwd + 'cache/'
path_data = pwd + 'data/'

In [6]:
# setup logging
# explicit log messages and uncaught errors go to the console stream and to <pwd>/logs.log
import logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s]  %(message)s",
    handlers=[
        logging.FileHandler(os.path.join(pwd, "logs.log")),
        logging.StreamHandler()
    ])
# init logger (the root logger, so module-level logging.info(...) calls share these handlers)
logger = logging.getLogger()


# make logger aware of any uncaught exceptions
def handle_exception(exc_type, exc_value, exc_traceback):
    """Log an uncaught exception through the root logger.

    Installed as ``sys.excepthook``. ``KeyboardInterrupt`` is passed to the
    default hook so that interrupting a run is not recorded as an error.

    Args:
        exc_type: class of the exception being handled.
        exc_value: the exception instance.
        exc_traceback: traceback object associated with the exception.
    """
    if issubclass(exc_type, KeyboardInterrupt):
        sys.__excepthook__(exc_type, exc_value, exc_traceback)
        return

    logger.error("Uncaught exception", exc_info=(exc_type, exc_value, exc_traceback))
sys.excepthook = handle_exception

# IPython/Jupyter handles exceptions raised in cells itself and does not go through
# sys.excepthook, so additionally register a custom exception handler when running
# inside IPython. `get_ipython` only exists there, hence the NameError guard.
try:
    _ipython_shell = get_ipython()
except NameError:
    _ipython_shell = None

if _ipython_shell is not None:
    def _log_cell_exception(shell, exc_type, exc_value, exc_traceback, tb_offset=None):
        """Log an exception raised in a notebook cell, then show the usual traceback."""
        logger.error("Uncaught exception", exc_info=(exc_type, exc_value, exc_traceback))
        shell.showtraceback((exc_type, exc_value, exc_traceback), tb_offset=tb_offset)

    # (Exception,) deliberately excludes KeyboardInterrupt, mirroring handle_exception
    _ipython_shell.set_custom_exc((Exception,), _log_cell_exception)

In [7]:
from deepvideoclassification.architectures import Architecture

Using TensorFlow backend.


# Run experiments

In [8]:
# name of the experiment batch to run; the matching list of experiments is read
# from <pwd>/experiments/<experiment_batch_name>.csv
experiment_batch_name = 'experiment_batch_1'

In [9]:
# read the experiment definitions for this batch (one row per experiment)
experiments_csv = "{}experiments/{}.csv".format(pwd, experiment_batch_name)
experiments = pd.read_csv(experiments_csv)

In [10]:
# sanity check: (number of experiments, number of columns per experiment)
num_experiments, num_columns = experiments.shape
print((num_experiments, num_columns))

(5376, 12)


In [None]:
###################
### Run experiments
###################

# Walk the experiment list and train every experiment that is assigned to this
# worker and has no results.json yet, so an interrupted run can simply be restarted.
for row in experiments.values:

    # get experiment params from dataframe row
    experiment = dict(zip(experiments.columns, row))

    # only run experiment if matches this worker id
    # (checked first: it is a cheap comparison, whereas the check below hits the filesystem)
    if experiment['WORKER'] != WORKER_ID:
        continue

    # only run experiment if not already run
    results_path = pwd + 'models/' + str(experiment["model_id"]) + '/results.json'
    if os.path.exists(results_path):
        continue

    print(str(experiment["model_id"]) + "   " + "X"*60)
    logging.info("Begin experiment for model_id={} on GPU:{} ".format(experiment['model_id'], os.environ["CUDA_VISIBLE_DEVICES"]))
    print(experiment)

    # Drop the reference to the previous experiment's object before building the next
    # one; otherwise it stays alive until the new assignment completes and
    # gc.collect() cannot reclaim it.
    architecture = None
    gc.collect()

    architecture = Architecture(model_id = experiment['model_id'],
                                architecture = experiment['architecture'],
                                sequence_length = experiment['sequence_length'],
                                pretrained_model_name = experiment['pretrained_model_name'],
                                pooling = experiment['pooling'],
                                sequence_model = experiment['sequence_model'],
                                sequence_model_layers = experiment['sequence_model_layers'],
                                layer_1_size = experiment['layer_1_size'],
                                layer_2_size = experiment['layer_2_size'],
                                layer_3_size = experiment['layer_3_size'],
                                dropout = experiment['dropout'],
                                # honour the notebook-level verbosity flag instead of hard-coding True
                                verbose=verbose)

    architecture.train_model()

    gc.collect()

2019-01-20 15:05:21,706 [MainThread  ] [INFO ]  Begin experiment for model_id=194 on GPU:2 
2019-01-20 15:05:21,707 [MainThread  ] [INFO ]  Model folder exists but no results found - potential error in previous model training
2019-01-20 15:05:21,708 [MainThread  ] [INFO ]  Loading data


194   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'layer_3_size': 128, 'sequence_model': nan, 'WORKER': 2, 'pooling': 'max', 'sequence_model_layers': nan, 'dropout': 0.2, 'architecture': 'video_MLP_concat', 'pretrained_model_name': 'inception_resnet_v2', 'layer_1_size': 512, 'layer_2_size': 512, 'sequence_length': 3, 'model_id': 194}


2019-01-20 15:05:22,856 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/max/
2019-01-20 15:05:22,857 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=60553, valid=6412, test=3137
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.93129, saving model to /mnt/seals/models/194/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc did not improve from 0.93129
Epoch 3/20

Epoch 00003: val_acc did not improve from 0.93129
Epoch 4/20

Epoch 00004: val_acc did not improve from 0.93129
H1 {'val_acc': [0.931289565131877, 0.902949841224768, 0.9020586605750243, 0.9194145075098396], 'acc': [0.9162245331632588, 0.9315570269446328, 0.9353577102549437, 0.9394839648928526], 'loss': [0.31939463147965, 0.16979920574906596, 0.16149092381318225, 0.1522709963449768], 'val_loss': [0.16886383507932937, 0.2005481084231206, 0.21122040294217675, 0.17952063952991534]}
stopped_epoch1 1
4
0.902949841224768
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.90903, saving model to /mnt/seals/models/194/model_round_2.

2019-01-20 15:10:41,925 [MainThread  ] [INFO ]  {
    "architecture": "video_mlp_concat",
    "batch_size": 32,
    "convolution_kernel_size": 3,
    "data_total_rows_test": 3137,
    "data_total_rows_train": 60553,
    "data_total_rows_valid": 6412,
    "dropout": 0.2,
    "fit_best_round": 3,
    "fit_dt_test_duration_seconds": "0",
    "fit_dt_test_end": "2019-01-20 15:10:40",
    "fit_dt_test_start": "2019-01-20 15:10:39",
    "fit_dt_train_duration_seconds": "313",
    "fit_dt_train_end": "2019-01-20 15:10:39",
    "fit_dt_train_start": "2019-01-20 15:05:25",
    "fit_num_epochs": 10,
    "fit_stopped_epoch1": 1,
    "fit_stopped_epoch2": 3,
    "fit_stopped_epoch3": 3,
    "fit_test_acc": 0.5954733822123047,
    "fit_train_acc": 0.9550854082853909,
    "fit_train_loss": 0.11173019219498903,
    "fit_val_acc": 0.915337330984162,
    "fit_val_loss": 0.18322577914519378,
    "frame_size": [
        299,
        299
    ],
    "layer_1_size": 512,
    "layer_2_size": 512,
    "layer_

202   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'layer_3_size': 128, 'sequence_model': nan, 'WORKER': 2, 'pooling': 'max', 'sequence_model_layers': nan, 'dropout': 0.2, 'architecture': 'video_MLP_concat', 'pretrained_model_name': 'inception_resnet_v2', 'layer_1_size': 512, 'layer_2_size': 128, 'sequence_length': 3, 'model_id': 202}


2019-01-20 15:10:46,143 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/max/
2019-01-20 15:10:46,144 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=60553, valid=6412, test=3137
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.90123, saving model to /mnt/seals/models/202/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc improved from 0.90123 to 0.90386, saving model to /mnt/seals/models/202/model_round_1.h5
Epoch 3/20

Epoch 00003: val_acc improved from 0.90386 to 0.90778, saving model to /mnt/seals/models/202/model_round_1.h5
Epoch 4/20

Epoch 00004: val_acc did not improve from 0.90778
Epoch 5/20

Epoch 00005: val_acc did not improve from 0.90778
Epoch 6/20

Epoch 00006: val_acc improved from 0.90778 to 0.92784, saving model to /mnt/seals/models/202/model_round_1.h5
Epoch 7/20

Epoch 00007: val_acc did not improve from 0.92784
Epoch 8/20

Epoch 00008: val_acc did not improve from 0.92784
Epoch 9/20

Epoch 00009: val_acc did not improve from 0.92784
H1 {'val_acc': [0.9012343094859061, 0.9038633048274111, 0.9077845268597546, 0.89920



2019-01-20 15:19:48,577 [MainThread  ] [INFO ]  {
    "architecture": "video_mlp_concat",
    "batch_size": 32,
    "convolution_kernel_size": 3,
    "data_total_rows_test": 3137,
    "data_total_rows_train": 60553,
    "data_total_rows_valid": 6412,
    "dropout": 0.2,
    "fit_best_round": 3,
    "fit_dt_test_duration_seconds": "0",
    "fit_dt_test_end": "2019-01-20 15:19:47",
    "fit_dt_test_start": "2019-01-20 15:19:46",
    "fit_dt_train_duration_seconds": "536",
    "fit_dt_train_end": "2019-01-20 15:19:45",
    "fit_dt_train_start": "2019-01-20 15:10:49",
    "fit_num_epochs": 21,
    "fit_stopped_epoch1": 6,
    "fit_stopped_epoch2": 3,
    "fit_stopped_epoch3": 9,
    "fit_test_acc": 0.5878227605992987,
    "fit_train_acc": 0.9582561817798851,
    "fit_train_loss": 0.10580791605785143,
    "fit_val_acc": 0.9250512620247087,
    "fit_val_loss": 0.17193016466578318,
    "frame_size": [
        299,
        299
    ],
    "layer_1_size": 512,
    "layer_2_size": 128,
    "layer

210   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'layer_3_size': 128, 'sequence_model': nan, 'WORKER': 2, 'pooling': 'max', 'sequence_model_layers': nan, 'dropout': 0.2, 'architecture': 'video_MLP_concat', 'pretrained_model_name': 'inception_resnet_v2', 'layer_1_size': 256, 'layer_2_size': 512, 'sequence_length': 3, 'model_id': 210}


2019-01-20 15:19:52,951 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/max/
2019-01-20 15:19:52,958 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=60553, valid=6412, test=3137
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.89613, saving model to /mnt/seals/models/210/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc improved from 0.89613 to 0.90458, saving model to /mnt/seals/models/210/model_round_1.h5
Epoch 3/20

Epoch 00003: val_acc improved from 0.90458 to 0.90961, saving model to /mnt/seals/models/210/model_round_1.h5
Epoch 4/20

Epoch 00004: val_acc did not improve from 0.90961
Epoch 5/20

Epoch 00005: val_acc did not improve from 0.90961
Epoch 6/20

Epoch 00006: val_acc did not improve from 0.90961
H1 {'val_acc': [0.8961322681277675, 0.9045762575498165, 0.9096114608323804, 0.9080964515377266, 0.8735184174586443, 0.9036850729958384], 'acc': [0.9179561921597874, 0.9271382299097236, 0.9316372392045799, 0.9349896724798196, 0.9347443169449594, 0.9376980448872412], 'loss': [0.21330594390833987, 0.18405924437771626, 0.1721154807

2019-01-20 15:25:16,668 [MainThread  ] [INFO ]  {
    "architecture": "video_mlp_concat",
    "batch_size": 32,
    "convolution_kernel_size": 3,
    "data_total_rows_test": 3137,
    "data_total_rows_train": 60553,
    "data_total_rows_valid": 6412,
    "dropout": 0.2,
    "fit_best_round": 3,
    "fit_dt_test_duration_seconds": "0",
    "fit_dt_test_end": "2019-01-20 15:25:15",
    "fit_dt_test_start": "2019-01-20 15:25:14",
    "fit_dt_train_duration_seconds": "317",
    "fit_dt_train_end": "2019-01-20 15:25:13",
    "fit_dt_train_start": "2019-01-20 15:19:56",
    "fit_num_epochs": 12,
    "fit_stopped_epoch1": 3,
    "fit_stopped_epoch2": 5,
    "fit_stopped_epoch3": 1,
    "fit_test_acc": 0.5967484858144725,
    "fit_train_acc": 0.9511360925775518,
    "fit_train_loss": 0.12449507250467177,
    "fit_val_acc": 0.911482950002048,
    "fit_val_loss": 0.19053448398300654,
    "frame_size": [
        299,
        299
    ],
    "layer_1_size": 256,
    "layer_2_size": 512,
    "layer_

218   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'layer_3_size': 128, 'sequence_model': nan, 'WORKER': 2, 'pooling': 'max', 'sequence_model_layers': nan, 'dropout': 0.2, 'architecture': 'video_MLP_concat', 'pretrained_model_name': 'inception_resnet_v2', 'layer_1_size': 256, 'layer_2_size': 128, 'sequence_length': 3, 'model_id': 218}


2019-01-20 15:25:20,550 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/max/
2019-01-20 15:25:20,558 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=60553, valid=6412, test=3137
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.91405, saving model to /mnt/seals/models/218/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc improved from 0.91405 to 0.91491, saving model to /mnt/seals/models/218/model_round_1.h5
Epoch 3/20

Epoch 00003: val_acc improved from 0.91491 to 0.91540, saving model to /mnt/seals/models/218/model_round_1.h5
Epoch 4/20

Epoch 00004: val_acc improved from 0.91540 to 0.91670, saving model to /mnt/seals/models/218/model_round_1.h5
Epoch 5/20

Epoch 00005: val_acc did not improve from 0.91670
Epoch 6/20

Epoch 00006: val_acc did not improve from 0.91670
Epoch 7/20

Epoch 00007: val_acc did not improve from 0.91670
H1 {'val_acc': [0.9140451212116725, 0.9149140274978323, 0.9154041801031425, 0.9166964012536866, 0.912240468964901, 0.9147135113747656, 0.9131762150370621], 'acc': [0.9133439424165779, 0.921407716865022, 0.92

2019-01-20 15:31:51,666 [MainThread  ] [INFO ]  {
    "architecture": "video_mlp_concat",
    "batch_size": 32,
    "convolution_kernel_size": 3,
    "data_total_rows_test": 3137,
    "data_total_rows_train": 60553,
    "data_total_rows_valid": 6412,
    "dropout": 0.2,
    "fit_best_round": 2,
    "fit_dt_test_duration_seconds": "0",
    "fit_dt_test_end": "2019-01-20 15:31:49",
    "fit_dt_test_start": "2019-01-20 15:31:49",
    "fit_dt_train_duration_seconds": "384",
    "fit_dt_train_end": "2019-01-20 15:31:48",
    "fit_dt_train_start": "2019-01-20 15:25:23",
    "fit_num_epochs": 15,
    "fit_stopped_epoch1": 4,
    "fit_stopped_epoch2": 2,
    "fit_stopped_epoch3": 6,
    "fit_test_acc": 0.5639145680586548,
    "fit_train_acc": 0.9323709551007194,
    "fit_train_loss": 0.15841689425669064,
    "fit_val_acc": 0.9155378582993672,
    "fit_val_loss": 0.18314523304244987,
    "frame_size": [
        299,
        299
    ],
    "layer_1_size": 256,
    "layer_2_size": 128,
    "layer

226   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'layer_3_size': 128, 'sequence_model': nan, 'WORKER': 2, 'pooling': 'max', 'sequence_model_layers': nan, 'dropout': 0.2, 'architecture': 'video_MLP_concat', 'pretrained_model_name': 'inception_resnet_v2', 'layer_1_size': 128, 'layer_2_size': 512, 'sequence_length': 3, 'model_id': 226}


2019-01-20 15:31:55,655 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/max/
2019-01-20 15:31:55,656 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=60553, valid=6412, test=3137
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.90923, saving model to /mnt/seals/models/226/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc improved from 0.90923 to 0.91291, saving model to /mnt/seals/models/226/model_round_1.h5
Epoch 3/20

Epoch 00003: val_acc improved from 0.91291 to 0.91409, saving model to /mnt/seals/models/226/model_round_1.h5
Epoch 4/20

Epoch 00004: val_acc did not improve from 0.91409
Epoch 5/20

Epoch 00005: val_acc did not improve from 0.91409
Epoch 6/20

Epoch 00006: val_acc improved from 0.91409 to 0.91518, saving model to /mnt/seals/models/226/model_round_1.h5
Epoch 7/20

Epoch 00007: val_acc did not improve from 0.91518
Epoch 8/20

Epoch 00008: val_acc improved from 0.91518 to 0.91601, saving model to /mnt/seals/models/226/model_round_1.h5
Epoch 9/20

Epoch 00009: val_acc did not improve from 0.91601
Epoch 10/20

Epoch 0001