In [1]:
# Index of this worker. It is exported as CUDA_VISIBLE_DEVICES below and is
# compared with the WORKER column of the experiments table, so this notebook
# instance only trains the rows assigned to it.
WORKER_ID = 0

In [2]:
import os
# Pin this process to the GPU whose index equals WORKER_ID.
# NOTE(review): these variables are set before the TensorFlow-backed import
# further down; presumably they must be in place before CUDA is initialised.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": str(WORKER_ID),
})

In [3]:
# Verbosity flag: whether to log each feature and sequence status.
# NOTE(review): presumably meant for the `verbose` argument of Architecture — confirm.
verbose = True

In [4]:
import gc
import os
import pandas as pd
import numpy as np
import json
import datetime
import matplotlib.pyplot as plt
import itertools
import sys
# Add the parent of the working directory to the import search path.
# NOTE(review): presumably so that the `deepvideoclassification` package can be
# imported when the kernel is started inside notebooks/ — confirm.
sys.path.append('..')

In [5]:
# setup paths
# `pwd` is the project root with a trailing separator. When the kernel runs
# inside a directory named "notebooks", the root is its parent; otherwise the
# working directory itself is used. Only the final path component is inspected,
# so a "notebooks" substring elsewhere in the path is left untouched.
_cwd = os.getcwd()
if os.path.basename(_cwd) == "notebooks":
    _cwd = os.path.dirname(_cwd)
# os.path.join(x, "") appends exactly one trailing separator, so the string
# concatenations below (and in later cells) always yield valid paths.
pwd = os.path.join(_cwd, "")
path_cache = pwd + 'cache/'
path_data = pwd + 'data/'

In [6]:
# setup logging
# Explicit log messages and uncaught errors are written to the console
# (StreamHandler's default stream, which is sys.stderr) and to <pwd>/logs.log.
import logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s]  %(message)s",
    handlers=[
        logging.FileHandler("{0}/{1}.log".format(pwd, "logs")),
        logging.StreamHandler()
    ])
# init logger (the root logger, so messages from imported modules are captured too)
logger = logging.getLogger()


# make logger aware of any uncaught exceptions
def handle_exception(exc_type, exc_value, exc_traceback):
    """Log an uncaught exception through the root logger.

    Intended for use as ``sys.excepthook``. A KeyboardInterrupt is forwarded to
    the interpreter's default hook instead, so that interrupting a run is not
    recorded as an error.

    Args:
        exc_type: class of the exception being reported.
        exc_value: the exception instance.
        exc_traceback: traceback object attached to the exception.
    """
    if issubclass(exc_type, KeyboardInterrupt):
        sys.__excepthook__(exc_type, exc_value, exc_traceback)
        return

    logger.error("Uncaught exception", exc_info=(exc_type, exc_value, exc_traceback))


# Covers plain-Python execution (e.g. the notebook exported to a script).
sys.excepthook = handle_exception

# Inside IPython/Jupyter, exceptions raised by a cell are reported by the shell
# itself and never reach sys.excepthook, so the hook above would stay silent.
# Register the equivalent handler with the shell when one is present.
try:
    _ipython_shell = get_ipython()  # injected into builtins by IPython only
except NameError:
    _ipython_shell = None

if _ipython_shell is not None:
    def _log_cell_exception(shell, exc_type, exc_value, exc_traceback, tb_offset=None):
        """Log an exception raised by a notebook cell, then show the usual traceback."""
        logger.error("Uncaught exception", exc_info=(exc_type, exc_value, exc_traceback))
        shell.showtraceback((exc_type, exc_value, exc_traceback), tb_offset=tb_offset)

    # KeyboardInterrupt does not derive from Exception, so interrupts keep
    # their default handling, mirroring handle_exception above.
    _ipython_shell.set_custom_exc((Exception,), _log_cell_exception)

In [7]:
from deepvideoclassification.architectures import Architecture

Using TensorFlow backend.


# Run experiments

In [8]:
# Name of the experiment batch to run; it is the file stem of the CSV that is
# read from the experiments/ folder in the next cell.
experiment_batch_name = 'experiment_batch_1'

In [9]:
# Read the table of experiments for the selected batch from
# <pwd>experiments/<experiment_batch_name>.csv
experiments = pd.read_csv("{}experiments/{}.csv".format(pwd, experiment_batch_name))

In [10]:
# sanity check: (number of experiments, number of columns) in the loaded table
print(experiments.shape)

(5376, 12)


In [None]:
###################
### Run experiments
###################

# Each row of `experiments` describes one model configuration. A row is trained
# by this notebook instance only when it is assigned to WORKER_ID and no
# results.json exists yet under <pwd>models/<model_id>/, which makes the cell
# safe to re-run after an interruption.
for row in experiments.values:

    # get experiment params from dataframe row (column name -> value)
    experiment = dict(zip(experiments.columns, row))

    # only run experiment if matches this worker id
    # (cheap in-memory test first, so other workers' rows never touch the disk)
    if experiment['WORKER'] != WORKER_ID:
        continue

    # only run experiment if not already run
    if os.path.exists(pwd + 'models/' + str(experiment["model_id"]) + '/results.json'):
        continue

    print(str(experiment["model_id"]) + "   " + "X"*60)
    logger.info("Begin experiment for model_id={} on GPU:{} ".format(experiment['model_id'], os.environ["CUDA_VISIBLE_DEVICES"]))
    print(experiment)

    # NOTE(review): empty CSV cells arrive here as NaN (the printed dicts show
    # sequence_model / sequence_model_layers as nan) and are forwarded
    # unchanged — confirm Architecture treats NaN as "not set".
    architecture = Architecture(model_id = experiment['model_id'],
                                architecture = experiment['architecture'],
                                sequence_length = experiment['sequence_length'],
                                pretrained_model_name = experiment['pretrained_model_name'],
                                pooling = experiment['pooling'],
                                sequence_model = experiment['sequence_model'],
                                sequence_model_layers = experiment['sequence_model_layers'],
                                layer_1_size = experiment['layer_1_size'],
                                layer_2_size = experiment['layer_2_size'],
                                layer_3_size = experiment['layer_3_size'],
                                dropout = experiment['dropout'],
                                # honour the notebook-level verbosity flag instead of a literal
                                verbose=verbose)

    architecture.train_model()

    # NOTE(review): `architecture` is still bound at this point, so this
    # collection cannot reclaim it; the object is only released once the next
    # iteration rebinds the name.
    gc.collect()

2019-01-20 15:05:25,454 [MainThread  ] [INFO ]  Begin experiment for model_id=192 on GPU:0 
2019-01-20 15:05:25,455 [MainThread  ] [INFO ]  Model folder exists but no results found - potential error in previous model training
2019-01-20 15:05:25,456 [MainThread  ] [INFO ]  Loading data


192   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'pretrained_model_name': 'inception_resnet_v2', 'layer_3_size': 512, 'architecture': 'video_MLP_concat', 'pooling': 'max', 'layer_1_size': 512, 'model_id': 192, 'dropout': 0.2, 'sequence_length': 3, 'layer_2_size': 512, 'WORKER': 0, 'sequence_model_layers': nan, 'sequence_model': nan}


2019-01-20 15:05:26,550 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/max/
2019-01-20 15:05:26,551 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=60553, valid=6412, test=3137
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.73238, saving model to /mnt/seals/models/192/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc did not improve from 0.73238
Epoch 3/20

Epoch 00003: val_acc did not improve from 0.73238
Epoch 4/20

Epoch 00004: val_acc did not improve from 0.73238
H1 {'loss': [3.996050113182943, 4.000005363699462, 4.000005358856518, 4.000005361053561], 'val_acc': [0.7323767973792159, 0.7323767973792159, 0.7323767973792159, 0.7323767973792159], 'val_loss': [4.290063843902616, 4.290063843902616, 4.290063843902616, 4.290063843902616], 'acc': [0.7504995636878343, 0.7504712535024128, 0.7504712534216971, 0.7504712537544035]}
stopped_epoch1 1
4
0.7323767973792159
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.73238, saving model to /mnt/seals/models/192/model_round_2.h5
Epoch

2019-01-20 15:09:21,321 [MainThread  ] [INFO ]  {
    "architecture": "video_mlp_concat",
    "batch_size": 32,
    "convolution_kernel_size": 3,
    "data_total_rows_test": 3137,
    "data_total_rows_train": 60553,
    "data_total_rows_valid": 6412,
    "dropout": 0.2,
    "fit_best_round": 1,
    "fit_dt_test_duration_seconds": "0",
    "fit_dt_test_end": "2019-01-20 15:09:19",
    "fit_dt_test_start": "2019-01-20 15:09:19",
    "fit_dt_train_duration_seconds": "228",
    "fit_dt_train_end": "2019-01-20 15:09:18",
    "fit_dt_train_start": "2019-01-20 15:05:29",
    "fit_num_epochs": 6,
    "fit_stopped_epoch1": 1,
    "fit_stopped_epoch2": 1,
    "fit_stopped_epoch3": 1,
    "fit_test_acc": 0.18425247051322924,
    "fit_train_acc": 0.7504712535024128,
    "fit_train_loss": 4.000005363699462,
    "fit_val_acc": 0.7323767973792159,
    "fit_val_loss": 4.290063843902616,
    "frame_size": [
        299,
        299
    ],
    "layer_1_size": 512,
    "layer_2_size": 512,
    "layer_3_s

200   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'pretrained_model_name': 'inception_resnet_v2', 'layer_3_size': 512, 'architecture': 'video_MLP_concat', 'pooling': 'max', 'layer_1_size': 512, 'model_id': 200, 'dropout': 0.2, 'sequence_length': 3, 'layer_2_size': 128, 'WORKER': 0, 'sequence_model_layers': nan, 'sequence_model': nan}


2019-01-20 15:09:26,238 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/max/
2019-01-20 15:09:26,240 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=60553, valid=6412, test=3137
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.88862, saving model to /mnt/seals/models/200/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc improved from 0.88862 to 0.89130, saving model to /mnt/seals/models/200/model_round_1.h5
Epoch 3/20

Epoch 00003: val_acc improved from 0.89130 to 0.92879, saving model to /mnt/seals/models/200/model_round_1.h5
Epoch 4/20

Epoch 00004: val_acc did not improve from 0.92879
Epoch 5/20

Epoch 00005: val_acc did not improve from 0.92879
Epoch 6/20

Epoch 00006: val_acc did not improve from 0.92879
H1 {'loss': [0.20765446354778327, 0.15866202148249656, 0.14931247135290507, 0.1472340586318223, 0.16156191672718406, 0.16821509295968048], 'val_acc': [0.8886240217660415, 0.8912975822696817, 0.9287942435989808, 0.9080964443985552, 0.8956866761482438, 0.9239150018373729], 'val_loss': [0.22254126869881072, 0.28018461598207706, 0.

2019-01-20 15:14:11,416 [MainThread  ] [INFO ]  {
    "architecture": "video_mlp_concat",
    "batch_size": 32,
    "convolution_kernel_size": 3,
    "data_total_rows_test": 3137,
    "data_total_rows_train": 60553,
    "data_total_rows_valid": 6412,
    "dropout": 0.2,
    "fit_best_round": 2,
    "fit_dt_test_duration_seconds": "0",
    "fit_dt_test_end": "2019-01-20 15:14:09",
    "fit_dt_test_start": "2019-01-20 15:14:09",
    "fit_dt_train_duration_seconds": "279",
    "fit_dt_train_end": "2019-01-20 15:14:08",
    "fit_dt_train_start": "2019-01-20 15:09:29",
    "fit_num_epochs": 8,
    "fit_stopped_epoch1": 3,
    "fit_stopped_epoch2": 1,
    "fit_stopped_epoch3": 1,
    "fit_test_acc": 0.5304430985017532,
    "fit_train_acc": 0.9428835832525173,
    "fit_train_loss": 0.14542662033545534,
    "fit_val_acc": 0.9082969573610512,
    "fit_val_loss": 0.18742330101361068,
    "frame_size": [
        299,
        299
    ],
    "layer_1_size": 512,
    "layer_2_size": 128,
    "layer_

208   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'pretrained_model_name': 'inception_resnet_v2', 'layer_3_size': 512, 'architecture': 'video_MLP_concat', 'pooling': 'max', 'layer_1_size': 256, 'model_id': 208, 'dropout': 0.2, 'sequence_length': 3, 'layer_2_size': 512, 'WORKER': 0, 'sequence_model_layers': nan, 'sequence_model': nan}


2019-01-20 15:14:15,530 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/inception_resnet_v2/max/
2019-01-20 15:14:15,532 [MainThread  ] [INFO ]  Loading features sequence data into memory [may take a few minutes]


Done initializing data with #samples: train=60553, valid=6412, test=3137
Train on 60553 samples, validate on 6412 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.91518, saving model to /mnt/seals/models/208/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc did not improve from 0.91518
Epoch 3/20

Epoch 00003: val_acc did not improve from 0.91518
Epoch 4/20