In [1]:
# id of this worker: used both as the GPU index exposed through
# CUDA_VISIBLE_DEVICES and to select the experiment rows whose WORKER column matches
WORKER_ID = 2

In [2]:
import os
# order GPUs by PCI bus id and expose only the device whose index equals this worker's id
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": str(WORKER_ID),
})

In [3]:
# verbosity flag: whether to log each feature and sequence status
verbose = True

In [4]:
import gc
import os
import pandas as pd
import numpy as np
import json
import datetime
import matplotlib.pyplot as plt
import itertools
import sys
sys.path.append('..')

In [5]:
# setup paths
# The project root is the parent of the "notebooks" folder this notebook runs from.
# Only a trailing "notebooks" path component is stripped (a blanket str.replace would
# also mangle any other occurrence of "notebooks" in the path), and pwd always ends
# with a path separator so that the string concatenations below stay valid even when
# the notebook is started from the project root itself.
_cwd = os.path.normpath(os.getcwd())
if os.path.basename(_cwd) == "notebooks":
    _cwd = os.path.dirname(_cwd)
pwd = os.path.join(_cwd, "")
path_cache = pwd + 'cache/'
path_data = pwd + 'data/'

In [6]:
# setup logging
# explicit log messages and uncaught errors go to stdout and to the file <pwd>/logs.log
import logging
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s]  %(message)s",
    handlers=[
        logging.FileHandler("{0}/{1}.log".format(pwd, "logs")),
        logging.StreamHandler()
    ])
# init logger
logger = logging.getLogger()
# make logger aware of any uncaught exceptions
def handle_exception(exc_type, exc_value, exc_traceback):
    """Log an uncaught exception through the root logger.

    KeyboardInterrupt is forwarded to the default excepthook and is not
    logged; every other exception is logged at ERROR level together with
    its traceback.

    Args:
        exc_type: class of the exception being handled.
        exc_value: the exception instance.
        exc_traceback: traceback object associated with the exception.
    """
    if issubclass(exc_type, KeyboardInterrupt):
        sys.__excepthook__(exc_type, exc_value, exc_traceback)
        return

    logger.error("Uncaught exception", exc_info=(exc_type, exc_value, exc_traceback))
sys.excepthook = handle_exception

def _log_cell_exception(shell, exc_type, exc_value, exc_traceback, tb_offset=None):
    """IPython custom-exception handler: log the error, then show the usual traceback."""
    handle_exception(exc_type, exc_value, exc_traceback)
    shell.showtraceback((exc_type, exc_value, exc_traceback), tb_offset=tb_offset)

# IPython handles exceptions raised in cells itself and does not call sys.excepthook,
# so the hook above only covers plain-Python runs. Register the same logging with the
# interactive shell when one is present.
try:
    get_ipython().set_custom_exc((Exception,), _log_cell_exception)
except NameError:
    # not running under IPython/Jupyter; sys.excepthook is sufficient
    pass

In [7]:
from deepvideoclassification.architectures import Architecture

Using TensorFlow backend.


# Run experiments

In [8]:
# name of the experiment batch; the matching <name>.csv is read from the experiments/ folder under pwd
experiment_batch_name = 'experiment_batch_1'

In [9]:
# read the batch's experiment definitions (one row per experiment) from its CSV file
experiments_csv_path = pwd + "experiments/" + experiment_batch_name + '.csv'
experiments = pd.read_csv(experiments_csv_path)

In [10]:
# sanity check: (number of experiment rows, number of columns) in the loaded batch
print(experiments.shape)

(5376, 12)


In [None]:
###################
### Run experiments
###################

# Walk through every experiment of the batch and train the ones that are assigned
# to this worker and do not have a results.json yet.
for row in experiments.values:

    # get experiment params from dataframe row
    experiment = dict(zip(experiments.columns, row))

    # only run experiment if matches this worker id
    # (checked first: it is an in-memory comparison, whereas the results check
    # below has to touch the filesystem for every row)
    if experiment['WORKER'] != WORKER_ID:
        continue

    # only run experiment if not already run
    if os.path.exists(pwd + 'models/' + str(experiment["model_id"]) + '/results.json'):
        continue

    print(str(experiment["model_id"]) + "   " + "X"*60)
    logging.info("Begin experiment for model_id={} on GPU:{} ".format(experiment['model_id'], os.environ["CUDA_VISIBLE_DEVICES"]))
    print(experiment)

    architecture = Architecture(model_id = experiment['model_id'],
                                architecture = experiment['architecture'],
                                sequence_length = experiment['sequence_length'],
                                pretrained_model_name = experiment['pretrained_model_name'],
                                pooling = experiment['pooling'],
                                sequence_model = experiment['sequence_model'],
                                sequence_model_layers = experiment['sequence_model_layers'],
                                layer_1_size = experiment['layer_1_size'],
                                layer_2_size = experiment['layer_2_size'],
                                layer_3_size = experiment['layer_3_size'],
                                dropout = experiment['dropout'],
                                # honour the notebook-level verbosity setting
                                verbose=verbose)

    architecture.train_model()

    # Drop the reference before collecting: while the name is still bound the
    # finished Architecture object stays reachable and gc.collect() cannot
    # reclaim it until the next iteration rebinds the name.
    del architecture
    gc.collect()

2019-01-20 13:23:04,674 [MainThread  ] [INFO ]  Begin experiment for model_id=74 on GPU:2 
2019-01-20 13:23:04,676 [MainThread  ] [INFO ]  Model folder exists but no results found - potential error in previous model training
2019-01-20 13:23:04,677 [MainThread  ] [INFO ]  Loading data


74   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'dropout': 0.2, 'sequence_model': nan, 'layer_3_size': 128, 'model_id': 74, 'sequence_model_layers': nan, 'WORKER': 2, 'pretrained_model_name': 'vgg16', 'pooling': 'max', 'layer_2_size': 128, 'architecture': 'image_MLP_frozen', 'layer_1_size': 512, 'sequence_length': 1.0}


2019-01-20 13:23:05,627 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/vgg16/max/
2019-01-20 13:23:05,628 [MainThread  ] [INFO ]  Loading features data into memory [may take a few minutes]


Done initializing data with #samples: train=60597, valid=6416, test=3139
Train on 60597 samples, validate on 6416 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.92436, saving model to /mnt/seals/models/74/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc improved from 0.92436 to 0.93193, saving model to /mnt/seals/models/74/model_round_1.h5
Epoch 3/20

Epoch 00003: val_acc did not improve from 0.93193
Epoch 4/20

Epoch 00004: val_acc did not improve from 0.93193
Epoch 5/20

Epoch 00005: val_acc did not improve from 0.93193
H1 {'loss': [0.2105474062145316, 0.13308980150982294, 0.11850671127765847, 0.11053617785668919, 0.10483298172625677], 'val_loss': [0.2054662348846545, 0.17379937189029637, 0.19515685380396997, 0.188213187560179, 0.1863066403571507], 'val_acc': [0.9243632158139102, 0.9319335784103507, 0.9239847020912646, 0.9236284449808021, 0.9260999506845736], 'acc': [0.9271652026387105, 0.9477814969181265, 0.9537459583692209, 0.9563203431315604, 0.9587462043983326]

2019-01-20 13:27:54,061 [MainThread  ] [INFO ]  {'fit_dt_train_end': '2019-01-20 13:27:51', 'fit_dt_test_duration_seconds': '0', 'num_features': 512, 'fit_stopped_epoch2': 4, 'path_model': '/mnt/seals/models/74/', 'sequence_model': nan, 'batch_size': 32, 'fit_val_loss': 0.1915887152762187, 'layer_3_size': 128, 'sequence_model_layers': nan, 'data_total_rows_valid': 6416, 'fit_dt_test_end': '2019-01-20 13:27:52', 'convolution_kernel_size': 3, 'fit_val_acc': 0.9349171883447509, 'frame_size': (224, 224), 'pooling': 'max', 'layer_2_size': 128, 'architecture': 'image_mlp_frozen', 'fit_stopped_epoch1': 2, 'sequence_length': 1.0, 'fit_test_acc': 0.6553042370181587, 'data_total_rows_train': 60597, 'dropout': 0.2, 'fit_dt_test_start': '2019-01-20 13:27:52', 'model_weights_path': None, 'fit_stopped_epoch3': 5, 'fit_dt_train_duration_seconds': '285', 'fit_train_loss': 0.06792770854866015, 'model_param_count': 345735, 'fit_dt_train_start': '2019-01-20 13:23:06', 'fit_num_epochs': 14, 'pretrained_mo

82   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'dropout': 0.2, 'sequence_model': nan, 'layer_3_size': 128, 'model_id': 82, 'sequence_model_layers': nan, 'WORKER': 2, 'pretrained_model_name': 'vgg16', 'pooling': 'max', 'layer_2_size': 512, 'architecture': 'image_MLP_frozen', 'layer_1_size': 256, 'sequence_length': 1.0}


2019-01-20 13:27:55,387 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/vgg16/max/
2019-01-20 13:27:55,388 [MainThread  ] [INFO ]  Loading features data into memory [may take a few minutes]


Done initializing data with #samples: train=60597, valid=6416, test=3139
Train on 60597 samples, validate on 6416 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.91918, saving model to /mnt/seals/models/82/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc did not improve from 0.91918
Epoch 3/20

Epoch 00003: val_acc improved from 0.91918 to 0.92441, saving model to /mnt/seals/models/82/model_round_1.h5
Epoch 4/20

Epoch 00004: val_acc improved from 0.92441 to 0.92683, saving model to /mnt/seals/models/82/model_round_1.h5
Epoch 5/20

Epoch 00005: val_acc improved from 0.92683 to 0.93623, saving model to /mnt/seals/models/82/model_round_1.h5
Epoch 6/20

Epoch 00006: val_acc did not improve from 0.93623
Epoch 7/20

Epoch 00007: val_acc did not improve from 0.93623
Epoch 8/20

Epoch 00008: val_acc improved from 0.93623 to 0.93705, saving model to /mnt/seals/models/82/model_round_1.h5
Epoch 9/20

Epoch 00009: val_acc did not improve from 0.93705
Epoch 10/20

Epoch 00010: va


Epoch 00003: val_acc did not improve from 0.93198
Epoch 4/20

Epoch 00004: val_acc improved from 0.93198 to 0.93211, saving model to /mnt/seals/models/82/model_round_3.h5
Epoch 5/20

Epoch 00005: val_acc improved from 0.93211 to 0.93249, saving model to /mnt/seals/models/82/model_round_3.h5
Epoch 6/20

Epoch 00006: val_acc did not improve from 0.93249
Epoch 7/20

Epoch 00007: val_acc did not improve from 0.93249
Epoch 8/20

Epoch 00008: val_acc did not improve from 0.93249
H3 {'loss': [0.05786039141499548, 0.056828000624573986, 0.05704894165075055, 0.05703243230220038, 0.056571433601174066, 0.05697923583236487, 0.05721942838970237, 0.05666394991644684], 'val_loss': [0.22797160740579453, 0.2278854125492888, 0.22667051312929376, 0.22903181447129595, 0.2296534689361615, 0.22879266070011547, 0.22816948899232836, 0.2295470803864579], 'val_acc': [0.9319781056068782, 0.9319781076878384, 0.9319335769239506, 0.9321117026550216, 0.9324902197963877, 0.9317777158316234, 0.9321339683342456, 0.9320

2019-01-20 13:35:25,538 [MainThread  ] [INFO ]  {'fit_dt_train_end': '2019-01-20 13:35:22', 'num_features': 512, 'fit_stopped_epoch2': 5, 'path_model': '/mnt/seals/models/82/', 'sequence_model': nan, 'batch_size': 32, 'fit_val_loss': 0.22338662227787578, 'layer_3_size': 128, 'sequence_model_layers': nan, 'data_total_rows_valid': 6416, 'fit_dt_test_end': '2019-01-20 13:35:23', 'convolution_kernel_size': 3, 'fit_val_acc': 0.9327128804532667, 'fit_train_loss': 0.060653074162238356, 'frame_size': (224, 224), 'pooling': 'max', 'layer_2_size': 512, 'architecture': 'image_mlp_frozen', 'fit_stopped_epoch1': 13, 'sequence_length': 1.0, 'fit_test_acc': 0.6301369863013699, 'data_total_rows_train': 60597, 'dropout': 0.2, 'fit_dt_test_start': '2019-01-20 13:35:23', 'model_weights_path': None, 'fit_dt_train_duration_seconds': '446', 'fit_stopped_epoch3': 5, 'fit_dt_test_duration_seconds': '0', 'model_param_count': 329479, 'fit_dt_train_start': '2019-01-20 13:27:55', 'fit_num_epochs': 26, 'pretrained

90   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'dropout': 0.2, 'sequence_model': nan, 'layer_3_size': 128, 'model_id': 90, 'sequence_model_layers': nan, 'WORKER': 2, 'pretrained_model_name': 'vgg16', 'pooling': 'max', 'layer_2_size': 128, 'architecture': 'image_MLP_frozen', 'layer_1_size': 256, 'sequence_length': 1.0}


2019-01-20 13:35:26,951 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/vgg16/max/
2019-01-20 13:35:26,952 [MainThread  ] [INFO ]  Loading features data into memory [may take a few minutes]


Done initializing data with #samples: train=60597, valid=6416, test=3139
Train on 60597 samples, validate on 6416 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.92715, saving model to /mnt/seals/models/90/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc improved from 0.92715 to 0.93193, saving model to /mnt/seals/models/90/model_round_1.h5
Epoch 3/20

Epoch 00003: val_acc did not improve from 0.93193
Epoch 4/20

Epoch 00004: val_acc improved from 0.93193 to 0.93412, saving model to /mnt/seals/models/90/model_round_1.h5
Epoch 5/20

Epoch 00005: val_acc did not improve from 0.93412
Epoch 6/20

Epoch 00006: val_acc did not improve from 0.93412
Epoch 7/20

Epoch 00007: val_acc improved from 0.93412 to 0.93463, saving model to /mnt/seals/models/90/model_round_1.h5
Epoch 8/20

Epoch 00008: val_acc did not improve from 0.93463
Epoch 9/20

Epoch 00009: val_acc did not improve from 0.93463
Epoch 10/20

Epoch 00010: val_acc did not improve from 0.93463
H1 {'loss': [0.219849006


Epoch 00009: val_acc did not improve from 0.93396
Epoch 10/20

Epoch 00010: val_acc did not improve from 0.93396
Epoch 11/20

Epoch 00011: val_acc did not improve from 0.93396
H3 {'loss': [0.06673810518288893, 0.06691510054553848, 0.06653465020230578, 0.06657853283138443, 0.06638414125260574, 0.06676749651723672, 0.06646890156424132, 0.06587068004946187, 0.0660546239493324, 0.06581174480284223, 0.06598745544518367], 'val_loss': [0.18474289241648673, 0.18344857441516885, 0.18171509778930658, 0.1830508605463249, 0.18346255577338902, 0.18330683295343284, 0.1846467961657077, 0.18242946703758026, 0.1833342082631261, 0.18372939770432778, 0.18495744745481638], 'val_acc': [0.9327796754099782, 0.9328910041033776, 0.9335367110304702, 0.9334031160632869, 0.9338484315800846, 0.9335144463917264, 0.9328687405051138, 0.933959761759884, 0.9338484345528848, 0.933781636028813, 0.9332917890049276], 'acc': [0.9721532769304854, 0.9721014127647991, 0.9723772390505653, 0.9723348041002685, 0.9723159437179801

2019-01-20 13:42:55,638 [MainThread  ] [INFO ]  {'fit_dt_train_end': '2019-01-20 13:42:52', 'num_features': 512, 'fit_stopped_epoch2': 7, 'path_model': '/mnt/seals/models/90/', 'sequence_model': nan, 'batch_size': 32, 'fit_val_loss': 0.1833342082631261, 'layer_3_size': 128, 'sequence_model_layers': nan, 'data_total_rows_valid': 6416, 'fit_dt_test_end': '2019-01-20 13:42:53', 'convolution_kernel_size': 3, 'fit_val_acc': 0.9338484345528848, 'fit_train_loss': 0.0660546239493324, 'frame_size': (224, 224), 'pooling': 'max', 'layer_2_size': 128, 'architecture': 'image_mlp_frozen', 'fit_stopped_epoch1': 7, 'sequence_length': 1.0, 'fit_test_acc': 0.6613571201019433, 'data_total_rows_train': 60597, 'dropout': 0.2, 'fit_dt_test_start': '2019-01-20 13:42:53', 'model_weights_path': None, 'fit_dt_train_duration_seconds': '444', 'fit_stopped_epoch3': 8, 'fit_dt_test_duration_seconds': '0', 'model_param_count': 181639, 'fit_dt_train_start': '2019-01-20 13:35:27', 'fit_num_epochs': 25, 'pretrained_mod

98   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'dropout': 0.2, 'sequence_model': nan, 'layer_3_size': 128, 'model_id': 98, 'sequence_model_layers': nan, 'WORKER': 2, 'pretrained_model_name': 'vgg16', 'pooling': 'max', 'layer_2_size': 512, 'architecture': 'image_MLP_frozen', 'layer_1_size': 128, 'sequence_length': 1.0}


2019-01-20 13:42:57,119 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/vgg16/max/
2019-01-20 13:42:57,120 [MainThread  ] [INFO ]  Loading features data into memory [may take a few minutes]


Done initializing data with #samples: train=60597, valid=6416, test=3139
Train on 60597 samples, validate on 6416 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.92398, saving model to /mnt/seals/models/98/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc improved from 0.92398 to 0.92572, saving model to /mnt/seals/models/98/model_round_1.h5
Epoch 3/20

Epoch 00003: val_acc improved from 0.92572 to 0.93354, saving model to /mnt/seals/models/98/model_round_1.h5
Epoch 4/20

Epoch 00004: val_acc did not improve from 0.93354
Epoch 5/20

Epoch 00005: val_acc did not improve from 0.93354
Epoch 6/20

Epoch 00006: val_acc improved from 0.93354 to 0.94304, saving model to /mnt/seals/models/98/model_round_1.h5
Epoch 7/20

Epoch 00007: val_acc did not improve from 0.94304
Epoch 8/20

Epoch 00008: val_acc did not improve from 0.94304
Epoch 9/20

Epoch 00009: val_acc did not improve from 0.94304
H1 {'loss': [0.18648356981884714, 0.13080168428663602, 0.12037680153091554, 0.112353161

2019-01-20 13:48:30,716 [MainThread  ] [INFO ]  {'fit_dt_train_end': '2019-01-20 13:48:26', 'num_features': 512, 'fit_stopped_epoch2': 4, 'path_model': '/mnt/seals/models/98/', 'sequence_model': nan, 'batch_size': 32, 'fit_val_loss': 0.1897226759694758, 'layer_3_size': 128, 'sequence_model_layers': nan, 'data_total_rows_valid': 6416, 'fit_dt_test_end': '2019-01-20 13:48:28', 'convolution_kernel_size': 3, 'fit_val_acc': 0.9358523479126338, 'fit_train_loss': 0.07024304872903993, 'frame_size': (224, 224), 'pooling': 'max', 'layer_2_size': 512, 'architecture': 'image_mlp_frozen', 'fit_stopped_epoch1': 6, 'sequence_length': 1.0, 'fit_test_acc': 0.6578528193692259, 'data_total_rows_train': 60597, 'dropout': 0.2, 'fit_dt_test_start': '2019-01-20 13:48:28', 'model_weights_path': None, 'fit_dt_train_duration_seconds': '329', 'fit_stopped_epoch3': 3, 'fit_dt_test_duration_seconds': '0', 'model_param_count': 198279, 'fit_dt_train_start': '2019-01-20 13:42:57', 'fit_num_epochs': 16, 'pretrained_mo

106   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'dropout': 0.2, 'sequence_model': nan, 'layer_3_size': 128, 'model_id': 106, 'sequence_model_layers': nan, 'WORKER': 2, 'pretrained_model_name': 'vgg16', 'pooling': 'max', 'layer_2_size': 128, 'architecture': 'image_MLP_frozen', 'layer_1_size': 128, 'sequence_length': 1.0}


2019-01-20 13:48:32,254 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/vgg16/max/
2019-01-20 13:48:32,256 [MainThread  ] [INFO ]  Loading features data into memory [may take a few minutes]


Done initializing data with #samples: train=60597, valid=6416, test=3139
Train on 60597 samples, validate on 6416 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.91808, saving model to /mnt/seals/models/106/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc improved from 0.91808 to 0.91862, saving model to /mnt/seals/models/106/model_round_1.h5
Epoch 3/20

Epoch 00003: val_acc improved from 0.91862 to 0.93300, saving model to /mnt/seals/models/106/model_round_1.h5
Epoch 4/20

Epoch 00004: val_acc did not improve from 0.93300
Epoch 5/20

Epoch 00005: val_acc did not improve from 0.93300
Epoch 6/20

Epoch 00006: val_acc did not improve from 0.93300
H1 {'loss': [0.19866409233391047, 0.1332022113675778, 0.12153436883356708, 0.1154276107997944, 0.11106622734897957, 0.10707653305625055], 'val_loss': [0.20102053880691528, 0.19090504765956479, 0.179313188553451, 0.1799781235896441, 0.20445016531872928, 0.19659374488931047], 'val_acc': [0.9180842727199754, 0.9186186522914287, 0.

2019-01-20 13:52:58,970 [MainThread  ] [INFO ]  {'fit_dt_train_end': '2019-01-20 13:52:54', 'num_features': 512, 'fit_stopped_epoch2': 1, 'path_model': '/mnt/seals/models/106/', 'sequence_model': nan, 'batch_size': 32, 'fit_val_loss': 0.17259183446145118, 'layer_3_size': 128, 'sequence_model_layers': nan, 'data_total_rows_valid': 6416, 'fit_dt_test_end': '2019-01-20 13:52:57', 'convolution_kernel_size': 3, 'fit_val_acc': 0.9357632880198985, 'fit_train_loss': 0.08959206271563937, 'frame_size': (224, 224), 'pooling': 'max', 'layer_2_size': 128, 'architecture': 'image_mlp_frozen', 'fit_stopped_epoch1': 3, 'sequence_length': 1.0, 'fit_test_acc': 0.6502070723160243, 'data_total_rows_train': 60597, 'dropout': 0.2, 'fit_dt_test_start': '2019-01-20 13:52:56', 'model_weights_path': None, 'fit_dt_train_duration_seconds': '262', 'fit_stopped_epoch3': 4, 'fit_dt_test_duration_seconds': '0', 'model_param_count': 99591, 'fit_dt_train_start': '2019-01-20 13:48:32', 'fit_num_epochs': 11, 'pretrained_m

114   XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
{'dropout': 0.2, 'sequence_model': nan, 'layer_3_size': 128, 'model_id': 114, 'sequence_model_layers': nan, 'WORKER': 2, 'pretrained_model_name': 'vgg16', 'pooling': 'max', 'layer_2_size': 512, 'architecture': 'image_MLP_frozen', 'layer_1_size': 0, 'sequence_length': 1.0}


2019-01-20 13:53:00,628 [MainThread  ] [INFO ]  Features already cached: /mnt/seals/cache/features/vgg16/max/
2019-01-20 13:53:00,629 [MainThread  ] [INFO ]  Loading features data into memory [may take a few minutes]


Done initializing data with #samples: train=60597, valid=6416, test=3139
Train on 60597 samples, validate on 6416 samples
Epoch 1/20

Epoch 00001: val_acc improved from -inf to 0.92403, saving model to /mnt/seals/models/114/model_round_1.h5
Epoch 2/20

Epoch 00002: val_acc did not improve from 0.92403
Epoch 3/20

Epoch 00003: val_acc improved from 0.92403 to 0.92859, saving model to /mnt/seals/models/114/model_round_1.h5
Epoch 4/20

Epoch 00004: val_acc improved from 0.92859 to 0.93795, saving model to /mnt/seals/models/114/model_round_1.h5
Epoch 5/20

Epoch 00005: val_acc did not improve from 0.93795
Epoch 6/20

Epoch 00006: val_acc did not improve from 0.93795
Epoch 7/20

Epoch 00007: val_acc did not improve from 0.93795
H1 {'loss': [0.5575704447933575, 0.32670921050625035, 0.29230204228945167, 0.2720672982291683, 0.2682658182181214, 0.2591642508037075, 0.2546341742620686], 'val_loss': [0.6440689820155241, 0.794891457233643, 0.5525196344960954, 0.49032719920103685, 0.4908764069142781