## Setup

In [1]:
# Enable the autoreload extension in mode 2, so imported modules are reloaded
# before each cell executes and edits to project code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
import keras.backend as K
# Reset the Keras backend session so that state left over from earlier runs in
# this kernel does not carry over into the models built below.
K.clear_session()

Using TensorFlow backend.


In [3]:
import matplotlib

# Default figure size (width, height) applied to every plot in this notebook.
matplotlib.rcParams.update({'figure.figsize': (12.0, 6.0)})

## Prepare data

In [4]:
from tep.config import Config
# Project-wide configuration object; `classes` is the class list it defines.
config = Config()
classes = config.CLASSES
# NOTE(review): the model gets one more output class than there are entries in
# config.CLASSES — presumably an extra catch-all label; confirm against how
# the classification labels were encoded.
num_classes = len(classes) + 1
print(num_classes)

5


In [5]:
from tep.utils import load_array
# Load the precomputed feature matrix and the per-example class labels from disk.
features = load_array(filename="data/auxiliary_features.bc")
labels = load_array(filename="data/classification_labels.bc")
# Sanity check: both arrays should have the same number of rows.
print(features.shape)
print(labels.shape)

(1293005, 15)
(1293005,)


In [6]:
from tep.trainUtils import one_hot_encoding
# Convert the class labels into one-hot rows with `num_classes` columns, the
# target format passed to model.fit below.
oh_labels = one_hot_encoding(class_labels=labels, number_classes=num_classes)
print(oh_labels.shape)

(1293005, 5)


## Configure model architectures

In [7]:
# Architecture grid to evaluate: every combination of 1-4 hidden layers with
# 16, 32 or 64 units per layer. Entries are ordered by depth first, then width,
# and are named '<layers>h_<units>n'.
configs = [
    {'name': '{}h_{}n'.format(depth, width), 'num_layers': depth, 'num_units': width}
    for depth in (1, 2, 3, 4)
    for width in (16, 32, 64)
]

## Train model

In [8]:
# Size of the training run. Flip `use_full_data` to switch between a quick
# test on a small sample and a run on the full data set, instead of commenting
# the alternative settings in and out by hand.
use_full_data = False

if use_full_data:
    # use settings for running on full data: the validation examples are taken
    # from the end of the arrays, everything before them is used for training
    val_size = 10000
    train_size = features.shape[0] - val_size
    batch_size = 512
else:
    # use settings for testing on sample
    train_size = 10000
    val_size = 1000
    batch_size = 64

In [9]:
# Shared prefix for everything this notebook writes: per-model files and log
# directories are all derived from `root_path`.
root_name = 'dffn_class'
root_path = 'models/{}'.format(root_name)

In [12]:
from tep.deepFeedforwardNetwork import classification_model
from tep.modelUtils import save_architecture
from tep.trainUtils import get_callbacks, print_classification_metrics

for config in configs[:2]:
    # clear tf session first in order to avoid conflicts
    K.clear_session()

    # set model path
    model_name = root_name + '_' + config['name']
    model_path = root_path + '_' + config['name']
    logging_path = root_path + '/' + config['name']
    
    # Start logging
    print("Start training model: " + model_name)
    print("Training set: {} examples".format(train_size))
    print("Validation set: {} examples".format(val_size))
    
    # Create logging directory
    !mkdir -p $logging_path
    # Remove prior logs
    !rm $logging_path/*
    
    # load and save model
    model = classification_model(features.shape[1], num_classes, config['num_layers'], config['num_units'])
    save_architecture(model, model_path + '.json')
    
    # load model callbacks
    cbs = get_callbacks(model_name=model_name, log_dir=logging_path, verbose=1)
    
    # train model
    model.fit(features[:train_size], 
          oh_labels[:train_size], 
          validation_data=(features[-val_size:], oh_labels[-val_size:]), 
          batch_size=batch_size, 
          epochs=100, 
          verbose=0,
          shuffle=True,
          callbacks=cbs)
    
    # print best result
    history = cbs[2]
    print_classification_metrics(history)
    
    # add newline after model was trained
    print('\n')

Start training model: dffn_class_1h_16n
Training set: 10000 examples
Validation set: 1000 examples
Epoch 00001: val_loss improved from inf to 1.30864, saving model to models/dffn_class_1h_16n.hdf5
Epoch 00002: val_loss improved from 1.30864 to 1.17916, saving model to models/dffn_class_1h_16n.hdf5
Epoch 00003: val_loss improved from 1.17916 to 1.12749, saving model to models/dffn_class_1h_16n.hdf5
Epoch 00004: val_loss improved from 1.12749 to 1.10112, saving model to models/dffn_class_1h_16n.hdf5
Epoch 00005: val_loss improved from 1.10112 to 1.08547, saving model to models/dffn_class_1h_16n.hdf5
Epoch 00006: val_loss improved from 1.08547 to 1.06912, saving model to models/dffn_class_1h_16n.hdf5
Epoch 00007: val_loss improved from 1.06912 to 1.05105, saving model to models/dffn_class_1h_16n.hdf5
Epoch 00008: val_loss improved from 1.05105 to 1.04743, saving model to models/dffn_class_1h_16n.hdf5
Epoch 00009: val_loss did not improve
Epoch 00010: val_loss improved from 1.04743 to 1.04

## Analyze model performance