In [1]:
import os

import numpy as np
from models.attention import multihead_model
from models.selfattention import selfattention_model, selfattention_model_modified
from models.baseline_model import baseline,baseline_preprocess, baseline_residual, baseline_modified
from keras.callbacks import ModelCheckpoint, EarlyStopping
from data_utility.file_utility import FileUtility
from data_utility.labeling_utility import LabelingData
import itertools
from data_utility.feedgenerator import train_batch_generator_408, validation_batch_generator_408


# Notebook-wide default GPU selection. run_baseline() overwrites this variable
# with its own `gpu` argument each time it is called.
os.environ.update({"CUDA_VISIBLE_DEVICES": '1'})

Using TensorFlow backend.


In [2]:
def run_baseline(epochs=10, setting_name='basemodel', gpu='1', train_batch_size=64,
                 test_batch_size=100, patience=10, features_to_use=['onehot', 'sequence_profile'],
                 convs=[3, 5, 7], dense_size=200, lstm_size=400, use_CRF=False, filter_size=256):
    """Build the baseline model, train it with the 408-feature batch generators and return it.

    All artefacts of the run are written under
    ``baseline_modifications/<setting_name><params>/`` where ``params`` is the
    string returned by ``baseline_preprocess`` alongside the model:

    * ``config.txt`` -- the textual model summary,
    * ``weights-improvement-*.hdf5`` -- a checkpoint each time
      ``val_weighted_acc`` improves,
    * ``history`` -- the Keras training history, stored via ``FileUtility.save_obj``.

    The train/test file locations are hard-coded relative to the working directory.

    Args:
        epochs: Maximum number of training epochs.
        setting_name: Prefix of the output directory name.
        gpu: Value exported as ``CUDA_VISIBLE_DEVICES``; converted with ``str``
            so an integer index is accepted as well.
        train_batch_size: Batch size handed to ``train_batch_generator_408``.
        test_batch_size: Batch size handed to ``validation_batch_generator_408``.
        patience: ``patience`` of the ``EarlyStopping`` callback, which monitors
            ``val_weighted_acc``.
        features_to_use: Forwarded unchanged to ``baseline_preprocess``.
        convs: Forwarded unchanged to ``baseline_preprocess``.
        dense_size: Forwarded unchanged to ``baseline_preprocess``.
        lstm_size: Forwarded unchanged to ``baseline_preprocess``.
        use_CRF: Forwarded unchanged to ``baseline_preprocess``.
        filter_size: Forwarded unchanged to ``baseline_preprocess``.

    Returns:
        The model object after ``fit_generator`` has finished.
    """
    # os.environ only accepts strings; str() keeps '1' as-is and also allows gpu=1.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)

    # read files
    train_file = '../DeepSeq2Sec/data/s8_all_features/train.txt'
    test_file = '../DeepSeq2Sec/data/s8_all_features/test.txt'
    LD = LabelingData(train_file, test_file)
    # The per-sequence length lists live next to the train/test files.
    train_lengths = [int(length) for length in
                     FileUtility.load_list(os.path.dirname(train_file) + '/train_length.txt')]
    test_lengths = [int(length) for length in
                    FileUtility.load_list(os.path.dirname(test_file) + '/test_length.txt')]

    # model
    model, params = baseline_preprocess(LD.n_classes, features_to_use=features_to_use, convs=convs,
                                        dense_size=dense_size, lstm_size=lstm_size, use_CRF=use_CRF,
                                        filter_size=filter_size)

    # output directory (built once, reused for every artefact below)
    output_dir = 'baseline_modifications/' + setting_name + params + '/'
    FileUtility.ensure_dir(output_dir)

    # write the model summary (architecture only, no weights)
    with open(output_dir + 'config.txt', 'w') as fh:
        model.summary(print_fn=lambda line: fh.write(line + '\n'))

    # check points
    filepath = output_dir + "weights-improvement-{epoch:02d}-{weighted_acc:.3f}-{val_weighted_acc:.3f}.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor='val_weighted_acc', verbose=1, save_best_only=True, mode='max',
                                 period=1)
    earlystopping = EarlyStopping(monitor='val_weighted_acc', min_delta=0, patience=patience, verbose=0, mode='max',
                                  baseline=None)
    callbacks_list = [checkpoint, earlystopping]

    # calculate the sizes: integer ceiling division so both step counts are
    # always ints (true division produced a float when the sizes divided evenly)
    steps_per_epoch = (len(train_lengths) + train_batch_size - 1) // train_batch_size
    validation_steps = (len(test_lengths) + test_batch_size - 1) // test_batch_size

    # feed model
    h = model.fit_generator(train_batch_generator_408(train_batch_size), steps_per_epoch=steps_per_epoch,
                            validation_data=validation_batch_generator_408(test_batch_size),
                            validation_steps=validation_steps,
                            shuffle=False, epochs=epochs, verbose=1, callbacks=callbacks_list)

    # save the history
    FileUtility.save_obj(output_dir + 'history', h.history)
    return model


In [3]:
# Long training run (up to 1000 epochs, early stopping after 100 epochs without
# improvement) with five convolution widths and 1000-unit dense/LSTM layers.
model = run_baseline(
    epochs=1000,
    setting_name='profilesig_onehotembed_',
    gpu='0',
    train_batch_size=64,
    test_batch_size=100,
    patience=100,
    features_to_use=['onehot', 'sequence_profile'],
    convs=[3, 5, 7, 11, 21],
    dense_size=1000,
    lstm_size=1000,
    use_CRF=False,
    filter_size=256,
)

Labeling utility object created..
Training y encoded shape is  (5534, 700)
Maximum sequence length is 700
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, None, 408)    0                                            
__________________________________________________________________________________________________
onehot (Lambda)                 (None, None, 21)     0           input_1[0][0]                    
__________________________________________________________________________________________________
sequenceprofile (Lambda)        (None, None, 21)     0           input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_3 (BatchNor (None, None, 21)     84          onehot[0][0]                     
___

Epoch 7/1000

Epoch 00007: val_weighted_acc did not improve from 0.63728
Epoch 8/1000

Epoch 00008: val_weighted_acc did not improve from 0.63728
Epoch 9/1000

Epoch 00009: val_weighted_acc improved from 0.63728 to 0.66187, saving model to baseline_modifications/profilesig_onehotembed_model#onehot#sequence_profile@conv3_5_7_11_21@dense_1000@lstm1000@droplstm0.5@filtersize_256/weights-improvement-09-0.748-0.662.hdf5
Epoch 10/1000

Epoch 00010: val_weighted_acc did not improve from 0.66187
Epoch 11/1000

Epoch 00011: val_weighted_acc did not improve from 0.66187
Epoch 12/1000

Epoch 00012: val_weighted_acc improved from 0.66187 to 0.67045, saving model to baseline_modifications/profilesig_onehotembed_model#onehot#sequence_profile@conv3_5_7_11_21@dense_1000@lstm1000@droplstm0.5@filtersize_256/weights-improvement-12-0.773-0.670.hdf5
Epoch 13/1000

Epoch 00013: val_weighted_acc improved from 0.67045 to 0.67250, saving model to baseline_modifications/profilesig_onehotembed_model#onehot#seque


Epoch 00041: val_weighted_acc did not improve from 0.68422
Epoch 42/1000

Epoch 00042: val_weighted_acc did not improve from 0.68422
Epoch 43/1000

Epoch 00043: val_weighted_acc did not improve from 0.68422
Epoch 44/1000

Epoch 00044: val_weighted_acc did not improve from 0.68422
Epoch 45/1000

Epoch 00045: val_weighted_acc did not improve from 0.68422
Epoch 46/1000

Epoch 00046: val_weighted_acc did not improve from 0.68422
Epoch 47/1000

Epoch 00047: val_weighted_acc did not improve from 0.68422
Epoch 48/1000

Epoch 00048: val_weighted_acc did not improve from 0.68422
Epoch 49/1000

Epoch 00049: val_weighted_acc did not improve from 0.68422
Epoch 50/1000

Epoch 00050: val_weighted_acc did not improve from 0.68422
Epoch 51/1000

Epoch 00051: val_weighted_acc did not improve from 0.68422
Epoch 52/1000

Epoch 00052: val_weighted_acc did not improve from 0.68422
Epoch 53/1000

Epoch 00053: val_weighted_acc did not improve from 0.68422
Epoch 54/1000

Epoch 00054: val_weighted_acc did not


Epoch 00080: val_weighted_acc did not improve from 0.68422
Epoch 81/1000

Epoch 00081: val_weighted_acc did not improve from 0.68422
Epoch 82/1000

Epoch 00082: val_weighted_acc did not improve from 0.68422
Epoch 83/1000

Epoch 00083: val_weighted_acc did not improve from 0.68422
Epoch 84/1000

Epoch 00084: val_weighted_acc did not improve from 0.68422
Epoch 85/1000

Epoch 00085: val_weighted_acc did not improve from 0.68422
Epoch 86/1000

Epoch 00086: val_weighted_acc did not improve from 0.68422
Epoch 87/1000

Epoch 00087: val_weighted_acc did not improve from 0.68422
Epoch 88/1000

Epoch 00088: val_weighted_acc did not improve from 0.68422
Epoch 89/1000

Epoch 00089: val_weighted_acc did not improve from 0.68422
Epoch 90/1000

Epoch 00090: val_weighted_acc did not improve from 0.68422
Epoch 91/1000

Epoch 00091: val_weighted_acc did not improve from 0.68422
Epoch 92/1000

Epoch 00092: val_weighted_acc did not improve from 0.68422
Epoch 93/1000

Epoch 00093: val_weighted_acc did not

In [None]:
# Smaller configuration: 200-unit dense/LSTM layers, filter_size 60, patience 50.
# NOTE(review): setting_name says 'no_cnn' but a non-empty `convs` list is still
# passed -- confirm whether this run is really meant to disable the convolutions.
model = run_baseline(
    epochs=1000,
    setting_name='baseline_no_cnn',
    gpu='1',
    train_batch_size=64,
    test_batch_size=100,
    patience=50,
    features_to_use=['onehot', 'sequence_profile'],
    convs=[3, 5, 7, 11, 21],
    dense_size=200,
    lstm_size=200,
    use_CRF=False,
    filter_size=60,
)