In [1]:
import os

import numpy as np
from models.attention import multihead_model
from models.selfattention import selfattention_model, selfattention_model_modified
from models.baseline_model import baseline, baseline_residual, baseline_modified
from keras.callbacks import ModelCheckpoint, EarlyStopping
from data_utility.file_utility import FileUtility
from data_utility.labeling_utility import LabelingData
import itertools
from data_utility.feedgenerator import train_batch_generator_408, validation_batch_generator_408


# Notebook-wide value for CUDA_VISIBLE_DEVICES. Note that run_baseline()
# overwrites this variable with its own `gpu` argument each time it is called.
os.environ["CUDA_VISIBLE_DEVICES"] = '1'

Using TensorFlow backend.


In [2]:
def run_baseline(epochs=10, setting_name='basemodel', gpu='1', train_batch_size=64,
                 test_batch_size=100, patience=10, features_to_use=['onehot', 'sequence_profile'], convs=[3, 5, 7], dense_size=200, lstm_size=400,use_CRF=False):
    """Train the residual baseline model and store its artefacts on disk.

    The model is built by ``baseline_residual``, which also returns a ``params``
    string. Everything produced by the run is written below
    ``baseline_modifications/<setting_name><params>/``:

    * ``config.txt``  - the text of ``model.summary()``
    * ``weights-improvement-*.hdf5`` - a checkpoint each time
      ``val_weighted_acc`` improves
    * ``history`` - ``h.history`` saved through ``FileUtility.save_obj``

    Args:
        epochs: Maximum number of training epochs.
        setting_name: Prefix of the output directory name.
        gpu: Value written to the ``CUDA_VISIBLE_DEVICES`` environment variable.
        train_batch_size: Batch size handed to ``train_batch_generator_408``.
        test_batch_size: Batch size handed to ``validation_batch_generator_408``.
        patience: Number of epochs without ``val_weighted_acc`` improvement
            after which early stopping ends the run.
        features_to_use: Forwarded unchanged to ``baseline_residual``.
        convs: Forwarded unchanged to ``baseline_residual``.
        dense_size: Forwarded unchanged to ``baseline_residual``.
        lstm_size: Forwarded unchanged to ``baseline_residual``.
        use_CRF: Forwarded unchanged to ``baseline_residual``.

    Returns:
        None. The results are the files listed above.
    """
    # NOTE(review): this presumably only takes effect if the backend has not
    # yet initialised its GPU devices in this kernel - confirm before relying
    # on switching GPUs between calls.
    os.environ["CUDA_VISIBLE_DEVICES"] = gpu

    # read files
    train_file = '../DeepSeq2Sec/data/s8_all_features/train.txt'
    test_file = '../DeepSeq2Sec/data/s8_all_features/test.txt'
    LD = LabelingData(train_file, test_file)
    # The length files sit next to the feature files; only the number of
    # entries is used below, to size the epochs.
    train_lengths = [int(j) for j in FileUtility.load_list(os.path.dirname(train_file) + '/train_length.txt')]
    test_lengths = [int(i) for i in FileUtility.load_list(os.path.dirname(test_file) + '/test_length.txt')]

    # model
    model, params = baseline_residual(LD.n_classes, features_to_use=features_to_use, convs=convs,
                             dense_size=dense_size, lstm_size=lstm_size,use_CRF=use_CRF)

    # output directory (built once and reused for every artefact of this run)
    output_dir = 'baseline_modifications/' + setting_name + params + '/'
    FileUtility.ensure_dir(output_dir)

    # save the model summary (architecture description, not the weights)
    with open(output_dir + 'config.txt', 'w') as fh:
        model.summary(print_fn=lambda x: fh.write(x + '\n'))

    # check points: keep only epochs that improve the validation metric
    filepath = output_dir + "weights-improvement-{epoch:02d}-{weighted_acc:.3f}-{val_weighted_acc:.3f}.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor='val_weighted_acc', verbose=1, save_best_only=True, mode='max',
                                 period=1)
    earlystopping = EarlyStopping(monitor='val_weighted_acc', min_delta=0, patience=patience, verbose=0, mode='max',
                                  baseline=None)
    callbacks_list = [checkpoint, earlystopping]

    # calculate the sizes: ceiling division so that a final, partially filled
    # batch still counts as a step. Integer arithmetic keeps both values of
    # type int (true division would yield a float when the sizes divide evenly).
    steps_per_epoch = (len(train_lengths) + train_batch_size - 1) // train_batch_size
    validation_steps = (len(test_lengths) + test_batch_size - 1) // test_batch_size

    # feed model
    h = model.fit_generator(train_batch_generator_408(train_batch_size), steps_per_epoch=steps_per_epoch,
                            validation_data=validation_batch_generator_408(test_batch_size),
                            validation_steps=validation_steps,
                            shuffle=False, epochs=epochs, verbose=1, callbacks=callbacks_list)

    # save the history
    FileUtility.save_obj(output_dir + 'history', h.history)


In [4]:
# Experiment "residual2_": wider conv bank and larger dense/LSTM layers than
# the run_baseline() defaults, trained on GPU '0' for up to 300 epochs.
run_baseline(
    epochs=300,
    setting_name='residual2_',
    gpu='0',
    train_batch_size=32,
    test_batch_size=100,
    patience=10,
    features_to_use=['onehot', 'sequence_profile'],
    convs=[3, 5, 7, 11, 21],
    dense_size=1000,
    lstm_size=1000,
    use_CRF=False,
)

Labeling utility object created..
Training y encoded shape is  (5534, 700)
Maximum sequence length is 700
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, None, 408)    0                                            
__________________________________________________________________________________________________
onehot (Lambda)                 (None, None, 21)     0           input_2[0][0]                    
__________________________________________________________________________________________________
sequenceprofile (Lambda)        (None, None, 21)     0           input_2[0][0]                    
__________________________________________________________________________________________________
concatenate_4 (Concatenate)     (None, None, 42)     0           onehot[0][0]                     
   

Epoch 7/300

Epoch 00007: val_weighted_acc improved from 0.66981 to 0.67542, saving model to baseline_modifications/residual2_model#onehot#sequence_profile@conv3_5_7_11_21@dense_1000@lstm1000@droplstm0.5@filtersize_32/weights-improvement-07-0.719-0.675.hdf5
Epoch 8/300

Epoch 00008: val_weighted_acc did not improve from 0.67542
Epoch 9/300

Epoch 00009: val_weighted_acc improved from 0.67542 to 0.67800, saving model to baseline_modifications/residual2_model#onehot#sequence_profile@conv3_5_7_11_21@dense_1000@lstm1000@droplstm0.5@filtersize_32/weights-improvement-09-0.724-0.678.hdf5
Epoch 10/300

Epoch 00010: val_weighted_acc improved from 0.67800 to 0.67801, saving model to baseline_modifications/residual2_model#onehot#sequence_profile@conv3_5_7_11_21@dense_1000@lstm1000@droplstm0.5@filtersize_32/weights-improvement-10-0.725-0.678.hdf5
Epoch 11/300

Epoch 00011: val_weighted_acc improved from 0.67801 to 0.68192, saving model to baseline_modifications/residual2_model#onehot#sequence_prof

Epoch 35/300

Epoch 00035: val_weighted_acc improved from 0.68844 to 0.68869, saving model to baseline_modifications/residual2_model#onehot#sequence_profile@conv3_5_7_11_21@dense_1000@lstm1000@droplstm0.5@filtersize_32/weights-improvement-35-0.758-0.689.hdf5
Epoch 36/300

Epoch 00036: val_weighted_acc did not improve from 0.68869
Epoch 37/300

Epoch 00037: val_weighted_acc improved from 0.68869 to 0.68997, saving model to baseline_modifications/residual2_model#onehot#sequence_profile@conv3_5_7_11_21@dense_1000@lstm1000@droplstm0.5@filtersize_32/weights-improvement-37-0.760-0.690.hdf5
Epoch 38/300

Epoch 00038: val_weighted_acc did not improve from 0.68997
Epoch 39/300

Epoch 00039: val_weighted_acc did not improve from 0.68997
Epoch 40/300

Epoch 00040: val_weighted_acc did not improve from 0.68997
Epoch 41/300

Epoch 00041: val_weighted_acc improved from 0.68997 to 0.69088, saving model to baseline_modifications/residual2_model#onehot#sequence_profile@conv3_5_7_11_21@dense_1000@lstm10


Epoch 00067: val_weighted_acc did not improve from 0.69336
Epoch 68/300

Epoch 00068: val_weighted_acc did not improve from 0.69336
Epoch 69/300

Epoch 00069: val_weighted_acc did not improve from 0.69336
Epoch 70/300

Epoch 00070: val_weighted_acc did not improve from 0.69336
Epoch 71/300

Epoch 00071: val_weighted_acc improved from 0.69336 to 0.69407, saving model to baseline_modifications/residual2_model#onehot#sequence_profile@conv3_5_7_11_21@dense_1000@lstm1000@droplstm0.5@filtersize_32/weights-improvement-71-0.794-0.694.hdf5
Epoch 72/300

Epoch 00072: val_weighted_acc did not improve from 0.69407
Epoch 73/300

Epoch 00073: val_weighted_acc did not improve from 0.69407
Epoch 74/300

Epoch 00074: val_weighted_acc did not improve from 0.69407
Epoch 75/300

Epoch 00075: val_weighted_acc did not improve from 0.69407
Epoch 76/300

Epoch 00076: val_weighted_acc did not improve from 0.69407
Epoch 77/300

Epoch 00077: val_weighted_acc did not improve from 0.69407
Epoch 78/300

Epoch 0007