In [1]:
import os
import numpy as np
from models.att_models import att_local,att_david
from keras.callbacks import ModelCheckpoint, EarlyStopping
from data_utility.file_utility import FileUtility
from data_utility.labeling_utility import LabelingData
import itertools
from data_utility.feedgenerator import train_batch_generator_408, validation_batch_generator_408
# Expose only CUDA device index 1 to this kernel's process.
# run_baseline() overwrites this variable again from its `gpu` argument.
os.environ.update({"CUDA_VISIBLE_DEVICES": "1"})

Using TensorFlow backend.


In [2]:
def run_baseline(epochs=10, setting_name='basemodel', gpu='1', train_batch_size=64,
                 test_batch_size=100, patience=10,lstm_size =1000, features_to_use=['onehot', 'sequence_profile'], convs=[3, 5, 7], dense_size=200,use_CRF=False,filter_size=256, context=15):
    """Build the ``att_david`` model, train it with the batch generators and save the artefacts.

    Everything is written below ``results_att/<setting_name><params>/`` where
    ``params`` is the string returned by ``att_david``: the model summary
    (``config.txt``), the best-so-far weight checkpoints and the training
    history.

    Args:
        epochs: maximum number of epochs passed to ``fit_generator``.
        setting_name: prefix of the output directory name.
        gpu: value written to ``CUDA_VISIBLE_DEVICES``; converted with ``str()``
            so both ``'1'`` and ``1`` are accepted.
        train_batch_size: batch size handed to ``train_batch_generator_408`` and
            used to derive ``steps_per_epoch``.
        test_batch_size: batch size handed to ``validation_batch_generator_408``
            and used to derive ``validation_steps``.
        patience: ``EarlyStopping`` patience (epochs without improvement of
            ``val_weighted_acc``).
        lstm_size, features_to_use, convs, dense_size, use_CRF, filter_size:
            forwarded unchanged to ``att_david``.
        context: accepted for backward compatibility; not used by this function.

    Returns:
        The model object returned by ``att_david`` after ``fit_generator`` has run.
    """
    # Environment values must be strings; str() also accepts an int GPU index.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)

    # read files
    train_file = '../DeepSeq2Sec/data/s8_all_features/train.txt'
    test_file = '../DeepSeq2Sec/data/s8_all_features/test.txt'
    LD = LabelingData(train_file, test_file)
    # The per-sequence length lists live next to the train/test files.
    train_lengths = [int(j) for j in FileUtility.load_list(os.path.dirname(train_file) + '/train_length.txt')]
    test_lengths = [int(i) for i in FileUtility.load_list(os.path.dirname(test_file) + '/test_length.txt')]

    # model
    # Pass copies of the list arguments so the shared default lists of this
    # function can never be mutated, whatever the model builder does with them.
    model, params = att_david(LD.n_classes, features_to_use=list(features_to_use), convs=list(convs),
                             dense_size=dense_size, use_CRF=use_CRF,lstm_size =lstm_size, filter_size=filter_size)

    # output directory (built once and reused for every artefact)
    output_dir = 'results_att/' + setting_name + params + '/'
    FileUtility.ensure_dir(output_dir)

    # save the model summary
    with open(output_dir + 'config.txt', 'w') as fh:
        model.summary(print_fn=lambda line: fh.write(line + '\n'))

    # check points: keep only weights that improve val_weighted_acc
    filepath = output_dir + "weights-improvement-{epoch:02d}-{weighted_acc:.3f}-{val_weighted_acc:.3f}.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor='val_weighted_acc', verbose=1, save_best_only=True, mode='max',
                                 period=1)
    earlystopping = EarlyStopping(monitor='val_weighted_acc', min_delta=0, patience=patience, verbose=0, mode='max',
                                  baseline=None)
    callbacks_list = [checkpoint, earlystopping]

    # calculate the sizes: integer ceiling division, so a partial last batch
    # still counts as a step and the result is always an int (true division
    # would yield a float such as 2.0 when the sizes divide evenly).
    steps_per_epoch = (len(train_lengths) + train_batch_size - 1) // train_batch_size
    validation_steps = (len(test_lengths) + test_batch_size - 1) // test_batch_size

    # feed model
    h = model.fit_generator(train_batch_generator_408(train_batch_size), steps_per_epoch=steps_per_epoch,
                            validation_data=validation_batch_generator_408(test_batch_size),
                            validation_steps=validation_steps,
                            shuffle=False, epochs=epochs, verbose=1, callbacks=callbacks_list)

    # save the history
    FileUtility.save_obj(output_dir + 'history', h.history)
    return model





In [3]:
# Long training run of the attention model: up to 1000 epochs, early stopping
# after 50 epochs without improvement, five convolution widths.
model = run_baseline(
    epochs=1000,
    setting_name='attention_david_',
    gpu='1',
    train_batch_size=64,
    test_batch_size=100,
    patience=50,
    features_to_use=['onehot', 'sequence_profile'],
    convs=[3, 5, 7, 11, 21],
    lstm_size=1000,
    dense_size=1000,
    use_CRF=False,
    filter_size=256,
    context=100,
)

Labeling utility object created..
Training y encoded shape is  (5534, 700)
Maximum sequence length is 700


AssertionError: 