In [1]:
import os
import argparse

#libs for helper functions
import re
import math
#import multiprocessing
import random
# Keras model functions and classes
from keras.preprocessing.text import Tokenizer
from keras.layers import Embedding, LSTM, Dense
from keras.models import Sequential

Using TensorFlow backend.


In [2]:
import keras
from keras.preprocessing.sequence import pad_sequences
import random
import numpy as np

# Data generator class for keras Sequential model
class DataGenerator(keras.utils.Sequence):
    def __init__(self, seqs, vocab, vocab_size, maxlen=60, batch_size=32, shuffle=False):
        self.seqs = seqs
        self.vocab = vocab
        self.maxlen = maxlen
        self.vocab_size = vocab_size
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Denotes the number of batches per epoch"""
        return int(np.floor(len(self.seqs) / self.batch_size))

# Generate one batch of data
    def __getitem__(self, index):
        """Generate one batch of data"""
        # get indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Get sequences
        seqs_temp = [self.seqs[k] for k in indexes]

        # Generate data for model X are input contexts and y are output layers
        X, y = self.__data_generation(seqs_temp)

        return X, y

# update indexes after each epoch if shuffle is true
    def on_epoch_end(self):
        self.indexes = np.arange(len(self.seqs))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)


#Generate input, output data for model training given n=batch_size sequences
    def __data_generation(self, seqs_temp):
        sequences = list()
        #create all sub sequences, e.g. seq = [1,2,3], then sequences = [[1],[1,2],[1,2,3]]
        for seq in seqs_temp:
            for i in range(1, len(seq)):
                sequence = seq[:i+1]
                sequences.append(sequence)
        # pad sequences, e.g. if maxlen = 4, then sequences = [[0,0,0,1],[0,0,1,2],[0,1,2,3]]
        sequences = pad_sequences(sequences, maxlen=self.maxlen, padding='pre')
        sequences = np.array(sequences)
        # create context and output split, e.g. [[0,0,0],[0,0,1],[0,1,2]], [[1],[2],[3]]
        X, y = sequences[:,:-1],sequences[:,-1]
        # create one hot vector for output category layer
        y = keras.utils.to_categorical(y, num_classes=self.vocab_size)

        return X,y



In [3]:
import copy

# Class which contains all necessary functions for decoders for production score calculation
class DecoderGenerator():
    def __init__(self, model, generator, k):
        # A Sequential model
        self.model = model
        #DataGenerator
        self.generator = generator
        # nb of beams for beam search
        self.k = k


    # get all sub sequences for each sequence in train and pad
    def prepare_seq(self, seq):
        sequences = list()
        #create all sub sequences, e.g. seq = [1,2,3], then sequences = [[1],[1,2],[1,2,3]]
        for i in range(1, len(seq)):
            sequence = seq[:i+1]
            sequences.append(sequence)
        # pad sequences, e.g. if maxlen = 4, then sequences = [[0,0,0,1],[0,0,1,2],[0,1,2,3]]
        sequences = pad_sequences(sequences, maxlen=self.generator.maxlen, padding='pre')
        sequences = np.array(sequences)
        # create context and output split, e.g. [[0,0,0],[0,0,1],[0,1,2]], [[1],[2],[3]]
        X, y = sequences[:,:-1],sequences[:,-1]

        return X, y


    # Returns the log probability of a sequence of words given the current context and the next possible word.
    def get_seq_prob(self, word, context):
        # create copy of context and add next word
        sub_seq = copy.deepcopy(context)
        sub_seq.append(word)
        # prepare sequence, such that x represents the contexts for each word y in the sequence, e.g. for sequence [1,2,3], x,y = [[],[1],[1,2]], [[1],[2],[3]]
        x, y = self.prepare_seq(sub_seq)
        # get output layers for each state in x
        p_pred = self.model.predict(x)
        # accumulate probability at each state to return probability of whole sequence
        log_p_seq = 0.0
        for i, prob in enumerate(p_pred):
            prob_word = prob[y[i]]
            log_p_seq += np.log(prob_word)

        return log_p_seq

    # Performs beam search decoder estimation for sequence and then returns 1 if original sequence is in final k beams else returns 0 .
    def beam_search_decoder(self, seq):
        result = 0
        # create "bag of words" from original sequence
        vocab = list(seq)
        # beams are composed of a context, the remaining vocab, and a score
        beams = [[list(), vocab, np.log(1.0)]]

        for i in range(len(seq)):
            # keep track of all possible candidates for beams at each state
            candidates = []
            for (context, vocab, score) in beams:
                # for each beam, find all possible next states and their scores
                for v in range(len(vocab)):
                    score = self.get_seq_prob(vocab[v], context)
                    # remove item from vocab and add it to the context for the new candidate beam
                    new_vocab = vocab[:v] + vocab[(v + 1):]
                    new_context = copy.deepcopy(context)
                    new_context.append(vocab[v])
                    candidates.append([new_context, new_vocab, score])
            # order all candidate beams next state according to their scores
            ordered = sorted(candidates, key=lambda prob: prob[2], reverse=True)
            # keep top k beams for the next iteration
            if self.k < len(ordered):
                beams = ordered[:self.k]
            else:
                beams = ordered
        for context,vocab,score in beams:
            if context == seq:
                result = 1

        return result


    # Performs the greedy decoder estimation and then returns 1 if it is equal to the original sequence else returns 0 .
    def greedy_decoder(self,seq):
        result = 0
        # create "bag of words" from sequence
        vocab = list(seq)
        context = []
        # while there are still words in the bag of words
        while vocab:
            # find the most probable next word add it to the current context and remove it from the bag of words
            (next_word, max_prob) = max([(v, self.get_seq_prob(v, context)) for v in vocab],
                                        key=lambda prob: prob[1])
            context.append(next_word)
            vocab.remove(next_word)
        # if the greedy sequence is the same as the original return 1 else 0
        if context == seq:
            result = 1

        return result

    # Returns the number of correct predictions and the overall number of test utterances for each sequence length for a given model decoder is either 'greedy' or 'beam'
    def get_performance_bylength(self, decoder):

        # Returns all sequences with less than 17 words organized by sequence length
        def get_seq_bylength(seqs):
            seqs_bylength = dict()
            for seq in seqs:
                seqlen = len(seq)
                if 1 < seqlen < 17:
                    if seqlen in seqs_bylength:
                        seqs_bylength[seqlen].append(seq)
                    else:
                        seqs_bylength[seqlen] = [seq]
            return seqs_bylength
        # organize sequences by length
        seqs_bylength = get_seq_bylength(self.generator.seqs)
        results_bylength = dict()
        for length, seqs in seqs_bylength.items():
            # for each length get the nb of correct predictions and the total nb of test utterances
            results_bylength[length] = [0, len(seqs)]
            print(str(length))
            for seq in seqs:
                # use greedy decoder
                if (decoder == 'greedy'):
                    results_bylength[length][0] += self.greedy_decoder(seq)
                # use beam search decoder
                else:
                    results_bylength[length][0] += self.beam_search_decoder(seq)

        return results_bylength


In [4]:
#### HYPERPARAMETERS ####

#cpus = multiprocessing.cpu_count()
# Nb of epochs (iterations through whole train set)
epochs=15
# Mini-batch size necessary for initializing data generators
batch_size = 32
# Size of word vectors
output_size = 100
# Nb of hidden neurons in 1 layer of LSTM
hidden_size = 50
# Generate sentences in order of transcript
shuffle = False
# Nb of beams for beam beam_search
k = 5


#### GLOBAL VARIABLES ####

transcript_dir = "."
model_dir = "./result2"
result_dir = "./result2"
train_all_data = True

In [9]:
### HELPER FUNCTIONS ####

# A biased (p) coin flip to determine whether a child utterance will be part of train or test set
def is_test_sent(p):
    return True if random.random() < p else False


# Retrieve train and test sets for all child transcripts
def get_data_from_files():
    data = []
    for subdir, dirs, files in os.walk(transcript_dir):
        for file in files:
            if ('.capp' in file):
                textfile = subdir+'/'+file
                with open(textfile,'r') as f :
                    lines = f.readlines()
                train = []
                test = []
                for sent in lines :
                    if '*CHI:' in sent :
                        sent = re.sub('\*[A-Z]+: ', '', sent)
                        # if training on random 60% of child utterances and testing on 40% remaining
                        if train_all_data:
                            if is_test_sent(0.4):
                                test.append(sent)
                            else:
                                train.append(sent)
                        # else train only on child-directed and test on all child utterances
                        else:
                            test.append(sent)
                    else :
                        sent = re.sub('\*[A-Z]+: ', '', sent)
                        train.append(sent)
                data.append((file,train,test))
                # save test and train split in case we need to rerun model
                with open(model_dir+'/train/'+file.split('.capp')[0]+'.train.txt','w') as f :
                    for line in train:
                        f.write(line)
                with open(model_dir+'/test/'+file.split('.capp')[0]+'.test.txt','w') as f :
                    for line in test:
                        f.write(line)

    return data

def get_train_test(data_dir):
    data = []
    for subdir, dirs, files in os.walk(data_dir):
        for file in files:
            if ('.h5' in file):
                model_file = subdir + '/' + file
                childname = file.split('_model.h5')[0]
                trainfile = subdir + '/train/' + childname + '.train.txt'
                testfile = subdir + '/test/' + childname + '.test.txt'
                with open(trainfile, 'r') as f:
                    train = f.readlines()
                with open(testfile, 'r') as f:
                    test = f.readlines()
                data.append((childname, train, test))
    return data

In [14]:
data = get_train_test("./result")

for childname,train,test in data:
    print('PREPARE DATA FOR: '+childname+'\n')
    # Get vocabulary
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(train + test)
    vocab = tokenizer.word_index
    # vocabulary size is equal to the vocab size + the <PAD> character used for
    # padding sequences during training
    vocab_size = len(vocab) + 1
    # transform text strings into sequences of int (representing the word's
    # index in vocab)
    train_seqs = tokenizer.texts_to_sequences(train)
    test_seqs = tokenizer.texts_to_sequences(test)
    # get the maximum length of sequences - this is needed for data generator
    maxlen = max([len(seq) for seq in train_seqs])
    # number of optimization iterations to see whole corpus (epoch)
    steps_per_epoch = math.ceil(len(train_seqs)/ batch_size)

    print('vocab_size = '+str(vocab_size))
    print('train_maxlen = '+str(maxlen))
    print('INITIALIZE DATA GENERATORS...\n')

    # Create data generators for train and test sequences
    train_generator = DataGenerator(seqs = train_seqs,
                                       vocab = vocab,
                                       vocab_size = vocab_size,
                                       maxlen = maxlen,
                                       batch_size = batch_size,
                                       shuffle = shuffle)
    test_generator = DataGenerator(seqs = test_seqs,
                                       vocab = vocab,
                                       vocab_size = vocab_size,
                                       maxlen = maxlen,
                                       batch_size = batch_size,
                                       shuffle = shuffle)

    print('TRAINING MODEL...\n')
    # initialize model
    model = Sequential()
    # add initial embedding layer
    model.add(Embedding(input_dim = vocab_size,  # vocabulary size
                        output_dim = output_size,  # size of embeddings
                        input_length = maxlen-1))  # length of the padded sequences minus the last output word
    
    
    
    #add LSTM layers (2 LAYERS)
    #stateful=True, bias_regularizer=reg, kernel_regularizer=reg
    #where reg in [L1L2(l1=0.0, l2=0.0), L1L2(l1=0.01, l2=0.0), L1L2(l1=0.0, l2=0.01), L1L2(l1=0.01, l2=0.01)]
    #from keras.regularizers import L1L2
    model.add(LSTM(hidden_size, return_sequences=True))
    model.add(LSTM(hidden_size))
    
    
    
    # add layer regular densely connected layer to reshape to output size and use softmax activation for output layer
    model.add(Dense(vocab_size, activation='softmax'))
    # use RMSprop for optimization (could also use Adam or Adagrad) and cross entropy for loss function
    model.compile('rmsprop', 'categorical_crossentropy')

    # Train LSTM
    model.fit_generator(train_generator,
                        steps_per_epoch = steps_per_epoch,
                        epochs = epochs,
                        verbose=2,
                        max_queue_size=10,
                        shuffle=False)

    # Save trained model for future use
    model.save(str(model_dir+'/'+childname+'_model.h5'))
    # Initialize decoders and get production scores by utterance length using both the greedy and the beam search decoders
    decoders = DecoderGenerator(model,test_generator,k)
    print('CALCULATING PRODUCTION PERFORMANCE METRIC 1...\n')
    results_greedy = decoders.get_performance_bylength("greedy")
    print('CALCULATING PRODUCTION PERFORMANCE METRIC 2...\n')
    results_beam = decoders.get_performance_bylength("beam")

    # save all performance results
    with open(result_dir+'/greedy/'+childname+'.prod_result_50_100.csv','w') as f :
        f.write("iter,utterance_length,nb_utterances,produced,production_score"+'\n')
        for length in results_greedy:
            f.write('1,'+str(length)+','+
                            str(results_greedy[length][1])+','+
                            str(results_greedy[length][0])+','+
                            str(results_greedy[length][0]/results_greedy[length][1])+'\n')
        with open(result_dir+'/beam/'+childname+'.prod_result_50_100.csv','w') as f :
            f.write("iter,utterance_length,nb_utterances,produced,production_score"+'\n')
            for length in results_beam:
                f.write('1,'+str(length)+','+
                                str(results_beam[length][1])+','+
                                str(results_beam[length][0])+','+
                                str(results_beam[length][0]/results_beam[length][1])+'\n')
    del model


PREPARE DATA FOR: Tow

vocab_size = 2043
train_maxlen = 28
INITIALIZE DATA GENERATORS...

TRAINING MODEL...

Epoch 1/15
 - 21s - loss: 5.8617
Epoch 2/15
 - 18s - loss: 5.5084
Epoch 3/15
 - 19s - loss: 5.3051
Epoch 4/15
 - 19s - loss: 5.1346
Epoch 5/15
 - 19s - loss: 4.9933
Epoch 6/15
 - 19s - loss: 4.8860
Epoch 7/15
 - 19s - loss: 4.8068
Epoch 8/15
 - 18s - loss: 4.7545
Epoch 9/15
 - 19s - loss: 4.6990
Epoch 10/15
 - 19s - loss: 4.6395
Epoch 11/15
 - 19s - loss: 4.5711
Epoch 12/15
 - 19s - loss: 4.5129
Epoch 13/15
 - 18s - loss: 4.4591
Epoch 14/15
 - 18s - loss: 4.4177
Epoch 15/15
 - 18s - loss: 4.3922
CALCULATING PRODUCTION PERFORMANCE METRIC 1...

2
3
4
6
5
8
7
9
10
CALCULATING PRODUCTION PERFORMANCE METRIC 2...

2
3
4
6
5
8
7
9
10
PREPARE DATA FOR: Will

vocab_size = 338
train_maxlen = 17
INITIALIZE DATA GENERATORS...

TRAINING MODEL...

Epoch 1/15
 - 3s - loss: 5.7346
Epoch 2/15
 - 1s - loss: 5.2744
Epoch 3/15
 - 1s - loss: 5.1027
Epoch 4/15
 - 1s - loss: 5.0536
Epoch 5/15
 - 1s - 

In [22]:
from keras.regularizers import L1L2

def cross_validation(hidden_dims, output_sizes, regularizations):
    with open(result_dir+'/'+childname+'.prod_result_crossvalidation.csv','w') as f :
        f.write('decoder,hidden_dim,output_size,L1,L2,utterance_length,nb_utterances,produced,production_score\n')
        
    for hidden_dim in hidden_dims:
        for output_size in output_sizes:
            for reg in regularizations:
                print("hidden dim: " + str(hidden_dim))
                print("output size: " + str(output_size))
                print("reg: l1-" + str(reg.l1)+ " l2-" + str(reg.l2))
                print('INITIALIZE DATA GENERATORS...\n')
                # Create data generators for train and test sequences
                train_generator = DataGenerator(seqs = train_seqs,
                                                   vocab = vocab,
                                                   vocab_size = vocab_size,
                                                   maxlen = maxlen,
                                                   batch_size = batch_size,
                                                   shuffle = shuffle)
                test_generator = DataGenerator(seqs = test_seqs,
                                                   vocab = vocab,
                                                   vocab_size = vocab_size,
                                                   maxlen = maxlen,
                                                   batch_size = batch_size,
                                                   shuffle = shuffle)

                print('TRAINING MODEL...\n')
                # initialize model
                model = Sequential()
                # add initial embedding layer
                model.add(Embedding(input_dim = vocab_size,  # vocabulary size
                                    output_dim = output_size,  # size of embeddings
                                    input_length = maxlen-1))  # length of the padded sequences minus the last output word



                #add LSTM layers (2 LAYERS)
                #stateful=True, bias_regularizer=reg, kernel_regularizer=reg
                #where reg in [L1L2(l1=0.0, l2=0.0), L1L2(l1=0.01, l2=0.0), L1L2(l1=0.0, l2=0.01), L1L2(l1=0.01, l2=0.01)]
                #from keras.regularizers import L1L2
                model.add(LSTM(hidden_size, return_sequences=True))
                model.add(LSTM(hidden_size, bias_regularizer=reg))
                # add layer regular densely connected layer to reshape to output size and use softmax activation for output layer
                model.add(Dense(vocab_size, activation='softmax'))
                # use RMSprop for optimization (could also use Adam or Adagrad) and cross entropy for loss function
                model.compile('rmsprop', 'categorical_crossentropy')

                # Train LSTM
                model.fit_generator(train_generator,
                                    steps_per_epoch = steps_per_epoch,
                                    epochs = epochs,
                                    verbose=2,
                                    max_queue_size=10,
                                    shuffle=False)

                # Initialize decoders and get production scores by utterance length using both the greedy and the beam search decoders
                decoders = DecoderGenerator(model,test_generator,k)
                print('CALCULATING PRODUCTION PERFORMANCE METRIC 1...\n')
                results_greedy = decoders.get_performance_bylength("greedy")
                print('CALCULATING PRODUCTION PERFORMANCE METRIC 2...\n')
                results_beam = decoders.get_performance_bylength("beam")

                # save all performance results
                with open(result_dir+'/'+childname+'.prod_result_crossvalidation.csv','a') as f :
                    for length in results_greedy:
                        f.write('greedy'+','+str(hidden_dim)+','+
                                    str(output_size)+','+
                                    str(reg.l1)+','+
                                    str(reg.l2)+','+
                                    str(length)+','+
                                    str(results_greedy[length][1])+','+
                                    str(results_greedy[length][0])+','+
                                    str(results_greedy[length][0]/results_greedy[length][1])+'\n')
                    for length in results_beam:
                        f.write('beam'+','+str(hidden_dim)+','+
                                    str(output_size)+','+
                                    str(reg.l1)+','+
                                    str(reg.l2)+','+
                                    str(length)+','+
                                    str(results_beam[length][1])+','+
                                    str(results_beam[length][0])+','+
                                    str(results_beam[length][0]/results_beam[length][1])+'\n')
                del model




In [23]:
hidden_dims = [5,10,25,50]
output_sizes = [10,50,100]
regularizations = [L1L2(l1=0.0, l2=0.0), L1L2(l1=0.01, l2=0.0), L1L2(l1=0.0, l2=0.01), L1L2(l1=0.01, l2=0.01)]

data = get_train_test("./result")

for childname,train,test in data:
    print('PREPARE DATA FOR: '+childname+'\n')
    # Get vocabulary
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(train + test)
    vocab = tokenizer.word_index
    # vocabulary size is equal to the vocab size + the <PAD> character used for
    # padding sequences during training
    vocab_size = len(vocab) + 1
    # transform text strings into sequences of int (representing the word's
    # index in vocab)
    train_seqs = tokenizer.texts_to_sequences(train)
    test_seqs = tokenizer.texts_to_sequences(test)
    # get the maximum length of sequences - this is needed for data generator
    maxlen = max([len(seq) for seq in train_seqs])
    # number of optimization iterations to see whole corpus (epoch)
    steps_per_epoch = math.ceil(len(train_seqs)/ batch_size)

    print('vocab_size = '+str(vocab_size))
    print('train_maxlen = '+str(maxlen))
    
    cross_validation(hidden_dims, output_sizes, regularizations)

PREPARE DATA FOR: Will

vocab_size = 338
train_maxlen = 17
hidden dim: 5
output size: 10
reg: l1-0.0 l2-0.0
INITIALIZE DATA GENERATORS...

TRAINING MODEL...

Epoch 1/15
 - 4s - loss: 5.7582
Epoch 2/15
 - 1s - loss: 5.2348
Epoch 3/15
 - 1s - loss: 5.0949
Epoch 4/15
 - 1s - loss: 5.0580
Epoch 5/15
 - 1s - loss: 5.0190
Epoch 6/15
 - 1s - loss: 5.0196
Epoch 7/15
 - 0s - loss: 5.0285
Epoch 8/15
 - 0s - loss: 5.0141
Epoch 9/15
 - 0s - loss: 5.0268
Epoch 10/15
 - 1s - loss: 5.0259
Epoch 11/15
 - 0s - loss: 5.0221
Epoch 12/15
 - 1s - loss: 5.0080
Epoch 13/15
 - 1s - loss: 5.0185
Epoch 14/15
 - 1s - loss: 5.0273
Epoch 15/15
 - 0s - loss: 5.0174
CALCULATING PRODUCTION PERFORMANCE METRIC 1...

2
3
4
5
6
CALCULATING PRODUCTION PERFORMANCE METRIC 2...

2
3
4
5
6
hidden dim: 5
output size: 10
reg: l1-0.01 l2-0.0
INITIALIZE DATA GENERATORS...

TRAINING MODEL...

Epoch 1/15
 - 5s - loss: 6.2481
Epoch 2/15
 - 1s - loss: 5.6979
Epoch 3/15
 - 0s - loss: 5.5614
Epoch 4/15
 - 0s - loss: 5.5202
Epoch 5/15
 

Epoch 11/15
 - 1s - loss: 5.6966
Epoch 12/15
 - 1s - loss: 5.6409
Epoch 13/15
 - 1s - loss: 5.6119
Epoch 14/15
 - 1s - loss: 5.5728
Epoch 15/15
 - 1s - loss: 5.5130
CALCULATING PRODUCTION PERFORMANCE METRIC 1...

2
3
4
5
6
CALCULATING PRODUCTION PERFORMANCE METRIC 2...

2
3
4
5
6
hidden dim: 10
output size: 10
reg: l1-0.0 l2-0.0
INITIALIZE DATA GENERATORS...

TRAINING MODEL...

Epoch 1/15
 - 7s - loss: 5.7498
Epoch 2/15
 - 1s - loss: 5.2682
Epoch 3/15
 - 1s - loss: 5.1076
Epoch 4/15
 - 0s - loss: 5.0669
Epoch 5/15
 - 1s - loss: 5.0169
Epoch 6/15
 - 0s - loss: 5.0175
Epoch 7/15
 - 1s - loss: 5.0239
Epoch 8/15
 - 0s - loss: 5.0125
Epoch 9/15
 - 1s - loss: 5.0247
Epoch 10/15
 - 0s - loss: 5.0241
Epoch 11/15
 - 1s - loss: 5.0199
Epoch 12/15
 - 1s - loss: 5.0032
Epoch 13/15
 - 1s - loss: 5.0086
Epoch 14/15
 - 1s - loss: 5.0153
Epoch 15/15
 - 0s - loss: 5.0069
CALCULATING PRODUCTION PERFORMANCE METRIC 1...

2
3
4
5
6
CALCULATING PRODUCTION PERFORMANCE METRIC 2...

2
3
4
5
6
hidden dim: 10
ou

 - 1s - loss: 6.0070
Epoch 4/15
 - 1s - loss: 5.9488
Epoch 5/15
 - 1s - loss: 5.8787
Epoch 6/15
 - 1s - loss: 5.8515
Epoch 7/15
 - 1s - loss: 5.8324
Epoch 8/15
 - 1s - loss: 5.7933
Epoch 9/15
 - 1s - loss: 5.7751
Epoch 10/15
 - 1s - loss: 5.7438
Epoch 11/15
 - 1s - loss: 5.7008
Epoch 12/15
 - 1s - loss: 5.6432
Epoch 13/15
 - 1s - loss: 5.6124
Epoch 14/15
 - 1s - loss: 5.5692
Epoch 15/15
 - 1s - loss: 5.5041
CALCULATING PRODUCTION PERFORMANCE METRIC 1...

2
3
4
5
6
CALCULATING PRODUCTION PERFORMANCE METRIC 2...

2
3
4
5
6
hidden dim: 25
output size: 10
reg: l1-0.0 l2-0.0
INITIALIZE DATA GENERATORS...

TRAINING MODEL...

Epoch 1/15
 - 9s - loss: 5.7452
Epoch 2/15
 - 1s - loss: 5.2287
Epoch 3/15
 - 1s - loss: 5.0866
Epoch 4/15
 - 1s - loss: 5.0536
Epoch 5/15
 - 1s - loss: 5.0122
Epoch 6/15
 - 1s - loss: 5.0135
Epoch 7/15
 - 1s - loss: 5.0232
Epoch 8/15
 - 1s - loss: 5.0133
Epoch 9/15
 - 1s - loss: 5.0269
Epoch 10/15
 - 1s - loss: 5.0254
Epoch 11/15
 - 1s - loss: 5.0217
Epoch 12/15
 - 1s -

3
4
5
6
CALCULATING PRODUCTION PERFORMANCE METRIC 2...

2
3
4
5
6
hidden dim: 25
output size: 100
reg: l1-0.01 l2-0.01
INITIALIZE DATA GENERATORS...

TRAINING MODEL...

Epoch 1/15
 - 12s - loss: 6.7155
Epoch 2/15
 - 1s - loss: 6.1896
Epoch 3/15
 - 1s - loss: 6.0104
Epoch 4/15
 - 1s - loss: 5.9479
Epoch 5/15
 - 1s - loss: 5.8772
Epoch 6/15
 - 1s - loss: 5.8531
Epoch 7/15
 - 1s - loss: 5.8328
Epoch 8/15
 - 1s - loss: 5.7847
Epoch 9/15
 - 1s - loss: 5.7662
Epoch 10/15
 - 1s - loss: 5.7310
Epoch 11/15
 - 1s - loss: 5.6920
Epoch 12/15
 - 1s - loss: 5.6382
Epoch 13/15
 - 1s - loss: 5.6117
Epoch 14/15
 - 1s - loss: 5.5732
Epoch 15/15
 - 1s - loss: 5.5151
CALCULATING PRODUCTION PERFORMANCE METRIC 1...

2
3
4
5
6
CALCULATING PRODUCTION PERFORMANCE METRIC 2...

2
3
4
5
6
hidden dim: 50
output size: 10
reg: l1-0.0 l2-0.0
INITIALIZE DATA GENERATORS...

TRAINING MODEL...

Epoch 1/15
 - 13s - loss: 5.7724
Epoch 2/15
 - 1s - loss: 5.2541
Epoch 3/15
 - 1s - loss: 5.0990
Epoch 4/15
 - 1s - loss: 5.0621

Epoch 10/15
 - 1s - loss: 5.3191
Epoch 11/15
 - 1s - loss: 5.2848
Epoch 12/15
 - 1s - loss: 5.2362
Epoch 13/15
 - 1s - loss: 5.2134
Epoch 14/15
 - 1s - loss: 5.1825
Epoch 15/15
 - 1s - loss: 5.1329
CALCULATING PRODUCTION PERFORMANCE METRIC 1...

2
3
4
5
6
CALCULATING PRODUCTION PERFORMANCE METRIC 2...

2
3
4
5
6
hidden dim: 50
output size: 100
reg: l1-0.01 l2-0.01
INITIALIZE DATA GENERATORS...

TRAINING MODEL...

Epoch 1/15
 - 15s - loss: 6.7019
Epoch 2/15
 - 1s - loss: 6.1838
Epoch 3/15
 - 1s - loss: 6.0150
Epoch 4/15
 - 1s - loss: 5.9555
Epoch 5/15
 - 1s - loss: 5.8780
Epoch 6/15
 - 1s - loss: 5.8495
Epoch 7/15
 - 1s - loss: 5.8294
Epoch 8/15
 - 1s - loss: 5.7847
Epoch 9/15
 - 1s - loss: 5.7663
Epoch 10/15
 - 1s - loss: 5.7342
Epoch 11/15
 - 1s - loss: 5.6974
Epoch 12/15
 - 1s - loss: 5.6441
Epoch 13/15
 - 1s - loss: 5.6176
Epoch 14/15
 - 1s - loss: 5.5842
Epoch 15/15
 - 1s - loss: 5.5304
CALCULATING PRODUCTION PERFORMANCE METRIC 1...

2
3
4
5
6
CALCULATING PRODUCTION PERFORMANCE MET