In [1]:
import numpy as np
import re
import itertools
from collections import Counter
from tensorflow.contrib import learn
import unittests as tests
import tensorflow as tf

This notebook was created for this talk:
- https://www.infoq.com/br/presentations/deep-learning-for-sentiment-analysis?utm_source=infoq&utm_campaign=user_page&utm_medium=link

And it was inspired by this blog post:
- http://www.wildml.com/2015/12/implementing-a-cnn-for-text-classification-in-tensorflow/

And this paper:
- https://arxiv.org/abs/1408.5882

# Helpful functions (Taken from paper)

In [2]:
def clean_str(string):
    """
    Tokenization/string cleaning for all datasets except for SST.
    Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py

    Punctuation marks and common English contraction suffixes are split off
    into their own space-separated tokens, runs of whitespace are collapsed,
    and the result is stripped and lower-cased.

    : string: Raw sentence.
    : return: Cleaned, lower-cased sentence made of space-separated tokens.
    """
    # Every character outside this whitelist is turned into a space.
    string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
    # Detach contraction suffixes so that each becomes a separate token.
    string = re.sub(r"\'s", " 's", string)
    string = re.sub(r"\'ve", " 've", string)
    string = re.sub(r"n\'t", " n't", string)
    string = re.sub(r"\'re", " 're", string)
    string = re.sub(r"\'d", " 'd", string)
    string = re.sub(r"\'ll", " 'll", string)
    # Surround punctuation with spaces.
    string = re.sub(r",", " , ", string)
    string = re.sub(r"!", " ! ", string)
    # The replacement text must not contain a backslash: re.sub keeps unknown
    # non-letter escapes such as "\(" verbatim, which would leak a literal
    # backslash into the "(", ")" and "?" tokens.
    string = re.sub(r"\(", " ( ", string)
    string = re.sub(r"\)", " ) ", string)
    string = re.sub(r"\?", " ? ", string)
    # Collapse the whitespace runs introduced by the substitutions above.
    string = re.sub(r"\s{2,}", " ", string)
    return string.strip().lower()


def load_data_and_labels(positive_data_file, negative_data_file):
    """
    Loads MR polarity data from files, cleans every sentence and generates labels.

    : positive_data_file: Path to a text file with one positive example per line.
    : negative_data_file: Path to a text file with one negative example per line.
    : return: [x_text, y] where x_text is the list of cleaned sentences
      (positive examples first, then negative ones) and y is the matching
      array of one-hot labels ([0, 1] for positive, [1, 0] for negative).
    """
    # Load data from files. The context managers close the file handles
    # deterministically instead of leaving them open.
    with open(positive_data_file, "r") as positive_file:
        positive_examples = [line.strip() for line in positive_file]
    with open(negative_data_file, "r") as negative_file:
        negative_examples = [line.strip() for line in negative_file]
    # Clean every sentence (positives first, so the order matches the labels)
    x_text = [clean_str(sent) for sent in positive_examples + negative_examples]
    # Generate labels
    positive_labels = [[0, 1] for _ in positive_examples]
    negative_labels = [[1, 0] for _ in negative_examples]
    y = np.concatenate([positive_labels, negative_labels], 0)
    return [x_text, y]

def batch_iter(data, batch_size, num_batches_per_epoch, num_epochs, shuffle=True):
    """
    Generates a batch iterator for a dataset.

    The number of batches per epoch is supplied by the caller, so a run can be
    limited to a single batch per epoch. A progress prefix
    ("Epoch .., Sentence Batch ..:") is printed without a trailing newline
    right before each batch is yielded.

    : data: Sequence of examples; it is converted with np.array so that it can
      be indexed by a permutation.
    : batch_size: Maximum number of examples per batch.
    : num_batches_per_epoch: Upper bound on the number of batches per epoch.
    : num_epochs: Number of passes over the data.
    : shuffle: When True, the data is re-shuffled at the start of every epoch.
    : return: Generator yielding slices with at most batch_size examples.
    """
    data = np.array(data)
    data_size = len(data)
    for epoch in range(num_epochs):
        # Shuffle the data at each epoch
        if shuffle:
            shuffle_indices = np.random.permutation(np.arange(data_size))
            shuffled_data = data[shuffle_indices]
        else:
            shuffled_data = data
        for batch_num in range(num_batches_per_epoch):
            start_index = batch_num * batch_size
            if start_index >= data_size:
                # The data is exhausted for this epoch: stop instead of
                # yielding empty batches that callers cannot unpack.
                break
            end_index = min((batch_num + 1) * batch_size, data_size)
            print('Epoch {:>2}, Sentence Batch {}:  '.format(epoch + 1, batch_num), end='')
            yield shuffled_data[start_index:end_index]

# Data explore

- Movie reviews from Rotten Tomatoes
- 5331 positive reviews and 5331 negative reviews

In [3]:
import random

# Read both polarity files; `data` holds the cleaned reviews and `labels`
# the matching one-hot sentiment labels.
data, labels = load_data_and_labels(
    'data/rt-polaritydata/rt-polarity.pos',
    'data/rt-polaritydata/rt-polarity.neg',
)

print(len(data))    # number of reviews (each element is one review)
print(len(labels))  # number of sentiment labels


10662
10662


In [4]:
(data[1], labels[1]) #second review (index 1) and its one-hot label

("the gorgeously elaborate continuation of the lord of the rings trilogy is so huge that a column of words cannot adequately describe co writer director peter jackson 's expanded vision of j r r tolkien 's middle earth",
 array([0, 1]))

# Pre processing
- Build vocabulary
- Maps documents to sequences of word ids.
- Every sentence is padded with 0s up to the maximum review size, which is 56

In [5]:
# Length of the longest review, counted in space-separated tokens.
max_review_size = max(len(review.split(" ")) for review in data)

# Maps documents to sequences of word ids, using max_review_size as the
# document length handed to the vocabulary processor.
vocab_processor = learn.preprocessing.VocabularyProcessor(max_review_size)
id_sequences = vocab_processor.fit_transform(data)
x = np.array(list(id_sequences))
(x[0], data[0])

(array([ 1,  2,  3,  4,  5,  6,  1,  7,  8,  9, 10, 11, 12, 13, 14,  9, 15,
         5, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,  0,
         0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
         0,  0,  0,  0,  0]),
 "the rock is destined to be the 21st century 's new conan and that he 's going to make a splash even greater than arnold schwarzenegger , jean claud van damme or steven segal")

In [6]:
# Display the length of the longest review computed above
max_review_size

56

In [7]:
# Display the length of the first id sequence
len(x[0])

56

- Randomly shuffle data

In [8]:
#Shuffle Data
np.random.seed(10) #for debugging: ensures the generated random numbers are always the same
# One random permutation of all example positions, shared by inputs and labels
shuffle_indices = np.random.permutation(np.arange(len(labels)))
shuffle_indices

array([ 7359,  5573, 10180, ...,  1344,  7293,  1289])

In [9]:
#e.g.: x[7359] == x_shuffled[0]
# Index inputs and labels with the same permutation so that pairs stay aligned
x_shuffled = x[shuffle_indices]
y_shuffled = labels[shuffle_indices]



 - Train/Validation split

In [10]:
# Fraction of the examples held out for validation.
val_percentage = .1
# Negative index: slicing with it separates the tail of the shuffled arrays.
val_sample_index = -int(val_percentage * float(len(labels)))

In [11]:
# Everything before val_sample_index is used for training, the rest for validation.
x_train = x_shuffled[:val_sample_index]
x_val = x_shuffled[val_sample_index:]
y_train = y_shuffled[:val_sample_index]
y_val = y_shuffled[val_sample_index:]

In [12]:
# Report the vocabulary size and how many examples ended up in each split
print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
print("Train/Val split: {:d}/{:d}".format(len(y_train), len(y_val)))

Vocabulary Size: 18758
Train/Val split: 9596/1066


- The Architecture
![](img/architeture.png)

# Build CNN

- Inputs and Labels instances

In [13]:
def neural_net_sentence_input(sentence_size):
    """
    Build the placeholder that receives a batch of sentences.
    : sentence_size: Number of word ids per sentence (the padded length).
    : return: int32 placeholder named 'input_x' with shape [None, sentence_size].
    """
    batch_shape = [None, sentence_size]
    sentences = tf.placeholder(tf.int32, shape=batch_shape, name='input_x')
    return sentences


def neural_net_label_input(n_classes):
    """
    Build the placeholder that receives a batch of labels.
    : n_classes: Number of classes
    : return: float32 placeholder named 'input_y' with shape [None, n_classes].
    """
    batch_shape = [None, n_classes]
    targets = tf.placeholder(tf.float32, shape=batch_shape, name='input_y')
    return targets


def neural_net_keep_prob_input():
    """
    Build the placeholder for the dropout keep probability.
    : return: float32 placeholder named 'keep_prob', created with shape=None.
    """
    keep_probability_input = tf.placeholder(tf.float32, shape=None, name='keep_prob')
    return keep_probability_input


"""
UNIT TESTS
"""
# Clear the default graph before the checks run
tf.reset_default_graph()
# `tests` is the local `unittests` module imported at the top of the notebook;
# each check receives the factory function it exercises.
tests.test_nn_sentence_inputs(neural_net_sentence_input)
tests.test_nn_label_inputs(neural_net_label_input)
tests.test_nn_keep_prob_inputs(neural_net_keep_prob_input)

Sentence Input Tests Passed.
Label Input Tests Passed.
Keep Prob Tests Passed.


- Load Pre Trained Word2Vec Model from GoogleNews Dataset

In [15]:
%%time
import gensim
import logging
# Log at INFO level so that progress messages are emitted while loading
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)
model = None
# Load the GoogleNews vectors from the binary word2vec file
model = gensim.models.KeyedVectors.load_word2vec_format('GoogleNews-vectors-negative300.bin', binary=True)



2017-09-27 14:27:28,911 : INFO : loading projection weights from GoogleNews-vectors-negative300.bin
2017-09-27 14:28:28,832 : INFO : loaded (3000000, 300) matrix from GoogleNews-vectors-negative300.bin


CPU times: user 46.6 s, sys: 9.29 s, total: 55.8 s
Wall time: 59.9 s


- Store only the words that exist in our vocab
- If a word exists in my vocab but does not exist in the GoogleNews dataset, I just initialize it randomly

In [16]:
# Remove previous weights, bias, inputs, etc..

tf.reset_default_graph()
vocab_size = len(vocab_processor.vocabulary_)
# Embedding variable with one 300-dimensional row per vocabulary id.
W = tf.Variable(initial_value=tf.random_uniform([vocab_size, 300], -1.0, 1.0),name="K")
# T holds the values later assigned to W. It is always created, so the loop
# below and the training cells never hit an undefined name.
T = np.random.rand(vocab_size, 300)
vocab_dict = vocab_processor.vocabulary_._mapping
for word,idx in vocab_dict.items():
    # Explicit None check instead of relying on the truthiness of the model
    # object; words missing from the pre-trained model get a small random vector.
    if model is not None and word in model:
        T[idx] = model[word]
    else:
        T[idx] = np.random.uniform(low=-0.25, high=0.25, size=(300,))
#save memory
del model

- Embedding Layer
![](img/embed.png)

In [17]:
def embedding_creation(x_tensor,vocab_size,embedding_size):
    """
    Look up the embedding of every word id and append a trailing axis.
    : x_tensor: Tensor of word ids.
    : vocab_size: Not read by the body; the lookup uses the notebook-level variable W.
    : embedding_size: Not read by the body.
    : return: The looked-up embeddings with one extra dimension added as the last axis.
    """
    word_vectors = tf.nn.embedding_lookup(W, x_tensor)
    # conv2d expects a 4-D input, hence the extra last axis.
    return tf.expand_dims(word_vectors, -1)


tests.test_embed(embedding_creation)

Tests Passed


- Convolution Layer

![](img/conv.png)
![](img/maxpool.png)

In [18]:
def conv2d_maxpool(x_tensor, num_filters, filter_size):
    """
    Apply one convolution (bias + ReLU) followed by max pooling over the sequence.
    : x_tensor: 4-D input tensor; axis 1 is read as the sequence length and
      axis 2 as the embedding size.
    : num_filters: Number of convolution filters.
    : filter_size: Number of consecutive words covered by each filter.
    : return: A tensor that represents convolution and max pooling of x_tensor
    """
    seq_len = int(x_tensor.shape[1])
    embed_dim = int(x_tensor.shape[2])

    # Each filter spans filter_size words and the full embedding width.
    kernel = tf.Variable(
        tf.truncated_normal([filter_size, embed_dim, 1, num_filters], stddev=0.1),
        name="W")
    offset = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")

    # Strides control how the filter moves over the input; the filter advances
    # one word at a time, so every stride is 1. 'VALID' means the input is not
    # padded (narrow convolution), which leaves
    # sequence_length - filter_size + 1 positions along the sequence axis.
    # ReLU adds the non-linearity on top of the biased convolution.
    activations = tf.nn.relu(
        tf.nn.bias_add(
            tf.nn.conv2d(x_tensor, kernel, strides=[1, 1, 1, 1], padding='VALID'),
            offset),
        name="relu")

    # The pooling window covers every position produced by the convolution,
    # so each filter contributes a single maximum:
    # the result has shape [batch_size, 1, 1, num_filters].
    pool_window = [1, seq_len - filter_size + 1, 1, 1]
    return tf.nn.max_pool(activations,
                          ksize=pool_window,
                          strides=[1, 1, 1, 1],
                          padding='VALID',
                          name='pool')


tests.test_con_pool(conv2d_maxpool)

Tests Passed


- Apply different filters

In [19]:
def apply_conv_filters(x_tensor,filter_sizes,num_filters):
    """
    Create a convolution + maxpool layer for each filter size and join the results.
    : x_tensor: Input tensor handed unchanged to conv2d_maxpool for every filter size.
    : filter_sizes: Iterable with the filter sizes to apply.
    : num_filters: Number of filters per filter size.
    : return: The pooled outputs concatenated along axis 3.
    """
    pooled_outputs = []
    for filter_size in filter_sizes:
        with tf.name_scope("conv-maxpool-{}".format(filter_size)):
            pooled_outputs.append(conv2d_maxpool(x_tensor, num_filters, filter_size))
    # Concatenate along the last axis, where conv2d_maxpool keeps its filters
    return tf.concat(pooled_outputs, 3)

tests.test_apply_filters(apply_conv_filters,conv2d_maxpool)

Tests Passed


- Flatten Layer

![](img/flatten.png)

The output should be the shape (Batch Size, Flattened Features Size).

In [20]:
def flatten(x_tensor):
    """
    Collapse every non-batch axis of a 4-D tensor into a single axis.
    : x_tensor: A tensor of size (Batch Size, d1, d2, d3).
    : return: A tensor of size (Batch Size, d1 * d2 * d3).
    """
    dims = x_tensor.shape
    flat_size = int(dims[1] * dims[2] * dims[3])
    # -1 lets TensorFlow infer the batch dimension
    return tf.reshape(x_tensor, [-1, flat_size])



tests.test_flatten(flatten)

Tests Passed


- Output Layer

![](img/output.png)

In [21]:
def output(x_tensor,num_classes):
    """
    Fully connected output layer.
    : x_tensor: 2-D tensor; axis 1 is read as the number of input features.
    : num_classes: Number of output scores per example.
    : return: Tuple (scores, L2 loss of the weights, L2 loss of the bias).
    """
    in_features = int(x_tensor.shape[1])
    weights = tf.Variable(
        tf.truncated_normal([in_features, num_classes], stddev=0.1), name="W")
    biases = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")

    scores = tf.nn.xw_plus_b(x_tensor, weights, biases, name="scores")
    weight_penalty = tf.nn.l2_loss(weights)
    bias_penalty = tf.nn.l2_loss(biases)
    return scores, weight_penalty, bias_penalty


- Convolutional Network

In [22]:
def conv_net(x, keep_prob):
    """
    Create the convolutional neural network model:
    embedding -> parallel conv/max-pool branches -> flatten -> dropout -> output.
    : x: Placeholder tensor that holds the word-id sentences.
    : keep_prob: Placeholder tensor that holds the dropout keep probability.
    : return: Tuple (logits, L2 loss of the output weights, L2 loss of the output bias)
    """
    # Architecture settings
    embed_dim = 300
    num_filters = 128
    filter_sizes = [3,4,5]
    num_classes = 2
    vocab_size = len(vocab_processor.vocabulary_)

    with tf.name_scope("embedding"):
        embedded = embedding_creation(x, vocab_size, embed_dim)

    # One conv + max-pool branch per filter size, concatenated and flattened
    pooled = apply_conv_filters(embedded, filter_sizes, num_filters)
    features = flatten(pooled)

    with tf.name_scope("dropout"):
        dropped = tf.nn.dropout(features, keep_prob)

    with tf.name_scope("output"):
        return output(dropped, num_classes)

In [23]:
#Regularization parameters
l2_loss = tf.constant(0.0)
l2_reg_lambda = 1.0


# Inputs
# The placeholder width comes from max_review_size (the document length given
# to the vocabulary processor) instead of a hard-coded 56, so it cannot drift
# from the data.
x_input = neural_net_sentence_input(max_review_size) #sequence_length
y_input = neural_net_label_input(2) #positive or negative
keep_prob = neural_net_keep_prob_input()

# Model
logits, l2_w, l2_b = conv_net(x_input, keep_prob)

# Name logits Tensor, so that it can be loaded from disk after training
logits = tf.identity(logits, name='logits')

# Loss and Optimizer
# Mean softmax cross-entropy plus the weighted L2 losses of the output layer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=y_input))
l2_loss += l2_w
l2_loss += l2_b
cost =  cost + l2_reg_lambda * l2_loss
#optimizer = tf.train.AdamOptimizer().minimize(cost) - Other option for the optimizer, but got less validation acc
global_step = tf.Variable(0, name="global_step", trainable=False)
optimizer = tf.train.AdamOptimizer(1e-3)
grads_and_vars = optimizer.compute_gradients(cost)
train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

# Accuracy
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(y_input, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')


tests.test_conv_net(conv_net)

Neural Network Built!


# Training Process

In [24]:
def train_neural_network(session, optimizer, keep_probability, feature_batch, label_batch):
    """
    Run one optimization step on a batch of sentences and labels
    : session: Current TensorFlow session
    : optimizer: TensorFlow operation to run for the update
    : keep_probability: keep probability fed to the dropout placeholder
    : feature_batch: Batch of sentences fed to x_input
    : label_batch: Batch of labels fed to y_input
    """
    feed = {
        x_input: feature_batch,
        y_input: label_batch,
        keep_prob: keep_probability,
    }
    session.run(optimizer, feed_dict=feed)


tests.test_train_nn(train_neural_network)

Tests Passed


# Print statistics

In [25]:
def print_stats(session, feature_batch, label_batch, cost, accuracy):
    """
    Print the loss and the accuracy measured on the given batch
    : session: Current TensorFlow session
    : feature_batch: Batch of sentences fed to x_input
    : label_batch: Batch of labels fed to y_input
    : cost: TensorFlow cost tensor
    : accuracy: TensorFlow accuracy tensor
    """
    # keep_prob of 1. disables dropout while measuring
    feed = {
        x_input: feature_batch,
        y_input: label_batch,
        keep_prob: 1.,
    }
    loss, acc = session.run([cost, accuracy], feed_dict=feed)

    print('Loss: {:>10.4f} Training Accuracy: {:.6f}'.format(loss,acc))

In [26]:
def print_validation_stats(session):
    """
    Print the accuracy measured on the validation split
    : session: Current TensorFlow session

    Reads the notebook-level x_val / y_val arrays and the accuracy tensor.
    """
    # keep_prob of 1. disables dropout while measuring
    feed = {
        x_input: x_val,
        y_input: y_val,
        keep_prob: 1.,
    }
    valid_acc = session.run(accuracy, feed_dict=feed)

    print('Validation Accuracy: {:.6f}'.format(valid_acc))

# Hyperparameters

- Just for a Single Batch

In [27]:
# Settings for the single-batch check
epochs = 12
batch_size = 64
keep_probability =  0.5  # dropout keep probability fed while training
num_batches_per_epoch = 1  # a single batch per epoch

# Training on a Single Batch

In [28]:
%%time
print('Checking the Training on a Single Batch...')
saver = tf.train.Saver()
with tf.Session() as sess:
    # Initializing the variables
    sess.run(tf.global_variables_initializer())
    # T.any must be called: the bare bound method is always truthy, so the
    # check never looked at the contents of T.
    if T.any():
        # Overwrite the randomly initialized embedding variable with T
        sess.run(W.assign(T))
    # Generate single batches (one un-shuffled batch per epoch)
    batches = batch_iter(list(zip(x_train, y_train)), batch_size, 1, epochs, shuffle=False)
    # Training cycle
    for batch in batches:
        batch_features, batch_labels = zip(*batch)
        train_neural_network(sess, train_op, keep_probability, batch_features, batch_labels)
        print_stats(sess, batch_features, batch_labels, cost, accuracy)
    print("#######VALIDATION STATS#######")
    print_validation_stats(sess)
    print("#######SAVING PARTIAL CHECKPOINT#######")
    save_path = saver.save(sess, "./tmp/temp_ckpt.ckpt")

Checking the Training on a Single Batch...
Epoch  1, Sentence Batch 0:  Loss:     3.5260 Training Accuracy: 0.812500
Epoch  2, Sentence Batch 0:  Loss:     3.3360 Training Accuracy: 0.953125
Epoch  3, Sentence Batch 0:  Loss:     3.1959 Training Accuracy: 0.984375
Epoch  4, Sentence Batch 0:  Loss:     3.0962 Training Accuracy: 1.000000
Epoch  5, Sentence Batch 0:  Loss:     3.0177 Training Accuracy: 1.000000
Epoch  6, Sentence Batch 0:  Loss:     2.9481 Training Accuracy: 1.000000
Epoch  7, Sentence Batch 0:  Loss:     2.8869 Training Accuracy: 1.000000
Epoch  8, Sentence Batch 0:  Loss:     2.8321 Training Accuracy: 1.000000
Epoch  9, Sentence Batch 0:  Loss:     2.7813 Training Accuracy: 1.000000
Epoch 10, Sentence Batch 0:  Loss:     2.7337 Training Accuracy: 1.000000
Epoch 11, Sentence Batch 0:  Loss:     2.6881 Training Accuracy: 1.000000
Epoch 12, Sentence Batch 0:  Loss:     2.6437 Training Accuracy: 1.000000
#######VALIDATION STATS#######
Validation Accuracy: 0.577861
#######S

- Update Hyperparameters for full training

In [29]:
# Settings for the full training run
epochs = 7
batch_size = 64
keep_probability = 0.3
# Number of batches needed to cover every (x, y) training pair once,
# including a last partial batch.
num_batches_per_epoch = int((min(len(x_train), len(y_train)) - 1) / batch_size) + 1

In [30]:
print('Training...')
with tf.Session() as sess:
    # Initializing the variables
    sess.run(tf.global_variables_initializer())
    # T.any must be called: the bare bound method is always truthy, so the
    # check never looked at the contents of T.
    if T.any():
        # Overwrite the randomly initialized embedding variable with T
        sess.run(W.assign(T))
    # Generate the shuffled batches for every epoch
    batches = batch_iter(list(zip(x_train, y_train)), batch_size, num_batches_per_epoch, epochs, shuffle=True)
    # Training cycle
    for step, batch in enumerate(batches):
        # Report the validation accuracy before the first batch and then
        # every 100 batches
        if step % 100 == 0:
            print_validation_stats(sess)
        batch_features, batch_labels = zip(*batch)
        train_neural_network(sess, train_op, keep_probability, batch_features, batch_labels)
        print_stats(sess, batch_features, batch_labels, cost, accuracy)

    print("#######VALIDATION STATS#######")
    print_validation_stats(sess)
    print("#######SAVING FINAL RESULTS#######")
    save_path = saver.save(sess, "./tmp/full_model.ckpt")

Training...
Epoch  1, Sentence Batch 0:  Validation Accuracy: 0.483114
Loss:     3.7501 Training Accuracy: 0.703125
Epoch  1, Sentence Batch 1:  Loss:     3.7115 Training Accuracy: 0.671875
Epoch  1, Sentence Batch 2:  Loss:     4.0143 Training Accuracy: 0.484375
Epoch  1, Sentence Batch 3:  Loss:     4.0085 Training Accuracy: 0.515625
Epoch  1, Sentence Batch 4:  Loss:     3.9363 Training Accuracy: 0.484375
Epoch  1, Sentence Batch 5:  Loss:     3.6925 Training Accuracy: 0.609375
Epoch  1, Sentence Batch 6:  Loss:     3.6258 Training Accuracy: 0.625000
Epoch  1, Sentence Batch 7:  Loss:     3.5709 Training Accuracy: 0.687500
Epoch  1, Sentence Batch 8:  Loss:     3.6313 Training Accuracy: 0.578125
Epoch  1, Sentence Batch 9:  Loss:     3.5387 Training Accuracy: 0.609375
Epoch  1, Sentence Batch 10:  Loss:     3.6764 Training Accuracy: 0.578125
Epoch  1, Sentence Batch 11:  Loss:     3.5353 Training Accuracy: 0.671875
Epoch  1, Sentence Batch 12:  Loss:     3.8361 Training Accuracy: 0.

Epoch  1, Sentence Batch 108:  Loss:     1.4032 Training Accuracy: 0.718750
Epoch  1, Sentence Batch 109:  Loss:     1.4064 Training Accuracy: 0.734375
Epoch  1, Sentence Batch 110:  Loss:     1.3224 Training Accuracy: 0.859375
Epoch  1, Sentence Batch 111:  Loss:     1.2664 Training Accuracy: 0.859375
Epoch  1, Sentence Batch 112:  Loss:     1.3216 Training Accuracy: 0.765625
Epoch  1, Sentence Batch 113:  Loss:     1.3077 Training Accuracy: 0.781250
Epoch  1, Sentence Batch 114:  Loss:     1.3579 Training Accuracy: 0.750000
Epoch  1, Sentence Batch 115:  Loss:     1.3314 Training Accuracy: 0.703125
Epoch  1, Sentence Batch 116:  Loss:     1.3768 Training Accuracy: 0.656250
Epoch  1, Sentence Batch 117:  Loss:     1.2969 Training Accuracy: 0.734375
Epoch  1, Sentence Batch 118:  Loss:     1.3322 Training Accuracy: 0.656250
Epoch  1, Sentence Batch 119:  Loss:     1.2895 Training Accuracy: 0.718750
Epoch  1, Sentence Batch 120:  Loss:     1.2296 Training Accuracy: 0.781250
Epoch  1, Se

Epoch  2, Sentence Batch 67:  Loss:     0.6261 Training Accuracy: 0.921875
Epoch  2, Sentence Batch 68:  Loss:     0.6706 Training Accuracy: 0.796875
Epoch  2, Sentence Batch 69:  Loss:     0.6575 Training Accuracy: 0.859375
Epoch  2, Sentence Batch 70:  Loss:     0.6323 Training Accuracy: 0.828125
Epoch  2, Sentence Batch 71:  Loss:     0.7361 Training Accuracy: 0.703125
Epoch  2, Sentence Batch 72:  Loss:     0.6912 Training Accuracy: 0.828125
Epoch  2, Sentence Batch 73:  Loss:     0.6399 Training Accuracy: 0.859375
Epoch  2, Sentence Batch 74:  Loss:     0.6481 Training Accuracy: 0.812500
Epoch  2, Sentence Batch 75:  Loss:     0.6521 Training Accuracy: 0.859375
Epoch  2, Sentence Batch 76:  Loss:     0.6570 Training Accuracy: 0.859375
Epoch  2, Sentence Batch 77:  Loss:     0.6398 Training Accuracy: 0.843750
Epoch  2, Sentence Batch 78:  Loss:     0.6568 Training Accuracy: 0.843750
Epoch  2, Sentence Batch 79:  Loss:     0.6114 Training Accuracy: 0.921875
Epoch  2, Sentence Batch 

Epoch  3, Sentence Batch 25:  Loss:     0.4421 Training Accuracy: 0.921875
Epoch  3, Sentence Batch 26:  Loss:     0.4525 Training Accuracy: 0.890625
Epoch  3, Sentence Batch 27:  Loss:     0.4776 Training Accuracy: 0.875000
Epoch  3, Sentence Batch 28:  Loss:     0.5182 Training Accuracy: 0.812500
Epoch  3, Sentence Batch 29:  Loss:     0.4744 Training Accuracy: 0.906250
Epoch  3, Sentence Batch 30:  Loss:     0.4040 Training Accuracy: 0.921875
Epoch  3, Sentence Batch 31:  Loss:     0.4121 Training Accuracy: 0.921875
Epoch  3, Sentence Batch 32:  Loss:     0.4544 Training Accuracy: 0.906250
Epoch  3, Sentence Batch 33:  Loss:     0.4625 Training Accuracy: 0.875000
Epoch  3, Sentence Batch 34:  Loss:     0.4276 Training Accuracy: 0.953125
Epoch  3, Sentence Batch 35:  Loss:     0.4615 Training Accuracy: 0.890625
Epoch  3, Sentence Batch 36:  Loss:     0.4598 Training Accuracy: 0.906250
Epoch  3, Sentence Batch 37:  Loss:     0.4002 Training Accuracy: 0.921875
Epoch  3, Sentence Batch 

Epoch  3, Sentence Batch 133:  Loss:     0.3797 Training Accuracy: 0.906250
Epoch  3, Sentence Batch 134:  Loss:     0.3498 Training Accuracy: 0.937500
Epoch  3, Sentence Batch 135:  Loss:     0.5191 Training Accuracy: 0.812500
Epoch  3, Sentence Batch 136:  Loss:     0.4535 Training Accuracy: 0.875000
Epoch  3, Sentence Batch 137:  Loss:     0.4481 Training Accuracy: 0.859375
Epoch  3, Sentence Batch 138:  Loss:     0.4152 Training Accuracy: 0.937500
Epoch  3, Sentence Batch 139:  Loss:     0.4339 Training Accuracy: 0.828125
Epoch  3, Sentence Batch 140:  Loss:     0.3362 Training Accuracy: 0.953125
Epoch  3, Sentence Batch 141:  Loss:     0.3682 Training Accuracy: 0.937500
Epoch  3, Sentence Batch 142:  Loss:     0.3363 Training Accuracy: 0.921875
Epoch  3, Sentence Batch 143:  Loss:     0.4850 Training Accuracy: 0.828125
Epoch  3, Sentence Batch 144:  Loss:     0.4391 Training Accuracy: 0.875000
Epoch  3, Sentence Batch 145:  Loss:     0.3821 Training Accuracy: 0.875000
Epoch  3, Se

Epoch  4, Sentence Batch 92:  Loss:     0.3290 Training Accuracy: 0.937500
Epoch  4, Sentence Batch 93:  Loss:     0.3963 Training Accuracy: 0.859375
Epoch  4, Sentence Batch 94:  Loss:     0.3155 Training Accuracy: 0.953125
Epoch  4, Sentence Batch 95:  Loss:     0.3390 Training Accuracy: 0.921875
Epoch  4, Sentence Batch 96:  Loss:     0.3351 Training Accuracy: 0.953125
Epoch  4, Sentence Batch 97:  Loss:     0.3046 Training Accuracy: 0.953125
Epoch  4, Sentence Batch 98:  Loss:     0.3357 Training Accuracy: 0.953125
Epoch  4, Sentence Batch 99:  Loss:     0.3069 Training Accuracy: 0.921875
Epoch  4, Sentence Batch 100:  Loss:     0.3475 Training Accuracy: 0.906250
Epoch  4, Sentence Batch 101:  Loss:     0.3221 Training Accuracy: 0.906250
Epoch  4, Sentence Batch 102:  Loss:     0.3310 Training Accuracy: 0.968750
Epoch  4, Sentence Batch 103:  Loss:     0.3567 Training Accuracy: 0.921875
Epoch  4, Sentence Batch 104:  Loss:     0.3611 Training Accuracy: 0.906250
Epoch  4, Sentence B

Epoch  5, Sentence Batch 50:  Loss:     0.2759 Training Accuracy: 0.953125
Epoch  5, Sentence Batch 51:  Loss:     0.2502 Training Accuracy: 0.953125
Epoch  5, Sentence Batch 52:  Loss:     0.3236 Training Accuracy: 0.875000
Epoch  5, Sentence Batch 53:  Loss:     0.2198 Training Accuracy: 0.984375
Epoch  5, Sentence Batch 54:  Loss:     0.2158 Training Accuracy: 1.000000
Epoch  5, Sentence Batch 55:  Loss:     0.2638 Training Accuracy: 0.953125
Epoch  5, Sentence Batch 56:  Loss:     0.3011 Training Accuracy: 0.921875
Epoch  5, Sentence Batch 57:  Loss:     0.3229 Training Accuracy: 0.890625
Epoch  5, Sentence Batch 58:  Loss:     0.2796 Training Accuracy: 0.937500
Epoch  5, Sentence Batch 59:  Loss:     0.2681 Training Accuracy: 0.968750
Epoch  5, Sentence Batch 60:  Loss:     0.2846 Training Accuracy: 0.953125
Epoch  5, Sentence Batch 61:  Loss:     0.3186 Training Accuracy: 0.937500
Epoch  5, Sentence Batch 62:  Loss:     0.3100 Training Accuracy: 0.937500
Epoch  5, Sentence Batch 

Epoch  6, Sentence Batch 8:  Loss:     0.2226 Training Accuracy: 0.968750
Epoch  6, Sentence Batch 9:  Loss:     0.2509 Training Accuracy: 0.937500
Epoch  6, Sentence Batch 10:  Loss:     0.2245 Training Accuracy: 0.968750
Epoch  6, Sentence Batch 11:  Loss:     0.2315 Training Accuracy: 0.968750
Epoch  6, Sentence Batch 12:  Loss:     0.2255 Training Accuracy: 0.968750
Epoch  6, Sentence Batch 13:  Loss:     0.2623 Training Accuracy: 0.937500
Epoch  6, Sentence Batch 14:  Loss:     0.2481 Training Accuracy: 0.937500
Epoch  6, Sentence Batch 15:  Loss:     0.2296 Training Accuracy: 0.968750
Epoch  6, Sentence Batch 16:  Loss:     0.2304 Training Accuracy: 0.968750
Epoch  6, Sentence Batch 17:  Loss:     0.1986 Training Accuracy: 0.968750
Epoch  6, Sentence Batch 18:  Loss:     0.1958 Training Accuracy: 1.000000
Epoch  6, Sentence Batch 19:  Loss:     0.1880 Training Accuracy: 1.000000
Epoch  6, Sentence Batch 20:  Loss:     0.2100 Training Accuracy: 0.984375
Epoch  6, Sentence Batch 21

Epoch  6, Sentence Batch 117:  Loss:     0.2207 Training Accuracy: 0.937500
Epoch  6, Sentence Batch 118:  Loss:     0.2171 Training Accuracy: 0.953125
Epoch  6, Sentence Batch 119:  Loss:     0.1683 Training Accuracy: 1.000000
Epoch  6, Sentence Batch 120:  Loss:     0.2008 Training Accuracy: 0.968750
Epoch  6, Sentence Batch 121:  Loss:     0.3283 Training Accuracy: 0.921875
Epoch  6, Sentence Batch 122:  Loss:     0.1822 Training Accuracy: 1.000000
Epoch  6, Sentence Batch 123:  Loss:     0.1740 Training Accuracy: 1.000000
Epoch  6, Sentence Batch 124:  Loss:     0.1891 Training Accuracy: 0.968750
Epoch  6, Sentence Batch 125:  Loss:     0.2525 Training Accuracy: 0.953125
Epoch  6, Sentence Batch 126:  Loss:     0.1959 Training Accuracy: 0.968750
Epoch  6, Sentence Batch 127:  Loss:     0.2573 Training Accuracy: 0.937500
Epoch  6, Sentence Batch 128:  Loss:     0.2742 Training Accuracy: 0.937500
Epoch  6, Sentence Batch 129:  Loss:     0.2343 Training Accuracy: 0.953125
Epoch  6, Se

Epoch  7, Sentence Batch 76:  Loss:     0.1960 Training Accuracy: 0.968750
Epoch  7, Sentence Batch 77:  Loss:     0.1873 Training Accuracy: 0.984375
Epoch  7, Sentence Batch 78:  Loss:     0.1723 Training Accuracy: 0.984375
Epoch  7, Sentence Batch 79:  Loss:     0.1593 Training Accuracy: 1.000000
Epoch  7, Sentence Batch 80:  Loss:     0.1349 Training Accuracy: 1.000000
Epoch  7, Sentence Batch 81:  Loss:     0.1585 Training Accuracy: 0.984375
Epoch  7, Sentence Batch 82:  Loss:     0.1444 Training Accuracy: 0.984375
Epoch  7, Sentence Batch 83:  Loss:     0.1582 Training Accuracy: 1.000000
Epoch  7, Sentence Batch 84:  Loss:     0.2071 Training Accuracy: 0.968750
Epoch  7, Sentence Batch 85:  Loss:     0.1868 Training Accuracy: 1.000000
Epoch  7, Sentence Batch 86:  Loss:     0.1784 Training Accuracy: 0.984375
Epoch  7, Sentence Batch 87:  Loss:     0.1606 Training Accuracy: 0.984375
Epoch  7, Sentence Batch 88:  Loss:     0.1996 Training Accuracy: 0.968750
Epoch  7, Sentence Batch 