<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Imports" data-toc-modified-id="Imports-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Imports</a></span></li><li><span><a href="#Batch-Reader" data-toc-modified-id="Batch-Reader-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Batch Reader</a></span></li><li><span><a href="#Model" data-toc-modified-id="Model-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Model</a></span></li><li><span><a href="#Test-the-Model" data-toc-modified-id="Test-the-Model-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Test the Model</a></span></li></ul></div>

# Imports

In [1]:
import numpy as np

# Batch Reader

In [2]:
import numpy as np

# Adapted from a Udemy data science course.
class Audiobooks_Data_Reader():
    """Iterate over one split of the Audiobooks dataset in fixed-size batches.

    The split is read from ``<data_dir>/Audiobooks_data_<dataset>.npz``. The
    archive must contain an ``inputs`` array and a ``targets`` array of
    integer class labels. Every iteration step yields a pair
    ``(inputs_batch, targets_one_hot)``.

    Only full batches are produced: ``batch_count`` is
    ``len(inputs) // batch_size``, so trailing samples that do not fill a
    whole batch are never yielded.

    Changes relative to the original course material:
    1. Name of the data file
    2. Number of classes

    Parameters
    ----------
    dataset : str
        Name of the split to load; one of "train", "validation", "test".
    batch_size : int, optional
        Number of samples per batch. When omitted (``None``) the whole split
        is served as a single batch.
    data_dir : str, optional
        Directory that holds the ``.npz`` files.
    classes_num : int, optional
        Number of classes used for the one-hot encoding of the targets.

    Raises
    ------
    ValueError
        If ``batch_size`` is given but is not a positive number.
    """

    def __init__(self, dataset, batch_size=None, data_dir='../data/outputs', classes_num=2):
        path = '{0}/Audiobooks_data_{1}.npz'.format(data_dir, dataset)

        # np.load returns an NpzFile that keeps the archive open; the context
        # manager closes it as soon as the arrays have been copied out.
        with np.load(path) as npz:
            # Inputs are floats, targets are integers. The builtin float/int
            # are what the removed np.float/np.int aliases pointed to, so the
            # resulting dtypes are unchanged.
            self.inputs = npz['inputs'].astype(float)
            self.targets = npz['targets'].astype(int)

        self.classes_num = classes_num

        sample_count = self.inputs.shape[0]
        if batch_size is None:
            # e.g. for validation we can load the whole data at once.
            self.batch_size = sample_count
        else:
            if batch_size <= 0:
                raise ValueError(
                    'batch_size must be a positive integer, got {0!r}'.format(batch_size))
            self.batch_size = batch_size

        # Index of the next batch to serve and the number of full batches.
        self.curr_batch = 0
        # An empty split with batch_size=None gives batch_size == 0; report
        # zero batches instead of dividing by zero.
        self.batch_count = sample_count // self.batch_size if self.batch_size else 0

    def __next__(self):
        """Return the next ``(inputs_batch, targets_one_hot)`` pair.

        Raises ``StopIteration`` once all full batches have been served and
        rewinds the reader, so the same object can be iterated again (one
        pass per epoch).
        """
        if self.curr_batch >= self.batch_count:
            self.curr_batch = 0
            raise StopIteration()

        # Slice the dataset into consecutive, non-overlapping batches.
        start = self.curr_batch * self.batch_size
        batch_slice = slice(start, start + self.batch_size)
        inputs_batch = self.inputs[batch_slice]
        targets_batch = self.targets[batch_slice]
        self.curr_batch += 1

        # One-hot encode the labels: row i gets a 1 in column targets_batch[i].
        rows = targets_batch.shape[0]
        targets_one_hot = np.zeros((rows, self.classes_num))
        targets_one_hot[np.arange(rows), targets_batch] = 1

        return inputs_batch, targets_one_hot

    def __iter__(self):
        """The reader is its own iterator."""
        return self

# Model

In [3]:
import tensorflow as tf

# Layer sizes: number of input features, hidden units and output classes
ni = 10
nh = 50
no = 2

# Reset the default graph so the variables below are created in a fresh graph
tf.reset_default_graph()

# Placeholders: a batch of inputs and the matching one-hot targets
X = tf.placeholder(tf.float32, [None, ni])
y = tf.placeholder(tf.int32, [None, no])

# Model: two hidden layers followed by a linear output layer.
# Hidden layer 1 uses relu, hidden layer 2 uses sigmoid; the output layer
# produces raw logits (the softmax is applied inside the loss).
w1 = tf.get_variable("w1", [ni, nh])
b1 = tf.get_variable("b1", [nh])
y1 = tf.nn.relu(tf.matmul(X, w1) + b1)

w2 = tf.get_variable("w2", [nh, nh])
b2 = tf.get_variable("b2", [nh])
y2 = tf.nn.sigmoid(tf.matmul(y1, w2) + b2)

# NOTE: despite its name, y3 is the weight matrix of the output layer
# (it plays the role w1/w2 play above). The name is kept so that other cells
# referring to it keep working.
y3 = tf.get_variable("y3", [nh, no])
b3 = tf.get_variable("b3", [no])

# Output logits
outputs = tf.matmul(y2, y3) + b3

# Loss. The non-v2 op is deprecated; the v2 op additionally lets gradients
# flow into `labels`, which has no effect here because the labels come from
# a placeholder. The labels are cast explicitly because the placeholder is
# int32 while the logits are float32.
loss = tf.nn.softmax_cross_entropy_with_logits_v2(
    logits=outputs, labels=tf.cast(y, tf.float32))
mean_loss = tf.reduce_mean(loss)

# Accuracy: share of samples whose arg-max prediction equals the target class
acc = tf.equal(tf.argmax(outputs, 1), tf.argmax(y, 1))
acc = tf.reduce_mean(tf.cast(acc, tf.float32))

# Optimize with Adam
optimize = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(mean_loss)

# Session (left open on purpose: the test cell reuses it)
sess = tf.InteractiveSession()
initializer = tf.global_variables_initializer()
sess.run(initializer)

# Training settings
batch_size = 100
max_epochs = 100
# Validation loss of the previous epoch; starts at infinity so the first
# epoch can never trigger early stopping.
loss_valid_prev = float('inf')

# Data: training is served in mini-batches, validation as one single batch
X_train = Audiobooks_Data_Reader('train', batch_size)
X_valid = Audiobooks_Data_Reader('validation')

# Loop over epochs
for e in range(max_epochs):

    # Running sum of the batch losses of this epoch (float)
    loss_cur = 0.

    # One optimisation step per training batch.
    # X and y are the tf placeholders defined above.
    for input_batch, target_batch in X_train:
        _, batch_loss = sess.run([optimize, mean_loss],
            feed_dict={X: input_batch, y: target_batch})

        loss_cur += batch_loss

    # Epoch mean loss
    loss_cur /= X_train.batch_count

    # Defaults in case the validation reader yields nothing
    loss_valid = 0.
    acc_valid = 0.

    # Forward propagation only (no optimisation step) on the validation data
    for input_batch, target_batch in X_valid:
        loss_valid, acc_valid = sess.run([mean_loss, acc],
            feed_dict={X: input_batch, y: target_batch})

    # Progress report
    print('Epoch {0}. Training loss: {1:.3f}. Validation loss: {2:.3f}. '
          'Validation accuracy: {3:.2f}%'.format(
              e + 1, loss_cur, loss_valid, acc_valid * 100.))

    # Early stopping: stop as soon as the validation loss goes up
    if loss_valid > loss_valid_prev:
        break

    # Remember this epoch's validation loss for the next comparison
    loss_valid_prev = loss_valid

print('End of training.')

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.

Epoch 1. Training loss: 0.745. Validation loss: 0.714. Validation accuracy: 51.68%
Epoch 2. Training loss: 0.706. Validation loss: 0.686. Validation accuracy: 51.68%
Epoch 3. Training loss: 0.682. Validation loss: 0.669. Validation accuracy: 53.24%
Epoch 4. Training loss: 0.667. Validation loss: 0.658. Validation accuracy: 65.77%
Epoch 5. Training loss: 0.655. Validation loss: 0.648. Validation accuracy: 71.36%
Epoch 6. Training loss: 0.645. Validation loss: 0.639. Validation accuracy: 71.14%
Epoch 7. Training loss: 0.636. Validation loss: 0.630. Validation accuracy: 72.48%
Epoch 8. Training loss: 0.626. Validation loss: 0.621. Validation accuracy: 72.26%
Epoch 9. Training loss: 0.617. Validation loss: 0.612. Validation accuracy: 70.92%
Epoch 10. Training loss: 0.607. Validation loss: 0.603. Valid

Epoch 97. Training loss: 0.356. Validation loss: 0.367. Validation accuracy: 78.52%
Epoch 98. Training loss: 0.355. Validation loss: 0.366. Validation accuracy: 78.52%
Epoch 99. Training loss: 0.354. Validation loss: 0.366. Validation accuracy: 78.52%
Epoch 100. Training loss: 0.354. Validation loss: 0.365. Validation accuracy: 78.52%
End of training.


# Test the Model

In [4]:
test_data = Audiobooks_Data_Reader('test')

# Forward propagate through the test set. This time we only need the accuracy.
# note: `acc` is the accuracy tensor built in the model cell:
# note:   tf.reduce_mean(tf.cast(tf.equal(tf.argmax(outputs, 1), tf.argmax(y, 1)), tf.float32))
# X and y are tf placeholders
for inputs_batch, targets_batch in test_data:
    test_accuracy = sess.run([acc],
                     feed_dict={X: inputs_batch, y: targets_batch})

# Keep the percentage under its own name. Assigning it to `acc` would replace
# the TensorFlow accuracy tensor with a plain float, so re-running this cell
# (or evaluating `acc` again anywhere else) would fail.
test_accuracy_percent = test_accuracy[0] * 100.

# Print the test accuracy
print('Test accuracy: '+'{0:.2f}'.format(test_accuracy_percent)+'%')

Test accuracy: 83.48%
