# Creating Methods that will batch the data


In [10]:
import numpy as np

# The dataset name passed to the reader must be one of: 'train', 'validation' or 'test'.

class Audiobooks_Data_Reader():
    """Iterator that serves (inputs, one-hot targets) batches from an ``.npz`` file.

    The file is expected to contain two arrays stored under the keys
    ``'inputs'`` and ``'targets'``. Iterating over an instance yields
    ``batch_count`` consecutive batches and then stops; the instance rewinds
    itself so it can be iterated again on the next epoch.

    Note: ``batch_count`` uses floor division, so when the number of samples is
    not a multiple of ``batch_size`` the trailing partial batch is never served.
    """

    def __init__(self, dataset, batch_size = None,
                 path_template = r'C:\Users\George\Python\Audiobooks_data_{0}.npz',
                 classes_num = 2):
        """Load one dataset split into memory and set up batching.

        Args:
            dataset: Name substituted into ``path_template`` (the notebook uses
                'train', 'validation' and 'test').
            batch_size: Number of samples per batch. ``None`` (the default)
                serves the whole split as a single batch.
            path_template: ``str.format`` template for the file location; ``{0}``
                is replaced by ``dataset``. Defaults to the original location.
            classes_num: Width of the one-hot encoding produced by ``__next__``.

        Raises:
            ValueError: If ``batch_size`` is given but is not positive.
        """
        # np.load on an .npz returns a lazily-read archive; the context manager
        # closes the underlying file once both arrays have been copied out.
        with np.load(path_template.format(dataset)) as npz:
            # np.float / np.int were removed from NumPy (1.24); use explicit dtypes.
            self.inputs = npz['inputs'].astype(np.float64)
            self.targets = npz['targets'].astype(np.int64)

        samples_num = self.inputs.shape[0]

        if batch_size is None:
            self.batch_size = samples_num
        else:
            if batch_size <= 0:
                raise ValueError(
                    'batch_size must be a positive integer, got {0!r}'.format(batch_size))
            self.batch_size = batch_size

        self.classes_num = classes_num
        self.curr_batch = 0
        # Guard the division: an empty split with batch_size=None gives batch_size == 0.
        self.batch_count = samples_num // self.batch_size if self.batch_size else 0

    def __next__(self):
        """Return the next ``(inputs_batch, targets_one_hot)`` pair.

        Raises:
            StopIteration: After ``batch_count`` batches have been served. The
                batch counter is reset first, so the reader can be reused.
        """
        if self.curr_batch >= self.batch_count:
            self.curr_batch = 0
            raise StopIteration()

        # Consecutive, non-overlapping window of ``batch_size`` samples.
        start = self.curr_batch * self.batch_size
        batch_slice = slice(start, start + self.batch_size)
        inputs_batch = self.inputs[batch_slice]
        targets_batch = self.targets[batch_slice]
        self.curr_batch += 1

        # One-hot encode the targets: row i gets a 1 in column targets_batch[i].
        # Assumes targets is a 1-D array of class indices in [0, classes_num).
        rows = targets_batch.shape[0]
        targets_one_hot = np.zeros((rows, self.classes_num))
        targets_one_hot[np.arange(rows), targets_batch] = 1

        return inputs_batch, targets_one_hot

    def __iter__(self):
        """Return the reader itself so it can be used directly in a ``for`` loop."""
        return self
        
                

# Creating ML Algo

In [12]:
import tensorflow as tf

# Network dimensions: 10 input features, 2 output classes, 50 units per hidden layer.
input_size = 10
output_size = 2
hidden_layer_size = 50

# Reset the default graph so re-running this cell starts from an empty graph
# instead of adding to (or clashing with) variables created by an earlier run.
tf.reset_default_graph()

# Placeholders fed at run time: a batch of feature rows and the matching
# one-hot target rows. The leading None leaves the batch dimension open.
inputs = tf.placeholder(tf.float32, [None, input_size])
targets = tf.placeholder(tf.int32, [None, output_size])


def _affine(layer_input, weights_name, biases_name, fan_in, fan_out):
    """Create one layer's variables and return (weights, biases, input @ weights + biases).

    The variables are created with tf.get_variable and no explicit initializer,
    so TensorFlow's default initializer is used (Glorot/Xavier uniform per the
    TF 1.x documentation).
    """
    layer_weights = tf.get_variable(weights_name, [fan_in, fan_out])
    layer_biases = tf.get_variable(biases_name, [fan_out])
    return layer_weights, layer_biases, tf.matmul(layer_input, layer_weights) + layer_biases


# First hidden layer (ReLU activation).
weights_1, biases_1, _pre_activation_1 = _affine(
    inputs, "weights_1", "biases_1", input_size, hidden_layer_size)
outputs_1 = tf.nn.relu(_pre_activation_1)

# Second hidden layer (ReLU activation).
weights_2, biases_2, _pre_activation_2 = _affine(
    outputs_1, "weights_2", "biases_2", hidden_layer_size, hidden_layer_size)
outputs_2 = tf.nn.relu(_pre_activation_2)

# Output layer: raw logits, no activation here because the loss applies softmax itself.
weights_3, biases_3, outputs = _affine(
    outputs_2, "weights_3", "biases_3", hidden_layer_size, output_size)

# Loss: softmax followed by cross-entropy, computed per sample and then averaged.
loss = tf.nn.softmax_cross_entropy_with_logits(labels=targets, logits=outputs)
mean_loss = tf.reduce_mean(loss)

# Optimizer: Adam rather than plain gradient descent.
optimize = tf.train.AdamOptimizer(learning_rate=0.001).minimize(mean_loss)

# Accuracy: fraction of rows whose arg-max logit matches the arg-max of the one-hot target.
out_equals_target = tf.equal(tf.argmax(outputs, 1), tf.argmax(targets, 1))
accuracy = tf.reduce_mean(tf.cast(out_equals_target, tf.float32))

# Open a session and initialise every variable declared above.
sess = tf.InteractiveSession()
initializer = tf.global_variables_initializer()
sess.run(initializer)

# Hyperparameters: mini-batch size for training and an upper bound on epochs
# (early stopping below usually ends training sooner).
batch_size = 100
max_epochs = 50

# Start at +inf so the first epoch can never trigger early stopping.
prev_validation_loss = float('inf')

# Training data is served in mini-batches; validation data as one single batch.
train_data = Audiobooks_Data_Reader('train', batch_size)
validation_data = Audiobooks_Data_Reader('validation')

# An empty reader would otherwise surface as a division by zero further down.
if train_data.batch_count == 0 or validation_data.batch_count == 0:
    raise ValueError('Training and validation readers must each provide at least one batch')

for epoch_counter in range(max_epochs):
    # Training pass: one optimizer step per batch; average the per-batch mean loss.
    curr_epoch_loss = 0.
    for input_batch, target_batch in train_data:
        _, batch_loss = sess.run([optimize, mean_loss],
            feed_dict = {inputs: input_batch, targets: target_batch})

        curr_epoch_loss += batch_loss
    curr_epoch_loss /= train_data.batch_count

    # Validation pass: forward propagation only. Metrics are accumulated and
    # averaged over all batches so the result stays correct if the validation
    # reader is ever created with a batch size (all batches have equal size).
    validation_loss = 0.
    validation_accuracy = 0.

    for input_batch, target_batch in validation_data:
        val_batch_loss, val_batch_accuracy = sess.run([mean_loss, accuracy],
            feed_dict = {inputs: input_batch, targets: target_batch})
        validation_loss += val_batch_loss
        validation_accuracy += val_batch_accuracy

    validation_loss /= validation_data.batch_count
    validation_accuracy /= validation_data.batch_count

    print('Epoch ' + str(epoch_counter + 1) + 
        '. Training Loss: '+' {0:.3f}'.format(curr_epoch_loss)+
         '.Validation loss: '+'{0:.3f}'.format(validation_loss)+
         '.Validation Accuracy: '+'{0:.2f}'.format(validation_accuracy * 100)+ '%')

    # Early stopping: quit as soon as the validation loss rises from one epoch to the next.
    if validation_loss > prev_validation_loss:
        break
    prev_validation_loss = validation_loss

print('End of training')




Epoch 1. Training Loss:  0.616.Validation loss: 0.527.Validation Accuracy: 72.93%
Epoch 2. Training Loss:  0.499.Validation loss: 0.460.Validation Accuracy: 79.87%
Epoch 3. Training Loss:  0.446.Validation loss: 0.417.Validation Accuracy: 80.76%
Epoch 4. Training Loss:  0.415.Validation loss: 0.391.Validation Accuracy: 81.66%
Epoch 5. Training Loss:  0.395.Validation loss: 0.372.Validation Accuracy: 82.55%
Epoch 6. Training Loss:  0.381.Validation loss: 0.360.Validation Accuracy: 82.55%
Epoch 7. Training Loss:  0.371.Validation loss: 0.351.Validation Accuracy: 83.00%
Epoch 8. Training Loss:  0.365.Validation loss: 0.344.Validation Accuracy: 83.00%
Epoch 9. Training Loss:  0.359.Validation loss: 0.339.Validation Accuracy: 82.10%
Epoch 10. Training Loss:  0.355.Validation loss: 0.335.Validation Accuracy: 82.55%
Epoch 11. Training Loss:  0.351.Validation loss: 0.332.Validation Accuracy: 82.55%
Epoch 12. Training Loss:  0.349.Validation loss: 0.329.Validation Accuracy: 82.55%
Epoch 13. Tra

# Test the model


In [14]:
# Load the held-out test split; with no batch size it is served as one batch.
test_data = Audiobooks_Data_Reader('test')

# Forward propagation only (no optimizer step). Collect the accuracy of every
# batch so the result stays correct if the reader ever yields more than one.
test_batch_accuracies = []
for input_batch, target_batch in test_data:
    test_batch_accuracies.append(
        sess.run(accuracy, feed_dict = {inputs: input_batch, targets: target_batch}))

if not test_batch_accuracies:
    raise ValueError('The test reader produced no batches')

# Kept as a one-element list so existing uses of `test_accuracy[0]` keep working.
test_accuracy = [sum(test_batch_accuracies) / len(test_batch_accuracies)]

test_accuracy_percent = test_accuracy[0]*100

print('Test accuracy : '+'{0:.2f}'.format(test_accuracy_percent) + '%')

Test accuracy : 81.92%
