# create the machine learning algorithm

### import the relevant libraries

In [1]:
import numpy as np
import tensorflow as tf

### load the data

In [3]:
# for the training data
npz = np.load('Audiobooks_data_train.npz')

# extracting the train inputs
train_inputs = npz['inputs'].astype(float)

# extract train targets
train_targets = npz['targets'].astype(int) # need this in 0's and 1's

# for the validation data
npz = np.load('Audiobooks_data_validation.npz')
validation_inputs, validation_targets = npz['inputs'].astype(float), npz['targets'].astype(int)

# for the test data
npz = np.load('Audiobooks_data_test.npz')
test_inputs, test_targets = npz['inputs'].astype(float), npz['targets'].astype(int)

### create the model

outline, optimizers, loss, early stopping and training

In [14]:
input_size = 10
output_size = 2
hidden_layer_size = 50

# define actual model
model = tf.keras.Sequential([
    # dataset has been preprocessed, we dont need to flatten
    
    # hidden layers
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    
    # output layer
    tf.keras.layers.Dense(output_size, activation='softmax')
])

In [15]:
# optimiser and loss function:

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# sparse_categorical_crossentropy - used for one-hot encoding applications
# adam - adaptive moment estimation

In [19]:
# setting hyperparameters : batch size, epochs, and early stop

batch_size = 100
max_epochs = 100
early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)

# monitors when val loss starts to increase. the patience attribute is by default = 0.
# to modify this level of strictness, we can change that to the number of times increases in loss functions are observed, 
# when we can then confirm overfitting is taking place

In [20]:
# fit the model:

model.fit(train_inputs,
         train_targets,
         batch_size = batch_size,
         epochs = max_epochs,
         callbacks=[early_stopping],
         validation_data=(validation_inputs, validation_targets),
         verbose = 2)

Epoch 1/100
36/36 - 0s - loss: 0.3777 - accuracy: 0.8019 - val_loss: 0.3885 - val_accuracy: 0.7651 - 87ms/epoch - 2ms/step
Epoch 2/100
36/36 - 0s - loss: 0.3667 - accuracy: 0.8005 - val_loss: 0.3766 - val_accuracy: 0.7740 - 61ms/epoch - 2ms/step
Epoch 3/100
36/36 - 0s - loss: 0.3608 - accuracy: 0.8047 - val_loss: 0.3810 - val_accuracy: 0.7830 - 53ms/epoch - 1ms/step
Epoch 4/100
36/36 - 0s - loss: 0.3564 - accuracy: 0.8055 - val_loss: 0.3724 - val_accuracy: 0.7875 - 61ms/epoch - 2ms/step
Epoch 5/100
36/36 - 0s - loss: 0.3483 - accuracy: 0.8153 - val_loss: 0.3785 - val_accuracy: 0.7785 - 58ms/epoch - 2ms/step
Epoch 6/100
36/36 - 0s - loss: 0.3454 - accuracy: 0.8142 - val_loss: 0.3566 - val_accuracy: 0.7852 - 66ms/epoch - 2ms/step
Epoch 7/100
36/36 - 0s - loss: 0.3424 - accuracy: 0.8142 - val_loss: 0.3518 - val_accuracy: 0.7942 - 60ms/epoch - 2ms/step
Epoch 8/100
36/36 - 0s - loss: 0.3455 - accuracy: 0.8161 - val_loss: 0.3590 - val_accuracy: 0.7852 - 64ms/epoch - 2ms/step
Epoch 9/100
36/3

<keras.callbacks.History at 0x2951cf9b910>

### test the model

model.evaluate() returns the loss value and metrics values for the model in the 'test mode'

In [21]:
test_loss, test_accuracy = model.evaluate(test_inputs, test_targets)



In [22]:
print('\nTest loss: {0:.2f}. \nTest accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100.))


Test loss: 0.34. 
Test accuracy: 80.13%
