# Machine Learning Algorithm 

#### Import libraries :

In [1]:
import numpy as np
import tensorflow as  tf

#### Load Data :

In [5]:
npz = np.load('Audiobooks_data_train.npz')

train_inputs = npz['inputs'].astype(np.float)
train_targets = npz['targets'].astype(np.int)

npz = np.load('Audiobooks_data_validation.npz')
validation_inputs = npz['inputs'].astype(np.float)
validation_targets = npz['targets'].astype(np.int)

npz = np.load('Audiobooks_data_test.npz')
test_inputs = npz['inputs'].astype(np.float)
test_targets = npz['targets'].astype(np.int)


## Model 

#### Outline :

In [7]:
input_size = 10 #10 predictors
output_size = 2 #0s and 1s
hidden_layer_size = 50 #Initially 50

model = tf.keras.Sequential([
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size,activation='relu'),
    tf.keras.layers.Dense(output_size,activation='softmax')
])


#### Optimizer and Loss Function :

In [8]:
model.compile(optimizer='adam' , loss='sparse_categorical_crossentropy',metrics=['accuracy'])

#### Hyperparameters :

In [9]:
batch_size = 100
max_epoch = 100

## Training Model 

In [12]:
#Will automatically batch the data :
model.fit(train_inputs,
          train_targets,
          batch_size = batch_size,
          epochs = max_epoch,
          validation_data=(validation_inputs, validation_targets),
         verbose = 2)

Train on 3579 samples, validate on 447 samples
Epoch 1/100
3579/3579 - 2s - loss: 0.5115 - accuracy: 0.8187 - val_loss: 0.3940 - val_accuracy: 0.8814
Epoch 2/100
3579/3579 - 0s - loss: 0.3476 - accuracy: 0.8807 - val_loss: 0.3223 - val_accuracy: 0.8837
Epoch 3/100
3579/3579 - 0s - loss: 0.3076 - accuracy: 0.8868 - val_loss: 0.3002 - val_accuracy: 0.8949
Epoch 4/100
3579/3579 - 0s - loss: 0.2888 - accuracy: 0.8919 - val_loss: 0.2845 - val_accuracy: 0.8971
Epoch 5/100
3579/3579 - 0s - loss: 0.2772 - accuracy: 0.8949 - val_loss: 0.2723 - val_accuracy: 0.9038
Epoch 6/100
3579/3579 - 0s - loss: 0.2682 - accuracy: 0.8977 - val_loss: 0.2627 - val_accuracy: 0.9105
Epoch 7/100
3579/3579 - 0s - loss: 0.2609 - accuracy: 0.9022 - val_loss: 0.2558 - val_accuracy: 0.9105
Epoch 8/100
3579/3579 - 0s - loss: 0.2564 - accuracy: 0.9044 - val_loss: 0.2521 - val_accuracy: 0.9105
Epoch 9/100
3579/3579 - 0s - loss: 0.2513 - accuracy: 0.9033 - val_loss: 0.2473 - val_accuracy: 0.9105
Epoch 10/100
3579/3579 - 0

Epoch 80/100
3579/3579 - 0s - loss: 0.2091 - accuracy: 0.9206 - val_loss: 0.2242 - val_accuracy: 0.9172
Epoch 81/100
3579/3579 - 0s - loss: 0.2068 - accuracy: 0.9226 - val_loss: 0.2201 - val_accuracy: 0.9172
Epoch 82/100
3579/3579 - 0s - loss: 0.2106 - accuracy: 0.9212 - val_loss: 0.2288 - val_accuracy: 0.9150
Epoch 83/100
3579/3579 - 0s - loss: 0.2073 - accuracy: 0.9184 - val_loss: 0.2261 - val_accuracy: 0.9217
Epoch 84/100
3579/3579 - 0s - loss: 0.2101 - accuracy: 0.9206 - val_loss: 0.2196 - val_accuracy: 0.9239
Epoch 85/100
3579/3579 - 0s - loss: 0.2049 - accuracy: 0.9229 - val_loss: 0.2306 - val_accuracy: 0.9128
Epoch 86/100
3579/3579 - 0s - loss: 0.2046 - accuracy: 0.9240 - val_loss: 0.2214 - val_accuracy: 0.9172
Epoch 87/100
3579/3579 - 0s - loss: 0.2080 - accuracy: 0.9223 - val_loss: 0.2212 - val_accuracy: 0.9195
Epoch 88/100
3579/3579 - 0s - loss: 0.2049 - accuracy: 0.9218 - val_loss: 0.2199 - val_accuracy: 0.9217
Epoch 89/100
3579/3579 - 0s - loss: 0.2036 - accuracy: 0.9237 - 

<tensorflow.python.keras.callbacks.History at 0x25a3b49ac08>

Validation loss keeps fluctuating = Overfitted
Happens because of lower preprocessing. Hence, try early stopping.

#### Early stopping :

In [15]:
early_stopping = tf.keras.callbacks.EarlyStopping(patience = 2)

model.fit(train_inputs,
          train_targets,
          batch_size = batch_size,
          epochs = max_epoch,
          callbacks=[early_stopping],
          validation_data=(validation_inputs, validation_targets),
         verbose = 2)

Train on 3579 samples, validate on 447 samples
Epoch 1/100
3579/3579 - 0s - loss: 0.2055 - accuracy: 0.9234 - val_loss: 0.2210 - val_accuracy: 0.9195
Epoch 2/100
3579/3579 - 0s - loss: 0.2025 - accuracy: 0.9229 - val_loss: 0.2204 - val_accuracy: 0.9150
Epoch 3/100
3579/3579 - 0s - loss: 0.2053 - accuracy: 0.9226 - val_loss: 0.2213 - val_accuracy: 0.9217
Epoch 4/100
3579/3579 - 0s - loss: 0.2019 - accuracy: 0.9254 - val_loss: 0.2233 - val_accuracy: 0.9128


<tensorflow.python.keras.callbacks.History at 0x25a3cb56c48>

## Testing 

In [16]:
test_loss , test_accuracy = model.evaluate(test_inputs,test_targets)



In [18]:
print("Test loss : {0:.2f} , Test accuracy : {1:.2f}%".format(test_loss, test_accuracy*100.))

Test loss : 0.29 , Test accuracy : 89.06%
