## Create the machine learning algorithm



### Import the relevant libraries

In [1]:
import numpy as np
import tensorflow as tf

### Data

In [2]:
npz = np.load('dataset/Audiobooks_data_train.npz')

train_inputs = npz['inputs'].astype(np.float)
train_targets = npz['targets'].astype(np.int)

npz = np.load('dataset/Audiobooks_data_validation.npz')
validation_inputs, validation_targets = npz['inputs'].astype(np.float), npz['targets'].astype(np.int)

npz = np.load('dataset/Audiobooks_data_test.npz')
test_inputs, test_targets = npz['inputs'].astype(np.float), npz['targets'].astype(np.int)

### Model
Outline, optimizers, loss, early stopping and training

In [3]:
#input_size = 10
# we can use output_size = 1, use 'sigmoid', and use binary crossentropy too.
# But we this task I want to try this model
output_size = 2
hidden_layer_size = 50

model = tf.keras.Sequential([
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
                            tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
                            tf.keras.layers.Dense(output_size, activation='softmax')    
                            ])

# We use sparse categorical crossentropy because we didn't do one-hot encoding for our targets
# this loss function will do it for us
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics = ['accuracy'])

batch_size = 100

max_epochs=100

early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)

model.fit(train_inputs,
          train_targets,
          batch_size=batch_size,
          epochs=max_epochs,
          callbacks=[early_stopping],
          validation_data=(validation_inputs, validation_targets),
          verbose=1
          )

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100


<tensorflow.python.keras.callbacks.History at 0x20848c4e5b0>

## Test the model

In [4]:
test_loss, test_accuracy = model.evaluate(test_inputs, test_targets)



In [5]:
print('\nTest loss: {0:.2f}. Test accuracy: {1:.2f}%'.format(test_loss, test_accuracy*100.))


Test loss: 0.29. Test accuracy: 88.17%


## Obtain the probability for a customer to convert

In [6]:
model.predict(test_inputs).round(2)

array([[0.11, 0.89],
       [0.  , 1.  ],
       [0.79, 0.21],
       [0.05, 0.95],
       [0.16, 0.84],
       [0.24, 0.76],
       [0.9 , 0.1 ],
       [0.84, 0.16],
       [0.09, 0.91],
       [0.14, 0.86],
       [0.81, 0.19],
       [0.13, 0.87],
       [1.  , 0.  ],
       [1.  , 0.  ],
       [1.  , 0.  ],
       [0.01, 0.99],
       [1.  , 0.  ],
       [0.  , 1.  ],
       [0.9 , 0.1 ],
       [0.  , 1.  ],
       [0.89, 0.11],
       [1.  , 0.  ],
       [0.93, 0.07],
       [0.14, 0.86],
       [0.26, 0.74],
       [1.  , 0.  ],
       [1.  , 0.  ],
       [0.07, 0.93],
       [0.97, 0.03],
       [0.  , 1.  ],
       [0.93, 0.07],
       [0.27, 0.73],
       [0.38, 0.62],
       [1.  , 0.  ],
       [0.05, 0.95],
       [0.  , 1.  ],
       [0.94, 0.06],
       [0.93, 0.07],
       [0.  , 1.  ],
       [0.93, 0.07],
       [0.16, 0.84],
       [0.83, 0.17],
       [0.  , 1.  ],
       [0.4 , 0.6 ],
       [0.  , 1.  ],
       [0.  , 1.  ],
       [0.12, 0.88],
       [0.9 ,

In [7]:
# save only possible of being 1 (re-purchasing)
model.predict(test_inputs)[:,1].round(0)

array([1., 1., 0., 1., 1., 1., 0., 0., 1., 1., 0., 1., 0., 0., 0., 1., 0.,
       1., 0., 1., 0., 0., 0., 1., 1., 0., 0., 1., 0., 1., 0., 1., 1., 0.,
       1., 1., 0., 0., 1., 0., 1., 0., 1., 1., 1., 1., 1., 0., 0., 1., 1.,
       1., 1., 0., 0., 0., 1., 0., 1., 1., 0., 1., 1., 0., 1., 1., 0., 0.,
       1., 0., 1., 1., 0., 1., 0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0.,
       1., 1., 0., 1., 1., 0., 1., 1., 1., 0., 0., 1., 0., 0., 1., 0., 0.,
       1., 1., 0., 1., 1., 0., 0., 0., 0., 1., 0., 1., 1., 0., 1., 1., 0.,
       0., 0., 1., 0., 0., 1., 0., 1., 0., 1., 0., 1., 1., 1., 0., 0., 0.,
       1., 0., 1., 0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 0., 0., 0., 1.,
       0., 0., 0., 0., 1., 0., 0., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1.,
       0., 0., 1., 1., 0., 1., 1., 1., 0., 1., 0., 0., 0., 0., 1., 0., 1.,
       0., 0., 1., 0., 1., 1., 1., 0., 1., 1., 0., 1., 0., 1., 1., 0., 1.,
       0., 0., 1., 0., 1., 1., 1., 1., 0., 1., 0., 1., 1., 1., 1., 1., 1.,
       1., 0., 1., 0., 0.

In [8]:
# alternative way
np.argmax(model.predict(test_inputs), axis=1)

array([1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0,
       0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1,
       1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
       0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0,
       1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0,
       0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1,
       1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0,
       0, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1,
       1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0,
       1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,
       1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0,
       0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0,
       1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1,
       0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1,

## Save the model

In [9]:
model.save('model/audiobooks_model.h5')