# APP purchasing prediction with TensorFlow. Part 2

# Machine learning

Code Created by Luis Enrique Acevedo Galicia

Date: 2019-02-24

Here I present a simple way to build a machine learning model that predicts whether a customer will buy again.

Data comes from the preprocessing script. Here a net with 3 hidden layers is presented to predict if a customer will buy again.

# The Libraries

In [1]:
import numpy as np
import tensorflow as tf

# Batch data

In [2]:
#Divide all data in batches
class Batch_data_reader():
    """Iterate over a saved dataset in fixed-size batches.

    The data is read from ``APP_data_<DataSet>.npz`` (relative to the current
    working directory). The archive must contain an ``inputs`` array and a
    ``targets`` array of integer class labels (0 or 1).

    Iterating yields ``(batch_inputs, one_hot_targets)`` tuples. Only complete
    batches are served: ``batch_count`` is the floor of
    ``n_samples / size_batch``, so trailing samples that do not fill a whole
    batch are skipped. Once all batches have been served the reader rewinds
    itself, so the same instance can be iterated again (once per epoch).
    """

    def __init__(self, DataSet, size_batch=None):
        """Load the dataset and set up the batch bookkeeping.

        Args:
            DataSet: suffix of the file name, e.g. ``'Train'`` loads
                ``APP_data_Train.npz``.
            size_batch: number of samples per batch. ``None`` means a single
                batch holding the whole dataset.
        """
        # NpzFile keeps the underlying file open, so close it once the arrays
        # are copied out. ``astype`` always returns a fresh copy, which keeps
        # the data valid after the archive is closed.
        with np.load('APP_data_{0}.npz'.format(DataSet)) as npz:
            # The builtin float/int are what the removed ``np.float`` and
            # ``np.int`` aliases pointed to, so the resulting dtypes are the same.
            self.inputs = npz['inputs'].astype(float)
            self.targets = npz['targets'].astype(int)
        #count the batch
        if size_batch is None:
            self.size_batch = self.inputs.shape[0]
        else:
            self.size_batch = size_batch
        self.batch_current = 0
        self.batch_count = self.inputs.shape[0]//self.size_batch

    #load next batch
    def __next__(self):
        """Return the next ``(inputs, one_hot_targets)`` batch.

        Raises:
            StopIteration: when every full batch has been served. The batch
                cursor is reset first, so the next iteration starts over.
        """
        if self.batch_current >= self.batch_count:
            self.batch_current = 0
            raise StopIteration()
        slice_batch = slice(self.batch_current*self.size_batch, (self.batch_current+1)*self.size_batch)
        batch_inputs = self.inputs[slice_batch]
        batch_targets = self.targets[slice_batch]
        self.batch_current += 1

        #get targets in format [0 1] or [1 0]
        num_c = 2
        targets_def = np.zeros((batch_targets.shape[0], num_c))
        # Row i gets a 1 in the column given by its class label.
        targets_def[np.arange(batch_targets.shape[0]), batch_targets] = 1

        return batch_inputs, targets_def

    #this part allows the class to iterate
    def __iter__(self):
        """Return the reader itself; it is its own iterator."""
        return self

# Machine learning code

In [9]:
# Build the TensorFlow 1.x graph for the classifier: three hidden layers
# followed by a 2-unit output layer, then the loss, accuracy and optimizer ops.

#define inputs and targets size
input_size = 10
output_size = 2
# hidden layer size
hidden_layer_size = 100

# Reset the default graph, so re-running this cell does not clash with the
# variables created by a previous run
tf.reset_default_graph()

# Placeholders
inputs = tf.placeholder(tf.float32, [None, input_size])
# The targets are one-hot rows ([1 0] or [0 1]) used as the label distribution
# of the softmax cross-entropy loss, which expects labels with the same float
# dtype as the logits, hence float32 rather than an integer type.
targets = tf.placeholder(tf.float32, [None, output_size])

# The net will contain 3 hidden layers

# Hidden layer 1: input_size -> hidden_layer_size, ReLU
weights_1 = tf.get_variable("weights_1", [input_size, hidden_layer_size])
biases_1 = tf.get_variable("biases_1", [hidden_layer_size])
outputs_1 = tf.nn.relu(tf.matmul(inputs, weights_1) + biases_1)

# Hidden layer 2: hidden_layer_size -> hidden_layer_size, ReLU
weights_2 = tf.get_variable("weights_2", [hidden_layer_size, hidden_layer_size])
biases_2 = tf.get_variable("biases_2", [hidden_layer_size])
outputs_2 = tf.nn.relu(tf.matmul(outputs_1, weights_2) + biases_2)

# Hidden layer 3: hidden_layer_size -> hidden_layer_size, sigmoid
# (note: this layer uses a sigmoid while the first two use ReLU)
weights_3 = tf.get_variable("weights_3", [hidden_layer_size, hidden_layer_size])
biases_3 = tf.get_variable("biases_3", [hidden_layer_size])
outputs_3 = tf.nn.sigmoid(tf.matmul(outputs_2, weights_3) + biases_3)

# Output layer: hidden_layer_size -> output_size
weights_4 = tf.get_variable("weights_4", [hidden_layer_size, output_size])
biases_4 = tf.get_variable("biases_4", [output_size])
# No activation here: `outputs` holds raw logits, the softmax is applied
# inside the loss op below
outputs = tf.matmul(outputs_3, weights_4) + biases_4



# Use the softmax cross entropy loss with logits (one loss value per sample),
# then average it over the batch
loss = tf.nn.softmax_cross_entropy_with_logits(logits=outputs, labels=targets)
mean_loss = tf.reduce_mean(loss)

# Get a 0 or 1 for every input indicating whether it output the correct answer:
# the predicted class (argmax of the logits) is compared with the true class
# (argmax of the one-hot target)
out_equals_target = tf.equal(tf.argmax(outputs, 1), tf.argmax(targets, 1))
accuracy = tf.reduce_mean(tf.cast(out_equals_target, tf.float32))

# Optimize with Adam
optimize = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(mean_loss)

# Create a session
sess = tf.InteractiveSession()

# Initialize the variables
initializer = tf.global_variables_initializer()
sess.run(initializer)

# Mini-batch size used for the training set
batch_size = 100

# Early-stopping settings: an upper bound on the number of epochs, and the
# validation loss of the previous epoch (starts at a very large value so the
# first epoch can never trigger the stop)
max_epochs = 100
prev_validation_loss = 9999999.

# The training reader is batched. The validation reader gets no batch size,
# so it serves the whole validation set as one single batch.
train_data = Batch_data_reader('Train', batch_size)
validation_data = Batch_data_reader('Valid')

# One pass of this loop = one epoch (training, validation, report, stop check)
for epoch_counter in range(max_epochs):

    # --- Training: one optimizer step per batch, summing the batch losses ---
    curr_epoch_loss = 0.
    for input_batch, target_batch in train_data:
        _, batch_loss = sess.run(
            [optimize, mean_loss],
            feed_dict={inputs: input_batch, targets: target_batch})
        curr_epoch_loss += batch_loss

    # Turn the summed batch losses into the mean loss per batch
    curr_epoch_loss /= train_data.batch_count

    # --- Validation: forward pass only, the optimizer op is not run ---
    # The loop body executes once because the reader holds a single batch;
    # the zeros are the values reported if it yields nothing.
    validation_loss = 0.
    validation_accuracy = 0.
    for input_batch, target_batch in validation_data:
        validation_loss, validation_accuracy = sess.run(
            [mean_loss, accuracy],
            feed_dict={inputs: input_batch, targets: target_batch})

    # --- Report the statistics of this epoch on one line ---
    print(''.join([
        'Epoch ', str(epoch_counter+1),
        '. Training loss: ', '{0:.3f}'.format(curr_epoch_loss),
        '. Validation loss: ', '{0:.3f}'.format(validation_loss),
        '. Validation accuracy: ', '{0:.2f}'.format(validation_accuracy * 100.), '%',
    ]))

    # --- Early stop: quit as soon as the validation loss goes up compared
    # with the previous epoch ---
    if validation_loss > prev_validation_loss:
        break

    # Remember this epoch's validation loss for the next comparison
    prev_validation_loss = validation_loss

print('End of training.')


Epoch 1. Training loss: 0.929. Validation loss: 0.857. Validation accuracy: 48.77%
Epoch 2. Training loss: 0.777. Validation loss: 0.735. Validation accuracy: 48.99%
Epoch 3. Training loss: 0.691. Validation loss: 0.668. Validation accuracy: 55.70%
Epoch 4. Training loss: 0.646. Validation loss: 0.627. Validation accuracy: 66.89%
Epoch 5. Training loss: 0.613. Validation loss: 0.594. Validation accuracy: 71.59%
Epoch 6. Training loss: 0.585. Validation loss: 0.564. Validation accuracy: 73.60%
Epoch 7. Training loss: 0.559. Validation loss: 0.538. Validation accuracy: 74.27%
Epoch 8. Training loss: 0.536. Validation loss: 0.515. Validation accuracy: 74.50%
Epoch 9. Training loss: 0.515. Validation loss: 0.495. Validation accuracy: 75.39%
Epoch 10. Training loss: 0.497. Validation loss: 0.479. Validation accuracy: 76.06%
Epoch 11. Training loss: 0.482. Validation loss: 0.464. Validation accuracy: 76.29%
Epoch 12. Training loss: 0.468. Validation loss: 0.452. Validation accuracy: 77.18%
[... output for epochs 13–98 omitted ...]

Epoch 99. Training loss: 0.326. Validation loss: 0.329. Validation accuracy: 83.22%
Epoch 100. Training loss: 0.326. Validation loss: 0.329. Validation accuracy: 83.22%
End of training.


## Test the model

In [10]:
# No batch size is given, so the reader serves the whole test set as a
# single batch
test_data = Batch_data_reader('Test')

# Forward pass only: evaluate the accuracy op on the test batch
for inputs_batch, targets_batch in test_data:
    test_accuracy = sess.run(
        [accuracy],
        feed_dict={inputs: inputs_batch, targets: targets_batch})

# sess.run was given a one-element list, so it returns a one-element list:
# take the first entry and scale it to a percentage
test_accuracy_percent = 100. * test_accuracy[0]

# Report the result
print(''.join(['Test accuracy: ', '{0:.2f}'.format(test_accuracy_percent), '%']))

Test accuracy: 82.37%
