# Earnings Prediction using TensorFlow

## Setup

### Configuration

In [1]:
# Identifier for this training run; it is interpolated into the log directory path (LOGDIR),
# so changing it keeps the logs of different runs in separate folders.
RUN_NAME = 'tensorflow-run-1'

### Libraries
- https://pandas.pydata.org/
- http://scikit-learn.org/stable/
- https://www.tensorflow.org/

In [2]:
import os
import shutil
from pathlib import Path

import tensorflow as tf
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import pickle

### Constants

In [3]:
# Turn off TensorFlow warning messages in program output
# NOTE(review): this is set after `import tensorflow` has already executed in the
# imports cell; presumably it is still read late enough to take effect — confirm.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Root folder for this run's summary logs; the training cell deletes it if it
# already exists and then writes 'training' and 'testing' sub-folders beneath it.
LOGDIR = Path('output/logs/{}'.format(RUN_NAME))

## Load data

In [4]:
# Read the training set from its CSV file, parsing every column as a float
training_data_df = pd.read_csv(
    "data/sales_data_training.csv",
    dtype=float,
)

# Preview the first five rows
training_data_df.head()

Unnamed: 0,critic_rating,is_action,is_exclusive_to_us,is_portable,is_role_playing,is_sequel,is_sports,suitable_for_kids,total_earnings,unit_price
0,3.5,1.0,0.0,1.0,0.0,1.0,0.0,0.0,132717.0,59.99
1,4.5,0.0,0.0,0.0,0.0,1.0,1.0,0.0,83407.0,49.99
2,3.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,62423.0,49.99
3,4.5,1.0,0.0,0.0,0.0,0.0,0.0,1.0,69889.0,39.99
4,4.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,161382.0,59.99


In [5]:
# Split the frame into Y (the value to predict) and X (the data to train with).
# Y is selected with a list of column names so it stays two-dimensional.
Y_training = training_data_df[['total_earnings']].values
X_training = training_data_df.drop(labels=['total_earnings'], axis='columns').values

In [6]:
# Read the testing set from its CSV file, parsing every column as a float
test_data_df = pd.read_csv(
    "data/sales_data_test.csv",
    dtype=float,
)

# Preview the first five rows
test_data_df.head()

Unnamed: 0,critic_rating,is_action,is_exclusive_to_us,is_portable,is_role_playing,is_sequel,is_sports,suitable_for_kids,total_earnings,unit_price
0,3.5,1.0,1.0,1.0,0.0,1.0,0.0,1.0,247537.0,59.99
1,2.5,0.0,0.0,0.0,1.0,1.0,0.0,0.0,73960.0,59.99
2,3.5,0.0,0.0,0.0,0.0,1.0,1.0,0.0,82671.0,59.99
3,4.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,137456.0,39.99
4,2.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,89639.0,59.99


In [7]:
# Split the frame into Y (the value to predict) and X (the data to evaluate with).
# Y is selected with a list of column names so it stays two-dimensional.
Y_testing = test_data_df[['total_earnings']].values
X_testing = test_data_df.drop(labels=['total_earnings'], axis='columns').values

In [8]:
# The neural network works best when every value sits in a small range such as
# 0 to 1, so build one min-max scaler for the inputs and another for the outputs.
X_scaler = MinMaxScaler(feature_range=(0, 1))
Y_scaler = MinMaxScaler(feature_range=(0, 1))

# Learn the scaling parameters from the training data only
X_scaler.fit(X_training)
Y_scaler.fit(Y_training)

# Apply the fitted scalers to the training inputs and outputs
X_scaled_training = X_scaler.transform(X_training)
Y_scaled_training = Y_scaler.transform(Y_training)

# It's very important that the test data goes through the very same fitted scalers
X_scaled_testing = X_scaler.transform(X_testing)
Y_scaled_testing = Y_scaler.transform(Y_testing)

In [9]:
# Report the fitted output scaler's parameters (scale_ is the multiplier, min_ the offset);
# they are what is needed to convert scaled predictions back to dollars by hand.
print("Y values were scaled by multiplying by {:.10f} and adding {:.4f}".format(Y_scaler.scale_[0], Y_scaler.min_[0]))

Y values were scaled by multiplying by 0.0000036968 and adding -0.1159


In [10]:
# Sanity check: show the first training input row after scaling
print(X_scaled_training[0])

[0.5 1.  0.  1.  0.  1.  0.  0.  1. ]


In [11]:
# Sanity check: show the first training target after scaling
print(Y_scaled_training[0])

[0.37471396]


In [12]:
# Show the dimensions of the scaled test inputs and targets under a heading,
# one item per line
print('Shapes', X_scaled_testing.shape, Y_scaled_testing.shape, sep='\n')

Shapes
(400, 9)
(400, 1)


## Build the model

In [13]:
# Define model parameters
learning_rate = 0.001
training_epochs = 100
display_step = 5
random_seed = 42

# Fix the graph-level random seed before any ops are created so that the
# weight initialization is repeatable from one fresh run to the next
tf.set_random_seed(random_seed)

# Define how many inputs and outputs are in our neural network.
# Both are taken from the scaled training data so the graph always matches the
# data set (9 feature columns and 1 target column for the sales data).
number_of_inputs = X_scaled_training.shape[1]
number_of_outputs = Y_scaled_training.shape[1]

# Define how many neurons we want in each layer of our neural network
layer_1_nodes = 50
layer_2_nodes = 100
layer_3_nodes = 50

# Section One: Define the layers of the neural network itself

# Input Layer: a batch of feature rows; the batch size is left open (None)
with tf.variable_scope('input'):
    X = tf.placeholder(tf.float32, shape=(None, number_of_inputs))

# Layer 1: fully connected, ReLU activation
with tf.variable_scope('layer_1'):
    weights = tf.get_variable("weights1", shape=[number_of_inputs, layer_1_nodes], initializer=tf.contrib.layers.xavier_initializer())
    biases = tf.get_variable(name="biases1", shape=[layer_1_nodes], initializer=tf.zeros_initializer())
    layer_1_output = tf.nn.relu(tf.matmul(X, weights) + biases)

# Layer 2: fully connected, ReLU activation
with tf.variable_scope('layer_2'):
    weights = tf.get_variable("weights2", shape=[layer_1_nodes, layer_2_nodes], initializer=tf.contrib.layers.xavier_initializer())
    biases = tf.get_variable(name="biases2", shape=[layer_2_nodes], initializer=tf.zeros_initializer())
    layer_2_output = tf.nn.relu(tf.matmul(layer_1_output, weights) + biases)

# Layer 3: fully connected, ReLU activation
with tf.variable_scope('layer_3'):
    weights = tf.get_variable("weights3", shape=[layer_2_nodes, layer_3_nodes], initializer=tf.contrib.layers.xavier_initializer())
    biases = tf.get_variable(name="biases3", shape=[layer_3_nodes], initializer=tf.zeros_initializer())
    layer_3_output = tf.nn.relu(tf.matmul(layer_2_output, weights) + biases)

# Output Layer: linear (no activation), because this is a regression problem
with tf.variable_scope('output'):
    weights = tf.get_variable("weights4", shape=[layer_3_nodes, number_of_outputs], initializer=tf.contrib.layers.xavier_initializer())
    biases = tf.get_variable(name="biases4", shape=[number_of_outputs], initializer=tf.zeros_initializer())
    prediction = tf.matmul(layer_3_output, weights) + biases

# Section Two: Define the cost function of the neural network that will measure prediction accuracy during training

with tf.variable_scope('cost'):
    # Expected values; the width follows number_of_outputs so it always agrees
    # with the shape of `prediction`
    Y = tf.placeholder(tf.float32, shape=(None, number_of_outputs))
    # Mean squared error between the predictions and the expected values
    cost = tf.reduce_mean(tf.squared_difference(prediction, Y))

# Section Three: Define the optimizer function that will be run to optimize the neural network

with tf.variable_scope('train'):
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

## Train the model

In [14]:
# Delete the log folder left over from a previous run with the same RUN_NAME
if LOGDIR.exists():
    shutil.rmtree(LOGDIR)

# Create a summary operation to log the progress of the network
with tf.variable_scope('logging'):
    tf.summary.scalar('current_cost', cost)
    tf.summary.histogram('predicted_value', prediction)
    summary = tf.summary.merge_all()

# Initialize a session so that we can run TensorFlow operations.
# The session is deliberately left open: the export and prediction cells reuse it.
session = tf.Session()

# Optional checkpointing (disabled):
# saver = tf.train.Saver()

# Run the global variable initializer to initialize all variables and layers of the neural network
session.run(tf.global_variables_initializer())

# Optional (disabled): instead of initializing, load the variables from disk:
# saver.restore(session, "models/tf_trained_model.ckpt")

# Create log file writers to record training progress.
# We'll store training and testing log data separately.
training_writer = tf.summary.FileWriter('{}/training'.format(LOGDIR), session.graph)
testing_writer = tf.summary.FileWriter('{}/testing'.format(LOGDIR), session.graph)

# Run the optimizer over and over to train the network.
# One epoch is one full run through the training data set.
for epoch in range(training_epochs):

    # Feed in the training data and do one step of neural network training
    session.run(optimizer, feed_dict={X: X_scaled_training, Y: Y_scaled_training})

    # Every `display_step` epochs, log our progress
    if epoch % display_step == 0:
        # Get the current accuracy scores by running the "cost" operation on the training and test data sets
        training_cost, training_summary = session.run([cost, summary], feed_dict={X: X_scaled_training, Y: Y_scaled_training})
        testing_cost, testing_summary = session.run([cost, summary], feed_dict={X: X_scaled_testing, Y: Y_scaled_testing})

        # Write the current training status to the log files (Which we can view with TensorBoard)
        training_writer.add_summary(training_summary, epoch)
        testing_writer.add_summary(testing_summary, epoch)

        # Print the current training status to the screen
        print("Epoch: {} - Training Cost: {}  Testing Cost: {}".format(epoch, training_cost, testing_cost))

# Optional checkpointing (disabled):
# save_path = saver.save(session, "models/tf_trained_model.ckpt")
# print("Model saved: {}".format(save_path))

# Training is now complete! Close the writers so all summaries are flushed to disk.
training_writer.close()
testing_writer.close()

print("Training is now complete")

# Get the final accuracy scores by running the "cost" operation on the training and test data sets
final_training_cost = session.run(cost, feed_dict={X: X_scaled_training, Y: Y_scaled_training})
final_testing_cost = session.run(cost, feed_dict={X: X_scaled_testing, Y: Y_scaled_testing})

print("Final Training cost: {}".format(final_training_cost))
print("Final Testing cost: {}".format(final_testing_cost))

Epoch: 0 - Training Cost: 0.02705908752977848  Testing Cost: 0.028945259749889374
Epoch: 5 - Training Cost: 0.011612320318818092  Testing Cost: 0.01396175380796194
Epoch: 10 - Training Cost: 0.00831885077059269  Testing Cost: 0.009200850501656532
Epoch: 15 - Training Cost: 0.005509235430508852  Testing Cost: 0.0064634038135409355
Epoch: 20 - Training Cost: 0.003075777553021908  Testing Cost: 0.0034968226682394743
Epoch: 25 - Training Cost: 0.0023623700253665447  Testing Cost: 0.0026602190919220448
Epoch: 30 - Training Cost: 0.0020820503123104572  Testing Cost: 0.002409636275842786
Epoch: 35 - Training Cost: 0.001537898788228631  Testing Cost: 0.0016911652637645602
Epoch: 40 - Training Cost: 0.0011657190043479204  Testing Cost: 0.0012751355534419417
Epoch: 45 - Training Cost: 0.0009357126546092331  Testing Cost: 0.0009707102435640991
Epoch: 50 - Training Cost: 0.0007372793043032289  Testing Cost: 0.0008163382881321013
Epoch: 55 - Training Cost: 0.0006156013696454465  Testing Cost: 0.000

## Save the model

In [15]:
# Export the trained model in SavedModel format

# Single source of truth for the export location: the same path is deleted,
# written to, and reported below.
model_path = Path('models/tf_exported_model')

# Delete the existing folder (if there) so the export starts from a clean directory
if model_path.exists():
    shutil.rmtree(model_path)

model_builder = tf.saved_model.builder.SavedModelBuilder(str(model_path))

# Describe the serving interface: the 'input' key maps to the X placeholder and
# the 'earnings' key maps to the (still scaled) prediction tensor.
inputs = {
    'input': tf.saved_model.utils.build_tensor_info(X)
    }
outputs = {
    'earnings': tf.saved_model.utils.build_tensor_info(prediction)
    }

signature_def = tf.saved_model.signature_def_utils.build_signature_def(
    inputs=inputs,
    outputs=outputs,
    method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
)

# Store the graph together with the trained variable values from the live
# session, tagged for serving and registered under the default signature key.
model_builder.add_meta_graph_and_variables(
    session,
    tags=[tf.saved_model.tag_constants.SERVING],
    signature_def_map={
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature_def
    }
)

model_builder.save()

print('model saved to {}'.format(model_path.as_posix()))

INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: models/tf_exported_model/saved_model.pb
model saved to models/tf_exported_model


In [16]:
# Save the fitted scalers next to the exported model, so that predictions made
# from the saved model can be scaled and unscaled the same way as during training.

# Make sure the target folder exists even if the export cell has not been run
os.makedirs('./models', exist_ok=True)

# `with` guarantees each file is flushed and closed once the pickle is written
yscalerfile = './models/yscaler.dat'
with open(yscalerfile, 'wb') as scaler_file:
    pickle.dump(Y_scaler, scaler_file)

xscalerfile = './models/xscaler.dat'
with open(xscalerfile, 'wb') as scaler_file:
    pickle.dump(X_scaler, scaler_file)

print('scalars saved')

scalars saved


## Prediction

### Input

In [17]:
# Raw (unscaled) feature values of the first test example
print(X_testing[0])

[ 3.5   1.    1.    1.    0.    1.    0.    1.   59.99]


In [18]:
# The same first test example after being transformed by X_scaler
print(X_scaled_testing[0])

[0.5 1.  1.  1.  0.  1.  0.  1.  1. ]


### Predict

In [19]:
# With training finished, use the network to make predictions for the test data
# by running the "prediction" operation on the scaled test inputs.
Y_predicted_scaled = session.run(prediction, feed_dict={X: X_scaled_testing})

# Convert the scaled predictions back to their original units (dollars)
Y_predicted = Y_scaler.inverse_transform(Y_predicted_scaled)

# Compare the first test game's real earnings with what the network predicted
real_earnings = test_data_df['total_earnings'].iloc[0]
predicted_earnings = Y_predicted[0, 0]

print("The predict scaled earnings of Game #1 were {}".format(Y_predicted_scaled[0, 0]))
print("The actual earnings of Game #1 were ${}".format(real_earnings))
print("Our neural network predicted earnings of ${}".format(predicted_earnings))

The predict scaled earnings of Game #1 were 0.7850745320320129
The actual earnings of Game #1 were $247537.0
Our neural network predicted earnings of $243721.59375


## Troubleshooting

In [20]:
# Check the model signature with saved_model_cli.
# `show --all` lists the tag-sets and signature definitions stored in the export,
# which lets us confirm the 'input' and 'earnings' keys set up in the export cell.

!saved_model_cli show --dir ./models/tf_exported_model  --all


MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['input'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 9)
        name: input/Placeholder:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['earnings'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 1)
        name: output/add:0
  Method name is: tensorflow/serving/predict


In [21]:
# Test prediction with saved_model_cli
input = [[0.5, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0]]
print(input)

output = !saved_model_cli run --dir ./models/tf_exported_model --tag_set serve --signature_def serving_default --input_exprs 'input={ input }'

scaledPrediction = eval(output[1])[0][0]

print("The predict scaled earnings of Game #1 were {}".format(scaledPrediction))


[[0.5, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0]]
The predict scaled earnings of Game #1 were 0.78507453


In [22]:
# Reload the output scaler that was pickled earlier and use it to convert the
# CLI's scaled prediction back into dollars.
# NOTE(review): pickle.load can execute arbitrary code; only load scaler files
# written by this notebook.
yscalerfile = './models/yscaler.dat'
with open(yscalerfile, 'rb') as scaler_file:
    yscaler = pickle.load(scaler_file)

# NOTE(review): eval() runs the CLI's printed text; fine for this local output only.
test_scaled_set = yscaler.inverse_transform(eval(output[1]))

print("Our neural network predicted earnings of ${}".format(test_scaled_set[0][0]))


Our neural network predicted earnings of $243721.58573765002


In [23]:
# Manual calculation: undo the min-max scaling by hand,
# i.e. raw = (scaled - min_) / scale_, as a cross-check of inverse_transform
scaled_row = eval(output[1])[0]
print((scaled_row - Y_scaler.min_[0]) / Y_scaler.scale_[0])

[243721.58573765]


In [24]:
# Visualize the log: launch TensorBoard against this run's log directory (LOGDIR),
# where the training cell wrote its 'training' and 'testing' summaries.


# Note: Stop once done — interrupt this cell when finished viewing
# (the command is not expected to return on its own).
!tensorboard --logdir=$LOGDIR

^C
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.6/bin/tensorboard", line 7, in <module>
    from tensorboard.main import run_main
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorboard/main.py", line 39, in <module>
    from tensorboard import default
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorboard/default.py", line 34, in <module>
    import tensorflow as tf
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/__init__.py", line 22, in <module>
    from tensorflow.python import pywrap_tensorflow  # pylint: disable=unused-import
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tensorflow/python/__init__.py", line 81, in <module>
    from tensorflow.python import keras
  File "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/tenso