# Earning Prediction using Tensorflow

### Libraries
- https://pandas.pydata.org/
- http://scikit-learn.org/stable/
- https://www.tensorflow.org/

### Source
- https://www.linkedin.com/learning/building-and-deploying-deep-learning-applications-with-tensorflow

### Note
- Ensure the same TensorFlow runtime version is used locally and on Google Cloud: https://cloud.google.com/ml-engine/docs/tensorflow/runtime-version-list

### To Do
- Fix new error `Attempting to use uninitialized value output_layer_2/bias` on gcloud

In [1]:
# Configuration: label for this run (it names the per-run log directory)

RUN_NAME = "tensorflow-run-1"

In [2]:
# Libraries

import os
import shutil
from pathlib import Path

import tensorflow as tf
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [3]:
# Constants

# Suppress TensorFlow warning messages in the program output
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

# Per-run directory that receives the training/testing summary logs
LOGDIR = Path('output/logs/' + RUN_NAME)

In [4]:
# Load data

def _read_features_and_target(csv_path):
    """Read a CSV as floats and split it into features and target.

    Returns a tuple of (the full DataFrame, the array of every column except
    'total_earnings', the 'total_earnings' column as a 2-D array).
    """
    frame = pd.read_csv(csv_path, dtype=float)
    features = frame.drop('total_earnings', axis=1).values
    target = frame[['total_earnings']].values
    return frame, features, target


# X is the data to train with, Y is the value to predict
training_data_df, X_training, Y_training = _read_features_and_target("data/sales_data_training.csv")
test_data_df, X_testing, Y_testing = _read_features_and_target("data/sales_data_test.csv")

# The network works best on values in a small range such as 0 to 1,
# so inputs and outputs each get their own scaler.
X_scaler = MinMaxScaler(feature_range=(0, 1))
Y_scaler = MinMaxScaler(feature_range=(0, 1))

# The scalers are fitted on the training data...
X_scaled_training = X_scaler.fit_transform(X_training)
Y_scaled_training = Y_scaler.fit_transform(Y_training)

# ...and the already-fitted scalers are reused for the test data, so both
# sets go through exactly the same transformation.
X_scaled_testing = X_scaler.transform(X_testing)
Y_scaled_testing = Y_scaler.transform(Y_testing)

print(X_scaled_testing.shape, Y_scaled_testing.shape, sep="\n")

# Report the linear transform applied to Y so predictions can be unscaled elsewhere
y_scale_factor = Y_scaler.scale_[0]
y_scale_offset = Y_scaler.min_[0]
print("Note: Y values were scaled by multiplying by {:.10f} and adding {:.4f}".format(y_scale_factor, y_scale_offset))

(400, 9)
(400, 1)
Note: Y values were scaled by multiplying by 0.0000036968 and adding -0.1159


In [5]:
# Build the model

# Define model parameters
learning_rate = 0.001
training_epochs = 100
display_step = 5  # log progress every `display_step` epochs

# Define how many inputs and outputs are in our neural network
number_of_inputs = 9
number_of_outputs = 1

# Define how many neurons we want in each layer of our neural network
layer_1_nodes = 50
layer_2_nodes = 100
layer_3_nodes = 50


def _dense_layer(scope_name, suffix, layer_input, input_size, output_size, activation=None):
    """Create one fully connected layer inside its own variable scope.

    Args:
        scope_name: variable scope that prefixes the layer's variables and ops.
        suffix: appended to "weights"/"biases" to form the variable names.
        layer_input: tensor feeding the layer, with `input_size` columns.
        input_size: number of inputs to the layer.
        output_size: number of nodes in the layer.
        activation: optional function applied to `input @ weights + biases`;
            when None the linear output is returned unchanged.

    Returns:
        The layer's output tensor.
    """
    with tf.variable_scope(scope_name):
        weights = tf.get_variable("weights{}".format(suffix), shape=[input_size, output_size], initializer=tf.contrib.layers.xavier_initializer())
        biases = tf.get_variable(name="biases{}".format(suffix), shape=[output_size], initializer=tf.zeros_initializer())
        linear_output = tf.matmul(layer_input, weights) + biases
        if activation is None:
            return linear_output
        return activation(linear_output)


# Section One: Define the layers of the neural network itself

# Input Layer
with tf.variable_scope('input'):
    X = tf.placeholder(tf.float32, shape=(None, number_of_inputs))

# Three hidden ReLU layers followed by a linear output layer.
# Scope and variable names match the hand-written version
# (layer_1/weights1 ... output/biases4).
layer_1_output = _dense_layer('layer_1', 1, X, number_of_inputs, layer_1_nodes, activation=tf.nn.relu)
layer_2_output = _dense_layer('layer_2', 2, layer_1_output, layer_1_nodes, layer_2_nodes, activation=tf.nn.relu)
layer_3_output = _dense_layer('layer_3', 3, layer_2_output, layer_2_nodes, layer_3_nodes, activation=tf.nn.relu)
prediction = _dense_layer('output', 4, layer_3_output, layer_3_nodes, number_of_outputs)

# Section Two: Define the cost function of the neural network that will measure prediction accuracy during training

with tf.variable_scope('cost'):
    # The label width follows number_of_outputs so it always matches `prediction`
    Y = tf.placeholder(tf.float32, shape=(None, number_of_outputs))
    cost = tf.reduce_mean(tf.squared_difference(prediction, Y))

# Section Three: Define the optimizer function that will be run to optimize the neural network

with tf.variable_scope('train'):
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

In [6]:
# Train the model

# Start from an empty log directory for this run
if LOGDIR.exists():
    shutil.rmtree(LOGDIR)

# Summary operations that log the progress of the network
with tf.variable_scope('logging'):
    cost_summary = tf.summary.scalar('current_cost', cost)
    prediction_summary = tf.summary.histogram('predicted_value', prediction)
    # Merge exactly these two summaries: tf.summary.merge_all() would also
    # collect summary ops left in the graph by an earlier run of this cell.
    summary = tf.summary.merge([cost_summary, prediction_summary])

# Session used for training here and for prediction/export in later cells,
# which is why it is not closed at the end of this cell.
session = tf.Session()

# Initialize all variables and layers of the neural network.
# Optional checkpointing: create `saver = tf.train.Saver()` here, then either
# `saver.restore(session, "models/tf_trained_model.ckpt")` instead of running
# the initializer, or `saver.save(session, "models/tf_trained_model.ckpt")`
# once training has finished.
session.run(tf.global_variables_initializer())

# The same two feeds are used for every training and evaluation step
training_feed = {X: X_scaled_training, Y: Y_scaled_training}
testing_feed = {X: X_scaled_testing, Y: Y_scaled_testing}

# Log file writers that record training progress.
# Training and testing log data are stored separately.
training_writer = tf.summary.FileWriter('{}/training'.format(LOGDIR), session.graph)
testing_writer = tf.summary.FileWriter('{}/testing'.format(LOGDIR), session.graph)

try:
    # Run the optimizer over and over to train the network.
    # One epoch is one full run through the training data set.
    for epoch in range(training_epochs):

        # Feed in the training data and do one step of neural network training
        session.run(optimizer, feed_dict=training_feed)

        # Log progress every `display_step` epochs (set with the model parameters)
        if epoch % display_step == 0:
            # Evaluate the cost and the summaries on the training and test data sets
            training_cost, training_summary = session.run([cost, summary], feed_dict=training_feed)
            testing_cost, testing_summary = session.run([cost, summary], feed_dict=testing_feed)

            # Write the current training status to the log files (which we can view with TensorBoard)
            training_writer.add_summary(training_summary, epoch)
            testing_writer.add_summary(testing_summary, epoch)

            # Print the current training status to the screen
            print("Epoch: {} - Training Cost: {}  Testing Cost: {}".format(epoch, training_cost, testing_cost))
finally:
    # Close the log files even if training is interrupted
    training_writer.close()
    testing_writer.close()

print("Training is now complete")

# Get the final accuracy scores by running the "cost" operation on the training and test data sets
final_training_cost = session.run(cost, feed_dict=training_feed)
final_testing_cost = session.run(cost, feed_dict=testing_feed)

print("Final Training cost: {}".format(final_training_cost))
print("Final Testing cost: {}".format(final_testing_cost))

Epoch: 0 - Training Cost: 0.09082907438278198  Testing Cost: 0.10087310522794724
Epoch: 5 - Training Cost: 0.02922944724559784  Testing Cost: 0.031113190576434135
Epoch: 10 - Training Cost: 0.024600623175501823  Testing Cost: 0.025740129873156548
Epoch: 15 - Training Cost: 0.010835109278559685  Testing Cost: 0.012432792223989964
Epoch: 20 - Training Cost: 0.008190933614969254  Testing Cost: 0.00951931718736887
Epoch: 25 - Training Cost: 0.004999236669391394  Testing Cost: 0.005687185097485781
Epoch: 30 - Training Cost: 0.0044493293389678  Testing Cost: 0.0048012202605605125
Epoch: 35 - Training Cost: 0.0032441336661577225  Testing Cost: 0.0033440205734223127
Epoch: 40 - Training Cost: 0.002446786966174841  Testing Cost: 0.0024651456624269485
Epoch: 45 - Training Cost: 0.0019288077019155025  Testing Cost: 0.0019247394520789385
Epoch: 50 - Training Cost: 0.001537245698273182  Testing Cost: 0.0015769719611853361
Epoch: 55 - Training Cost: 0.0012658971827477217  Testing Cost: 0.00133267417

In [7]:
# Local predict

# With training finished, run the "prediction" operation on the scaled
# test inputs to get the network's (still scaled) predictions.
Y_predicted_scaled = session.run(prediction, feed_dict={X: X_scaled_testing})

# Map the predictions back to the original units of total_earnings
Y_predicted = Y_scaler.inverse_transform(Y_predicted_scaled)

# Compare the first test row against the network's prediction for it
real_earnings = test_data_df['total_earnings'].iloc[0]
predicted_earnings = Y_predicted[0, 0]

print(
    "The actual earnings of Game #1 were ${}".format(real_earnings),
    "Our neural network predicted earnings of ${}".format(predicted_earnings),
    sep="\n",
)

The actual earnings of Game #1 were $247537.0
Our neural network predicted earnings of $257265.84375


In [12]:
# Export our model

# Single definition of the export directory, shared by the cleanup and the
# builder so the two can never point at different folders.
model_path = Path('models/tf_exported_model')

# Remove any previous export first: SavedModelBuilder refuses to write into
# a directory that already exists.
if model_path.exists():
    shutil.rmtree(model_path)

model_builder = tf.saved_model.builder.SavedModelBuilder(str(model_path))

# Serving signature: callers send the features under the key 'input' and
# receive the network output under the key 'earnings'.
inputs = {
    'input': tf.saved_model.utils.build_tensor_info(X)
    }
outputs = {
    'earnings': tf.saved_model.utils.build_tensor_info(prediction)
    }

signature_def = tf.saved_model.signature_def_utils.build_signature_def(
    inputs=inputs,
    outputs=outputs,
    method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
)

# Store the graph together with the trained variable values held by `session`,
# tagged for serving and exposed under the default serving signature key.
model_builder.add_meta_graph_and_variables(
    session,
    tags=[tf.saved_model.tag_constants.SERVING],
    signature_def_map={
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature_def
    }
)

# Write the SavedModel to disk; the returned path is shown as the cell output
model_builder.save()

INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: models/tf_exported_model/saved_model.pb


b'models/tf_exported_model/saved_model.pb'

In [15]:
# Upload the model to Google cloud

# Make the bucket
!gsutil mb -l us-central1 gs://tf_earning-prediction-1000 
!gsutil cp -R models/tf_exported_model/* gs://tf_earning-prediction-1000/earnings_v1
    
# Create the gcloud model
!gcloud ml-engine models create tf_earnings_prediction --regions us-central1
!gcloud ml-engine versions create v1 --model=tf_earnings_prediction --origin=gs://tf_earning-prediction-1000/earnings_v1/ --runtime-version=1.11
    
    


Creating gs://tf_earning-prediction-1000/...
Copying file://models/tf_exported_model/saved_model.pb [Content-Type=application/octet-stream]...
Copying file://models/tf_exported_model/variables/variables.data-00000-of-00001 [Content-Type=application/octet-stream]...
Copying file://models/tf_exported_model/variables/variables.index [Content-Type=application/octet-stream]...
\ [3 files][214.6 KiB/214.6 KiB]                                                
Operation completed over 3 objects/214.6 KiB.                                    
Created ml engine model [projects/gde-core-dev/models/tf_earnings_prediction].
Creating version (this might take a few minutes)......done.                    


In [16]:
# Predict from Google cloud using gcloud
# Sends the instances stored in ./data/sample_input_prescaled.json to the
# deployed tf_earnings_prediction model and prints the service's reply.
# NOTE(review): the file name suggests the inputs are already scaled the same
# way as the training features - confirm before changing the sample.

!gcloud ml-engine predict --model=tf_earnings_prediction --json-instances=./data/sample_input_prescaled.json

{
  "error": "Prediction failed: Error during model execution: AbortionError(code=StatusCode.FAILED_PRECONDITION, details=\"Attempting to use uninitialized value output/biases4\n\t [[{{node output/biases4/read}} = Identity[T=DT_FLOAT, _output_shapes=[[1]], _device=\"/job:localhost/replica:0/task:0/device:CPU:0\"](output/biases4)]]\")"
}


In [17]:
# Predict from Google cloud using SDK

from oauth2client.client import GoogleCredentials
import googleapiclient.discovery

# Change these values to match your project
PROJECT_ID = "gde-core-dev"
MODEL_NAME = "tf_earnings_prediction"
CREDENTIALS_FILE = "security/credentials.json"

# One dictionary per instance to predict; the feature values go under the "input" key
inputs_for_prediction = [
    {"input": [0.4999, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5]}
]

# Build an authenticated client for the 'ml' v1 API from the credentials file
credentials = GoogleCredentials.from_stream(CREDENTIALS_FILE)
service = googleapiclient.discovery.build('ml', 'v1', credentials=credentials)

# Resource name of the model, then the predict call itself
name = '/'.join(('projects', PROJECT_ID, 'models', MODEL_NAME))
predict_request = service.projects().predict(name=name, body={'instances': inputs_for_prediction})
response = predict_request.execute()

# The service reports failures inside the response body, so surface them as an exception
if 'error' in response:
    raise RuntimeError(response['error'])

# Pull the predictions out of the response and show them
results = response['predictions']
print(results)

RuntimeError: Prediction failed: Error during model execution: AbortionError(code=StatusCode.FAILED_PRECONDITION, details="Attempting to use uninitialized value output/biases4
	 [[{{node output/biases4/read}} = Identity[T=DT_FLOAT, _output_shapes=[[1]], _device="/job:localhost/replica:0/task:0/device:CPU:0"](output/biases4)]]")

In [18]:
# Delete the model from Google cloud
# Tears down the cloud resources this notebook uses: version v1, the model, and the bucket.
    
# Remove the model: the deployed version v1 first, then the model itself.
# --quiet runs both commands without an interactive confirmation prompt.
!gcloud ml-engine versions delete v1 --model=tf_earnings_prediction --quiet
!gcloud ml-engine models delete tf_earnings_prediction --quiet

# Remove the bucket together with every object stored in it
!gsutil -m rm -r gs://tf_earning-prediction-1000 

Deleting version [v1]......done.                                               
Deleting model [tf_earnings_prediction]...done.                                
Removing gs://tf_earning-prediction-1000/earnings_v1/saved_model.pb#1539214681300470...
Removing gs://tf_earning-prediction-1000/earnings_v1/variables.data-00000-of-00001#1539214681915252...
Removing gs://tf_earning-prediction-1000/earnings_v1/variables.index#1539214682277466...
/ [3/3 objects] 100% Done                                                       
Operation completed over 3 objects.                                              
Removing gs://tf_earning-prediction-1000/...


In [13]:
# Check the model signature with saved_model_cli
# Prints the tag-sets and signature definitions (input/output names, dtypes
# and shapes) stored in the local export directory.

!saved_model_cli show --dir ./models/tf_exported_model  --all


MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['input'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 9)
        name: input/Placeholder:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['earnings'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 1)
        name: output/add:0
  Method name is: tensorflow/serving/predict


In [14]:
# Test prediction with saved_model_cli
# Runs the local export's 'serving_default' signature (tag-set 'serve') on one
# hand-written instance of nine feature values, passed as the 'input' tensor.

!saved_model_cli run --dir ./models/tf_exported_model --tag_set serve --signature_def serving_default --input_exprs 'input=[[0.499, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5]]'

Result for output key earnings:
[[0.17700902]]


In [8]:
# Visualize the log
# Starts TensorBoard on this run's log directory; IPython substitutes the
# Python variable LOGDIR into the shell command.


# Note: Stop once done - the server keeps running until it is interrupted (CTRL+C / interrupt the kernel)
!tensorboard --logdir=$LOGDIR

TensorBoard 1.11.0 at http://Jeromes-MBP.hitronhub.home:6006 (Press CTRL+C to quit)
^C
