# Earning Prediction

### Libraries
- https://pandas.pydata.org/
- http://scikit-learn.org/stable/
- https://keras.io/
- https://www.tensorflow.org/

### Source
- https://www.linkedin.com/learning/building-deep-learning-applications-with-keras-2-0

### Note
- Ensure the same TensorFlow version is used locally and on Google Cloud (see the runtime version list: https://cloud.google.com/ml-engine/docs/tensorflow/runtime-version-list)

### To Do
- Fix new error on gcloud: `Attempting to use uninitialized value output_layer_2/bias`

In [1]:
# Configuration

# Label for this training run; other cells format it into the 'logs/{}' path
# used for the TensorBoard log folder.
RUN_NAME = "run 1 with 50 nodes"

In [2]:
# Libraries

import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from pathlib import Path
import shutil

import keras
from keras.models import Sequential
from keras.layers import *
from keras.models import load_model
from keras import backend as K

import tensorflow as tf

Using TensorFlow backend.


In [3]:
# Load and transform data

# Load training data set from CSV file
training_data_df = pd.read_csv("data/sales_data_training.csv")

# Load testing data set from CSV file
test_data_df = pd.read_csv("data/sales_data_test.csv")

# The scaler works on column positions, not names, so both files must share
# the exact same column order or the test data would be silently mis-scaled.
assert list(test_data_df.columns) == list(training_data_df.columns), \
    "training and test CSV files must have identical columns in the same order"

# Data needs to be scaled to a small range like 0 to 1 for the neural
# network to work well.
scaler = MinMaxScaler(feature_range=(0, 1))

# Fit the scaler on the training data only, then apply that same scaling
# (inputs and output column alike) to the test data.
scaled_training = scaler.fit_transform(training_data_df)
scaled_testing = scaler.transform(test_data_df)

# Locate the target column by name rather than by a hard-coded position.
earnings_col = training_data_df.columns.get_loc("total_earnings")

# Print out the adjustment that the scaler applied to the total_earnings column of data
print("Note: total_earnings values were scaled by multiplying by {:.10f} and adding {:.6f}"
      .format(scaler.scale_[earnings_col], scaler.min_[earnings_col]))

# Create new pandas DataFrame objects from the scaled data
scaled_training_df = pd.DataFrame(scaled_training, columns=training_data_df.columns.values)
scaled_testing_df = pd.DataFrame(scaled_testing, columns=test_data_df.columns.values)

# Save scaled data dataframes to new CSV files
scaled_training_df.to_csv("data/sales_data_training_scaled.csv", index=False)
scaled_testing_df.to_csv("data/sales_data_testing_scaled.csv", index=False)

Note: total_earnings values were scaled by multiplying by 0.0000036968 and adding -0.115913


  return self.partial_fit(X, y)


In [4]:
# Build the model

# Fully connected regression network: 9 inputs -> 50 -> 100 -> 50 -> 1.
# Hidden layers use ReLU; the single output node is linear.
model = Sequential([
    Dense(50, input_dim=9, activation='relu', name='layer_1'),
    Dense(100, activation='relu', name='layer_2'),
    Dense(50, activation='relu', name='layer_3'),
    Dense(1, activation='linear', name='output_layer'),
])

# Mean squared error loss, optimized with Adam.
model.compile(optimizer='adam', loss='mean_squared_error')

In [5]:
# Train the model

# Load the training data set: every column except total_earnings is an input,
# total_earnings is the value to predict.
scaled_training_df = pd.read_csv("data/sales_data_training_scaled.csv")
X = scaled_training_df.drop('total_earnings', axis=1).values
Y = scaled_training_df[['total_earnings']].values

# Load the separate test data set
scaled_testing_df = pd.read_csv("data/sales_data_testing_scaled.csv")
X_test = scaled_testing_df.drop('total_earnings', axis=1).values
Y_test = scaled_testing_df[['total_earnings']].values

# Start this run from an empty log folder. ignore_errors=True keeps the very
# first run (when the folder does not exist yet) from raising FileNotFoundError.
shutil.rmtree(Path('logs/{}'.format(RUN_NAME)), ignore_errors=True)

# Create a TensorBoard logger
logger = keras.callbacks.TensorBoard(
    log_dir='logs/{}'.format(RUN_NAME),
    histogram_freq=5,
    write_graph=True
)

# Train the model, using the test set as validation data after each epoch
model.fit(
    X,
    Y,
    epochs=50,
    shuffle=True,
    verbose=2,
    callbacks=[logger],
    validation_data=(X_test, Y_test)
)

# Save the model to disk (create the target folder first so a fresh checkout works)
Path("models").mkdir(parents=True, exist_ok=True)
model.save("models/trained_model.h5")

Train on 1000 samples, validate on 400 samples
Epoch 1/50
 - 0s - loss: 0.0427 - val_loss: 0.0167
Epoch 2/50
 - 0s - loss: 0.0083 - val_loss: 0.0036
Epoch 3/50
 - 0s - loss: 0.0023 - val_loss: 0.0012
Epoch 4/50
 - 0s - loss: 9.2786e-04 - val_loss: 6.0168e-04
Epoch 5/50
 - 0s - loss: 4.7653e-04 - val_loss: 3.4027e-04
Epoch 6/50
 - 0s - loss: 2.8334e-04 - val_loss: 2.0834e-04
Epoch 7/50
 - 0s - loss: 1.8747e-04 - val_loss: 1.8184e-04
Epoch 8/50
 - 0s - loss: 1.3083e-04 - val_loss: 1.4171e-04
Epoch 9/50
 - 0s - loss: 1.0029e-04 - val_loss: 1.3539e-04
Epoch 10/50
 - 0s - loss: 8.4134e-05 - val_loss: 1.3759e-04
Epoch 11/50
 - 0s - loss: 7.3133e-05 - val_loss: 1.1303e-04
Epoch 12/50
 - 0s - loss: 6.3734e-05 - val_loss: 1.1112e-04
Epoch 13/50
 - 0s - loss: 5.6183e-05 - val_loss: 1.1090e-04
Epoch 14/50
 - 0s - loss: 5.2346e-05 - val_loss: 1.1024e-04
Epoch 15/50
 - 0s - loss: 5.0324e-05 - val_loss: 9.7113e-05
Epoch 16/50
 - 0s - loss: 3.9727e-05 - val_loss: 8.8845e-05
Epoch 17/50
 - 0s - loss: 

In [6]:
# Local predict

# Reload the trained model from disk
prediction_model = load_model('models/trained_model.h5')

# Input values for the proposed product, read from CSV
X = pd.read_csv("data/proposed_new_product.csv").values

prediction = prediction_model.predict(X)

# Grab just the first element of the first prediction (since we only have one)
prediction = prediction[0][0]

# Re-scale the data from the 0-to-1 range back to dollars.
# The scaler reported "multiplying by scale_ and adding min_" for this column,
# so the inverse is (value - min_) / scale_. The column position is looked up
# by name instead of being hard-coded.
earnings_col = training_data_df.columns.get_loc("total_earnings")
prediction = (prediction - scaler.min_[earnings_col]) / scaler.scale_[earnings_col]

print("Earnings Prediction for Proposed Product - ${}".format(prediction))

Earnings Prediction for Proposed Product - $265202.03809678555


In [7]:
# Export model

# Reload the trained model from disk
prediction_model = load_model('models/trained_model.h5')

# Fetch the Keras backend session only after the model has been loaded, so the
# session handed to the exporter is obtained once the model's variables exist.
sess = K.get_session()

# Delete the existing folder (if there). ignore_errors=True makes the
# "if there" part true: a missing folder no longer raises FileNotFoundError.
shutil.rmtree(Path('models/exported_model'), ignore_errors=True)

# Describe the serving signature: one input tensor named 'input' and one
# output tensor named 'earnings'.
inputs = {
    'input': tf.saved_model.utils.build_tensor_info(prediction_model.input)
}
outputs = {
    'earnings': tf.saved_model.utils.build_tensor_info(prediction_model.output)
}

signature_def = tf.saved_model.signature_def_utils.build_signature_def(
    inputs=inputs,
    outputs=outputs,
    method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
)

model_builder = tf.saved_model.builder.SavedModelBuilder("models/exported_model")

# Register the graph and its variables under the SERVING tag with the
# signature above as the default serving signature.
model_builder.add_meta_graph_and_variables(
    sess=sess,
    tags=[tf.saved_model.tag_constants.SERVING],
    signature_def_map={ tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature_def}
)

# Write the SavedModel to disk; the returned path is shown as the cell output
model_builder.save()

INFO:tensorflow:No assets to save.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: models/exported_model/saved_model.pb


b'models/exported_model/saved_model.pb'

In [10]:
# Upload the model to Google cloud

# Make the bucket
!gsutil mb -l us-central1 gs://earning-prediction-1000 
!gsutil cp -R models/exported_model/* gs://earning-prediction-1000/earnings_v1
    
# Create the gcloud model
!gcloud ml-engine models create earnings --regions us-central1
!gcloud ml-engine versions create v1 --model=earnings --origin=gs://earning-prediction-1000/earnings_v1/ --runtime-version=1.11

Creating gs://earning-prediction-1000/...
Copying file://models/exported_model/saved_model.pb [Content-Type=application/octet-stream]...
Copying file://models/exported_model/variables/variables.data-00000-of-00001 [Content-Type=application/octet-stream]...
Copying file://models/exported_model/variables/variables.index [Content-Type=application/octet-stream]...
\ [3 files][753.5 KiB/753.5 KiB]                                                
Operation completed over 3 objects/753.5 KiB.                                    
Created ml engine model [projects/gde-core-dev/models/earnings].
Creating version (this might take a few minutes)......done.                    


In [11]:
# Predict from Google cloud using gcloud

# Request a prediction from the "earnings" model for the instances stored in
# sample_input_prescaled.json (presumably already scaled to the 0-to-1 range,
# per the file name -- confirm). No --version is passed.
!gcloud ml-engine predict --model=earnings --json-instances=sample_input_prescaled.json

{
  "error": "Prediction failed: Error during model execution: AbortionError(code=StatusCode.FAILED_PRECONDITION, details=\"Attempting to use uninitialized value output_layer_2/bias\n\t [[{{node output_layer_2/bias/read}} = Identity[T=DT_FLOAT, _output_shapes=[[1]], _device=\"/job:localhost/replica:0/task:0/device:CPU:0\"](output_layer_2/bias)]]\")"
}


In [12]:
# Predict from Google cloud using SDK

import googleapiclient.discovery
from oauth2client.client import GoogleCredentials

# Change these values to match your project
PROJECT_ID = "gde-core-dev"
MODEL_NAME = "earnings"
CREDENTIALS_FILE = "security/credentials.json"

# One dict per instance; the "input" key carries the nine feature values
inputs_for_prediction = [
    {"input": [0.4999, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5]},
]

# Authenticate from the credentials file and build a client for the 'ml' v1 API
credentials = GoogleCredentials.from_stream(CREDENTIALS_FILE)
service = googleapiclient.discovery.build('ml', 'v1', credentials=credentials)

# Address the model by its full resource name and send the prediction request
name = 'projects/{}/models/{}'.format(PROJECT_ID, MODEL_NAME)
request_body = {'instances': inputs_for_prediction}
response = service.projects().predict(name=name, body=request_body).execute()

# Surface a service-side failure as an exception instead of printing it
if 'error' in response:
    raise RuntimeError(response['error'])

# Pull the predictions out of the response and show them
results = response['predictions']
print(results)

RuntimeError: Prediction failed: Error during model execution: AbortionError(code=StatusCode.FAILED_PRECONDITION, details="Attempting to use uninitialized value output_layer_2/bias
	 [[{{node output_layer_2/bias/read}} = Identity[T=DT_FLOAT, _output_shapes=[[1]], _device="/job:localhost/replica:0/task:0/device:CPU:0"](output_layer_2/bias)]]")

In [13]:
# Delete the model from Google cloud
    
# Remove the model
# Version v1 is deleted first, then the "earnings" model itself.
!gcloud ml-engine versions delete v1 --model=earnings --quiet
!gcloud ml-engine models delete earnings --quiet

# Remove the bucket
# Recursively removes everything under the bucket used for the upload.
!gsutil -m rm -r gs://earning-prediction-1000 

Deleting version [v1]......done.                                               
Deleting model [earnings]...done.                                              
Removing gs://earning-prediction-1000/earnings_v1/saved_model.pb#1539153861043454...
Removing gs://earning-prediction-1000/earnings_v1/variables.data-00000-of-00001#1539153861686707...
Removing gs://earning-prediction-1000/earnings_v1/variables.index#1539153861964947...
/ [3/3 objects] 100% Done                                                       
Operation completed over 3 objects.                                              
Removing gs://earning-prediction-1000/...


In [14]:
# Visualize the log

# Note: Stop once done
# TensorBoard keeps running until it is interrupted (it prints
# "Press CTRL+C to quit"), so this cell does not finish on its own.
!tensorboard --logdir=./logs

TensorBoard 1.11.0 at http://Jeromes-MBP.hitronhub.home:6006 (Press CTRL+C to quit)
^C


In [15]:
# Check the model signature with saved_model_cli

# Lists the tag-sets and SignatureDefs (input/output tensor names, dtypes and
# shapes) stored in the locally exported model.
!saved_model_cli show --dir ./models/exported_model  --all


MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs:

signature_def['serving_default']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['input'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 9)
        name: layer_1_input_2:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['earnings'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1, 1)
        name: output_layer_2/BiasAdd:0
  Method name is: tensorflow/serving/predict


In [16]:
# Test prediction with saved_model_cli

# Runs the 'serving_default' signature of the locally exported model on one
# row of nine input values and prints the value of the 'earnings' output.
!saved_model_cli run --dir ./models/exported_model --tag_set serve --signature_def serving_default --input_exprs 'input=[[0.499, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.5]]'

Result for output key earnings:
[[0.17232177]]
