# Earnings Prediction using Keras

### Source
- https://www.linkedin.com/learning/building-deep-learning-applications-with-keras-2-0

## Setup

### Configuration

In [1]:
# Label for this training run
RUN_NAME = "keras-run-1-50-nodes"
# Per-run log directory, derived from the run label
LOGDIR = './output/logs/' + RUN_NAME

### Libraries
- https://pandas.pydata.org/
- http://scikit-learn.org/stable/
- https://keras.io/
- https://www.tensorflow.org/

In [2]:
import os
import shutil
from pathlib import Path

import pandas as pd
from sklearn.preprocessing import MinMaxScaler

import keras
from keras.models import Sequential
from keras.layers import *
from keras.models import load_model
from keras import backend as K

import tensorflow as tf

import pickle

Using TensorFlow backend.


## Load data

In [3]:
# Load and transform data

# Load training data set from CSV file
training_data_df = pd.read_csv("data/sales_data_training.csv")

# Load testing data set from CSV file
test_data_df = pd.read_csv("data/sales_data_test.csv")

# Data needs to be scaled to a small range like 0 to 1 for the neural
# network to work well.
scaler = MinMaxScaler(feature_range=(0, 1))

# Fit the scaler on the training data (inputs and outputs together), then
# apply that same fitted transform to the test data.
scaled_training = scaler.fit_transform(training_data_df)
scaled_testing = scaler.transform(test_data_df)

# Find the total_earnings column by name instead of hard-coding its position,
# so the reported constants stay correct if the CSV column order changes.
earnings_col = training_data_df.columns.get_loc('total_earnings')

# Print out the adjustment that the scaler applied to the total_earnings column of data
print("Note: total_earnings values were scaled by multiplying by {:.10f} and adding {:.6f}"
      .format(scaler.scale_[earnings_col], scaler.min_[earnings_col]))

# Create new pandas DataFrame objects from the scaled data
scaled_training_df = pd.DataFrame(scaled_training, columns=training_data_df.columns.values)
scaled_testing_df = pd.DataFrame(scaled_testing, columns=test_data_df.columns.values)

# Make sure the output folder exists; to_csv does not create missing directories
os.makedirs("output", exist_ok=True)

# Save scaled data dataframes to new CSV files
scaled_training_df.to_csv("output/sales_data_training_scaled.csv", index=False)
scaled_testing_df.to_csv("output/sales_data_testing_scaled.csv", index=False)

Note: total_earnings values were scaled by multiplying by 0.0000036968 and adding -0.115913


  return self.partial_fit(X, y)


## Build the model

In [4]:
# Fully connected network: 9 inputs -> 50 -> 100 -> 50 hidden units (ReLU)
# -> a single linear output.
model = Sequential([
    Dense(50, input_dim=9, activation='relu', name='layer_1'),
    Dense(100, activation='relu', name='layer_2'),
    Dense(50, activation='relu', name='layer_3'),
    Dense(1, activation='linear', name='output_layer'),
])

# Minimise mean squared error with the Adam optimizer
model.compile(optimizer='adam', loss='mean_squared_error')

## Train model

In [5]:
# Read the scaled training set back from disk and split it into
# inputs (every column except total_earnings) and target (total_earnings)
scaled_training_df = pd.read_csv("output/sales_data_training_scaled.csv")
Y = scaled_training_df[['total_earnings']].values
X = scaled_training_df.drop(columns=['total_earnings']).values

# Same split for the separate test data set
scaled_testing_df = pd.read_csv("output/sales_data_testing_scaled.csv")
Y_test = scaled_testing_df[['total_earnings']].values
X_test = scaled_testing_df.drop(columns=['total_earnings']).values

# Remove any log directory left over from an earlier run with the same name
if os.path.exists(LOGDIR):
    shutil.rmtree(LOGDIR)

# TensorBoard callback: writes the graph and, every 5 epochs, histograms
logger = keras.callbacks.TensorBoard(log_dir=LOGDIR, histogram_freq=5, write_graph=True)

# Train for 50 epochs, reporting the loss on the test set after each epoch
model.fit(
    x=X,
    y=Y,
    validation_data=(X_test, Y_test),
    callbacks=[logger],
    epochs=50,
    shuffle=True,
    verbose=2
)

Train on 1000 samples, validate on 400 samples
Epoch 1/50
 - 0s - loss: 0.0105 - val_loss: 0.0021
Epoch 2/50
 - 0s - loss: 0.0015 - val_loss: 8.3202e-04
Epoch 3/50
 - 0s - loss: 5.6431e-04 - val_loss: 4.5582e-04
Epoch 4/50
 - 0s - loss: 3.2860e-04 - val_loss: 3.1952e-04
Epoch 5/50
 - 0s - loss: 1.8685e-04 - val_loss: 2.7504e-04
Epoch 6/50
 - 0s - loss: 1.5069e-04 - val_loss: 3.4934e-04
Epoch 7/50
 - 0s - loss: 1.1786e-04 - val_loss: 2.1426e-04
Epoch 8/50
 - 0s - loss: 1.0692e-04 - val_loss: 1.5785e-04
Epoch 9/50
 - 0s - loss: 7.0051e-05 - val_loss: 1.5981e-04
Epoch 10/50
 - 0s - loss: 5.6284e-05 - val_loss: 1.2854e-04
Epoch 11/50
 - 0s - loss: 4.5262e-05 - val_loss: 1.0843e-04
Epoch 12/50
 - 0s - loss: 4.4378e-05 - val_loss: 1.0152e-04
Epoch 13/50
 - 0s - loss: 3.8360e-05 - val_loss: 9.6515e-05
Epoch 14/50
 - 0s - loss: 3.0082e-05 - val_loss: 9.2767e-05
Epoch 15/50
 - 0s - loss: 3.7693e-05 - val_loss: 8.6918e-05
Epoch 16/50
 - 0s - loss: 4.4227e-05 - val_loss: 1.0921e-04
Epoch 17/50
 -

<keras.callbacks.History at 0x123f8e8d0>

## Save the model

In [6]:
# Save the model to disk
# Create the target folder first so the save does not fail on a fresh checkout
# where models/ does not exist yet.
os.makedirs("models", exist_ok=True)
model.save("models/keras_trained_model.h5")

In [1]:
# Export model
#
# Writes the trained Keras model as a TensorFlow SavedModel under
# models/keras_exported_model, tagged for serving.

# Session Keras is using; its variables are what get written to the export
sess = K.get_session()

classify_model = load_model('models/keras_trained_model.h5')

# Delete the existing folder (if there); SavedModelBuilder will not write
# into an export directory that already exists
model_path = Path('models/keras_exported_model')
if os.path.exists(model_path):
    shutil.rmtree(model_path)

# Map the signature's named input/output to the model's tensors
inputs = {
    'input': tf.saved_model.utils.build_tensor_info(classify_model.input)
}
outputs = {
    'earnings': tf.saved_model.utils.build_tensor_info(classify_model.output)
}

# This is a regression model with custom input/output keys, so it must be
# exported with the generic "predict" method. The "classify" method is reserved
# for signatures that use the fixed classification keys.
signature_def = tf.saved_model.signature_def_utils.build_signature_def(
    inputs=inputs,
    outputs=outputs,
    method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
)

# Reuse model_path so the folder that was cleaned and the folder that is
# written can never drift apart
model_builder = tf.saved_model.builder.SavedModelBuilder(str(model_path))

model_builder.add_meta_graph_and_variables(
    sess=sess,
    tags=[tf.saved_model.tag_constants.SERVING],
    signature_def_map={
        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature_def
    }
)

model_builder.save()

NameError: name 'K' is not defined

In [8]:
# Save the fitted scaler so the same scaling can be re-applied (or inverted) later
scalerfile = './models/scaler.dat'

# Use a context manager so the file is flushed and closed even if pickling fails
with open(scalerfile, 'wb') as scaler_fh:
    pickle.dump(scaler, scaler_fh)

print('scalars saved')

scalars saved


## Prediction

### Local predict

In [9]:
# Reload the trained model from disk
prediction_model = load_model('models/keras_trained_model.h5')

# NOTE(review): the values are passed to the network unchanged, so this file is
# presumably already scaled to the 0-to-1 range - confirm against the data source
X = pd.read_csv("data/proposed_new_product.csv").values

prediction_scaled = prediction_model.predict(X)

# Re-scale the data from the 0-to-1 range back to dollars.
# The scaler mapped earnings as value * scale_ + min_, so invert that here.
# The column is looked up by name rather than by a hard-coded position.
earnings_col = training_data_df.columns.get_loc('total_earnings')
prediction = (prediction_scaled - scaler.min_[earnings_col]) / scaler.scale_[earnings_col]

print("Earnings Prediction for Proposed Product - ${}".format(prediction[0][0]))

Earnings Prediction for Proposed Product - $260868.609375


## Troubleshooting

In [10]:
# Visualize the log

# Note: Stop once done
!tensorboard --logdir=$LOGDIR

TensorBoard 1.11.0 at http://Jerome-Curliers-MacBook-Pro.local:6006 (Press CTRL+C to quit)
^C
