# Deep Learning for Predicting Game Sales

### Import Data and Libraries

In [1]:
import pandas as pd

# Sequential is used to build the network as a linear stack
# of layers:
# (1) Input Layer, (2) Hidden Layers, and (3) Output Layer
from keras.models import Sequential

# Use '*' to import every layer class exported by keras.layers (e.g. Dense)
from keras.layers import *

In [2]:
# Load the scaled training data and preview its first five rows
trainDf = pd.read_csv(filepath_or_buffer='./sales_data_train_scaled.csv')
trainDf.head(n=5)

Unnamed: 0,critic_rating,is_action,is_exclusive_to_us,is_portable,is_role_playing,is_sequel,is_sports,suitable_for_kids,total_earnings,unit_price
0,0.5,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.374714,1.0
1,0.833333,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.192425,0.5
2,0.333333,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.114852,0.5
3,0.833333,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.142452,0.0
4,0.666667,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.480682,1.0


In [3]:
# Every column except the target becomes a model input; the .values
# attribute returns the underlying data as a NumPy array
features = trainDf.drop(columns=['total_earnings']).values

# Note: selecting with a list of column names (double brackets) yields a
# DataFrame rather than a Series, so the label array stays two-dimensional
label = trainDf.loc[:, ['total_earnings']].values

### Building the Neural Network

In [4]:
# Instantiate the Sequential class (a linear stack of layers)
modelDL = Sequential()

# Add the first fully-connected layer and declare the input size
# Note:
# Dense: is a fully-connected layer; its first argument is the number of units
# input_dim: is the number of features; it is read from the training array
#            so the network stays in sync with the columns of the dataset
# activation: is the activation function ReLU (Rectified Linear Unit)
modelDL.add( Dense(50, input_dim=features.shape[1], activation='relu') )

# Add the Hidden Layers
modelDL.add( Dense(100, activation='relu') )
modelDL.add( Dense(50, activation='relu') )

# Add the Output Layer: a single linear unit, because the target
# (total_earnings) is one continuous value
modelDL.add( Dense(1, activation='linear') )

# Compile the Deep Learning model: minimise mean squared error with Adam
modelDL.compile(loss='mean_squared_error', optimizer='adam')

### Train, Validation, Test Stage

In [None]:
# Note:
# features is the array of explanatory variables
# label is the array of the response variable
# epochs is the number of passes over the entire training data
# shuffle=True reshuffles the training samples before each epoch
# verbose controls the training log; the documented values are
#   0 = silent, 1 = progress bar, 2 = one line per epoch

# Note 2:
# No validation_data / validation_split is passed, so .fit() only trains
# the model here; it is assessed afterwards on the separate test dataset.
modelDL.fit( features, label, epochs=50, shuffle=True, verbose=2 )

In [6]:
# Load the scaled test dataset from disk
testDf = pd.read_csv(filepath_or_buffer='./sales_data_test_scaled.csv')

In [8]:
# Split the test dataset into model inputs (every column except the
# target) and the label (the target column, kept two-dimensional)
features_test = testDf.drop(columns=['total_earnings']).values
label_test = testDf.loc[:, ['total_earnings']].values

In [10]:
# Score the trained model on the test data before making predictions.
# This allows us to tune the hyperparameters if the error is too high.
#
# Note:
# the model was compiled with loss='mean_squared_error' and no extra
# metrics, so the returned value is the MSE; its range is [0, +infinity)
metric_mse = modelDL.evaluate(x=features_test, y=label_test, verbose=0)

In [11]:
# Report the test-set MSE returned by the evaluation step
print("The MSE (Mean Squared Error): {}".format(metric_mse))

The MSE (Mean Squared Error): 0.0013003492495045066


In [12]:
# Show the same value in fixed-point notation instead of the default
# float repr ('%f' prints six digits after the decimal point)
print('%f' % (metric_mse,))

0.001300


### Predicting Total Earnings for a Sample Product