# Sales data prediction using Neural Network

We are provided sales data of videos games. For each game we have several <br>
features which give different information. <br>
We have to predict the total earning of the video games from the data using NN.


In [8]:
#import libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

In [5]:
#import data

train_data=pd.read_csv('sales_data_training.csv')
train_data.head()

Unnamed: 0,critic_rating,is_action,is_exclusive_to_us,is_portable,is_role_playing,is_sequel,is_sports,suitable_for_kids,total_earnings,unit_price
0,3.5,1,0,1,0,1,0,0,132717,59.99
1,4.5,0,0,0,0,1,1,0,83407,49.99
2,3.0,0,0,0,0,1,1,0,62423,49.99
3,4.5,1,0,0,0,0,0,1,69889,39.99
4,4.0,1,0,1,0,1,0,1,161382,59.99


In [7]:
train_data.shape

(1000, 10)

In [9]:
""" ok so!we have 1000 rows and 10 columns Our target variable is total earning!

But as can be seen the data is needs to be normalized becuase it is in different range
keep in mind this is the trainig data. 
we will have our test data as well 
"""

In [10]:
#normalize data

scaler = MinMaxScaler(feature_range=(0,1))

In [16]:
scaled_training= scaler.fit_transform(train_data)
scaled_training

array([[0.5       , 1.        , 0.        , ..., 0.        , 0.37471396,
        1.        ],
       [0.83333333, 0.        , 0.        , ..., 0.        , 0.19242528,
        0.5       ],
       [0.33333333, 0.        , 0.        , ..., 0.        , 0.11485185,
        0.5       ],
       ...,
       [0.83333333, 0.        , 1.        , ..., 0.        , 0.61007375,
        1.        ],
       [0.5       , 1.        , 1.        , ..., 1.        , 0.24626902,
        0.        ],
       [0.33333333, 1.        , 0.        , ..., 1.        , 0.21633242,
        0.5       ]])

In [14]:
#let us import our test data now
test_data = pd.read_csv('sales_data_test.csv')

In [17]:
#normalize the test data as well but in the same parameters as the train data

scaled_test = scaler.transform(test_data)  
scaled_test

array([[0.5       , 1.        , 1.        , ..., 1.        , 0.79917931,
        1.        ],
       [0.16666667, 0.        , 0.        , ..., 0.        , 0.15750171,
        1.        ],
       [0.5       , 0.        , 0.        , ..., 0.        , 0.18970444,
        1.        ],
       ...,
       [1.        , 1.        , 1.        , ..., 0.        , 0.41018835,
        0.        ],
       [0.        , 1.        , 1.        , ..., 0.        , 0.11162086,
        0.        ],
       [0.5       , 1.        , 0.        , ..., 0.        , 0.18022587,
        0.5       ]])

In [19]:
# Create new pandas DataFrame objects from the scaled data
scaled_training_df = pd.DataFrame(scaled_training, columns=train_data.columns.values)
scaled_testing_df = pd.DataFrame(scaled_test, columns=test_data.columns.values)

# Create the Neural Network Model

In [22]:
""" let us first separate our trainig samples and target variable
into X and Y"""

X = scaled_training_df.drop('total_earnings', axis=1).values
Y = scaled_training_df[['total_earnings']].values

In [25]:
from keras.models import Sequential
from keras.layers import*

In [31]:
#Define Model
model1 = Sequential()

In [32]:
model1.add(Dense(50, input_dim=9, activation='relu'))
model1.add(Dense(100, activation='relu'))
model1.add(Dense(50, activation='relu'))
model1.add(Dense(1,activation ='linear'))
model1.compile(loss="mean_squared_error", optimizer="adam")

In [33]:
model1.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_7 (Dense)             (None, 50)                500       
                                                                 
 dense_8 (Dense)             (None, 100)               5100      
                                                                 
 dense_9 (Dense)             (None, 50)                5050      
                                                                 
 dense_10 (Dense)            (None, 1)                 51        
                                                                 
Total params: 10,701
Trainable params: 10,701
Non-trainable params: 0
_________________________________________________________________


# Model Training 

In [36]:
#Train the model now
model.fit(X,Y, epochs=50, shuffle=True, verbose=2)

Epoch 1/50
32/32 - 1s - loss: 0.0333 - 1s/epoch - 36ms/step
Epoch 2/50
32/32 - 0s - loss: 0.0057 - 79ms/epoch - 2ms/step
Epoch 3/50
32/32 - 0s - loss: 0.0014 - 79ms/epoch - 2ms/step
Epoch 4/50
32/32 - 0s - loss: 4.4083e-04 - 79ms/epoch - 2ms/step
Epoch 5/50
32/32 - 0s - loss: 2.0949e-04 - 80ms/epoch - 2ms/step
Epoch 6/50
32/32 - 0s - loss: 1.2857e-04 - 76ms/epoch - 2ms/step
Epoch 7/50
32/32 - 0s - loss: 1.1138e-04 - 76ms/epoch - 2ms/step
Epoch 8/50
32/32 - 0s - loss: 8.8157e-05 - 78ms/epoch - 2ms/step
Epoch 9/50
32/32 - 0s - loss: 8.7912e-05 - 77ms/epoch - 2ms/step
Epoch 10/50
32/32 - 0s - loss: 6.3165e-05 - 81ms/epoch - 3ms/step
Epoch 11/50
32/32 - 0s - loss: 6.8822e-05 - 76ms/epoch - 2ms/step
Epoch 12/50
32/32 - 0s - loss: 5.4480e-05 - 80ms/epoch - 3ms/step
Epoch 13/50
32/32 - 0s - loss: 3.7226e-05 - 75ms/epoch - 2ms/step
Epoch 14/50
32/32 - 0s - loss: 3.0077e-05 - 81ms/epoch - 3ms/step
Epoch 15/50
32/32 - 0s - loss: 3.1500e-05 - 81ms/epoch - 3ms/step
Epoch 16/50
32/32 - 0s - loss: 3

<keras.callbacks.History at 0x26ccba77f10>

# Model Testing

In [35]:
""" To test our model on test data ! 
let us first separate our trainig samples and target variable
into X and Y"""
X_Test = scaled_testing_df.drop('total_earnings', axis=1).values
Y_test = scaled_testing_df[['total_earnings']].values

In [38]:
test_error = model.evaluate(X_Test, Y_test , verbose =0 )

In [39]:
print("The mean Square Error MSE for the test data is : {}".format(test_error))

The mean Square Error MSE for the test data is : 5.4205680498853326e-05


This is great.. the error rate is very small

Our model is predicting the test data perfectly.! 

# Let us use this model for prediction! 

We will have completely new data now! 


In [41]:
#import completely new unseen data

new_data = pd.read_csv('proposed_new_product.csv')
new_data.head()

Unnamed: 0,critic_rating,is_action,is_exclusive_to_us,is_portable,is_role_playing,is_sequel,is_sports,suitable_for_kids,unit_price
0,0.7,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.8


In [42]:
#make prediction
prediction = model1.predict(X)

In [43]:
prediction = prediction[0][0]

In [44]:
#reverse the scaling to see the prediction correctly

prediction = prediction + 0.1159
prediction = prediction / 0.000036968

print("The Earning prediction for proposed product is : $ {}".format(prediction))

The Earning prediction for proposed product is : $ 4779.554743725251


Excellent for the proposed product the predicted earning is 4779.55 $

In [45]:
""" let us save the model so as we get rid of training the model 
again """

model1.save("trained_model.h5")   #the data will be stored in hda5 designed to store binary form

print("Model saved to disk ")

Model saved to disk 


In [46]:
#"""You can load the saved model to use it again and again"

'You can load the saved model to use it again and again'

In [48]:
from keras.models import load_model

In [49]:
model = load_model("trained_model.h5")

In [50]:
""" Now you have the model in 
your hand and you can use it ....!"""

pred = model.predict(X)

This way you can use the saved model and save your time ! 