In [1]:
import numpy as np
import tensorflow as tf
import pandas as pd
from pandas import DataFrame, Series
import seaborn as sns
import matplotlib.pyplot as plt

## Load Data

In [2]:
npz = np.load('Cars_train.npz')
train_inputs, train_targets = npz['inputs'], npz['targets']

npz = np.load('Cars_test.npz')
test_inputs, test_targets = npz['inputs'], npz['targets']


In [3]:
print(train_inputs.shape)
print(train_targets.shape)
print(test_inputs.shape)
print(test_targets.shape)

(3443, 19)
(3443,)
(383, 19)
(383,)


## Model

Outline, loss function, optimizer, early stopping and training

In [4]:
input_size = 19
hidden_layer_size = 50
output_size = 1

model = tf.keras.Sequential([tf.keras.layers.Dense(hidden_layer_size,activation='relu'),
                             tf.keras.layers.Dense(hidden_layer_size,activation='relu'),
                             tf.keras.layers.Dense(output_size)])

custom_optimiser = tf.keras.optimizers.Adam(learning_rate = 0.001)
model.compile(optimizer=custom_optimiser,loss='mean_squared_error',metrics=['mse'])

BATCH_SIZE = 50
max_epochs = 200
early_stopping = tf.keras.callbacks.EarlyStopping(patience=1)

model.fit(train_inputs,train_targets,batch_size=BATCH_SIZE,epochs=max_epochs,verbose=2,
          callbacks=[early_stopping],
          validation_split=0.1)


Train on 3098 samples, validate on 345 samples
Epoch 1/200
3098/3098 - 2s - loss: 53.7347 - mse: 53.7347 - val_loss: 11.2943 - val_mse: 11.2943
Epoch 2/200
3098/3098 - 0s - loss: 3.3752 - mse: 3.3752 - val_loss: 1.8935 - val_mse: 1.8935
Epoch 3/200
3098/3098 - 0s - loss: 1.2510 - mse: 1.2510 - val_loss: 1.1534 - val_mse: 1.1534
Epoch 4/200
3098/3098 - 0s - loss: 0.8048 - mse: 0.8048 - val_loss: 0.7991 - val_mse: 0.7991
Epoch 5/200
3098/3098 - 0s - loss: 0.5665 - mse: 0.5665 - val_loss: 0.5767 - val_mse: 0.5767
Epoch 6/200
3098/3098 - 0s - loss: 0.4219 - mse: 0.4219 - val_loss: 0.4488 - val_mse: 0.4488
Epoch 7/200
3098/3098 - 0s - loss: 0.3341 - mse: 0.3341 - val_loss: 0.3578 - val_mse: 0.3578
Epoch 8/200
3098/3098 - 0s - loss: 0.2742 - mse: 0.2742 - val_loss: 0.2980 - val_mse: 0.2980
Epoch 9/200
3098/3098 - 0s - loss: 0.2305 - mse: 0.2305 - val_loss: 0.2553 - val_mse: 0.2553
Epoch 10/200
3098/3098 - 0s - loss: 0.1982 - mse: 0.1982 - val_loss: 0.2223 - val_mse: 0.2223
Epoch 11/200
3098/

<tensorflow.python.keras.callbacks.History at 0x1a4c1af350>

In [5]:
pred = model.predict(test_inputs)

In [6]:
prediction = np.exp(pred)

In [7]:
prediction

array([[ 10646.001 ],
       [  3332.42  ],
       [  2911.1765],
       [ 14704.546 ],
       [  8521.489 ],
       [  4921.0396],
       [  6534.7983],
       [ 11219.097 ],
       [  3784.6238],
       [ 14529.615 ],
       [  8806.827 ],
       [  4465.475 ],
       [ 31730.12  ],
       [ 18171.947 ],
       [ 10920.428 ],
       [ 22444.71  ],
       [  8989.043 ],
       [  7613.6167],
       [ 13115.403 ],
       [ 24065.15  ],
       [  4790.107 ],
       [ 28545.605 ],
       [ 11912.931 ],
       [ 10331.0625],
       [ 24140.682 ],
       [ 17309.871 ],
       [  8492.574 ],
       [ 18758.393 ],
       [  4757.8823],
       [  6896.083 ],
       [ 16131.189 ],
       [  4129.7   ],
       [ 12872.955 ],
       [ 42913.535 ],
       [ 52135.12  ],
       [ 16150.308 ],
       [ 12765.422 ],
       [  9414.813 ],
       [ 26807.293 ],
       [  6320.174 ],
       [ 25855.246 ],
       [  4230.6343],
       [ 59022.945 ],
       [ 20627.955 ],
       [ 37352.9   ],
       [ 2

In [8]:
np.reshape(prediction,prediction.shape[0])

Series(np.reshape(prediction,prediction.shape[0]))

0      10646.000977
1       3332.419922
2       2911.176514
3      14704.545898
4       8521.489258
           ...     
378    10808.332031
379    10466.857422
380     3627.387695
381     7370.839355
382     5010.371094
Length: 383, dtype: float32

In [9]:
summary = DataFrame()
summary['Prediction'] = Series(np.reshape(prediction,prediction.shape[0]))
summary['Actual'] = np.exp(test_targets)
summary['Residual'] = summary['Actual']-summary['Prediction']
summary['% Difference'] = np.abs((summary['Residual']/summary['Actual'])*100)

In [10]:
pd.set_option('display.max_row',None)
summary.sort_values('% Difference',ascending=False,inplace=True)
summary.round(2)

Unnamed: 0,Prediction,Actual,Residual,% Difference
78,12064.629883,3999.0,-8065.63,201.69
306,5483.930176,1850.0,-3633.93,196.43
135,29922.269531,10500.0,-19422.27,184.97
194,9521.400391,3650.0,-5871.4,160.86
37,9414.80957,4500.0,-4914.81,109.22
51,44382.210938,21300.0,-23082.21,108.37
202,24966.300781,12000.0,-12966.3,108.05
127,5893.22998,2850.0,-3043.23,106.78
30,16131.19043,8000.0,-8131.19,101.64
3,14704.549805,7500.0,-7204.55,96.06


In [11]:
summary.describe()

Unnamed: 0,Prediction,Actual,Residual,% Difference
count,383.0,383.0,383.0,383.0
mean,19357.306641,18684.436762,-672.874643,21.442546
std,20539.591797,18009.43108,8381.734786,25.882135
min,781.814697,1389.0,-82335.328125,0.012751
25%,7888.258789,7750.0,-1939.078125,6.840897
50%,12872.955078,12400.0,-279.064453,14.134319
75%,22965.393555,23600.0,1499.526123,25.990333
max,180335.328125,125000.0,40100.28125,201.691121
