# Price Prediction with Deep Neural Network

In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.model_selection import train_test_split

In [3]:
df = pd.read_csv('ford.csv')

In [4]:
df.head()

Unnamed: 0,model,year,price,transmission,mileage,fuelType,tax,mpg,engineSize
0,Fiesta,2017,12000,Automatic,15944,Petrol,150,57.7,1.0
1,Focus,2018,14000,Manual,9083,Petrol,150,57.7,1.0
2,Focus,2017,13000,Manual,12456,Petrol,150,57.7,1.0
3,Fiesta,2019,17500,Manual,10460,Petrol,145,40.3,1.5
4,Fiesta,2019,16500,Automatic,1482,Petrol,145,48.7,1.0


## Data Preprocessing and Cleaning

In [5]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()


In [6]:

def auto_label_encode(df):
    df = df.copy()
    encoders = {}

    for col in df.select_dtypes(include='object'):
      
        df[col] = le.fit_transform(df[col])
        encoders[col] = le

    return df, encoders


In [7]:
enc_df , encoders= auto_label_encode(df)

In [8]:
enc_df

Unnamed: 0,model,year,price,transmission,mileage,fuelType,tax,mpg,engineSize
0,5,2017,12000,0,15944,4,150,57.7,1.0
1,6,2018,14000,1,9083,4,150,57.7,1.0
2,6,2017,13000,1,12456,4,150,57.7,1.0
3,5,2019,17500,1,10460,4,145,40.3,1.5
4,5,2019,16500,0,1482,4,145,48.7,1.0
...,...,...,...,...,...,...,...,...,...
17961,0,2017,8999,1,16700,4,150,47.1,1.4
17962,0,2014,7499,1,40700,4,30,57.7,1.0
17963,6,2015,9999,1,7010,0,20,67.3,1.6
17964,11,2018,8299,1,5007,4,145,57.7,1.2


In [9]:
train, test = train_test_split(enc_df, test_size=0.2)

In [10]:
X_train = train.drop(columns=['price'])
y_train = train['price']

X_test = test.drop(columns=['price'])
y_test = test['price']



In [11]:
X_train

Unnamed: 0,model,year,transmission,mileage,fuelType,tax,mpg,engineSize
1543,1,2014,1,39549,0,30,61.4,1.6
16571,2,2016,0,7600,4,160,44.8,1.5
5902,5,2017,1,22025,4,145,65.7,1.0
2072,5,2014,1,55259,4,30,54.3,1.2
10991,14,2019,0,4665,2,135,47.1,2.0
...,...,...,...,...,...,...,...,...
15455,2,2018,0,2261,4,145,45.6,1.0
9088,6,2018,1,8201,4,150,58.9,1.0
1440,6,2016,1,26000,0,20,67.3,2.0
454,0,2015,1,9708,0,20,70.6,1.6


In [12]:
from sklearn.preprocessing import MinMaxScaler

In [13]:
num_vars =['year','mileage','tax','mpg']
num_vars

['year', 'mileage', 'tax', 'mpg']

In [14]:
scaler = MinMaxScaler()

scaler.fit(X_train[num_vars])

X_train[num_vars] = scaler.transform(X_train[num_vars])
X_test[num_vars] = scaler.transform(X_test[num_vars])

In [15]:
X_train

Unnamed: 0,model,year,transmission,mileage,fuelType,tax,mpg,engineSize
1543,1,0.233333,1,0.222626,0,0.052632,0.224309,1.6
16571,2,0.266667,0,0.042777,4,0.280702,0.132597,1.5
5902,5,0.283333,1,0.123979,4,0.254386,0.248066,1.0
2072,5,0.233333,1,0.311062,4,0.052632,0.185083,1.2
10991,14,0.316667,0,0.026255,2,0.236842,0.145304,2.0
...,...,...,...,...,...,...,...,...
15455,2,0.300000,0,0.012722,4,0.254386,0.137017,1.0
9088,6,0.300000,1,0.046160,4,0.263158,0.210497,1.0
1440,6,0.266667,1,0.146355,0,0.035088,0.256906,2.0
454,0,0.250000,1,0.054643,0,0.035088,0.275138,1.6


In [16]:
y_train

1543      7591
16571    10995
5902      9991
2072      5495
10991    22899
         ...  
15455    15800
9088     14995
1440     14795
454       7698
13955     9000
Name: price, Length: 14372, dtype: int64

## Input featueres...

In [17]:
X_train.shape

(14372, 8)

## Neural network

In [18]:
import  tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

## Using sequential api for model building

In [19]:
model = keras.Sequential([
    layers.Input(shape=(8,)),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(10,activation='relu'),
    layers.Dense(1)  # we dont need activation for a regression problem's output layer 
])

## Neural net looks like...

In [20]:
model.summary()

## Compiling - stating optimizer and loss func to be used in backpropogation

In [21]:
model.compile(
    optimizer='adam',
    loss='mse',       # loss func used for updating params in backprop
    metrics=['mae']   # loss func it calc. per each epoch for logging 
)

## fitting model with training data

In [22]:
model.fit(X_train, y_train, epochs=100, batch_size=32)

Epoch 1/100
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 956us/step - loss: 85909632.0000 - mae: 7401.6445    
Epoch 2/100
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 938us/step - loss: 22826978.0000 - mae: 3523.9177
Epoch 3/100
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 850us/step - loss: 19793332.0000 - mae: 3300.1907
Epoch 4/100
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 939us/step - loss: 16642895.0000 - mae: 3046.7571
Epoch 5/100
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 845us/step - loss: 13938498.0000 - mae: 2815.8818
Epoch 6/100
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 11738559.0000 - mae: 2577.6328  
Epoch 7/100
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 957us/step - loss: 9991522.0000 - mae: 2362.3181  
Epoch 8/100
[1m450/450[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 888us/step - loss: 88

<keras.src.callbacks.history.History at 0x2cbb8fbc440>

In [23]:
mse , mae = model.evaluate(x=X_test,y=y_test)
print(mae)

[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 3023012.7500 - mae: 1177.3622  
1177.3621826171875


In [24]:
y_pred = model.predict(X_test)


[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 825us/step


In [25]:
y_pred = y_pred.flatten()

In [26]:

comparison = pd.DataFrame({
    'Actual': y_test.values,
    'Predicted': y_pred
})

comparison.head(20)


Unnamed: 0,Actual,Predicted
0,16499,16075.279297
1,6475,7380.87793
2,18000,14909.451172
3,9600,8953.931641
4,11998,11146.22168
5,18975,16857.328125
6,6200,6379.864258
7,10995,9132.835938
8,16495,15562.777344
9,7890,8811.210938


## Evaluation

In [27]:
from sklearn.metrics import r2_score
r2= r2_score(y_pred,y_test.values)
print(f'Mean Squared Error : {mse}\n')
print(f'Mean Absolute Error : {mae}\n')
print(f'R2 Score : {r2}\n')

Mean Squared Error : 3023012.75

Mean Absolute Error : 1177.3621826171875

R2 Score : 0.8279984593391418

