<a href="https://colab.research.google.com/github/avadakadevra/DeepLearning/blob/main/DLassignment_CarPricePrediction_faisal.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Connecting Google Drive

In [545]:
from google.colab import drive
drive.mount('/content/drive')
!unzip -uq "/content/drive/My Drive/DLAssignmentsData/CarPrice_Assignment.zip" -d "/content/drive/My Drive/DLAssignmentsData/CarPrice_Assignment/"

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).



# Importing modules

In [546]:
import numpy as np
import pandas as pd
import tensorflow as tf
from keras import models
from keras import layers

# Loading Data

In [547]:
dataFrame = pd.read_csv("/content/drive/My Drive/DLAssignmentsData/CarPrice_Assignment/CarPrice_Assignment.csv")
dataFrame.head()

Unnamed: 0,car_ID,symboling,CarName,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,carlength,carwidth,carheight,curbweight,enginetype,cylindernumber,enginesize,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price
0,1,3,alfa-romero giulia,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,48.8,2548,dohc,four,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0
1,2,3,alfa-romero stelvio,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,48.8,2548,dohc,four,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0
2,3,1,alfa-romero Quadrifoglio,gas,std,two,hatchback,rwd,front,94.5,171.2,65.5,52.4,2823,ohcv,six,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0
3,4,2,audi 100 ls,gas,std,four,sedan,fwd,front,99.8,176.6,66.2,54.3,2337,ohc,four,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950.0
4,5,2,audi 100ls,gas,std,four,sedan,4wd,front,99.4,176.6,66.4,54.3,2824,ohc,five,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450.0


# Making Corrections to Data

In [548]:
dataFrame.iloc[3,2]

'audi 100 ls'

In [549]:
dataFrame.iloc[3,2] = 'audi 100ls'

# Checking Null values

In [550]:
dataFrame.isnull().sum()

car_ID              0
symboling           0
CarName             0
fueltype            0
aspiration          0
doornumber          0
carbody             0
drivewheel          0
enginelocation      0
wheelbase           0
carlength           0
carwidth            0
carheight           0
curbweight          0
enginetype          0
cylindernumber      0
enginesize          0
fuelsystem          0
boreratio           0
stroke              0
compressionratio    0
horsepower          0
peakrpm             0
citympg             0
highwaympg          0
price               0
dtype: int64

# One Hot Encoding Categorical data

In [551]:
dataFrame_final= pd.get_dummies(dataFrame,columns=['CarName','symboling','fueltype','aspiration','carbody','doornumber','drivewheel','enginelocation','cylindernumber','fuelsystem','enginetype'])

# Extracting Labels and Dropping unrelated Columns

In [552]:
dataFrame.drop(columns=['car_ID',],inplace=True)

# Split Data Set into Training and Test Ratio 72 % and 28 %

In [553]:
np.random.seed(123)
msk = np.random.rand(len(dataFrame_final)) < 0.72
train_total = dataFrame_final[msk]
test_total = dataFrame_final[~msk]

# Extracting Labels

In [554]:
trainLabel = train_total.loc[:,'price']
testLabel = test_total.loc[:,'price']
trainData = train_total.drop(columns=['price'])
testData = test_total.drop(columns=['price'])

In [555]:
trainData.shape

(154, 204)

# Normalization of Non categorical Data

In [556]:
#{trainData.columns.get_loc(c): c for idx, c in enumerate(trainData.columns)}

In [557]:
mean = trainData.iloc[:,0:13].mean(axis=0)
trainData.iloc[:,0:13] -= mean
std = trainData.iloc[:,0:13].std(axis=0)
trainData.iloc[:,0:13] /= std
testData.iloc[:,0:13] -= mean
testData.iloc[:,0:13] /= std

In [558]:
meanLabel = trainLabel.mean()
trainLabel -= meanLabel
stdLabel = trainLabel.std()
trainLabel /= stdLabel
testLabel -= meanLabel
testLabel /= stdLabel

In [559]:
trainData = np.array(trainData.iloc[:])
testData = np.array(testData.iloc[:])
trainLabel = np.array(trainLabel.astype('float32'))
testLabel = np.array(testLabel.astype('float32'))

# Building Model

In [560]:
def build_model():
  model = models.Sequential()
  model.add(layers.Dense(10, activation='relu',  input_shape=(trainData.shape[1],)))
  model.add(layers.Dense(8, activation='relu'))
  model.add(layers.Dense(6, activation='relu'))
  model.add(layers.Dense(1))
  model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
  return model

In [561]:
build_model().summary()

Model: "sequential_192"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_870 (Dense)            (None, 10)                2050      
_________________________________________________________________
dense_871 (Dense)            (None, 8)                 88        
_________________________________________________________________
dense_872 (Dense)            (None, 6)                 54        
_________________________________________________________________
dense_873 (Dense)            (None, 1)                 7         
Total params: 2,199
Trainable params: 2,199
Non-trainable params: 0
_________________________________________________________________


# Resorting to K fold Validation due to less data

In [562]:
k = 4
num_val_samples = len(trainData) // k
num_epochs = 100
all_scores = []
for i in range(k):
  print('processing fold #', i)
  val_data = trainData[i * num_val_samples: (i + 1) * num_val_samples]
  val_targets = trainLabel[i * num_val_samples: (i + 1) * num_val_samples]
  partial_train_data = np.concatenate([trainData[:i * num_val_samples],trainData[(i + 1) * num_val_samples:]],axis=0)
  partial_train_targets = np.concatenate([trainLabel[:i * num_val_samples],trainLabel[(i + 1) * num_val_samples:]],axis=0)
  model = build_model()
  model.fit(partial_train_data, partial_train_targets,epochs=num_epochs, batch_size=1, verbose=0)
  val_mse, val_mae = model.evaluate(val_data, val_targets, verbose=0)
  all_scores.append(val_mae)

processing fold # 0
processing fold # 1
processing fold # 2
processing fold # 3


In [563]:
all_scores

[0.34794896841049194,
 0.31029292941093445,
 0.4206322729587555,
 0.2616458237171173]

In [564]:
np.mean(all_scores)

0.3351299986243248

# Training the Model using fine tuned Vaues

In [565]:
model = build_model()
model.fit(trainData,trainLabel,epochs=80,batch_size=1,verbose=0)
test_mse, test_mae = model.evaluate(testData,testLabel)



In [566]:
test_mae

0.22754314541816711

# Reversing the Normalization

In [567]:
x = model.predict(testData) * stdLabel + meanLabel



In [568]:
y = testLabel * stdLabel + meanLabel

# Random Check

In [571]:
x[23]

array([7470.3257], dtype=float32)

In [572]:
y[23]

7957.0