In [128]:
# !pip install pandas
# !pip install tensorflow
# !pip install seaborn
# !pip install pydot
from tensorflow.keras.layers import Normalization , Dense , InputLayer
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.losses import MeanSquaredError,Huber,MeanAbsoluteError
from tensorflow.keras.optimizers import Adam


# DATA PREPROCESSING

In [None]:
import pandas as pd
import tensorflow as tf
import seaborn as sb

# Read the raw training table from the working directory.
file = pd.read_csv("train.csv", sep=",")
# Quick sanity check on the table dimensions before previewing it.
print(file.shape)
file.head(n=5)



In [None]:
# Turn the DataFrame into a tensor and cast it to float32 in one expression.
tensor_data = tf.cast(tf.constant(file), dtype=tf.float32)
print(tensor_data)

# Shuffling

In [None]:
# Shuffle the rows so the positional train/val/test split taken from this
# tensor is not biased by the original row order of the CSV.
#
# Both the global and the op-level seed are fixed so that a fresh
# "Restart Kernel -> Run All" always yields the same permutation (and hence
# the same split). Note that re-running only this cell shuffles the already
# shuffled tensor again.
SHUFFLE_SEED = 42
tf.random.set_seed(SHUFFLE_SEED)
tensor_data = tf.random.shuffle(tensor_data, seed=SHUFFLE_SEED)
tensor_data[:5]

# Input & Output

In [None]:
# Features: every column from index 3 up to (but not including) the last one.
X = tensor_data[:, 3:-1]
# Preview the first rows, then the overall shape.
print(X[:5], X.shape, sep="\n")

In [None]:
# Target: the last column, with a trailing axis of size 1 added back so the
# labels keep one row per sample.
y = tf.expand_dims(tensor_data[:, -1], axis=-1)
# Preview the first rows, then the overall shape.
print(y[:5], y.shape, sep="\n")

# TRAINING

In [None]:
# Share of the rows assigned to the train / validation / test splits.
TRAIN_RATIO, VAL_RATIO, TEST_RATIO = 0.8, 0.1, 0.1
# Total number of rows available for splitting.
DATASET_SIZE = len(X)
print(DATASET_SIZE)


In [None]:
# Training split: the leading TRAIN_RATIO share of the rows.
train_end = int(DATASET_SIZE * TRAIN_RATIO)
X_train, y_train = X[:train_end], y[:train_end]
print(X_train.shape, y_train.shape, y_train, sep="\n")

In [None]:
# Validation split: the rows between the end of the training share and the
# end of the combined train + validation share.
val_start = int(DATASET_SIZE * TRAIN_RATIO)
val_stop = int(DATASET_SIZE * (TRAIN_RATIO + VAL_RATIO))
val_rows = slice(val_start, val_stop)
X_val, y_val = X[val_rows], y[val_rows]
print(X_val.shape)
print(y_val.shape)

In [None]:
# Test split: everything after the combined train + validation share.
test_start = int(DATASET_SIZE * (TRAIN_RATIO + VAL_RATIO))
X_test = X[test_start:]
y_test = y[test_start:]
for split in (X_test, y_test):
    print(split.shape)

In [138]:
BATCH_SIZE = 32


def _make_dataset(features, labels, shuffle=False, batch_size=BATCH_SIZE):
    """Build a batched, prefetched ``tf.data.Dataset`` of (features, labels).

    Args:
        features: Tensor whose first axis indexes the samples.
        labels: Tensor aligned with ``features`` along the first axis.
        shuffle: When True, the samples are reshuffled on every iteration
            (i.e. every epoch) over the dataset.
        batch_size: Number of samples per emitted batch.

    Returns:
        A ``tf.data.Dataset`` yielding ``(features, labels)`` batches.
    """
    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    if shuffle:
        # A buffer as large as the split gives a uniform shuffle; a tiny
        # buffer (the previous value was 8) only permutes near neighbours.
        dataset = dataset.shuffle(
            buffer_size=max(len(features), 1),
            reshuffle_each_iteration=True,
        )
    return dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)


# Train Dataset: reshuffled every epoch so batches differ between epochs.
train_dataset = _make_dataset(X_train, y_train, shuffle=True)

# Validation Dataset: aggregate metrics do not depend on batch order,
# so shuffling would only add work.
val_dataset = _make_dataset(X_val, y_val)

# Test Dataset: kept in row order so its batches line up with X_test / y_test.
test_dataset = _make_dataset(X_test, y_test)


In [None]:
# Walk through the training pipeline and show each (features, labels) batch.
for features, labels in train_dataset:
    print(features, labels)

In [None]:
# Walk through the validation pipeline and show each (features, labels) batch.
for batch_x, batch_y in val_dataset:
    print(batch_x, batch_y)

In [None]:
# Inspect the test pipeline. This cell previously iterated train_dataset a
# second time (already printed two cells earlier), so test_dataset was the
# only pipeline never checked.
for k, j in test_dataset:
    print(k, j)


# Normalization

In [None]:
# Normalization layer whose statistics are computed from the training
# features only (X_train), so nothing from the val/test rows leaks in.
Normalizer = Normalization()
Normalizer.adapt(data=X_train)

# Show the layer's output on the first five feature rows as a sanity check.
Normalizer(X[:5])

# Model 

In [None]:
# Take the input width from the training features instead of hard-coding 8,
# so the model stays consistent with the column slice used to build X (and
# with the statistics the Normalizer was adapted on).
NUM_FEATURES = X_train.shape[-1]

# Regression network: normalised inputs -> three 128-unit ReLU layers ->
# a single linear output unit.
model = tf.keras.Sequential([
    InputLayer(input_shape=(NUM_FEATURES,)),
    Normalizer,
    Dense(128, activation='relu'),
    Dense(128, activation='relu'),
    Dense(128, activation='relu'),
    Dense(1),
])
# The leading InputLayer already fixes the input shape, so the model is built
# on construction; the former trailing model.build() call was redundant.
model.summary()

In [162]:
# Optimise mean absolute error with Adam and report RMSE alongside the loss.
# NOTE(review): learning_rate=1.0 is 1000x Adam's documented default (0.001);
# confirm this is intentional for the unscaled target values.
model.compile(
    optimizer=Adam(learning_rate=1.0),
    loss=MeanAbsoluteError(),
    metrics=[RootMeanSquaredError()],
)


# Training

In [None]:
# Fit for 100 epochs, scoring the validation pipeline after every epoch.
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=100,
    verbose=1,
)

# Names of the per-epoch series recorded during fitting.
print(history.history.keys())

 
# # model.compile(optimizer='adam', loss='your_loss_function', metrics=['accuracy'])


In [None]:
# Names of the per-epoch series stored on the History object returned by fit().
print(history.history.keys())

In [None]:
import matplotlib.pyplot as plt

# Training and validation loss per epoch, drawn on the same axes.
for curve in ('loss', 'val_loss'):
    plt.plot(history.history[curve])

plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train','val_loss'])
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Training and validation RMSE per epoch, keyed by their legend entry.
rmse_curves = {
    'train': history.history['root_mean_squared_error'],
    'val': history.history['val_root_mean_squared_error'],
}
for values in rmse_curves.values():
    plt.plot(values)

plt.title('model performance')
plt.xlabel('epoch')
plt.ylabel('rmse')
plt.legend(list(rmse_curves))
plt.show()

In [None]:
# Display the raw per-epoch values recorded by fit() (same keys as printed above
# by history.history.keys()).
history.history

In [None]:
# Score the trained model on the held-out test tensors; the returned values
# follow the loss and metrics configured in model.compile().
model.evaluate(x=X_test, y=y_test)

In [None]:
# Predict for the first test row only. predict() works on batches, so the
# single row gets a leading batch axis of size 1 first.
first_row = X_test[0]
single_batch = tf.expand_dims(first_row, axis=0)
model.predict(single_batch)

In [None]:
# Ground-truth target of the first test row, for comparison with the
# single-row prediction made on X_test[0].
y_test[0]


In [None]:
# Flatten targets and predictions over the test split into plain lists
# (first/only column of each) for plotting.
actual = y_test[:, 0].numpy()
y_true = list(actual)
predicted = model.predict(X_test)
y_pred = list(predicted[:, 0])

In [None]:
import numpy as np

# One bar position per test sample. Deriving the count from the data (instead
# of a hard-coded 100) keeps plt.bar from failing with a shape mismatch when
# the test split has a different number of rows.
ind = np.arange(len(y_pred))
width = 0.4

plt.figure(figsize=(40,20))

# Predicted and actual values side by side: the second series is offset by
# one bar width.
plt.bar(ind, y_pred, width, label='Pred car price')
plt.bar(ind + width, y_true, width, label='Actual car price')

plt.xlabel('Actual vs Predicted Price')
plt.ylabel('car Price prices')
# The label= arguments above are only rendered once a legend is drawn.
plt.legend()
plt.show()