> **Importing all libraries**

In [None]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import make_column_transformer
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

> **Importing complete data**

In [None]:
# Load the training split of the competition data. The bare name on the last
# line makes the notebook render the resulting frame.
raw_data = pd.read_csv(
    filepath_or_buffer="../input/logical-rhythm-2k21-cars24/train.csv",
)
raw_data

In [None]:
# List the column names read from the training CSV.
raw_data.columns

> **Processing the training data (covers both the train and validation splits)**

In [None]:
# "price" is the regression target: keep it as the label series and remove it
# from the frame that will become the model inputs.
training_labels = raw_data["price"]
training_data = raw_data.drop("price", axis="columns")

In [None]:
# Remove the row identifier before encoding so it can never be treated as a
# feature or expanded into indicator columns.
training_data = training_data.drop(columns='car ID')

# One-hot encode the categorical columns. dtype=float is requested explicitly:
# newer pandas releases default the indicator columns to bool, and a frame that
# mixes bool with numeric columns converts to an object array, which Keras
# cannot turn into a tensor.
# NOTE(review): the 'pre_owner_4th Owner' indicator is presumably present here
# and is deliberately kept; the test features must be aligned to it later.
training_data = pd.get_dummies(training_data, dtype=float)


In [None]:
# Display the feature frame after one-hot encoding and removal of 'car ID'.
training_data

In [None]:
# Display the target series (the "price" column of the raw data).
training_labels

> **Splitting the dataset into train_data and validation_data**

In [None]:
# Hold out 10% of the rows for validation; the fixed random_state keeps the
# partition identical from run to run.
(train_data, validation_data,
 train_labels, validation_labels) = train_test_split(
    training_data,
    training_labels,
    test_size=0.1,
    random_state=42,
)

> **Dataset visualization**

In [None]:
# Display the training portion of the features produced by the split.
train_data

In [None]:
# Display the training portion of the price labels produced by the split.
train_labels

In [None]:
# Line plot of price against car ID, drawn on an explicitly created axes.
fig, ax = plt.subplots()
ax.plot(raw_data["car ID"], raw_data["price"])
ax.set_ylabel('raw_data["price"]')
ax.set_xlabel('raw_data["car ID"]')

In [None]:
# Histogram of the target to show how prices are distributed.
raw_data["price"].plot.hist()

In [None]:
# Scatter of price against car ID (same data as the line plot, as points).
plt.scatter(x=raw_data["car ID"], y=raw_data["price"])

In [None]:
# Rows of the training data whose "pre_owner" value is "4th Owner".
is_fourth_owner = raw_data["pre_owner"].eq("4th Owner")
raw_data.loc[is_fourth_owner]

> **Normalization of train data and validation data**

In [None]:
# Min-max scale the two continuous columns. The scaler is fitted on the
# training split only and then applied unchanged to the validation split, so
# both are scaled with the training split's min/max.
normalization = make_column_transformer(
    (MinMaxScaler(), ['model_year', 'distance_covered (km)']),
)
train_data_normalized = normalization.fit_transform(train_data)
validation_data_normalized = normalization.transform(validation_data)

In [None]:
# Display the transformer output for the training split.
train_data_normalized

In [None]:
# Display the transformer output for the validation split.
validation_data_normalized

> **Adding the normalized data to train_data and validation_data**

In [None]:
# Only the last expression of a cell is rendered, so return both column
# indexes as a tuple; otherwise the first one is evaluated and thrown away.
train_data.columns, validation_data.columns

In [None]:
# The frames returned by train_test_split are derived from training_data;
# writing into them directly can trigger pandas' SettingWithCopyWarning.
# Take explicit copies so the assignments below are unambiguous and
# training_data itself is left untouched.
train_data = train_data.copy()
validation_data = validation_data.copy()

# Overwrite the two raw columns with their min-max scaled values.
train_data[['model_year','distance_covered (km)']] = train_data_normalized
validation_data[['model_year','distance_covered (km)']] = validation_data_normalized

In [None]:
# Display the training features after the scaled columns were written back.
train_data

> **MODEL PREPARATION**

In [None]:
# Build, compile and train the price-regression network.
tf.random.set_seed(42)  # fix TensorFlow's global seed before creating layers

# Hidden stack, widest first: three 5000-unit ReLU layers, one Dropout(0.2),
# then progressively narrower ReLU layers described as (units, repeats).
narrowing_plan = ((4096, 3), (2048, 3), (1024, 3), (512, 3), (256, 3), (128, 2))
hidden_layers = (
    [tf.keras.layers.Dense(5000, activation="relu") for _ in range(3)]
    + [tf.keras.layers.Dropout(0.2)]
    + [
        tf.keras.layers.Dense(units, activation="relu")
        for units, repeats in narrowing_plan
        for _ in range(repeats)
    ]
)

# A single linear output unit yields the predicted price.
car_price_pridiction_model = tf.keras.Sequential(
    hidden_layers + [tf.keras.layers.Dense(1)]
)

# Mean squared logarithmic error as the loss, Adam with a small step size.
car_price_pridiction_model.compile(
    loss=tf.keras.losses.MeanSquaredLogarithmicError(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
)

# Each epoch runs 50 steps of 32 rows; the value returned by fit() is kept so
# its .history can be plotted afterwards.
car_price_pridiction = car_price_pridiction_model.fit(
    train_data,
    train_labels,
    batch_size=32,
    steps_per_epoch=50,
    epochs=38,
    validation_data=(validation_data, validation_labels),
)


In [None]:
# Plot every series recorded in the fit history, one line per column, and
# label the axes on the Axes object that pandas returns.
history_axes = pd.DataFrame(car_price_pridiction.history).plot()
history_axes.set_ylabel("loss")
history_axes.set_xlabel("epochs")

In [None]:
# Show the layer-by-layer summary of the model defined and trained above.
car_price_pridiction_model.summary()

In [None]:
# validation_labels.head(10)  # just for fun

In [None]:
#car_price_pridiction_model.predict(validation_data)

In [None]:
# Compute the compiled loss on the held-out validation split.
car_price_pridiction_model.evaluate(x=validation_data, y=validation_labels)

> **Importing the test data**

In [None]:
# Load the test split of the competition data and render it.
raw_test_data = pd.read_csv(
    filepath_or_buffer="../input/logical-rhythm-2k21-cars24/test.csv",
)
raw_test_data

> **Preparing test dataset**

In [None]:
# Remove the row identifier before encoding so it can never be treated as a
# feature; raw_test_data keeps it for the submission file.
test_data = raw_test_data.drop(columns='car ID')

# One-hot encode the categorical columns. dtype=float is requested explicitly:
# newer pandas releases default the indicator columns to bool, and a frame that
# mixes bool with numeric columns converts to an object array, which Keras
# cannot turn into a tensor.
test_data = pd.get_dummies(test_data, dtype=float)
test_data

> **Normalization**

In [None]:
# Scale the test features with the transformer that was fitted on the training
# split. Fitting a fresh MinMaxScaler on the test set would map the same raw
# value to a different scaled value than the one the model saw in training.
#
# The frame handed to the transformer is first aligned to the training columns
# (missing indicator columns filled with 0), because a fitted transformer may
# reject input whose columns differ from those it was fitted on. Only the two
# scaled columns are taken from the result.
# NOTE: relies on `normalization` still being the object fitted on the
# unscaled training split, i.e. on the cells having been run in order.
test_data_aligned = test_data.reindex(columns=train_data.columns, fill_value=0)
test_data_normalized = normalization.transform(test_data_aligned)

> **Adding the normalized data to the test set**

In [None]:
# Dropping the two raw columns first (kept below for reference) is not needed:
# the assignment overwrites them with the scaled values.
# test_data = test_data.drop(columns = 'model_year')
# test_data = test_data.drop(columns = 'distance_covered (km)')
test_data[['model_year','distance_covered (km)']] = test_data_normalized


In [None]:
# Give the test features exactly the training feature layout: same columns,
# same order. Indicator columns that the test set never produced (e.g. a
# previous-owner category absent from it) are added filled with 0, and columns
# the model was never trained on are dropped. The model consumes features by
# position, so appending a differently named '4th Owner' column at the end
# does not guarantee the layout the network was trained with.
test_data = test_data.reindex(columns=train_data.columns, fill_value=0)

In [None]:
# Display the prepared test features.
test_data

In [None]:
# Compare shapes: the second entry (column count) must be the same for both
# frames, because the model is fed test_data after being trained on train_data.
test_data.shape,train_data.shape

> **Predicting on the test dataset**

In [None]:
# Run the trained network on the prepared test features and show the raw
# predictions.
price_pridiction = car_price_pridiction_model.predict(x=test_data)
price_pridiction

In [None]:
# The model ends in a single-unit Dense layer, so predict() yields one column
# of values per row. Flatten it to 1-D before storing it as a column, rather
# than relying on pandas to accept a 2-D array for a single column.
raw_test_data["price"] = price_pridiction.ravel()

> **Complete dataframe of the test set with predicted price**

In [None]:
# Display the test frame, which now carries the predicted "price" column.
raw_test_data

In [None]:
# Plot the predicted prices of the test set against car ID. The original cell
# plotted the training frame (raw_data) although its axis labels and this
# section refer to raw_test_data.
plt.plot(raw_test_data["car ID"],raw_test_data["price"])
plt.ylabel('raw_test_data["price"]')
plt.xlabel('raw_test_data["car ID"]')

> **CSV file of car id and price**

In [None]:
# Assemble the two-column submission frame: the car identifier and its
# predicted price. The mapping is passed inline instead of being bound to a
# variable named `dict`, which would shadow the builtin for the rest of the
# kernel session.
car_id = raw_test_data["car ID"]
predicted_price = raw_test_data["price"]
predicted_submit_file = pd.DataFrame({"car id": car_id,
                                      "price": predicted_price})


In [None]:
# Write the submission without the index column, then end the cell with the
# frame so the notebook renders it (only a cell's last expression is shown).
# NOTE(review): the output name has no ".csv" extension — confirm that this is
# what the submission system expects.
predicted_submit_file.to_csv("submission_q2",index = False)
predicted_submit_file