# Importing Libraries 

In [47]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Read the Dataset

In [48]:
# Load the car dataset from a CSV file; the path is relative to the notebook's working directory.
df=pd.read_csv('car data.csv')

# Exploratory Data Analysis

In [49]:
# Preview the first five rows to check the column names and value formats.
df.head()

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Driven_kms,Fuel_Type,Selling_type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0


In [50]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,Year,Selling_Price,Present_Price,Driven_kms,Owner
count,301.0,301.0,301.0,301.0,301.0
mean,2013.627907,4.661296,7.628472,36947.20598,0.043189
std,2.891554,5.082812,8.642584,38886.883882,0.247915
min,2003.0,0.1,0.32,500.0,0.0
25%,2012.0,0.9,1.2,15000.0,0.0
50%,2014.0,3.6,6.4,32000.0,0.0
75%,2016.0,6.0,9.9,48767.0,0.0
max,2018.0,35.0,92.6,500000.0,3.0


In [51]:
# Column dtypes and non-null counts; a quick check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 301 entries, 0 to 300
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Car_Name       301 non-null    object 
 1   Year           301 non-null    int64  
 2   Selling_Price  301 non-null    float64
 3   Present_Price  301 non-null    float64
 4   Driven_kms     301 non-null    int64  
 5   Fuel_Type      301 non-null    object 
 6   Selling_type   301 non-null    object 
 7   Transmission   301 non-null    object 
 8   Owner          301 non-null    int64  
dtypes: float64(2), int64(3), object(4)
memory usage: 21.3+ KB


# Visualization 

# 1.1 Selling Type

In [52]:

# Count of cars per selling type, one bar colour per category.
# update_layout returns the same figure, so the calls can be chained.
fig = px.histogram(
    df,
    x='Selling_type',
    color='Selling_type',
    color_discrete_sequence=['blue', 'green', 'red'],
).update_layout(
    title='Selling Type Histogram',
    xaxis_title='Selling Type',
    yaxis_title='Frequency',
)

fig.show()

# 1.2 Fuel Type

In [53]:
# Create a histogram
# Count of cars per fuel type, one bar colour per category.
fig = px.histogram(
    df,
    x='Fuel_Type',
    color='Fuel_Type',
    color_discrete_sequence=['blue', 'green', 'red'],
)

# Title and axis captions.
fig.update_layout(
    title='Fuel Type Histogram',
    xaxis_title='Fuel Type',
    yaxis_title='Frequency',
)

fig.show()

# 1.3 Transmission Type

In [54]:
# Create a histogram
# Count of cars per transmission type, one bar colour per category.
fig = px.histogram(
    df,
    x='Transmission',
    color='Transmission',
    color_discrete_sequence=['blue', 'green', 'red'],
)

# Title and axis captions.
fig.update_layout(
    title='Transmission Histogram',
    xaxis_title='Transmission Type',
    yaxis_title='Frequency',
)

fig.show()

# 1.4 Car Type

In [55]:
# Count of rows per car name. Only three colours are supplied, so they are
# reused in turn when there are more car names than colours.
fig = px.histogram(
    df,
    x='Car_Name',
    color='Car_Name',
    color_discrete_sequence=['Blue', 'green', 'red'],
)
fig.update_layout(
    title='Car Type Histogram',
    xaxis_title='Car Type',
    yaxis_title='Frequency',
)
fig.show()

# Encoding Features and Splitting Data

In [56]:
# Show the frame as loaded, before the categorical columns are label-encoded
# (the notebook display elides the middle rows).
df

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Driven_kms,Fuel_Type,Selling_type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.60,6.87,42450,Diesel,Dealer,Manual,0
...,...,...,...,...,...,...,...,...,...
296,city,2016,9.50,11.60,33988,Diesel,Dealer,Manual,0
297,brio,2015,4.00,5.90,60000,Petrol,Dealer,Manual,0
298,city,2009,3.35,11.00,87934,Petrol,Dealer,Manual,0
299,city,2017,11.50,12.50,9000,Diesel,Dealer,Manual,0


In [57]:

# Replace each listed column with integer codes, using one LabelEncoder per
# column. Refitting a single shared encoder would keep only the last column's
# mapping; storing them in `encoders` lets any column be decoded later with
# encoders[col].inverse_transform(...) or applied to new rows with .transform(...).
# NOTE(review): Year is encoded too, so it becomes the rank of the year among
# the years present (earliest -> 0) rather than the calendar year — confirm
# that this is intended.
encoded_columns = ['Car_Name', 'Fuel_Type', 'Selling_type', 'Transmission', 'Year']
encoders = {}
for column in encoded_columns:
    encoders[column] = LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])

# Keep `le` bound to the last-fitted encoder (Year), as the original cell left it.
le = encoders['Year']

In [58]:
# Target: the selling price of each car.
Y = df['Selling_Price']

# Features: every remaining column except Present_Price and Owner, which this
# analysis leaves out of the model inputs.
X = df.drop(columns=['Selling_Price', 'Present_Price', 'Owner'])

# Train Test Split

In [59]:
# Hold out 20% of the rows for testing. random_state pins the shuffle so the
# split, and every metric computed from it, is the same on each run.
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,train_size=0.8,random_state=42)

In [60]:
# (rows, feature columns) of the training features.
X_train.shape

(240, 6)

In [61]:
# Number of training targets; should equal the row count of X_train.
Y_train.shape

(240,)

# Machine Learning Model

In [62]:
from sklearn.linear_model import LinearRegression

In [63]:
# Baseline model: ordinary least-squares linear regression with default settings.
Model1=LinearRegression()

In [64]:
# Fit the linear model on the training split.
Model1.fit(X_train,Y_train)

In [65]:
# Predict selling prices for the held-out test rows.
prediction=Model1.predict(X_test)

In [66]:
from sklearn.metrics import mean_absolute_error

In [67]:
# Mean absolute error of the linear model on the test split (same units as Selling_Price).
print(mean_absolute_error(Y_test,prediction))

2.0868269109227198


# Decision Trees

In [68]:
from sklearn.tree import DecisionTreeRegressor

In [69]:
# Decision-tree regressor with default hyperparameters. random_state is fixed
# because scikit-learn randomly permutes the candidate features at each split,
# so an unseeded tree (and its error) can differ from run to run.
Model2=DecisionTreeRegressor(random_state=42)

In [70]:
# Fit the decision tree on the same training split used for the linear model.
Model2.fit(X_train,Y_train)

In [71]:
# Predict on the test rows; this overwrites the linear model's `prediction`.
prediction=Model2.predict(X_test)

In [72]:
# Mean absolute error of the decision tree on the test split.
print(mean_absolute_error(Y_test,prediction))

1.4837704918032788


# Artificial Neural Network (ANN)

In [73]:
from tensorflow import keras
from keras.layers import Dense,BatchNormalization,Flatten
from keras.models import Sequential

In [74]:
# Start an empty Keras Sequential model; layers are appended with Model.add.
Model=Sequential()

In [75]:
# Small fully connected regressor: two hidden ReLU layers followed by a single
# linear output unit, because the target (Selling_Price) is one continuous
# value per row. The input width is read from the training features rather
# than hard-coded, so it follows the number of feature columns.
# Note: re-running this cell without recreating Model appends the layers again.
Model.add(Flatten(input_shape=(X_train.shape[1],)))
Model.add(Dense(12,activation='relu'))
Model.add(Dense(110,activation='relu'))
Model.add(Dense(1,activation='linear'))

In [76]:
# Optimise mean absolute error (the metric reported for the other models) and
# additionally track mean squared error. Classification accuracy is not
# meaningful for a continuous target, so it is not used as a metric.
Model.compile(optimizer='Adam',loss="mean_absolute_error",metrics=['mean_squared_error'])

In [77]:
# Train for 10 epochs, evaluating on (X_test, Y_test) after each epoch; the
# returned History object holds the per-epoch loss/metric values.
# NOTE(review): the test split doubles as validation data here, so it is no
# longer an untouched hold-out if these curves are used for tuning.
# NOTE(review): features are fed unscaled (Driven_kms is orders of magnitude
# larger than the encoded columns) — consider standardising before training.
History=Model.fit(X_train, Y_train, epochs=10, validation_data=(X_test, Y_test))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
