In [48]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
import matplotlib.pyplot as plt

In [8]:
# Load the raw car listings; the path is relative to the working directory.
csv_path = 'cardata.csv'
car_dataset = pd.read_csv(csv_path)

In [9]:
# Switch the column headers to lower-case snake_case names.
car_dataset = car_dataset.rename(
    columns={
        'Car_Name': 'car_name',
        'Year': 'year',
        'Selling_Price': 'selling_price',
        'Present_Price': 'present_price',
        'Kms_Driven': 'kms_driven',
        'Fuel_Type': 'fuel_type',
        'Seller_Type': 'seller_type',
        'Transmission': 'transmission',
        'Owner': 'owner',
    }
)

In [10]:
# Preview the first 20 rows to sanity-check the renamed columns.
car_dataset.head(20)

Unnamed: 0,car_name,year,selling_price,present_price,kms_driven,fuel_type,seller_type,transmission,owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0
5,vitara brezza,2018,9.25,9.83,2071,Diesel,Dealer,Manual,0
6,ciaz,2015,6.75,8.12,18796,Petrol,Dealer,Manual,0
7,s cross,2015,6.5,8.61,33429,Diesel,Dealer,Manual,0
8,ciaz,2016,8.75,8.89,20273,Diesel,Dealer,Manual,0
9,ciaz,2015,7.45,8.92,42367,Diesel,Dealer,Manual,0


In [11]:
# Preview the last 5 rows of the dataframe.
car_dataset.tail(5)

Unnamed: 0,car_name,year,selling_price,present_price,kms_driven,fuel_type,seller_type,transmission,owner
296,city,2016,9.5,11.6,33988,Diesel,Dealer,Manual,0
297,brio,2015,4.0,5.9,60000,Petrol,Dealer,Manual,0
298,city,2009,3.35,11.0,87934,Petrol,Dealer,Manual,0
299,city,2017,11.5,12.5,9000,Diesel,Dealer,Manual,0
300,brio,2016,5.3,5.9,5464,Petrol,Dealer,Manual,0


In [12]:
# Number of distinct (non-null) fuel-type labels.
car_dataset['fuel_type'].nunique()

3

In [13]:
# Number of distinct (non-null) seller-type labels.
car_dataset['seller_type'].nunique()

2

In [14]:
# Number of distinct (non-null) transmission labels.
car_dataset['transmission'].nunique()

2

In [15]:
def change_fuel_type(fuel):
    """Translate a fuel-type label into its integer code.

    'Petrol' -> 0, 'Diesel' -> 1, 'CNG' -> 2.
    Any other value (including different capitalisation) yields None.
    """
    fuel_codes = (('Petrol', 0), ('Diesel', 1), ('CNG', 2))
    for label, code in fuel_codes:
        if fuel == label:
            return code
    return None
    
def change_seller_type(seller):
    """Translate a seller-type label into its integer code.

    'Individual' -> 0, 'Dealer' -> 1.
    Any other value yields None.
    """
    seller_codes = (('Individual', 0), ('Dealer', 1))
    for label, code in seller_codes:
        if seller == label:
            return code
    return None
    
def change_transmission_type(transmission):
    """Translate a transmission label into its integer code.

    'Manual' -> 0, 'Automatic' -> 1.
    Any other value yields None.
    """
    transmission_codes = (('Manual', 0), ('Automatic', 1))
    for label, code in transmission_codes:
        if transmission == label:
            return code
    return None

In [16]:
# Replace the text labels in the three categorical columns with the integer
# codes produced by the change_* helpers. Those helpers return None for any
# label they do not recognise, so every column is encoded and checked first and
# the dataframe is only modified once all three are known to be clean. This
# also turns an accidental second run of this cell (when the columns already
# hold integer codes) into a clear error instead of silently filling the
# columns with missing values.
column_encoders = {
    'fuel_type': change_fuel_type,
    'seller_type': change_seller_type,
    'transmission': change_transmission_type,
}

encoded_columns = {}
for column_name, encoder in column_encoders.items():
    encoded = car_dataset[column_name].apply(encoder)
    not_encoded = encoded.isna()
    if not_encoded.any():
        unmapped = car_dataset.loc[not_encoded, column_name].unique().tolist()
        raise ValueError(
            f"Cannot encode column {column_name!r}: unrecognised values {unmapped}"
        )
    encoded_columns[column_name] = encoded

# Every column validated; now write the codes back.
for column_name, encoded in encoded_columns.items():
    car_dataset[column_name] = encoded

In [17]:
# Show the dataframe after the categorical columns were replaced by integer codes.
car_dataset

Unnamed: 0,car_name,year,selling_price,present_price,kms_driven,fuel_type,seller_type,transmission,owner
0,ritz,2014,3.35,5.59,27000,0,1,0,0
1,sx4,2013,4.75,9.54,43000,1,1,0,0
2,ciaz,2017,7.25,9.85,6900,0,1,0,0
3,wagon r,2011,2.85,4.15,5200,0,1,0,0
4,swift,2014,4.60,6.87,42450,1,1,0,0
...,...,...,...,...,...,...,...,...,...
296,city,2016,9.50,11.60,33988,1,1,0,0
297,brio,2015,4.00,5.90,60000,0,1,0,0
298,city,2009,3.35,11.00,87934,0,1,0,0
299,city,2017,11.50,12.50,9000,1,1,0,0


In [18]:
# Drop rows that repeat an earlier row in every column; the first occurrence
# is the one that stays (pandas' default, spelled out here).
car_dataset = car_dataset.drop_duplicates(keep='first')

In [19]:
def statistics_values(series):
    """Return the ``(mean, standard deviation)`` pair of ``series``.

    Both numbers come from the object's own ``mean()`` and ``std()`` methods,
    called in that order.
    """
    return series.mean(), series.std()

In [20]:
# Mean and standard deviation of the two price columns.
car_selling_price_mean, car_selling_price_std = statistics_values(
    car_dataset['selling_price']
)
car_present_price_mean, car_present_price_std = statistics_values(
    car_dataset['present_price']
)

In [21]:
# Print each price statistic as "<name> <value>", one per line.
for stat_name, stat_value in (
    ('car_selling_price_mean', car_selling_price_mean),
    ('car_selling_price_std', car_selling_price_std),
    ('car_present_price_mean', car_present_price_mean),
    ('car_present_price_std', car_present_price_std),
):
    print(stat_name, stat_value)

car_selling_price_mean 4.589632107023414
car_selling_price_std 4.984239677845962
car_present_price_mean 7.541036789297662
car_present_price_std 8.567886743159498


In [22]:
# Show the dataframe again, now that duplicate rows have been dropped.
car_dataset

Unnamed: 0,car_name,year,selling_price,present_price,kms_driven,fuel_type,seller_type,transmission,owner
0,ritz,2014,3.35,5.59,27000,0,1,0,0
1,sx4,2013,4.75,9.54,43000,1,1,0,0
2,ciaz,2017,7.25,9.85,6900,0,1,0,0
3,wagon r,2011,2.85,4.15,5200,0,1,0,0
4,swift,2014,4.60,6.87,42450,1,1,0,0
...,...,...,...,...,...,...,...,...,...
296,city,2016,9.50,11.60,33988,1,1,0,0
297,brio,2015,4.00,5.90,60000,0,1,0,0
298,city,2009,3.35,11.00,87934,0,1,0,0
299,city,2017,11.50,12.50,9000,1,1,0,0


In [23]:
# Ordered (non-shuffled) hold-out split: the first `n_training_rows` rows train
# the models and every remaining row is held out for testing. Slicing both
# parts at the same boundary guarantees they never overlap and never skip a
# row, whatever number of rows is left after de-duplication (a fixed
# `tail(99)` is only a clean partition when exactly 299 rows remain).
# NOTE(review): rows are taken in file order, so train and test may cover
# different kinds of listings; consider a shuffled split.
n_training_rows = 200
training_car_data = car_dataset.iloc[:n_training_rows]
testing_car_data = car_dataset.iloc[n_training_rows:]

In [24]:
# Show the rows selected for training.
training_car_data

Unnamed: 0,car_name,year,selling_price,present_price,kms_driven,fuel_type,seller_type,transmission,owner
0,ritz,2014,3.35,5.59,27000,0,1,0,0
1,sx4,2013,4.75,9.54,43000,1,1,0,0
2,ciaz,2017,7.25,9.85,6900,0,1,0,0
3,wagon r,2011,2.85,4.15,5200,0,1,0,0
4,swift,2014,4.60,6.87,42450,1,1,0,0
...,...,...,...,...,...,...,...,...,...
197,Honda CB twister,2010,0.16,0.51,33000,0,0,0,0
198,Bajaj Discover 125,2011,0.15,0.57,35000,0,0,0,1
199,Honda CB Shine,2007,0.12,0.58,53000,0,0,0,0
200,Bajaj Pulsar 150,2006,0.10,0.75,92233,0,0,0,0


In [25]:
# Training feature matrix: the six predictor columns listed below.
x_training = training_car_data[
    [
        'year',
        'selling_price',
        'kms_driven',
        'fuel_type',
        'transmission',
        'owner',
    ]
]

In [26]:
# Training target: the present_price column.
y_training = training_car_data.loc[:, 'present_price']

In [27]:
# Test feature matrix: exactly the columns (and column order) of x_training,
# i.e. year, selling_price, kms_driven, fuel_type, transmission, owner.
x_testing = testing_car_data[list(x_training.columns)]

In [28]:
# Test target: the present_price column of the held-out rows.
y_testing = testing_car_data.loc[:, 'present_price']

In [29]:
# Linear regression model created with scikit-learn's default settings.
model = LinearRegression()

In [30]:
# Fit the linear model on the training features and target.
model.fit(X=x_training, y=y_training)

LinearRegression()

In [37]:
# Linear-model predictions for the held-out rows.
y_pred = model.predict(X=x_testing)

In [32]:
# Error of the linear model on the held-out rows: MSE and its square root.
mse = mean_squared_error(y_true=y_testing, y_pred=y_pred)
rmse = np.sqrt(mse)

In [42]:
# Report the linear model's error metrics, one per line.
for metric_label, metric_value in (
    ("Mean Squared Error (MSE):", mse),
    ("Root Mean Squared Error (RMSE):", rmse),
):
    print(metric_label, metric_value)

Mean Squared Error (MSE): 3.5076855346287066
Root Mean Squared Error (RMSE): 1.8728816125502183


In [69]:
# Random forest of 100 trees with a fixed seed.
rf_model = RandomForestRegressor(
    n_estimators=100,
    random_state=200,
)

In [70]:
# Fit the random forest on the training features and target.
rf_model.fit(X=x_training, y=y_training)

RandomForestRegressor(random_state=200)

In [71]:
# Random-forest predictions for the held-out rows.
rf_pred = rf_model.predict(X=x_testing)

In [72]:
# Error of the random forest on the held-out rows: MSE and its square root.
rf_mse = mean_squared_error(y_true=y_testing, y_pred=rf_pred)
rf_rmse = np.sqrt(rf_mse)

In [73]:
# Report the random forest's error metrics, one per line.
for metric_label, metric_value in (
    ("Mean Squared Error (MSE):", rf_mse),
    ("Root Mean Squared Error (RMSE):", rf_rmse),
):
    print(metric_label, metric_value)

Mean Squared Error (MSE): 4.133455469898993
Root Mean Squared Error (RMSE): 2.033090128326581


In [74]:
# Gradient-boosted trees with scikit-learn's default hyper-parameters.
# The seed makes repeated runs produce the same model and error figures; it
# reuses the value given to the random forest in this notebook.
grad_model = GradientBoostingRegressor(random_state=200)

In [75]:
# Fit the gradient-boosting model on the training features and target.
grad_model.fit(X=x_training, y=y_training)

GradientBoostingRegressor()

In [77]:
# Gradient-boosting predictions for the held-out rows.
grad_pred = grad_model.predict(X=x_testing)

In [80]:
# Error of the gradient-boosting model on the held-out rows: MSE and its square root.
grad_mse = mean_squared_error(y_true=y_testing, y_pred=grad_pred)
grad_rmse = np.sqrt(grad_mse)

In [81]:
# Report the gradient-boosting model's error metrics, one per line.
for metric_label, metric_value in (
    ("Mean Squared Error (MSE):", grad_mse),
    ("Root Mean Squared Error (RMSE):", grad_rmse),
):
    print(metric_label, metric_value)

Mean Squared Error (MSE): 4.388162793126206
Root Mean Squared Error (RMSE): 2.094794212596122
