#### Importing the dependencies

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn import metrics
import warnings
# NOTE(review): this silences every warning category for the rest of the
# notebook, including deprecation notices from pandas and scikit-learn;
# consider narrowing the filter to a specific category.
warnings.filterwarnings("ignore")

In [2]:
# Read the car listings CSV into a DataFrame; the path is resolved relative
# to the notebook's working directory.
car_data = pd.read_csv(filepath_or_buffer="./car data.csv")

In [3]:
# Preview the first five records to check column names and value formats
car_data.head(n=5)

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,Petrol,Dealer,Manual,0
1,sx4,2013,4.75,9.54,43000,Diesel,Dealer,Manual,0
2,ciaz,2017,7.25,9.85,6900,Petrol,Dealer,Manual,0
3,wagon r,2011,2.85,4.15,5200,Petrol,Dealer,Manual,0
4,swift,2014,4.6,6.87,42450,Diesel,Dealer,Manual,0


In [4]:
# Preview the final five records of the dataset
car_data.tail(n=5)

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
296,city,2016,9.5,11.6,33988,Diesel,Dealer,Manual,0
297,brio,2015,4.0,5.9,60000,Petrol,Dealer,Manual,0
298,city,2009,3.35,11.0,87934,Petrol,Dealer,Manual,0
299,city,2017,11.5,12.5,9000,Diesel,Dealer,Manual,0
300,brio,2016,5.3,5.9,5464,Petrol,Dealer,Manual,0


In [5]:
# Report the (rows, columns) dimensions of the loaded frame
print(f"shape: {car_data.shape}")

shape: (301, 9)


In [6]:
# Count missing entries per column (isna is the canonical spelling of isnull)
car_data.isna().sum()

Car_Name         0
Year             0
Selling_Price    0
Present_Price    0
Kms_Driven       0
Fuel_Type        0
Seller_Type      0
Transmission     0
Owner            0
dtype: int64

In [7]:
# Frequency of each Transmission label; these labels are later mapped to integer codes
car_data["Transmission"].value_counts()

Manual       261
Automatic     40
Name: Transmission, dtype: int64

In [8]:
# Frequency of each model name; Car_Name is high-cardinality and is dropped
# from the feature matrix later on
car_data["Car_Name"].value_counts()

city                        26
corolla altis               16
verna                       14
fortuner                    11
brio                        10
                            ..
Honda CB Trigger             1
Yamaha FZ S                  1
Bajaj Pulsar 135 LS          1
Activa 4g                    1
Bajaj Avenger Street 220     1
Name: Car_Name, Length: 98, dtype: int64

In [9]:
# Frequency of each Seller_Type label; these labels are later mapped to integer codes
car_data["Seller_Type"].value_counts()

Dealer        195
Individual    106
Name: Seller_Type, dtype: int64

In [10]:
# Frequency of each Fuel_Type label; these labels are later mapped to integer codes
car_data["Fuel_Type"].value_counts()

Petrol    239
Diesel     60
CNG         2
Name: Fuel_Type, dtype: int64

In [11]:
# Frequency of each Owner value; the column is already numeric, so it is not
# part of the label-to-integer mapping applied to the other categorical columns
car_data["Owner"].value_counts()

0    290
1     10
3      1
Name: Owner, dtype: int64

#### Encoding the Categorical data

In [12]:
# Map each string label of the categorical columns to an integer code so the
# regressors receive a purely numeric feature matrix.
# NOTE(review): the 0/1/2 codes for Fuel_Type impose an ordering
# (Petrol < Diesel < CNG) that a linear model treats as meaningful; one-hot
# encoding may be a better fit -- confirm before changing, since it alters
# the feature set.
CATEGORY_CODES = {
    'Fuel_Type': {'Petrol': 0, 'Diesel': 1, 'CNG': 2},
    'Seller_Type': {'Dealer': 0, 'Individual': 1},
    'Transmission': {'Manual': 0, 'Automatic': 1},
}

# Rebind rather than mutate with inplace=True, and cast explicitly so the
# integer dtype does not depend on replace() inferring it from an object
# column. The cast also fails loudly if an unmapped label is still present.
car_data = (
    car_data
    .replace(CATEGORY_CODES)
    .astype({column: 'int64' for column in CATEGORY_CODES})
)

In [13]:
# Inspect the frame after encoding: Fuel_Type, Seller_Type and Transmission
# should now hold integer codes instead of string labels
car_data

Unnamed: 0,Car_Name,Year,Selling_Price,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,ritz,2014,3.35,5.59,27000,0,0,0,0
1,sx4,2013,4.75,9.54,43000,1,0,0,0
2,ciaz,2017,7.25,9.85,6900,0,0,0,0
3,wagon r,2011,2.85,4.15,5200,0,0,0,0
4,swift,2014,4.60,6.87,42450,1,0,0,0
...,...,...,...,...,...,...,...,...,...
296,city,2016,9.50,11.60,33988,1,0,0,0
297,brio,2015,4.00,5.90,60000,0,0,0,0
298,city,2009,3.35,11.00,87934,0,0,0,0
299,city,2017,11.50,12.50,9000,1,0,0,0


#### Splitting data into Features and Target

In [15]:
# Feature matrix: every column except the model name (Car_Name, which is not
# numerically encoded) and the regression target. `columns=` already selects
# the axis, so the redundant `axis=1` is omitted.
X = car_data.drop(columns=['Car_Name', 'Selling_Price'])
# Target vector: the selling price the models are trained to predict.
Y = car_data['Selling_Price']

In [16]:
# Inspect the feature matrix (Car_Name and Selling_Price removed)
X

Unnamed: 0,Year,Present_Price,Kms_Driven,Fuel_Type,Seller_Type,Transmission,Owner
0,2014,5.59,27000,0,0,0,0
1,2013,9.54,43000,1,0,0,0
2,2017,9.85,6900,0,0,0,0
3,2011,4.15,5200,0,0,0,0
4,2014,6.87,42450,1,0,0,0
...,...,...,...,...,...,...,...
296,2016,11.60,33988,1,0,0,0
297,2015,5.90,60000,0,0,0,0
298,2009,11.00,87934,0,0,0,0
299,2017,12.50,9000,1,0,0,0


In [17]:
# Inspect the target series (Selling_Price)
Y

0       3.35
1       4.75
2       7.25
3       2.85
4       4.60
       ...  
296     9.50
297     4.00
298     3.35
299    11.50
300     5.30
Name: Selling_Price, Length: 301, dtype: float64

#### Splitting data into Train and Test data

In [20]:
# Hold out 10% of the rows for testing; the fixed seed keeps the split
# repeatable across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.1,
    random_state=2,
)

#### Model Training

##### Linear Regression

In [21]:
# model = LinearRegression()

In [22]:
# model.fit(X_train, Y_train)

##### Linear Regression Accuracy

In [23]:
# Y_train_cap = model.predict(X_train)

In [26]:
# NOTE(review): this Linear Regression evaluation is commented out, so the
# output recorded beneath this cell comes from an earlier run and will not be
# reproduced by a fresh Restart & Run All.
# R squared error
# rmse = metrics.r2_score(Y_train, Y_train_cap)
# print("R Squared Error:", rmse)

R Squared Error: 0.8799451660493705


##### Lasso Regression

In [27]:
# L1-regularised linear regression. alpha=1.0 is scikit-learn's default
# penalty strength, written out so it is visible and easy to tune.
# NOTE(review): no feature scaling is applied before fitting even though the
# columns span very different ranges (Kms_Driven vs. the 0/1 flags); the L1
# penalty is scale-sensitive, so confirm this is intended.
model = Lasso(alpha=1.0)

In [28]:
# Fit the Lasso model on the training split only
model.fit(X=X_train, y=Y_train)

In [29]:
# Predictions for the training rows, used for the in-sample score
Y_train_cap = model.predict(X=X_train)

In [30]:
# Coefficient of determination (R^2) of the Lasso predictions on the training
# split. This is a score (1.0 is a perfect fit), not an error measure.
r2_train = metrics.r2_score(Y_train, Y_train_cap)
# `rmse` is a misnomer (it never held a root-mean-squared error); the name is
# kept as an alias so any later cell that reads it keeps working.
rmse = r2_train
# NOTE(review): this is an in-sample score only; the held-out X_test/Y_test
# split is not evaluated in the cells visible here.
print("R Squared Error:", r2_train)

R Squared Error: 0.8427856123435793
