In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [2]:
dataset = pd.read_csv("LabelEncoding.csv")
dataset

Unnamed: 0,car model,mileage,age,sell price
0,BMW X5,69000,6,18000
1,BMW X5,35000,3,34000
2,BMW X5,57000,5,26100
3,BMW X5,22500,2,40000
4,BMW X5,46000,4,31500
5,Audi,59000,5,29400
6,Audi,52000,5,32000
7,Audi,72000,6,19300
8,Audi,91000,8,12000
9,Mercedez Benz,67000,6,22000


In [3]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13 entries, 0 to 12
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   car model   13 non-null     object
 1   mileage     13 non-null     int64 
 2   age         13 non-null     int64 
 3   sell price  13 non-null     int64 
dtypes: int64(3), object(1)
memory usage: 544.0+ bytes


## data preprocessing

In [4]:
dummies = pd.get_dummies(dataset['car model'])
dummies

Unnamed: 0,Audi,BMW X5,Mercedez Benz
0,0,1,0
1,0,1,0
2,0,1,0
3,0,1,0
4,0,1,0
5,1,0,0
6,1,0,0
7,1,0,0
8,1,0,0
9,0,0,1


In [5]:
dataset = pd.concat([dataset,dummies],axis='columns')
dataset

Unnamed: 0,car model,mileage,age,sell price,Audi,BMW X5,Mercedez Benz
0,BMW X5,69000,6,18000,0,1,0
1,BMW X5,35000,3,34000,0,1,0
2,BMW X5,57000,5,26100,0,1,0
3,BMW X5,22500,2,40000,0,1,0
4,BMW X5,46000,4,31500,0,1,0
5,Audi,59000,5,29400,1,0,0
6,Audi,52000,5,32000,1,0,0
7,Audi,72000,6,19300,1,0,0
8,Audi,91000,8,12000,1,0,0
9,Mercedez Benz,67000,6,22000,0,0,1


In [6]:
dataset = dataset.drop(['car model'],axis='columns')

In [7]:
dataset

Unnamed: 0,mileage,age,sell price,Audi,BMW X5,Mercedez Benz
0,69000,6,18000,0,1,0
1,35000,3,34000,0,1,0
2,57000,5,26100,0,1,0
3,22500,2,40000,0,1,0
4,46000,4,31500,0,1,0
5,59000,5,29400,1,0,0
6,52000,5,32000,1,0,0
7,72000,6,19300,1,0,0
8,91000,8,12000,1,0,0
9,67000,6,22000,0,0,1


## split featurs and level

In [8]:
level = dataset['sell price']
featurs = dataset.drop(['sell price'],axis='columns')

In [9]:
level

0     18000
1     34000
2     26100
3     40000
4     31500
5     29400
6     32000
7     19300
8     12000
9     22000
10    20000
11    21000
12    33000
Name: sell price, dtype: int64

In [10]:
featurs

Unnamed: 0,mileage,age,Audi,BMW X5,Mercedez Benz
0,69000,6,0,1,0
1,35000,3,0,1,0
2,57000,5,0,1,0
3,22500,2,0,1,0
4,46000,4,0,1,0
5,59000,5,1,0,0
6,52000,5,1,0,0
7,72000,6,1,0,0
8,91000,8,1,0,0
9,67000,6,0,0,1


## model selection

In [11]:
xtrain,xtest,ytrain,ytest = train_test_split(featurs,level,test_size=0.2,random_state=1)

In [12]:
xtrain

Unnamed: 0,mileage,age,Audi,BMW X5,Mercedez Benz
10,83000,7,0,0,1
1,35000,3,0,1,0
6,52000,5,1,0,0
0,69000,6,0,1,0
7,72000,6,1,0,0
12,59000,5,0,0,1
9,67000,6,0,0,1
8,91000,8,1,0,0
11,79000,7,0,0,1
5,59000,5,1,0,0


In [13]:
ytrain

10    20000
1     34000
6     32000
0     18000
7     19300
12    33000
9     22000
8     12000
11    21000
5     29400
Name: sell price, dtype: int64

In [14]:
model = LinearRegression()
model.fit(xtrain,ytrain)

LinearRegression()

In [21]:
model.coef_

array([-3.55330204e-01, -1.73845122e+03,  1.04745223e+03, -4.59817298e+03,
        3.55072075e+03])

In [22]:
model.intercept_

56898.37412001342

In [15]:
model.score(xtest,ytest)

0.8526576494631262

In [16]:
y_predict=model.predict(xtest)

In [17]:
y_predict

array([23354.12336574, 40828.36909152, 29001.20683875])

In [18]:
mean_s_error = mean_squared_error(y_predict,ytest)

In [19]:
mean_s_error

4823333.701685976

In [20]:
mean_squarde_error2 = mean_squared_error(ytest,y_predict)
mean_squarde_error2

4823333.701685976