In [77]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer

In [78]:
# Load the car listings from a CSV expected next to the notebook.
# The displayed frame has the columns 'Car Model', 'Mileage',
# 'Sell Price($)' and 'Age(yrs)'.
df = pd.read_csv('car_prices.csv')
# A bare last expression renders the frame as the cell output.
df

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,BMW X5,69000,18000,6
1,BMW X5,35000,34000,3
2,BMW X5,57000,26100,5
3,BMW X5,22500,40000,2
4,BMW X5,46000,31500,4
5,Audi A5,59000,29400,5
6,Audi A5,52000,32000,5
7,Audi A5,72000,19300,6
8,Audi A5,91000,12000,8
9,Mercedez Benz C class,67000,22000,6


In [79]:
# Approach 1: one-hot encode 'Car Model' with pandas.
# get_dummies yields one indicator column per distinct model name.
dummies = pd.get_dummies(df['Car Model'])

# Put the indicator columns next to the original columns (row index is shared).
merged = pd.concat([df, dummies], axis=1)

# Remove the original text column, plus one indicator ('Audi A5') so the
# remaining indicators are not perfectly collinear (dummy-variable trap).
# 'Audi A5' therefore acts as the baseline category.
final = merged.drop(columns=['Car Model', 'Audi A5'])
final

Unnamed: 0,Mileage,Sell Price($),Age(yrs),BMW X5,Mercedez Benz C class
0,69000,18000,6,1,0
1,35000,34000,3,1,0
2,57000,26100,5,1,0
3,22500,40000,2,1,0
4,46000,31500,4,1,0
5,59000,29400,5,0,0
6,52000,32000,5,0,0
7,72000,19300,6,0,0
8,91000,12000,8,0,0
9,67000,22000,6,0,1


In [80]:
# Target: the selling price column, kept as a 1-D Series.
y = final.loc[:, 'Sell Price($)']
# Features: every remaining column of `final`.
x = final.drop(columns=['Sell Price($)'])

In [81]:
# Fit an ordinary least-squares model on the dummy-encoded features.
l_model = LinearRegression()
l_model.fit(x, y)

# Build the query as a DataFrame that reuses the training column labels, so the
# meaning of each value is explicit and matches what the model was fitted on
# (a bare nested list relies on positional order and makes newer scikit-learn
# warn about missing feature names).
# Values follow the column order of `x`:
# Mileage=50000, Age(yrs)=10, BMW X5=1, Mercedez Benz C class=0.
query = pd.DataFrame([[50000, 10, 1, 0]], columns=x.columns)
print(l_model.predict(query))

# R^2 on the training data itself (no held-out set is used in this notebook).
print(l_model.score(x, y))

[20407.77764734]
0.9417050937281082


In [82]:
# Approach 2: scikit-learn encoders.
# Step 1: label-encode 'Car Model' into integer codes, as input for OneHotEncoder.

# Work on a copy. A plain `df_le = df` only creates a second name for the same
# frame, so the assignment below would overwrite df['Car Model'] as well; the
# earlier display of `df` would go stale and re-running the get_dummies cell
# would fail because the 'Audi A5' column would no longer be produced.
df_le = df.copy()

label_encoder = LabelEncoder()
# Read from the untouched `df` and write into the copy, so re-running this cell
# always encodes the original text labels.
df_le['Car Model'] = label_encoder.fit_transform(df['Car Model'])
df_le

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,1,69000,18000,6
1,1,35000,34000,3
2,1,57000,26100,5
3,1,22500,40000,2
4,1,46000,31500,4
5,0,59000,29400,5
6,0,52000,32000,5
7,0,72000,19300,6
8,0,91000,12000,8
9,2,67000,22000,6


In [83]:
# Feature matrix as a plain NumPy array; column 0 holds the integer-coded
# 'Car Model', followed by 'Mileage' and 'Age(yrs)'.
x = df_le.loc[:, ['Car Model', 'Mileage', 'Age(yrs)']].to_numpy()

# Target selected with a list of one column, so it stays a one-column
# DataFrame (2-D) rather than a Series.
y = df_le.loc[:, ['Sell Price($)']]

print(x, y)

[[    1 69000     6]
 [    1 35000     3]
 [    1 57000     5]
 [    1 22500     2]
 [    1 46000     4]
 [    0 59000     5]
 [    0 52000     5]
 [    0 72000     6]
 [    0 91000     8]
 [    2 67000     6]
 [    2 83000     7]
 [    2 79000     7]
 [    2 59000     5]]     Sell Price($)
0           18000
1           34000
2           26100
3           40000
4           31500
5           29400
6           32000
7           19300
8           12000
9           22000
10          20000
11          21000
12          33000


In [84]:
# One-hot encode the integer-coded 'Car Model' (column 0) and pass the other
# columns ('Mileage', 'Age(yrs)') through unchanged. The encoded columns come
# first in the output, followed by the passthrough columns.
columnTransformer = ColumnTransformer([('encoder', OneHotEncoder(), [0])], remainder='passthrough')

# Rebuild the raw feature matrix from df_le instead of transforming `x` in
# place. With the self-referential `x = fit_transform(x)`, running this cell a
# second time would one-hot encode the already-encoded matrix and silently
# produce wrong features.
raw_features = df_le[['Car Model', 'Mileage', 'Age(yrs)']].values
encoded_features = columnTransformer.fit_transform(raw_features)

# Drop the first indicator column (category code 0) to avoid the
# dummy-variable trap; that category becomes the baseline.
x = encoded_features[:, 1:]
x

array([[1.00e+00, 0.00e+00, 6.90e+04, 6.00e+00],
       [1.00e+00, 0.00e+00, 3.50e+04, 3.00e+00],
       [1.00e+00, 0.00e+00, 5.70e+04, 5.00e+00],
       [1.00e+00, 0.00e+00, 2.25e+04, 2.00e+00],
       [1.00e+00, 0.00e+00, 4.60e+04, 4.00e+00],
       [0.00e+00, 0.00e+00, 5.90e+04, 5.00e+00],
       [0.00e+00, 0.00e+00, 5.20e+04, 5.00e+00],
       [0.00e+00, 0.00e+00, 7.20e+04, 6.00e+00],
       [0.00e+00, 0.00e+00, 9.10e+04, 8.00e+00],
       [0.00e+00, 1.00e+00, 6.70e+04, 6.00e+00],
       [0.00e+00, 1.00e+00, 8.30e+04, 7.00e+00],
       [0.00e+00, 1.00e+00, 7.90e+04, 7.00e+00],
       [0.00e+00, 1.00e+00, 5.90e+04, 5.00e+00]])

In [88]:
# Refit the existing estimator on the OneHotEncoder-based features; this
# replaces whatever l_model had learned from the earlier fit.
l_model.fit(x, y)

# Each query row follows the layout of `x`: the two remaining car-model
# indicator columns first, then mileage, then age in years.
for candidate in ([0, 1, 45000, 4], [1, 0, 86000, 7]):
    print(l_model.predict([candidate]))

# R^2 measured on the same data the model was fitted on.
print(l_model.score(x, y))

[[36991.31721062]]
[[11080.74313219]]
0.9417050937281083
