# Solve with `pandas.get_dummies()`

In [47]:
import pandas as pd
from sklearn.linear_model import LinearRegression

# Read the car listings from a CSV file given by relative path, then
# display the resulting table as the cell output.
df = pd.read_csv(filepath_or_buffer='carprices.csv')
df

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,BMW X5,69000,18000,6
1,BMW X5,35000,34000,3
2,BMW X5,57000,26100,5
3,BMW X5,22500,40000,2
4,BMW X5,46000,31500,4
5,Audi A5,59000,29400,5
6,Audi A5,52000,32000,5
7,Audi A5,72000,19300,6
8,Audi A5,91000,12000,8
9,Mercedez Benz C class,67000,22000,6


In [48]:
# One-hot encode the 'Car Model' column: one indicator column per distinct model.
# dtype=int pins the indicators to 0/1 integers. Without it, pandas >= 2.0
# returns boolean (True/False) columns, which would not match the 0/1 table
# displayed below; older pandas versions already yield integer 0/1 values.
dummies = pd.get_dummies(df[['Car Model']], dtype=int)
dummies

Unnamed: 0,Car Model_Audi A5,Car Model_BMW X5,Car Model_Mercedez Benz C class
0,0,1,0
1,0,1,0
2,0,1,0
3,0,1,0
4,0,1,0
5,1,0,0
6,1,0,0
7,1,0,0
8,1,0,0
9,0,0,1


In [49]:
# Feature matrix: indicator columns placed side by side with the original frame,
# minus
#   - 'Car Model'          (the raw text column, replaced by its indicators),
#   - 'Car Model_Audi A5'  (one indicator is dropped so the rest are not
#                           perfectly collinear; Audi A5 becomes the baseline),
#   - 'Sell Price($)'      (the target, which must not be a feature).
X = (
    pd.concat([dummies, df], axis=1)
    .drop(columns=['Car Model', 'Car Model_Audi A5', 'Sell Price($)'])
)
X

Unnamed: 0,Car Model_BMW X5,Car Model_Mercedez Benz C class,Mileage,Age(yrs)
0,1,0,69000,6
1,1,0,35000,3
2,1,0,57000,5
3,1,0,22500,2
4,1,0,46000,4
5,0,0,59000,5
6,0,0,52000,5
7,0,0,72000,6
8,0,0,91000,8
9,0,1,67000,6


In [50]:
# Target: the selling price, selected with a list of labels so it stays a
# one-column DataFrame (2-D) rather than a Series.
y = df.loc[:, ['Sell Price($)']]
y

Unnamed: 0,Sell Price($)
0,18000
1,34000
2,26100
3,40000
4,31500
5,29400
6,32000
7,19300
8,12000
9,22000


In [51]:
# Create the linear regression and fit it on the features/target in one step;
# fit() returns the estimator itself, so `model` is the fitted object.
model = LinearRegression().fit(X, y)
model

LinearRegression()

In [52]:
# Predict the price of a Mercedez Benz C class with Mileage 45000, aged 4 years.
# The model was fitted on a DataFrame, so the query is passed as a one-row
# DataFrame with the same column names, in the same order, as the training
# features. This avoids scikit-learn's "X does not have valid feature names"
# warning and makes the meaning of each value explicit.
model.predict(pd.DataFrame({
    'Car Model_BMW X5': [0],
    'Car Model_Mercedez Benz C class': [1],
    'Mileage': [45000],
    'Age(yrs)': [4],
}))

array([[36991.31721063]])

In [53]:
# Predict the price of a BMW X5 with Mileage 86000, aged 7 years.
# The model was fitted on a DataFrame, so the query is passed as a one-row
# DataFrame with the same column names, in the same order, as the training
# features. This avoids scikit-learn's "X does not have valid feature names"
# warning and makes the meaning of each value explicit.
model.predict(pd.DataFrame({
    'Car Model_BMW X5': [1],
    'Car Model_Mercedez Benz C class': [0],
    'Mileage': [86000],
    'Age(yrs)': [7],
}))

array([[11080.74313217]])

In [54]:
# Coefficient of determination (R^2) of the fitted model, evaluated on the same
# X and y it was trained on -- i.e. a training score, not a held-out estimate.
model.score(X=X, y=y)

0.9417050937281082

# Solve with `OneHotEncoder` and `ColumnTransformer`

In [55]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

In [56]:
# Start again from the raw frame: every column except the target is a feature
# ('Car Model' is still text at this point and is encoded in a later step).
X = df.drop(columns=['Sell Price($)'])
X

Unnamed: 0,Car Model,Mileage,Age(yrs)
0,BMW X5,69000,6
1,BMW X5,35000,3
2,BMW X5,57000,5
3,BMW X5,22500,2
4,BMW X5,46000,4
5,Audi A5,59000,5
6,Audi A5,52000,5
7,Audi A5,72000,6
8,Audi A5,91000,8
9,Mercedez Benz C class,67000,6


In [57]:
# One-hot encode 'Car Model' (integer indicators) and pass every other column
# through unchanged; the encoded columns come first in the result.
ct = ColumnTransformer(
    [('car', OneHotEncoder(dtype='int'), ['Car Model'])],
    remainder='passthrough',
)
# Transform a frame derived from `df` rather than `X` itself: this cell rebinds
# `X` from a DataFrame to an ndarray, so feeding `X` back in would fail on a
# re-run (string column selection needs a DataFrame). On a first run the input
# is identical to the feature frame built from `df` without 'Sell Price($)'.
X = ct.fit_transform(df.drop('Sell Price($)', axis='columns'))
X

array([[    0,     1,     0, 69000,     6],
       [    0,     1,     0, 35000,     3],
       [    0,     1,     0, 57000,     5],
       [    0,     1,     0, 22500,     2],
       [    0,     1,     0, 46000,     4],
       [    1,     0,     0, 59000,     5],
       [    1,     0,     0, 52000,     5],
       [    1,     0,     0, 72000,     6],
       [    1,     0,     0, 91000,     8],
       [    0,     0,     1, 67000,     6],
       [    0,     0,     1, 83000,     7],
       [    0,     0,     1, 79000,     7],
       [    0,     0,     1, 59000,     5]])

In [58]:
# Drop the first column (the first one-hot indicator) so the remaining
# indicators are not perfectly collinear.
# The slice is taken from a fresh transform of the raw features instead of from
# `X` itself: `X = X[:, 1:]` removes one more column every time the cell is
# re-run, whereas this form always yields the same 4-column matrix.
X = ct.transform(df.drop('Sell Price($)', axis='columns'))[:, 1:]
X

array([[    1,     0, 69000,     6],
       [    1,     0, 35000,     3],
       [    1,     0, 57000,     5],
       [    1,     0, 22500,     2],
       [    1,     0, 46000,     4],
       [    0,     0, 59000,     5],
       [    0,     0, 52000,     5],
       [    0,     0, 72000,     6],
       [    0,     0, 91000,     8],
       [    0,     1, 67000,     6],
       [    0,     1, 83000,     7],
       [    0,     1, 79000,     7],
       [    0,     1, 59000,     5]])

In [61]:
# Refit the existing estimator on the array-based features and, since fit()
# returns the estimator, predict straight away.
# Column order after dropping the first indicator: BMW X5, Mercedez Benz C class,
# Mileage, Age(yrs) -> this row is a Mercedez Benz C class, Mileage 45000, 4 years.
model.fit(X, y).predict([[0, 1, 45000, 4]])

array([[36991.31721063]])

In [62]:
# Same column order as the training array: BMW X5, Mercedez Benz C class,
# Mileage, Age(yrs) -> this row is a BMW X5, Mileage 86000, 7 years.
model.predict(X=[[1, 0, 86000, 7]])

array([[11080.74313217]])