In [46]:
## Imports

import pandas as pd
import numpy as np
from sklearn import linear_model
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
import matplotlib.pyplot as plt

In [47]:
## Load the car listings from carprices.csv (the path is relative to the notebook's working directory)
car_prices = pd.read_csv('carprices.csv')

car_prices

## Nominal variable: 'Car Model' is a set of unordered labels, so it cannot be interpreted numerically as-is. We use One Hot Encoding (OHE) to turn it into 0/1 indicator columns that a linear model can consume.

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,BMW X5,69000,18000,6
1,BMW X5,35000,34000,3
2,BMW X5,57000,26100,5
3,BMW X5,22500,40000,2
4,BMW X5,46000,31500,4
5,Audi A5,59000,29400,5
6,Audi A5,52000,32000,5
7,Audi A5,72000,19300,6
8,Audi A5,91000,12000,8
9,Mercedez Benz C class,67000,22000,6


In [48]:
## One-hot preview with pandas: one indicator column per distinct 'Car Model' value.
## This frame is for inspection only; it is not referenced by the later cells shown here.

dummies = pd.get_dummies(data=car_prices.loc[:, 'Car Model'])

dummies

Unnamed: 0,Audi A5,BMW X5,Mercedez Benz C class
0,0,1,0
1,0,1,0
2,0,1,0
3,0,1,0
4,0,1,0
5,1,0,0
6,1,0,0
7,1,0,0
8,1,0,0
9,0,0,1


In [49]:
## List the column labels; 'Car Model' sits at position 0, the index the ColumnTransformer selects with [0]
car_prices.columns

Index(['Car Model', 'Mileage', 'Sell Price($)', 'Age(yrs)'], dtype='object')

In [50]:
## Encoder for the nominal column, with default settings
car_model_ohe = OneHotEncoder()

## One-hot encode column 0 ('Car Model'); every other column is passed through unchanged
car_model_ohe_fit_transform = ColumnTransformer(
    transformers=[('Car Model', car_model_ohe, [0])],
    remainder='passthrough',
)

car_model_ohe_fit_transform

ColumnTransformer(remainder='passthrough',
                  transformers=[('Car Model', OneHotEncoder(), [0])])

In [51]:
## Feature frame: every column except the target 'Sell Price($)'.
## 'Car Model' stays at position 0, which is the column index the ColumnTransformer encodes.
car_model_x = car_prices.drop(columns=['Sell Price($)'])

car_model_x

Unnamed: 0,Car Model,Mileage,Age(yrs)
0,BMW X5,69000,6
1,BMW X5,35000,3
2,BMW X5,57000,5
3,BMW X5,22500,2
4,BMW X5,46000,4
5,Audi A5,59000,5
6,Audi A5,52000,5
7,Audi A5,72000,6
8,Audi A5,91000,8
9,Mercedez Benz C class,67000,6


In [52]:
## One-hot encode 'Car Model' (Mileage and Age(yrs) pass through), then drop the first
## indicator column so one category acts as the baseline instead of a redundant dummy.
## OneHotEncoder orders categories alphabetically, so the dropped column is 'Audi A5'.
fitted_car_model_x = car_model_ohe_fit_transform.fit_transform(car_model_x)[:, 1:]

fitted_car_model_x

## Columns: [BMW X5, Mercedez Benz C class, Mileage, Age(yrs)]  (both dummies 0 = Audi A5)

array([[1.00e+00, 0.00e+00, 6.90e+04, 6.00e+00],
       [1.00e+00, 0.00e+00, 3.50e+04, 3.00e+00],
       [1.00e+00, 0.00e+00, 5.70e+04, 5.00e+00],
       [1.00e+00, 0.00e+00, 2.25e+04, 2.00e+00],
       [1.00e+00, 0.00e+00, 4.60e+04, 4.00e+00],
       [0.00e+00, 0.00e+00, 5.90e+04, 5.00e+00],
       [0.00e+00, 0.00e+00, 5.20e+04, 5.00e+00],
       [0.00e+00, 0.00e+00, 7.20e+04, 6.00e+00],
       [0.00e+00, 0.00e+00, 9.10e+04, 8.00e+00],
       [0.00e+00, 1.00e+00, 6.70e+04, 6.00e+00],
       [0.00e+00, 1.00e+00, 8.30e+04, 7.00e+00],
       [0.00e+00, 1.00e+00, 7.90e+04, 7.00e+00],
       [0.00e+00, 1.00e+00, 5.90e+04, 5.00e+00]])

In [53]:
## Target as a 2-D column array: the list selector keeps a one-column DataFrame, giving shape (n_rows, 1)
car_model_y = car_prices.loc[:, ['Sell Price($)']].to_numpy()

car_model_y

array([[18000],
       [34000],
       [26100],
       [40000],
       [31500],
       [29400],
       [32000],
       [19300],
       [12000],
       [22000],
       [20000],
       [21000],
       [33000]], dtype=int64)

In [54]:
## Fit a linear regression on the encoded features; fit() returns the estimator itself
car_linreg_model = linear_model.LinearRegression().fit(fitted_car_model_x, car_model_y)

car_linreg_model

LinearRegression()

In [55]:
## Feature order: [BMW X5, Mercedez Benz C class, Mileage, Age(yrs)]  (both dummies 0 = Audi A5)
## This row: a Mercedez Benz C class with 45000 mileage that is 4 years old

car_linreg_model.predict([[0, 1, 45000, 4]])

array([[36991.31721062]])

In [56]:
## NOTE(review): the feature order is [BMW X5, Mercedez Benz C class, Mileage, Age(yrs)], so two zero
## dummies select the Audi A5 baseline. If a BMW X5 was intended, pass [[1, 0, 86000, 7]] instead.
car_linreg_model.predict([[0, 0, 86000, 7]])

array([[15365.40972059]])

In [57]:
## R^2 computed on the same rows the model was fitted on (in-sample), so it is not a held-out estimate
car_linreg_model.score(fitted_car_model_x, car_model_y)

0.9417050937281083