# CAR PRICE PREDICTION USING RANDOM FOREST REGRESSOR

## Import Necessary Libraries

In [67]:
import pandas as pd
import numpy as np
import category_encoders as ce

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_percentage_error, mean_absolute_error, r2_score


## Data Understanding

In [40]:
data = pd.read_csv('../Datasets/03_CarPrice.csv')
data

Unnamed: 0,car_ID,symboling,CarName,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,...,enginesize,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price
0,1,3,alfa-romero giulia,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0
1,2,3,alfa-romero stelvio,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0
2,3,1,alfa-romero Quadrifoglio,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0
3,4,2,audi 100 ls,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.40,10.0,102,5500,24,30,13950.0
4,5,2,audi 100ls,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.40,8.0,115,5500,18,22,17450.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,201,-1,volvo 145e (sw),gas,std,four,sedan,rwd,front,109.1,...,141,mpfi,3.78,3.15,9.5,114,5400,23,28,16845.0
201,202,-1,volvo 144ea,gas,turbo,four,sedan,rwd,front,109.1,...,141,mpfi,3.78,3.15,8.7,160,5300,19,25,19045.0
202,203,-1,volvo 244dl,gas,std,four,sedan,rwd,front,109.1,...,173,mpfi,3.58,2.87,8.8,134,5500,18,23,21485.0
203,204,-1,volvo 246,diesel,turbo,four,sedan,rwd,front,109.1,...,145,idi,3.01,3.40,23.0,106,4800,26,27,22470.0


In [41]:
data.isna().sum()

car_ID              0
symboling           0
CarName             0
fueltype            0
aspiration          0
doornumber          0
carbody             0
drivewheel          0
enginelocation      0
wheelbase           0
carlength           0
carwidth            0
carheight           0
curbweight          0
enginetype          0
cylindernumber      0
enginesize          0
fuelsystem          0
boreratio           0
stroke              0
compressionratio    0
horsepower          0
peakrpm             0
citympg             0
highwaympg          0
price               0
dtype: int64

In [42]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 26 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   car_ID            205 non-null    int64  
 1   symboling         205 non-null    int64  
 2   CarName           205 non-null    object 
 3   fueltype          205 non-null    object 
 4   aspiration        205 non-null    object 
 5   doornumber        205 non-null    object 
 6   carbody           205 non-null    object 
 7   drivewheel        205 non-null    object 
 8   enginelocation    205 non-null    object 
 9   wheelbase         205 non-null    float64
 10  carlength         205 non-null    float64
 11  carwidth          205 non-null    float64
 12  carheight         205 non-null    float64
 13  curbweight        205 non-null    int64  
 14  enginetype        205 non-null    object 
 15  cylindernumber    205 non-null    object 
 16  enginesize        205 non-null    int64  
 1

In [43]:
data.describe()

Unnamed: 0,car_ID,symboling,wheelbase,carlength,carwidth,carheight,curbweight,enginesize,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price
count,205.0,205.0,205.0,205.0,205.0,205.0,205.0,205.0,205.0,205.0,205.0,205.0,205.0,205.0,205.0,205.0
mean,103.0,0.834146,98.756585,174.049268,65.907805,53.724878,2555.565854,126.907317,3.329756,3.255415,10.142537,104.117073,5125.121951,25.219512,30.75122,13276.710571
std,59.322565,1.245307,6.021776,12.337289,2.145204,2.443522,520.680204,41.642693,0.270844,0.313597,3.97204,39.544167,476.985643,6.542142,6.886443,7988.852332
min,1.0,-2.0,86.6,141.1,60.3,47.8,1488.0,61.0,2.54,2.07,7.0,48.0,4150.0,13.0,16.0,5118.0
25%,52.0,0.0,94.5,166.3,64.1,52.0,2145.0,97.0,3.15,3.11,8.6,70.0,4800.0,19.0,25.0,7788.0
50%,103.0,1.0,97.0,173.2,65.5,54.1,2414.0,120.0,3.31,3.29,9.0,95.0,5200.0,24.0,30.0,10295.0
75%,154.0,2.0,102.4,183.1,66.9,55.5,2935.0,141.0,3.58,3.41,9.4,116.0,5500.0,30.0,34.0,16503.0
max,205.0,3.0,120.9,208.1,72.3,59.8,4066.0,326.0,3.94,4.17,23.0,288.0,6600.0,49.0,54.0,45400.0


In [44]:
data['CarName'].value_counts()

CarName
peugeot 504                 6
toyota corolla              6
toyota corona               6
subaru dl                   4
mitsubishi outlander        3
                           ..
volkswagen super beetle     1
volkswagen rabbit custom    1
volvo 245                   1
volvo diesel                1
volvo 246                   1
Name: count, Length: 147, dtype: int64

In [45]:
data['aspiration'].value_counts()

aspiration
std      168
turbo     37
Name: count, dtype: int64

In [46]:
data['cylindernumber'].value_counts()

cylindernumber
four      159
six        24
five       11
eight       5
two         4
twelve      1
three       1
Name: count, dtype: int64

In [47]:
data['fuelsystem'].value_counts()

fuelsystem
mpfi    94
2bbl    66
idi     20
1bbl    11
spdi     9
4bbl     3
mfi      1
spfi     1
Name: count, dtype: int64

In [48]:
data['enginetype'].value_counts()

enginetype
ohc      148
ohcf      15
ohcv      13
dohc      12
l         12
rotor      4
dohcv      1
Name: count, dtype: int64

## Data Pre-processing

In [49]:
data1 = data.drop(['car_ID'], axis=1)
data1

Unnamed: 0,symboling,CarName,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,carlength,...,enginesize,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price
0,3,alfa-romero giulia,gas,std,two,convertible,rwd,front,88.6,168.8,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0
1,3,alfa-romero stelvio,gas,std,two,convertible,rwd,front,88.6,168.8,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0
2,1,alfa-romero Quadrifoglio,gas,std,two,hatchback,rwd,front,94.5,171.2,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0
3,2,audi 100 ls,gas,std,four,sedan,fwd,front,99.8,176.6,...,109,mpfi,3.19,3.40,10.0,102,5500,24,30,13950.0
4,2,audi 100ls,gas,std,four,sedan,4wd,front,99.4,176.6,...,136,mpfi,3.19,3.40,8.0,115,5500,18,22,17450.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,-1,volvo 145e (sw),gas,std,four,sedan,rwd,front,109.1,188.8,...,141,mpfi,3.78,3.15,9.5,114,5400,23,28,16845.0
201,-1,volvo 144ea,gas,turbo,four,sedan,rwd,front,109.1,188.8,...,141,mpfi,3.78,3.15,8.7,160,5300,19,25,19045.0
202,-1,volvo 244dl,gas,std,four,sedan,rwd,front,109.1,188.8,...,173,mpfi,3.58,2.87,8.8,134,5500,18,23,21485.0
203,-1,volvo 246,diesel,turbo,four,sedan,rwd,front,109.1,188.8,...,145,idi,3.01,3.40,23.0,106,4800,26,27,22470.0


In [50]:
symboling_mapping = {-3: 3, -2: 2, -1: 1, 0: 0, 1: -1, 2: -2, 3: -3}

data1['Symboling_Categorical'] = data1['symboling'].map(symboling_mapping)

data1

Unnamed: 0,symboling,CarName,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,carlength,...,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price,Symboling_Categorical
0,3,alfa-romero giulia,gas,std,two,convertible,rwd,front,88.6,168.8,...,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0,-3
1,3,alfa-romero stelvio,gas,std,two,convertible,rwd,front,88.6,168.8,...,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0,-3
2,1,alfa-romero Quadrifoglio,gas,std,two,hatchback,rwd,front,94.5,171.2,...,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0,-1
3,2,audi 100 ls,gas,std,four,sedan,fwd,front,99.8,176.6,...,mpfi,3.19,3.40,10.0,102,5500,24,30,13950.0,-2
4,2,audi 100ls,gas,std,four,sedan,4wd,front,99.4,176.6,...,mpfi,3.19,3.40,8.0,115,5500,18,22,17450.0,-2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,-1,volvo 145e (sw),gas,std,four,sedan,rwd,front,109.1,188.8,...,mpfi,3.78,3.15,9.5,114,5400,23,28,16845.0,1
201,-1,volvo 144ea,gas,turbo,four,sedan,rwd,front,109.1,188.8,...,mpfi,3.78,3.15,8.7,160,5300,19,25,19045.0,1
202,-1,volvo 244dl,gas,std,four,sedan,rwd,front,109.1,188.8,...,mpfi,3.58,2.87,8.8,134,5500,18,23,21485.0,1
203,-1,volvo 246,diesel,turbo,four,sedan,rwd,front,109.1,188.8,...,idi,3.01,3.40,23.0,106,4800,26,27,22470.0,1


In [51]:
data2 = data1.drop(['symboling'], axis=1)
data2

Unnamed: 0,CarName,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,carlength,carwidth,...,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price,Symboling_Categorical
0,alfa-romero giulia,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,...,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0,-3
1,alfa-romero stelvio,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,...,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0,-3
2,alfa-romero Quadrifoglio,gas,std,two,hatchback,rwd,front,94.5,171.2,65.5,...,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0,-1
3,audi 100 ls,gas,std,four,sedan,fwd,front,99.8,176.6,66.2,...,mpfi,3.19,3.40,10.0,102,5500,24,30,13950.0,-2
4,audi 100ls,gas,std,four,sedan,4wd,front,99.4,176.6,66.4,...,mpfi,3.19,3.40,8.0,115,5500,18,22,17450.0,-2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,volvo 145e (sw),gas,std,four,sedan,rwd,front,109.1,188.8,68.9,...,mpfi,3.78,3.15,9.5,114,5400,23,28,16845.0,1
201,volvo 144ea,gas,turbo,four,sedan,rwd,front,109.1,188.8,68.8,...,mpfi,3.78,3.15,8.7,160,5300,19,25,19045.0,1
202,volvo 244dl,gas,std,four,sedan,rwd,front,109.1,188.8,68.9,...,mpfi,3.58,2.87,8.8,134,5500,18,23,21485.0,1
203,volvo 246,diesel,turbo,four,sedan,rwd,front,109.1,188.8,68.9,...,idi,3.01,3.40,23.0,106,4800,26,27,22470.0,1


In [55]:
#LABEL ENCODE
le_enc_aspiration = ce.OrdinalEncoder(cols=['aspiration'], return_df=True, mapping=[{'col':'aspiration',
                                                                              'mapping':{'std':0,'turbo':1}}])

data3_enc = le_enc_aspiration.fit_transform(data2)
data3_enc

Unnamed: 0,CarName,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,carlength,carwidth,...,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price,Symboling_Categorical
0,alfa-romero giulia,gas,0,two,convertible,rwd,front,88.6,168.8,64.1,...,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0,-3
1,alfa-romero stelvio,gas,0,two,convertible,rwd,front,88.6,168.8,64.1,...,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0,-3
2,alfa-romero Quadrifoglio,gas,0,two,hatchback,rwd,front,94.5,171.2,65.5,...,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0,-1
3,audi 100 ls,gas,0,four,sedan,fwd,front,99.8,176.6,66.2,...,mpfi,3.19,3.40,10.0,102,5500,24,30,13950.0,-2
4,audi 100ls,gas,0,four,sedan,4wd,front,99.4,176.6,66.4,...,mpfi,3.19,3.40,8.0,115,5500,18,22,17450.0,-2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,volvo 145e (sw),gas,0,four,sedan,rwd,front,109.1,188.8,68.9,...,mpfi,3.78,3.15,9.5,114,5400,23,28,16845.0,1
201,volvo 144ea,gas,1,four,sedan,rwd,front,109.1,188.8,68.8,...,mpfi,3.78,3.15,8.7,160,5300,19,25,19045.0,1
202,volvo 244dl,gas,0,four,sedan,rwd,front,109.1,188.8,68.9,...,mpfi,3.58,2.87,8.8,134,5500,18,23,21485.0,1
203,volvo 246,diesel,1,four,sedan,rwd,front,109.1,188.8,68.9,...,idi,3.01,3.40,23.0,106,4800,26,27,22470.0,1


In [57]:
#LABEL ENCODE
le_enc_cynum = ce.OrdinalEncoder(cols=['cylindernumber'], return_df=True, mapping=[{'col':'cylindernumber',
                                                                                     'mapping':{'two':0,'three':1, 'four':2, 'five':3, 'six':4, 'eight':5, 'twelve':6}},])

data4 = le_enc_cynum.fit_transform(data3_enc)
data4

Unnamed: 0,CarName,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,carlength,carwidth,...,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price,Symboling_Categorical
0,alfa-romero giulia,gas,0,two,convertible,rwd,front,88.6,168.8,64.1,...,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0,-3
1,alfa-romero stelvio,gas,0,two,convertible,rwd,front,88.6,168.8,64.1,...,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0,-3
2,alfa-romero Quadrifoglio,gas,0,two,hatchback,rwd,front,94.5,171.2,65.5,...,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0,-1
3,audi 100 ls,gas,0,four,sedan,fwd,front,99.8,176.6,66.2,...,mpfi,3.19,3.40,10.0,102,5500,24,30,13950.0,-2
4,audi 100ls,gas,0,four,sedan,4wd,front,99.4,176.6,66.4,...,mpfi,3.19,3.40,8.0,115,5500,18,22,17450.0,-2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,volvo 145e (sw),gas,0,four,sedan,rwd,front,109.1,188.8,68.9,...,mpfi,3.78,3.15,9.5,114,5400,23,28,16845.0,1
201,volvo 144ea,gas,1,four,sedan,rwd,front,109.1,188.8,68.8,...,mpfi,3.78,3.15,8.7,160,5300,19,25,19045.0,1
202,volvo 244dl,gas,0,four,sedan,rwd,front,109.1,188.8,68.9,...,mpfi,3.58,2.87,8.8,134,5500,18,23,21485.0,1
203,volvo 246,diesel,1,four,sedan,rwd,front,109.1,188.8,68.9,...,idi,3.01,3.40,23.0,106,4800,26,27,22470.0,1


In [58]:
test_enc3 = ce.OneHotEncoder(cols=['enginelocation', 'drivewheel', 'carbody', 'doornumber', 'fueltype'], handle_unknown='return_nan',
                             return_df=True, use_cat_names=True)

data3_enc1 = test_enc3.fit_transform(data3_enc)
data3_enc1

Unnamed: 0,CarName,fueltype_gas,fueltype_diesel,aspiration,doornumber_two,doornumber_four,carbody_convertible,carbody_hatchback,carbody_sedan,carbody_wagon,...,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price,Symboling_Categorical
0,alfa-romero giulia,1.0,0.0,0,1.0,0.0,1.0,0.0,0.0,0.0,...,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0,-3
1,alfa-romero stelvio,1.0,0.0,0,1.0,0.0,1.0,0.0,0.0,0.0,...,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0,-3
2,alfa-romero Quadrifoglio,1.0,0.0,0,1.0,0.0,0.0,1.0,0.0,0.0,...,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0,-1
3,audi 100 ls,1.0,0.0,0,0.0,1.0,0.0,0.0,1.0,0.0,...,mpfi,3.19,3.40,10.0,102,5500,24,30,13950.0,-2
4,audi 100ls,1.0,0.0,0,0.0,1.0,0.0,0.0,1.0,0.0,...,mpfi,3.19,3.40,8.0,115,5500,18,22,17450.0,-2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,volvo 145e (sw),1.0,0.0,0,0.0,1.0,0.0,0.0,1.0,0.0,...,mpfi,3.78,3.15,9.5,114,5400,23,28,16845.0,1
201,volvo 144ea,1.0,0.0,1,0.0,1.0,0.0,0.0,1.0,0.0,...,mpfi,3.78,3.15,8.7,160,5300,19,25,19045.0,1
202,volvo 244dl,1.0,0.0,0,0.0,1.0,0.0,0.0,1.0,0.0,...,mpfi,3.58,2.87,8.8,134,5500,18,23,21485.0,1
203,volvo 246,0.0,1.0,1,0.0,1.0,0.0,0.0,1.0,0.0,...,idi,3.01,3.40,23.0,106,4800,26,27,22470.0,1


In [59]:
#SCALE
col_to_scale = ['wheelbase', 'carlength', 'carwidth','carheight', 'curbweight', 'enginesize', 'boreratio', 'stroke', 'compressionratio', 'horsepower', 'peakrpm', 'citympg', 'highwaympg']

stdsc = StandardScaler()

data3_enc1[col_to_scale] = stdsc.fit_transform(data3_enc1[col_to_scale])

data3_enc1

Unnamed: 0,CarName,fueltype_gas,fueltype_diesel,aspiration,doornumber_two,doornumber_four,carbody_convertible,carbody_hatchback,carbody_sedan,carbody_wagon,...,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price,Symboling_Categorical
0,alfa-romero giulia,1.0,0.0,0,1.0,0.0,1.0,0.0,0.0,0.0,...,mpfi,0.519071,-1.839377,-0.288349,0.174483,-0.262960,-0.646553,-0.546059,13495.0,-3
1,alfa-romero stelvio,1.0,0.0,0,1.0,0.0,1.0,0.0,0.0,0.0,...,mpfi,0.519071,-1.839377,-0.288349,0.174483,-0.262960,-0.646553,-0.546059,16500.0,-3
2,alfa-romero Quadrifoglio,1.0,0.0,0,1.0,0.0,0.0,1.0,0.0,0.0,...,mpfi,-2.404880,0.685946,-0.288349,1.264536,-0.262960,-0.953012,-0.691627,16500.0,-1
3,audi 100 ls,1.0,0.0,0,0.0,1.0,0.0,0.0,1.0,0.0,...,mpfi,-0.517266,0.462183,-0.035973,-0.053668,0.787855,-0.186865,-0.109354,13950.0,-2
4,audi 100ls,1.0,0.0,0,0.0,1.0,0.0,0.0,1.0,0.0,...,mpfi,-0.517266,0.462183,-0.540725,0.275883,0.787855,-1.106241,-1.273900,17450.0,-2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,volvo 145e (sw),1.0,0.0,0,0.0,1.0,0.0,0.0,1.0,0.0,...,mpfi,1.666445,-0.336970,-0.162161,0.250533,0.577692,-0.340094,-0.400490,16845.0,1
201,volvo 144ea,1.0,0.0,1,0.0,1.0,0.0,0.0,1.0,0.0,...,mpfi,1.666445,-0.336970,-0.364062,1.416637,0.367529,-0.953012,-0.837195,19045.0,1
202,volvo 244dl,1.0,0.0,0,0.0,1.0,0.0,0.0,1.0,0.0,...,mpfi,0.926204,-1.232021,-0.338824,0.757535,0.787855,-1.106241,-1.128332,21485.0,1
203,volvo 246,0.0,1.0,1,0.0,1.0,0.0,0.0,1.0,0.0,...,idi,-1.183483,0.462183,3.244916,0.047732,-0.683286,0.119594,-0.546059,22470.0,1


In [60]:
le = LabelEncoder()

data3_enc1['CarName'] = le.fit_transform(data3_enc1['CarName'])
data3_enc1['cylindernumber'] = le.fit_transform(data3_enc1['cylindernumber'])
data3_enc1['fuelsystem'] = le.fit_transform(data3_enc1['fuelsystem'])
data3_enc1['enginetype'] = le.fit_transform(data3_enc1['enginetype'])

data3_enc1

Unnamed: 0,CarName,fueltype_gas,fueltype_diesel,aspiration,doornumber_two,doornumber_four,carbody_convertible,carbody_hatchback,carbody_sedan,carbody_wagon,...,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price,Symboling_Categorical
0,2,1.0,0.0,0,1.0,0.0,1.0,0.0,0.0,0.0,...,5,0.519071,-1.839377,-0.288349,0.174483,-0.262960,-0.646553,-0.546059,13495.0,-3
1,3,1.0,0.0,0,1.0,0.0,1.0,0.0,0.0,0.0,...,5,0.519071,-1.839377,-0.288349,0.174483,-0.262960,-0.646553,-0.546059,16500.0,-3
2,1,1.0,0.0,0,1.0,0.0,0.0,1.0,0.0,0.0,...,5,-2.404880,0.685946,-0.288349,1.264536,-0.262960,-0.953012,-0.691627,16500.0,-1
3,4,1.0,0.0,0,0.0,1.0,0.0,0.0,1.0,0.0,...,5,-0.517266,0.462183,-0.035973,-0.053668,0.787855,-0.186865,-0.109354,13950.0,-2
4,5,1.0,0.0,0,0.0,1.0,0.0,0.0,1.0,0.0,...,5,-0.517266,0.462183,-0.540725,0.275883,0.787855,-1.106241,-1.273900,17450.0,-2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,139,1.0,0.0,0,0.0,1.0,0.0,0.0,1.0,0.0,...,5,1.666445,-0.336970,-0.162161,0.250533,0.577692,-0.340094,-0.400490,16845.0,1
201,138,1.0,0.0,1,0.0,1.0,0.0,0.0,1.0,0.0,...,5,1.666445,-0.336970,-0.364062,1.416637,0.367529,-0.953012,-0.837195,19045.0,1
202,140,1.0,0.0,0,0.0,1.0,0.0,0.0,1.0,0.0,...,5,0.926204,-1.232021,-0.338824,0.757535,0.787855,-1.106241,-1.128332,21485.0,1
203,142,0.0,1.0,1,0.0,1.0,0.0,0.0,1.0,0.0,...,3,-1.183483,0.462183,3.244916,0.047732,-0.683286,0.119594,-0.546059,22470.0,1


## Modelling

In [61]:
X = data3_enc1.drop(['price'], axis=1)
y = data3_enc1['price']

In [62]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#### Random Forest

In [63]:
model_rfr = RandomForestRegressor()

model_rfr.fit(X_train, y_train)

y_pred = model_rfr.predict(X_test)

In [64]:
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
mape = mean_absolute_percentage_error(y_test, y_pred)

print(f'MAE score : {mae:.4f}')
print(f'MSE score : {mae:.4f}')
print(f'MAPE score : {mae:.4f}')

MAE score : 1327.4326
MSE score : 1327.4326
MAPE score : 1327.4326


In [65]:
mpe = abs (np.mean((y_test - y_pred) / y_test)) * 100
print(mpe)

1.7156129180500361


In [68]:
r2 = r2_score(y_test, y_pred)
print(f"R2 score on random forest: {r2:.4f}")

R2 score on random forest: 0.9558
