# Importing Libraries

In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [7]:
df = pd.read_csv('audi.csv')
X = df.iloc[:,[0,1,3,4,5,6,7,8]].values
Y = df.iloc[:,[2]].values

In [8]:
print(X)

[[' A1' 2017 'Manual' ... 150 55.4 1.4]
 [' A6' 2016 'Automatic' ... 20 64.2 2.0]
 [' A1' 2016 'Manual' ... 30 55.4 1.4]
 ...
 [' A3' 2020 'Manual' ... 150 49.6 1.0]
 [' Q3' 2017 'Automatic' ... 150 47.9 1.4]
 [' Q3' 2016 'Manual' ... 150 47.9 1.4]]


In [9]:
print(Y)

[[12500]
 [16500]
 [11000]
 ...
 [17199]
 [19499]
 [15999]]


#Data Preprocessing

#Label Encoding

In [10]:
from sklearn.preprocessing import LabelEncoder
le1 = LabelEncoder()
X[:,0] = le1.fit_transform(X[:,0])
le2 = LabelEncoder()
X[:,-4] = le2.fit_transform(X[:,-4])

In [11]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
ct = ColumnTransformer(transformers = [('encoder',OneHotEncoder(),[2])],remainder='passthrough')
X = ct.fit_transform(X)

In [12]:
print(X)

[[0.0 1.0 0.0 ... 150 55.4 1.4]
 [1.0 0.0 0.0 ... 20 64.2 2.0]
 [0.0 1.0 0.0 ... 30 55.4 1.4]
 ...
 [0.0 1.0 0.0 ... 150 49.6 1.0]
 [1.0 0.0 0.0 ... 150 47.9 1.4]
 [0.0 1.0 0.0 ... 150 47.9 1.4]]


#Feature Scaling

In [13]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X = sc.fit_transform(X)

In [14]:
print(X)

[[-0.58326752  1.2007284  -0.71233307 ...  0.35714729  0.35755001
  -0.88021837]
 [ 1.71447913 -0.83282781 -0.71233307 ... -1.57832278  1.03713001
   0.11492465]
 [-0.58326752  1.2007284  -0.71233307 ... -1.42944047  0.35755001
  -0.88021837]
 ...
 [-0.58326752  1.2007284  -0.71233307 ...  0.35714729 -0.09035499
  -1.54364705]
 [ 1.71447913 -0.83282781 -0.71233307 ...  0.35714729 -0.22163749
  -0.88021837]
 [-0.58326752  1.2007284  -0.71233307 ...  0.35714729 -0.22163749
  -0.88021837]]


#Splitting Dataset into Training set and Test set

In [15]:
from sklearn.model_selection import train_test_split
(X_train,X_test,Y_train,Y_test) = train_test_split(X,Y,test_size=0.2,random_state=0)

#Training Model

In [16]:
from sklearn.ensemble import RandomForestRegressor
regression = RandomForestRegressor(random_state=0)
regression.fit(X_train,Y_train)

  This is separate from the ipykernel package so we can avoid doing imports until


RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=None, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=0, verbose=0, warm_start=False)

In [17]:
y_pred = regression.predict(X_test)

#Testing result

In [18]:
print(np.concatenate((y_pred.reshape(len(y_pred),1),Y_test.reshape(len(Y_test),1)),1))

[[14337.15 14998.  ]
 [23450.35 21950.  ]
 [27330.07 28990.  ]
 ...
 [46275.18 45995.  ]
 [31359.   30500.  ]
 [ 9929.62  8400.  ]]


#Calculating Accuracy

In [28]:
from sklearn.metrics import r2_score,mean_absolute_error
r2_score(Y_test, y_pred)

0.9536134841307546

In [29]:
mean_absolute_error(Y_test,y_pred)

1538.730980670462

In [20]:
print(y_pred)

[14337.15 23450.35 27330.07 ... 46275.18 31359.    9929.62]


#Reshape to 2D

In [21]:
print(Y_test)

[[14998]
 [21950]
 [28990]
 ...
 [45995]
 [30500]
 [ 8400]]


In [22]:
y_pred = np.reshape(y_pred,(-1,1))

#Making Pandas DataFrame

In [23]:
mydata = np.concatenate((Y_test,y_pred),axis=1)
dataframe = pd.DataFrame(mydata,columns=['Real Price','Predicted Price'])

In [26]:
print(dataframe)

      Real Price  Predicted Price
0        14998.0         14337.15
1        21950.0         23450.35
2        28990.0         27330.07
3        25489.0         27200.98
4        30950.0         32250.05
...          ...              ...
2129     23700.0         39147.77
2130     18000.0         16679.95
2131     45995.0         46275.18
2132     30500.0         31359.00
2133      8400.0          9929.62

[2134 rows x 2 columns]
