## Import libraries

In [1]:
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.feature_selection import SelectFromModel

import matplotlib.pyplot as plt
import seaborn as sns

## Load data

In [2]:
import os

# Default location of the dataset on the original author's machine. Set the
# HOUSE_CSV environment variable to load the file from another location
# without editing the notebook; when it is unset the original path is used.
house_csv = os.environ.get(
    'HOUSE_CSV',
    r'C:\\Users\\Benai\\Documents\\machin-learning\\regression-model-evaluation\house.csv',
)
house = pd.read_csv(house_csv)

In [3]:
# Preview the first five rows of the dataset
house.head(n=5)

Unnamed: 0,Area,Garage,FirePlace,Baths,White Marble,Black Marble,Indian Marble,Floors,City,Solar,Electric,Fiber,Glass Doors,Swiming Pool,Garden,Prices
0,164,2,0,2,0,1,0,0,3,1,1,1,1,0,0,43800
1,84,2,0,4,0,0,1,1,2,0,0,0,1,1,1,37550
2,190,2,4,4,1,0,0,0,2,0,0,1,0,0,0,49500
3,75,2,4,4,0,0,1,1,1,1,1,1,1,1,1,50075
4,148,1,4,2,1,0,0,1,2,1,0,0,1,1,1,52400


In [4]:
# Number of rows and columns, as a (rows, columns) tuple
house.shape

(500000, 16)

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) per column
house.describe()

Unnamed: 0,Area,Garage,FirePlace,Baths,White Marble,Black Marble,Indian Marble,Floors,City,Solar,Electric,Fiber,Glass Doors,Swiming Pool,Garden,Prices
count,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0,500000.0
mean,124.929554,2.00129,2.003398,2.998074,0.332992,0.33269,0.334318,0.499386,2.00094,0.498694,0.50065,0.500468,0.49987,0.500436,0.501646,42050.13935
std,71.795363,0.817005,1.414021,1.414227,0.471284,0.471177,0.471752,0.5,0.816209,0.499999,0.5,0.5,0.5,0.5,0.499998,12110.237201
min,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,7725.0
25%,63.0,1.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,33500.0
50%,125.0,2.0,2.0,3.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0,1.0,0.0,1.0,1.0,41850.0
75%,187.0,3.0,3.0,4.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,50750.0
max,249.0,3.0,4.0,5.0,1.0,1.0,1.0,1.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,77975.0


In [6]:
# Count the missing values in each column
house.isnull().sum()

Area             0
Garage           0
FirePlace        0
Baths            0
White Marble     0
Black Marble     0
Indian Marble    0
Floors           0
City             0
Solar            0
Electric         0
Fiber            0
Glass Doors      0
Swiming Pool     0
Garden           0
Prices           0
dtype: int64

In [7]:
# Separate predictors from the target: the last column of the frame is the
# target, every other column is a predictor
target_col = house.columns[-1]
x = house.drop(columns=target_col)
y = house[target_col]

## Train test split

In [8]:
# Hold out 30% of the rows for testing. The fixed random_state makes the
# shuffle deterministic, so the same rows land in each partition on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)

## Fit the model to the training data

In [9]:
# Random forest of 150 trees, trained in parallel on all CPU cores (n_jobs=-1).
# random_state pins the forest's internal randomness so repeated runs fit the
# same model and report the same metrics.
rfr = RandomForestRegressor(n_estimators=150, n_jobs=-1, random_state=42)

rfr.fit(x_train, y_train)

RandomForestRegressor(n_estimators=150, n_jobs=-1)

In [10]:
# Predict the target for the held-out test features with the fitted forest
y_predict = rfr.predict(x_test)

# Evaluate model 
### 1. Mean squared error (MSE)

In [11]:
from sklearn.metrics import mean_squared_error

# Mean of the squared differences between actual and predicted test targets
mse = mean_squared_error(y_true=y_test, y_pred=y_predict)
print('Mean squared error --->', mse)

Mean squared error ---> 53159.78180462962


### 2. Root Mean squared error RMSE

In [12]:
# RMSE is the square root of the MSE, expressed in the target's own units
rmse = np.sqrt(mse)
print('Root mean squared error --->', rmse)

Root mean squared error ---> 230.56405141441635


### 3. Mean absolute error MAE

In [13]:
from sklearn.metrics import mean_absolute_error

# Mean of the absolute differences between actual and predicted test targets
mae = mean_absolute_error(y_test, y_predict)

# Label corrected: this value is the mean *absolute* error, not the MSE
print('Mean absolute error --->', mae)

Mean squared error ---> 176.16001444444447


### 4. Mean Squared Log Error MSLE 

In [14]:
from sklearn.metrics import mean_squared_log_error

# Mean squared logarithmic error between actual and predicted test targets
msle = mean_squared_log_error(y_true=y_test, y_pred=y_predict)
print('Mean squared log error --->', msle)

Mean squared log error ---> 4.16193486684169e-05


### 5. Root Mean Squared Log Error RMSLE

In [15]:
# RMSLE is the square root of the mean squared log error
rmsle = np.sqrt(msle)
print('Root mean log squared error --->', rmsle)

Root mean log squared error ---> 0.006451305966113908


### 6. R-squared

In [16]:
from sklearn.metrics import r2_score

# Coefficient of determination of the predictions on the test set
r_square = r2_score(y_true=y_test, y_pred=y_predict)
print('R-squared --->', r_square)

R-squared ---> 0.9996372330417861


### 7. Adjusted R Squared 

In [None]:
# Sample size used for the adjustment. r_square was computed from the test-set
# predictions, so n must be the number of test rows, not training rows.
n = x_test.shape[0]

# Total number of features / independent variables
k = x_test.shape[1]

# Adjusted R^2 = 1 - (1 - R^2) * (n - 1) / (n - k - 1)
r_square_adj = 1 - (1 - r_square) * (n - 1) / (n - k - 1)

print('Adjusted R-squared --->', r_square_adj)