In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split,GridSearchCV,RandomizedSearchCV
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import load_boston
import warnings
warnings.filterwarnings("ignore")

In [2]:
bostan=load_boston()
x=pd.DataFrame(bostan.data,columns=bostan.feature_names)
y=pd.Series(bostan.target)

In [3]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.22,random_state=45)

In [4]:
# Hyperparameter tuning
rf_reg=RandomForestRegressor()
parameter={"n_estimators":np.arange(10,100),
          "criterion":["mse","mae"],
          "min_samples_split":np.arange(2,20),
          "min_samples_leaf":np.arange(2,20),
          "max_depth":np.arange(2,10),
          "random_state":[11],}

rscv_rf_reg=RandomizedSearchCV(rf_reg,parameter,cv=5)  # RandomizedSearchCV
rscv_rf_reg.fit(x_train,y_train)
rscv_rf_reg.best_estimator_

RandomForestRegressor(criterion='mae', max_depth=7, min_samples_leaf=3,
                      min_samples_split=9, n_estimators=94, random_state=11)

In [5]:
rf_reg=rscv_rf_reg.best_estimator_
rf_reg.fit(x_train,y_train)

RandomForestRegressor(criterion='mae', max_depth=7, min_samples_leaf=3,
                      min_samples_split=9, n_estimators=94, random_state=11)

In [6]:
# Testing accuracy
y_pred=rf_reg.predict(x_test)
mse = mean_squared_error(y_test,y_pred)
print("MSE :",mse)

print("*"*80)
rmse = np.sqrt(mse)
print("RMSE :",rmse)

print("*"*80)
mae = mean_absolute_error(y_test,y_pred)
print("MAE:",mae)
print("*"*80)
r2_value = r2_score(y_test,y_pred)
print("R-Squared value:",r2_value)

MSE : 13.602649626325732
********************************************************************************
RMSE : 3.688177005828995
********************************************************************************
MAE: 2.5443199088145882
********************************************************************************
R-Squared value: 0.8675530144478381


In [7]:
# Traning accuracy
y_pred_train = rf_reg.predict(x_train)

mse = mean_squared_error(y_train,y_pred_train)
print("MSE :",mse)

print("*"*80)
rmse = np.sqrt(mse)
print("RMSE :",rmse)

print("*"*80)
mae = mean_absolute_error(y_train,y_pred_train)
print("MAE:",mae)
print("*"*80)
r2_value = r2_score(y_train,y_pred_train)
print("R-Squared value:",r2_value)

MSE : 6.710925487535987
********************************************************************************
RMSE : 2.590545403488614
********************************************************************************
MAE: 1.6247205421751796
********************************************************************************
R-Squared value: 0.9152545826542581


In [8]:
# import pickle
# with open("Random_Forest.pkl",'wb') as f:
#     pickle.dump(rf_reg,f)

In [28]:
y_train

437     8.7
208    24.4
313    21.6
190    37.0
149    15.4
       ... 
32     13.2
380    10.4
131    19.6
414     7.0
459    20.0
Length: 394, dtype: float64

In [26]:
CRIM = 1.38799
ZN=0.0
INDUS=8.14 
CHAS=0.0
NOX=0.538 
RM=5.950
AGE=82.0
DIS=3.9900
RAD=4.0
TAX=307.0
PTRATIO=21.0
B=232.60
LSTAT=27.71
#32 	1.38799 	0.0 	8.14 	0.0 	0.538 	5.950 	82.0 	3.9900 	4.0 	307.0 	21.0 	232.60 	27.71

In [21]:
label_encoded_columns = {"columns":list(x.columns)}
label_encoded_columns

{'columns': ['CRIM',
  'ZN',
  'INDUS',
  'CHAS',
  'NOX',
  'RM',
  'AGE',
  'DIS',
  'RAD',
  'TAX',
  'PTRATIO',
  'B',
  'LSTAT']}

In [16]:
import json
with open("Label_Endecoded_columns.json","w") as f:
    json.dump(label_encoded_columns,f)

In [27]:
Column_names=x.columns
array = np.zeros(len(x.columns),dtype=int)

array[0]=CRIM
array[1]=ZN
array[2]=INDUS
array[3]=CHAS
array[4]=NOX
array[5]=RM
array[6]=AGE
array[7]=DIS
array[8]=RAD
array[9]=TAX
array[10]=PTRATIO
array[11]=B
array[12]=LSTAT

predicted_price = np.around(rf_reg.predict([array])[0],2)
print("predicted_price of House in Bostan is :",predicted_price)

predicted_price of House in Bostan is : 14.65
