In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split,GridSearchCV,RandomizedSearchCV
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import load_boston
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the dataset returned by scikit-learn's `load_boston` and expose it as
# a feature frame `x` (columns labelled with the dataset's feature names) and
# a target series `y`.
# NOTE(review): `load_boston` is deprecated in scikit-learn 1.0 and removed in
# 1.2 - confirm the pinned scikit-learn version before re-running.
bostan = load_boston()
x = pd.DataFrame(data=bostan.data, columns=bostan.feature_names)
y = pd.Series(data=bostan.target)

In [3]:
# Hold out 22% of the rows as a test set; the fixed seed keeps the split
# identical across kernel restarts.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.22,
    random_state=45,
)

In [4]:
# Hyperparameter tuning: randomized search over random-forest settings,
# evaluated with 5-fold cross-validation on the training split only.
rf_reg = RandomForestRegressor()
parameter = {
    "n_estimators": np.arange(10, 100),
    # NOTE(review): "mse"/"mae" are the legacy criterion names; newer
    # scikit-learn releases call them "squared_error"/"absolute_error" -
    # confirm against the installed version before upgrading.
    "criterion": ["mse", "mae"],
    "min_samples_split": np.arange(2, 20),
    "min_samples_leaf": np.arange(2, 20),
    "max_depth": np.arange(2, 10),
    # Single-element list: pins the forest's own seed for every candidate.
    "random_state": [11],
}

# Seed the search itself. Without `random_state` the candidates drawn from
# `parameter` differ on every run, so the selected model (and every metric
# reported below) would not be reproducible under Restart & Run All.
rscv_rf_reg = RandomizedSearchCV(rf_reg, parameter, cv=5, random_state=11)
rscv_rf_reg.fit(x_train, y_train)
rscv_rf_reg.best_estimator_

RandomForestRegressor(criterion='mse', max_depth=6, min_samples_leaf=4,
                      min_samples_split=8, n_estimators=24, random_state=11)

In [5]:
# Take the winning model from the search. The search was built with the
# default `refit=True`, so `best_estimator_` has already been refit on the
# full training split with the chosen parameters (including its fixed
# `random_state`); fitting it again on the same data would only repeat that
# work and yield the same model.
rf_reg = rscv_rf_reg.best_estimator_
rf_reg  # last expression: display the selected estimator

RandomForestRegressor(criterion='mse', max_depth=6, min_samples_leaf=4,
                      min_samples_split=8, n_estimators=24, random_state=11)

In [6]:
# Test-set evaluation: predict on the held-out rows, compute the regression
# error metrics up front, then print them as a separator-delimited report.
y_pred = rf_reg.predict(x_test)

mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE computed above
mae = mean_absolute_error(y_test, y_pred)
r2_value = r2_score(y_test, y_pred)

print("MSE :", mse)
print("*" * 80)
print("RMSE :", rmse)
print("*" * 80)
print("MAE:", mae)
print("*" * 80)
print("R-Squared value:", r2_value)

MSE : 11.447617085596175
********************************************************************************
RMSE : 3.3834327369693895
********************************************************************************
MAE: 2.4816774474335324
********************************************************************************
R-Squared value: 0.8885362472463987


In [7]:
# Training-set evaluation: the same metrics as for the test set, computed on
# the rows the model was fitted on, so the two reports can be compared.
# Note: this rebinds `mse`, `rmse`, `mae` and `r2_value`.
y_pred_train = rf_reg.predict(x_train)

mse = mean_squared_error(y_train, y_pred_train)
rmse = np.sqrt(mse)  # RMSE is the square root of the MSE computed above
mae = mean_absolute_error(y_train, y_pred_train)
r2_value = r2_score(y_train, y_pred_train)

print("MSE :", mse)
print("*" * 80)
print("RMSE :", rmse)
print("*" * 80)
print("MAE:", mae)
print("*" * 80)
print("R-Squared value:", r2_value)

MSE : 7.368371860724337
********************************************************************************
RMSE : 2.7144745091314335
********************************************************************************
MAE: 1.6863514032494566
********************************************************************************
R-Squared value: 0.9069523645202958


In [9]:
import pickle

# Persist the fitted estimator in the working directory so it can be loaded
# later without re-running the search. Only unpickle this file from a trusted
# source: loading a pickle can execute arbitrary code.
with open("Random_Forest.pkl", mode="wb") as model_file:
    pickle.dump(rf_reg, model_file)