In [1]:
import pandas as pd
import numpy as np
from scipy.stats import uniform
from scipy.stats import randint
from sklearn.datasets import load_iris
from sklearn.datasets import load_boston
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics
from sklearn.metrics import mean_squared_error
from sklearn import preprocessing
import warnings

In [2]:
boston = load_boston()
print(boston.keys())
data = pd.DataFrame(boston.data)
data.columns = boston.feature_names
data['PRICE'] = boston.target 
data.head()

dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename', 'data_module'])


Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [3]:
X = data.drop(['PRICE'], axis = 1)
y = data['PRICE']

X_train, X_test, y_train, y_test = train_test_split(X,y, train_size=0.8, random_state = 4)

In [8]:
# Create a Random Forest Regressor
rf_reg = RandomForestRegressor()
hps = dict(n_estimators=[50,100,150,200,250],min_samples_split=[2,3,4],min_samples_leaf=[1,2,3],max_features=["auto","sqrt","log2"])
# reg = RandomizedSearchCV(rf_reg, hps, random_state=0, n_iter=10)
reg = GridSearchCV(rf_reg, hps)
# Train the model using the training sets 
reg.fit(X_train, y_train)
y_pred = reg.predict(X_train)

# Model Evaluation
print(reg.best_params_)
print('R^2:',metrics.r2_score(y_train, y_pred))
print('Adjusted R^2:',1 - (1-metrics.r2_score(y_train, y_pred))*(len(y_train)-1)/(len(y_train)-X_train.shape[1]-1))
print('MAE:',metrics.mean_absolute_error(y_train, y_pred))
print('MSE:',metrics.mean_squared_error(y_train, y_pred))
print('RMSE:',np.sqrt(metrics.mean_squared_error(y_train, y_pred)))

# Predicting Test data with the model
y_test_pred = reg.predict(X_test)
# Model Evaluation
acc_linreg = metrics.r2_score(y_test, y_test_pred)
print('R^2:', acc_linreg)
print('Adjusted R^2:',1 - (1-metrics.r2_score(y_test, y_test_pred))*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1))
print('MAE:',metrics.mean_absolute_error(y_test, y_test_pred))
print('MSE:',metrics.mean_squared_error(y_test, y_test_pred))
print('RMSE:',np.sqrt(metrics.mean_squared_error(y_test, y_test_pred)))

{'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 3, 'n_estimators': 250}
R^2: 0.9779262429637576
Adjusted R^2: 0.9771904510625495
MAE: 0.9083394115840171
MSE: 1.8102310480447592
RMSE: 1.3454482702968402
R^2: 0.8677232681568838
Adjusted R^2: 0.8481823873164235
MAE: 2.125682138188607
MSE: 12.287083018713098
RMSE: 3.5052935709742057


In [13]:
from sklearn.model_selection import SequenceSearchCV
# Create a Random Forest Regressor
rf_reg = RandomForestRegressor()
hps = dict(n_estimators=[50,100,150,200,250],min_samples_split=[2,3,4],min_samples_leaf=[1,2,3],max_features=["auto","sqrt","log2"])
reg = RandomizedSearchCV(rf_reg, hps, random_state=0, n_iter=50)
# Train the model using the training sets 
reg.fit(X_train, y_train)
y_pred = reg.predict(X_train)

# Model Evaluation
print(reg.best_params_)
print('R^2:',metrics.r2_score(y_train, y_pred))
print('Adjusted R^2:',1 - (1-metrics.r2_score(y_train, y_pred))*(len(y_train)-1)/(len(y_train)-X_train.shape[1]-1))
print('MAE:',metrics.mean_absolute_error(y_train, y_pred))
print('MSE:',metrics.mean_squared_error(y_train, y_pred))
print('RMSE:',np.sqrt(metrics.mean_squared_error(y_train, y_pred)))

# Predicting Test data with the model
y_test_pred = reg.predict(X_test)
# Model Evaluation
acc_linreg = metrics.r2_score(y_test, y_test_pred)
print('R^2:', acc_linreg)
print('Adjusted R^2:',1 - (1-metrics.r2_score(y_test, y_test_pred))*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1))
print('MAE:',metrics.mean_absolute_error(y_test, y_test_pred))
print('MSE:',metrics.mean_squared_error(y_test, y_test_pred))
print('RMSE:',np.sqrt(metrics.mean_squared_error(y_test, y_test_pred)))

{'n_estimators': 50, 'min_samples_split': 3, 'min_samples_leaf': 1, 'max_features': 'sqrt'}
R^2: 0.9748993056300446
Adjusted R^2: 0.9740626158177127
MAE: 0.9409747053276751
MSE: 2.058464999926021
RMSE: 1.434735167174075
R^2: 0.838993102752339
Adjusted R^2: 0.8152079929316618
MAE: 2.230286367880485
MSE: 14.955805798209056
RMSE: 3.867273690626131


In [18]:
from sklearn.model_selection import SequenceSearchCV
# Create a Random Forest Regressor
rf_reg = RandomForestRegressor()
hps = dict(n_estimators=[50,100,150,200,250],min_samples_split=[2,3,4],min_samples_leaf=[1,2,3],max_features=["auto","sqrt","log2"])
reg = SequenceSearchCV(rf_reg, hps, random_state=0, n_iter=50)
# Train the model using the training sets 
reg.fit(X_train, y_train)
y_pred = reg.predict(X_train)

# Model Evaluation
print(reg.best_params_)
print('R^2:',metrics.r2_score(y_train, y_pred))
print('Adjusted R^2:',1 - (1-metrics.r2_score(y_train, y_pred))*(len(y_train)-1)/(len(y_train)-X_train.shape[1]-1))
print('MAE:',metrics.mean_absolute_error(y_train, y_pred))
print('MSE:',metrics.mean_squared_error(y_train, y_pred))
print('RMSE:',np.sqrt(metrics.mean_squared_error(y_train, y_pred)))

# Predicting Test data with the model
y_test_pred = reg.predict(X_test)
# Model Evaluation
acc_linreg = metrics.r2_score(y_test, y_test_pred)
print('R^2:', acc_linreg)
print('Adjusted R^2:',1 - (1-metrics.r2_score(y_test, y_test_pred))*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1))
print('MAE:',metrics.mean_absolute_error(y_test, y_test_pred))
print('MSE:',metrics.mean_squared_error(y_test, y_test_pred))
print('RMSE:',np.sqrt(metrics.mean_squared_error(y_test, y_test_pred)))

{'n_estimators': 50, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'sqrt'}
R^2: 0.9768609578521648
Adjusted R^2: 0.9760896564472369
MAE: 0.8750445544554459
MSE: 1.8975932574257421
RMSE: 1.3775315812807132
R^2: 0.8799396580298686
Adjusted R^2: 0.8622034711479174
MAE: 2.0496666666666665
MSE: 11.152312039215685
RMSE: 3.3395077540283817
