Download Boston dataset
Split dataset: 70 / 30 - train / test

In [6]:
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

boston = load_boston()

x_train, x_test, y_train, y_test = train_test_split(boston.data, boston.target, train_size = 0.7)

Train linear regression and random forest

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestClassifier

lr = LinearRegression()

lr.fit(x_train, y_train)
y_lr_pred = lr.predict(x_test)

y_train_rounded = [round(y) for y in y_train]
# y_train_mapped = map(round, y_train)

rf = RandomForestClassifier(n_estimators = 30, random_state = 0)
rf.fit(x_train, y_train_rounded)
y_rf_pred = rf.predict(x_test)

Compare results with MAE metric

In [8]:
from sklearn.metrics import mean_absolute_error

print('MAE of LinearRegression ' + str(mean_absolute_error(y_test, y_lr_pred)))
print('MAE of RandomForestClassifier ' + str(mean_absolute_error(y_test, y_rf_pred)))

MAE of LinearRegression 3.190663288398581
MAE of RandomForestClassifier 2.6006578947368424


Show features importance in random forest

In [9]:
for inx, feature_name in enumerate(boston.feature_names):
    print('{0}:\t{1}'.format(rf.feature_importances_[inx], feature_name))

0.11430643977969819:	CRIM
0.020742248252247204:	ZN
0.04932038658492186:	INDUS
0.010703573842632246:	CHAS
0.07288458913895864:	NOX
0.1360804423228355:	RM
0.10657942629233735:	AGE
0.11070893361207226:	DIS
0.03384371053766679:	RAD
0.04919905061633206:	TAX
0.055097539792198946:	PTRATIO
0.10259578199332474:	B
0.1379378772347744:	LSTAT


RFE https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.RFE.html#sklearn.feature_selection.RFE

In [10]:
from sklearn.feature_selection import RFE
from numpy import absolute

rfe = RFE(lr, 1)
ftrs = rfe.fit(boston.data, boston.target)

for inx, feature_name in enumerate(boston.feature_names):
    if ftrs.support_[inx]:
        print('RFE asnwer:\t{0}'.format(feature_name))

print('Max abs coef:\t{0}'.format(boston.feature_names[absolute(lr.coef_).argmax()]))
print('Max coef:\t{0}'.format(boston.feature_names[lr.coef_.argmax()]))

RFE asnwer:	NOX
Max abs coef:	NOX
Max coef:	RM
