In [0]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_boston

In [0]:
# Load the Boston housing dataset that ships with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell only runs on scikit-learn < 1.2 -- confirm the pinned version.
boston = load_boston()
X, y = boston.data, boston.target

# Hold out 20% of the samples for testing; the fixed random_state keeps the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
print(
    'X_train shape：', X_train.shape,
    ' y_train shape：', y_train.shape,
    ' X_test shape：', X_test.shape,
    ' y_test shape：', y_test.shape,
)

In [0]:
# Build the random forest regression model.
# The split criterion is deliberately left at its default (mean squared
# error). The literal 'mse' was deprecated in scikit-learn 1.0 and is rejected
# from 1.2 on, while its replacement 'squared_error' is unknown to releases
# before 1.0; omitting the argument selects the same criterion on every release.
model = RandomForestRegressor(
    n_estimators=1000,  # number of trees in the forest
    bootstrap=True,     # fit each tree on a bootstrap sample of the training set
    max_depth=None,     # no explicit limit on tree depth
    random_state=0,     # fixed seed so the fitted forest is reproducible
    n_jobs=-1,          # fit and predict in parallel on all available cores
)

# Kept as the last expression so the notebook displays the fitted estimator.
model.fit(X_train, y_train)

In [0]:
# Predict on both splits so training and test error can be compared.
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

# Mean squared error per split, reported with two decimals.
train_mse = mean_squared_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)
print('MSE train: %.2f, test: %.2f' % (train_mse, test_mse))

In [0]:
# Residual plot: residual (prediction minus target) against the prediction,
# for the training and the test split.
x_min, x_max = -10, 50  # shared by the zero reference line and the x-axis limits

fig, ax = plt.subplots(figsize=(8, 4))

ax.scatter(
    y_train_pred, y_train_pred - y_train,
    c='red', marker='o', edgecolor='white',
    label='Training data',
)
ax.scatter(
    y_test_pred, y_test_pred - y_test,
    c='blue', marker='s', edgecolor='white',
    label='Test data',
)

ax.set_xlabel('Predicted values')
ax.set_ylabel('Residuals')
ax.legend(loc='upper left')

# Horizontal reference line at zero residual.
ax.hlines(y=0, xmin=x_min, xmax=x_max, color='black', lw=2)
ax.set_xlim([x_min, x_max])
fig.tight_layout()

plt.show()

In [0]:
# Feature importances of the fitted model (displayed as this cell's output)
model.feature_importances_

In [0]:
# Read the feature importances from the fitted model.
importances = model.feature_importances_

# Feature indices ordered from most to least important
# (ascending argsort, then reversed).
indices = np.flip(np.argsort(importances))

# Feature names rearranged into that same order.
names = [boston.feature_names[idx] for idx in indices]

# Bar chart of the importances, one bar per feature, most important first.
fig, ax = plt.subplots(figsize=(8, 4))  # figure size in inches
ax.set_title("Feature Importance")
bar_positions = range(X.shape[1])
ax.bar(bar_positions, importances[indices])
ax.set_xticks(bar_positions)
ax.set_xticklabels(names, rotation=90)  # vertical labels

plt.show()