# Borutaを動かしてみる

## Boston不動産価格サンプルデータセットを準備

In [1]:
from sklearn.datasets import load_boston
from sklearn.ensemble import RandomForestRegressor

# Load the Boston housing sample dataset bundled with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell presumably needs scikit-learn < 1.2 -- confirm the
# environment before a Restart & Run All.
boston = load_boston()

# Feature matrix and regression target. `boston` itself stays in scope because
# later cells print boston.feature_names next to the selection masks.
X, y = boston.data, boston.target

## パラメータ設定

In [2]:
# Settings shared by every estimator/selector built in the cells below.

# Seed passed to each random forest (and to BorutaPy) so reruns are repeatable.
random_state = 42

# Number of trees used for each random forest.
n_estimators = 100

## Boruta

In [3]:
from boruta import BorutaPy
# Base estimator handed to BorutaPy.
boruta_forest = RandomForestRegressor(
    n_estimators=n_estimators,
    random_state=random_state,
)

# verbose=2 prints the confirmed / tentative / rejected counts per iteration.
# NOTE(review): per the BorutaPy docs, two_step=False selects the original
# Boruta test (Bonferroni correction only), and BorutaPy's own n_estimators is
# applied to the wrapped estimator -- confirm against the installed version.
selector = BorutaPy(
    boruta_forest,
    n_estimators=n_estimators,
    two_step=False,
    verbose=2,
    random_state=random_state,
)
selector.fit(X, y)

# Boolean mask over the input columns; printed under the feature names so the
# two arrays can be compared position by position.
mask = selector.support_
print(boston.feature_names)
print(mask)

# Keep only the columns of the selected features.
X_selected = selector.transform(X)
print("X.shape={}, X_selected.shape={}".format(X.shape, X_selected.shape))

Iteration: 	1 / 100
Confirmed: 	0
Tentative: 	13
Rejected: 	0
Iteration: 	2 / 100
Confirmed: 	0
Tentative: 	13
Rejected: 	0
Iteration: 	3 / 100
Confirmed: 	0
Tentative: 	13
Rejected: 	0
Iteration: 	4 / 100
Confirmed: 	0
Tentative: 	13
Rejected: 	0
Iteration: 	5 / 100
Confirmed: 	0
Tentative: 	13
Rejected: 	0
Iteration: 	6 / 100
Confirmed: 	0
Tentative: 	13
Rejected: 	0
Iteration: 	7 / 100
Confirmed: 	0
Tentative: 	13
Rejected: 	0
Iteration: 	8 / 100
Confirmed: 	0
Tentative: 	13
Rejected: 	0
Iteration: 	9 / 100
Confirmed: 	9
Tentative: 	1
Rejected: 	3
Iteration: 	10 / 100
Confirmed: 	9
Tentative: 	1
Rejected: 	3
Iteration: 	11 / 100
Confirmed: 	9
Tentative: 	1
Rejected: 	3
Iteration: 	12 / 100
Confirmed: 	9
Tentative: 	0
Rejected: 	4


BorutaPy finished running.

Iteration: 	13 / 100
Confirmed: 	9
Tentative: 	0
Rejected: 	4
['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
 'B' 'LSTAT']
[ True False False False  True  True  True  True False  True  True  True
  True]
X.shape=(506, 13), X_selected.shape=(506, 9)

## SelectFromModelクラス（参考）
scikit-learn の `sklearn.feature_selection` モジュールで提供されている特徴量選択クラス。

In [4]:
from sklearn.feature_selection import SelectFromModel
# Use a RandomForestRegressor as the estimator and keep the features whose
# importance is at or above the median importance.
sfm_forest = RandomForestRegressor(
    n_estimators=n_estimators,
    random_state=random_state,
)
selector = SelectFromModel(sfm_forest, threshold="median")
selector.fit(X, y)

# Boolean mask over the input columns; printed under the feature names so the
# two arrays can be compared position by position.
mask = selector.get_support()
print(boston.feature_names)
print(mask)

# Keep only the columns of the selected features.
X_selected = selector.transform(X)
print("X.shape={}, X_selected.shape={}".format(X.shape, X_selected.shape))

['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
 'B' 'LSTAT']
[ True False False False  True  True  True  True False False  True False
  True]
X.shape=(506, 13), X_selected.shape=(506, 7)


## 参考

https://aotamasaki.hatenablog.com/entry/2019/01/05/195813

https://qiita.com/rockhopper/items/a68ceb3248f2b3a41c89