In [1]:
from sklearn.ensemble import RandomForestRegressor
from RandomForest import RandomForest
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

import numpy as np

In [2]:
# Load the Boston house-prices dataset that ships with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2,
# so this cell only runs on an older scikit-learn — confirm the pinned version.
data = load_boston()

In [3]:
# Print the dataset's bundled description text (attributes, instance count).
print(data.DESCR)

.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pu

In [4]:
# Hold-out split of features and target.
# The target gets a trailing axis via np.newaxis, so y_train / y_test are
# 2-D column arrays of shape (n_samples, 1).
# random_state pins the shuffle so the R^2 scores computed further down are
# reproducible after a kernel restart (previously every run drew a new split).
# NOTE(review): train_size=0.2 fits the models on only 20% of the rows —
# confirm this is intended and that test_size=0.2 was not meant instead.
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target[:, np.newaxis],
    train_size=0.2,
    random_state=42,
)

## Custom Random Forest

In [5]:
# Hyperparameters for the project-local RandomForest implementation.
# Keyword form of dict() keeps the same keys, values and insertion order.
custom_rf_parameters = dict(
    n_estimators=100,
    criterion_name='variance',
    max_depth=10,
    min_samples_split=2,
    bootstrap=True,
    random_state=42,
)

In [6]:
# Build the project-local RandomForest from the custom_rf_parameters mapping.
custom_rf = RandomForest(**custom_rf_parameters)

In [8]:
# Fit the custom forest on the training split; y_train is passed as the
# 2-D column array produced by the np.newaxis indexing at split time.
custom_rf.fit(X_train, y_train)

RandomForest(bootstrap=True, criterion_name='variance', max_depth=10,
             min_samples_split=2, n_estimators=100, random_state=42)

In [9]:
# R^2 of the custom forest on the held-out split; as the cell's last
# expression the value is displayed as the cell output.
r2_score(y_test, custom_rf.predict(X_test))

0.6811962988151601

## Sklearn Random Forest

In [11]:
# Hyperparameters for sklearn's RandomForestRegressor, mirroring the values
# used for the custom forest so the two scores are comparable.
# NOTE(review): criterion='mse' is the pre-1.0 scikit-learn spelling; newer
# releases call it 'squared_error' — keep in sync with the pinned version.
rf_parameters = dict(
    n_estimators=100,
    criterion='mse',
    max_depth=10,
    min_samples_split=2,
    bootstrap=True,
    random_state=42,
)

In [12]:
# Build the reference sklearn regressor from the rf_parameters mapping.
rf = RandomForestRegressor(**rf_parameters)

In [18]:
# Fit the sklearn forest; column 0 of y_train is taken so the target is
# passed as a 1-D vector (y_train was given a trailing axis at split time).
rf.fit(X_train, y_train[:, 0])

RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',
                      max_depth=10, max_features='auto', max_leaf_nodes=None,
                      max_samples=None, min_impurity_decrease=0.0,
                      min_impurity_split=None, min_samples_leaf=1,
                      min_samples_split=2, min_weight_fraction_leaf=0.0,
                      n_estimators=100, n_jobs=None, oob_score=False,
                      random_state=42, verbose=0, warm_start=False)

In [19]:
# R^2 of the sklearn forest on the same held-out split, for comparison with
# the custom forest's score.
r2_score(y_test, rf.predict(X_test))

0.6749847058338894

Итак, на датасете Boston кастомная модель даёт значение R², близкое к результату модели из sklearn, что не может не радовать.