In [11]:
import json
import numpy
from sklearn.model_selection import train_test_split

# Dataset identifier: selects the input JSON file below and is reused as the
# prefix of the pickled model file names.
data_name = 'p6s4-1275'

# The file on disk uses underscores where data_name uses hyphens.
with open(f'data/{data_name.replace("-", "_")}.json') as f:
    data = json.load(f)

# 'score_depth' is the index/key used to pick the target out of each
# record's 's' entry; each record's 'v' entry is its feature vector.
depth = data['score_depth']
x = numpy.array([d['v'] for d in data['data']])
y = numpy.array([d['s'][depth] for d in data['data']])

test_size = 0.1
# Fixed seed so the train/test split is identical on every kernel restart;
# without it the scores reported by different cells are not comparable
# across sessions.
split_seed = 0
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=test_size, random_state=split_seed)

In [2]:
from sklearn.ensemble import RandomForestRegressor

  from numpy.core.umath_tests import inner1d


In [14]:
import time
import pickle
from pathlib import Path

def fit_test(reg):
    """Fit ``reg`` on the training split and print timing and test score.

    Reads the module-level ``x_train``, ``y_train``, ``x_test`` and
    ``y_test``. Prints the elapsed fit time first, then the value returned
    by ``reg.score(x_test, y_test)`` together with the time scoring took.

    Args:
        reg: Estimator exposing ``fit(x, y)`` and ``score(x, y)``.
    """
    # perf_counter() is monotonic and high-resolution, so the measured
    # intervals cannot be skewed (or go negative) when the system clock is
    # adjusted, unlike time.time().
    fit_start = time.perf_counter()
    reg.fit(x_train, y_train)
    fit_end = time.perf_counter()
    print(f'{fit_end - fit_start:.4}sec')

    test_score = reg.score(x_test, y_test)
    score_end = time.perf_counter()
    print(f'test_score={test_score:.3} [{score_end - fit_end:.3}sec]')

def run(n, depth, features, random_state=None):
    """Train a random forest, report its test score, and pickle it.

    Builds a ``RandomForestRegressor`` with ``n_jobs=3``, hands it to
    ``fit_test`` for fitting and scoring, writes the fitted estimator to
    ``model/{data_name}_RF-n{n}-d{depth}-f{features}.pickle`` and prints the
    size of that file in MiB (bytes / 2**20).

    Args:
        n: Passed as ``n_estimators``.
        depth: Passed as ``max_depth``.
        features: Passed as ``max_features``.
        random_state: Passed as ``random_state``. The default ``None`` keeps
            the previous unseeded behaviour. It is not part of the file
            name, so runs differing only in seed overwrite each other.
    """
    path = Path('model') / f'{data_name}_RF-n{n}-d{depth}-f{features}.pickle'
    # Create the output directory before the slow fit: otherwise a missing
    # 'model' folder only surfaces at dump time and the trained model is lost.
    path.parent.mkdir(parents=True, exist_ok=True)

    reg = RandomForestRegressor(
        n_jobs=3,
        n_estimators=n,
        max_depth=depth,
        max_features=features,
        random_state=random_state,
    )
    fit_test(reg)

    with path.open('wb') as f:
        pickle.dump(obj=reg, file=f)
    print(path.stat().st_size / 2**20)

In [15]:
# NOTE(review): scikit-learn deprecated max_features='auto' in 1.1 and removed
# it in 1.3, so on newer versions this call is expected to raise. For
# RandomForestRegressor 'auto' meant "consider all features" (same as
# max_features=1.0) — confirm against the installed version before re-running.
run(n=50, depth=20, features='auto')

96.74sec
test_score=0.804 [0.718sec]
605.5172119140625


In [16]:
run(n=50, depth=20, features=10)

66.71sec
test_score=0.813 [0.737sec]
682.8226203918457


In [17]:
run(n=50, depth=20, features=5)

37.72sec
test_score=0.812 [0.824sec]
866.0687141418457


In [18]:
run(n=50, depth=20, features=15)

95.02sec
test_score=0.804 [0.62sec]
605.5560188293457


In [20]:
run(n=50, depth=20, features=1)

14.19sec
test_score=0.719 [0.842sec]
1027.7270393371582


In [21]:
run(n=50, depth=20, features=3)

26.28sec
test_score=0.789 [0.827sec]
944.1750373840332


In [25]:
run(n=50, depth=10, features=5)

24.84sec
test_score=0.651 [0.214sec]
5.993110656738281


In [24]:
run(n=50, depth=12, features=5)

27.74sec
test_score=0.698 [0.21sec]
21.57221221923828


In [26]:
run(n=50, depth=14, features=5)

28.92sec
test_score=0.74 [0.316sec]
71.16095733642578


In [27]:
run(n=50, depth=16, features=5)

33.12sec
test_score=0.774 [0.417sec]
198.58699226379395


In [28]:
run(n=50, depth=16, features=10)

57.67sec
test_score=0.788 [0.423sec]
176.93915557861328


In [29]:
run(n=50, depth=18, features=10)

63.05sec
test_score=0.804 [0.519sec]
379.0984992980957
