In [45]:
import json
import numpy
from sklearn.model_selection import train_test_split

# Seed for the train/test shuffle. Without it every kernel restart produces a
# different split, so the scores printed further down cannot be reproduced or
# compared between runs.
SPLIT_SEED = 0

# Only parsing needs the file handle; close it before doing any processing.
with open('data/p6s4_37.json') as f:
    data = json.load(f)

# 'score_depth' is the highest depth index; each record's 's' is indexed 0..depth.
depth = data['score_depth']
# Feature matrix: every element of each record's 'v' except the first.
# NOTE(review): why v[0] is dropped is not visible in this notebook - confirm.
x = numpy.array([d['v'][1:] for d in data['data']])
# One target vector per depth, aligned row-for-row with x.
yy = [numpy.array([d['s'][i] for d in data['data']]) for i in range(depth + 1)]

test_size = 0.1
train_data = []
test_data = []
for y in yy:
    # The same seed is used for every depth, so the same rows are held out
    # each time and per-depth scores are measured on identical samples.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size, random_state=SPLIT_SEED)
    train_data.append([x_train, y_train])
    test_data.append([x_test, y_test])

In [29]:
import time
def fit(factory, train=None, test=None):
    """Train and score one fresh estimator per depth, printing scores and timings.

    Parameters
    ----------
    factory : callable
        Zero-argument callable returning a new, unfitted estimator that
        provides ``fit(X, y)`` and ``score(X, y)``. It is called once per
        depth so that no fitted state is shared between depths.
    train, test : sequence of (X, y) pairs, optional
        One entry per depth. When omitted, the notebook-level ``train_data``
        and ``test_data`` lists are used (they hold one entry per depth, so
        this visits depths ``0..depth`` exactly as before).

    Returns
    -------
    None
        Results are only printed: the fit time, then the train score and the
        test score, each followed by the time spent computing it.
    """
    if train is None:
        train = train_data
    if test is None:
        test = test_data

    for i, (train_set, test_set) in enumerate(zip(train, test)):
        reg = factory()
        print(f'depth={i}')
        # perf_counter() is monotonic and high resolution; time.time() is a
        # wall clock that can jump and is too coarse for sub-millisecond spans.
        t1 = time.perf_counter()
        reg.fit(*train_set)
        t2 = time.perf_counter()
        print(f'  train {t2-t1:.3}sec')
        train_score = reg.score(*train_set)
        t3 = time.perf_counter()
        print(f'  train_score={train_score:.3} [{t3-t2:.3}sec]')
        test_score = reg.score(*test_set)
        t4 = time.perf_counter()
        print(f'  test_score={test_score:.3} [{t4-t3:.3}sec]')

In [37]:
from sklearn import linear_model
from sklearn import svm
from sklearn import neighbors
from sklearn import tree
from sklearn import naive_bayes
from sklearn import neural_network
from sklearn import ensemble

  from numpy.core.umath_tests import inner1d


In [46]:
# Baseline run: LinearRegression with default settings, one model per depth.
# Reference: http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html#sklearn.linear_model.LinearRegression
# The class itself is a zero-argument factory, so no lambda wrapper is needed.
fit(linear_model.LinearRegression)

depth=0
  train 0.0129sec
  train_score=1.0 [0.00154sec]
  test_score=1.0 [0.000815sec]
depth=1
  train 0.0131sec
  train_score=0.461 [0.0019sec]
  test_score=0.464 [0.00113sec]
depth=2
  train 0.00969sec
  train_score=0.426 [0.00172sec]
  test_score=0.445 [0.00134sec]
depth=3
  train 0.0139sec
  train_score=0.385 [0.00304sec]
  test_score=0.394 [0.000742sec]
depth=4
  train 0.00834sec
  train_score=0.396 [0.00123sec]
  test_score=0.39 [0.000767sec]


In [47]:
# Support-vector regression with default settings, one model per depth.
# Reference: http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html#sklearn.svm.SVR
# NOTE: by far the slowest model here - the recorded run took about 10 s to
# train depth 0 and was interrupted by hand during depth 1.
fit(svm.SVR)

depth=0
  train 9.91sec
  train_score=0.779 [2.37sec]
  test_score=0.747 [0.264sec]
depth=1


KeyboardInterrupt: 

In [48]:
# k-nearest-neighbours regression, one model per depth.
# Reference: http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsRegressor.html#sklearn.neighbors.KNeighborsRegressor
def make_knn_regressor():
    """Return a new, unfitted 5-nearest-neighbours regressor."""
    return neighbors.KNeighborsRegressor(n_neighbors=5)


fit(make_knn_regressor)

depth=0
  train 0.036sec
  train_score=0.986 [4.7sec]
  test_score=0.978 [0.606sec]
depth=1
  train 0.0338sec
  train_score=0.655 [4.75sec]
  test_score=0.492 [0.573sec]
depth=2
  train 0.0392sec
  train_score=0.732 [4.71sec]
  test_score=0.619 [0.585sec]
depth=3
  train 0.0486sec
  train_score=0.723 [4.69sec]
  test_score=0.558 [0.569sec]
depth=4
  train 0.0384sec
  train_score=0.759 [4.74sec]
  test_score=0.607 [0.571sec]


In [49]:
# Single unpruned decision tree per depth.
# random_state is fixed because tree construction is randomised internally;
# without a seed the fitted tree, and its scores, can change between runs.
fit(lambda: tree.DecisionTreeRegressor(random_state=0))

depth=0
  train 0.095sec
  train_score=1.0 [0.00356sec]
  test_score=1.0 [0.001sec]
depth=1
  train 0.188sec
  train_score=0.995 [0.00835sec]
  test_score=0.15 [0.00177sec]
depth=2
  train 0.177sec
  train_score=0.997 [0.00843sec]
  test_score=0.332 [0.00149sec]
depth=3
  train 0.183sec
  train_score=0.998 [0.00831sec]
  test_score=0.256 [0.00146sec]
depth=4
  train 0.173sec
  train_score=0.998 [0.00767sec]
  test_score=0.357 [0.00142sec]


In [51]:
# Random forest with 10 trees, one model per depth.
# Reference: http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html
# random_state is fixed so that bootstrap sampling and feature selection are
# repeatable; unseeded forests give different scores on every run.
fit(lambda: ensemble.RandomForestRegressor(n_estimators=10, random_state=0))

depth=0
  train 0.544sec
  train_score=1.0 [0.0246sec]
  test_score=1.0 [0.00387sec]
depth=1
  train 1.16sec
  train_score=0.903 [0.0637sec]
  test_score=0.482 [0.00866sec]
depth=2
  train 1.15sec
  train_score=0.931 [0.0565sec]
  test_score=0.648 [0.00986sec]
depth=3
  train 1.17sec
  train_score=0.926 [0.0606sec]
  test_score=0.546 [0.00892sec]
depth=4
  train 1.15sec
  train_score=0.936 [0.0571sec]
  test_score=0.629 [0.009sec]


In [50]:
# Random forest with 100 trees, one model per depth, for comparison with the
# 10-tree run. random_state is fixed so that bootstrap sampling and feature
# selection are repeatable; unseeded forests give different scores on every run.
fit(lambda: ensemble.RandomForestRegressor(n_estimators=100, random_state=0))

depth=0
  train 5.39sec
  train_score=1.0 [0.243sec]
  test_score=1.0 [0.0324sec]
depth=1
  train 11.6sec
  train_score=0.927 [0.6sec]
  test_score=0.516 [0.0861sec]
depth=2
  train 11.5sec
  train_score=0.95 [0.576sec]
  test_score=0.687 [0.0881sec]
depth=3
  train 11.7sec
  train_score=0.947 [0.608sec]
  test_score=0.587 [0.0856sec]
depth=4
  train 11.4sec
  train_score=0.955 [0.558sec]
  test_score=0.656 [0.0852sec]


In [52]:
# Multi-layer perceptron with three hidden layers of 40 units, one model per depth.
# random_state is fixed because weight initialisation and batch shuffling are
# random; without a seed the scores differ from run to run.
fit(lambda: neural_network.MLPRegressor(hidden_layer_sizes=[40, 40, 40], random_state=0))

depth=0
  train 1.65sec
  train_score=0.985 [0.0436sec]
  test_score=0.984 [0.00495sec]
depth=1
  train 2.51sec
  train_score=0.485 [0.0371sec]
  test_score=0.476 [0.00379sec]
depth=2
  train 7.57sec
  train_score=0.6 [0.0363sec]
  test_score=0.603 [0.00332sec]
depth=3
  train 4.75sec
  train_score=0.557 [0.0397sec]
  test_score=0.52 [0.00359sec]
depth=4
  train 4.98sec
  train_score=0.657 [0.0394sec]
  test_score=0.594 [0.0039sec]
