In [23]:
import json
import numpy
from sklearn.model_selection import train_test_split

with open('data/p6s4_1024.json') as f:
    data = json.load(f)
    depth = data['score_depth']
    x = numpy.array([d['v'] for d in data['data'] if d['v'][0] == 0])
    yy = [numpy.array([d['s'][i] for d in data['data'] if d['v'][0] == 0]) for i in range(depth + 1)]

test_size = 0.1
train_data = []
test_data = []
for y in yy:
    x_train, x_test, y_train, y_test= train_test_split(x, y, test_size=test_size)
    train_data.append([x_train, y_train])
    test_data.append([x_test, y_test])

In [2]:
import time
def fit(factory):
    for i in range(depth + 1):
        reg = factory()
        print(f'depth={i}')
        t1 = time.time()
        reg.fit(*train_data[i])
        t2 = time.time()
        print(f'  train {t2-t1:.3}sec')
        train_score = reg.score(*train_data[i])
        t3 = time.time()
        print(f'  train_score={train_score:.3} [{t3-t2:.3}sec]')
        test_score = reg.score(*test_data[i])
        t4 = time.time()
        print(f'  test_score={test_score:.3} [{t4-t3:.3}sec]')

In [3]:
from sklearn import linear_model
from sklearn import svm
from sklearn import neighbors
from sklearn import tree
from sklearn import naive_bayes
from sklearn import neural_network
from sklearn import ensemble

  from numpy.core.umath_tests import inner1d


In [24]:
# http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html#sklearn.linear_model.LinearRegression
fit(lambda: linear_model.LinearRegression())

depth=0
  train 0.218sec
  train_score=1.0 [0.0355sec]
  test_score=1.0 [0.00396sec]
depth=1
  train 0.206sec
  train_score=0.56 [0.0442sec]
  test_score=0.563 [0.00508sec]
depth=2
  train 0.214sec
  train_score=0.471 [0.0506sec]
  test_score=0.465 [0.00937sec]
depth=3
  train 0.212sec
  train_score=0.438 [0.0483sec]
  test_score=0.442 [0.00777sec]
depth=4
  train 0.177sec
  train_score=0.423 [0.0424sec]
  test_score=0.421 [0.0035sec]


In [None]:
# http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html#sklearn.svm.SVR
fit(lambda: svm.SVR())

In [None]:
# http://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsRegressor.html#sklearn.neighbors.KNeighborsRegressor
fit(lambda: neighbors.KNeighborsRegressor(n_neighbors=5))

In [25]:
fit(lambda: tree.DecisionTreeRegressor())

depth=0
  train 1.41sec
  train_score=1.0 [0.0492sec]
  test_score=1.0 [0.00494sec]
depth=1
  train 2.89sec
  train_score=1.0 [0.153sec]
  test_score=0.753 [0.0169sec]
depth=2
  train 3.07sec
  train_score=1.0 [0.174sec]
  test_score=0.657 [0.0187sec]
depth=3
  train 3.1sec
  train_score=1.0 [0.172sec]
  test_score=0.666 [0.0174sec]
depth=4
  train 3.11sec
  train_score=1.0 [0.171sec]
  test_score=0.623 [0.0173sec]


In [26]:
# http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html
fit(lambda: ensemble.RandomForestRegressor(n_estimators=5))

depth=0
  train 4.86sec
  train_score=1.0 [0.178sec]
  test_score=1.0 [0.0207sec]
depth=1
  train 10.0sec
  train_score=0.965 [0.712sec]
  test_score=0.839 [0.0746sec]
depth=2
  train 10.9sec
  train_score=0.953 [0.729sec]
  test_score=0.784 [0.0841sec]
depth=3
  train 10.7sec
  train_score=0.953 [0.767sec]
  test_score=0.784 [0.0941sec]
depth=4
  train 10.6sec
  train_score=0.947 [0.739sec]
  test_score=0.751 [0.082sec]


In [27]:
fit(lambda: ensemble.RandomForestRegressor(n_estimators=20))

depth=0
  train 19.8sec
  train_score=1.0 [0.69sec]
  test_score=1.0 [0.0742sec]
depth=1
  train 39.6sec
  train_score=0.978 [2.56sec]
  test_score=0.868 [0.296sec]
depth=2
  train 43.1sec
  train_score=0.971 [3.01sec]
  test_score=0.817 [0.327sec]
depth=3
  train 41.9sec
  train_score=0.971 [2.87sec]
  test_score=0.82 [0.341sec]
depth=4
  train 42.7sec
  train_score=0.967 [3.1sec]
  test_score=0.79 [0.37sec]


In [28]:
fit(lambda: ensemble.RandomForestRegressor(n_estimators=50))

depth=0
  train 49.9sec
  train_score=1.0 [1.7sec]
  test_score=1.0 [0.189sec]
depth=1
  train 1.01e+02sec
  train_score=0.981 [6.76sec]
  test_score=0.873 [0.845sec]
depth=2
  train 1.06e+02sec
  train_score=0.975 [6.82sec]
  test_score=0.824 [0.787sec]
depth=3
  train 1.07e+02sec
  train_score=0.975 [7.62sec]
  test_score=0.828 [0.857sec]
depth=4
  train 1.08e+02sec
  train_score=0.971 [7.33sec]
  test_score=0.798 [0.822sec]


In [29]:
fit(lambda: neural_network.MLPRegressor(hidden_layer_sizes=[20, 30, 20]))

depth=0
  train 6.67sec
  train_score=0.998 [0.39sec]
  test_score=0.998 [0.0479sec]
depth=1
  train 16.6sec
  train_score=0.836 [0.485sec]
  test_score=0.839 [0.0484sec]
depth=2
  train 24.7sec
  train_score=0.776 [0.637sec]
  test_score=0.772 [0.0639sec]
depth=3
  train 27.8sec
  train_score=0.739 [0.739sec]
  test_score=0.736 [0.0826sec]
depth=4
  train 22.5sec
  train_score=0.723 [0.391sec]
  test_score=0.716 [0.0393sec]


In [18]:
for n in [5, 10, 15, 20, 30]:
    reg = neural_network.MLPRegressor(hidden_layer_sizes=[n] * 3)
    print(n)
    t1 = time.time()
    reg.fit(*train_data[-1])
    t2 = time.time()
    print(t2 - t1)
    print(reg.score(*test_data[-1]))

5
24.79939889907837
0.6347958925613256
10
42.49556088447571
0.6808438436012151
15
49.57706117630005
0.6992966685167856
20
39.343989849090576
0.6957850571446336
30
87.10426712036133
0.7397505783543122


In [19]:
for n in [5, 10, 15, 20, 30]:
    reg = neural_network.MLPRegressor(hidden_layer_sizes=[n] * 3, activation='logistic')
    print(n)
    t1 = time.time()
    reg.fit(*train_data[-1])
    t2 = time.time()
    print(t2 - t1)
    print(reg.score(*test_data[-1]))

5
34.018481969833374
0.6382998028649867
10
33.28068518638611
0.6701704724546107
15
27.092972993850708
0.6816108163949606
20
27.652522087097168
0.6853372160212124
30
35.61436700820923
0.6923152176947085


In [21]:
reg = neural_network.MLPRegressor(hidden_layer_sizes=[15, 20, 30, 20, 15])
t1 = time.time()
reg.fit(*train_data[-1])
t2 = time.time()
print(t2 - t1)
print(reg.score(*test_data[-1]))

150.2144820690155
0.7232178171317489


In [22]:
for n in [1, 2, 3, 4, 5]:
    reg = neural_network.MLPRegressor(hidden_layer_sizes=[30] * n)
    print(n)
    t1 = time.time()
    reg.fit(*train_data[-1])
    t2 = time.time()
    print(t2 - t1)
    print(reg.score(*test_data[-1]))

1
17.750744104385376
0.6190780136813103
2
43.415302991867065
0.713208537772013
3
85.89765214920044
0.7233397800233101
4
92.11808896064758
0.7327368560604073
5
110.85415506362915
0.7347707150007432


In [22]:
for n in [1, 2, 3, 4, 5]:
    reg = en
    print(n)
    t1 = time.time()
    reg.fit(*train_data[-1])
    t2 = time.time()
    print(t2 - t1)
    print(reg.score(*test_data[-1]))

1
17.750744104385376
0.6190780136813103
2
43.415302991867065
0.713208537772013
3
85.89765214920044
0.7233397800233101
4
92.11808896064758
0.7327368560604073
5
110.85415506362915
0.7347707150007432
