-
Notifications
You must be signed in to change notification settings - Fork 2k
/
pyunit_pubdev_5493_glrm_seed.py
63 lines (49 loc) · 2.21 KB
/
pyunit_pubdev_5493_glrm_seed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
from __future__ import print_function
import sys
sys.path.insert(1, "../../../")
import h2o
from tests import pyunit_utils
from h2o.estimators.glrm import H2OGeneralizedLowRankEstimator
'''
PUBDEV-5493: GLRM returns different results regardless of the seed setting.
'''
def test_glrm_seeds():
    """
    Check that the GLRM seed drives reproducibility for every init method.

    For each init method three models are trained on the iris data: two with the
    same seed and one with a different seed.  The first four columns of the
    reconstructed frames must agree for the same-seed pair and must differ for
    the different-seed pair.
    """
    print("Importing iris_wheader.csv data...")
    iris_path = pyunit_utils.locate("smalldata/iris/iris_wheader.csv")
    iris_frame = h2o.upload_file(iris_path)
    iris_frame.describe()

    # "user" mode without init values is equivalent to randomized
    init_methods = ["random", "svd", "plus_plus", "user"]
    base_seed, other_seed = 123456789, 987654321

    for method in init_methods:
        # Same seed twice: the two reconstructions are expected to be identical.
        recon_base = predGLRM(iris_frame, setupTrainModel(method, base_seed))
        recon_repeat = predGLRM(iris_frame, setupTrainModel(method, base_seed))
        # NOTE(review): compare_frames_local is presumably what fails the test on a
        # mismatch here, since its result is not checked -- confirm in pyunit_utils.
        pyunit_utils.compare_frames_local(recon_base[0:4], recon_repeat[0:4], prob=1.0)

        # Different seed: the reconstruction is expected to differ from the base one.
        recon_other = predGLRM(iris_frame, setupTrainModel(method, other_seed))
        frames_match = pyunit_utils.compare_frames_local(recon_base[0:4], recon_other[0:4],
                                                         prob=1.0, returnResult=True)
        assert not frames_match, "GLRM return same results with different random seed."
def setupTrainModel(initM, seed):
    """
    Build a GLRM estimator with the fixed settings used by this test.

    Despite the name, no training happens here; the estimator is only constructed.

    :param initM: value passed to the estimator as its ``init`` argument
    :param seed: value passed to the estimator as its ``seed`` argument
    :return: H2OGeneralizedLowRankEstimator with k=3, Quadratic loss, gamma_x and
        gamma_y of 0.25 and the STANDARDIZE transform.
    """
    glrm_params = dict(
        k=3,
        loss="Quadratic",
        gamma_x=0.25,
        gamma_y=0.25,
        transform="STANDARDIZE",
        init=initM,
        seed=seed,
    )
    return H2OGeneralizedLowRankEstimator(**glrm_params)
def predGLRM(dataset, model):
    """
    Train the given GLRM model on a dataset, then score that same dataset.

    :param dataset: frame used for training (all of its columns) and for scoring
    :param model: GLRM model to train; ``train`` is called on it directly
    :return: whatever ``model.predict(dataset)`` returns (the reconstructed frame)
    """
    column_names = dataset.names
    model.train(x=column_names, training_frame=dataset)
    reconstructed = model.predict(dataset)
    return reconstructed
# Entry point: when executed as a script, hand the test to the pyunit standalone
# harness; when the module is imported instead, run the test immediately at import.
if __name__ == "__main__":
    pyunit_utils.standalone_test(test_glrm_seeds)
else:
    test_glrm_seeds()