diff --git a/tests/python/unittest/test_module.py b/tests/python/unittest/test_module.py
index 8ad32e93ff84..92ce08d3acda 100644
--- a/tests/python/unittest/test_module.py
+++ b/tests/python/unittest/test_module.py
@@ -442,17 +442,20 @@ def test_shared_exec_group(exec_grp_shared, exec_grp_created, shared_arg_names=N
                           shared_arg_names=shared_arg_names, extra_args=extra_args)
 
 
-def test_module_fm():
+def test_factorization_machine_module():
+    """ Test factorization machine model with sparse operators """
     mx.random.seed(11)
     rnd.seed(11)
 
-    def fm_model(k, feature_dim):
-        norm = mx.initializer.Normal(sigma=0.01)
+    def fm(factor_size, feature_dim, init):
         x = mx.symbol.Variable("data", stype='csr')
-        v = mx.symbol.Variable("v", shape=(feature_dim, k), init=norm, stype='row_sparse')
+        v = mx.symbol.Variable("v", shape=(feature_dim, factor_size),
+                               init=init, stype='row_sparse')
 
-        w1_weight = mx.symbol.var('w1_weight', shape=(feature_dim, 1), init=norm, stype='row_sparse')
-        w1 = mx.symbol.dot(x, w1_weight)
+        w1_weight = mx.symbol.var('w1_weight', shape=(feature_dim, 1),
+                                  init=init, stype='row_sparse')
+        w1_bias = mx.symbol.var('w1_bias', shape=(1))
+        w1 = mx.symbol.broadcast_add(mx.symbol.dot(x, w1_weight), w1_bias)
 
         v_s = mx._symbol_internal._square_sum(data=v, axis=1, keepdims=True)
         x_s = mx.symbol.square(data=x)
@@ -466,38 +469,41 @@ def fm_model(k, feature_dim):
         sum2 = 0.5 * mx.symbol.negative(bd_sum)
         model = mx.sym.elemwise_add(sum1, sum2)
 
-        y = mx.symbol.Variable("out_label")
-        model = mx.symbol.LinearRegressionOutput(data=model, label=y, name="out")
+        y = mx.symbol.Variable("label")
+        model = mx.symbol.LinearRegressionOutput(data=model, label=y)
         return model
 
     # model
     ctx = default_context()
-    k = 5
-    feature_dim = 20
-    model = fm_model(k, feature_dim)
+    init = mx.initializer.Normal(sigma=0.01)
+    factor_size = 4
+    feature_dim = 10000
+    model = fm(factor_size, feature_dim, init)
 
     # data iter
-    num_batches = 8
-    batch_size = 25
+    num_batches = 5
+    batch_size = 64
     num_samples = batch_size * num_batches
     import scipy.sparse as sp
     # generate some random scipy csr data
-    csr_sp = sp.rand(num_samples, feature_dim, density=0.5, format='csr')
+    csr_sp = sp.rand(num_samples, feature_dim, density=0.1, format='csr')
     csr_nd = mx.nd.csr(csr_sp.data, csr_sp.indptr, csr_sp.indices,
-                      (num_samples, feature_dim))
+                       (num_samples, feature_dim))
     label = mx.nd.ones((num_samples,1))
     # the alternative is to use LibSVMIter
     train_iter = mx.io.NDArrayIter(data=csr_nd,
-                                   label={'out_label':label},
+                                   label={'label':label},
                                    batch_size=batch_size)
     # create module
-    mod = mx.mod.Module(symbol=model, data_names=['data'], label_names=['out_label'])
+    mod = mx.mod.Module(symbol=model, data_names=['data'], label_names=['label'])
     # allocate memory by given the input data and lable shapes
     mod.bind(data_shapes=train_iter.provide_data, label_shapes=train_iter.provide_label)
     # initialize parameters by uniform random numbers
-    mod.init_params(initializer=mx.init.Uniform(scale=.1))
+    mod.init_params(initializer=init)
     # use Sparse SGD with learning rate 0.1 to train
-    mod.init_optimizer(optimizer='sgd')
+    sgd = mx.optimizer.SGD(momentum=0.1, clip_gradient=5.0, learning_rate=0.01,
+                           rescale_grad=1.0/batch_size)
+    mod.init_optimizer(optimizer=sgd)
     # use accuracy as the metric
     metric = mx.metric.create('MSE')
     # train 10 epoch
@@ -510,7 +516,7 @@ def fm_model(k, feature_dim):
             mod.backward()                          # compute gradients
             mod.update()                            # update parameters
         # print('Epoch %d, Training %s' % (epoch, metric.get()))
-    assert(metric.get()[1] < 0.2)
+    assert(metric.get()[1] < 0.02)
 
 
 def test_module_initializer():
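
Reviewer note (not part of the patch): the `sum1`/`sum2` terms in the symbol graph follow the standard factorization machine identity, where the pairwise interaction score `0.5 * sum_f ((x . v_f)^2 - sum_i x_i^2 * v_if^2)` is computed in O(d*k) time rather than materializing the O(d^2) feature crosses. From the visible lines, `v_s = square_sum(v, axis=1)` and `x_s = square(x)` supply the subtracted sum-of-squares half; the elided middle of the function presumably combines them into `bd_sum`. Below is a minimal dense NumPy sketch of that score, for comparison only; `fm_predict` and its argument names are hypothetical and do not appear in this patch:

```python
import numpy as np

def fm_predict(x, w0, w, v):
    """Dense reference for a factorization machine score.

    All names are illustrative: x is an (n, d) feature matrix, w0 a scalar
    bias, w a (d,) linear weight vector, v a (d, k) factor matrix.
    """
    linear = w0 + x.dot(w)          # cf. broadcast_add(dot(x, w1_weight), w1_bias)
    xv = x.dot(v)                   # (n, k) per-factor projections, cf. dot(x, v)
    square_of_sum = np.sum(xv ** 2, axis=1)
    sum_of_squares = np.sum((x ** 2).dot(v ** 2), axis=1)  # cf. x_s and v_s
    return linear + 0.5 * (square_of_sum - sum_of_squares)
```

With `x` stored as CSR and `v`/`w1_weight` as `row_sparse`, the gradient of `v` is nonzero only for rows whose features occur in the batch, which is what the sparse SGD update exercised by this test is meant to exploit.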