9 changes: 8 additions & 1 deletion include/mxnet/c_api.h
@@ -200,12 +200,19 @@ MXNET_DLL const char *MXGetLastError();
// Part 0: Global State setups
//-------------------------------------
/*!
* \brief Seed the global random number generators in mxnet.
* \brief Seed all global random number generators in mxnet.
* \param seed the random number seed.
* \return 0 when success, -1 when failure happens.
*/
MXNET_DLL int MXRandomSeed(int seed);

/*!
* \brief Seed the global random number generators of the given device.
* \param seed the random number seed.
* \param dev_type the device type (e.g. cpu or gpu) of the device to seed.
* \param dev_id the device id of the device to seed.
* \return 0 when success, -1 when failure happens.
*/
MXNET_DLL int MXRandomSeedContext(int seed, int dev_type, int dev_id);

/*!
* \brief Notify the engine about a shutdown,
* This can help the engine print fewer messages to the display.
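The new `MXRandomSeedContext` entry point sits next to the existing `MXRandomSeed` in the C API. As a rough orientation (not part of this PR), the sketch below shows how the two calls might be driven directly through `ctypes`; the supported path is the `mx.random.seed` wrapper added in `python/mxnet/random.py` later in this diff, and the snippet assumes an MXNet build that already contains this change.

# Hedged sketch: driving the C API entry points directly via ctypes.
# Assumes an MXNet build that already includes MXRandomSeedContext.
import ctypes
import mxnet as mx
from mxnet.base import _LIB, check_call

ctx = mx.gpu(0)
# Seed only gpu(0)'s generators (what mx.random.seed(seed, ctx=...) does internally).
check_call(_LIB.MXRandomSeedContext(ctypes.c_int(128),
                                    ctypes.c_int(ctx.device_typeid),
                                    ctypes.c_int(ctx.device_id)))
# Seed the generators of all devices (the pre-existing entry point).
check_call(_LIB.MXRandomSeed(ctypes.c_int(128)))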
7 changes: 6 additions & 1 deletion include/mxnet/ndarray.h
@@ -1049,10 +1049,15 @@ NDArray operator/(const NDArray &lhs, const NDArray &rhs);
NDArray operator/(const NDArray &lhs, const real_t &rhs);

/*!
* \brief Seed the random number generator.
* \brief Seed all random number generators in mxnet.
* \param seed the seed to set to global random number generators.
*/
void RandomSeed(uint32_t seed);
/*!
* \brief Seed the random number generators of the given device.
* \param ctx the context (device) whose random number generators should be seeded.
* \param seed the seed to set to the device's random number generators.
*/
void RandomSeed(Context ctx, uint32_t seed);
/*!
* \brief Sample uniform distribution for each elements of out.
* \param begin lower bound of distribution.
7 changes: 6 additions & 1 deletion include/mxnet/resource.h
@@ -200,10 +200,15 @@ class ResourceManager {
*/
virtual Resource Request(Context ctx, const ResourceRequest &req) = 0;
/*!
* \brief Seed all the allocated random numbers.
* \brief Seed all the allocated random number generators.
* \param seed the seed to the random number generators on all devices.
*/
virtual void SeedRandom(uint32_t seed) = 0;
/*!
* \brief Seed the random number generators of the given context.
* \param ctx the context whose random number generators should be seeded.
* \param seed the seed to the random number generators.
*/
virtual void SeedRandom(Context ctx, uint32_t seed) = 0;
/*! \brief virtual destructor */
virtual ~ResourceManager() DMLC_THROW_EXCEPTION {}
/*!
50 changes: 42 additions & 8 deletions python/mxnet/random.py
@@ -24,9 +24,10 @@
import ctypes
from .base import _LIB, check_call
from .ndarray.random import *
from .context import Context


def seed(seed_state):
def seed(seed_state, ctx="all"):
"""Seeds the random number generators in MXNet.

This affects the behavior of modules in MXNet that use random number generators,
@@ -35,12 +36,23 @@ def seed(seed_state):
Parameters
----------
seed_state : int
The random number seed to set to all devices.
The random number seed.

ctx : Context
The device context of the generator. The default is "all", which seeds the random
number generators of all devices.

Notes
-----
Random number generators in MXNet are device specific. Therefore, random numbers
generated from two devices can be different even if they are seeded using the same seed.
Random number generators in MXNet are device specific.
`mx.random.seed(seed_state)` sets the state of each generator using `seed_state` and the
device id. Therefore, random numbers generated from different devices can be different
even if they are seeded using the same seed.

To produce identical random number sequences independent of the device id, set the
optional `ctx` argument. The sequence can still differ across kinds of devices (for
example, CPU versus GPU), because MXNet's random number generators for CPU and GPU use
different algorithms.

Example
-------
@@ -50,7 +62,7 @@ def seed(seed_state):
>>> print(mx.nd.random.normal(shape=(2,2)).asnumpy())
[[ 1.09544981 -0.20014545]
[-0.20808885 0.2527658 ]]
>>>
# Same results on the same device with the same seed
>>> mx.random.seed(128)
>>> print(mx.nd.random.normal(shape=(2,2)).asnumpy())
[[ 0.47400656 -0.75213492]
@@ -59,8 +71,30 @@ def seed(seed_state):
>>> print(mx.nd.random.normal(shape=(2,2)).asnumpy())
[[ 0.47400656 -0.75213492]
[ 0.20251541 0.95352972]]
# Different results on gpu(0) and gpu(1) with the same seed
>>> mx.random.seed(128)
>>> print(mx.nd.random.normal(shape=(2,2), ctx=mx.gpu(0)).asnumpy())
[[ 2.5020072 -1.6884501]
[-0.7931333 -1.4218881]]
>>> mx.random.seed(128)
>>> print(mx.nd.random.normal(shape=(2,2), ctx=mx.gpu(1)).asnumpy())
[[ 0.24336822 -1.664805 ]
[-1.0223296 1.253198 ]]
# Seeding with `ctx` argument produces identical results on gpu(0) and gpu(1)
>>> mx.random.seed(128, ctx=mx.gpu(0))
>>> print(mx.nd.random.normal(shape=(2,2), ctx=mx.gpu(0)).asnumpy())
[[ 2.5020072 -1.6884501]
[-0.7931333 -1.4218881]]
>>> mx.random.seed(128, ctx=mx.gpu(1))
>>> print(mx.nd.random.normal(shape=(2,2), ctx=mx.gpu(1)).asnumpy())
[[ 2.5020072 -1.6884501]
[-0.7931333 -1.4218881]]
"""
if not isinstance(seed_state, int):
raise ValueError('sd must be int')
seed_state = ctypes.c_int(int(seed_state))
check_call(_LIB.MXRandomSeed(seed_state))
raise ValueError('seed_state must be int')
seed_state = ctypes.c_int(seed_state)
if ctx == "all":
check_call(_LIB.MXRandomSeed(seed_state))
else:
ctx = Context(ctx)
check_call(_LIB.MXRandomSeedContext(seed_state, ctx.device_typeid, ctx.device_id))
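Taken together, the wrapper above supports two seeding modes. A brief usage sketch (hedged; the GPU contexts are only meaningful on a CUDA build, and the cross-device behavior is as described in the docstring):

import mxnet as mx

# Seed every device's generators; sequences still differ per device because each
# device id is mixed into its derived seed.
mx.random.seed(128)

# Seed a single device's generators; the device id is not mixed in, so gpu(0) and
# gpu(1) seeded this way produce the same sequence of random numbers.
mx.random.seed(128, ctx=mx.gpu(0))
mx.random.seed(128, ctx=mx.gpu(1))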
7 changes: 7 additions & 0 deletions src/c_api/c_api.cc
@@ -91,6 +91,13 @@ int MXRandomSeed(int seed) {
API_END();
}

int MXRandomSeedContext(int seed, int dev_type, int dev_id) {
API_BEGIN();
Context ctx = Context::Create(static_cast<Context::DeviceType>(dev_type), dev_id);
mxnet::RandomSeed(ctx, seed);
API_END();
}

int MXNotifyShutdown() {
API_BEGIN();
Engine::Get()->NotifyShutdown();
4 changes: 4 additions & 0 deletions src/ndarray/ndarray.cc
@@ -1473,6 +1473,10 @@ void RandomSeed(uint32_t seed) {
ResourceManager::Get()->SeedRandom(seed);
}

void RandomSeed(Context ctx, uint32_t seed) {
ResourceManager::Get()->SeedRandom(ctx, seed);
}

template<typename OP>
inline NDArray BinaryOpRet(const NDArray &lhs,
const NDArray &rhs) {
57 changes: 42 additions & 15 deletions src/resource.cc
@@ -159,18 +159,31 @@ class ResourceManagerImpl : public ResourceManager {

void SeedRandom(uint32_t seed) override {
global_seed_ = seed;
cpu_rand_->Seed(global_seed_);
cpu_parallel_rand_->Seed(global_seed_);
cpu_rand_->SeedWithDeviceID(global_seed_);
cpu_parallel_rand_->SeedWithDeviceID(global_seed_);
#if MXNET_USE_CUDA
gpu_rand_.ForEach([seed](size_t i, ResourceRandom<gpu> *p) {
p->Seed(seed);
p->SeedWithDeviceID(seed);
});
gpu_parallel_rand_.ForEach([seed](size_t i, ResourceParallelRandom<gpu> *p) {
p->Seed(seed);
p->SeedWithDeviceID(seed);
});
#endif
}

void SeedRandom(Context ctx, uint32_t seed) override {
cpu_rand_->Seed(seed);
cpu_parallel_rand_->Seed(seed);
#if MXNET_USE_CUDA
gpu_rand_.Get(ctx.dev_id, [ctx, seed, this]() {
return new ResourceRandom<gpu>(ctx, seed);
})->Seed(seed);
gpu_parallel_rand_.Get(ctx.dev_id, [ctx, seed, this]() {
return new ResourceParallelRandom<gpu>(ctx, gpu_native_rand_copy_, seed);
})->Seed(seed);
#endif
}

private:
/*! \brief Maximum number of GPUs */
static constexpr std::size_t kMaxNumGPUs = 16;
@@ -201,9 +214,12 @@ class ResourceManagerImpl : public ResourceManager {
MSHADOW_CATCH_ERROR(delete r);
}, ctx, resource.var);
}
// set seed to a PRNG using global_seed and device id
inline void SeedWithDeviceID(uint32_t global_seed) {
Seed(ctx.dev_id + global_seed * kRandMagic);
}
// set seed to a PRNG
inline void Seed(uint32_t global_seed) {
uint32_t seed = ctx.dev_id + global_seed * kRandMagic;
inline void Seed(uint32_t seed) {
mshadow::Random<xpu> *r = prnd;
Engine::Get()->PushAsync(
[r, seed](RunContext rctx, Engine::CallbackOnComplete on_complete) {
@@ -300,21 +316,32 @@ class ResourceManagerImpl : public ResourceManager {
}, ctx, resource[i].var);
}
}
// set seed to a sampler using global_seed and device id
inline void SeedWithDeviceID(uint32_t global_seed) {
for (size_t i = 0; i < sampler.size(); ++i) {
SeedOne(i, ctx.dev_id + i * kMaxNumGPUs + global_seed * kRandMagic);
}
// reset pointer to ensure the same result with the same seed.
curr_ptr.store(0);
}
// set seed to a sampler
inline void Seed(uint32_t global_seed) {
inline void Seed(uint32_t seed) {
for (size_t i = 0; i < sampler.size(); ++i) {
const uint32_t seed = ctx.dev_id + i * kMaxNumGPUs + global_seed * kRandMagic;
common::random::RandGenerator<xpu> *r = sampler[i];
Engine::Get()->PushAsync(
[r, seed](RunContext rctx, Engine::CallbackOnComplete on_complete) {
r->Seed(rctx.get_stream<xpu>(), seed);
on_complete();
}, ctx, {}, {resource[i].var},
FnProperty::kNormal, 0, "ResourceNativeRandomSetSeed");
SeedOne(i, i * kMaxNumGPUs + seed * kRandMagic);
}
// reset pointer to ensure the same result with the same seed.
curr_ptr.store(0);
}
// set seed to the i-th sampler
inline void SeedOne(size_t i, uint32_t seed) {
common::random::RandGenerator<xpu> *r = sampler[i];
Engine::Get()->PushAsync(
[r, seed](RunContext rctx, Engine::CallbackOnComplete on_complete) {
r->Seed(rctx.get_stream<xpu>(), seed);
on_complete();
}, ctx, {}, {resource[i].var},
FnProperty::kNormal, 0, "ResourceNativeRandomSetSeed");
}
// get the next resource in round-robin manner
inline Resource GetNext() {
const size_t kMaxDigit = std::numeric_limits<size_t>::max() / 2;
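To summarize the seed bookkeeping in ResourceManagerImpl above: the global path (`SeedWithDeviceID`) mixes the device id into each derived seed, while the new per-context path (`Seed`/`SeedOne`) does not, which is what makes identically seeded devices agree. Below is a small Python sketch of the parallel-sampler derivation only, with `k_rand_magic` standing in for the class's private magic constant (its value is not shown in this diff) and 16 mirroring `kMaxNumGPUs`.

K_MAX_NUM_GPUS = 16  # mirrors ResourceManagerImpl::kMaxNumGPUs

def parallel_seeds_global(global_seed, dev_id, num_samplers, k_rand_magic):
    # SeedRandom(seed): every device mixes its dev_id into the derived per-sampler seeds.
    return [dev_id + i * K_MAX_NUM_GPUS + global_seed * k_rand_magic
            for i in range(num_samplers)]

def parallel_seeds_per_context(seed, num_samplers, k_rand_magic):
    # SeedRandom(ctx, seed): dev_id is omitted, so identically seeded devices agree.
    return [i * K_MAX_NUM_GPUS + seed * k_rand_magic for i in range(num_samplers)]

# Example: derived seeds for the first 4 samplers, with an arbitrary stand-in magic of 7.
print(parallel_seeds_global(128, 0, 4, 7))    # depends on dev_id 0
print(parallel_seeds_global(128, 1, 4, 7))    # differs: dev_id 1 is mixed in
print(parallel_seeds_per_context(128, 4, 7))  # no dev_id term: identical on any device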
101 changes: 101 additions & 0 deletions tests/python/unittest/test_random.py
@@ -273,6 +273,107 @@ def test_parallel_random_seed_setting():
assert same(un1.asnumpy(), un2.asnumpy()), \
"symbolic seed-setting test: `uniform` should give the same result with the same seed"

# Set the seed for the given context several times, based on `init_seed` and `num_init_seeds`, then finally set it to `final_seed`
def set_seed_variously_for_context(ctx, init_seed, num_init_seeds, final_seed):
end_seed = init_seed + num_init_seeds
for seed in range(init_seed, end_seed):
mx.random.seed(seed, ctx=ctx)
mx.random.seed(final_seed, ctx=ctx)
return end_seed

# Tests that seed setting of std (non-parallel) rng for specific context is synchronous w.r.t. rng use before and after.
@with_seed()
def test_random_seed_setting_for_context():
seed_to_test = 1234
num_temp_seeds = 25
probs = [0.125, 0.25, 0.25, 0.0625, 0.125, 0.1875]
num_samples = 100000
dev_type = mx.context.current_context().device_type
for dtype in ['float16', 'float32', 'float64']:
samples_imp = []
samples_sym = []
# Collect random number samples from the generators of all devices, each seeded with the same number.
for dev_id in range(0, 16 if dev_type == 'gpu' else 1):
# Currently the python API does not provide a method to get the number of gpu devices.
# Waiting for PR #10354, which provides such a method, to be merged.
# As a temporary workaround, try first and catch the exception caused by the absence of the device with `dev_id`.
try:
with mx.Context(dev_type, dev_id):
ctx = mx.context.current_context()
seed = set_seed_variously_for_context(ctx, 1, num_temp_seeds, seed_to_test)

# Check imperative. `multinomial` uses non-parallel rng.
rnds = mx.nd.random.multinomial(data=mx.nd.array(probs, dtype=dtype), shape=num_samples)
samples_imp.append(rnds.asnumpy())

# Check symbolic. `multinomial` uses non-parallel rng.
P = mx.sym.Variable("P")
X = mx.sym.random.multinomial(data=P, shape=num_samples, get_prob=False)
exe = X.bind(ctx, {"P": mx.nd.array(probs, dtype=dtype)})
set_seed_variously_for_context(ctx, seed, num_temp_seeds, seed_to_test)
exe.forward()
samples_sym.append(exe.outputs[0].asnumpy())
except mx.MXNetError as e:
if str(e).find("invalid device ordinal") != -1:
break
else:
raise e
# The samples should be identical across different gpu devices.
for i in range(1, len(samples_imp)):
assert same(samples_imp[i - 1], samples_imp[i])
for i in range(1, len(samples_sym)):
assert same(samples_sym[i - 1], samples_sym[i])

# Tests that seed setting of parallel rng for specific context is synchronous w.r.t. rng use before and after.
@with_seed()
def test_parallel_random_seed_setting_for_context():
seed_to_test = 1234
dev_type = mx.context.current_context().device_type
for dtype in ['float16', 'float32', 'float64']:
samples_imp = []
samples_sym = []
# Collect random number samples from the generators of all devices, each seeded with the same number.
for dev_id in range(0, 16 if dev_type == 'gpu' else 1):
# Currently the python API does not provide a method to get the number of gpu devices.
# Waiting for PR #10354, which provides such a method, to be merged.
# As a temporary workaround, try first and catch the exception caused by the absence of the device with `dev_id`.
try:
with mx.Context(dev_type, dev_id):
ctx = mx.context.current_context()
# Avoid excessive test cpu runtimes.
num_temp_seeds = 25 if dev_type == 'gpu' else 1
# To flush out a possible race condition, run multiple times.
for _ in range(20):
# Create enough samples such that we get a meaningful distribution.
shape = (200, 200)
params = { 'low': -1.5, 'high': 3.0 }
params.update(shape=shape, dtype=dtype)

# Check imperative. `uniform` uses parallel rng.
seed = set_seed_variously_for_context(ctx, 1, num_temp_seeds, seed_to_test)
rnds = mx.nd.random.uniform(**params)
samples_imp.append(rnds.asnumpy())

# Check symbolic. `uniform` uses parallel rng.
X = mx.sym.Variable("X")
Y = mx.sym.random.uniform(**params) + X
x = mx.nd.zeros(shape, dtype=dtype)
xgrad = mx.nd.zeros(shape, dtype=dtype)
yexec = Y.bind(ctx, {'X' : x}, {'X': xgrad})
set_seed_variously_for_context(ctx, seed, num_temp_seeds, seed_to_test)
yexec.forward(is_train=True)
yexec.backward(yexec.outputs[0])
samples_sym.append(yexec.outputs[0].asnumpy())
except mx.MXNetError as e:
if str(e).find("invalid device ordinal") != -1:
break
else:
raise e
# The samples should be identical across different gpu devices.
for i in range(1, len(samples_imp)):
assert same(samples_imp[i - 1], samples_imp[i])
for i in range(1, len(samples_sym)):
assert same(samples_sym[i - 1], samples_sym[i])

@with_seed()
def test_sample_multinomial():