From 4ae3439ef3150630f14c88f8085e50eb67026844 Mon Sep 17 00:00:00 2001
From: Anton Chernov <mechernov@gmail.com>
Date: Thu, 9 Aug 2018 12:58:47 +0200
Subject: [PATCH] Used standard random generators instead of rand_r

---
 src/operator/rnn_impl.h                  | 40 +++++++++++++++---------
 tests/cpp/engine/threaded_engine_test.cc | 13 +++++---
 tests/cpp/include/test_ndarray_utils.h   | 17 +++++-----
 3 files changed, 42 insertions(+), 28 deletions(-)
diff --git a/src/operator/rnn_impl.h b/src/operator/rnn_impl.h
index e1b4a2b79c0a..9b2c000b3a27 100644
--- a/src/operator/rnn_impl.h
+++ b/src/operator/rnn_impl.h
@@ -34,6 +34,7 @@
 #include <vector>
 #include <string>
 #include <utility>
+#include <random>
 #include "./math.h"
 #include "./math_functions-inl.h"
 #include "./operator_common.h"
@@ -149,7 +150,6 @@ void LstmForwardTraining(DType* ws,
   const int r_size = D * T * N * H * 6;
   const int y_offset = T * N * H * 5;
   const int cell_size = N * H;
-  unsigned int seed_ = 17 + rand() % 4096;  // NOLINT(runtime/threadsafe_fn)
   int idx = 0;  // state & cell state's idx;
   const int omp_threads = mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
   for (int i = 0; i < L; ++i) {
@@ -174,17 +174,21 @@ void LstmForwardTraining(DType* ws,
       w_ptr += w_size;
       b_ptr += b_size;
       if (dropout > 0.0f) {
-        #pragma omp parallel for num_threads(omp_threads)
-        for (int j = 0; j < T * N * H * D; j++) {
-          int rand_data = rand_r(&seed_);
-          if (static_cast<float>(rand_data % 1000) < static_cast<float>(1000 * dropout)) {
-            dropout_random[i * T * N * H * D + j] = 0;
-            y.dptr_[j] = 0;
-          } else {
-            dropout_random[i * T * N * H * D + j] = 1.0f - dropout;
-            y.dptr_[j] =  y.dptr_[j] / (1.0f - dropout);
+          #pragma omp parallel for num_threads(omp_threads)
+          for (int j = 0; j < T * N * H * D; j++) {
+              static thread_local std::random_device device;
+              static thread_local std::default_random_engine generator(device());
+              static thread_local std::uniform_int_distribution<int> distribution;
+              static thread_local auto dice = std::bind(distribution, generator);
+              int rand_data = dice();
+              if (static_cast<float>(rand_data % 1000) < static_cast<float>(1000 * dropout)) {
+                  dropout_random[i * T * N * H * D + j] = 0;
+                  y.dptr_[j] = 0;
+              } else {
+                  dropout_random[i * T * N * H * D + j] = 1.0f - dropout;
+                  y.dptr_[j] = y.dptr_[j] / (1.0f - dropout);
+              }
           }
-        }
       }
       x_ptr = y.dptr_;
       rs2 += r_size;
@@ -994,7 +998,6 @@ void GruForwardTraining(DType* ws,
   DType* bx_l = bx;
   DType* bh_l = bh;
   DType* y_tmp = x_ptr;
-  unsigned int seed_ = 17 + rand() % 4096;  // NOLINT(runtime/threadsafe_fn)
   for (int l = 0; l < L; l++) {
     if (l != 0) {
       y_tmp = y_l;
@@ -1004,7 +1007,11 @@ void GruForwardTraining(DType* ws,
       const int omp_threads = mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
       #pragma omp parallel for num_threads(omp_threads)
       for (int i = 0; i < T * N * I; i++) {
-        int rand_data = rand_r(&seed_);
+        static thread_local std::random_device device;
+        static thread_local std::default_random_engine generator(device());
+        static thread_local std::uniform_int_distribution<int> distribution;
+        static thread_local auto dice = std::bind(distribution, generator);
+        int rand_data = dice();
         if (static_cast<float>(rand_data % 1000) < static_cast<float>(1000 * dropout)) {
           dropout_random[(l - 1) * T * N * I + i] = 0;
           y_tmp[i] = 0;
@@ -1881,7 +1888,6 @@ void VanillaRNNForwardTraining(DType* ws,
   DType* bh_l = bh;
   DType* y_tmp = x_ptr;
   const int omp_threads = mxnet::engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
-  unsigned int seed_ = 17 + rand() % 4096;  // NOLINT(runtime/threadsafe_fn)
   for (int l = 0; l < L; l++) {
     if (l != 0) {
       y_tmp = y_l;
@@ -1890,7 +1896,11 @@ void VanillaRNNForwardTraining(DType* ws,
     if (dropout > 0.0f && l > 0) {
       #pragma omp parallel for num_threads(omp_threads)
       for (int i = 0; i < T * N * I; i++) {
-        int rand_data = rand_r(&seed_);
+        static thread_local std::random_device device;
+        static thread_local std::default_random_engine generator(device());
+        static thread_local std::uniform_int_distribution<int> distribution;
+        static thread_local auto dice = std::bind(distribution, generator);
+        int rand_data = dice();
         if (static_cast<float>(rand_data % 1000) < static_cast<float>(1000 * dropout)) {
           dropout_random[(l - 1) * T * N * I + i] = 0;
           y_tmp[i] = 0;
diff --git a/tests/cpp/engine/threaded_engine_test.cc b/tests/cpp/engine/threaded_engine_test.cc
index 92d0958c4630..47be4a2edb1d 100644
--- a/tests/cpp/engine/threaded_engine_test.cc
+++ b/tests/cpp/engine/threaded_engine_test.cc
@@ -33,6 +33,7 @@
 #include <thread>
 #include <chrono>
 #include <vector>
+#include <random>
 
 #include "../src/engine/engine_impl.h"
 #include "../include/test_util.h"
@@ -58,17 +59,21 @@ void GenerateWorkload(int num_workloads, int num_var,
                       int min_read, int max_read,
                       int min_time, int max_time,
                       std::vector<Workload>* workloads) {
+  static thread_local std::default_random_engine generator(seed_);
+  static thread_local std::uniform_int_distribution<int> distribution;
+  static thread_local auto dice = std::bind(distribution, generator);
+
   workloads->clear();
   workloads->resize(num_workloads);
   for (int i = 0; i < num_workloads; ++i) {
     auto& wl = workloads->at(i);
-    wl.write = rand_r(&seed_) % num_var;
-    int r = rand_r(&seed_);
+    wl.write = dice() % num_var;
+    int r = dice();
     int num_read = min_read + (r % (max_read - min_read));
     for (int j = 0; j < num_read; ++j) {
-      wl.reads.push_back(rand_r(&seed_) % num_var);
+      wl.reads.push_back(dice() % num_var);
     }
-    wl.time = min_time + rand_r(&seed_) % (max_time - min_time);
+    wl.time = min_time + dice() % (max_time - min_time);
   }
 }
 
diff --git a/tests/cpp/include/test_ndarray_utils.h b/tests/cpp/include/test_ndarray_utils.h
index f5ab96794ada..c0bcc61d2430 100644
--- a/tests/cpp/include/test_ndarray_utils.h
+++ b/tests/cpp/include/test_ndarray_utils.h
@@ -29,6 +29,8 @@
 #include <cstdlib>
 #include <string>
 #include <map>
+#include <random>
+
 #include "test_util.h"
 #include "test_op.h"
 
@@ -47,16 +49,13 @@ inline void CheckDataRegion(const TBlob &src, const TBlob &dst) {
   EXPECT_EQ(equals, 0);
 }
 
-inline unsigned gen_rand_seed() {
-  time_t timer;
-  ::time(&timer);
-  return static_cast<unsigned>(timer);
-}
-
 inline float RandFloat() {
-  static unsigned seed = gen_rand_seed();
-  double v = rand_r(&seed) * 1.0 / RAND_MAX;
-  return static_cast<float>(v);
+  static thread_local std::random_device device;
+  static thread_local std::default_random_engine generator(device());
+  static thread_local std::uniform_real_distribution<float> distribution;
+  static thread_local auto dice = std::bind(distribution, generator);
+
+  return dice();
 }
 
 // Get an NDArray with provided indices, prepared for a RowSparse NDArray.