Merge pull request #219 from nickhand/fix_random_seeds

ensure multiple calls to same object with random element returns same…
bccp · Jul 28, 2016 · 0efed2c · 0efed2c
2 parents def767f + 766dff9
commit 0efed2c
Show file tree

Hide file tree

Showing 6 changed files with 66 additions and 55 deletions.
diff --git a/nbodykit/mockmaker.py b/nbodykit/mockmaker.py
@@ -1,7 +1,7 @@
 import numpy
 from nbodykit.utils.meshtools import SlabIterator
-
-def make_gaussian_fields(pm, linear_power, compute_displacement=False, random_state=None):
+        
+def make_gaussian_fields(pm, linear_power, compute_displacement=False):
     r"""
     Make a Gaussian realization of a overdensity field, :math:`\delta(x)`
     
@@ -54,20 +54,16 @@ def make_gaussian_fields(pm, linear_power, compute_displacement=False, random_st
     compute_displacement : bool, optional
         if ``True``, also return the linear Zel'dovich displacement field; 
         default is ``False``
-    random_state : numpy.random.RandomState, optional
-        the random state used to draw normally distributed random variates
     
     Returns
     -------
     delta : array_like
         the real-space Gaussian overdensity field
     disp : array_like or ``None``
         if requested, the Gaussian displacement field
-    """  
-    if random_state is None: random_state = numpy.random
-
+    """            
     # assign Gaussian rvs with mean 0 and unit variance
-    pm.real[:] = random_state.normal(size=pm.real.shape)
+    pm.real[:] = numpy.random.normal(size=pm.real.shape)
 
     # initialize the displacement field arrays
     if compute_displacement:
@@ -150,7 +146,7 @@ def lognormal_transform(density, bias=1.):
     return toret
 
 
-def poisson_sample_to_points(delta, displacement, pm, nbar, rsd=None, f=0., bias=1., random_state=None):
+def poisson_sample_to_points(delta, displacement, pm, nbar, rsd=None, f=0., bias=1.):
     """
     Poisson sample the linear delta and displacement fields to points. 
     
@@ -180,16 +176,12 @@ def poisson_sample_to_points(delta, displacement, pm, nbar, rsd=None, f=0., bias
         strength of the RSD; default is 0. (no RSD)
     bias : float, optional
         apply a linear bias to the overdensity field (default is 1.)
-    random_state : numpy.random.RandomState, optional
-        the random state used to perform the Poisson sampling
     
     Returns
     -------
     pos : array_like, (N, 3)
         the Cartesian positions of the particles in the box     
     """
-    if random_state is None: random_state = numpy.random
-
     # apply the lognormal transformatiom to the initial conditions density
     # this creates a positive-definite delta (necessary for Poisson sampling)
     lagrangian_bias = bias - 1.
@@ -203,7 +195,7 @@ def poisson_sample_to_points(delta, displacement, pm, nbar, rsd=None, f=0., bias
     cellmean = delta*overallmean
 
     # number of objects in each cell
-    N = random_state.poisson(cellmean)
+    N = numpy.random.poisson(cellmean)
     Ntot = N.sum()
     pts = N.nonzero() # indices of nonzero points
 
@@ -229,9 +221,9 @@ def poisson_sample_to_points(delta, displacement, pm, nbar, rsd=None, f=0., bias
     z += offset[2]
 
     # coordinates of each object (placed randomly in each cell)
-    x = numpy.repeat(x, N[pts]) + random_state.uniform(0, H[0], size=Ntot)
-    y = numpy.repeat(y, N[pts]) + random_state.uniform(0, H[1], size=Ntot)
-    z = numpy.repeat(z, N[pts]) + random_state.uniform(0, H[2], size=Ntot)
+    x = numpy.repeat(x, N[pts]) + numpy.random.uniform(0, H[0], size=Ntot)
+    y = numpy.repeat(y, N[pts]) + numpy.random.uniform(0, H[1], size=Ntot)
+    z = numpy.repeat(z, N[pts]) + numpy.random.uniform(0, H[2], size=Ntot)
 
     # enforce periodic and stack
     x %= pm.BoxSize[0]

diff --git a/nbodykit/plugins/algorithms/FiberCollisions.py b/nbodykit/plugins/algorithms/FiberCollisions.py
@@ -33,11 +33,9 @@ def __init__(self, datasource, collision_radius=62/60./60., seed=None):
         if self.comm.rank == 0: 
             self.logger.info("collision radius in degrees = %.4f" %collision_radius)
 
-        # create the random state from the global seed and comm size
-        if self.seed is not None:
-            self.random_state = utils.local_random_state(self.seed, self.comm)
-        else:
-            self.random_state = numpy.random
+        # create the local random seed from the global seed and comm size
+        self.local_seed = utils.local_random_seed(self.seed, self.comm)
+        self.logger.info("local_seed = %d" %self.local_seed)
 
     @classmethod
     def register(cls):
@@ -73,6 +71,7 @@ def run(self):
                     on the sky (0-indexed), else it is set to -1
         """
         from nbodykit import fof
+        from astropy.utils.misc import NumpyRNGContext
 
         # open a persistent cache
         with self.datasource.keep_cache():
@@ -83,7 +82,8 @@ def run(self):
             labels = fof.fof(self.datasource, self._collision_radius_rad, 1, comm=self.comm)
 
             # assign the fibers (in parallel)
-            collided, neighbors = self._assign_fibers(labels)
+            with NumpyRNGContext(self.local_seed):
+                collided, neighbors = self._assign_fibers(labels)
 
         # all reduce to get summary statistics
         N_pop1 = self.comm.allreduce((collided^1).sum())
@@ -163,7 +163,10 @@ def _assign_fibers(self, Label):
 
             # pairs (random selection)
             if N == 2:
-                which = self.random_state.choice([0,1])
+
+                # randomly choose, with fixed local seed
+                which = numpy.random.choice([0,1])
+
                 indices = [start+which, start+(which^1)]
                 PIG2['Collided'][indices] = [1, 0]
                 PIG2['NeighborID'][indices] = [PIG2['Index'][start+(which^1)], -1]
@@ -219,7 +222,9 @@ def count(slice, n):
             # remove object that has most # of collisions 
             # and those colliding objects have least # of collisions
             idx = numpy.where(n_collisions == n_collisions.max())[0]
-            ii = self.random_state.choice(numpy.where(n_other[idx] == n_other[idx].min())[0])
+
+            # choose randomly, with a fixed local seed
+            ii = numpy.random.choice(numpy.where(n_other[idx] == n_other[idx].min())[0])
             collided_index = idx[ii]  
 
             # make the collided galaxy and remove from group

diff --git a/nbodykit/plugins/algorithms/Subsample.py b/nbodykit/plugins/algorithms/Subsample.py
@@ -38,6 +38,7 @@ def run(self):
         from pmesh.particlemesh import ParticleMesh
         from mpi4py import MPI
         import mpsort
+        from astropy.utils.misc import NumpyRNGContext
 
         pm = ParticleMesh(self.datasource.BoxSize, self.Nmesh, dtype='f4', comm=self.comm)
         if self.smoothing is None:
@@ -82,7 +83,7 @@ def NormalizeDC(pm, complex):
             pm.transfer([Smoothing, NormalizeDC])
             pm.c2r()
             columns = ['Position', 'ID', 'Velocity']
-            random_state = utils.local_random_state(self.seed, self.comm)
+            local_seed = utils.local_random_seed(self.seed, self.comm)
 
             dtype = numpy.dtype([
                     ('Position', ('f4', 3)),
@@ -95,7 +96,9 @@ def NormalizeDC(pm, complex):
 
             with self.datasource.open() as stream:
                 for Position, ID, Velocity in stream.read(columns):
-                    u = random_state.uniform(size=len(ID))
+
+                    with NumpyRNGContext(local_seed):
+                        u = numpy.random.uniform(size=len(ID))
                     keep = u < self.ratio
                     Nkeep = keep.sum()
                     if Nkeep == 0: continue 

diff --git a/nbodykit/plugins/datasource/UniformBox.py b/nbodykit/plugins/datasource/UniformBox.py
@@ -15,12 +15,9 @@ class UniformBoxDataSource(DataSource):
 
     def __init__(self, N, BoxSize, max_speed=500., mu_logM=13.5, sigma_logM=0.5, seed=None):        
 
-        # create the random state from the global seed and comm size
-        if self.seed is not None:
-            self.random_state = utils.local_random_state(self.seed, self.comm)
-        else:
-            self.random_state = numpy.random
-
+        # create the local random seed from the global seed and comm size
+        self.local_seed = utils.local_random_seed(self.seed, self.comm)
+
     @classmethod
     def register(cls):
 
@@ -47,14 +44,22 @@ def readall(self):
             `Velocity` : uniformly distributed between ``+/- max_speed``
             `LogMass`  : normally distributed with mean `mu_logM` and std dev `sigma_logM`
             `Mass`     : values corresponding to 10**`LogMass` 
-        """            
+        """   
+        # helpful astropy random number utility
+        from astropy.utils.misc import NumpyRNGContext
+
+        # the return dictionary and shape
         toret = {}
         shape = (self.N, 3)
-
-        toret['Position'] = self.random_state.uniform(size=shape) * self.BoxSize
-        toret['Velocity'] = 2*self.max_speed * self.random_state.uniform(size=shape) - self.max_speed
-        toret['LogMass']  = self.random_state.normal(loc=self.mu_logM, scale=self.sigma_logM, size=self.N)
-        toret['Mass']     = 10**(toret['LogMass'])
+
+        # ensure that each call to readall generates random numbers
+        # seeded by `local_seed`
+        with NumpyRNGContext(self.local_seed):
+
+            toret['Position'] = numpy.random.uniform(size=shape) * self.BoxSize
+            toret['Velocity'] = 2*self.max_speed * numpy.random.uniform(size=shape) - self.max_speed
+            toret['LogMass']  = numpy.random.normal(loc=self.mu_logM, scale=self.sigma_logM, size=self.N)
+            toret['Mass']     = 10**(toret['LogMass'])
 
         return toret
 
diff --git a/nbodykit/plugins/datasource/ZeldovichSim.py b/nbodykit/plugins/datasource/ZeldovichSim.py
@@ -18,11 +18,8 @@ class ZeldovichSimDataSource(DataSource):
 
     def __init__(self, nbar, redshift, BoxSize, Nmesh, bias=2., rsd=None, seed=None):        
 
-        # create the random state from the global seed and comm size
-        if self.seed is not None:
-            self.random_state = utils.local_random_state(self.seed, self.comm)
-        else:
-            self.random_state = numpy.random
+        # create the local random seed from the global seed and comm size
+        self.local_seed = utils.local_random_seed(self.seed, self.comm)
 
         # crash if no cosmology provided
         if self.cosmo is None:
@@ -64,6 +61,7 @@ def parallel_read(self, columns, full=False):
         # the other imports
         from nbodykit import mockmaker
         from pmesh.particlemesh import ParticleMesh
+        from astropy.utils.misc import NumpyRNGContext
 
         # initialize the CLASS parameters 
         pars = classylss.ClassParams.from_astropy(self.cosmo.engine)
@@ -79,13 +77,16 @@ def parallel_read(self, columns, full=False):
         # the particle mesh for gridding purposes
         pm = ParticleMesh(self.BoxSize, self.Nmesh, dtype='f4', comm=self.comm)
 
-        # compute the linear overdensity and displacement fields
-        delta, disp = mockmaker.make_gaussian_fields(pm, Plin, random_state=self.random_state, compute_displacement=True)
+        # generate initialize fields and Poisson sample with fixed local seed
+        with NumpyRNGContext(self.local_seed):
 
-        # sample to Poisson points
-        f = cosmo.f_z(self.redshift) # growth rate to do RSD in the Zel'dovich approx
-        kws = {'rsd':self.rsd, 'f':f, 'bias':self.bias, 'random_state':self.random_state}
-        pos = mockmaker.poisson_sample_to_points(delta, disp, pm, self.nbar, **kws)
+            # compute the linear overdensity and displacement fields
+            delta, disp = mockmaker.make_gaussian_fields(pm, Plin, compute_displacement=True)
+
+            # sample to Poisson points
+            f = cosmo.f_z(self.redshift) # growth rate to do RSD in the Zel'dovich approx
+            kws = {'rsd':self.rsd, 'f':f, 'bias':self.bias}
+            pos = mockmaker.poisson_sample_to_points(delta, disp, pm, self.nbar, **kws)
 
         # yield position
         yield [pos if col == 'Position' else None for col in columns]

diff --git a/nbodykit/utils/__init__.py b/nbodykit/utils/__init__.py
@@ -3,9 +3,9 @@
 """
 import numpy
 
-def local_random_state(seed, comm):
+def local_random_seed(seed, comm):
     """
-    Return a random state for the local rank, 
+    Return a random seed for the local rank, 
     which is seeded by the global `seed`
     
     Notes
@@ -19,20 +19,25 @@ def local_random_state(seed, comm):
     
     Parameters
     ----------
-    seed : int
+    seed : int, None
         the global seed, used to seed the local random state
     comm : MPI.Communicator
         the MPI communicator
+    
+    Returns
+    -------
+    int : 
+        a integer appropriate for seeding the local random state
     """ 
-    # create the global random state
+    # create a global random state
     rng = numpy.random.RandomState(seed)
 
     # use the global seed to seed all ranks
     # seed must be an unsigned 32 bit integer (0xffffffff in hex)
     seeds = rng.randint(0, 4294967295, size=comm.size)
 
     # choose the right local seed for this rank
-    return numpy.random.RandomState(seeds[comm.rank])
+    return seeds[comm.rank]
 
 def timer(start, end):
     """