diff --git a/CHANGES.rst b/CHANGES.rst
index ad7a3684c..347ee9646 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -3,4 +3,12 @@
 * base dependencies + extras (halotools, h5py); install all dependencies via pip nbodykit[extras]
 * meta-data calculations in FKPCatalog now account for Source selection properly
-* support for numpy int/float meta-data in JSON output files
\ No newline at end of file
+* support for numpy int/float meta-data in JSON output files
+* Cosmology instances no longer return attributes as Quantity instances, assuming a default set of units
+* renaming of various classes/modules related to the nbodykit.Source syntax
+  * no more nbodykit.Source in nbodykit.lab
+  * nbodykit.source.particle has been renamed to nbodykit.source.catalog
+  * source objects are now catalogs -- their class names have "Catalog" appended
+  * added individual catalogs for different file types in nbodykit.io, e.g., CSVCatalog, HDFCatalog, etc.
+* the ``.apply`` operation is no longer in-place for sources; it returns a view with the
+  list of actions extended
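As a quick orientation for reviewers, a minimal sketch of the renamed API summarized in the changelog above. It assumes, as the changelog implies, that the per-file-type catalogs and UniformCatalog are importable from nbodykit.lab; the CSV file name and its column names are hypothetical.

    from nbodykit.lab import CSVCatalog, UniformCatalog

    # file-type specific catalogs replace the old nbodykit.Source syntax
    # ('particles.csv' with three named columns is a hypothetical input)
    cat = CSVCatalog('particles.csv', names=['x', 'y', 'z'])

    # source objects are now catalogs; columns are dask arrays
    uniform = UniformCatalog(nbar=1e-3, BoxSize=512., seed=42)
    pos = uniform['Position']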
""" logger = logging.getLogger('CatalogSource') @staticmethod def make_column(array): - """ - Utility function to convert a numpy array to a column object - (dask.array.Array) + """ + Utility function to convert a numpy array to a column object + (dask.array.Array) """ if isinstance(array, da.Array): return array @@ -107,24 +107,24 @@ def make_column(array): return da.from_array(array, chunks=100000) def __init__(self, comm, use_cache=False): - + # ensure self.comm is set, though usually already set by the child. self.comm = comm # initialize a cache self.use_cache = use_cache - + # user-provided overrides for columns self._overrides = {} - + # if size is already computed update csize # otherwise the subclass shall call update_csize explicitly. if self.size is not NotImplemented: self.update_csize() - + def __len__(self): """ - The length of CatalogSource is equal to :attr:`size`; this is the + The length of CatalogSource is equal to :attr:`size`; this is the local size of the source on a given rank """ return self.size @@ -134,28 +134,28 @@ def __iter__(self): def __contains__(self, col): return col in self.columns - + def __getitem__(self, sel): """ The following types of indexing supported are supported for CatalogSource: - + 1. strings specifying a column in the CatalogSource; returns a dask array holding the column data - 2. boolean arrays specifying a slice of the CatalogSource; + 2. boolean arrays specifying a slice of the CatalogSource; returns a CatalogSubset holding only the revelant slice 3. slice object specifying which particles to select """ # handle boolean array slices if not isinstance(sel, string_types): - + # convert slices to boolean arrays if isinstance(sel, (slice, list)): - + # list must be all integers if isinstance(sel, list) and not numpy.array(sel).dtype == numpy.integer: raise KeyError("array-like indexing via a list should be a list of integers") - + index = numpy.zeros(self.size, dtype='?') index[sel] = True; sel = index @@ -164,7 +164,7 @@ def __getitem__(self, sel): return get_catalog_subset(self, sel) else: raise KeyError("strings and boolean arrays are the only supported indexing methods") - + # get the right column if sel in self._overrides: r = self._overrides[sel] @@ -173,15 +173,15 @@ def __getitem__(self, sel): else: raise KeyError("column `%s` is not defined in this source; " %sel + \ "try adding column via `source[column] = data`") - + # evaluate callables if we need to if callable(r): r = r() - + # add a column attrs dict - if not hasattr(r, 'attrs'): r.attrs = {} - + if not hasattr(r, 'attrs'): r.attrs = {} + return r - + def __setitem__(self, col, value): """ Add columns to the CatalogSource, overriding any existing columns @@ -191,18 +191,18 @@ def __setitem__(self, col, value): if numpy.isscalar(value): assert self.size is not NotImplemented, "size is not implemented! cannot set scalar array" value = ConstantArray(value, self.size, chunks=100000) - + # check the correct size, if we know the size if self.size is not NotImplemented: assert len(value) == self.size, "error setting column, data must be array of size %d" %self.size - + # add the column to the overrides dict self._overrides[col] = self.make_column(value) - + @abc.abstractproperty def size(self): """ - The number of particles in the source on the local rank. This + The number of particles in the source on the local rank. 
         property must be defined for all subclasses
         """
         return NotImplemented
@@ -214,7 +214,7 @@ def use_cache(self):
         to cache data in memory
         """
         return self._use_cache
-        
+
     @use_cache.setter
     def use_cache(self, val):
         """
@@ -231,13 +231,13 @@ def use_cache(self, val):
         else:
             if hasattr(self, '_cache'): delattr(self, '_cache')
         self._use_cache = val
-        
+
     @property
     def hardcolumns(self):
-        """ 
-        A list of the hard-coded columns in the Source. These are usually 
+        """
+        A list of the hard-coded columns in the Source. These are usually
         member functions marked by the @column decorator.
-        
+
         Subclasses may override this method and :func:`get_hardcolumn` to
         bypass the decorator logic.
         """
@@ -251,20 +251,20 @@ def hardcolumns(self):
     def columns(self):
         """
         All columns in this Source, which include:
-        
+
         1. the columns hard-coded into the class's definition
         2. override columns provided by the user
         """
         hcolumns = list(self.hardcolumns)
-        overrides = list(self._overrides) 
+        overrides = list(self._overrides)
         return sorted(set(hcolumns + overrides))

     @property
     def csize(self):
         """
-        The total, collective size of the Source, i.e., summed across all 
+        The total, collective size of the Source, i.e., summed across all
         ranks.
-        
+
         It is the sum of :attr:`size` across all available ranks.
         """
         return self._csize
@@ -279,28 +279,28 @@ def attrs(self):
         except AttributeError:
             self._attrs = {}
             return self._attrs
-        
+
     @column
     def Selection(self):
         """
-        Override the ``Selection`` column to select a subset slice of a 
-        CatalogSource. 
-        
+        Override the ``Selection`` column to select a subset slice of a
+        CatalogSource.
+
         By default, this column is set to ``True`` for all particles.
         """
         return ConstantArray(True, self.size, chunks=100000)
-        
+
     @column
     def Weight(self):
         """
         When interpolating a CatalogSource on to a mesh, the value of this
-        array is used as the Weight that each particle contributes to a given 
+        array is used as the Weight that each particle contributes to a given
         mesh cell.
-        
+
         By default, this array is set to unity for all particles
         """
         return ConstantArray(1.0, self.size, chunks=100000)
-        
+
     def get_hardcolumn(self, col):
         """
         Construct and return a hard-coded column. These are usually produced by calling
@@ -310,18 +310,18 @@
         the decorator logic.
         """
         return find_column(self.__class__, col)(self)
-        
+
     def update_csize(self):
-        """ 
+        """
         Set the collective size, :attr:`csize`

-        This function should be called in :func:`__init__` of a subclass, 
+        This function should be called in :func:`__init__` of a subclass,
         after :attr:`size` has been set to a valid value (not ``NotImplemented``)
         """
         if self.size is NotImplemented:
             raise ValueError(("``size`` cannot be NotImplemented when trying "
                               "to compute collective size `csize`"))
-        
+
         # sum size across all ranks
         self._csize = self.comm.allreduce(self.size)

@@ -334,45 +334,45 @@ def compute(self, *args, **kwargs):
         """
         Our version of :func:`dask.compute` that computes multiple delayed
         dask collections at once
-        
+
         This should be called on the return value of :func:`read` to
         convert any dask arrays to numpy arrays
-        
+
         Parameters
         ----------
         args : object
-            Any number of objects. If the object is a dask 
-            collection, it's computed and the result is returned. 
+            Any number of objects. If the object is a dask
+            collection, it's computed and the result is returned.
             Otherwise it's passed through unchanged.
-        
+
         Notes
         -----
-        The dask default optimizer induces too many (unnecessary) 
+        The dask default optimizer induces too many (unnecessary)
         IO calls -- we turn this feature off by default.
-        
+
         Eventually we want our own optimizer probably.
         """
         import dask
-        
+
         # do not optimize graph (can lead to slower optimizations)
         kwargs.setdefault('optimize_graph', False)
-        
+
         # use a cache?
         if self.use_cache and hasattr(self, '_cache'):
             with self._cache:
                 toret = dask.compute(*args, **kwargs)
         else:
             toret = dask.compute(*args, **kwargs)
-        
+
         # do not return tuples of length one
         if len(toret) == 1: toret = toret[0]
         return toret
-        
+
     def save(self, output, columns, datasets=None, header='Header'):
-        """ 
+        """
         Save the CatalogSource to a :class:`bigfile.BigFile`
-        
-        Only the selected columns are saved and :attr:`attrs` are saved in 
+
+        Only the selected columns are saved and :attr:`attrs` are saved in
         ``header``. The attrs of columns are stored in the datasets.

         Parameters
         ----------
         output : str
             the name of the file to write to
         columns : list of str
             the names of the columns to save in the file
         datasets : list of str, optional
-            names for the data set where each column is stored; defaults to 
+            names for the data set where each column is stored; defaults to
             the name of the column
         header : str, optional
             the name of the data set holding the header information,
             where :attr:`attrs` is stored
         """
         import bigfile
-        
-        if datasets is None: datasets = columns
+        import json
+        from nbodykit.utils import JSONEncoder
+
+        if datasets is None: datasets = columns
         if len(datasets) != len(columns):
             raise ValueError("`datasets` must have the same length as `columns`")
@@ -401,28 +403,36 @@
             bb = ff.create(header)
             with bb :
                 for key in self.attrs:
-                    bb.attrs[key] = self.attrs[key]
+                    try:
+                        bb.attrs[key] = self.attrs[key]
+                    except ValueError:
+                        try:
+                            json_str = 'json://'+json.dumps(self.attrs[key], cls=JSONEncoder)
+                            bb.attrs[key] = json_str
+                        except:
+                            raise ValueError("cannot save '%s' key in attrs dictionary" % key)

             for column, dataset in zip(columns, datasets):
                 c = self[column]
+
                 # save column attrs too
                 with ff.create_from_array(dataset, c) as bb:
-                    if hasattr(c, 'attrs'): 
+                    if hasattr(c, 'attrs'):
                         for key in c.attrs:
                             bb.attrs[key] = c.attrs[key]

     def read(self, columns):
         """
         Return the requested columns as dask arrays
-        
+
         Parameters
         ----------
         columns : list of str
             the names of the requested columns
-        
+
         Returns
         -------
-        list of dask.array.Array : 
+        list of dask.array.Array :
             the list of column data, in the form of dask arrays
         """
         missing = set(columns) - set(self.columns)
@@ -431,13 +441,13 @@
                             "try adding columns via `source[column] = data`")

         return [self[col] for col in columns]
-        
-    def to_mesh(self, Nmesh=None, BoxSize=None, dtype='f4', 
+
+    def to_mesh(self, Nmesh=None, BoxSize=None, dtype='f4',
                 interlaced=False, compensated=False, window='cic', weight='Weight',
                 selection='Selection'):
-        """ 
+        """
         Convert the CatalogSource to a MeshSource
-        
+
         Parameters
         ----------
         Nmesh : int, optional
@@ -449,21 +459,21 @@
         dtype : string, optional
             the data type of the mesh array
         interlaced : bool, optional
-            use the interlacing technique of Sefusatti et al. 2015 to reduce
+            use the interlacing technique of Sefusatti et al. 2015 to reduce
             the effects of aliasing on Fourier space quantities computed
             from the mesh
         compensated : bool, optional
-            whether to correct for the window introduced by the grid 
+            whether to correct for the window introduced by the grid
             interpolation scheme
         window : str, optional
-            the string specifying which window interpolation scheme to use; 
+            the string specifying which window interpolation scheme to use;
             see `pmesh.window.methods`
         weight : str, optional
             the name of the column specifying the weight for each particle
         selection : str, optional
             the name of the column that specifies which (if any) slice
             of the CatalogSource to take
-        
+
         Returns
         -------
         mesh : CatalogMeshSource
@@ -472,10 +482,10 @@
         """
         from nbodykit.base.catalogmesh import CatalogMeshSource
         from pmesh.window import methods
-        
+
         if window not in methods:
             raise ValueError("valid window methods: %s" %str(methods))
-        
+
         if BoxSize is None:
             try:
                 BoxSize = self.attrs['BoxSize']
@@ -487,17 +497,17 @@
             try:
                 Nmesh = self.attrs['Nmesh']
             except KeyError:
-                raise ValueError(("cannot convert particle source to a mesh; " 
-                                  "'Nmesh' keyword is not supplied and the CatalogSource " 
+                raise ValueError(("cannot convert particle source to a mesh; "
+                                  "'Nmesh' keyword is not supplied and the CatalogSource "
                                   "does not define one in 'attrs'."))

-        r = CatalogMeshSource(self, Nmesh=Nmesh, BoxSize=BoxSize, 
+        r = CatalogMeshSource(self, Nmesh=Nmesh, BoxSize=BoxSize,
                               dtype=dtype, weight=weight, selection=selection)
         r.interlaced = interlaced
         r.compensated = compensated
         r.window = window
         return r
-        
+

 class CatalogSubset(CatalogSource):
     """
@@ -516,27 +526,27 @@ def __init__(self, size, comm, use_cache=False, **columns):
             comm of the object that we are selecting from
         use_cache : bool; optional
             whether to cache results
-        **columns : 
+        **columns :
             the data arrays that will be added to this source; keys
             represent the column names
         """
         self._size = size
         CatalogSource.__init__(self, comm=comm, use_cache=use_cache)
-        
+
         # store the column arrays
         for name in columns:
             self[name] = columns[name]
-        
+
     @property
     def size(self):
         return self._size
-        
+
 def get_catalog_subset(parent, index):
     """
-    Select a subset of a CatalogSource according to a boolean index array, 
-    returning a CatalogSubset holding only the data that satisfies 
+    Select a subset of a CatalogSource according to a boolean index array,
+    returning a CatalogSubset holding only the data that satisfies
     the slice criterion
-    
+
     Parameters
     ----------
     parent : CatalogSource
@@ -544,33 +554,33 @@
     index : array_like
         either a dask or numpy boolean array; this determines which
         rows are included in the returned object
-    
+
     Returns
     -------
     subset : CatalogSubset
         the particle source with the same meta-data as `parent`, and
         with the sliced data arrays
-    """ 
+    """
     # compute the index slice if needed and get the size
     if isinstance(index, da.Array):
         index = parent.compute(index)
     elif isinstance(index, list):
         index = numpy.array(index)
-    
+
     # verify the index is a boolean array
     if len(index) != len(parent):
         raise ValueError("slice index has length %d; should be %d" %(len(index), len(parent)))
     if getattr(index, 'dtype', None) != '?':
         raise ValueError("index used to slice CatalogSource must be boolean and array-like")
-    
+
     # new size is just number of True entries
     size = index.sum()
-    
+
     # initialize subset Source of right size
     subset_data = {col:parent[col][index] for col in parent}
     toret = CatalogSubset(size, parent.comm, use_cache=parent.use_cache, **subset_data)
-    
+
     # and the meta-data
     toret.attrs.update(parent.attrs)
-    
-    return toret
\ No newline at end of file
+
+    return toret
diff --git a/nbodykit/cosmology/core.py b/nbodykit/cosmology/core.py
index 131e9c8e8..f19286bc5 100644
--- a/nbodykit/cosmology/core.py
+++ b/nbodykit/cosmology/core.py
@@ -3,16 +3,29 @@
 import numpy as np
 import functools

+def removeunits(f):
+    """
+    Decorator to remove units from :class:`astropy.units.Quantity`
+    instances
+    """
+    @functools.wraps(f)
+    def wrapped(*args, **kwargs):
+        ans = f(*args, **kwargs)
+        if isinstance(ans, units.Quantity):
+            ans = ans.value
+        return ans
+    return wrapped
+
 class fittable(object):
     """
     A "fittable" function
-    
+
     The original function gains a `.fit()` method,
-    which returns a spline-interpolated version of the function 
+    which returns a spline-interpolated version of the function
     for a specified variable
     """
     def __init__(self, func, instance=None):
-        
+
         # update the docstring, etc from the original func
         functools.update_wrapper(self, func)
         self.func = func
@@ -39,11 +52,11 @@ def fit(self, argname, kwargs={}, bins=1024, range=None):
         kwargs : dict; optional
             dict of keywords to pass to the original function
         bins : int, iterable; optional
-            either an iterable specifying the bin edges, or an 
+            either an iterable specifying the bin edges, or an
             integer specifying the number of linearly-spaced bins
         range : tuple; optional
             the range to fit over if `bins` specifies an integer
-        
+
         Returns
         -------
         spl : callable
@@ -51,7 +64,7 @@
         """
         from scipy import interpolate
         from astropy.units import Quantity
-        
+
         if isiterable(bins):
             bin_edges = np.asarray(bins)
         else:
@@ -74,7 +87,7 @@ def fit(self, argname, kwargs={}, bins=1024, range=None):

 def vectorize_if_needed(func, *x):
     """
-    Helper function to vectorize functions on array inputs; 
+    Helper function to vectorize functions on array inputs;
     borrowed from :mod:`astropy.cosmology.core`
     """
     if any(map(isiterable, x)):
@@ -98,23 +111,36 @@ class Cosmology(dict):
     An extension of the :mod:`astropy.cosmology` framework that can
     store additional, orthogonal parameters and behaves like a read-only
     dictionary
-    
+
     The class relies on :mod:`astropy.cosmology` as the underlying
     "engine" for calculation of cosmological quantities. This "engine"
-    is stored as :attr:`engine` and supports :class:`~astropy.cosmology.LambdaCDM` 
+    is stored as :attr:`engine` and supports :class:`~astropy.cosmology.LambdaCDM`
     and :class:`~astropy.cosmology.wCDM`, and their flat equivalents
-    
+
     Any attributes or functions of the underlying astropy engine
     can be directly accessed as attributes or keys of this class
-    
+
+    .. note::
+
+        A default set of units is assumed, so attributes stored internally
+        as :class:`astropy.units.Quantity` instances will be returned
+        here as numpy arrays. Those units are:
+
+        - temperature: ``K``
+        - distance: ``Mpc``
+        - density: ``g/cm^3``
+        - neutrino mass: ``eV``
+        - time: ``Gyr``
+        - H0: ``km/s/Mpc``
+
     .. warning::
-        
-        This class does not currently support a non-constant dark energy 
-        equation of state 
-    """
-    def __init__(self, H0=67.6, Om0=0.31, Ob0=0.0486, Ode0=0.69, w0=-1., Tcmb0=2.7255, 
+
+        This class does not currently support a non-constant dark energy
+        equation of state
+    """
+    def __init__(self, H0=67.6, Om0=0.31, Ob0=0.0486, Ode0=0.69, w0=-1., Tcmb0=2.7255,
                     Neff=3.04, m_nu=0., flat=False, name=None, **kwargs):
-        
+
         """
         Parameters
         ----------
@@ -138,79 +164,84 @@
             if `True`, automatically set `Ode0` such that `Ok0` is zero
         name : str
             a name for the cosmology
-        kwargs : 
+        kwargs :
             additional key/value pairs to store in the dictionary
         """
         # convert neutrino mass to an astropy `Quantity`
         if m_nu is not None:
             m_nu = units.Quantity(m_nu, 'eV')
-        
+
         # the astropy keywords
         kws = {'name':name, 'Ob0':Ob0, 'w0':w0, 'Tcmb0':Tcmb0, 'Neff':Neff, 'm_nu':m_nu, 'Ode0':Ode0}
-        
+
         # determine the astropy class
         if w0 == -1.0: # cosmological constant
             cls = 'LambdaCDM'
             kws.pop('w0')
         else:
             cls = 'wCDM'
-        
+
         # use special flat case if Ok0 = 0
-        if flat: 
+        if flat:
             cls = 'Flat' + cls
             kws.pop('Ode0')
-        
+
         # initialize the astropy engine
         self.engine = getattr(cosmology, cls)(H0=H0, Om0=Om0, **kws)
-        
+
         # add valid params to the underlying dict
         for k in kws:
             if hasattr(self.engine, k):
                 kwargs[k] = getattr(self.engine, k)
         dict.__init__(self, H0=H0, Om0=Om0, **kwargs)
-        
+
         # store D_z normalization
         integrand = lambda a: a ** (-3) * self.engine.inv_efunc(1/a-1.) ** 3
         self._Dz_norm = 1. / quad(integrand, 0., 1. )[0]
-        
+
     def __dir__(self):
         """
-        Explicitly list the underlying astropy engine's attributes as 
+        Explicitly list the underlying astropy engine's attributes as
         part of the attributes of this class
         """
         this_attrs = set(dict.__dir__(self)) | set(self.keys())
         engine_attrs = set(self.engine.__dir__())
         return list(this_attrs|engine_attrs)
-        
+
     @classmethod
     def from_astropy(self, cosmo, **kwargs):
         """
         Return a :class:`Cosmology` instance from an astropy cosmology
-        
+
         Parameters
         ----------
         cosmo : subclass of :class:`astropy.cosmology.FLRW`
             the astropy cosmology instance
-        **kwargs : 
+        **kwargs :
             extra key/value parameters to store in the dictionary
-        """ 
-        valid = ['H0', 'Om0', 'Ob0', 'Ode0', 'w0', 'Tcmb0', 'Neff', 'm_nu'] 
+        """
+        valid = ['H0', 'Om0', 'Ob0', 'Ode0', 'w0', 'Tcmb0', 'Neff', 'm_nu']
         for name in valid:
             if hasattr(cosmo, name): kwargs[name] = getattr(cosmo, name)
-        kwargs['flat'] = cosmo.Ok0 == 0. 
+        kwargs['flat'] = cosmo.Ok0 == 0.
         kwargs.setdefault('name', getattr(cosmo, 'name', None))
-        
+
         toret = Cosmology(**kwargs)
         toret.__doc__ += "\n" + cosmo.__doc__
         return toret
-        
+
     def __setitem__(self, key, value):
         """
         No setting --> read-only
         """
         raise ValueError("Cosmology is a read-only dictionary; see clone() to create a copy with changes")
-        
+
+    @removeunits
+    def __getitem__(self, key):
+        return dict.__getitem__(self, key)
+
+    @removeunits
     def __missing__(self, key):
         """
         Missing dict keys returned only if they are attributes of the
@@ -221,32 +252,34 @@
             toret = getattr(self.engine, key)
             if not callable(toret):
                 return toret
-        
+
         # otherwise fail
         raise KeyError("no such parameter '%s' in Cosmology" %key)
-        
+
+    @removeunits
     def __getattr__(self, key):
         """
-        Try to return attributes from the underlying astropy engine and 
+        Try to return attributes from the underlying astropy engine and
         then from the dictionary
-        
+
         Notes
         -----
-        For callable attributes part of the astropy engine's public API (i.e., 
+        For callable attributes part of the astropy engine's public API (i.e.,
         functions that do not begin with a '_'), the function will be decorated
-        with the :class:`fittable` class 
+        with the :class:`fittable` class
         """
         try:
             toret = getattr(self.engine, key)
             # if a callable function is part of the public API of the "engine", make it fittable
             if callable(toret) and not key.startswith('_'):
                 toret = fittable(toret.__func__, instance=self)
+
             return toret
         except:
             if key in self: return self[key]
             raise AttributeError("no such attribute '%s'" %key)
-        
+
     def clone(self, **kwargs):
         """
         Returns a copy of this object, potentially with some changes.
@@ -255,7 +288,7 @@
         -------
         newcos : Subclass of FLRW
             A new instance of this class with the specified changes.
-        
+
         Notes
         -----
         This assumes that the values of all constructor arguments
@@ -277,18 +310,18 @@
         # filter out astropy-defined parameters and extras
         extras = {k:self[k] for k in self if not hasattr(self.engine, k)}
         extras.update({k:kwargs.pop(k) for k in list(kwargs) if not hasattr(self.engine, k)})
-        
+
         # make the new astropy instance
         new_engine = self.engine.clone(**kwargs)
-        
+
         # return a new Cosmology instance
         return self.from_astropy(new_engine, **extras)

     def efunc_prime(self, z):
         """
         Function giving the derivative of :func:`efunc` with respect
-        to the scale factor ``a`` 
+        to the scale factor ``a``
-        
+
         Parameters
         ----------
         z : array-like
             Input redshifts.
@@ -299,19 +332,19 @@
         eprime : ndarray, or float if input scalar
             The derivative of the Hubble factor redshift-scaling with respect
             to scale factor
-        """ 
+        """
         if not np.all(self.de_density_scale(z)==1.0):
             raise NotImplementedError("non-constant dark energy redshift dependence is not supported")
-        
-        if isiterable(z): 
+
+        if isiterable(z):
             z = np.asarray(z)
         zp1 = 1.0 + z
-        
+
         # compute derivative of Or term with respect to scale factor
         if self.has_massive_nu:
             Or = self.Ogamma0 * (1 + self.nu_relative_density(z))
-        
-            # compute derivative of the nu_relative_density() function, which 
+
+            # compute derivative of the nu_relative_density() function, which
             # uses the fitting formula from Komatsu et al 2011
             p = 1.83
             invp = 0.54644808743 # 1.0 / p
@@ -321,30 +354,30 @@
             drel_mass_per = x / curr_nu_y * (1.0 + x) ** (invp-1) * self._nu_y
             drel_mass = drel_mass_per.sum(-1) + self._nmasslessnu
             nu_relative_density_deriv = 0.22710731766 * self._neff_per_nu * drel_mass
-            
+
             rad_deriv = -4*Or*zp1**5 + zp1**4*self.Ogamma0*nu_relative_density_deriv
         else:
             Or = self.Ogamma0 + self.Onu0
-            rad_deriv = -4*Or*zp1**5 
+            rad_deriv = -4*Or*zp1**5
+
         # d(E^2) / da (assumes Ode0 independent of a)
         esq_prime = rad_deriv - 3*self.Om0*zp1**4 - 2*self.Ok0*zp1**3
-        
+
         # dE / da
         eprime = esq_prime / (2*self.efunc(z))
         return eprime
-        
+
     @fittable
     def growth_rate(self, z):
         """
         Linear growth rate :math:`f(z) = dlnD / dlna`, where ``a`` is the
         scale factor and ``D`` is the growth function, given by :func:`D_z`
-        
+
         Parameters
         ----------
         z : array-like
             Input redshifts.
-        
+
         Returns
         -------
         fz : ndarray, or float if input scalar
@@ -353,41 +386,40 @@
         z = np.asarray(z)
         a = 1./(1+z)
         inv_efunc = self.inv_efunc(z)
-        
+
         # D_z integrand
         integrand = lambda red: quad(lambda a: a ** (-3) * self.engine.inv_efunc(1/a-1.) ** 3, 0, 1./(1+red))[0]
         D_z = vectorize_if_needed(integrand, z)
-        
+
         return a * inv_efunc * self.efunc_prime(z) + inv_efunc**3 / (a**2 * D_z)
-        
+
     @fittable
     def growth_function(self, z):
         """
-        Linear growth function :math:`D(z)` at redshift ``z`` 
+        Linear growth function :math:`D(z)` at redshift ``z``

         .. math::
-            
+
             D(a) \propto H(a) \int_0^a \frac{da'}{a'^3 H(a')^3}

         The normalization is such that :math:`D(a=1) = D(z=0) = 1`
-        
+
         Parameters
         ----------
         z : array-like
             Input redshifts.
-        
+
         Returns
         -------
         Dz : ndarray, or float if input scalar
             The linear growth function evaluated at the input redshifts
-        """ 
+        """
         # this is 1 / (E(a) * a)**3, with H(a) = H0 * E(a)
         integrand = lambda a: a ** (-3) * self.engine.inv_efunc(1/a-1.) ** 3
-        
+
         # normalize to D(z=0) = 1
         norm = self.engine.efunc(z) * self._Dz_norm
-        
+
         # be sure to return vectorized quantities
         f = lambda red: quad(integrand, 0., 1./(1+red))[0]
         return norm * vectorize_if_needed(f, z)
-
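To make the unit-stripping behavior above concrete, here is a minimal sketch (not part of the patch) of how a Cosmology instance now behaves; it assumes the Planck15 instance is exposed from nbodykit.lab, as in the tests further below.

    from nbodykit.lab import cosmology

    cosmo = cosmology.Planck15

    # previously an astropy Quantity with units of K; now a plain
    # float, with the default unit (K) assumed
    Tcmb0 = cosmo.Tcmb0

    # methods decorated with @fittable can also return a
    # spline-interpolated version of themselves over a variable
    f = cosmo.growth_rate(0.55)
    spl = cosmo.growth_rate.fit('z', range=(0., 1.))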
diff --git a/nbodykit/cosmology/ehpower.py b/nbodykit/cosmology/ehpower.py
index 13911b6d0..57ae95dca 100644
--- a/nbodykit/cosmology/ehpower.py
+++ b/nbodykit/cosmology/ehpower.py
@@ -23,19 +23,19 @@ def __init__(self, cosmo, redshift):
         if isinstance(cosmo, FLRW):
             raise ValueError("please provide a nbodykit.cosmology.Cosmology class; see Cosmology.from_astropy")
         self.cosmo = cosmo
-        
+
         # check required parameters
         required = ['Ob0', 'sigma8', 'n_s']
         for param in required:
             if param not in self.cosmo:
                 raise ValueError("'%s' attribute of cosmology must be provided" %param)
-        
+
         # set sigma8 to the cosmology value
         self._sigma8 = self._sigma8_0 = self.cosmo.sigma8
-        
+
         # redshift
         self.redshift = redshift
-        
+
     @property
     def attrs(self):
         """
@@ -43,36 +43,31 @@
         """
         from astropy.units import Quantity
         attrs = {'cosmo.%s' %k : self.cosmo[k] for k in self.cosmo}
-        
-        # get the value for any parameters with units
-        for k in attrs:
-            if isinstance(attrs[k], Quantity):
-                attrs[k] = attrs[k].value
-        
+
         # current value of sigma8
         attrs['cosmo.sigma8'] = self.sigma8
-        
+
         return attrs
-        
+
     @abc.abstractmethod
     def transfer(self, k):
         """
         Subclasses should override this to return the transfer function
         """
         return NotImplemented
-        
+
     @property
     def sigma8(self):
         """
         The present day value of ``sigma_r(r=8 Mpc/h)``, used to normalize
         the power spectrum, which is proportional to the square of this value.
-        
+
         The power spectrum can be re-normalized by setting a different
         value for this parameter
-        
+
         """
         return self._sigma8
-        
+
     @sigma8.setter
     def sigma8(self, value):
         """
@@ -80,7 +75,7 @@
         """
         self._sigma8 = value
         self._normalize()
-        
+
     def _normalize(self):
         """
         Internal function that is called when ``sigma8`` is set, ensuring
@@ -89,23 +84,23 @@
         """
         # set to unity so we get the un-normalized sigma
         self._norm = 1.0
-        
+
         # power is proportional to the square of sigma8
         self._norm = (self.sigma8/self.sigma_r(8.))**2
-        
+
     def __call__(self, k, no_BAO=False):
         """
         Return the linear power spectrum in units of [Mpc/h]^3
         at the redshift :attr:`z`
-        
+
         The amplitude scales with redshift through the square of the
         growth function, evaluated using ``cosmo.growth_function``
-        
+
         Parameters
         ----------
         k : float, array_like
             the wavenumbers in units of h/Mpc
-        
+
         Returns
         -------
         Plin : float, array_like
@@ -121,40 +116,40 @@
             r = np.asarray(r)
             r[k == 0] = 1.
             return r
-        
+
     def sigma_r(self, r):
         """
-        The mass fluctuation within a sphere of radius ``R``, in 
-        units of Mpc/h 
+        The mass fluctuation within a sphere of radius ``R``, in
+        units of Mpc/h
-        
+
         The value of this function with ``R=8`` returns :attr:`sigma8`,
         within numerical precision
-        
+
         Parameters
         ----------
         r : float
-            the scale to compute the mass fluctuation over, in 
+            the scale to compute the mass fluctuation over, in
             units of `Mpc/h`
         """
         if not hasattr(self, '_norm'): self._normalize()
-        
+
         def integrand(lnk):
             k = np.exp(lnk)
             kr = k*r
             j1_kr = np.sin(kr)/kr**3 - np.cos(kr)/kr**2
             T = self.transfer(k)
             return k*k*k * (k ** self.cosmo.n_s) * (T ** 2) * (3*j1_kr) ** 2
-        
+
         norm = self._norm * (self.sigma8 / self._sigma8_0)**2
         lnk = np.log(np.logspace(-4, 4, 4096))
         sigma_sq = norm*simps(integrand(lnk), x=lnk) / (2*np.pi**2)
         return sigma_sq**0.5
-        
-        
+
+
 class EHPower(LinearPowerBase):
     """
     Eisenstein & Hu (1998) fitting function with BAO wiggles
-    
+
     From EH 1998, Eqs. 26,28-31.
     """
     def __init__(self, cosmo, redshift):
@@ -169,7 +164,7 @@
         """
         LinearPowerBase.__init__(self, cosmo, redshift)
         self._set_params()
-        
+
     def _set_params(self):
         """
         Initialize the parameters of the fitting formula
         """
         self.Obh2 = self.cosmo.Ob0 * self.cosmo.h ** 2
         self.Omh2 = self.cosmo.Om0 * self.cosmo.h ** 2
         self.f_baryon = self.cosmo.Ob0 / self.cosmo.Om0
-        self.theta_cmb = self.cosmo.Tcmb0.value / 2.7
+        self.theta_cmb = self.cosmo.Tcmb0 / 2.7
-        
+
         # redshift and wavenumber of equality
         self.z_eq = 2.5e4 * self.Omh2 * self.theta_cmb ** (-4) # this is 1 + z
         self.k_eq = 0.0746 * self.Omh2 * self.theta_cmb ** (-2) # units of 1/Mpc
-        
+
         # sound horizon and k_silk
         self.z_drag_b1 = 0.313 * self.Omh2 ** -0.419 * (1 + 0.607 * self.Omh2 ** 0.674)
         self.z_drag_b2 = 0.238 * self.Omh2 ** 0.223
         self.z_drag = 1291 * self.Omh2 ** 0.251 / (1. + 0.659 * self.Omh2 ** 0.828) * \
                    (1. + self.z_drag_b1 * self.Obh2 ** self.z_drag_b2)
-        
+
         self.r_drag = 31.5 * self.Obh2 * self.theta_cmb ** -4 * (1000. / (1+self.z_drag))
         self.r_eq = 31.5 * self.Obh2 * self.theta_cmb ** -4 * (1000. / self.z_eq)
-        
+
         self.sound_horizon = 2. / (3.*self.k_eq) * np.sqrt(6. / self.r_eq) * \
                    np.log((np.sqrt(1 + self.r_drag) + np.sqrt(self.r_drag + self.r_eq)) / (1 + np.sqrt(self.r_eq)) )
         self.k_silk = 1.6 * self.Obh2 ** 0.52 * self.Omh2 ** 0.73 * (1 + (10.4*self.Omh2) ** -0.95)
-        
+
         # alpha_c
         alpha_c_a1 = (46.9*self.Omh2) ** 0.670 * (1 + (32.1*self.Omh2) ** -0.532)
         alpha_c_a2 = (12.0*self.Omh2) ** 0.424 * (1 + (45.0*self.Omh2) ** -0.582)
-        self.alpha_c = alpha_c_a1 ** -self.f_baryon * alpha_c_a2 ** (-self.f_baryon**3) 
+        self.alpha_c = alpha_c_a1 ** -self.f_baryon * alpha_c_a2 ** (-self.f_baryon**3)
-        
+
         # beta_c
         beta_c_b1 = 0.944 / (1 + (458*self.Omh2) ** -0.708)
         beta_c_b2 = 0.395 * self.Omh2 ** -0.0266
         self.beta_c = 1. / (1 + beta_c_b1 * ((1-self.f_baryon) ** beta_c_b2) - 1)
-        
+
         y = self.z_eq / (1 + self.z_drag)
         alpha_b_G = y * (-6.*np.sqrt(1+y) + (2. + 3.*y) * np.log((np.sqrt(1+y)+1) / (np.sqrt(1+y)-1)))
         self.alpha_b = 2.07 * self.k_eq * self.sound_horizon * (1+self.r_drag)**-0.75 * alpha_b_G
-        
+
         self.beta_node = 8.41 * self.Omh2 ** 0.435
         self.beta_b = 0.5 + self.f_baryon + (3. - 2.*self.f_baryon) * np.sqrt( (17.2*self.Omh2) ** 2 + 1 )
-        
+
     def transfer(self, k):
         """
         Return the transfer function with BAO wiggles
-        
+
         This is normalized to unity on large scales
-        
+
         Parameters
         ----------
         k : float, array_like
@@ -226,11 +221,11 @@
         """
         if np.isscalar(k) and k == 0.: return 1.0
-        
+
         k = np.asarray(k)
         # only compute k > 0 modes
         valid = k > 0.
-        
+
         k = k[valid] * self.cosmo.h # now in 1/Mpc
         q = k / (13.41*self.k_eq)
         ks = k*self.sound_horizon
@@ -246,20 +241,20 @@
         s_tilde = self.sound_horizon * (1 + (self.beta_node/ks)**3) ** (-1./3.)
         ks_tilde = k*s_tilde
-        
+
         T_b_T0 = f(T_c_ln_nobeta, T_c_C_noalpha)
-        T_b_1 = T_b_T0 / (1 + (ks/5.2)**2 ) 
+        T_b_1 = T_b_T0 / (1 + (ks/5.2)**2 )
         T_b_2 = self.alpha_b / (1 + (self.beta_b/ks)**3 ) * np.exp(-(k/self.k_silk) ** 1.4)
         T_b = np.sinc(ks_tilde/np.pi) * (T_b_1 + T_b_2)
-        
-        T = np.ones(valid.shape) 
+
+        T = np.ones(valid.shape)
         T[valid] = self.f_baryon*T_b + (1-self.f_baryon)*T_c;
         return T

 class NoWiggleEHPower(LinearPowerBase):
     """
     Eisenstein & Hu (1998) fitting function without BAO wiggles
-    
+
     From EH 1998, Eqs. 26,28-31.
     """
     def __init__(self, cosmo, redshift):
@@ -274,7 +269,7 @@
         """
         LinearPowerBase.__init__(self, cosmo, redshift)
         self._set_params()
-        
+
     def _set_params(self):
         """
         Initialize the parameters of the fitting formula
         """
         self.Obh2 = self.cosmo.Ob0 * self.cosmo.h ** 2
         self.Omh2 = self.cosmo.Om0 * self.cosmo.h ** 2
         self.f_baryon = self.cosmo.Ob0 / self.cosmo.Om0
-        self.theta_cmb = self.cosmo.Tcmb0.value / 2.7
+        self.theta_cmb = self.cosmo.Tcmb0 / 2.7
-        
+
         # wavenumber of equality
         self.k_eq = 0.0746 * self.Omh2 * self.theta_cmb ** (-2) # units of 1/Mpc
-        
+
         self.sound_horizon = self.cosmo.h * 44.5 * np.log(9.83/self.Omh2) / np.sqrt(1 + 10 * self.Obh2** 0.75) # in Mpc/h
         self.alpha_gamma = 1 - 0.328 * np.log(431*self.Omh2) * self.f_baryon + 0.38* np.log(22.3*self.Omh2) * self.f_baryon ** 2
-        
-        
+
+
     def transfer(self, k):
         """
         Return the transfer function without BAO wiggles
-        
+
         This is normalized to unity on large scales
-        
+
         Parameters
         ----------
         k : float, array_like
@@ -304,20 +299,20 @@
         """
         if np.isscalar(k) and k == 0.: return 1.0
-        
+
         # only compute k > 0 modes
         k = np.asarray(k)
         valid = k > 0.
-        
+
         k = k[valid] * self.cosmo.h # in 1/Mpc now
         ks = k * self.sound_horizon / self.cosmo.h
         q = k / (13.41*self.k_eq)
-        
+
         gamma_eff = self.Omh2 * (self.alpha_gamma + (1 - self.alpha_gamma) / (1 + (0.43*ks) ** 4))
         q_eff = q * self.Omh2 / gamma_eff
         L0 = np.log(2*np.e + 1.8 * q_eff)
         C0 = 14.2 + 731.0 / (1 + 62.5 * q_eff)
-        
+
         T = np.ones(valid.shape)
         T[valid] = L0 / (L0 + C0 * q_eff**2)
         return T
diff --git a/nbodykit/io/bigfile.py b/nbodykit/io/bigfile.py
index 450700319..4b7c62985 100644
--- a/nbodykit/io/bigfile.py
+++ b/nbodykit/io/bigfile.py
@@ -1,16 +1,18 @@
 from __future__ import absolute_import
-# the future import is important. or in python 2.7 we try to 
+# the future import is important. or in python 2.7 we try to
 # import this module itself. Due to the unfortunate name conflict!
 import numpy
 from .base import FileType
 from ..extern.six import string_types
+import json
+from nbodykit.utils import JSONDecoder

 class BigFile(FileType):
     """
-    A file object to handle the reading of columns of data from 
-    a ``bigfile`` file. ``bigfile`` is the default format of 
+    A file object to handle the reading of columns of data from
+    a ``bigfile`` file. ``bigfile`` is the default format of
     FastPM and MP-Gadget.
     https://github.com/rainwoodman/bigfile
@@ -54,19 +56,26 @@ def __init__(self, path, exclude=None, header='.', dataset='./'):
             self.size = ds.size

             header = ff[header]
-            attrs = header.attrs 
+            attrs = header.attrs
+
+            # copy over the attrs
             for k in attrs.keys():
-                self.attrs[k] = numpy.array(attrs[k], copy=True)
+
+                # load a JSON representation if str starts with json://
+                if isinstance(attrs[k], string_types) and attrs[k].startswith('json://'):
+                    self.attrs[k] = json.loads(attrs[k][7:], cls=JSONDecoder)
+                # copy over an array
+                else:
+                    self.attrs[k] = numpy.array(attrs[k], copy=True)

     def read(self, columns, start, stop, step=1):
         """
-        Read the specified column(s) over the given range, 
+        Read the specified column(s) over the given range,
         as a dictionary

         'start' and 'stop' should be between 0 and :attr:`size`,
         which is the total size of the binary file (in particles)
-        """ 
+        """
         import bigfile
         if isinstance(columns, string_types): columns = [columns]
diff --git a/nbodykit/source/catalog/hod.py b/nbodykit/source/catalog/hod.py
index 0bd375f9e..c2cc3cf17 100644
--- a/nbodykit/source/catalog/hod.py
+++ b/nbodykit/source/catalog/hod.py
@@ -142,6 +142,13 @@ def _makesource(self):
             self._model.populate_mock(halocat=self._halos, halo_mass_column_key=self.mass,
                                         Num_ptcl_requirement=1, seed=self.attrs['seed'])

+        # remap gal_type to integers (cen: 0, sats: 1)
+        galtab = self._model.mock.galaxy_table
+        gal_type = numpy.zeros(len(galtab), dtype='i4')
+        sats = galtab['gal_type'] == 'satellites'
+        gal_type[sats] = 1
+        galtab.replace_column('gal_type', gal_type)
+
         # replace any object dtypes
         data = remove_object_dtypes(self._model.mock.galaxy_table).as_array()
         del self._model.mock.galaxy_table
@@ -212,7 +219,7 @@ def _log_populated_stats(self, data):
         """
         if len(data) > 0:

-            fsat = 1.*(data['gal_type'] == 'satellites').sum()/len(data)
+            fsat = 1.*(data['gal_type'] == 1).sum()/len(data)
             self.logger.info("satellite fraction: %.2f" %fsat)
             self.attrs['fsat'] = fsat
@@ -317,9 +324,13 @@ def _makemodel(self):
         conc_mass_model = 'dutton_maccio14'

         # occupation functions
-        model['centrals_occupation'] = Zheng07Cens(prim_haloprop_key=self.mass)
-        model['satellites_occupation'] = Zheng07Sats(prim_haloprop_key=self.mass, modulate_with_cenocc=True)
-        model['satellites_occupation']._suppress_repeated_param_warning = True
+        cenocc = Zheng07Cens(prim_haloprop_key=self.mass)
+        satocc = Zheng07Sats(prim_haloprop_key=self.mass, modulate_with_cenocc=True, cenocc_model=cenocc)
+        satocc._suppress_repeated_param_warning = True
+
+        # add to model
+        model['centrals_occupation'] = cenocc
+        model['satellites_occupation'] = satocc

         # profile functions
         kws = {'cosmology':self.cosmo.engine, 'redshift':self.attrs['redshift'], 'mdef':self.attrs['mdef']}
diff --git a/nbodykit/source/catalog/tests/test_catalog.py b/nbodykit/source/catalog/tests/test_catalog.py
index fb0b963d2..c7ca0b44f 100644
--- a/nbodykit/source/catalog/tests/test_catalog.py
+++ b/nbodykit/source/catalog/tests/test_catalog.py
@@ -95,35 +95,43 @@ def test_tomesh(comm):

 @MPITest([1, 4])
 def test_save(comm):

+    cosmo = cosmology.Planck15
     CurrentMPIComm.set(comm)
+
     import tempfile
     import shutil
-    from nbodykit.io.bigfile import BigFile
+
+    # initialize an output directory
     if comm.rank == 0:
         tmpfile = tempfile.mkdtemp()
     else:
         tmpfile = None
-    tmpfile = comm.bcast(tmpfile)
+    tmpfile = comm.bcast(tmpfile)

+    # initialize a uniform catalog
     source = UniformCatalog(nbar=0.2e-2, BoxSize=1024., seed=42)

+    # add a non-array attrs (saved as JSON)
+    source.attrs['empty'] = None
+
+    # save to a BigFile
     source.save(tmpfile, ['Position', 'Velocity'])
+    # load as a BigFileCatalog
     source2 = BigFileCatalog(tmpfile, header='Header', attrs={"Nmesh":32})

+    # check sources
     for k in source.attrs:
         assert_array_equal(source2.attrs[k], source.attrs[k])

+    # check the data
     def allconcat(data):
         return numpy.concatenate(comm.allgather(data), axis=0)
-
     assert_allclose(allconcat(source['Position']), allconcat(source2['Position']))
     assert_allclose(allconcat(source['Velocity']), allconcat(source2['Velocity']))

     comm.barrier()
-
     if comm.rank == 0:
         shutil.rmtree(tmpfile)
diff --git a/nbodykit/source/catalog/tests/test_hod.py b/nbodykit/source/catalog/tests/test_hod.py
index 024a06a75..25bedcdc4 100644
--- a/nbodykit/source/catalog/tests/test_hod.py
+++ b/nbodykit/source/catalog/tests/test_hod.py
@@ -1,6 +1,7 @@
 from runtests.mpi import MPITest
 from nbodykit.lab import *
 from nbodykit import setup_logging
+import shutil

 setup_logging()
@@ -56,3 +57,47 @@ def test_hod(comm):

     # compute the power
     r = FFTPower(hod.to_mesh(Nmesh=128), mode='2d', Nmu=5, los=[0,0,1])
+
+@MPITest([4])
+def test_save(comm):
+
+    CurrentMPIComm.set(comm)
+
+    redshift = 0.55
+    cosmo = cosmology.Planck15
+    BoxSize = 512
+
+    # lognormal particles
+    source = LogNormalCatalog(Plin=cosmology.EHPower(cosmo, redshift),
+                nbar=3e-3, BoxSize=BoxSize, Nmesh=128, seed=42)
+
+    # run FOF
+    r = FOF(source, linking_length=0.2, nmin=20)
+    halos = r.to_halos(cosmo=cosmo, redshift=redshift, particle_mass=1e12, mdef='vir')
+
+    # make the HOD catalog from the halotools catalog
+    hod = HODCatalog(halos.to_halotools(), seed=42)
+
+    # save to a tmp file
+    hod.save('tmp-hod.bigfile', ['Position', 'Velocity', 'gal_type'])
+
+    # read tmp file
+    cat = BigFileCatalog('tmp-hod.bigfile', header="Header")
+
+    try:
+        # check attrs
+        for name in hod.attrs:
+            numpy.testing.assert_array_equal(cat.attrs[name], hod.attrs[name])
+
+        # check same size
+        assert hod.csize == cat.csize
+
+        # check total number of satellites
+        nsat1 = comm.allreduce(hod['gal_type'].sum())
+        nsat2 = comm.allreduce(cat['gal_type'].sum())
+        assert nsat1 == nsat2
+
+    finally:
+        comm.barrier()
+        if comm.rank == 0:
+            shutil.rmtree('tmp-hod.bigfile')
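Taken together, the changes to ``save`` and to ``nbodykit.io.bigfile`` let non-array meta-data round-trip through a bigfile via the ``json://`` prefix. A condensed sketch of the workflow the new tests exercise (the output path is illustrative, and an MPI environment with nbodykit's lab imports is assumed, as in the test suite):

    from nbodykit.lab import UniformCatalog, BigFileCatalog

    cat = UniformCatalog(nbar=2e-3, BoxSize=1024., seed=42)

    # a non-array attribute; stored in the header as a 'json://...' string
    cat.attrs['empty'] = None

    # only the listed columns are written; attrs go into the 'Header' dataset
    cat.save('tmp.bigfile', ['Position', 'Velocity'])

    # reading back decodes 'json://' strings through JSONDecoder
    cat2 = BigFileCatalog('tmp.bigfile', header='Header')
    assert cat2.attrs['empty'] is None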