-
Notifications
You must be signed in to change notification settings - Fork 71
/
demo_mpi_params.py
363 lines (298 loc) · 13.6 KB
/
demo_mpi_params.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
import time, sys
import numpy as np
from sedpy.observate import load_filters
from prospect import prospect_args
from prospect.fitting import fit_model
from prospect.io import write_results as writer
# --------------
# RUN_PARAMS
# When running as a script with argparsing, these are ignored.  Kept here
# for backwards compatibility with older (pre-argparse) prospector workflows.
# --------------

run_params = {'verbose': True,
              'debug': False,
              'outfile': 'demo_galphot',
              'output_pickles': False,
              # Optimization parameters
              'do_powell': False,
              'ftol': 0.5e-5, 'maxfev': 5000,
              'do_levenberg': True,
              'nmin': 10,
              # emcee fitting parameters
              'nwalkers': 128,
              'nburn': [16, 32, 64],
              'niter': 512,
              'interval': 0.25,
              'initial_disp': 0.1,
              # dynesty Fitter parameters
              'nested_bound': 'multi',  # bounding method
              'nested_sample': 'unif',  # sampling method
              'nested_nlive_init': 100,
              'nested_nlive_batch': 100,
              'nested_bootstrap': 0,
              'nested_dlogz_init': 0.05,
              'nested_weight_kwargs': {"pfrac": 1.0},
              'nested_target_n_effective': 10000,
              # Obs data parameters
              'objid': 0,
              'phottable': 'demo_photometry.dat',
              'luminosity_distance': 1e-5,  # in Mpc
              # Model parameters
              'add_neb': False,
              'add_duste': False,
              # SPS parameters
              'zcontinuous': 1,
              }
# --------------
# Model Definition
# --------------

def build_model(object_redshift=0.0, fixed_metallicity=None, add_duste=False,
                add_neb=False, luminosity_distance=0.0, **extras):
    """Construct a model.  This method defines a number of parameter
    specification dictionaries and uses them to initialize a
    `models.sedmodel.SedModel` object.

    :param object_redshift: (optional, default: 0.0)
        If non-zero, fix the model redshift ("zred") to this value.

    :param fixed_metallicity: (optional)
        If given, fix "logzsol" to this value instead of fitting it.

    :param add_duste: (optional, default: False)
        Switch to add (fixed) parameters relevant for dust emission.

    :param add_neb: (optional, default: False)
        Switch to add (fixed) parameters relevant for nebular emission, and
        turn nebular emission on.

    :param luminosity_distance: (optional)
        If present and positive, add a `"lumdist"` parameter to the model,
        and set its value (in Mpc) to this.  This allows one to decouple
        redshift from distance, and fit, e.g., absolute magnitudes (by
        setting luminosity_distance to 1e-5 (10pc))

    :returns model:
        An instance of `prospect.models.sedmodel.SedModel`.
    """
    from prospect.models.templates import TemplateLibrary
    from prospect.models import priors, sedmodel

    # --- Get a basic delay-tau SFH parameter set. ---
    # This has 5 free parameters:
    #   "mass", "logzsol", "dust2", "tage", "tau"
    # And two fixed parameters
    #   "zred"=0.1, "sfh"=4
    # See the python-FSPS documentation for details about most of these
    # parameters.  Also, look at `TemplateLibrary.describe("parametric_sfh")`
    # to view the parameters, their initial values, and the priors in detail.
    model_params = TemplateLibrary["parametric_sfh"]

    # Add lumdist parameter.  If this is not added then the distance is
    # controlled by the "zred" parameter and a WMAP9 cosmology.
    if luminosity_distance > 0:
        model_params["lumdist"] = {"N": 1, "isfree": False,
                                   "init": luminosity_distance, "units":"Mpc"}

    # Adjust model initial values (only important for optimization or emcee)
    model_params["dust2"]["init"] = 0.1
    model_params["logzsol"]["init"] = -0.3
    model_params["tage"]["init"] = 13.
    model_params["mass"]["init"] = 1e8

    # If we are going to be using emcee, it is useful to provide an
    # initial scale for the cloud of walkers (the default is 0.1)
    # For dynesty these can be skipped
    model_params["mass"]["init_disp"] = 1e7
    model_params["tau"]["init_disp"] = 3.0
    model_params["tage"]["init_disp"] = 5.0
    model_params["tage"]["disp_floor"] = 2.0
    model_params["dust2"]["disp_floor"] = 0.1

    # adjust priors
    model_params["dust2"]["prior"] = priors.TopHat(mini=0.0, maxi=2.0)
    model_params["tau"]["prior"] = priors.LogUniform(mini=1e-1, maxi=10)
    model_params["mass"]["prior"] = priors.LogUniform(mini=1e6, maxi=1e10)

    # Change the model parameter specifications based on some keyword arguments
    if fixed_metallicity is not None:
        # make it a fixed parameter
        model_params["logzsol"]["isfree"] = False
        # And use value supplied by fixed_metallicity keyword
        model_params["logzsol"]['init'] = fixed_metallicity

    if object_redshift != 0.0:
        # make sure zred is fixed
        model_params["zred"]['isfree'] = False
        # And set the value to the object_redshift keyword
        model_params["zred"]['init'] = object_redshift

    if add_duste:
        # Add dust emission (with fixed dust SED parameters)
        model_params.update(TemplateLibrary["dust_emission"])

    if add_neb:
        # Add nebular emission (with fixed parameters)
        model_params.update(TemplateLibrary["nebular"])

    # Now instantiate the model using this new dictionary of parameter
    # specifications
    model = sedmodel.SedModel(model_params)

    return model
# --------------
# Observational Data
# --------------

# Assemble the filter-name lists used by `build_obs`.
galex = ['galex_' + band for band in ('FUV', 'NUV')]
spitzer = ['spitzer_irac_ch{}'.format(ch) for ch in range(1, 5)]
bessell = ['bessell_' + band for band in 'UBVRI']
sdss = ['sdss_{}0'.format(band) for band in 'ugriz']

# The first filter set is Johnson/Cousins, the second is SDSS.  A flag in the
# photometry table tells us which set to use for each object (some objects
# were not in the SDSS footprint, and therefore have Johnson/Cousins
# photometry).
#
# All these filters are available in sedpy.  If you want to use other filters,
# add their transmission profiles to sedpy/sedpy/data/filters/ with
# appropriate names (and format).
filtersets = (galex + bessell + spitzer,
              galex + sdss + spitzer)
def build_obs(objid=0, phottable='demo_photometry.dat',
              luminosity_distance=None, **kwargs):
    """Load photometry from an ascii file.  Assumes the following columns:
    `objid`, `filterset`, [`mag0`,....,`magN`] where N >= 11.  The User
    should modify this function (including adding keyword arguments) to read
    in their particular data format and put it in the required dictionary.

    :param objid:
        The object id for the row of the photometry file to use.  Integer.
        Requires that there be an `objid` column in the ascii file.

    :param phottable:
        Name (and path) of the ascii file containing the photometry.

    :param luminosity_distance: (optional)
        The Johnson 2013 data are given as AB absolute magnitudes.  They can
        be turned into apparent magnitudes by supplying a luminosity distance
        (in Mpc).

    :returns obs:
        Dictionary of observational data.
    """
    # Write your code here to read data.  Can use FITS, h5py, astropy.table,
    # sqlite, whatever, e.g.:
    #   import astropy.io.fits as pyfits
    #   catalog = pyfits.getdata(phottable)
    from prospect.utils.obsutils import fix_obs

    # Read in an ascii catalog of magnitudes as a numpy structured array.
    with open(phottable, 'r') as f:
        # drop the comment hash from the header line
        header = f.readline().split()[1:]
    # BUGFIX: the `np.float` alias was deprecated in numpy 1.20 and removed
    # in numpy 1.24; use the builtin `float` (interpreted as float64) for the
    # column dtype instead.
    catalog = np.genfromtxt(phottable, comments='#',
                            dtype=np.dtype([(n, float) for n in header]))

    # Find the right row
    ind = catalog['objid'] == float(objid)
    # Here we are dynamically choosing which filters to use based on the
    # object and a flag in the catalog.  Feel free to make this logic more
    # (or less) complicated.
    filternames = filtersets[int(catalog[ind]['filterset'])]
    # And here we loop over the magnitude columns
    mags = [catalog[ind]['mag{}'.format(i)] for i in range(len(filternames))]
    mags = np.array(mags)
    # Since these are absolute mags, we can shift to any distance via the
    # distance modulus.
    if luminosity_distance is not None:
        dm = 25 + 5 * np.log10(luminosity_distance)
        mags += dm

    # Build output dictionary.
    obs = {}
    # This is a list of sedpy filter objects.  See the
    # sedpy.observate.load_filters command for more details on its syntax.
    obs['filters'] = load_filters(filternames)
    # This is a list of maggies, converted from mags.  It should have the
    # same order as `filters` above.
    obs['maggies'] = np.squeeze(10**(-mags/2.5))
    # HACK: you should use real flux uncertainties (7% assumed here)
    obs['maggies_unc'] = obs['maggies'] * 0.07
    # Here we mask out any NaNs or infs
    obs['phot_mask'] = np.isfinite(np.squeeze(mags))
    # We have no spectrum.
    obs['wavelength'] = None
    obs['spectrum'] = None
    # Add unessential bonus info.  This will be stored in output.
    #obs['dmod'] = catalog[ind]['dmod']
    obs['objid'] = objid

    # This ensures all required keys are present and adds extra useful info
    obs = fix_obs(obs)

    return obs
# --------------
# SPS Object
# --------------

def build_sps(zcontinuous=1, compute_vega_mags=False, **extras):
    """Instantiate and return the stellar population synthesis object.

    :param zcontinuous:
        Passed through to `CSPSpecBasis`; controls metallicity interpolation.

    :param compute_vega_mags:
        Passed through to `CSPSpecBasis`; if True use Vega magnitudes.
    """
    from prospect.sources import CSPSpecBasis
    return CSPSpecBasis(zcontinuous=zcontinuous,
                        compute_vega_mags=compute_vega_mags)
# -----------------
# Noise Model
# ------------------

def build_noise(**extras):
    """Return the (spectral, photometric) noise models.

    No explicit noise model is used in this demo, so both are None.
    """
    spec_noise = None
    phot_noise = None
    return spec_noise, phot_noise
# -----------
# Everything
# ------------

def build_all(**kwargs):
    """Build every ingredient of the fit from one keyword dictionary.

    :returns:
        A 4-tuple of (obs, model, sps, noise), produced by forwarding
        `kwargs` to `build_obs`, `build_model`, `build_sps`, and
        `build_noise` respectively.
    """
    obs = build_obs(**kwargs)
    model = build_model(**kwargs)
    sps = build_sps(**kwargs)
    noise = build_noise(**kwargs)
    return obs, model, sps, noise
if __name__ == '__main__':

    # - Parser with default arguments -
    parser = prospect_args.get_parser()
    # - Add custom arguments -
    parser.add_argument('--object_redshift', type=float, default=0.0,
                        help=("Redshift for the model"))
    parser.add_argument('--add_neb', action="store_true",
                        help="If set, add nebular emission in the model (and mock).")
    parser.add_argument('--add_duste', action="store_true",
                        help="If set, add dust emission to the model.")
    parser.add_argument('--luminosity_distance', type=float, default=1e-5,
                        help=("Luminosity distance in Mpc. Defaults to 10pc "
                              "(for case of absolute mags)"))
    parser.add_argument('--phottable', type=str, default="demo_photometry.dat",
                        help="Names of table from which to get photometry.")
    parser.add_argument('--objid', type=int, default=0,
                        help="zero-index row number in the table to fit.")

    args = parser.parse_args()
    # Replace the module-level run_params with the parsed CLI arguments; these
    # keywords are forwarded to all of the build_* functions.
    run_params = vars(args)
    obs, model, sps, noise = build_all(**run_params)

    # Record provenance information that will be stored in the output file.
    run_params["sps_libraries"] = sps.ssp.libraries
    run_params["param_file"] = __file__

    # In debug mode, stop after building (no fitting).
    if args.debug:
        sys.exit()

    # Set up MPI. Note that only model evaluation is parallelizable in dynesty,
    # and many operations (e.g. new point proposal) are still done in serial.
    # This means that single-core fits will always be more efficient for large
    # samples. having a large ratio of (live points / processors) helps efficiency
    # Scaling is: S = K ln(1 + M/K), where M = number of processes and K = number of live points
    # Run as: mpirun -np <number of processors> python demo_mpi_params.py
    try:
        import mpi4py
        from mpi4py import MPI
        from schwimmbad import MPIPool

        # Disable threading / mprobe for compatibility with some MPI stacks.
        mpi4py.rc.threads = False
        mpi4py.rc.recv_mprobe = False

        comm = MPI.COMM_WORLD
        size = comm.Get_size()

        # NOTE(review): `comm.Get_size()` is called twice here; `size > 1`
        # would be equivalent.  Only use MPI when more than one rank exists.
        withmpi = comm.Get_size() > 1
    except ImportError:
        print('Failed to start MPI; are mpi4py and schwimmbad installed? Proceeding without MPI.')
        withmpi = False

    # Evaluate SPS over logzsol grid in order to get necessary data in cache/memory
    # for each MPI process. Otherwise, you risk creating a lag between the MPI tasks
    # caching data depending which can slow down the parallelization
    if (withmpi) & ('logzsol' in model.free_params):
        dummy_obs = dict(filters=None, wavelength=None)

        logzsol_prior = model.config_dict["logzsol"]['prior']
        lo, hi = logzsol_prior.range
        # Step through the prior range in 0.1 dex increments.
        logzsol_grid = np.around(np.arange(lo, hi, step=0.1), decimals=2)

        sps.update(**model.params)  # make sure we are caching the correct IMF / SFH / etc
        for logzsol in logzsol_grid:
            model.params["logzsol"] = np.array([logzsol])
            _ = model.predict(model.theta, obs=dummy_obs, sps=sps)

    # ensure that each processor runs its own version of FSPS
    # this ensures no cross-over memory usage
    from prospect.fitting import lnprobfn
    from functools import partial
    # Bind the (now warmed-up) sps object into the likelihood function so it
    # is not pickled and shipped to the workers on every evaluation.
    lnprobfn_fixed = partial(lnprobfn, sps=sps)

    if withmpi:
        with MPIPool() as pool:

            # The subprocesses will run up to this point in the code
            if not pool.is_master():
                pool.wait()
                sys.exit(0)
            nprocs = pool.size
            output = fit_model(obs, model, sps, noise, pool=pool, queue_size=nprocs, lnprobfn=lnprobfn_fixed, **run_params)
    else:
        output = fit_model(obs, model, sps, noise, lnprobfn=lnprobfn_fixed, **run_params)

    # Write results (sampling chain, optimization result, timings) to HDF5,
    # with a timestamped filename to avoid clobbering previous runs.
    hfile = "{0}_{1}_mcmc.h5".format(args.outfile, int(time.time()))
    writer.write_hdf5(hfile, run_params, model, obs,
                      output["sampling"][0], output["optimization"][0],
                      tsample=output["sampling"][1],
                      toptimize=output["optimization"][1],
                      sps=sps)

    # hfile is a string here, so close() normally raises AttributeError;
    # this guards the case where a writer returns an open file handle.
    try:
        hfile.close()
    except(AttributeError):
        pass