Skip to content

Commit

Permalink
Merge pull request #458 from desihub/ADMhptargets
Browse files Browse the repository at this point in the history
Refactor SV and main survey targeting to allow a range of spatial queries
  • Loading branch information
geordie666 committed Feb 15, 2019
2 parents 0856b6e + f0bb06f commit dc07802
Show file tree
Hide file tree
Showing 17 changed files with 1,469 additions and 427 deletions.
2 changes: 1 addition & 1 deletion bin/gather_targets
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ ap.add_argument("infiles",
help="SEMI-COLON separated list of input files, which may have to be enclosed by quotes (e.g. 'file1;file2;file3;file4')")
ap.add_argument("outfile",
help="Output file name")
ap.add_argument("targtype",choices=['skies','randoms'],
ap.add_argument("targtype",choices=['skies', 'randoms', 'targets'],
help="Type of target run with parallelization/multiprocessing code to gather")

ns = ap.parse_args()
Expand Down
21 changes: 21 additions & 0 deletions bin/select_cmx_targets
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from __future__ import print_function, division

import sys
import numpy as np
import fitsio

from desitarget import io
from desitarget.cmx.cmx_cuts import select_targets
Expand Down Expand Up @@ -38,6 +40,25 @@ if len(infiles) == 0:
log.critical('no sweep or tractor files found')
sys.exit(1)

# ADM Only coded for objects with Gaia matches
# ADM (e.g. DR6 or above). Fail for earlier Data Releases.
# ADM Guard against a single file being passed: a bare string is used
# ADM as the file name directly, otherwise the first entry is checked.
# ADM Note that `not` is required here: `~isinstance(...)` bitwise-inverts
# ADM a bool to -1 or -2, both of which are truthy.
fn = infiles
if not isinstance(infiles, str):
    fn = infiles[0]
# ADM only the RELEASE and PMRA columns of this one file are inspected.
data = fitsio.read(fn, columns=["RELEASE","PMRA"])
if np.any(data["RELEASE"] < 6000):
    log.critical('Commissioning cuts only coded for DR6 or above')
    raise ValueError
# ADM a maximum PMRA of exactly zero in a pre-DR7 file is taken to mean
# ADM that the sweeps were never matched to Gaia.
# NOTE(review): max(PMRA) == 0 would also be true if every non-zero
# proper motion were negative -- confirm this is acceptable.
if np.max(data['PMRA']) == 0. and np.any(data["RELEASE"] < 7000):
    d = "/project/projectdirs/desi/target/gaia_dr2_match_dr6"
    log.info("Zero objects have a proper motion.")
    log.critical(
        "Did you mean to send the Gaia-matched sweeps in, e.g., {}?"
        .format(d)
    )
    raise IOError

log.info("running on {} processors".format(ns.numproc))

targets = select_targets(infiles, numproc=ns.numproc)
Expand Down
8 changes: 4 additions & 4 deletions bin/select_randoms
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ from desitarget.randoms import pixweight, select_randoms

import multiprocessing
nproc = multiprocessing.cpu_count() // 2
#ADM default HEALPix Nside used throughout desitarget
#ADM don't confuse this with the ns.nside parallelization input that is parsed below!!!
nside = 64
# ADM default HEALPix Nside used throughout desitarget.
# ADM don't confuse this with the ns.nside parallelization input that is parsed below!!!
nside = io.desitarget_nside()

from desiutil.log import get_logger
log = get_logger()
Expand Down Expand Up @@ -52,7 +52,7 @@ ap.add_argument("--dustdir",

ns = ap.parse_args()

#ADM parse the list of HEALPixels in which to run
# ADM parse the list of HEALPixels in which to run.
pixlist = ns.healpixels
if pixlist is not None:
pixlist = [ int(pixnum) for pixnum in pixlist.split(',') ]
Expand Down
51 changes: 43 additions & 8 deletions bin/select_sv_targets
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,24 @@ ap.add_argument('-t','--tcnames', default=None,
help="Comma-separated names of target classes to run (e.g. QSO,LRG). Options are ELG, QSO, LRG, MWS, BGS, STD. Default is to run everything)")
ap.add_argument('-i','--iteration', default="1",
help="Iteration of SV target selection to run [defaults to 1 for 'sv1']")
# ADM options controlling the spatial extent of a run (HEALPixels, RA/Dec
# ADM boxes, RA/Dec caps) and the printing of a parallelization script.
ap.add_argument('--nside', type=int,
                help="Process targets in HEALPixels at this resolution (defaults to None). See also the 'healpixels' input flag",
                default=None)
ap.add_argument('--healpixels',
                help="HEALPixels corresponding to `nside` (e.g. '6,21,57'). Only process files that touch these pixels and return targets within these pixels",
                default=None)
ap.add_argument("--bundlefiles", type=int,
                help="(overrides all options but `src`) print slurm script to parallelize, with about this many sweeps files touching each HEALPixel (e.g. 100)",
                default=None)
ap.add_argument("--filespersec", type=float,
                help="estimate of sweeps files completed per second by the (parallelized) code. Used with `bundlefiles` to guess run times (defaults to 0.08)",
                default=0.08)
# ADM the example must obey the stated ordering, i.e. Decmin < Decmax.
ap.add_argument('--radecbox',
                help="Only return targets in an RA/Dec box denoted by 'RAmin,RAmax,Decmin,Decmax' in degrees (e.g. '140,150,-20,-10')",
                default=None)
# ADM the example center Dec must be a valid declination (-90 to 90).
ap.add_argument('--radecrad',
                help="Only return targets in an RA/Dec circle/cap denoted by 'centerRA,centerDec,radius' in degrees (e.g. '140,15,0.5')",
                default=None)

ns = ap.parse_args()
infiles = io.list_sweepfiles(ns.src)
Expand All @@ -58,7 +76,7 @@ if ~isinstance(infiles, str):
fn = infiles[0]
data = fitsio.read(fn, columns=["RELEASE","PMRA"])
if np.any(data["RELEASE"] < 6000):
log.critical('Commissioning cuts only coded for DR6 or above')
log.critical('SV cuts only coded for DR6 or above')
raise ValueError
if (np.max(data['PMRA']) == 0.) & np.any(data["RELEASE"] < 7000):
d = "/project/projectdirs/desi/target/gaia_dr2_match_dr6"
Expand All @@ -69,16 +87,33 @@ if (np.max(data['PMRA']) == 0.) & np.any(data["RELEASE"] < 7000):
)
raise IOError

log.info("running on {} processors".format(ns.numproc))
if ns.bundlefiles is None:
log.info("running on {} processors".format(ns.numproc))

#ADM if specific bit names were passed, use them, otherwise run all target classes
# ADM convert the comma-separated HEALPixel string, if one was passed,
# ADM into a list of integer pixel numbers.
pixlist = None
if ns.healpixels is not None:
    pixlist = [int(token) for token in ns.healpixels.split(',')]

# ADM convert the RA/Dec box and RA/Dec cap strings into lists of floats,
# ADM leaving an entry as None where no region was passed.
inlists = [
    region if region is None else [float(token) for token in region.split(',')]
    for region in (ns.radecbox, ns.radecrad)
]

# ADM if specific bit names were passed, use them, otherwise run all target classes
tcnames = _parse_tcnames(tcstring=ns.tcnames, add_all=False)

targets = select_targets(infiles, numproc=ns.numproc, tcnames=tcnames, survey=survey)
targets = select_targets(infiles, numproc=ns.numproc,
nside=ns.nside, pixlist=pixlist,
bundlefiles=ns.bundlefiles, filespersec=ns.filespersec,
radecbox=inlists[0], radecrad=inlists[1],
tcnames=tcnames, survey=survey)
if ns.mask:
targets = mask_targets(targets, inmaskfile=ns.mask, nside=nside)

io.write_targets(ns.dest, targets, indir=ns.src, survey=survey,
qso_selection=survey, nside=nside)

log.info('{} targets written to {}...t={:.1f}s'.format(len(targets), ns.dest, time()-start))
if ns.bundlefiles is None:
io.write_targets(ns.dest, targets, indir=ns.src, survey=survey,
nsidefile=ns.nside, hpxlist=pixlist,
qso_selection=survey, nside=nside)
log.info('{} targets written to {}...t={:.1f}s'.format(len(targets), ns.dest, time()-start))
60 changes: 47 additions & 13 deletions bin/select_targets
Original file line number Diff line number Diff line change
Expand Up @@ -19,37 +19,56 @@ start = time()

import multiprocessing
nproc = multiprocessing.cpu_count() // 2
# ADM don't confuse this with the ns.nside parallelization input that is parsed below!!!
nside = io.desitarget_nside()

from desiutil.log import get_logger
log = get_logger()

from argparse import ArgumentParser
ap = ArgumentParser(description='Generates DESI target bits from Legacy Surveys sweeps or tractor files')
ap.add_argument("src",
ap.add_argument("src",
help="Tractor/sweeps file or root directory with tractor/sweeps files")
ap.add_argument("dest",
ap.add_argument("dest",
help="Output target selection file")
ap.add_argument('-c', "--check", action='store_true',
help="Process tractor/sweeps files to check for corruption, without running full target selection")
ap.add_argument('-m', "--mask",
ap.add_argument('-m', "--mask",
help="If sent then mask the targets, the name of the mask file should be supplied")
ap.add_argument("--sandbox", action='store_true',
help="Apply the sandbox target selection algorithms")
ap.add_argument("--FoMthresh", type=float,
help='XD Figure of Merit Threshold for an ELG (sandbox)')
ap.add_argument('--qsoselection',choices=qso_selection_options,default='randomforest',
ap.add_argument('--qsoselection', choices=qso_selection_options, default='randomforest',
help="QSO target selection method")
ap.add_argument('--Method',choices=Method_sandbox_options,default='XD',
ap.add_argument('--Method' ,choices=Method_sandbox_options, default='XD',
help="Method used in sandbox target for ELG")
### ap.add_argument('-b', "--bricklist", help='filename with list of bricknames to include')
ap.add_argument("--gaiamatch", action='store_true',
help="DO match to Gaia DR2 chunks files in order to populate Gaia columns for MWS/STD selection")
ap.add_argument("--numproc", type=int,
help='number of concurrent processes to use [{}]'.format(nproc),
default=nproc)
ap.add_argument('-t','--tcnames', default=None,
ap.add_argument('-t','--tcnames', default=None,
help="Comma-separated names of target classes to run (e.g. QSO,LRG). Options are ELG, QSO, LRG, MWS, BGS, STD. Default is to run everything)")
# ADM options controlling the spatial extent of a run (HEALPixels, RA/Dec
# ADM boxes, RA/Dec caps) and the printing of a parallelization script.
ap.add_argument('--nside', type=int,
                help="Process targets in HEALPixels at this resolution (defaults to None). See also the 'healpixels' input flag",
                default=None)
ap.add_argument('--healpixels',
                help="HEALPixels corresponding to `nside` (e.g. '6,21,57'). Only process files that touch these pixels and return targets within these pixels",
                default=None)
ap.add_argument("--bundlefiles", type=int,
                help="(overrides all options but `src`) print slurm script to parallelize, with about this many sweeps files touching each HEALPixel (e.g. 100)",
                default=None)
ap.add_argument("--filespersec", type=float,
                help="estimate of sweeps files completed per second by the (parallelized) code. Used with `bundlefiles` to guess run times (defaults to 0.12)",
                default=0.12)
# ADM the example must obey the stated ordering, i.e. Decmin < Decmax.
ap.add_argument('--radecbox',
                help="Only return targets in an RA/Dec box denoted by 'RAmin,RAmax,Decmin,Decmax' in degrees (e.g. '140,150,-20,-10')",
                default=None)
# ADM the example center Dec must be a valid declination (-90 to 90).
ap.add_argument('--radecrad',
                help="Only return targets in an RA/Dec circle/cap denoted by 'centerRA,centerDec,radius' in degrees (e.g. '140,15,0.5')",
                default=None)

ns = ap.parse_args()
infiles = io.list_sweepfiles(ns.src)
Expand All @@ -59,9 +78,21 @@ if len(infiles) == 0:
log.critical('no sweep or tractor files found')
sys.exit(1)

log.info("running on {} processors".format(ns.numproc))
if ns.bundlefiles is None:
log.info("running on {} processors".format(ns.numproc))

#ADM if specific bit names were passed, use them, otherwise run all target classes
# ADM convert the comma-separated HEALPixel string, if one was passed,
# ADM into a list of integer pixel numbers.
pixlist = None
if ns.healpixels is not None:
    pixlist = [int(token) for token in ns.healpixels.split(',')]

# ADM convert the RA/Dec box and RA/Dec cap strings into lists of floats,
# ADM leaving an entry as None where no region was passed.
inlists = [
    region if region is None else [float(token) for token in region.split(',')]
    for region in (ns.radecbox, ns.radecrad)
]

# ADM if specific bit names were passed, use them, otherwise run all target classes.
tcnames = _parse_tcnames(tcstring=ns.tcnames, add_all=False)

if ns.check:
Expand All @@ -72,12 +103,15 @@ else:
targets = select_targets(infiles, numproc=ns.numproc,
qso_selection=ns.qsoselection, gaiamatch=ns.gaiamatch,
sandbox=ns.sandbox, FoMthresh=ns.FoMthresh, Method=ns.Method,
nside=ns.nside, pixlist=pixlist,
bundlefiles=ns.bundlefiles, filespersec=ns.filespersec,
radecbox=inlists[0], radecrad=inlists[1],
tcnames=tcnames, survey='main')
if ns.mask:
targets = mask_targets(targets, inmaskfile=ns.mask, nside=nside)

io.write_targets(ns.dest, targets, indir=ns.src, survey="main",
qso_selection=ns.qsoselection, sandboxcuts=ns.sandbox, nside=nside)

log.info('{} targets written to {}...t={:.1f}s'.format(len(targets), ns.dest, time()-start))

if ns.bundlefiles is None:
io.write_targets(ns.dest, targets,
indir=ns.src, survey="main", nsidefile=ns.nside, hpxlist=pixlist,
qso_selection=ns.qsoselection, sandboxcuts=ns.sandbox, nside=nside)
log.info('{} targets written to {}...t={:.1f}s'.format(len(targets), ns.dest, time()-start))
10 changes: 10 additions & 0 deletions doc/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,15 @@ desitarget Change Log
0.27.1 (unreleased)
-------------------

* Refactor SV/main targeting for spatial queries [`PR #458`_]. Includes:
* Many new spatial query capabilities in :mod:`desitarget.geomask`.
* Parallelize target selection by splitting across HEALPixels.
* Wrappers to read in HEALPix-split target files, restricting by:
* HEALPixels, RA/Dec boxes, RA/Dec/radius caps, column names.
* Only process subsets of targets in regions of space, again including:
* HEALPixels, RA/Dec boxes, RA/Dec/radius caps.
* New unit tests to check these spatial queries.
* Updated notebook including tutorials on spatial queries.
* Update the SV selections for BGS [`PR #457`_].
* Update MTL to work for SV0-like cmx and SV1 tables [`PR #456`_]. Includes:
* Make SUBPRIORITY a random number (0->1) in skies output.
Expand All @@ -23,6 +32,7 @@ desitarget Change Log
.. _`PR #452`: https://github.com/desihub/desitarget/pull/452
.. _`PR #456`: https://github.com/desihub/desitarget/pull/456
.. _`PR #457`: https://github.com/desihub/desitarget/pull/457
.. _`PR #458`: https://github.com/desihub/desitarget/pull/458

0.27.0 (2018-12-14)
-------------------
Expand Down

0 comments on commit dc07802

Please sign in to comment.