Skip to content

Commit

Permalink
Merge pull request #727 from desihub/ADM-QSO-code-clean
Browse files Browse the repository at this point in the history
Clean up the QSO code for the Main Survey
  • Loading branch information
geordie666 committed May 7, 2021
2 parents e073dd3 + 311112f commit affc0e2
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 223 deletions.
6 changes: 6 additions & 0 deletions doc/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ desitarget Change Log
0.57.3 (unreleased)
-------------------

* Clean up the QSO code for the Main Survey [`PR #727`_]. Includes:
* Remove QSO selection code for data releases prior to DR9.
* Remove code that selects high-redshift quasars (``QSO_HIZ``).
* Also, change the initial priorities for some of the ELG classes:
* ``ELG_VLO`` is now 3000, ``ELG_LOP`` is now 3100.
* Update the ELG/LRG code for the Main Survey [`PR #726`_]. Includes:
* Deprecate the ``LRG_LOWDENS`` targeting bit. It was never used.
* Upweight 10% of the "filler" ELG sample to the LRG priority.
Expand Down Expand Up @@ -50,6 +55,7 @@ desitarget Change Log
.. _`PR #724`: https://github.com/desihub/desitarget/pull/724
.. _`PR #725`: https://github.com/desihub/desitarget/pull/725
.. _`PR #726`: https://github.com/desihub/desitarget/pull/726
.. _`PR #727`: https://github.com/desihub/desitarget/pull/727

0.57.2 (2021-04-18)
-------------------
Expand Down
225 changes: 48 additions & 177 deletions py/desitarget/cuts.py
Original file line number Diff line number Diff line change
Expand Up @@ -1664,9 +1664,9 @@ def isBGS_sga(gflux=None, rflux=None, zflux=None, w1flux=None, refcat=None,


def isQSO_cuts(gflux=None, rflux=None, zflux=None, w1flux=None, w2flux=None,
w1snr=None, w2snr=None, deltaChi2=None, maskbits=None,
w1snr=None, w2snr=None, maskbits=None,
gnobs=None, rnobs=None, znobs=None,
release=None, objtype=None, primary=None, optical=False, south=True):
objtype=None, primary=None, optical=False, south=True):
"""QSO targets from color cuts. Returns a boolean array.
Parameters
Expand Down Expand Up @@ -1705,12 +1705,6 @@ def isQSO_cuts(gflux=None, rflux=None, zflux=None, w1flux=None, w2flux=None,
# ADM observed in every band.
qso &= (gnobs > 0) & (rnobs > 0) & (znobs > 0)

# ADM default to RELEASE of 6000 if nothing is passed.
if release is None:
release = np.zeros_like(gflux, dtype='?')+6000

qso &= ((deltaChi2 > 40.) | (release >= 5000))

if primary is not None:
qso &= primary

Expand Down Expand Up @@ -1765,8 +1759,8 @@ def isQSO_colors(gflux=None, rflux=None, zflux=None, w1flux=None, w2flux=None,


def isQSO_randomforest(gflux=None, rflux=None, zflux=None, maskbits=None,
w1flux=None, w2flux=None, objtype=None, release=None,
gnobs=None, rnobs=None, znobs=None, deltaChi2=None,
w1flux=None, w2flux=None, objtype=None,
gnobs=None, rnobs=None, znobs=None,
primary=None, ra=None, dec=None, south=True,
return_probs=False):
"""Define QSO targets from a Random Forest. Returns a boolean array.
Expand All @@ -1785,30 +1779,19 @@ def isQSO_randomforest(gflux=None, rflux=None, zflux=None, maskbits=None,
-------
:class:`array_like`
``True`` for objects that are Random Forest quasar targets.
:class:`array_like`
``True`` for objects that are high-z RF quasar targets.
:class:`array_like`
The (float) probability that a target is a quasar. Only returned
if `return_probs` is ``True``.
:class:`array_like`
The (float) probability that a target is a high-z quasar. Only
returned if `return_probs` is ``True``.
Notes
-----
- Current version (03/27/21) is version 8 on `the SV3 wiki`_.
- Current version (05/06/21) is version 259 on `the wiki`_.
- See :func:`~desitarget.cuts.set_target_bits` for other parameters.
"""
# ADM Primary (True for any initially possible target).
if primary is None:
primary = np.ones_like(gflux, dtype=bool)

# RELEASE
# ADM default to RELEASE of 5000 if nothing is passed.
if release is None:
release = np.zeros_like(gflux, dtype='?') + 5000
release = np.atleast_1d(release)

# Build variables for random forest
# Number of attributes describing each object to be classified.
nFeatures = 11
Expand Down Expand Up @@ -1839,181 +1822,76 @@ def isQSO_randomforest(gflux=None, rflux=None, zflux=None, maskbits=None,

if objtype is not None:
preSelection &= _psflike(objtype)
if deltaChi2 is not None:
deltaChi2 = np.atleast_1d(deltaChi2)
preSelection[release < 5000] &= deltaChi2[release < 5000] > 30.

if maskbits is not None:
# ADM default mask bits from the Legacy Surveys not set.
preSelection &= imaging_mask(maskbits)

# "qso" mask initialized to "preSelection" mask.
qso = np.copy(preSelection)
# ADM to specifically store the selection from the "HighZ" RF.
qsohiz = np.copy(preSelection)

# ADM these store the probabilities, should they need returned.
pqso = np.zeros_like(qso, dtype='>f4')
pqsohiz = np.zeros_like(qso, dtype='>f4')

if np.any(preSelection):

from desitarget.myRF import myRF

# Data reduction to preselected objects.
colorsReduced = colors[preSelection]
releaseReduced = release[preSelection]
r_Reduced = r[preSelection]
W1_Reduced, W2_Reduced = W1[preSelection], W2[preSelection]
colorsIndex = np.arange(0, nbEntries, dtype=np.int64)
colorsReducedIndex = colorsIndex[preSelection]

# Path to random forest files.
pathToRF = resource_filename('desitarget', 'data')
# rf filenames.
rf_DR3_fileName = pathToRF + '/rf_model_dr3.npz'
rf_DR7_fileName = pathToRF + '/rf_model_dr7.npz'
rf_DR7_HighZ_fileName = pathToRF + '/rf_model_dr7_HighZ.npz'
rf_DR8_fileName = pathToRF + '/rf_model_dr8.npz'
rf_DR8_HighZ_fileName = pathToRF + '/rf_model_dr8_HighZ.npz'
# rf filename.
rf_DR9_fileName = pathToRF + '/rf_model_dr9_final.npz'

tmpReleaseOK = releaseReduced < 5000
if np.any(tmpReleaseOK):
# rf initialization - colors data duplicated within "myRF".
rf_DR3 = myRF(colorsReduced[tmpReleaseOK], pathToRF,
numberOfTrees=200, version=1)
# rf loading.
rf_DR3.loadForest(rf_DR3_fileName)
# Compute rf probabilities.
tmp_rf_proba = rf_DR3.predict_proba()
tmp_r_Reduced = r_Reduced[tmpReleaseOK]
# Compute optimized proba cut.
pcut = np.where(tmp_r_Reduced > 20.0,
0.95 - (tmp_r_Reduced - 20.0) * 0.08, 0.95)
# Add rf proba test result to "qso" mask.
qso[colorsReducedIndex[tmpReleaseOK]] = tmp_rf_proba >= pcut
# ADM no high-z selection for DR3.
qsohiz &= False

tmpReleaseOK = (releaseReduced >= 5000) & (releaseReduced < 8000)
if np.any(tmpReleaseOK):
# rf initialization - colors data duplicated within "myRF".
rf = myRF(colorsReduced[tmpReleaseOK], pathToRF,
numberOfTrees=500, version=2)
rf_HighZ = myRF(colorsReduced[tmpReleaseOK], pathToRF,
numberOfTrees=500, version=2)
# rf loading.
rf.loadForest(rf_DR7_fileName)
rf_HighZ.loadForest(rf_DR7_HighZ_fileName)
# Compute rf probabilities.
tmp_rf_proba = rf.predict_proba()
tmp_rf_HighZ_proba = rf_HighZ.predict_proba()
# Compute optimized proba cut.
tmp_r_Reduced = r_Reduced[tmpReleaseOK]
pcut = np.where(tmp_r_Reduced > 20.8,
0.83 - (tmp_r_Reduced - 20.8) * 0.025, 0.83)
pcut[tmp_r_Reduced > 21.5] = 0.8125 - 0.15 * (
tmp_r_Reduced[tmp_r_Reduced > 21.5] - 21.5)
pcut[tmp_r_Reduced > 22.3] = 0.6925 - 0.70 * (
tmp_r_Reduced[tmp_r_Reduced > 22.3] - 22.3)
pcut_HighZ = np.where(tmp_r_Reduced > 20.5,
0.55 - (tmp_r_Reduced - 20.5) * 0.025, 0.55)

# Add rf proba test result to "qso" mask.
qso[colorsReducedIndex[tmpReleaseOK]] = \
(tmp_rf_proba >= pcut) | (tmp_rf_HighZ_proba >= pcut_HighZ)
# ADM populate a mask specific to the "HighZ" selection.
qsohiz[colorsReducedIndex[tmpReleaseOK]] = \
(tmp_rf_HighZ_proba >= pcut_HighZ)

tmpReleaseOK = (releaseReduced >= 8000) & (releaseReduced < 9000)
if np.any(tmpReleaseOK):
# rf initialization - colors data duplicated within "myRF".
rf = myRF(colorsReduced[tmpReleaseOK], pathToRF,
numberOfTrees=500, version=2)
rf_HighZ = myRF(colorsReduced[tmpReleaseOK], pathToRF,
numberOfTrees=500, version=2)
# rf loading.
rf.loadForest(rf_DR8_fileName)
rf_HighZ.loadForest(rf_DR8_HighZ_fileName)
# Compute rf probabilities.
tmp_rf_proba = rf.predict_proba()
tmp_rf_HighZ_proba = rf_HighZ.predict_proba()
# Compute optimized proba cut.
tmp_r_Reduced = r_Reduced[tmpReleaseOK]
pcut = 0.88 - 0.03*np.tanh(tmp_r_Reduced - 20.5)
pcut_HighZ = 0.55

# Add rf proba test result to "qso" mask.
qso[colorsReducedIndex[tmpReleaseOK]] = \
(tmp_rf_proba >= pcut) | (tmp_rf_HighZ_proba >= pcut_HighZ)
# ADM populate a mask specific to the "HighZ" selection.
qsohiz[colorsReducedIndex[tmpReleaseOK]] = \
(tmp_rf_HighZ_proba >= pcut_HighZ)
# ADM store the probabilities in case they need returned.
pqso[colorsReducedIndex[tmpReleaseOK]] = tmp_rf_proba
# ADM populate a mask specific to the "HighZ" selection.
pqsohiz[colorsReducedIndex[tmpReleaseOK]] = tmp_rf_HighZ_proba

tmpReleaseOK = releaseReduced >= 9000
if np.any(tmpReleaseOK):
# rf initialization - colors data duplicated within "myRF".
rf = myRF(colorsReduced[tmpReleaseOK], pathToRF,
numberOfTrees=500, version=2)
# rf loading.
rf.loadForest(rf_DR9_fileName)
# Compute rf probabilities.
tmp_rf_proba = rf.predict_proba()
# Compute optimized proba cut.
tmp_r_Reduced = r_Reduced[tmpReleaseOK]
tmp_W1_Reduced = W1_Reduced[tmpReleaseOK]
tmp_W2_Reduced = W2_Reduced[tmpReleaseOK]

# NO SECOND RF! Structure kept but doesn't impact selection.
tmp_rf_HighZ_proba, pcut_HighZ = np.zeros(tmp_r_Reduced.size), 1.1

if not south:
# threshold selection for North footprint.
pcut = 0.88 - 0.04*np.tanh(tmp_r_Reduced - 20.5)
else:
pcut = np.ones(tmp_rf_proba.size)
is_des = (gnobs[preSelection][tmpReleaseOK] > 4) & \
(rnobs[preSelection][tmpReleaseOK] > 4) & \
(znobs[preSelection][tmpReleaseOK] > 4) & \
((ra[preSelection][tmpReleaseOK] >= 320) |
(ra[preSelection][tmpReleaseOK] <= 100)) & \
(dec[preSelection][tmpReleaseOK] <= 10)
# threshold selection for DES footprint.
pcut[is_des] = 0.7 - 0.05*np.tanh(tmp_r_Reduced[is_des] - 20.5)
# threshold selection for South footprint.
pcut[~is_des] = 0.84 - 0.04*np.tanh(
tmp_r_Reduced[~is_des] - 20.5)

# Add rf proba test result to "qso" mask
qso[colorsReducedIndex[tmpReleaseOK]] = \
(tmp_rf_proba >= pcut) & (tmp_W1_Reduced <= W1_cut) & \
(tmp_W2_Reduced <= W2_cut)
# (NO IMPACT) ADM mask specific to the "HighZ" selection.
qsohiz[colorsReducedIndex[tmpReleaseOK]] = \
(tmp_rf_HighZ_proba >= pcut_HighZ)
# ADM store the probabilities in case they need returned.
pqso[colorsReducedIndex[tmpReleaseOK]] = tmp_rf_proba
# (NO IMPACT) ADM mask specific to the "HighZ" selection.
pqsohiz[colorsReducedIndex[tmpReleaseOK]] = tmp_rf_HighZ_proba
# rf initialization - colors data duplicated within "myRF".
rf = myRF(colorsReduced, pathToRF, numberOfTrees=500, version=2)

# rf loading.
rf.loadForest(rf_DR9_fileName)
# Compute rf probabilities.
tmp_rf_proba = rf.predict_proba()
# Compute optimized proba cut.
tmp_r_Reduced = r_Reduced
tmp_W1_Reduced = W1_Reduced
tmp_W2_Reduced = W2_Reduced

if not south:
# threshold selection for North footprint.
pcut = 0.88 - 0.04*np.tanh(tmp_r_Reduced - 20.5)
else:
pcut = np.ones(tmp_rf_proba.size)
is_des = (gnobs[preSelection] > 4) & \
(rnobs[preSelection] > 4) & \
(znobs[preSelection] > 4) & \
((ra[preSelection] >= 320) | (ra[preSelection] <= 100)) & \
(dec[preSelection] <= 10)
# threshold selection for DES footprint.
pcut[is_des] = 0.7 - 0.05*np.tanh(tmp_r_Reduced[is_des] - 20.5)
# threshold selection for South footprint.
pcut[~is_des] = 0.84 - 0.04*np.tanh(tmp_r_Reduced[~is_des] - 20.5)

# Add rf proba test result to "qso" mask
qso[colorsReducedIndex] = (tmp_rf_proba >= pcut) & \
(tmp_W1_Reduced <= W1_cut) & (tmp_W2_Reduced <= W2_cut)
# ADM store the probabilities in case they need returned.
pqso[colorsReducedIndex] = tmp_rf_proba

# In case of call for a single object passed to the function with
# scalar arguments. Return "numpy.bool_" instead of "~numpy.ndarray".
if nbEntries == 1:
qso = qso[0]
qsohiz = qsohiz[0]
pqso = pqso[0]
pqsohiz = pqsohiz[0]

# ADM if requested, return the probabilities as well.
if return_probs:
return qso, qsohiz, pqso, pqsohiz
return qso, qsohiz
return qso, pqso
return qso


def _psflike(psftype):
Expand Down Expand Up @@ -2508,41 +2386,35 @@ def set_target_bits(photsys_north, photsys_south, obs_rflux,

# ADM initially set everything to arrays of False for QSO selection.
# ADM zeroth element stores the northern targets bits (south=False).
qso_classes = [[tcfalse, tcfalse], [tcfalse, tcfalse]]
qso_classes = [tcfalse, tcfalse]
if "QSO" in tcnames:
for south in south_cuts:
if qso_selection == 'colorcuts':
# ADM find QSO targets in the north and south separately.
# ADM [0] here needed as isQSO_cuts only returns one bit
# ADM and the other bit (the "high-z" bit from the Random
# ADM Forest) needs to be set to all "False".
qso_classes[int(south)][0] = isQSO_cuts(
qso_classes[int(south)] = isQSO_cuts(
primary=primary, zflux=zflux, rflux=rflux, gflux=gflux,
w1flux=w1flux, w2flux=w2flux,
deltaChi2=deltaChi2, maskbits=maskbits,
w1flux=w1flux, w2flux=w2flux, maskbits=maskbits,
gnobs=gnobs, rnobs=rnobs, znobs=znobs,
objtype=objtype, w1snr=w1snr, w2snr=w2snr, release=release,
objtype=objtype, w1snr=w1snr, w2snr=w2snr,
optical=qso_optical_cuts, south=south
)
elif qso_selection == 'randomforest':
# ADM find QSO targets in the north and south separately.
qso_classes[int(south)] = isQSO_randomforest(
primary=primary, zflux=zflux, rflux=rflux, gflux=gflux,
w1flux=w1flux, w2flux=w2flux, deltaChi2=deltaChi2,
maskbits=maskbits, gnobs=gnobs, rnobs=rnobs, znobs=znobs,
objtype=objtype, release=release, ra=ra, dec=dec, south=south
w1flux=w1flux, w2flux=w2flux, maskbits=maskbits,
gnobs=gnobs, rnobs=rnobs, znobs=znobs,
objtype=objtype, ra=ra, dec=dec, south=south
)
else:
msg = "Unknown qso_selection {}; valid options are {}".format(
qso_selection, qso_selection_options)
log.critical(msg)
raise ValueError(msg)
qso_north, qso_hiz_north = qso_classes[0]
qso_south, qso_hiz_south = qso_classes[1]
qso_north, qso_south = qso_classes

# ADM combine QSO targeting bits for a QSO selected in any imaging.
qso = (qso_north & photsys_north) | (qso_south & photsys_south)
qsohiz = (qso_hiz_north & photsys_north) | (qso_hiz_south & photsys_south)

# ADM initially set everything to arrays of False for BGS selection.
# ADM zeroth element stores the northern targets bits (south=False).
Expand Down Expand Up @@ -2684,7 +2556,6 @@ def set_target_bits(photsys_north, photsys_south, obs_rflux,
desi_target |= elg_vlo * desi_mask.ELG_VLO
desi_target |= elg_lop * desi_mask.ELG_LOP
desi_target |= qso * desi_mask.QSO
desi_target |= qsohiz * desi_mask.QSO_HIZ

# ADM set fraction of the ELG_LOP/ELG_VLO targets to ELG_HIP.
desi_target |= elg_hip * desi_mask.ELG_HIP
Expand Down
4 changes: 2 additions & 2 deletions py/desitarget/data/targetmask.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -260,9 +260,9 @@ priorities:
QSO: {UNOBS: 3400, MORE_ZGOOD: 3350, MORE_ZWARN: 3300, MORE_MIDZQSO: 100, DONE: 2, OBS: 1, DONOTOBSERVE: 0}

# ADM different priority ELGs.
ELG_LOP: SAME_AS_ELG
ELG_LOP: {UNOBS: 3100, MORE_ZGOOD: 2, DONE: 2, OBS: 1, DONOTOBSERVE: 0, MORE_MIDZQSO: 0}
ELG_HIP: SAME_AS_LRG
ELG_VLO: {UNOBS: 2999, MORE_ZGOOD: 2, DONE: 2, OBS: 1, DONOTOBSERVE: 0, MORE_MIDZQSO: 0}
ELG_VLO: SAME_AS_ELG

# ADM Informational bits. Don't let them set priorities.
QSO_HIZ: {UNOBS: 0, DONE: 0, OBS: 0, DONOTOBSERVE: 0, MORE_MIDZQSO: 0}
Expand Down
Loading

0 comments on commit affc0e2

Please sign in to comment.