Skip to content

Commit

Permalink
Merge pull request #241 from abhi0395/patch-1
Browse files Browse the repository at this point in the history
Merge branch from Abhijeet's forked redrock to a branch on desihub/redrock
  • Loading branch information
abhi0395 committed Mar 24, 2023
2 parents 71e4e12 + 0efcb4c commit d634a8a
Show file tree
Hide file tree
Showing 9 changed files with 213 additions and 29 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -42,3 +42,4 @@ MANIFEST

# Mac OSX
.DS_Store

7 changes: 7 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,13 @@ To install::
That will install the templates with the code. Alternatively, the templates
can be put elsewhere and set ``$RR_TEMPLATE_DIR`` to that location.

Archetypes::

If want to run with --nearest_nbh option, the user must clone archetypes as:
git clone https://github.com/abhi0395/new-archetypes.git
Then run rrdesi --archetypes <archetype_dir> --nearest_nbh
Another required file is archetype file for galaxies that contain physical data and is stored at NERSC. If the rrdesi is run on NERSC, then the file would automatically be read. Otherwise there should be an io error. For help contact AbhijeetAnand@lbl.gov

Running
-------

Expand Down
54 changes: 46 additions & 8 deletions py/redrock/archetypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,15 @@

from .utils import transmission_Lyman

from .nearest_neighbours import return_N_nearest_archetypes_from_synthetic_spectra
from .nearest_neighbours import return_galaxy_archetype_properties
from .nearest_neighbours import params_for_all_galaxies

## Loading some global data once as it will be used many times
archetype_galaxies = params_for_all_galaxies() ## Dictionary for ELG, LRG, BGS
## Dictionary of galaxy properties of archetypes
rrarchs_params = return_galaxy_archetype_properties('/global/cfs/cdirs/desi/users/abhijeet/new-archetypes/rrarchetype-galaxy.fits')


class Archetype():
"""Class to store all different archetypes from the same spectype.
Expand All @@ -34,7 +43,7 @@ def __init__(self, filename):
h = fits.open(os.path.expandvars(filename), memmap=False)

hdr = h['ARCHETYPES'].header
self.flux = np.asarray(h['ARCHETYPES'].data['ARCHETYPE']).astype('float64') # trapz_rebin only works with 'f8' arrays
self.flux = np.asarray(h['ARCHETYPES'].data['ARCHETYPE']).astype('float64')# trapz_rebin only works with 'f8' arrays
self._narch = self.flux.shape[0]
self._nwave = self.flux.shape[1]
self._rrtype = hdr['RRTYPE'].strip()
Expand Down Expand Up @@ -81,7 +90,8 @@ def eval(self, subtype, dwave, coeff, wave, z):

return flux

def get_best_archetype(self,spectra,weights,flux,wflux,dwave,z,legendre):

def get_best_archetype(self,spectra,weights,flux,wflux,dwave,z,legendre, nearest_nbh, n_nbh):
"""Get the best archetype for the given redshift and spectype.
Args:
Expand Down Expand Up @@ -110,14 +120,42 @@ def get_best_archetype(self,spectra,weights,flux,wflux,dwave,z,legendre):
binned = { hs:trans[hs]*binned[hs] for hs, w in dwave.items() }
tdata = { hs:np.append(binned[hs][:,None],legendre[hs].transpose(), axis=1 ) for hs, wave in dwave.items() }
zzchi2[i], zzcoeff[i] = calc_zchi2_one(spectra, weights, flux, wflux, tdata)

iBest = np.argmin(zzchi2)
binned = self.rebin_template(iBest, z, dwave,trapz=True)
binned = { hs:trans[hs]*binned[hs] for hs, w in dwave.items() }
tdata = { hs:np.append(binned[hs][:,None],legendre[hs].transpose(), axis=1 ) for hs, wave in dwave.items() }
zzchi2, zzcoeff = calc_zchi2_one(spectra, weights, flux, wflux, tdata)
if nearest_nbh:
### Applying nearest neighbour method based on artifical galaxies with known physical properties
zzchi2 = np.zeros(n_nbh, dtype=np.float64)
zzcoeff = np.zeros((n_nbh, nleg+1), dtype=np.float64)
spectype = self._full_type[iBest].split(':::')[0] #redrock best spectype
subtype = self._full_type[iBest].split(':::')[1].split('_')[0] #redrock best subtype
if spectype=='GALAXY':
new_arch, gal_inds = return_N_nearest_archetypes_from_synthetic_spectra(arch_id=iBest, archetype_data=rrarchs_params, gal_data=archetype_galaxies[subtype], n_nbh=n_nbh, ret_wave=False)
new_arch = new_arch.astype('float64') # to let it work with trapz_rebin and Numba
for i in range(n_nbh):
binned = {hs:trapz_rebin(self.wave*(1.+z), new_arch[i], wave) for hs, wave in dwave.items()}
binned = { hs:trans[hs]*binned[hs] for hs, w in dwave.items() }
tdata = {hs:np.append(binned[hs][:,None], legendre[hs].transpose(), axis=1 ) for hs in dwave.keys()}
zzchi2[i], zzcoeff[i] = calc_zchi2_one(spectra, weights, flux, wflux, tdata)
i_new_Best = np.argmin(zzchi2) # new archetype
gal_id = gal_inds[i_new_Best]
new_fulltype = 'GALAXY:::%s_%d'%(subtype, gal_id) #same as Redrock format
chi2, coeff, fin_fulltype = zzchi2[i_new_Best], zzcoeff[i_new_Best], new_fulltype
else:
# For QSOs and Stars
binned = self.rebin_template(iBest, z, dwave,trapz=True)
binned = { hs:trans[hs]*binned[hs] for hs, w in dwave.items() }
tdata = { hs:np.append(binned[hs][:,None],legendre[hs].transpose(), axis=1 ) for hs, wave in dwave.items() }
zzchi2, zzcoeff = calc_zchi2_one(spectra, weights, flux, wflux, tdata)
chi2, coeff, fin_fulltype = zzchi2, zzcoeff, self._full_type[iBest]

return zzchi2, zzcoeff, self._full_type[iBest]
else:
#Without New archetypes
binned = self.rebin_template(iBest, z, dwave,trapz=True)
binned = { hs:trans[hs]*binned[hs] for hs, w in dwave.items() }
tdata = { hs:np.append(binned[hs][:,None],legendre[hs].transpose(), axis=1 ) for hs, wave in dwave.items() }
zzchi2, zzcoeff = calc_zchi2_one(spectra, weights, flux, wflux, tdata)
chi2, coeff, fin_fulltype = zzchi2, zzcoeff, self._full_type[iBest]

return chi2, coeff, fin_fulltype


class All_archetypes():
Expand Down
33 changes: 30 additions & 3 deletions py/redrock/external/desi.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,12 @@ def rrdesi(options=None, comm=None):
parser.add_argument("--archetypes", type=str, default=None,
required=False,
help="archetype file or directory for final redshift comparison")

parser.add_argument("--nearest_nbh", default=False, action="store_true",
required=False, help="Will apply the nearest neighbour approach on archetypes")

parser.add_argument("-n_nbh", type=int, default=9,
required=False, help="if nearest_nbh True, N-nearest neighbours taken into account (default is 9)")

parser.add_argument("-d", "--details", type=str, default=None,
required=False, help="output file for full redrock fit details")
Expand Down Expand Up @@ -680,8 +686,28 @@ def rrdesi(options=None, comm=None):
comm.Abort()
else:
sys.exit(1)



if args.nearest_nbh:
if os.path.isfile(args.archetypes):
file_check = args.archetypes
if os.path.isdir(args.archetypes):
file_check = args.archetypes+'rrarchetype-galaxy.fits'
try:
fits.open(file_check)[2].data ## HDU2 of galaxy-archetype must contain properties
except IndexError:
print('ERROR: Archetype file does not contain required HDU with galaxy properties\n')
sys.stdout.flush()
if comm is not None:
comm.Abort()
else:
sys.exit(1)
print('\n=================\n')
print('Archetype with galaxy properties is provided\n')
print('Nearest neighbour approach is provided, so will apply the N-nearest neighbour approach on Redrock\n')
print('%d nearest neighbours will be used...\n'%(args.n_nbh))
print('===================\n')


targetids = None
if args.targetids is not None:
targetids = [ int(x) for x in args.targetids.split(",") ]
Expand Down Expand Up @@ -803,7 +829,8 @@ def rrdesi(options=None, comm=None):
start = elapsed(None, "", comm=comm)

scandata, zfit = zfind(targets, dtemplates, mpprocs,
nminima=args.nminima, archetypes=args.archetypes,
nminima=args.nminima, archetypes=args.archetypes,
nearest_nbh=args.nearest_nbh, n_nbh=args.n_nbh,
priors=args.priors, chi2_scan=args.chi2_scan, use_gpu=use_gpu)

stop = elapsed(start, "Computing redshifts", comm=comm)
Expand Down
5 changes: 3 additions & 2 deletions py/redrock/fitz.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ def minfit(x, y):
return (x0, xerr, y0, zwarn)


def fitz(zchi2, redshifts, spectra, template, nminima=3, archetype=None, use_gpu=False):
def fitz(zchi2, redshifts, spectra, template, nminima=3, archetype=None, nearest_nbh=False, n_nbh=9, use_gpu=False):
"""Refines redshift measurement around up to nminima minima.
TODO:
Expand Down Expand Up @@ -145,6 +145,7 @@ def fitz(zchi2, redshifts, spectra, template, nminima=3, archetype=None, use_gpu
legendre = { hs:np.array([scipy.special.legendre(i)( (w-wave_min)/(wave_max-wave_min)*2.-1. ) for i in range(deg_legendre)]) for hs, w in dwave.items() }

(weights, flux, wflux) = spectral_data(spectra)

if (use_gpu):
#Copy arrays to GPU
weights = cp.asarray(weights)
Expand Down Expand Up @@ -244,7 +245,7 @@ def fitz(zchi2, redshifts, spectra, template, nminima=3, archetype=None, use_gpu
chi2=chi2min, zz=zz, zzchi2=zzchi2,
coeff=coeff))
else:
chi2min, coeff, fulltype = archetype.get_best_archetype(spectra,weights,flux,wflux,dwave,zbest,legendre)
chi2min, coeff, fulltype = archetype.get_best_archetype(spectra,weights,flux,wflux,dwave,zbest,legendre, nearest_nbh, n_nbh)

results.append(dict(z=zbest, zerr=zerr, zwarn=zwarn,
chi2=chi2min, zz=zz, zzchi2=zzchi2,
Expand Down
108 changes: 108 additions & 0 deletions py/redrock/nearest_neighbours.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
## Best Archetype redshift estimateion based on Nearest neighbour approach on synthetic spectra approach

import numpy as np
from astropy.io import fits
from scipy.spatial.distance import euclidean
from numba import jit
import os

def read_fits_data(filename, nhdu):
"""
Reads hdu data from a fits file for given hdu and returns fits data
"""
hdu = fits.open(filename)
data = hdu[nhdu].data
hdu.close()
return data


def synthetic_galaxy_data_read(gals):

""" Returns fluxes and physical parameters (dictionary)
for synthetic galaxy spectra generated with desisim
by Abhijeet Anand, LBL, email: AbhijeetAnand@lbl.gov
"""
all_gal_data = {}
file_gals = '/global/cfs/cdirs/desi/users/abhijeet/synthetic_spectra/%s/%s_spectral_flux_rest_frame.fits'%(gals, gals)
temp_gal_data = read_fits_data(file_gals, nhdu=2)
all_gal_data['FLUX'] = fits.open(file_gals)[1].data
for key in temp_gal_data.dtype.names:
all_gal_data[key] = temp_gal_data[key].data
return all_gal_data

#@jit(nopython=True)
def cartesian_dist(x_ref, Y, n_nearest):

"""
Calculates 3d euclidean distance between reference and target array points and returns
n-nearest distances and indices of Y (two arrays)
Input:
x_ref: [list or 1d array], reference 3d coordinate
Y: [2d array], each row should be 3d coordinate of one object
n_nearest: [int]: return n_nearest objects (theri indices)
"""
dist = np.array([euclidean(x_ref, c) for c in Y])
inds = np.argsort(dist)
return dist, inds[0:n_nearest]

def return_N_nearest_archetypes_from_synthetic_spectra(arch_id, archetype_data, gal_data, n_nbh, ret_wave=True):

""" Returns the fluxes and rest-frame wavelength (same as archetypes) of N-nearest neighbours for a given Best
archetypes after estimating the distance between it and all archetypes
Input:
arch_id: [int]; best archetype id found by redrock in archetype mode
archetype_data: [dict]; dictionary containing physical parameters for archetypes
must contain: ['LOGMSTAR [M_sol]', 'LOGSSFR [yr^-1]', 'AV_ISM [mag]'] --> 3d coordinate...
gal_data: [dict]; full synthetic galaxy data dictionary,
must contain:['LOGMSTAR [M_sol]', 'LOGSSFR [yr^-1]', 'AV_ISM [mag]', 'FLUX'] ....
n_nbh: [int], number of nearest neighbours you want to return
ret_wave: [bool], returns the wavelength array if True (default True)
"""
param_keys = ['LOGMSTAR [M_sol]', 'LOGSSFR [yr^-1]', 'AV_ISM [mag]']
Y = np.array([gal_data[key] for key in param_keys]).T # parameters for synthetic galaxies (3d coordinates)
Xref = np.array([archetype_data[key][arch_id] for key in param_keys]) # parameter for best fit redrock archetypes
dist, ii = cartesian_dist(x_ref=Xref, Y=Y, n_nearest=n_nbh)
arch_flux = gal_data['FLUX'][ii]
if ret_wave:
lam0 = 1228.0701754386 #starting rest frame wavelength (same as PCA templates)
npix = 19545 # number of wavelength pixels same as archetypes (5 times lower than PCA)
step_size = 0.5 # pixel size in Angstrom (5 times lower resolution than PCA templates)
temp_wave = lam0 + np.arange(npix)*step_size # Galaxy archetype wavelengths
return arch_flux, temp_wave, ii
else:
return arch_flux, ii


def return_galaxy_archetype_properties(archetypes):

""" Returns a dictionary containing the physical properties of archetype galaxies
generated from desisim by Abhijeet Anand, LBL, email: AbhijeetAnand@lbl.gov
Returns: ['LOGMSTAR [M_sol]', 'LOGSSFR [yr^-1]', 'AV_ISM [mag]']
"""

if os.path.isfile(archetypes):
filename = archetypes
if os.path.isdir(archetypes):
filename = archetypes+'rrarchetype-galaxy.fits'
data= read_fits_data(filename, nhdu=2)
params = {}
for key in data.dtype.names:
params[key] = data[key]
return params

def params_for_all_galaxies():
"""
Returns dictionaries for all galaxies (ELG, LRG, BGS)
access each dictionary as fin_dict[subtype]
"""
synthetic_galaxies = {}
for galaxies in ['ELG', 'LRG', 'BGS']:
synthetic_galaxies[galaxies] = synthetic_galaxy_data_read(galaxies)
return synthetic_galaxies



3 changes: 1 addition & 2 deletions py/redrock/targets.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ def compute_coadd(self, cache_Rcsr=False, cosmics_nsig=0.):

flux=np.array(flux)
ivar=np.array(ivar)

if len(grad)>1 and cosmics_nsig > 0 :
# detect outliers by comparing spectra
grad=np.array(grad)
Expand Down Expand Up @@ -228,7 +228,6 @@ def compute_coadd(self, cache_Rcsr=False, cosmics_nsig=0.):

spc = Spectrum(wave, flux, weights, R, Rcsr)
coadd.append(spc)

# swap the coadds into place.
self.spectra = coadd
return
Expand Down
27 changes: 15 additions & 12 deletions py/redrock/zfind.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def sort_dict_by_cols(d, colnames, sort_first_column_first = True):
d[k] = d[k][idx]
return

def _mp_fitz(chi2, target_data, t, nminima, qout, archetype, use_gpu):
def _mp_fitz(chi2, target_data, t, nminima, qout, archetype, nearest_nbh, n_nbh, use_gpu):
"""Wrapper for multiprocessing version of fitz.
"""
try:
Expand All @@ -88,7 +88,7 @@ def _mp_fitz(chi2, target_data, t, nminima, qout, archetype, use_gpu):
results = list()
for i, tg in enumerate(target_data):
zfit = fitz(chi2[i], t.template.redshifts, tg.spectra,
t.template, nminima=nminima, archetype=archetype, use_gpu=use_gpu)
t.template, nminima=nminima, archetype=archetype, nearest_nbh=nearest_nbh, n_nbh=n_nbh, use_gpu=use_gpu)
results.append( (tg.id, zfit) )
qout.put(results)
except:
Expand Down Expand Up @@ -208,7 +208,7 @@ def sort_zfit_dict(zfit):
sort_dict_by_cols(zfit, ('__badfit__', 'chi2'), sort_first_column_first=True)
zfit.pop('__badfit__')

def zfind(targets, templates, mp_procs=1, nminima=3, archetypes=None, priors=None, chi2_scan=None, use_gpu=False):
def zfind(targets, templates, mp_procs=1, nminima=3, archetypes=None, nearest_nbh=False, n_nbh=9, priors=None, chi2_scan=None, use_gpu=False):
"""Compute all redshift fits for the local set of targets and collect.
Given targets and templates distributed across a set of MPI processes,
Expand All @@ -230,6 +230,8 @@ def zfind(targets, templates, mp_procs=1, nminima=3, archetypes=None, priors=Non
Passed to fitz().
archetypes (str, optional): file or directory containing archetypes
to use for final fitz choice of best chi2 vs. z minimum.
nearest_nbh (bool), for applying nearest neighbour approach (default False)
n_nbh (int); if above true, then number of nearest neighbour to take into account
priors (str, optional): file containing redshift priors
chi2_scan (str, optional): file containing already computed chi2 scan
use_gpu (bool, optional): use gpu for calc_zchi2
Expand Down Expand Up @@ -331,7 +333,7 @@ def zfind(targets, templates, mp_procs=1, nminima=3, archetypes=None, priors=Non
zfit = fitz(results[tg.id][ft]['zchi2'] \
+ results[tg.id][ft]['penalty'],
t.template.redshifts, tg.spectra,
t.template, nminima=nminima,archetype=archetype, use_gpu=use_gpu)
t.template, nminima=nminima,archetype=archetype, nearest_nbh=nearest_nbh, n_nbh=n_nbh, use_gpu=use_gpu)
results[tg.id][ft]['zfit'] = zfit
else:
# Multiprocessing case.
Expand All @@ -355,7 +357,7 @@ def zfind(targets, templates, mp_procs=1, nminima=3, archetypes=None, priors=Non
eff_chi2[i,:] = results[tg.id][ft]['zchi2'] \
+ results[tg.id][ft]['penalty']
p = mp.Process(target=_mp_fitz, args=(eff_chi2,
target_data, t, nminima, qout, archetype, use_gpu))
target_data, t, nminima, qout, archetype, nearest_nbh, n_nbh, use_gpu))
procs.append(p)
p.start()

Expand Down Expand Up @@ -410,17 +412,18 @@ def zfind(targets, templates, mp_procs=1, nminima=3, archetypes=None, priors=Non
spectype, subtype = fulltype.split(':::')
else:
spectype, subtype = (fulltype, '')
nmin = len(tmp['chi2']) # defining dimension of array to save fulltype
else:
spectype = [ el.split(':::')[0] for el in tmp['fulltype'] ]
subtype = [ el.split(':::')[1] for el in tmp['fulltype'] ]
tmp.remove_column('fulltype')

spectype = [ el[0].split(':::')[0] for el in tmp['fulltype'] ] #el is a list with one element (corresponding to each minima)
subtype = [ el[0].split(':::')[1] for el in tmp['fulltype'] ]
del tmp['fulltype'] #it's a dictionary
nmin=1 # defining dimension of array to save fulltype

#Have to create arrays of correct length since using dict of
#np arrays instead of astropy Table
l = len(tmp['chi2'])
tmp['spectype'] = np.array([spectype]*l).reshape((l, 1))
tmp['subtype'] = np.array([subtype]*l).reshape((l, 1))

tmp['spectype'] = np.array([spectype]*nmin).reshape((l, 1))
tmp['subtype'] = np.array([subtype]*nmin).reshape((l, 1))
tmp['ncoeff'] = np.array([tmp['coeff'].shape[1]]*l).reshape((l, 1))
tzfit.append(tmp)
del allresults[tid][fulltype]['zfit']
Expand Down
Loading

0 comments on commit d634a8a

Please sign in to comment.