Merge pull request #241 from abhi0395/patch-1

Merge branch from Abhijeet's forked redrock to a branch on desihub/redrock
desihub · Mar 24, 2023 · d634a8a · d634a8a
2 parents 71e4e12 + 0efcb4c
commit d634a8a
Show file tree

Hide file tree

Showing 9 changed files with 213 additions and 29 deletions.
diff --git a/.gitignore b/.gitignore
@@ -42,3 +42,4 @@ MANIFEST
 
 # Mac OSX
 .DS_Store
+
diff --git a/README.rst b/README.rst
@@ -32,6 +32,13 @@ To install::
 That will install the templates with the code.  Alternatively, the templates
 can be put elsewhere and set ``$RR_TEMPLATE_DIR`` to that location.
 
+Archetypes::
+
+    If want to run with --nearest_nbh option, the user must clone archetypes as:
+    git clone https://github.com/abhi0395/new-archetypes.git
+    Then run rrdesi --archetypes <archetype_dir> --nearest_nbh
+    Another required file is archetype file for galaxies that contain physical data and is stored at NERSC. If the rrdesi is run on NERSC, then the file would automatically be read. Otherwise there should be an io error. For help contact AbhijeetAnand@lbl.gov
+
 Running
 -------
 

diff --git a/py/redrock/archetypes.py b/py/redrock/archetypes.py
@@ -18,6 +18,15 @@
 
 from .utils import transmission_Lyman
 
+from .nearest_neighbours import return_N_nearest_archetypes_from_synthetic_spectra
+from .nearest_neighbours import return_galaxy_archetype_properties
+from .nearest_neighbours import params_for_all_galaxies
+
+## Loading some global data once as it will be used many times
+archetype_galaxies = params_for_all_galaxies() ## Dictionary for ELG, LRG, BGS
+## Dictionary of galaxy properties of archetypes
+rrarchs_params = return_galaxy_archetype_properties('/global/cfs/cdirs/desi/users/abhijeet/new-archetypes/rrarchetype-galaxy.fits') 
+
 
 class Archetype():
     """Class to store all different archetypes from the same spectype.
@@ -34,7 +43,7 @@ def __init__(self, filename):
         h = fits.open(os.path.expandvars(filename), memmap=False)
 
         hdr = h['ARCHETYPES'].header
-        self.flux = np.asarray(h['ARCHETYPES'].data['ARCHETYPE']).astype('float64') # trapz_rebin only works with 'f8' arrays
+        self.flux = np.asarray(h['ARCHETYPES'].data['ARCHETYPE']).astype('float64')# trapz_rebin only works with 'f8' arrays
         self._narch = self.flux.shape[0]
         self._nwave = self.flux.shape[1]
         self._rrtype = hdr['RRTYPE'].strip()
@@ -81,7 +90,8 @@ def eval(self, subtype, dwave, coeff, wave, z):
 
         return flux
 
-    def get_best_archetype(self,spectra,weights,flux,wflux,dwave,z,legendre):
+
+    def get_best_archetype(self,spectra,weights,flux,wflux,dwave,z,legendre, nearest_nbh, n_nbh):
         """Get the best archetype for the given redshift and spectype.
 
         Args:
@@ -110,14 +120,42 @@ def get_best_archetype(self,spectra,weights,flux,wflux,dwave,z,legendre):
             binned = { hs:trans[hs]*binned[hs] for hs, w in dwave.items() }
             tdata = { hs:np.append(binned[hs][:,None],legendre[hs].transpose(), axis=1 ) for hs, wave in dwave.items() }
             zzchi2[i], zzcoeff[i] = calc_zchi2_one(spectra, weights, flux, wflux, tdata)
-
         iBest = np.argmin(zzchi2)
-        binned = self.rebin_template(iBest, z, dwave,trapz=True)
-        binned = { hs:trans[hs]*binned[hs] for hs, w in dwave.items() }
-        tdata = { hs:np.append(binned[hs][:,None],legendre[hs].transpose(), axis=1 ) for hs, wave in dwave.items() }
-        zzchi2, zzcoeff = calc_zchi2_one(spectra, weights, flux, wflux, tdata)
+        if nearest_nbh:
+            ### Applying nearest neighbour method based on artifical galaxies with known physical properties
+            zzchi2 = np.zeros(n_nbh, dtype=np.float64)
+            zzcoeff = np.zeros((n_nbh, nleg+1), dtype=np.float64)
+            spectype = self._full_type[iBest].split(':::')[0] #redrock best spectype
+            subtype = self._full_type[iBest].split(':::')[1].split('_')[0] #redrock best subtype
+            if spectype=='GALAXY':
+                new_arch, gal_inds = return_N_nearest_archetypes_from_synthetic_spectra(arch_id=iBest, archetype_data=rrarchs_params, gal_data=archetype_galaxies[subtype], n_nbh=n_nbh, ret_wave=False)
+                new_arch = new_arch.astype('float64') # to let it work with trapz_rebin and Numba
+                for i in range(n_nbh):
+                    binned = {hs:trapz_rebin(self.wave*(1.+z), new_arch[i], wave) for hs, wave in dwave.items()}
+                    binned = { hs:trans[hs]*binned[hs] for hs, w in dwave.items() }
+                    tdata = {hs:np.append(binned[hs][:,None], legendre[hs].transpose(), axis=1 ) for hs in dwave.keys()}
+                    zzchi2[i], zzcoeff[i] = calc_zchi2_one(spectra, weights, flux, wflux, tdata)
+                i_new_Best = np.argmin(zzchi2) # new archetype
+                gal_id = gal_inds[i_new_Best]
+                new_fulltype = 'GALAXY:::%s_%d'%(subtype, gal_id) #same as Redrock format
+                chi2, coeff, fin_fulltype = zzchi2[i_new_Best], zzcoeff[i_new_Best], new_fulltype
+            else:
+                # For QSOs and Stars
+                binned = self.rebin_template(iBest, z, dwave,trapz=True)
+                binned = { hs:trans[hs]*binned[hs] for hs, w in dwave.items() }
+                tdata = { hs:np.append(binned[hs][:,None],legendre[hs].transpose(), axis=1 ) for hs, wave in dwave.items() }
+                zzchi2, zzcoeff = calc_zchi2_one(spectra, weights, flux, wflux, tdata)
+                chi2, coeff, fin_fulltype =  zzchi2, zzcoeff, self._full_type[iBest]
 
-        return zzchi2, zzcoeff, self._full_type[iBest]
+        else:
+            #Without New archetypes
+            binned = self.rebin_template(iBest, z, dwave,trapz=True)
+            binned = { hs:trans[hs]*binned[hs] for hs, w in dwave.items() }
+            tdata = { hs:np.append(binned[hs][:,None],legendre[hs].transpose(), axis=1 ) for hs, wave in dwave.items() }
+            zzchi2, zzcoeff = calc_zchi2_one(spectra, weights, flux, wflux, tdata)
+            chi2, coeff, fin_fulltype =  zzchi2, zzcoeff, self._full_type[iBest]
+
+        return chi2, coeff, fin_fulltype
 
 
 class All_archetypes():

diff --git a/py/redrock/external/desi.py b/py/redrock/external/desi.py
@@ -556,6 +556,12 @@ def rrdesi(options=None, comm=None):
     parser.add_argument("--archetypes", type=str, default=None,
         required=False,
         help="archetype file or directory for final redshift comparison")
+
+    parser.add_argument("--nearest_nbh", default=False, action="store_true",
+        required=False, help="Will apply the nearest neighbour approach on archetypes")
+
+    parser.add_argument("-n_nbh", type=int, default=9,
+        required=False, help="if nearest_nbh True, N-nearest neighbours taken into account (default is 9)")
 
     parser.add_argument("-d", "--details", type=str, default=None,
         required=False, help="output file for full redrock fit details")
@@ -680,8 +686,28 @@ def rrdesi(options=None, comm=None):
                     comm.Abort()
                 else:
                     sys.exit(1)
-
-
+
+        if args.nearest_nbh:
+            if os.path.isfile(args.archetypes):
+                file_check = args.archetypes
+            if os.path.isdir(args.archetypes):
+                file_check = args.archetypes+'rrarchetype-galaxy.fits'
+            try:
+                fits.open(file_check)[2].data ## HDU2 of galaxy-archetype must contain properties
+            except IndexError:
+                print('ERROR: Archetype file does not contain required HDU with galaxy properties\n')
+                sys.stdout.flush()
+                if comm is not None:
+                    comm.Abort()
+                else:
+                    sys.exit(1)
+            print('\n=================\n')    
+            print('Archetype with galaxy properties is provided\n')
+            print('Nearest neighbour approach is provided, so will apply the N-nearest neighbour approach on Redrock\n')
+            print('%d nearest neighbours will be used...\n'%(args.n_nbh))
+            print('===================\n')
+
+
     targetids = None
     if args.targetids is not None:
         targetids = [ int(x) for x in args.targetids.split(",") ]
@@ -803,7 +829,8 @@ def rrdesi(options=None, comm=None):
         start = elapsed(None, "", comm=comm)
 
         scandata, zfit = zfind(targets, dtemplates, mpprocs,
-            nminima=args.nminima, archetypes=args.archetypes,
+            nminima=args.nminima, archetypes=args.archetypes, 
+            nearest_nbh=args.nearest_nbh, n_nbh=args.n_nbh,
             priors=args.priors, chi2_scan=args.chi2_scan, use_gpu=use_gpu)
 
         stop = elapsed(start, "Computing redshifts", comm=comm)

diff --git a/py/redrock/fitz.py b/py/redrock/fitz.py
@@ -107,7 +107,7 @@ def minfit(x, y):
     return (x0, xerr, y0, zwarn)
 
 
-def fitz(zchi2, redshifts, spectra, template, nminima=3, archetype=None, use_gpu=False):
+def fitz(zchi2, redshifts, spectra, template, nminima=3, archetype=None, nearest_nbh=False, n_nbh=9, use_gpu=False):
     """Refines redshift measurement around up to nminima minima.
 
     TODO:
@@ -145,6 +145,7 @@ def fitz(zchi2, redshifts, spectra, template, nminima=3, archetype=None, use_gpu
         legendre = { hs:np.array([scipy.special.legendre(i)( (w-wave_min)/(wave_max-wave_min)*2.-1. ) for i in range(deg_legendre)]) for hs, w in dwave.items() }
 
     (weights, flux, wflux) = spectral_data(spectra)
+
     if (use_gpu):
         #Copy arrays to GPU
         weights = cp.asarray(weights)
@@ -244,7 +245,7 @@ def fitz(zchi2, redshifts, spectra, template, nminima=3, archetype=None, use_gpu
                 chi2=chi2min, zz=zz, zzchi2=zzchi2,
                 coeff=coeff))
         else:
-            chi2min, coeff, fulltype = archetype.get_best_archetype(spectra,weights,flux,wflux,dwave,zbest,legendre)
+            chi2min, coeff, fulltype = archetype.get_best_archetype(spectra,weights,flux,wflux,dwave,zbest,legendre, nearest_nbh, n_nbh)
 
             results.append(dict(z=zbest, zerr=zerr, zwarn=zwarn,
                 chi2=chi2min, zz=zz, zzchi2=zzchi2,

diff --git a/py/redrock/nearest_neighbours.py b/py/redrock/nearest_neighbours.py
@@ -0,0 +1,108 @@
+## Best Archetype redshift estimateion based on Nearest neighbour approach on synthetic spectra approach
+
+import numpy as np
+from astropy.io import fits
+from scipy.spatial.distance import euclidean
+from numba import jit
+import os
+
+def read_fits_data(filename, nhdu):
+    """
+     Reads hdu data from a fits file for given hdu and returns fits data    
+    """
+    hdu = fits.open(filename)
+    data = hdu[nhdu].data
+    hdu.close()
+    return data
+
+
+def synthetic_galaxy_data_read(gals):
+
+    """ Returns fluxes and physical parameters (dictionary) 
+        for synthetic galaxy spectra generated with desisim
+        by Abhijeet Anand, LBL, email: AbhijeetAnand@lbl.gov
+    """
+    all_gal_data = {}
+    file_gals = '/global/cfs/cdirs/desi/users/abhijeet/synthetic_spectra/%s/%s_spectral_flux_rest_frame.fits'%(gals, gals)
+    temp_gal_data = read_fits_data(file_gals, nhdu=2)
+    all_gal_data['FLUX'] = fits.open(file_gals)[1].data
+    for key in temp_gal_data.dtype.names:
+        all_gal_data[key] = temp_gal_data[key].data
+    return all_gal_data
+
+#@jit(nopython=True)
+def cartesian_dist(x_ref, Y, n_nearest):
+
+    """
+        Calculates 3d euclidean distance between reference and target array points and returns
+        n-nearest distances and indices of Y (two arrays)
+
+    Input:
+        x_ref: [list or 1d array], reference 3d coordinate
+        Y: [2d array], each row should be 3d coordinate of one object
+        n_nearest: [int]: return n_nearest objects (theri indices)
+    """
+    dist =  np.array([euclidean(x_ref, c) for c in Y])
+    inds = np.argsort(dist)
+    return dist, inds[0:n_nearest]
+
+def return_N_nearest_archetypes_from_synthetic_spectra(arch_id, archetype_data, gal_data, n_nbh, ret_wave=True):
+
+    """ Returns the fluxes and rest-frame wavelength (same as archetypes) of N-nearest neighbours for a given Best 
+        archetypes after estimating the distance between it and all archetypes
+
+    Input: 
+        arch_id: [int]; best archetype id found by redrock in archetype mode
+        archetype_data: [dict]; dictionary containing physical parameters for archetypes
+        must contain:  ['LOGMSTAR [M_sol]', 'LOGSSFR [yr^-1]', 'AV_ISM [mag]'] --> 3d coordinate...
+
+        gal_data: [dict]; full synthetic galaxy data dictionary, 
+        must contain:['LOGMSTAR [M_sol]', 'LOGSSFR [yr^-1]', 'AV_ISM [mag]', 'FLUX'] ....
+
+        n_nbh: [int], number of nearest neighbours you want to return
+        ret_wave: [bool], returns the wavelength array if True (default True)
+    """
+    param_keys = ['LOGMSTAR [M_sol]', 'LOGSSFR [yr^-1]', 'AV_ISM [mag]']
+    Y =  np.array([gal_data[key] for key in param_keys]).T  # parameters for synthetic galaxies (3d coordinates)
+    Xref = np.array([archetype_data[key][arch_id] for key in param_keys]) # parameter for best fit redrock archetypes
+    dist, ii = cartesian_dist(x_ref=Xref, Y=Y, n_nearest=n_nbh)
+    arch_flux = gal_data['FLUX'][ii]
+    if ret_wave:
+        lam0 = 1228.0701754386 #starting rest frame wavelength (same as PCA templates)
+        npix = 19545 # number of wavelength pixels same as archetypes (5 times lower than PCA)
+        step_size = 0.5 # pixel size in Angstrom (5 times lower resolution than PCA templates)
+        temp_wave = lam0 + np.arange(npix)*step_size # Galaxy archetype wavelengths
+        return arch_flux, temp_wave, ii
+    else:
+        return arch_flux, ii
+
+
+def return_galaxy_archetype_properties(archetypes):
+
+    """ Returns a dictionary containing the physical properties of archetype galaxies
+        generated from desisim by Abhijeet Anand, LBL, email: AbhijeetAnand@lbl.gov
+        Returns: ['LOGMSTAR [M_sol]', 'LOGSSFR [yr^-1]', 'AV_ISM [mag]']
+    """
+
+    if os.path.isfile(archetypes):
+        filename = archetypes
+    if os.path.isdir(archetypes):
+        filename = archetypes+'rrarchetype-galaxy.fits'
+    data= read_fits_data(filename, nhdu=2)
+    params = {}
+    for key in data.dtype.names:
+        params[key] = data[key]
+    return params
+
+def params_for_all_galaxies():
+    """
+        Returns dictionaries for all galaxies (ELG, LRG, BGS)
+        access each dictionary as fin_dict[subtype]
+    """
+    synthetic_galaxies = {}
+    for galaxies in ['ELG', 'LRG', 'BGS']:
+        synthetic_galaxies[galaxies] = synthetic_galaxy_data_read(galaxies)
+    return synthetic_galaxies
+
+
+
diff --git a/py/redrock/targets.py b/py/redrock/targets.py
@@ -194,7 +194,7 @@ def compute_coadd(self, cache_Rcsr=False, cosmics_nsig=0.):
 
             flux=np.array(flux)
             ivar=np.array(ivar)
-
+        
             if len(grad)>1 and cosmics_nsig > 0 :
                 # detect outliers by comparing spectra
                 grad=np.array(grad)
@@ -228,7 +228,6 @@ def compute_coadd(self, cache_Rcsr=False, cosmics_nsig=0.):
 
             spc = Spectrum(wave, flux, weights, R, Rcsr)
             coadd.append(spc)
-
         # swap the coadds into place.
         self.spectra = coadd
         return

diff --git a/py/redrock/zfind.py b/py/redrock/zfind.py
@@ -78,7 +78,7 @@ def sort_dict_by_cols(d, colnames, sort_first_column_first = True):
         d[k] = d[k][idx]
     return
 
-def _mp_fitz(chi2, target_data, t, nminima, qout, archetype, use_gpu):
+def _mp_fitz(chi2, target_data, t, nminima, qout, archetype, nearest_nbh, n_nbh, use_gpu):
     """Wrapper for multiprocessing version of fitz.
     """
     try:
@@ -88,7 +88,7 @@ def _mp_fitz(chi2, target_data, t, nminima, qout, archetype, use_gpu):
         results = list()
         for i, tg in enumerate(target_data):
             zfit = fitz(chi2[i], t.template.redshifts, tg.spectra,
-                t.template, nminima=nminima, archetype=archetype, use_gpu=use_gpu)
+                t.template, nminima=nminima, archetype=archetype, nearest_nbh=nearest_nbh, n_nbh=n_nbh, use_gpu=use_gpu)
             results.append( (tg.id, zfit) )
         qout.put(results)
     except:
@@ -208,7 +208,7 @@ def sort_zfit_dict(zfit):
     sort_dict_by_cols(zfit, ('__badfit__', 'chi2'), sort_first_column_first=True)
     zfit.pop('__badfit__')
 
-def zfind(targets, templates, mp_procs=1, nminima=3, archetypes=None, priors=None, chi2_scan=None, use_gpu=False):
+def zfind(targets, templates, mp_procs=1, nminima=3, archetypes=None, nearest_nbh=False, n_nbh=9, priors=None, chi2_scan=None, use_gpu=False):
     """Compute all redshift fits for the local set of targets and collect.
 
     Given targets and templates distributed across a set of MPI processes,
@@ -230,6 +230,8 @@ def zfind(targets, templates, mp_procs=1, nminima=3, archetypes=None, priors=Non
             Passed to fitz().
         archetypes (str, optional): file or directory containing archetypes
             to use for final fitz choice of best chi2 vs. z minimum.
+        nearest_nbh (bool), for applying nearest neighbour approach (default False)
+        n_nbh (int); if above true, then number of nearest neighbour to take into account
         priors (str, optional): file containing redshift priors
         chi2_scan (str, optional): file containing already computed chi2 scan
         use_gpu (bool, optional): use gpu for calc_zchi2
@@ -331,7 +333,7 @@ def zfind(targets, templates, mp_procs=1, nminima=3, archetypes=None, priors=Non
                 zfit = fitz(results[tg.id][ft]['zchi2'] \
                     + results[tg.id][ft]['penalty'],
                     t.template.redshifts, tg.spectra,
-                    t.template, nminima=nminima,archetype=archetype, use_gpu=use_gpu)
+                    t.template, nminima=nminima,archetype=archetype, nearest_nbh=nearest_nbh, n_nbh=n_nbh, use_gpu=use_gpu)
                 results[tg.id][ft]['zfit'] = zfit
         else:
             # Multiprocessing case.
@@ -355,7 +357,7 @@ def zfind(targets, templates, mp_procs=1, nminima=3, archetypes=None, priors=Non
                     eff_chi2[i,:] = results[tg.id][ft]['zchi2'] \
                         + results[tg.id][ft]['penalty']
                 p = mp.Process(target=_mp_fitz, args=(eff_chi2,
-                    target_data, t, nminima, qout, archetype, use_gpu))
+                    target_data, t, nminima, qout, archetype, nearest_nbh, n_nbh, use_gpu))
                 procs.append(p)
                 p.start()
 
@@ -410,17 +412,18 @@ def zfind(targets, templates, mp_procs=1, nminima=3, archetypes=None, priors=Non
                         spectype, subtype = fulltype.split(':::')
                     else:
                         spectype, subtype = (fulltype, '')
+                    nmin = len(tmp['chi2']) # defining dimension of array to save fulltype
                 else:
-                    spectype = [ el.split(':::')[0] for el in tmp['fulltype'] ]
-                    subtype = [ el.split(':::')[1] for el in tmp['fulltype'] ]
-                    tmp.remove_column('fulltype')
-
+                    spectype = [ el[0].split(':::')[0] for el in tmp['fulltype'] ] #el is a list with one element (corresponding to each minima)
+                    subtype = [ el[0].split(':::')[1] for el in tmp['fulltype'] ]
+                    del tmp['fulltype'] #it's a dictionary
+                    nmin=1 # defining dimension of array to save fulltype
+
                 #Have to create arrays of correct length since using dict of
                 #np arrays instead of astropy Table
                 l = len(tmp['chi2'])
-                tmp['spectype'] = np.array([spectype]*l).reshape((l, 1))
-                tmp['subtype'] = np.array([subtype]*l).reshape((l, 1))
-
+                tmp['spectype'] = np.array([spectype]*nmin).reshape((l, 1))
+                tmp['subtype'] = np.array([subtype]*nmin).reshape((l, 1))
                 tmp['ncoeff'] = np.array([tmp['coeff'].shape[1]]*l).reshape((l, 1))
                 tzfit.append(tmp)
                 del allresults[tid][fulltype]['zfit']