# Split QSO-maker catalogs for Loa
Stephanie Juneau (NOIRLab)


NOTE: Edmond generated individual survey/program files for all targets:
```
## QSO-maker path 
path_qsom = '/global/cfs/cdirs/desi/science/gqp/agncatalog/qsomaker/loa/'
```

We need to split the files for `main/dark` and `main/bright` in the same way as for the Fastspecfit files with NSIDE=1 Healpixels:
```
from desimodel.footprint import radec2pix
pix = radec2pix(nside=1, RA, DEC)
survey = 'main'
prog = 'dark'
ffile = f'fastspec-loa-{survey}-{prog}-nside1-hp{pix:02}.fits'
qsofile = f'QSO_cat_loa_{survey}_{prog}_nside1_hp{pix:02}_healpix_all_targets_v1.fits'
```

## Imports

In [2]:
# General imports
import numpy as np

# Import Astropy libraries - useful for many astronomy related function
from astropy.table import Table, vstack
from astropy.io import fits

# Fast FITS file I/O access
import fitsio


## Data files

In [3]:
## Define filepaths (same for Loa)
path_qsom = '/global/cfs/cdirs/desi/science/gqp/agncatalog/qsomaker/loa/'
path_gqp = '/global/cfs/cdirs/desi/science/gqp/agncatalog/qsomaker/loa/'

In [4]:
## SJ: will exclude the targeting cols because we'll add them from the zcat VAC instead 
#qsom_cols=['TARGETID','Z','ZERR','ZWARN','SPECTYPE','COADD_FIBERSTATUS','TARGET_RA','TARGET_DEC',\
#           'MORPHTYPE','EBV','MASKBITS','DESI_TARGET','SCND_TARGET','COADD_NUMEXP','COADD_EXPTIME',\
#           'CMX_TARGET','SV1_DESI_TARGET','SV2_DESI_TARGET','SV3_DESI_TARGET',\
#           'SV1_SCND_TARGET','SV2_SCND_TARGET','SV3_SCND_TARGET','TSNR2_LYA','TSNR2_QSO',\
#           'DELTA_CHI2_MGII','A_MGII','SIGMA_MGII','B_MGII','VAR_A_MGII','VAR_SIGMA_MGII','VAR_B_MGII',\
#           'Z_RR','Z_QN','C_LYA','C_CIV','C_CIII','C_MgII','C_Hbeta','C_Halpha','QSO_MASKBITS']

# Current choice for Loa
qsom_cols=['TARGETID','Z','ZERR','ZWARN','OBJTYPE','SPECTYPE','COADD_FIBERSTATUS','TARGET_RA','TARGET_DEC',\
           'MORPHTYPE','MASKBITS','COADD_NUMEXP','COADD_EXPTIME','TSNR2_LYA','TSNR2_QSO',\
           'DELTA_CHI2_MGII','A_MGII','SIGMA_MGII','B_MGII','VAR_A_MGII','VAR_SIGMA_MGII','VAR_B_MGII',\
           'Z_RR','Z_QN','C_LYA','C_CIV','C_CIII','C_MgII','C_Hbeta','C_Halpha',\
           'QSO_MASKBITS','SURVEY','PROGRAM','HPXPIXEL']
## NOTES:
# - cut on OBJTYPE then delete the column (wasn't in Fuji/EDR version)
# - added HPXPIXEL (now renamed to HEALPIX so beware if joining later)
# - could try without these: 'Z_LYA','Z_CIV','Z_CIII','Z_MgII','Z_Hbeta','Z_Halpha' ?

print(qsom_cols)

['TARGETID', 'Z', 'ZERR', 'ZWARN', 'OBJTYPE', 'SPECTYPE', 'COADD_FIBERSTATUS', 'TARGET_RA', 'TARGET_DEC', 'MORPHTYPE', 'MASKBITS', 'COADD_NUMEXP', 'COADD_EXPTIME', 'TSNR2_LYA', 'TSNR2_QSO', 'DELTA_CHI2_MGII', 'A_MGII', 'SIGMA_MGII', 'B_MGII', 'VAR_A_MGII', 'VAR_SIGMA_MGII', 'VAR_B_MGII', 'Z_RR', 'Z_QN', 'C_LYA', 'C_CIV', 'C_CIII', 'C_MgII', 'C_Hbeta', 'C_Halpha', 'QSO_MASKBITS', 'SURVEY', 'PROGRAM', 'HPXPIXEL']


## Loop over survey-program combos

In [4]:
## Loop over fsf files to list the healpixels
pixs = np.arange(0,12)
fpath = '/global/cfs/cdirs/desi/vac/dr2/fastspecfit/loa/v1.0/catalogs/'
cols_f = ['TARGETID', 'HEALPIX']

print("pix_1, hpx_min, hpx_max, n_hpx, hpx_max-hpx_min")

for pix in pixs:
    ffile = f'fastspec-loa-main-dark-nside1-hp{pix:02}.fits'
    f = fitsio.read(fpath+ffile, ext=1, columns=cols_f)
    hpxs = np.unique(f['HEALPIX'])
    hpx_min = np.min(hpxs)
    hpx_max = np.max(hpxs)
    n_hpx = len(hpxs)
    print(pix, hpx_min, hpx_max, n_hpx)

pix_1, hpx_min, hpx_max, n_hpx, hpx_max-hpx_min
0 0 2593 637
1 4096 8129 3405
2 8192 11842 2984
3 12288 16101 851
4 16725 20448 3286
5 21777 23304 593
6 25343 28671 2968
7 29969 32767 1049
8 36182 36863 511
9 40791 40959 110
10 44911 45055 98
11 48607 49151 139


In [5]:
# NSIDE = 1; NPIX = 12	
# NSIDE = 64; NPIX = 49152	
print(49152/12)

4096.0


In [6]:
# Need to batch the healpixels
batch_size = 4096

# NSIDE=1 healpixels
pixs = np.arange(0,12)

In [7]:
%%time

# List of survey_program combos that need to be split per healpix groups
#survey_programs = ['sv1_bright', 'sv1_dark']  # for testing code with smaller files
#save_path = path_gqp+'test/'
survey_programs = ['main_bright', 'main_dark']  # actual (large) files
save_path = path_gqp

# Initialize table
T_qsom = Table()

for i in range(len(survey_programs)):

    # Read each file
    surv_prog = survey_programs[i]
    file_i = path_qsom+f"QSO_cat_loa_{surv_prog}_healpix_all_targets_v1.fits"
    T_qsom_i = Table(fitsio.read(file_i, columns=qsom_cols, ext=1)) 

    # Print some stats:
    N_init = len(T_qsom_i)
    print(f"N={N_init} in file {file_i}")
    
    ## Keep only OBJTYPE='TGT'
    keep = T_qsom_i['OBJTYPE']=='TGT'
    T_qsom_i = T_qsom_i[keep]
    
    # Print some stats:
    N_keep = len(T_qsom_i)
    print(f"... After cutting on OBTYPE=TGT: N={N_keep}; (fraction: {np.round(N_keep/N_init, 2)})")

    ## Adding two columns we need for the cuts
    a = np.array([T_qsom_i['C_LYA'], T_qsom_i['C_CIV'], T_qsom_i['C_CIII'], \
                  T_qsom_i['C_MgII'], T_qsom_i['C_Hbeta'], T_qsom_i['C_Halpha']])
    T_qsom_i['QN_C_LINE_BEST'] = [max(l) for l in (a.T).tolist()]
    T_qsom_i['QN_C_LINE_SECOND_BEST'] = [sorted(l)[-2] for l in (a.T).tolist()]

    hi_conf_50 = T_qsom_i['QN_C_LINE_BEST']>0.50
    # Remove stars (except wait for possible mid/high-confidence QN cases)
    is_star = (T_qsom_i['SPECTYPE']=='STAR')&(T_qsom_i['Z']<0.001)&(~hi_conf_50)
    T_qsom_i = T_qsom_i[~is_star]
    
    # Print some stats:
    print(f"... After cutting Stars at z<0.001: N={len(T_qsom_i)}; (fraction: {np.round(len(T_qsom_i)/N_init, 2)})")

    # Remove the OBJTYPE column (no longer needed)
    T_qsom_i.remove_column('OBJTYPE')

    # Code for splitting in healpix groups
    for pix in pixs:
        hpx64_min = pix*batch_size
        hpx64_max = (pix+1)*batch_size - 1

        outfile_pix1 = f'QSO_cat_loa_{surv_prog}_nside1_hp{pix:02}_healpix_all_targets_v1.fits'

        hpx_col = T_qsom_i['HPXPIXEL']
        isin_pix = (hpx_col>=hpx64_min)&(hpx_col<=hpx64_max)

        N_in_pix = len(T_qsom_i[isin_pix])
        if N_in_pix>=1:
            T_qsom_i[isin_pix].write(save_path+outfile_pix1, overwrite=True)
            print('Wrote file ',outfile_pix1)

N=24204098 in file /global/cfs/cdirs/desi/science/gqp/agncatalog/qsomaker/loa/QSO_cat_loa_main_bright_healpix_all_targets_v1.fits
... After cutting on OBTYPE=TGT: N=19613147; (fraction: 0.81)
... After cutting Stars at z<0.001: N=13399979; (fraction: 0.55)
Wrote file  QSO_cat_loa_main_bright_nside1_hp00_healpix_all_targets_v1.fits
Wrote file  QSO_cat_loa_main_bright_nside1_hp01_healpix_all_targets_v1.fits
Wrote file  QSO_cat_loa_main_bright_nside1_hp02_healpix_all_targets_v1.fits
Wrote file  QSO_cat_loa_main_bright_nside1_hp03_healpix_all_targets_v1.fits
Wrote file  QSO_cat_loa_main_bright_nside1_hp04_healpix_all_targets_v1.fits
Wrote file  QSO_cat_loa_main_bright_nside1_hp05_healpix_all_targets_v1.fits
Wrote file  QSO_cat_loa_main_bright_nside1_hp06_healpix_all_targets_v1.fits
Wrote file  QSO_cat_loa_main_bright_nside1_hp07_healpix_all_targets_v1.fits
Wrote file  QSO_cat_loa_main_bright_nside1_hp08_healpix_all_targets_v1.fits
Wrote file  QSO_cat_loa_main_bright_nside1_hp09_healpix_all

In [36]:
# Code for splitting in healpix groups
for pix in pixs:
    hpx64_min = pix*batch_size
    hpx64_max = (pix+1)*batch_size - 1
    print(pix, ', min, max = ', hpx64_min, hpx64_max)

0 , min, max =  0 4095
1 , min, max =  4096 8191
2 , min, max =  8192 12287
3 , min, max =  12288 16383
4 , min, max =  16384 20479
5 , min, max =  20480 24575
6 , min, max =  24576 28671
7 , min, max =  28672 32767
8 , min, max =  32768 36863
9 , min, max =  36864 40959
10 , min, max =  40960 45055
11 , min, max =  45056 49151


## Record keeping of results from above

Looping over all survey-program combos printed this:
```
N=24204098 in file /global/cfs/cdirs/desi/science/gqp/agncatalog/qsomaker/loa/QSO_cat_loa_main_bright_healpix_all_targets_v1.fits
... After cutting on OBTYPE=TGT: N=19613147; (fraction: 0.81)
... After cutting Stars at z<0.001: N=13399979; (fraction: 0.55)
N=29733557 in file /global/cfs/cdirs/desi/science/gqp/agncatalog/qsomaker/loa/QSO_cat_loa_main_dark_healpix_all_targets_v1.fits
... After cutting on OBTYPE=TGT: N=24252859; (fraction: 0.82)
```

## Tests and VI below for Iron (repeat for Loa?)

In [None]:
print(170042/18260646)

In [6]:
%%time
#test
file_i = path_qsom+"QSO_cat_iron_sv1_backup_healpix_all_targets_v1.fits"
#file_i = path_qsom+"QSO_cat_iron_sv3_dark_healpix_all_targets_v1.fits"
#file_i = path_qsom+"QSO_cat_iron_sv2_bright_healpix_all_targets_v1.fits" #N=1
#file_i = path_qsom+"QSO_cat_iron_sv2_dark_healpix_all_targets_v1.fits"

#T_qsom_i = Table(fitsio.read(file_i, columns=qsom_cols, ext=1))
T_qsom_i = Table(fitsio.read(file_i, ext=1))

## Adding two columns we need for the cuts
a = np.array([T_qsom_i['C_LYA'], T_qsom_i['C_CIV'], T_qsom_i['C_CIII'], \
              T_qsom_i['C_MgII'], T_qsom_i['C_Hbeta'], T_qsom_i['C_Halpha']])
T_qsom_i['QN_C_LINE_BEST'] = [max(l) for l in (a.T).tolist()]
T_qsom_i['QN_C_LINE_SECOND_BEST'] = [sorted(l)[-2] for l in (a.T).tolist()]


CPU times: user 684 ms, sys: 251 ms, total: 934 ms
Wall time: 1.08 s


In [7]:
T_qsom_i.columns

<TableColumns names=('TARGETID','Z','ZERR','ZWARN','SPECTYPE','COADD_FIBERSTATUS','TARGET_RA','TARGET_DEC','OBJTYPE','MORPHTYPE','EBV','FLUX_G','FLUX_R','FLUX_Z','FLUX_W1','FLUX_W2','FLUX_IVAR_G','FLUX_IVAR_R','FLUX_IVAR_Z','FLUX_IVAR_W1','FLUX_IVAR_W2','MASKBITS','SV1_DESI_TARGET','SV1_SCND_TARGET','DESI_TARGET','COADD_NUMEXP','COADD_EXPTIME','CMX_TARGET','SV2_DESI_TARGET','SV3_DESI_TARGET','SV2_SCND_TARGET','SV3_SCND_TARGET','SCND_TARGET','WISE_VAR_QSO','TSNR2_LYA','TSNR2_QSO','DELTA_CHI2_MGII','A_MGII','SIGMA_MGII','B_MGII','VAR_A_MGII','VAR_SIGMA_MGII','VAR_B_MGII','Z_RR','Z_QN','C_LYA','C_CIV','C_CIII','C_MgII','C_Hbeta','C_Halpha','Z_LYA','Z_CIV','Z_CIII','Z_MgII','Z_Hbeta','Z_Halpha','QSO_MASKBITS','HPXPIXEL','SURVEY','PROGRAM','QN_C_LINE_BEST','QN_C_LINE_SECOND_BEST')>

In [None]:
#fits.info(file_i)

In [None]:
print(np.unique(T_qsom_i['OBJTYPE']))

In [None]:
for objtyp in np.unique(T_qsom_i['OBJTYPE']):
    print(f"OBJTYPE={objtyp};  N={len(T_qsom_i[T_qsom_i['OBJTYPE']==objtyp])}")

In [None]:
no_objtype = (T_qsom_i['OBJTYPE']=='')
print(np.max(T_qsom_i['TARGETID'][no_objtype]))
print(np.min(T_qsom_i['ZWARN'][no_objtype]))
print(np.min(T_qsom_i['COADD_FIBERSTATUS'][no_objtype]))

In [None]:
print(len(T_qsom_i))
print(len(T_qsom_i[T_qsom_i['TARGETID']<0]))
is_star = (T_qsom_i['SPECTYPE']=='STAR')&(T_qsom_i['Z']<0.001)
print(len(T_qsom_i[is_star]))

In [None]:
hi_conf_95 = T_qsom_i['QN_C_LINE_BEST']>0.5
print(len(T_qsom_i[is_star&hi_conf_95]))

In [None]:
T_qsom_i[is_star&hi_conf_95]

In [None]:
prospect_prefix = 'https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid'
#'https://www.legacysurvey.org/viewer-desi/desi-spectrum/daily/targetid'

In [None]:
for tid in T_qsom_i['TARGETID'][is_star&hi_conf_95][:10]:
    print(prospect_prefix+str(tid))

### SV1 backup (N=4)
- https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid39633489959586121 (STAR, RR correct)
- https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid39633493742847107 (STAR but artifact/fake break)
- https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid39633478597218272 (STAR)
- https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid39633554824495772 (STAR but artifact/fake break)

### SV1 bright (N=8)
- https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid39627878731613209 (STAR, jump in B/R gap)
- https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid39632940052778995 (STAR, jump in B/R gap, fiber off-center?)
- https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid39632955525563342 (? looks bad, not sure)
- https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid39632995233039010 (QSO at z=2.72 --> LENSED)
- https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid39628500637844358 (STAR, artifact in red)
- https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid39633134756564663 (STAR, artifact in red)
- https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid39633165249152063 (STAR)
- https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid39628446967531209 (QSO at z=2.84)

### SV1 dark (N=20); first 10:
- https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid39628395528587194 (STAR, WD)
- https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid39627914815214333 (QSO at z=1.181)
- https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid39627914819409528 (QSO, BAL at z=2.179)
- https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid39628417116668690 (QSO at z=2.38)
- https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid39627158296986610 (QSO at z=1.98)
- https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid39627218212621778 (QSO at z=1.373)
- https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid39628151147466939 (QSO at z=3.04)
- https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid39632945178216893 (QSO at z=1.24)
- https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid39628522041380522 (QSO at z=2.4)
- https://www.legacysurvey.org/viewer-desi/desi-spectrum/dr1/targetid39632939855643261 (? looks bad, not sure)

### SV2 dark (N=6)
```
TARGETID	     SURVEY	PROGRAM	  Z_RR	       Z_QN	        Z_VI
39633318479659525	sv2	dark	 0.00095977    1.1393893	1.1399
39633362096229283	sv2	dark	-0.00028507    1.2618234	1.3022
39633297059350765	sv2	dark	-0.00031275    1.0668758	1.056
39633318471272704	sv2	dark	 0.00088319    1.2718421	1.2568
39633328860564489	sv2	dark	-0.00136077    1.2698301	1.2759
39633368509318861	sv2	dark	-5.000e-05     2.8922458	2.895
```

In [None]:
# some VI for sv2-dark (pasted table above):
t_vi = Table()
t_vi['TARGETID'] = T_qsom_i['TARGETID'][is_star&hi_conf_95]
t_vi['SURVEY'] = T_qsom_i['SURVEY'][is_star&hi_conf_95]
t_vi['PROGRAM'] = T_qsom_i['PROGRAM'][is_star&hi_conf_95]
t_vi['Z_RR'] = T_qsom_i['Z_RR'][is_star&hi_conf_95]
t_vi['Z_QN'] = T_qsom_i['Z_QN'][is_star&hi_conf_95]
t_vi['Z_VI'] = [1.1399, 1.3022, 1.0560, 1.2568, 1.2759, 2.895]

t_vi