In [17]:
from astropy.io import fits
from astropy.table import Table, join
import numpy as np
import pylab as plt
import random
from scipy import stats
from sklearn.neighbors import KDTree
import time
from sklearn.metrics import mean_squared_error
from astropy.cosmology import FlatLambdaCDM
from os import listdir
import scipy
from desitarget.targetmask import desi_mask, bgs_mask, mws_mask
from desitarget.cmx.cmx_targetmask import cmx_mask

server = 1 # 0 is perlmutter, 1 is cori
server_paths = ['/pscratch/sd/a/ashodkh/', '/global/cscratch1/sd/ashodkh/']
## DATA ##
## I'm combining fastphot,fastspect, and ztile to make sure I use the same data everywhere

sv = '3'
zall_path = "/global/cfs/cdirs/desi/spectro/redux/fuji/zcatalog/ztile-sv"+sv+"-bright-cumulative.fits"
data1 = Table.read(zall_path,hdu=1)
needed1 = ["TARGETID", "SV"+sv+"_BGS_TARGET", "SPECTYPE", "DELTACHI2", "Z", "ZWARN", "FIBER", "PETAL_LOC", "TILEID"]

fastspec_path = "/global/cfs/cdirs/desi/spectro/fastspecfit/fuji/catalogs/fastspec-fuji-sv"+sv+"-bright.fits"
data2 = Table.read(fastspec_path,hdu=1)
data2.rename_column('CONTINUUM_COEFF', 'CONTINUUM_COEFF_FASTSPEC')
data2.rename_column('CONTINUUM_AV', 'CONTINUUM_AV_FASTSPEC')

needed2 = ["TARGETID", "OII_3726_EW", "OII_3729_EW", "HGAMMA_EW", "HBETA_EW", "OIII_4959_EW", "OIII_5007_EW", "NII_6548_EW", "HALPHA_EW", "NII_6584_EW", "SII_6716_EW", "SII_6731_EW",\
           "FLUX_SYNTH_G", "FLUX_SYNTH_R", "FLUX_SYNTH_Z", 'CONTINUUM_COEFF_FASTSPEC', 'CONTINUUM_AV_FASTSPEC',\
           "OII_3726_EW_IVAR", "OII_3729_EW_IVAR", "HGAMMA_EW_IVAR", "HBETA_EW_IVAR", "OIII_4959_EW_IVAR", "OIII_5007_EW_IVAR", "NII_6548_EW_IVAR", "HALPHA_EW_IVAR", "NII_6584_EW_IVAR",\
           "SII_6716_EW_IVAR", "SII_6731_EW_IVAR"]


fastphot_path = "/global/cfs/cdirs/desi/spectro/fastspecfit/fuji/catalogs/fastphot-fuji-sv"+sv+"-bright.fits"
data3 = Table.read(fastphot_path,hdu=1)
data3.rename_column('CONTINUUM_COEFF', 'CONTINUUM_COEFF_FASTPHOT')
data3.rename_column('CONTINUUM_AV', 'CONTINUUM_AV_FASTPHOT')

needed3 = ["TARGETID", "ABSMAG_SDSS_U", "ABSMAG_SDSS_G", "ABSMAG_SDSS_R", "ABSMAG_SDSS_I", "ABSMAG_SDSS_Z", 'CONTINUUM_COEFF_FASTPHOT', 'CONTINUUM_AV_FASTPHOT']

data4 = join(data1[needed1], data2[needed2], keys="TARGETID")
data = join(data4, data3[needed3], keys="TARGETID")

N=len(data['TARGETID'])

## Adding the sum of OII doublets to use them as a single line
data.add_column(data["OII_3726_EW"]+data["OII_3729_EW"], name='OII_DOUBLET_EW')
data.add_column(1/(data["OII_3726_EW_IVAR"]+data["OII_3729_EW_IVAR"]), name='OII_DOUBLET_EW_IVAR')

lines = ["OII_DOUBLET_EW", "HGAMMA_EW", "HBETA_EW", "OIII_4959_EW", "OIII_5007_EW", "NII_6548_EW", "HALPHA_EW", "NII_6584_EW", "SII_6716_EW", "SII_6731_EW", "test"]
lines_ivar = ["OII_DOUBLET_EW_IVAR", "HGAMMA_EW_IVAR", "HBETA_EW_IVAR", "OIII_4959_EW_IVAR", "OIII_5007_EW_IVAR", "NII_6548_EW_IVAR", "HALPHA_EW_IVAR", "NII_6584_EW_IVAR",\
              "SII_6716_EW_IVAR", "SII_6731_EW_IVAR"]




    
# calculating minimum redshift to have de-redshifted wavelengths be in the interval w1,w2 A
w1 = 3400
w_min = 3600
z_min = w_min/w1-1
w2 = 8500
w_max = 9824
#z_max = w_max/w2-1
z_max = 0.3

# target selection index. run should be changed if anything in select changed and/or the number of data points changed
run = 1 # old run=0 was sv1 with no bgs selection and n=30k. new run=0 is sv1 with bgs selection. There's only about 25k per line so I will use them for training only.
        # run 1 is sv3 with bgs selection. So far I'm only using it for testing so only l=10.

## I am splitting data into training and testing before applying snr_cuts, and then saving them separately
select = ((data["SV"+sv+"_BGS_TARGET"] & bgs_mask.mask("BGS_BRIGHT"))>0)*(data["SPECTYPE"]=="GALAXY")*(data["DELTACHI2"]>=25)\
         *(data["Z"]>z_min)*(data["Z"]<z_max)*(data["ZWARN"]==0)

select_size = len(np.where(select)[0])
print(select_size)
data_select = data[select]
select_half = int(select_size/2)
#select_half = int(select_size-1)
data_train = data_select[:select_half]
data_test = data_select[select_half:]

n_test = 30*10**3 # size of testing set I'm keeping

target_ids = data_test["TARGETID"][:n_test]
fiber_ids = data_test["FIBER"][:n_test]
petal_locs = data_test["PETAL_LOC"][:n_test]
tile_ids = data_test["TILEID"][:n_test]
zs = data_test["Z"][:n_test]

coeffs_fastspec = data_test['CONTINUUM_COEFF_FASTSPEC'][:n_test]
AV_fastspec = data_test['CONTINUUM_AV_FASTSPEC'][:n_test]

coeffs_fastphot = data_test['CONTINUUM_COEFF_FASTPHOT'][:n_test]
AV_fastphot = data_test['CONTINUUM_AV_FASTPHOT'][:n_test]

line_ews = np.array(data_test[lines[:-1]][:n_test])
line_ivars = np.array(data_test[lines_ivar][:n_test])

test = True
train = False
if test:
    l = 10 # l=10 corresponds to test set that has no snr cuts. This is a good way of separating those files without changing much of my code.
    np.savez_compressed(server_paths[server]+"target_selection/target_ids_selection" + str(run) + "_" + str(lines[l]) + ".txt", target_ids)
    np.savez_compressed(server_paths[server]+"target_selection/fiber_ids_selection" + str(run) + "_" + str(lines[l]) + ".txt", fiber_ids)
    np.savez_compressed(server_paths[server]+"target_selection/petal_locs_selection" + str(run) + "_" + str(lines[l]) + ".txt", petal_locs)
    np.savez_compressed(server_paths[server]+"target_selection/tile_ids_selection" + str(run) + "_" + str(lines[l]) + ".txt", tile_ids)
    np.savez_compressed(server_paths[server]+"target_selection/zs_selection" + str(run) + "_" + str(lines[l]) + ".txt", zs)
    np.savez_compressed(server_paths[server]+"target_selection/line_ews_selection"+str(run)+"_" + str(lines[l]) + ".txt", line_ews)
    np.savez_compressed(server_paths[server]+"target_selection/line_ivars_selection"+str(run)+"_" + str(lines[l]) + ".txt", line_ivars)

    np.savez_compressed(server_paths[server]+"target_selection/fastspec_coeffs_selection" + str(run) + "_" + str(lines[l]) + ".txt", coeffs_fastspec)
    np.savez_compressed(server_paths[server]+"target_selection/fastspec_AV_selection" + str(run) + "_" + str(lines[l]) + ".txt", AV_fastspec)

    np.savez_compressed(server_paths[server]+"target_selection/fastphot_coeffs_selection" + str(run) + "_" + str(lines[l]) + ".txt", coeffs_fastphot)
    np.savez_compressed(server_paths[server]+"target_selection/fastphot_AV_selection" + str(run) + "_" + str(lines[l]) + ".txt", AV_fastphot)


lines = ["OII_DOUBLET_EW", "HGAMMA_EW", "HBETA_EW", "OIII_4959_EW", "OIII_5007_EW", "NII_6548_EW", "HALPHA_EW", "NII_6584_EW", "SII_6716_EW", "SII_6731_EW"]
## choosing what snr cuts to make on data_train
N = len(data_train['TARGETID'])
snr_cut = 1
# calculating snr for all lines and setting the snr cut boolean as select_snr
snr_all = np.zeros([N,len(lines)])
snr_all[:,0] = data_train[lines[0]]*np.sqrt(data_train[lines[0]+"_IVAR"])

cut_Halpha = True # cut Halpha on all data and later cut snr>0 for every line separately
cut_all = False # cut on all lines together, which extremely restricts the data
for i in range(1,len(lines)):
    snr_all[:,i]=data_train[lines[i]]*np.sqrt(data_train[lines[i]+"_IVAR"])
    if cut_all:
        select_snr = select_snr*(snr_all[:,i]>snr_cut)
if cut_Halpha:
    select_snr = snr_all[:,6]>snr_cut


if train:
    for l in range(len(lines)):
        # select = ((data["SV3_BGS_TARGET"] & bgs_mask.mask("BGS_BRIGHT"))>0)*(data["SPECTYPE"]=="GALAXY")*(data["DELTACHI2"]>=25)\
        #          *(data["Z"]>z_min)*(data["Z"]<z_max)*(data["ZWARN"]==0)*(select_snr)*(snr_all[:,l]>0)
        snr_cut = (select_snr)*(snr_all[:,l]>0)
        print(len(np.where(snr_cut)[0]))

        n_train = 25*10**3 # size of data set I'm keeping

        target_ids = data_train["TARGETID"][snr_cut][:n_train]
        fiber_ids = data_train["FIBER"][snr_cut][:n_train]
        petal_locs = data_train["PETAL_LOC"][snr_cut][:n_train]
        tile_ids = data_train["TILEID"][snr_cut][:n_train]
        zs = data_train["Z"][snr_cut][:n_train]

        coeffs_fastspec = data_train['CONTINUUM_COEFF_FASTSPEC'][snr_cut][:n_train]
        AV_fastspec = data_train['CONTINUUM_AV_FASTSPEC'][snr_cut][:n_train]

        coeffs_fastphot = data_train['CONTINUUM_COEFF_FASTPHOT'][snr_cut][:n_train]
        AV_fastphot = data_train['CONTINUUM_AV_FASTPHOT'][snr_cut][:n_train]

        line_ews=np.array(data_train[lines][snr_cut][:n_train])
        line_ivars = np.array(data_train[lines_ivar][snr_cut][:n_train])

        np.savez_compressed(server_paths[server]+"target_selection/target_ids_selection" + str(run) + "_" + str(lines[l]) + ".txt", target_ids)
        np.savez_compressed(server_paths[server]+"target_selection/fiber_ids_selection" + str(run) + "_" + str(lines[l]) + ".txt", fiber_ids)
        np.savez_compressed(server_paths[server]+"target_selection/petal_locs_selection" + str(run) + "_" + str(lines[l]) + ".txt", petal_locs)
        np.savez_compressed(server_paths[server]+"target_selection/tile_ids_selection" + str(run) + "_" + str(lines[l]) + ".txt", tile_ids)
        np.savez_compressed(server_paths[server]+"target_selection/zs_selection" + str(run) + "_" + str(lines[l]) + ".txt", zs)
        np.savez_compressed(server_paths[server]+"target_selection/line_ews_selection"+str(run)+"_" + str(lines[l]) + ".txt", line_ews)
        np.savez_compressed(server_paths[server]+"target_selection/line_ivars_selection"+str(run)+"_" + str(lines[l]) + ".txt", line_ivars)

        np.savez_compressed(server_paths[server]+"target_selection/fastspec_coeffs_selection" + str(run) + "_" + str(lines[l]) + ".txt", coeffs_fastspec)
        np.savez_compressed(server_paths[server]+"target_selection/fastspec_AV_selection" + str(run) + "_" + str(lines[l]) + ".txt", AV_fastspec)

        np.savez_compressed(server_paths[server]+"target_selection/fastphot_coeffs_selection" + str(run) + "_" + str(lines[l]) + ".txt", coeffs_fastphot)
        np.savez_compressed(server_paths[server]+"target_selection/fastphot_AV_selection" + str(run) + "_" + str(lines[l]) + ".txt", AV_fastphot)

  data.add_column(1/(data["OII_3726_EW_IVAR"]+data["OII_3729_EW_IVAR"]), name='OII_DOUBLET_EW_IVAR')


175903


  snr_all[:,0] = data_train[lines[0]]*np.sqrt(data_train[lines[0]+"_IVAR"])


In [18]:
print(select_size)
print(len(data_train['TARGETID']))

175903
87951


In [3]:
lines

['OII_DOUBLET_EW',
 'HGAMMA_EW',
 'HBETA_EW',
 'OIII_4959_EW',
 'OIII_5007_EW',
 'NII_6548_EW',
 'HALPHA_EW',
 'NII_6584_EW',
 'SII_6716_EW',
 'SII_6731_EW']

In [4]:
lines[:-1]

['OII_DOUBLET_EW',
 'HGAMMA_EW',
 'HBETA_EW',
 'OIII_4959_EW',
 'OIII_5007_EW',
 'NII_6548_EW',
 'HALPHA_EW',
 'NII_6584_EW',
 'SII_6716_EW']