In [None]:
import os
import pickle
import re
import sys
import syslog
import types
from urllib.parse import urljoin

import astropy.coordinates as coord
import astropy.io.fits as fits
import astropy.units as u
import healpy as hp
import matplotlib.colors as c
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from astropy.table import Table,join,vstack,unique
from astropy.wcs import WCS

if './SelfCalGroupFinder/py/' not in sys.path:
    sys.path.append('./SelfCalGroupFinder/py/')
from pyutils import *
from dataloc import *
from photoz import *
import groupcatalog as gc

%load_ext autoreload
%autoreload 2

In [None]:
# Minimum NTILE_MINE a galaxy must have to survive the final `keep` selection in the filtering cell.
KEEP_PASSES = 1
# r-band apparent magnitude limit: objects are kept when their magnitude is numerically below this value.
APP_MAG_CUT = 19.5 # BGS BRIGHT, though 19.54 for some cameras. I don't know if FLUX_R has been corrected for this.
#APP_MAG_CUT = 20.175 # BGS FAINT, and in SV3 it is 20.3 instead
# Redshift window (strict inequalities on both ends) applied to observed galaxies.
Z_MIN = 0.001
Z_MAX = 0.8

def get_app_mag(flux):
    """Convert a flux in nanomaggies to a Pogson magnitude.

    Evaluates ``22.5 - 2.5 * log10(flux)`` element-wise, so ``flux`` may be a
    scalar or anything ``np.log10`` accepts. The return type follows
    whatever ``np.log10`` yields for that input.
    """
    zero_point = 22.5  # value returned for a flux of exactly 1
    log_flux = np.log10(flux)
    return zero_point - 2.5 * log_flux

# This corresponds to 8.33 square degrees and empirically makes sense by looking at the randoms
TILE_RADIUS = 5862.0 * u.arcsec # arcsec

def find_tiles_for_galaxies(tiles_df, gals_df, num_tiles_to_find):
    """Look up the nearest tiles for every galaxy and count how many of them cover it.

    Parameters
    ----------
    tiles_df : pandas.DataFrame
        Tile centers; needs columns ``RA`` and ``Dec`` (degrees) and ``TILEID``.
    gals_df : pandas.DataFrame
        Galaxy positions; needs columns ``RA`` and ``Dec`` (degrees).
    num_tiles_to_find : int
        Number of nearest tiles to look up per galaxy.

    Returns
    -------
    ntiles_inside : numpy.ndarray of int, shape (num_galaxies,)
        How many of the ``num_tiles_to_find`` nearest tile centers lie within
        ``TILE_RADIUS`` of the galaxy (so it can never exceed ``num_tiles_to_find``).
    nearest_tile_ids : numpy.ndarray of int, shape (num_galaxies, num_tiles_to_find)
        TILEIDs of the nearest tiles; column ``n`` holds the (n+1)-th nearest tile.

    Raises
    ------
    ValueError
        If more neighbors are requested than there are tiles.
    """
    num_galaxies = len(gals_df.RA)
    num_tiles = len(tiles_df.RA)

    # An n-th neighbor only exists while n <= number of tiles; fail early with a clear message.
    if num_tiles_to_find > num_tiles:
        raise ValueError(
            f"Requested {num_tiles_to_find} nearest tiles but only {num_tiles} tiles are available"
        )

    tiles_coord = coord.SkyCoord(ra=tiles_df.RA.to_numpy()*u.degree, dec=tiles_df.Dec.to_numpy()*u.degree, frame='icrs')
    gals_coord = coord.SkyCoord(ra=gals_df.RA.to_numpy()*u.degree, dec=gals_df.Dec.to_numpy()*u.degree, frame='icrs')

    # Positional lookup table: idx returned by the matcher indexes rows of tiles_df by position
    tile_ids = tiles_df.TILEID.to_numpy()

    # Structure for resultant data
    nearest_tile_ids = np.zeros((num_galaxies, num_tiles_to_find), dtype=int)
    ntiles_inside = np.zeros(num_galaxies, dtype=int)

    for n in range(num_tiles_to_find):
        # storekdtree caches the KD-tree on tiles_coord so it is reused across iterations
        idx, d2d, _ = coord.match_coordinates_sky(gals_coord, tiles_coord, nthneighbor=n+1, storekdtree='kdtree_tiles')
        nearest_tile_ids[:, n] = tile_ids[idx]
        ntiles_inside += (d2d < TILE_RADIUS).astype(int)

    return ntiles_inside, nearest_tile_ids

def table_to_df(table: Table):
    """
    Build a pandas DataFrame of per-object quantities from an astropy Table.

    This does not work for all purposes yet.

    The table must provide SPECTYPE, DEC, RA, Z, TARGETID, FLUX_R, FLUX_G,
    PROB_OBS, DELTACHI2, NTILE and DN4000. SPECTYPE, Z, DELTACHI2 and DN4000 are
    read through ``.data.data`` and Z through ``.mask``, i.e. they are accessed
    as masked columns.
    """
    # TODO why not use to_pandas()?
    #df = table.to_pandas()

    columns = {
        'SPECTYPE': table['SPECTYPE'].data.data,
        # Big endian vs little endian regression in pandas. Convert more of these fields
        # like this as needed if using pandas with this data
        'Dec': table['DEC'].astype("<f8"),
        'RA': table['RA'].astype("<f8"),
        'z': table['Z'].data.data,
        'TARGETID': table['TARGETID'],
    }

    # Apparent magnitudes come from the fluxes; the apparent color is their difference.
    r_mag = get_app_mag(table['FLUX_R'])
    g_mag = get_app_mag(table['FLUX_G'])

    # Currently disabled outputs: FLUX_R, FLUX_G, SDSS_G_R, G_R_JM1, ABS_MAG_SDSS
    columns.update({
        'APP_MAG_R': r_mag,
        'APP_MAG_G': g_mag,
        'G_R_APPARENT': g_mag - r_mag,
        'PROB_OBS': table['PROB_OBS'],
        # Rows where Z is masked are flagged as unobserved
        'UNOBSERVED': table['Z'].mask,
        'DELTACHI2': table['DELTACHI2'].data.data,
        'NTILE': table['NTILE'],
        'DN4000': table['DN4000'].data.data,
    })

    return pd.DataFrame(columns)

def _read_tiles_csv(tiles_file):
    """Read a tiles CSV into a DataFrame with columns RA, Dec, FAFLAVOR and TILEID."""
    tiles_table = Table.read(tiles_file, format='csv')
    tiles_table.keep_columns(['TILEID', 'FAFLAVOR', 'TILERA', 'TILEDEC'])
    return pd.DataFrame({
        'RA': tiles_table['TILERA'].astype("<f8"),
        'Dec': tiles_table['TILEDEC'].astype("<f8"),
        'FAFLAVOR': tiles_table['FAFLAVOR'],
        'TILEID': tiles_table['TILEID'],
    })

def read_tiles_Y1_file():
    """Return the tiles from BGS_TILES_FILE whose FAFLAVOR is 'mainbright', re-indexed from 0."""
    tiles_df = _read_tiles_csv(BGS_TILES_FILE)
    # reset_index returns a new frame, avoiding an in-place edit of a filtered slice
    return tiles_df[tiles_df.FAFLAVOR == 'mainbright'].reset_index(drop=True)

def read_tiles_file():
    """Return every tile in BGS_Y3_TILES_FILE (all FAFLAVOR values, no filtering)."""
    return _read_tiles_csv(BGS_Y3_TILES_FILE)

# Create Merged SV3 File

In [None]:
# Load the full SV3 catalog and the tile list, then keep only the tiles whose FAFLAVOR is 'sv3bright'.
sv3_table = Table.read(BGS_SV3_ANY_FULL_FILE, format='fits')
tiles = read_tiles_file()
SV3_tiles = tiles.loc[tiles.FAFLAVOR == 'sv3bright']
# Show which columns are available before trimming them in the next cell
print(sv3_table.columns)

In [None]:
# Filter to needed columns only and save
sv3_table.keep_columns(['TARGETID', 'SPECTYPE', 'DEC', 'RA', 'Z_not4clus', 'ZTILEID', 'NUMOBS', 'FLUX_R', 'FLUX_G', 'PROB_OBS', 'ZWARN', 'DELTACHI2', 'NTILE', 'TILES', 'TILEID', 'TILELOCID'])
sv3_table.rename_column('Z_not4clus', 'Z')

sv3_df = sv3_table.to_pandas()
sv3_df.rename(columns={'DEC': 'Dec'}, inplace=True)

In [None]:
# Count covering tiles and record the 15 nearest SV3 tile IDs for every target, then save the merged table
ntiles_inside, nearest_tile_ids = find_tiles_for_galaxies(SV3_tiles, sv3_df, 15)
sv3_table.add_column(ntiles_inside, name="NTILE_MINE")
sv3_table.add_column(nearest_tile_ids, name="NEAREST_TILEIDS")
# overwrite takes a boolean; the string 'True' only worked because any non-empty string is truthy
sv3_table.write(IAN_BGS_SV3_MERGED_FILE, format='fits', overwrite=True)
# Free the large in-memory copies; later cells re-read the merged file from disk
del sv3_table
del sv3_df

# Photo-z
 
See photoz.py

In [None]:
fits_links_pz, fits_links_main = get_photoz_file_lists()
print(len(fits_links_pz))
print(len(fits_links_main))

# Network Bandwidth:
# 445 GB for pz sweeps
# more for main sweeps...

In [None]:
# INITIALIZE LIGHTWEIGHT DESI BGS PHOTO-Z TABLE
# Don't re-run! Will overwrite the file.
"""
desi_table = Table.read(IAN_BGS_Y3_MERGED_FILE, format='fits')
desi_table2 = Table.read(IAN_BGS_SV3_MERGED_FILE, format='fits')

assert len(np.unique(desi_table['TARGETID'])) == len(desi_table), "There are duplicate TARGETIDs in the Y3 file"
assert len(np.unique(desi_table2['TARGETID'])) == len(desi_table2), "There are duplicate TARGETIDs in the SV3 file"

desi_table.keep_columns(['TARGETID', 'RA', 'DEC'])
desi_table2.keep_columns(['TARGETID', 'RA', 'DEC'])

desi_targets_table = vstack([desi_table, desi_table2], join_type='inner')
desi_targets_table = unique(desi_targets_table, 'TARGETID')
desi_targets_table['Z_LEGACY_BEST'] = -99.0

# add columns for 'RELEASE', 'BRICKID', 'OBJID' with no values
desi_targets_table.add_column(np.zeros(len(desi_targets_table), dtype=int), name='RELEASE')
desi_targets_table.add_column(np.zeros(len(desi_targets_table), dtype=int), name='BRICKID')
desi_targets_table.add_column(np.zeros(len(desi_targets_table), dtype=int), name='OBJID')


desi_targets_table = desi_targets_table.to_pandas()
desi_targets_table.set_index('TARGETID', inplace=True)
pickle.dump(desi_targets_table, open(IAN_PHOT_Z_FILE, 'wb'))

"""


In [None]:
# Brick is 5 degrees wide in both RA and DEC TODO can filter DESI targets by this before matching? faster? Maybe...
#plt.hist(df.RA) 
#plt.hist(df.DEC)

# Checking the matched objects
#plot_positions(
#    pd.DataFrame({'RA': df[matched2].RA, 'Dec': df[matched2].DEC}), 
#    pd.DataFrame({'RA': desi_table['RA'].astype('<f8'), 'Dec': desi_table['DEC'].astype('<f8')}), 
#    tiles_df=None, 
#    DEG_LONG=1,
#    ra_min=np.min(df['RA']), 
#    dec_min=np.min(df['DEC']+0.5), 
#    split=False
#)



# Create a merged master BGS data file

In [None]:
# Pull only the needed FastSpecFit columns. The context manager closes the file even if a column
# lookup fails, and the columns are copied so they do not depend on the memory map staying alive.
with fits.open(BGS_FASTSPEC_FILE, memmap=True) as hdul:
    #print(hdul[1].columns)
    data = hdul[1].data
    fastspecfit_id = np.array(data['TARGETID'])
    DN4000 = np.array(data['DN4000']) # TODO there is also DN4000_OBS and DN4000_MODEL (and inverse variance)
    FSF_G = np.array(data['ABSMAG01_SDSS_G'])
    FSF_R = np.array(data['ABSMAG01_SDSS_R'])

print(len(fastspecfit_id))
print(len(DN4000))

fastspecfit_table = Table([fastspecfit_id, DN4000, FSF_G, FSF_R], names=('TARGETID', 'DN4000', 'ABSMAG01_SDSS_G', 'ABSMAG01_SDSS_R'))


In [None]:
main_table = Table.read(BGS_ANY_FULL_FILE, format='fits')
print(main_table.columns)

In [None]:
# ALREADY DONE FOR US; only needed to do this in Iron v1.2 due to a bug.
# Prob obs file
#p_table = Table.read(BGS_PROB_OBS_FILE, format='fits')
#print(len(p_table))

# Join them all on TARGETID
#joined_table = join(main_table, p_table, keys="TARGETID")
#print(len(joined_table))

to_join = main_table
#to_join = p_table

# The lost galaxies will not have fastspecfit rows I think
final_table = join(to_join, fastspecfit_table, join_type='left', keys="TARGETID")
print(len(final_table))

# Sanity check that everything went as intended
assert len(final_table) == len(main_table)

# Filter to needed columns only and save
final_table.keep_columns(['TARGETID', 'SPECTYPE', 'DEC', 'RA', 'Z_not4clus', 'FLUX_R', 'FLUX_G', 'BITWEIGHTS', 'PROB_OBS', 'ZWARN', 'DELTACHI2', 'NTILE', 'TILES', 'DN4000', 'ABSMAG01_SDSS_G', 'ABSMAG01_SDSS_R', 'MASKBITS'])
final_table.rename_column('Z_not4clus', 'Z')
# overwrite takes a boolean; the string 'True' only worked because any non-empty string is truthy
final_table.write(IAN_BGS_MERGED_FILE, format='fits', overwrite=True)

del(main_table)
#del(p_table)
del(fastspecfit_table)
del(final_table)

# Augment with my version of NTILE

In [None]:
def add_NTILE_MINE_to_table(table_file):
    """Add NTILE_MINE and NEAREST_TILEIDS columns to a FITS table and rewrite the file in place.

    The table is read from ``table_file``, matched against the tiles returned by
    ``read_tiles_Y1_file()`` (15 nearest tiles per row), and written back to the
    same path. Any existing NTILE_MINE / NEAREST_TILEIDS columns are replaced,
    so the function can be re-run on its own output.

    Parameters
    ----------
    table_file : str
        Path of the FITS table to augment; it is overwritten.
    """
    tiles_df = read_tiles_Y1_file()
    table = Table.read(table_file, format='fits')
    galaxies_df = table_to_df(table)

    ntiles_inside, nearest_tile_ids = find_tiles_for_galaxies(tiles_df, galaxies_df, 15)

    # Drop whichever derived columns already exist; checking each one separately avoids a
    # KeyError when a file carries only one of the two.
    stale_columns = [name for name in ('NTILE_MINE', 'NEAREST_TILEIDS') if name in table.colnames]
    if stale_columns:
        table.remove_columns(stale_columns)
    table.add_column(ntiles_inside, name="NTILE_MINE")
    table.add_column(nearest_tile_ids, name="NEAREST_TILEIDS")

    # overwrite takes a boolean; the string 'True' only worked because any non-empty string is truthy
    table.write(table_file, format='fits', overwrite=True)

In [None]:
#add_NTILE_MINE_to_table(IAN_BGS_MERGED_FILE)
add_NTILE_MINE_to_table(IAN_BGS_MERGED_FILE_OLD)

# Jura Quick Analysis

In [None]:
# Don't have all files needed to use the above pipeline yet, so just working with what we have

# Main file
main_table = Table.read(BGS_Y3_ANY_FULL_FILE, format='fits')
print(len(main_table))

# Filter to needed columns only and save
main_table.keep_columns(['TARGETID', 'SPECTYPE', 'DEC', 'RA', 'Z', 'FLUX_R', 'FLUX_G', 'ZWARN', 'DELTACHI2', 'NTILE', 'TILES'])

galaxies_df = pd.DataFrame({
    'Dec': main_table['DEC'],
    'RA': main_table['RA'],
    })

tiles_BGS = read_tiles_file()

ntiles_inside, nearest_tile_ids = find_tiles_for_galaxies(tiles_BGS, galaxies_df, 10)

main_table.add_column(ntiles_inside, name="NTILE_MINE")
main_table.add_column(nearest_tile_ids, name="NEAREST_TILEIDS")

# overwrite takes a boolean; the string 'True' only worked because any non-empty string is truthy
main_table.write(IAN_BGS_Y3_MERGED_FILE, format='fits', overwrite=True)

del(main_table)
del(tiles_BGS)
del(galaxies_df)

# Examine data in Merged BGS File

In [None]:
# Pick one
table = Table.read(IAN_BGS_MERGED_FILE, format='fits')
#table = Table.read(IAN_BGS_SV3_MERGED_FILE, format='fits')
#table = Table.read(IAN_BGS_Y3_MERGED_FILE, format='fits')

In [None]:
tiles_BGS = read_tiles_file()

In [None]:
# See three equivalent ways of determining which rows are for unobserved galaxies:
# the ZWARN sentinel value, the mask on the Z column, and the Z sentinel value.
one=table['ZWARN'] == 999999
two=table['Z'].mask
three=table['Z'] == 999999.0
# All three flags must agree row by row
assert(np.all(one == two))
assert(np.all(one == three))

In [None]:
plt.hist(table['Z'], bins=50)
plt.title("Z")
plt.yscale('log')
print(np.min(table['Z']), np.max(table['Z']))
print(table['Z'].mask)

In [None]:
print(np.unique(table['ZWARN']))
#print(np.unique(table['ZWARN_MTL']))
print(np.unique(table['SPECTYPE']))
print(np.unique(table['NTILE']))
#print(np.unique(table['TARGET_STATE']))

## Cut to the galaxy data we actually need

In [None]:
# TODO this gets easily out of sync with the .py file that does the 'production' filtering

# Pull redshift, type, delta-chi2 and maskbits out of the table, and build the `unobserved` flag.
# Masked tables expose the flag through the Z mask; unmasked ones through the 999999 sentinel in Z.
# NOTE(review): MASKBITS is only in the keep_columns list used for IAN_BGS_MERGED_FILE; the SV3 and Y3
# merge cells do not keep it, so picking one of those tables presumably raises KeyError here -- confirm.
if np.ma.is_masked(table['Z']):
    print("Masked table")
    z_obs = table['Z'].data.data
    obj_type = table['SPECTYPE'].data.data
    unobserved = table['Z'].mask # the masked values are what is unobserved
    deltachi2 = table['DELTACHI2'].data.data  
    maskbits = table['MASKBITS'].data.data
else:
    print("Unmasked table")
    # SV3 version didn't do this
    z_obs = table['Z']
    obj_type = table['SPECTYPE']
    unobserved = table['Z'].astype("<i8") == 999999
    deltachi2 = table['DELTACHI2']
    maskbits = table['MASKBITS']
    
dec = table['DEC']
ra = table['RA']
target_id = table['TARGETID']
app_mag_r = get_app_mag(table['FLUX_R'])
app_mag_g = get_app_mag(table['FLUX_G'])
flux_r = table['FLUX_R']
flux_g = table['FLUX_G']
g_r_apparent = app_mag_g - app_mag_r
#sdss_g_r = table['ABSMAG_SDSS_G'] - table['ABSMAG_SDSS_R'] 
#G_R_JM1 = table['ABSMAG01_SDSS_G'] - table['ABSMAG01_SDSS_R']
#p_obs = table['PROB_OBS'] 
ntiles = table['NTILE']
#tiles = table['TILES']
#ztileid = table['ZTILEID']
#tile_id = table['TILEID']
#numobs = table['NUMOBS']
#tile_locid = table['TILELOCID']
ntiles_mine = table['NTILE_MINE']
tileids = table['NEAREST_TILEIDS'][:,0].astype("<i8") # TODO there are 10 here, we want NTILES_MINE many...
#abs_mag_sdss = table['ABSMAG_SDSS_R']
#dn4000 = table['DN4000'].data.data


before_count = len(dec)
print(before_count, "objects in FITS file")

# TODO BUG Can we be mistaking STARS for GALAXIES?
# Make filter array (True/False values)
# NTILE_MINE thresholds reported in the per-pass summary loop below
PASSES_REQUIRED = [1,2,3,4,10]

galaxy_observed_filter = obj_type == b'GALAXY'
app_mag_filter = app_mag_r < APP_MAG_CUT
redshift_filter = z_obs > Z_MIN
redshift_hi_filter = z_obs < Z_MAX
deltachi2_filter = deltachi2 > 40
#abs_mag_sdss_filter = abs_mag_sdss < 100
#observed_requirements = np.all([galaxy_observed_filter, app_mag_filter, redshift_filter, redshift_hi_filter, deltachi2_filter, abs_mag_sdss_filter], axis=0)
# A row counts as a good observation only when every individual filter passes
observed_requirements = np.all([galaxy_observed_filter, app_mag_filter, redshift_filter, redshift_hi_filter, deltachi2_filter], axis=0)

# Galaxies inside the magnitude cut whose DELTACHI2 is not above 40 are handled like unobserved targets
treat_as_unobserved = np.all([galaxy_observed_filter, app_mag_filter, np.invert(deltachi2_filter)], axis=0)

# From here on `unobserved` means: inside the magnitude cut AND (originally unobserved OR treated as such)
unobserved = np.all([app_mag_filter, np.logical_or(unobserved, treat_as_unobserved)], axis=0)
# Keep rows that are either good observations or (redefined) unobserved targets
keep = np.all([np.logical_or(observed_requirements, unobserved)], axis=0)

print("\nWhole sample:")
print(f"There are {len(obj_type):,} objects in the entire sample, of which {np.sum(galaxy_observed_filter):,} are observed galaxies.") 

for n in PASSES_REQUIRED:
    n_pass_filter = ntiles_mine >= n
    n_pass_filter_old = ntiles >= n
    unobserved_n = np.all([n_pass_filter, unobserved], axis=0)
    observed_requirements_n = np.all([n_pass_filter, observed_requirements], axis=0)
    keepn = np.all([np.logical_or(observed_requirements_n, unobserved_n)], axis=0)

    print(f"\n{n}-pass analysis (NTILE_MINE):")
    print(f"There are {np.sum(observed_requirements_n):,} galaxies in the bright (<{APP_MAG_CUT} mag) sample that pass our quality checks.")
    print(f"There are {np.sum(unobserved_n):,} unobserved galaxies, including bad observed galaxies.")
    print(f"This {n}-pass catalog would have {np.sum(keepn):,} galaxies ({np.sum(unobserved_n) / np.sum(keepn) * 100:.2f}% lost).")

    # We've demonstrated this is definitely not what we want
    #unobserved_n_old = np.all([n_pass_filter_old, unobserved], axis=0)
    #observed_requirements_n_old = np.all([n_pass_filter_old, observed_requirements], axis=0)
    #keepn_old = np.all([np.logical_or(observed_requirements_n_old, unobserved_n_old)], axis=0)
    #print(f"\n{n}-pass analysis (NTILE):")
    #print(f"There are {np.sum(observed_requirements_n_old):,} galaxies in the bright (<{APP_MAG_CUT} mag) sample that pass our quality checks.")
    #print(f"There are {np.sum(unobserved_n_old):,} unobserved galaxies, including bad observed galaxies.")
    #print(f"This {n}-pass catalog would have {np.sum(keepn_old):,} galaxies ({np.sum(unobserved_n_old) / np.sum(keepn_old) * 100:.2f}% lost).")

# FOR PARTS BELOW SET WHAT YOU WANT TO KEEP!
keep = np.all([keep, ntiles_mine >= KEEP_PASSES], axis=0)

obj_type = obj_type[keep]
dec = dec[keep]
ra = ra[keep]
z_obs = z_obs[keep]
target_id = target_id[keep] 
flux_r = flux_r[keep]
app_mag_r = app_mag_r[keep]
app_mag_g = app_mag_g[keep]
g_r_apparent = g_r_apparent[keep]
#p_obs = p_obs[keep]
unobserved = unobserved[keep]
deltachi2 = deltachi2[keep]
ntiles = ntiles[keep]
#tiles = tiles[keep]
#ztileid = ztileid[keep]
ntiles_mine = ntiles_mine[keep]
tileids = tileids[keep]
#tile_id = tile_id[keep]
#numobs = numobs[keep]
#tile_locid = tile_locid[keep]
#abs_mag_sdss = abs_mag_sdss[keep]
#sdss_g_r = sdss_g_r[keep]
#G_R_JM1 = G_R_JM1[keep]
#dn4000 = dn4000[keep]
maskbits = maskbits[keep]
indexes_not_assigned = np.argwhere(unobserved)

after_count = len(dec)

print(f"\nAfter all filters we have {after_count} of the original {before_count} rows.")

In [None]:
# Count how many tiles each kept galaxy lists in its hyphen-separated TILES string.
# The TILES column is read from `table` here because the bare name `tiles` still refers to the tiles
# DataFrame loaded for the SV3 merge (the `tiles = table['TILES']` line in the filtering cell is commented out).
tile_strings = table['TILES'][keep]
tiles_split = np.empty(len(tile_strings), dtype=object)
for i in range(len(tile_strings)):
    tiles_split[i] = list(tile_strings[i].split('-'))

# minlength=15 keeps the printed histogram at least as long as before, while a galaxy listing
# 15 or more tiles extends it instead of raising IndexError
tiles_per_galaxy = np.array([len(parts) for parts in tiles_split], dtype=int)
counts = np.bincount(tiles_per_galaxy, minlength=15)

print(counts)

In [None]:
tiles_split[9000]

In [None]:
# Make maps
two_pass_filter = ntiles_mine >= 2 
three_pass_filter = ntiles_mine >= 3 
four_pass_filter = ntiles_mine >= 4 

ra2 = ra[two_pass_filter]
dec2 = dec[two_pass_filter]
tileids2 =  tileids[two_pass_filter]
unobserved2 = unobserved[two_pass_filter]

ra3 = ra[three_pass_filter]
dec3 = dec[three_pass_filter]
tileids3 =  tileids[three_pass_filter]
unobserved3 = unobserved[three_pass_filter]

ra4 = ra[four_pass_filter]
dec4 = dec[four_pass_filter]
tileids4 =  tileids[four_pass_filter]
unobserved4 = unobserved[four_pass_filter]

one_pass_df = pd.DataFrame({'RA': ra, 'Dec': dec, 'z_assigned_flag': unobserved, 'TILEID': tileids})
two_pass_df = pd.DataFrame({'RA': ra2, 'Dec': dec2, 'z_assigned_flag': unobserved2, 'TILEID': tileids2})
three_pass_df = pd.DataFrame({'RA': ra3, 'Dec': dec3, 'z_assigned_flag': unobserved3, 'TILEID': tileids3})
four_pass_df = pd.DataFrame({'RA': ra4, 'Dec': dec4, 'z_assigned_flag': unobserved4, 'TILEID': tileids4})

plot_positions(one_pass_df, three_pass_df, tiles_df=tiles_BGS, DEG_LONG=5, split=False)

#fig=make_map(ra, dec)
#ra_4 = ra[four_pass_filter]
#dec_4 = dec[four_pass_filter]
#print(f"Number of 4-pass galaxies: {len(ra_4)}, number of 3-pass galaxies: {len(ra)}")
#fig=make_map(ra_4, dec_4, fig=fig, alpha=0.1)

### Siena Galaxy Atlas Analysis (SGA)

In [None]:
# Understand how many galaxies are affected by Siena Galaxy Atlas (SGA) Masks
has_a_maskbit = maskbits != 0
idx_with_masks = np.nonzero(maskbits)
print(f"{np.sum(has_a_maskbit):,} galaxies ({np.sum(has_a_maskbit) / len(maskbits) * 100:.2f}%) have a maskbit set.")

unobserved_with_maskbits = np.logical_and(has_a_maskbit, unobserved)
print(f"{np.sum(unobserved_with_maskbits):,} galaxies ({np.sum(unobserved_with_maskbits) / len(maskbits) * 100:.2f}%) have a maskbit set and are unobserved.")

# See https://www.legacysurvey.org/dr9/bitmasks/
# https://github.com/legacysurvey/legacypipe/blob/master/py/legacypipe/bits.py
BITMASK_SGA = 0x1000 
sga_collision = (maskbits & BITMASK_SGA) != 0
print(f"{np.sum(sga_collision):,} galaxies ({np.sum(sga_collision) / len(maskbits) * 100:.2f}%) have a SGA collision.")

to_remove = np.logical_and(sga_collision, unobserved)
print(f"{np.sum(to_remove):,} galaxies ({np.sum(to_remove) / len(maskbits) * 100:.2f}%) have a SGA collision and are unobserved.")

# TODO well it looks like the maskbits are only set on targets with spectra for some reason.
# They are photometric so I don't know why this would be.
# But also looking at the images, more often than not the masked target is the SGA one itself, not one inside its ellipse.
# Thus removing all these seems worse than leaving them.

sga_ra = ra[to_remove]
sga_dec = dec[to_remove]
df = pd.DataFrame({'RA': sga_ra, 'Dec': sga_dec})
df.to_csv(OUTPUT_FOLDER + f'sga_collisions.csv', index=False)

# SV3 Analysis

SV3 is composed of 20 regions where 10 or 11 exposures each were taken, almost completely on top of each other.  Our SV3 analysis takes the inner part of these regions (NTILE_MINE >= 10) as the data set.  

Then, we can eliminate 1 tile from each of these regions to make test sets in order to view our systematics as a function of NTILE_MINE. The order they are eliminated in matters; we need to go backwards in time.

In [None]:
# Make a DataFrame filtered down to the galaxies we want to keep
sv3_merged_table = Table.read(IAN_BGS_SV3_MERGED_FILE, format='fits')
sv3_merged_table.remove_column('NEAREST_TILEIDS')
sv3_df = sv3_merged_table.to_pandas()
print(len(sv3_df))
sv3_df['app_mag'] = get_app_mag(sv3_df['FLUX_R'])
unobserved = sv3_merged_table['Z'].astype("<i8") == 999999
galaxy_observed_filter = sv3_df['SPECTYPE'] == b'GALAXY'
redshift_filter = sv3_df['Z'] > Z_MIN
redshift_hi_filter = sv3_df['Z'] < Z_MAX
deltachi2_filter = sv3_df['DELTACHI2'] > 40
app_mag_filter = sv3_df['app_mag'] < 20.3
observed_requirements = np.all([galaxy_observed_filter, app_mag_filter, redshift_filter, redshift_hi_filter, deltachi2_filter], axis=0)
treat_as_unobserved = np.all([galaxy_observed_filter, app_mag_filter, np.invert(deltachi2_filter)], axis=0)

unobserved = np.all([app_mag_filter, np.logical_or(unobserved, treat_as_unobserved)], axis=0)
sv3_df['OBSERVED'] = np.invert(unobserved)
keep = np.all([np.logical_or(observed_requirements, unobserved)], axis=0)
keep = np.all([keep, sv3_df['NTILE_MINE'] >= 10], axis=0)

sv3_df = sv3_df.loc[keep] 
sv3_df.reset_index(drop=True, inplace=True)
print(len(sv3_df))

# Initialize new columns for observed as function of N pass
for i in range(0, 12):
    sv3_df[f'OBSERVED_{i}'] = sv3_df['OBSERVED']

for FAINT in [False, True]:

    if not FAINT:
        mag_filter = sv3_df['app_mag'] < 19.5
    else:
        mag_filter = sv3_df['app_mag'] > 19.5
        
    print(f"{len(sv3_df[mag_filter]) / 138.192} galaxies per sq degree")

    for patch_number in range(len(gc.sv3_regions_sorted)):
        tilelist = gc.sv3_regions_sorted[patch_number]
        #print(f'Patch {patch_number} - TILE IDs: {tilelist}')
        
        row_selector = np.logical_and(sv3_df['TILEID'].isin(tilelist), mag_filter)

        #one_patch_df = sv3_df[sv3_df['TILEID'].isin(tilelist)]
        #print(f"{len(one_patch_df)} galaxies, {np.sum(one_patch_df['OBSERVED']) / len(one_patch_df) :.1%} of the targets are observed")
        #one_patch_df[f'OBSERVED_{len(tilelist)}'] = one_patch_df['OBSERVED']
        
        #print ("Remove tiles in reverse TILEID order:")
        for i in np.flip(np.arange(0, len(tilelist))):
            tileid = tilelist[i]
            observed_by_this_tile = sv3_df.loc[row_selector, 'TILEID'] == tileid
            #print(f'{np.sum(observed_by_this_tile)} galaxies were observed by tile {tileid} ({i+1}/{len(tilelist)})')
            prev = sv3_df.loc[row_selector, f'OBSERVED_{i+1}']
            sv3_df.loc[row_selector, f'OBSERVED_{i}'] = np.where(observed_by_this_tile, False, prev)
            
        #for i in np.flip(np.arange(0, len(tilelist)+1)):
        #    if FAINT:
        #        totals_observed_faint[i] += np.sum(sv3_df.loc[row_selector, f'OBSERVED_{i}'])
        #        totals_all_faint[i] += len(sv3_df.loc[row_selector])
        #    else:
        #        totals_observed_bright[i] += np.sum(sv3_df.loc[row_selector, f'OBSERVED_{i}'])
        #        totals_all_bright[i] += len(sv3_df.loc[row_selector])

            #print(f"{np.sum(one_patch_df[f'OBSERVED_{i}']) / len(one_patch_df) :.1%} of the targets are observed with {i} passes")
                
    #for i in range(1, 12):
    #    if FAINT:
    #        print(f"{totals_observed_faint[i]:,} ({totals_observed_faint[i] / totals_all_faint[i]:.1%}) faint galaxies are observed with {i} passes")
    #    else: 
    #        print(f"{totals_observed_bright[i]:,} ({totals_observed_bright[i] / totals_all_bright[i]:.1%}) bright galaxies are observed with {i} passes")


In [None]:
observed_faint = np.zeros(12, dtype=int)
observed_bright = np.zeros(12, dtype=int)

total_faint = np.sum(sv3_df['app_mag'] > 19.5)
total_bright = np.sum(sv3_df['app_mag'] < 19.5)

for i in range(0, 12):
    observed_faint[i] = np.sum(sv3_df.loc[sv3_df['app_mag'] > 19.5, f'OBSERVED_{i}'])
    observed_bright[i] = np.sum(sv3_df.loc[sv3_df['app_mag'] < 19.5, f'OBSERVED_{i}'])


plt.plot(observed_bright / total_bright, color='b', label="BGS BRIGHT ME")
plt.plot(observed_faint / total_faint, color='orange', label="BGS FAINT ME")
plt.plot([1,2,3,4], [.29, .52, 0.68, .81], '--', color='b', label="BGS BRIGHT PAPER")
plt.plot([1,2,3,4], [.15, .32, 0.47, .62], '--', color='orange', label="BGS FAINT PAPER")
plt.xlabel("Number of passes")
plt.ylabel("Fraction of targets observed")
plt.title("SV3 BGS Completeness")
plt.xticks(np.arange(0, 12))
plt.legend()

In [None]:
print(sv3_df.loc[sv3_df['app_mag'] < 19.5, 'OBSERVED'].count() - sv3_df.loc[sv3_df['app_mag'] < 19.5, 'OBSERVED'].sum())
print(sv3_df.loc[sv3_df['app_mag'] < 19.5, 'OBSERVED'].count())

The above numbers do not seem to track with the Y1 data. In Y1 no region has more than 4 passes so how do I have a fiber incompleteness better than the above number?

Also Figure 17 of https://iopscience.iop.org/article/10.3847/1538-3881/accff8/pdf disagrees with my above analysis. So what is above is wrong.

# Color Analysis

Lesson from this analysis: the BGS data, working with my 0.1^G-R with GAMA k-corrections, does not appear to need a per-logLgal-bin G-R cut; the global 0.76 split seems to work for all bins.

In [None]:
G = app_mag_to_abs_mag(app_mag_g, z_obs)
R = app_mag_to_abs_mag(app_mag_r, z_obs)

G_R = G - R

Gk = k_correct_bgs(G, z_obs, g_r_apparent, band='g')
Rk = k_correct_bgs(R, z_obs, g_r_apparent, band='r')

G_R_k = Gk - Rk

Gk_GAMA = k_correct_gama(G, z_obs, g_r_apparent, band='g')
Rk_GAMA = k_correct_gama(R, z_obs, g_r_apparent, band='r')

G_R_k_GAMA = Gk_GAMA - Rk_GAMA


In [None]:
# Comparison of g-r computed a few ways
bins = np.linspace(0, 2.0, 200)

plt.figure()
#junk=plt.hist(g_r_apparent, bins=bins, label="g-r", histtype='step')
#junk=plt.hist(sdss_g_r, bins=bins, label='From LSS Pipeline (JM?)', histtype='step', density=True)
#junk=plt.hist(G_R, bins=bins, label="G-R", histtype='step')
junk=plt.hist(G_R_k, bins=bins, label="0.1^(G-R) BGS poly", histtype='step', density=True)
junk=plt.hist(G_R_k_GAMA, bins=bins, label="0.1^(G-R) GAMA poly", histtype='step', density=True)
#junk=plt.hist(G_R_JM1, bins=bins, label="0.1^(G-R) JM", histtype='step', density=True)
plt.xlabel("g-r")
plt.ylabel("Count")
plt.legend()
plt.xlim(0.2, 1.3)
plt.title("Comparison of g-r computed a few ways")
plt.tight_layout()
plt.ylim(0,3.5)

In [None]:
# Can see global GLOBAL_RED_COLOR_CUT=0.76 here
junk=plt.hist(G_R_k_GAMA, bins=300, alpha=0.5, label="0.1^(G-R) GAMA-style")
plt.legend()
plt.xlim(0.5, 1.0)

In [None]:
print(is_quiescent_lost_gal_guess(g_r_apparent).sum() / len(g_r_apparent))
assert len(G_R_k_GAMA) == len(g_r_apparent)
print(is_quiescent_BGS_gmr(None, G_R_k_GAMA).sum() / len(G_R_k_GAMA))

In [None]:
from pyutils import *
print(BGS_LOGLGAL_BINS)
print(BINWISE_RED_COLOR_CUT)

In [None]:
is_quiescent_BGS_gmr(np.array([5.8, 9.0, 14.5]), np.array([0.5, 0.9, 0.9]))

In [None]:
# Get logLgal bins
log_L_gal = abs_mag_r_to_log_solar_L(Rk) 
logLgal_bin_idx = np.digitize(log_L_gal, BGS_LOGLGAL_BINS)
# 0 is less than the lowest, len(BGS_LOGLGAL_BINS) is greater than the highest entry in BGS_LOGLGAL_BINS

In [None]:
print(np.min(log_L_gal))
print(np.max(log_L_gal))
print(np.min(logLgal_bin_idx))
print(np.max(logLgal_bin_idx))
plt.hist(log_L_gal, bins=BGS_LOGLGAL_BINS, align='mid')
#plt.yscale('log')

In [None]:
# Make a plot of G_R_k in each logLgal bin
for i in range(0, len(BGS_LOGLGAL_BINS)+1):
    galaxy_idx_for_this_bin = logLgal_bin_idx == i

    plt.figure(dpi=80, figsize=(10, 6))
    junk=plt.hist(G_R_k[galaxy_idx_for_this_bin], bins=np.arange(0,1.3,0.02), label=f"0.1^(G-R) Bin {i}", align='mid')
    plt.legend()
    plt.xlim(0.4, 1.2)
    plt.xticks(np.arange(0.4, 1.2, 0.04))

In [None]:
# NOTE(review): abs_mag_sdss is never assigned in the visible notebook -- its definition
# (table['ABSMAG_SDSS_R']) is commented out in the filtering cell, so this raises NameError on a fresh run.
mag1 = abs_mag_sdss
# R is the r-band absolute magnitude computed in the Color Analysis section
mag2 = R

In [None]:
# Compare Absolute Magnitudes
# Difference is how we k-correct I believe
bins = np.linspace(-25, -10, 100)
my_counts, my_bins, my_p = plt.hist(mag2, label="my abs_mag", bins=bins, alpha=0.5)
alex_counts, alex_bins, alex_p = plt.hist(mag1, label="ABSMAG_SDSS_R", bins=bins, alpha=0.5)
plt.xlabel("Absolute Mag")
plt.ylabel("Count")
plt.title("Compare Absolute Mags")
#plt.yscale('log')
plt.legend()

print(f"The peak is shifted from ABSMAG_SDSS_R {alex_bins[np.argmax(alex_counts)]:.1f} to my {my_bins[np.argmax(my_counts)]:.1f}")


In [None]:
fig=make_map(ra, dec)

## Dn4000 Comparison (BGS, SDSS)

In [None]:
sdss = pd.read_csv(SDSS_v1_DAT_FILE, delimiter=' ', names=('RA', 'Dec', 'z', 'logLgal', 'V_max', 'quiescent', 'chi'), index_col=False)
sdss_galprops = pd.read_csv("../data/sdss_galprops_v1.0.dat", delimiter=' ', names=('Mag_g', 'Mag_r', 'sigma_v', 'Dn4000', 'concentration', 'log_M_star'))
sdss = pd.merge(sdss, sdss_galprops, left_index=True, right_index=True)


In [None]:
# NOTE(review): dn4000 is never assigned in the visible notebook -- both the extraction
# (table['DN4000'].data.data) and the `[keep]` selection are commented out in the filtering cell,
# so this raises NameError on a fresh run.
plt.hist(dn4000, bins=np.linspace(-0.5, 5.0, 100), alpha=0.6, label="BGS Y1")
plt.hist(sdss.Dn4000, bins=np.linspace(-0.5, 5.0, 100), alpha=0.8, label="SDSS")
plt.yscale('log')
plt.legend()
plt.xlabel('Dn4000')
plt.ylabel('Count')

In [None]:
plt.hist(dn4000, bins=np.linspace(-0.5, 5.0, 100), alpha=0.6, label="BGS Y1")
plt.hist(sdss.Dn4000, bins=np.linspace(-0.5, 5.0, 100), alpha=0.8, label="SDSS")
plt.legend()
plt.xlabel('Dn4000')
plt.ylabel('Count')
plt.xlim(0.9,2.5)

In [None]:
sdss_catalog = coord.SkyCoord(ra=sdss.RA.to_numpy()*u.degree, dec=sdss.Dec.to_numpy()*u.degree, frame='icrs')
BGS_catalog = coord.SkyCoord(ra=ra*u.degree, dec=dec*u.degree, frame='icrs')

neighbor_indexes, d2d, d3d = coord.match_coordinates_sky(BGS_catalog, sdss_catalog, storekdtree='sdss')
ang_distances = d2d.to(u.arcsec).value

match_found_filter = ang_distances < 3.0
bgs_matches = dn4000[match_found_filter]
sdss_indexes = neighbor_indexes[match_found_filter]
sdss_matches = sdss.iloc[sdss_indexes].Dn4000.to_numpy()

In [None]:
print(f"{np.isclose(bgs_matches, sdss_matches, atol=0.05).sum() / len(bgs_matches)} of the matches are within 0.05 of each other.")
print(f"{np.isclose(bgs_matches, sdss_matches, atol=0.1).sum() / len(bgs_matches)} of the matches are within 0.1 of each other.")
print(f"{np.isclose(bgs_matches, sdss_matches, atol=0.2).sum() / len(bgs_matches)} of the matches are within 0.2 of each other.")
print(f"{np.isclose(bgs_matches, sdss_matches, atol=0.3).sum() / len(bgs_matches)} of the matches are within 0.3 of each other.")


In [None]:
fig=make_map(ra, dec)
fig=make_map(sdss.RA.to_numpy(), sdss.Dec.to_numpy(), fig=fig, alpha=0.05)

In [None]:
plt.scatter(sdss_matches, bgs_matches, s=1, alpha=.2)
plt.xlabel("SDSS Dn4000")
plt.ylabel("BGS Dn4000")
plt.xlim(1, 2.3)
plt.ylim(1, 2.3)

In [None]:
df = pd.DataFrame({'SDSS_Dn4000': sdss_matches, 'BGS_Dn4000': bgs_matches})
df['diff_frac'] =  (df['BGS_Dn4000'] - df['SDSS_Dn4000']) / df['SDSS_Dn4000']
bins = np.linspace(-1, 5, 60)
labels = bins[0:len(bins)-1] 
df['dn4000_sdssbin'] = pd.cut(x = sdss_matches, bins = bins, labels = labels, include_lowest = True)

In [None]:
plt.figure(dpi=80)
# observed=False keeps one row per category (empty bins included), so the statistics stay the same
# length as `labels`; relying on the pandas default would break errorbar if that default changes.
binned_diff_frac = df.groupby('dn4000_sdssbin', observed=False).diff_frac
diff_mean = binned_diff_frac.mean()
diff_std = binned_diff_frac.std()

plt.errorbar(labels, diff_mean, yerr=diff_std)
plt.xlabel("SDSS Dn4000")
plt.ylabel("< (BGS-SDSS) / SDSS >")
plt.xlim(0.8, 2.4)
plt.ylim(-0.75, 0.75)
plt.draw()

## Dn4000 Lgal Bin Analysis

Run Color Analysis and Dn4000 Comparison first

In [None]:
# Make a plot of Dn4000 in each logLgal bin
fig,axes=plt.subplots(dpi=80, figsize=(10, 3*len(BGS_LOGLGAL_BINS)//2), ncols=2, nrows=len(BGS_LOGLGAL_BINS)//2)
axes = np.ravel(axes)

for i in range(0, len(BGS_LOGLGAL_BINS)-1):
    galaxy_idx_for_this_bin = logLgal_bin_idx == i+1

    junk=axes[i].hist(dn4000[galaxy_idx_for_this_bin], bins=np.arange(1,2.2,0.02), label=f"Dn4000 for logLgal Bin {i+1}", align='mid')
    axes[i].legend()
    axes[i].set_xlim(1, 2.2)
    axes[i].set_xticks(np.arange(1, 2.2, 0.1))

    # draw a vertical line at get_SDSS_Dcrit(logLgal)
    axes[i].axvline(x=get_SDSS_Dcrit(BGS_LOGLGAL_BINS[i]), color='r', linestyle='-')

axes = np.reshape(axes, (2, len(BGS_LOGLGAL_BINS)//2))


# Randoms Analysis for Footprint

In [None]:
RANDOMS_DENSITY = 2500 # per square degree, Ashley Ross paper on LSS pipeline or elsewhere in docs

In [None]:
rtable = Table.read(BGS_RAND_FILE, format='fits')
rtable.columns
rtable.keep_columns(['LOCATION', 'FIBER', 'TARGETID', 'RA', 'DEC', 'PRIORITY', 'TILEID', 'TILELOCID', 'NTILE', 'TILES'])

In [None]:
r_dec = rtable['DEC'].astype("<f8")
r_ra = rtable['RA'].astype("<f8")
r_ntiles = rtable['NTILE'].astype("<i8")
r_tileid = rtable['TILEID'].astype("<i8")

# TODO TILEID is just one, TILES has them all... look at rtable['TILES'][135000] for example
randoms_df = pd.DataFrame({'RA': r_ra, 'Dec': r_dec, 'NTILE': r_ntiles, 'TILEID': r_tileid})


onepass_footprint = len(r_dec) / RANDOMS_DENSITY # in degrees squared
onepass_frac_area = onepass_footprint / DEGREES_ON_SPHERE

three_pass_filter = r_ntiles >= 3 # 3pass coverage
r_dec3 = r_dec[three_pass_filter]
r_ra3 = r_ra[three_pass_filter]

threepass_footprint = len(r_dec3) / RANDOMS_DENSITY # in degrees squared
threepass_frac_area = threepass_footprint / DEGREES_ON_SPHERE

# My estimation procedure, which we don't use
estimate = estimate_frac_area(r_ra, r_dec)
estimate3 = estimate_frac_area(r_ra3, r_dec3)

In [None]:
print("INCORRECT RESULTS THAT USE NTILE, NOT NTILE_MINE")
print(f"BGS 1pass Footprint calculated from randoms is {onepass_footprint} square degrees or frac_area={onepass_frac_area}")
print(f"BGS 3pass Footprint calculated from randoms is {threepass_footprint} square degrees or frac_area={threepass_frac_area}")
#print(f"BGS 1pass Footprint estimated from my algorithm: frac_area={estimate}")
#print(f"BGS 3pass Footprint estimated from my algorithm: frac_area={estimate3}")

### Make map showing why we cannot use NTILE >= 3

In [None]:
# Make maps
tiles_BGS = read_tiles_Y1_file()
#tiles_BGS = tiles_BGS.loc[tiles_BGS.FAFLAVOR == 'sv3bright']

# Plot the galaxy positions (randoms) and return the set of tile_id associated with them
plot_positions(randoms_df, randoms_df[randoms_df.NTILE >= 3], tiles_df=tiles_BGS, DEG_LONG=10, ra_min=180, dec_min=-5, split=False)

### Make map with NTILE_MINE

Uses a NN lookup of the tiles for each 'galaxy'.

In [None]:
# LOAD FROM LAST TIME WE DID THIS
# NOTE: unpickling can execute arbitrary code; only load a file this notebook wrote itself.
with open(BIN_FOLDER + "randoms_df.p", "rb") as randoms_file:
    randoms_df = pickle.load(randoms_file)

In [None]:
# TO MAKE IT ANEW
# NTILE_MIN is passed as the number of nearest tiles to look up per random point
NTILE_MIN = 10
ntiles_inside, nearest_tile_ids = find_tiles_for_galaxies(tiles_BGS, randoms_df, NTILE_MIN)
randoms_df['NTILE_MINE'] = ntiles_inside

print(np.sum(ntiles_inside >= 3) / len(ntiles_inside))
print(np.sum(ntiles_inside >= 3))

# Not sure if I really need the nearest tile IDs for anything
# The context manager flushes and closes the file even if pickling fails part-way
with open(BIN_FOLDER + "randoms_df.p", "wb") as randoms_file:
    pickle.dump(randoms_df, randoms_file)

In [None]:
# Plot the galaxy positions (randoms) and return the set of tile_id associated with them
plot_positions(randoms_df, randoms_df[randoms_df.NTILE_MINE >= 3], tiles_df=tiles_BGS, DEG_LONG=10, split=False)

In [None]:
# Footprint per pass count: the number of randoms with NTILE_MINE >= i, divided by RANDOMS_DENSITY,
# gives an area; dividing by DEGREES_ON_SPHERE gives the sky fraction.
for i in range(1, 11):
    n_pass_filter = randoms_df.NTILE_MINE >= i
    n_randoms_in_footprint = int(n_pass_filter.sum())
    n_pass_footprint = n_randoms_in_footprint / RANDOMS_DENSITY # in degrees squared
    n_pass_frac_area = n_pass_footprint / DEGREES_ON_SPHERE
    print(f"BGS {i}pass Footprint calculated from randoms is {n_pass_footprint} square degrees or frac_area={n_pass_frac_area}")
    