In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as c
from astropy.wcs import WCS
import astropy.coordinates as coord
import astropy.units as u
from pyutils import *
import astropy.io.fits as fits
import healpy as hp
from astropy.table import Table,join
from dataloc import *

%load_ext autoreload
%autoreload 2

In [None]:
# TODO ensure this is right
def get_app_mag(flux):
    """Convert a flux in nanomaggies to a Pogson magnitude.

    Evaluates ``22.5 - 2.5 * log10(flux)`` with NumPy, so scalars and
    array-likes are both accepted and handled element-wise.
    """
    log_flux = np.log10(flux)
    return 22.5 - 2.5 * log_flux

# Create a merged master BGS data file

In [None]:
# fastspecfit for DN4000
# The context manager closes the FITS file even if a column lookup raises, and
# the table is built while the memory-mapped data is still open.
with fits.open(BGS_FASTSPEC_FILE, memmap=True) as hdul:
    #hdul[1].columns
    data = hdul[1].data
    fastspecfit_id = data['TARGETID']
    DN4000 = data['DN4000'] # TODO there is also DN4000_OBS and DN4000_MODEL (and inverse variance)

    # Both lengths should agree; the previous code printed an undefined name here.
    print(len(fastspecfit_id))
    print(len(DN4000))

    # Two-column lookup table, joined onto the main catalogue by TARGETID later on.
    fastspecfit_table = Table([fastspecfit_id, DN4000], names=('TARGETID', 'DN4000'))


In [None]:
# Main catalogue: its row count is the reference the merged table is checked against.
main_table = Table.read(BGS_ANY_FULL_FILE, format="fits")
print(len(main_table))

In [None]:
# Observation-probability catalogue
p_table = Table.read(BGS_PROB_OBS_FILE, format="fits")
print(len(p_table))

# Combine the main catalogue with the probabilities, matching rows on TARGETID
# (no join_type is given, so astropy's default join is used).
joined_table = join(main_table, p_table, keys="TARGETID")
print(len(joined_table))


In [None]:
# Left join: rows with no fastspecfit match are kept (the lost galaxies
# probably have no fastspecfit rows).
final_table = join(joined_table, fastspecfit_table, keys="TARGETID", join_type="left")
print(len(final_table))

In [None]:
# Sanity check: after both joins the merged table must still have exactly one
# row per row of the main catalogue.
assert len(main_table) == len(final_table)

In [None]:
# Filter to needed columns only and save.
# keep_columns() modifies final_table in place.
final_table.keep_columns([
    'TARGETID', 'SPECTYPE', 'DEC', 'RA', 'Z_not4clus',
    'FLUX_R', 'FLUX_G', 'BITWEIGHTS', 'PROB_OBS', 'ZWARN',
    'DELTACHI2', 'NTILE', 'ABSMAG_SDSS_R', 'ABSMAG_SDSS_G', 'DN4000',
])
# overwrite must be a real boolean: the string 'True' only worked because any
# non-empty string is truthy (so 'False' would also have overwritten).
final_table.write(IAN_BGS_MERGED_FILE, format='fits', overwrite=True)

In [None]:
# Release the large intermediate tables now that the merged file is on disk.
# NOTE(review): joined_table is not released here -- confirm whether that is intentional.
del main_table, p_table, fastspecfit_table, final_table

# Examine data in Merged BGS File

In [None]:
# Reload the merged catalogue written above so this section can run on its own.
table = Table.read(IAN_BGS_MERGED_FILE, format="fits")

In [None]:
# Unobserved galaxies can be identified in two ways that should agree row by row:
#   one: the ZWARN value equals 999999
#   two: the Z_not4clus value is masked
one = (table['ZWARN'] == 999999)
two = table['Z_not4clus'].mask
assert np.all(one == two)


In [None]:
# Redshift distribution on a log-scaled count axis, plus its range and mask.
_z_col = table['Z_not4clus']
plt.hist(_z_col, bins=50)
plt.yscale('log')
plt.title("Z_not4clus")
print(np.min(_z_col), np.max(_z_col))
print(_z_col.mask)

In [None]:
# Show the distinct values present in a few categorical/flag columns.
# ('ZWARN_MTL' and 'TARGET_STATE' were inspected previously and are left disabled.)
for _col_name in ('ZWARN', 'SPECTYPE', 'NTILE'):
    print(np.unique(table[_col_name]))

## Cut to the galaxy data we actually need

In [None]:
# TODO this gets easily out of sync with the .py file that does the 'production' filtering

APP_MAG_CUT = 19.5
Z_MIN = 0.01
Z_MAX = 0.8

# --- Pull the working columns out of the merged table ------------------------
# NOTE(review): `.data.data` presumably unwraps a masked column down to its raw
# ndarray, so values at masked (unobserved) rows are placeholders -- confirm.
obj_type = table['SPECTYPE'].data.data
dec = table['DEC']
ra = table['RA']
z_obs = table['Z_not4clus'].data.data
target_id = table['TARGETID']
flux_r = table['FLUX_R']
flux_g = table['FLUX_G']
app_mag_r = get_app_mag(table['FLUX_R'])
app_mag_g = get_app_mag(table['FLUX_G'])
g_r_apparent = app_mag_g - app_mag_r
sdss_g_r = table['ABSMAG_SDSS_G'] - table['ABSMAG_SDSS_R']
p_obs = table['PROB_OBS']
unobserved = table['Z_not4clus'].mask
deltachi2 = table['DELTACHI2'].data.data
ntiles = table['NTILE']
abs_mag_sdss = table['ABSMAG_SDSS_R']
dn4000 = table['DN4000'].data.data

before_count = len(dec)
print(before_count, "objects in FITS file")


# --- Make filter arrays (True/False values) ----------------------------------
three_pass_filter = table['NTILE'] >= 3 # 3pass coverage
galaxy_filter = np.logical_or(obj_type == b'GALAXY', obj_type == b'')
galaxy_observed_filter = obj_type == b'GALAXY'
app_mag_filter = app_mag_r < APP_MAG_CUT
redshift_filter = z_obs > Z_MIN
redshift_hi_filter = z_obs < Z_MAX
deltachi2_filter = deltachi2 > 40
abs_mag_sdss_filter = abs_mag_sdss < 100
# A row counts as a good observation only if every quality requirement holds.
observed_requirements = np.all([galaxy_observed_filter, app_mag_filter, redshift_filter, redshift_hi_filter, deltachi2_filter, abs_mag_sdss_filter], axis=0)

# Observed galaxies that fail the deltachi2 cut are handled like unobserved ones.
treat_as_unobserved = np.all([galaxy_observed_filter, app_mag_filter, np.invert(deltachi2_filter)], axis=0)
unobserved = np.all([app_mag_filter, np.logical_or(unobserved, treat_as_unobserved)], axis=0)
keep = np.all([three_pass_filter, np.logical_or(observed_requirements, unobserved)], axis=0)
unobserved_3 = np.all([three_pass_filter, unobserved], axis=0)
# Good observations inside the 3-pass area. `keep` cannot be used for this count
# because it also includes the unobserved rows.
observed_3 = np.all([three_pass_filter, observed_requirements], axis=0)

print(f"There are {np.sum(galaxy_filter)} galaxy targets in the sample, of which {np.sum(galaxy_observed_filter)} are observed.") 
print(f"Of these, {np.sum(observed_requirements)} are in the bright (<{APP_MAG_CUT} mag) sample and pass our quality checks.")
print(f"Of those, {np.sum(observed_3)} are in the 3-pass coverage area.")
print(f"There are {np.sum(unobserved_3)} unobserved galaxies in the 3-pass coverage area, including bad observed galaxies.")

# --- Cut every working array down to the `keep` rows -------------------------
# `keep` = 3-pass rows that are either good observations or (treated as) unobserved;
# this is what the rest of the file usually wants.
obj_type = obj_type[keep]
dec = dec[keep]
ra = ra[keep]
z_obs = z_obs[keep]
target_id = target_id[keep] 
flux_r = flux_r[keep]
flux_g = flux_g[keep]  # previously left unfiltered, so it no longer lined up with the other arrays
app_mag_r = app_mag_r[keep]
app_mag_g = app_mag_g[keep]
g_r_apparent = g_r_apparent[keep]
p_obs = p_obs[keep]
unobserved = unobserved[keep]
deltachi2 = deltachi2[keep]
ntiles = ntiles[keep]
abs_mag_sdss = abs_mag_sdss[keep]
sdss_g_r = sdss_g_r[keep]
dn4000 = dn4000[keep]
indexes_not_assigned = np.argwhere(unobserved)

after_count = len(dec)

print(f"\nAfter all filters we have {after_count} of the original {before_count} rows.")

## Color Analysis

Lesson from this analysis: the BGS data, working with my 0.1^(G-R) computed with GAMA k-corrections, does not call for a separate G-R split in each logLgal bin; the global 0.76 split seems to work for all bins.

In [None]:
# Absolute magnitudes in g and r from the apparent magnitudes and redshift.
G, R = (app_mag_to_abs_mag(_m, z_obs) for _m in (app_mag_g, app_mag_r))
G_R = G - R

# Same magnitudes after applying the BGS k-correction, and the resulting colour.
Gk = k_correct_bgs(G, z_obs, g_r_apparent, band='g')
Rk = k_correct_bgs(R, z_obs, g_r_apparent, band='r')
G_R_k = Gk - Rk

# Same again using the GAMA k-correction instead.
Gk_GAMA = k_correct_gama(G, z_obs, g_r_apparent, band='g')
Rk_GAMA = k_correct_gama(R, z_obs, g_r_apparent, band='r')
G_R_k_GAMA = Gk_GAMA - Rk_GAMA

In [None]:
# Overlay histograms of g-r computed a few different ways on shared bins.
bins = np.linspace(0, 2.0, 200)

_color_series = [
    (g_r_apparent, "g-r"),
    # (sdss_g_r, 'From LSS Pipeline'),
    # (G_R, "G-R"),
    (G_R_k, "0.1^(G-R) BGS poly"),
    (G_R_k_GAMA, "0.1^(G-R) GAMA poly"),
]
for _values, _label in _color_series:
    junk = plt.hist(_values, bins=bins, alpha=0.5, label=_label)

plt.xlabel("g-r")
plt.ylabel("Count")
plt.legend()
plt.xlim(0.0, 2)

In [None]:
# Zoom in on 0.5-1.0, where the global GLOBAL_RED_COLOR_CUT=0.76 can be seen.
junk = plt.hist(G_R_k, label="0.1^(G-R) GAMA-style", alpha=0.5, bins=300)
plt.legend()
plt.xlim(0.5, 1.0)

In [None]:
# Re-import pyutils, then show the logLgal bin edges and the per-bin colour cuts.
from pyutils import *
print(BGS_LOGLGAL_BINS, BINWISE_RED_COLOR_CUT, sep='\n')

In [None]:
# Spot-check is_quiescent_BGS_gmr on three hand-picked input pairs.
# NOTE(review): presumably (logLgal, g-r) -- confirm against pyutils.
_probe_first = np.array([5.8, 9.0, 14.5])
_probe_second = np.array([0.5, 0.9, 0.9])
is_quiescent_BGS_gmr(_probe_first, _probe_second)

In [None]:
# Convert the BGS k-corrected r-band absolute magnitude to log luminosity, then
# assign each galaxy a bin index against BGS_LOGLGAL_BINS.
# Index 0 means below the lowest edge; len(BGS_LOGLGAL_BINS) means above the highest.
log_L_gal = abs_mag_r_to_log_solar_L(Rk)
logLgal_bin_idx = np.digitize(log_L_gal, bins=BGS_LOGLGAL_BINS)

In [None]:
# Range of the log luminosities and of the bin indices, one value per line,
# followed by the histogram over the logLgal bin edges.
print(
    np.min(log_L_gal),
    np.max(log_L_gal),
    np.min(logLgal_bin_idx),
    np.max(logLgal_bin_idx),
    sep='\n',
)
plt.hist(log_L_gal, bins=BGS_LOGLGAL_BINS, align='mid')
#plt.yscale('log')

In [None]:
# One figure per bin index (0 .. len(BGS_LOGLGAL_BINS), inclusive) showing the
# G_R_k distribution of the galaxies that fall in that bin.
for i in range(len(BGS_LOGLGAL_BINS) + 1):
    galaxy_idx_for_this_bin = (logLgal_bin_idx == i)

    plt.figure(figsize=(10, 6), dpi=80)
    junk = plt.hist(
        G_R_k[galaxy_idx_for_this_bin],
        bins=np.arange(0, 1.3, 0.02),
        align='mid',
        label=f"0.1^(G-R) Bin {i}",
    )
    plt.xlim(0.4, 1.2)
    plt.xticks(np.arange(0.4, 1.2, 0.04))
    plt.legend()

In [None]:
# The two absolute-magnitude estimates compared in the next cell:
# mag1 is the catalogue's ABSMAG_SDSS_R column, mag2 is R as computed in this notebook.
mag1, mag2 = abs_mag_sdss, R

In [None]:
# Compare the two absolute-magnitude estimates on shared bins.
# The difference is believed to come from how each one is k-corrected.
bins = np.linspace(-25, -10, 100)
my_counts, my_bins, my_p = plt.hist(mag2, bins=bins, alpha=0.5, label="my abs_mag")
alex_counts, alex_bins, alex_p = plt.hist(mag1, bins=bins, alpha=0.5, label="ABSMAG_SDSS_R")
plt.title("Compare Absolute Mags")
plt.xlabel("Absolute Mag")
plt.ylabel("Count")
#plt.yscale('log')
plt.legend()

# Bin edge at the index of the highest count in each histogram.
_alex_peak = alex_bins[np.argmax(alex_counts)]
_my_peak = my_bins[np.argmax(my_counts)]
print(f"The peak is shifted from ABSMAG_SDSS_R {_alex_peak:.1f} to my {_my_peak:.1f}")


In [None]:
# Sky map of the filtered BGS positions.
fig = make_map(ra, dec)

## Dn4000 Comparison (BGS, SDSS)

In [None]:
# Read the SDSS catalogue and its galaxy-properties companion file (both
# space-delimited, no header row), then combine them row-by-row on the index.
_sdss_columns = ('RA', 'Dec', 'z', 'logLgal', 'V_max', 'quiescent', 'chi')
_galprops_columns = ('Mag_g', 'Mag_r', 'sigma_v', 'Dn4000', 'concentration', 'log_M_star')

sdss = pd.read_csv(SDSS_v1_DAT_FILE, delimiter=' ', names=_sdss_columns, index_col=False)
sdss_galprops = pd.read_csv("../data/sdss_galprops_v1.0.dat", delimiter=' ', names=_galprops_columns)
sdss = sdss.merge(sdss_galprops, left_index=True, right_index=True)


In [None]:
# Dn4000 distributions of BGS and SDSS over the full range, log-scaled counts.
_dn4000_bins = np.linspace(-0.5, 5.0, 100)
plt.hist(dn4000, bins=_dn4000_bins, alpha=0.6, label="BGS Y1")
plt.hist(sdss.Dn4000, bins=_dn4000_bins, alpha=0.8, label="SDSS")
plt.yscale('log')
plt.legend()
plt.xlabel('Dn4000')
plt.ylabel('Count')

In [None]:
# Same Dn4000 comparison on a linear count axis, zoomed to 0.9-2.5.
_dn4000_bins = np.linspace(-0.5, 5.0, 100)
plt.hist(dn4000, bins=_dn4000_bins, alpha=0.6, label="BGS Y1")
plt.hist(sdss.Dn4000, bins=_dn4000_bins, alpha=0.8, label="SDSS")
plt.legend()
plt.xlabel('Dn4000')
plt.ylabel('Count')
plt.xlim(0.9,2.5)

In [None]:
# Build sky-coordinate catalogues for both surveys.
sdss_catalog = coord.SkyCoord(
    ra=sdss.RA.to_numpy()*u.degree,
    dec=sdss.Dec.to_numpy()*u.degree,
    frame='icrs',
)
BGS_catalog = coord.SkyCoord(ra=ra*u.degree, dec=dec*u.degree, frame='icrs')

# For every BGS object find the nearest SDSS object on the sky.
neighbor_indexes, d2d, d3d = coord.match_coordinates_sky(BGS_catalog, sdss_catalog, storekdtree='sdss')
ang_distances = d2d.to(u.arcsec).value

# Accept a pair as a match when the separation is under 3 arcsec, then collect
# the Dn4000 value from each survey for the matched pairs.
match_found_filter = ang_distances < 3.0
sdss_indexes = neighbor_indexes[match_found_filter]
bgs_matches = dn4000[match_found_filter]
sdss_matches = sdss["Dn4000"].to_numpy()[sdss_indexes]

In [None]:
# Fraction of matched pairs whose Dn4000 values agree within each tolerance
# (np.isclose with the given atol and its default rtol).
for _tol in (0.05, 0.1, 0.2, 0.3):
    _frac = np.isclose(bgs_matches, sdss_matches, atol=_tol).sum() / len(bgs_matches)
    print(f"{_frac} of the matches are within {_tol} of each other.")


In [None]:
# BGS positions first, then the SDSS positions added to the same figure.
fig = make_map(ra, dec)
fig = make_map(sdss.RA.to_numpy(), sdss.Dec.to_numpy(), alpha=0.05, fig=fig)

In [None]:
# Matched pairs: SDSS Dn4000 on x against BGS Dn4000 on y, both limited to 1-2.3.
plt.scatter(sdss_matches, bgs_matches, alpha=.2, s=1)
plt.xlabel("SDSS Dn4000")
plt.ylabel("BGS Dn4000")
plt.xlim(1, 2.3)
plt.ylim(1, 2.3)

In [None]:
# Table of matched Dn4000 pairs with the fractional difference (BGS - SDSS) / SDSS.
df = pd.DataFrame({'SDSS_Dn4000': sdss_matches, 'BGS_Dn4000': bgs_matches})
df['diff_frac'] = df['BGS_Dn4000'].sub(df['SDSS_Dn4000']).div(df['SDSS_Dn4000'])

# Bin by the SDSS value; each bin is labelled with its left edge.
bins = np.linspace(-1, 5, 60)
labels = bins[:-1]
df['dn4000_sdssbin'] = pd.cut(x=sdss_matches, bins=bins, labels=labels, include_lowest=True)

In [None]:
# Mean and standard deviation of the fractional Dn4000 difference in each SDSS bin.
plt.figure(dpi=80)
# observed=False is passed explicitly: the grouping column is categorical, and
# every category (including empty bins) must be kept so the results have one
# entry per element of `labels`. Relying on the default is not safe because the
# default changes between pandas versions.
_binned_diff = df.groupby('dn4000_sdssbin', observed=False).diff_frac
diff_mean = _binned_diff.mean()
diff_std = _binned_diff.std()

plt.errorbar(labels, diff_mean, yerr=diff_std)
plt.xlabel("SDSS Dn4000")
plt.ylabel("< (BGS-SDSS) / SDSS >")
plt.xlim(0.8, 2.4)
plt.ylim(-0.75, 0.75)
plt.draw()

## Dn4000 Lgal Bin Analysis

Run Color Analysis and Dn4000 Comparison first

In [None]:
# Make a plot of Dn4000 in each logLgal bin, laid out on a 2-column grid.
_n_rows = len(BGS_LOGLGAL_BINS)//2
fig,axes=plt.subplots(dpi=80, figsize=(10, 3*len(BGS_LOGLGAL_BINS)//2), ncols=2, nrows=_n_rows)
# Flatten so panel i can be addressed with a single index inside the loop.
axes = np.ravel(axes)

for i in range(0, len(BGS_LOGLGAL_BINS)-1):
    # Panel i shows digitize index i+1 (index 0 is skipped).
    galaxy_idx_for_this_bin = logLgal_bin_idx == i+1

    junk=axes[i].hist(dn4000[galaxy_idx_for_this_bin], bins=np.arange(1,2.2,0.02), label=f"Dn4000 for logLgal Bin {i+1}", align='mid')
    axes[i].legend()
    axes[i].set_xlim(1, 2.2)
    axes[i].set_xticks(np.arange(1, 2.2, 0.1))

    # draw a vertical line at get_SDSS_Dcrit(logLgal)
    axes[i].axvline(x=get_SDSS_Dcrit(BGS_LOGLGAL_BINS[i]), color='r', linestyle='-')

# Restore the (rows, columns) layout the grid was created with. The previous
# (2, rows) shape swapped the dimensions, so axes[r, c] did not match the grid.
axes = np.reshape(axes, (_n_rows, 2))


# Randoms Analysis for Footprint

In [None]:
# Randoms catalogue, used below to estimate the footprint area.
rtable = Table.read(BGS_RAND_FILE, format="fits")

In [None]:
# Position and tile-count columns of the randoms.
r_dec, r_ra, r_ntiles = (rtable[_name] for _name in ('DEC', 'RA', 'NTILE'))

RANDOMS_DENSITY = 2500 # per square degree, Ashley Ross paper on LSS pipeline or elsewhere in docs

# Area = number of randoms / density of randoms; the sky fraction follows from it.
onepass_footprint = len(r_dec) / RANDOMS_DENSITY # in degrees squared
onepass_frac_area = onepass_footprint / DEGREES_ON_SPHERE

# Repeat for the randoms covered by at least three tiles.
# NOTE(review): this rebinds three_pass_filter, which the galaxy filtering cell
# also defines with a different length.
three_pass_filter = r_ntiles >= 3 # 3pass coverage
r_ra3 = r_ra[three_pass_filter]
r_dec3 = r_dec[three_pass_filter]

threepass_footprint = len(r_dec3) / RANDOMS_DENSITY # in degrees squared
threepass_frac_area = threepass_footprint / DEGREES_ON_SPHERE

In [None]:
# Report the footprint area and sky fraction for the 1-pass and 3-pass regions.
for _passes, _area, _frac_area in (
    ("1", onepass_footprint, onepass_frac_area),
    ("3", threepass_footprint, threepass_frac_area),
):
    print(f"BGS Y1 {_passes}pass Footprint calculated from randoms is {_area} square degrees or frac_area={_frac_area}")

# View Healpix Files

In [None]:
# Read the two BGS_BRIGHT healpix map files (suffixes _N and _S) from ROOT_FOLDER.
_map_path_N = ROOT_FOLDER + 'BGS_BRIGHT_mapprops_healpix_nested_nside256_N.fits'
_map_path_S = ROOT_FOLDER + 'BGS_BRIGHT_mapprops_healpix_nested_nside256_S.fits'
BGS_N = hp.read_map(_map_path_N)
BGS_S = hp.read_map(_map_path_S)

In [None]:
# Mollweide view of the _N map with histogram-equalised colour scaling,
# using the coordinate pair ["G", "E"], followed by a graticule overlay.
_view_opts_N = dict(
    coord=["G", "E"],
    title="Histogram equalized Ecliptic",
    unit="Galaxies",
    norm="hist",
    min=0,
    max=1,
)
hp.mollview(BGS_N, **_view_opts_N)
hp.graticule()

In [None]:
# Mollweide view of the _S map with histogram-equalised colour scaling,
# using the coordinate pair ["G", "E"], followed by a graticule overlay.
_view_opts_S = dict(
    coord=["G", "E"],
    title="Histogram equalized Ecliptic",
    unit="Galaxies",
    norm="hist",
    min=0,
    max=1,
)
hp.mollview(BGS_S, **_view_opts_S)
hp.graticule()