In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import astropy.coordinates as coord
import astropy.units as u
import emcee
import sys
from astropy.table import Table

if './SelfCalGroupFinder/py/' not in sys.path:
    sys.path.append('./SelfCalGroupFinder/py/')
from pyutils import *
import plotting as pp
from dataloc import *
from bgs_helpers import *
import catalog_definitions as cat
from groupcatalog import *

%load_ext autoreload
%autoreload 2

After the group finder is run, this notebook is used to postprocess the results, generating plots and such for analysis.


## Loading existing datasets 

In [None]:
#mxxl_all=deserialize(cat.mxxl_all)
#mxxl_fiberonly=deserialize(cat.mxxl_fiberonly)
#mxxl_nn=deserialize(cat.mxxl_nn)
mxxl_simple_4=deserialize(cat.mxxl_simple_4)

In [None]:
sdss_vanilla_v2 = deserialize(cat.sdss_vanilla_v2)
sdss_colors_v2 = deserialize(cat.sdss_colors_v2)
sdss_colors_chi_v2 = deserialize(cat.sdss_colors_chi_v2)
sdss_vanilla_v1 = deserialize(cat.sdss_vanilla)
sdss_colors_v1 = deserialize(cat.sdss_colors)
sdss_colors_chi_v1 = deserialize(cat.sdss_colors_chi)

sdss_bgscut = deserialize(cat.sdss_bgscut)

cat.sdss_published.postprocess()
sdss_published = cat.sdss_published # It really is ~exactly sdss_colors_chi, which is great news for reproducibility

In [None]:
bgs_y1_pzp_2_4 = deserialize(cat.bgs_y1_pzp_2_4)
bgs_y1_pzp_2_4_c1 = deserialize(cat.bgs_y1_pzp_2_4_c1)
bgs_y3_pzp_2_4 = deserialize(cat.bgs_y3_pzp_2_4)
bgs_y3_pzp_2_4_c1 = deserialize(cat.bgs_y3_pzp_2_4_c1)

In [None]:
# This technique of removing tiles isn't as accurate as taking Y3 and cutting to SV3 footprint
#bgs_sv3_nn_7p = deserialize(cat.bgs_sv3_nn_7p)
#bgs_sv3_simple_5_7p = deserialize(cat.bgs_sv3_simple_5_7p)
#bgs_sv3_pz_2_0_7p = deserialize(cat.bgs_sv3_pz_2_0_7p)
#bgs_sv3_pz_2_4_7p = deserialize(cat.bgs_sv3_pz_2_4_7p)
#bgs_sv3_pz_3_1_7p = deserialize(cat.bgs_sv3_pz_3_1_7p)

# Our best source of BGS Truth
bgs_sv3_pz_2_4_10p = deserialize(cat.bgs_sv3_pz_2_4_10p) 
bgs_sv3_pz_2_4_10p_c1 = deserialize(cat.bgs_sv3_pz_2_4_10p_c1) 
#bgs_sv3_pz_2_4_10p_old = deserialize(cat.bgs_sv3_pz_2_4_10p_old) 
bgs_sv3_fiberonly_10p = deserialize(cat.bgs_sv3_fiberonly_10p)
#bgs_sv3_nn_10p = deserialize(cat.bgs_sv3_nn_10p)

# These ones are the best way to compare main survey to SV3 for fiber incompleteness study
bgs_y3_like_sv3_fiberonly = deserialize(cat.bgs_y3_like_sv3_fiberonly)
bgs_y3_like_sv3_pz_2_4 = deserialize(cat.bgs_y3_like_sv3_pz_2_4)
bgs_y3_like_sv3_pz_2_4_c1 = deserialize(cat.bgs_y3_like_sv3_pz_2_4_c1)
bgs_y3_like_sv3_pz_2_0 = deserialize(cat.bgs_y3_like_sv3_pz_2_0)
bgs_y3_like_sv3_nn = deserialize(cat.bgs_y3_like_sv3_nn)

bgs_sv3_10p_mcmc = deserialize(cat.bgs_sv3_10p_mcmc)

In [None]:
all_u = deserialize(cat.uchuu_all)

### MCMC of z assignment

In [None]:
# Get best preprocess photo-z-plus v2 result for SV3
path = f'/mount/sirocco1/imw2293/GROUP_CAT/mcmc13_m4_2_4.h5'
reader = emcee.backends.HDFBackend(path, read_only=True)
bgs_sv3_pz2_mcmcbest = BGSGroupCatalog.from_MCMC(reader, Mode.PHOTOZ_PLUS_v2)
bgs_sv3_pz2_mcmcbest.run_group_finder(popmock=False)
bgs_sv3_pz2_mcmcbest.postprocess()
serialize(bgs_sv3_pz2_mcmcbest)

# 64.94% neighbor
#[6.32102907 1.59813654 1.49851461 3.18966963 0.83098626 2.83411711 3.26649451 1.75386219 1.95862571 2.56928697 0.91387857 1.70360255 3.6181996 ]

In [None]:
# Get best preprocess photo-z-plus v3 result for SV3
path = f'/mount/sirocco1/imw2293/GROUP_CAT/mcmc13_m4_3_1.h5'
reader = emcee.backends.HDFBackend(path, read_only=True)
bgs_sv3_pz3_mcmcbest = BGSGroupCatalog.from_MCMC(reader, Mode.PHOTOZ_PLUS_v3)
bgs_sv3_pz3_mcmcbest.run_group_finder(popmock=False)
bgs_sv3_pz3_mcmcbest.postprocess()
serialize(bgs_sv3_pz3_mcmcbest)

# 37% Neighbor for this, not bad
#[3.68776334 1.03045493 1.00947751 2.79354858 0.88263756 1.15014321 2.71197235 0.63517952 1.44684275 2.63171751 1.16820625 0.96790557 3.02351026]

# This one is great at 63.13% Neighbor.
# [8.26010114 1.29383299 1.54671643 3.01349293 1.2229046  0.86286149 2.58828658 0.87067123 0.61260216 2.44470607 1.11635435 1.29386183 3.16506802]

In [None]:
# In case MCMC has been dumb, check similar parameter values
# Builds four variant catalogs from the best-fit PZP v3 catalog. For variant i, one of the
# four 3-element parameter groups (bb, rb, br, rr) is repeated in all four slots.
import copy
# Elements 1..12 of extra_params are split into four groups of three; element 0 is reused as-is below.
bb, rb, br, rr = bgs_sv3_pz3_mcmcbest.extra_params[1:13].reshape(4, 3)

params = [bb, rb, br, rr]
# One RGB plot color per variant.
colors = [[0, 0, 1.0], [1.0, 0, 0.4], [0.2, 0.7, 0.2], [1.0, 0.0, 0.0]]
variants = []

for i, (param, color) in enumerate(zip(params, colors), start=1):
    # Same constructor arguments as the best-fit catalog; only the name differs.
    variant = BGSGroupCatalog(
        f"PZP 3 Variant {i}",
        bgs_sv3_pz3_mcmcbest.mode,
        bgs_sv3_pz3_mcmcbest.mag_cut,
        bgs_sv3_pz3_mcmcbest.catalog_mag_cut,
        bgs_sv3_pz3_mcmcbest.sdss_fill,
        bgs_sv3_pz3_mcmcbest.num_passes,
        bgs_sv3_pz3_mcmcbest.drop_passes,
        bgs_sv3_pz3_mcmcbest.data_cut,
        bgs_sv3_pz3_mcmcbest.extra_params
    )
    # Overrides the extra_params passed to the constructor just above.
    # NOTE(review): this is a nested list (one scalar + four length-3 arrays), whereas the
    # source extra_params is sliced/reshaped like a flat array — confirm preprocess() and
    # run_group_finder() accept this layout.
    variant.extra_params = [bgs_sv3_pz3_mcmcbest.extra_params[0], param, param, param, param]
    variant.color = color
    variant.preprocess()
    variant.run_group_finder(popmock=False)
    variant.postprocess()
    variants.append(variant)

# Unpack into individually named catalogs for use in later cells.
bgs_sv3_pz3_mcmcbest_var1, bgs_sv3_pz3_mcmcbest_var2, bgs_sv3_pz3_mcmcbest_var3, bgs_sv3_pz3_mcmcbest_var4 = variants


# Release Candidate Catalogs

In [None]:
#sdss_published.sanity_tests() # The published catalog has some issues
sdss_vanilla_v2.sanity_tests()
sdss_colors_v2.sanity_tests()
#sdss_colors_chi_v2.sanity_tests()

sdss_vanilla_v1.sanity_tests()
sdss_colors_v1.sanity_tests()
#sdss_colors_chi_v1.sanity_tests()

In [None]:
bgs_sv3_pz_2_4_10p_c1.basic_stats()
bgs_sv3_pz_2_4_10p_c1.sanity_tests()
bgs_y3_like_sv3_pz_2_4_c1.basic_stats()
bgs_y3_like_sv3_pz_2_4_c1.sanity_tests()
#bgs_y1_pzp_2_4.basic_stats()
#bgs_y1_pzp_2_4.sanity_tests()
bgs_y1_pzp_2_4_c1.basic_stats()
bgs_y1_pzp_2_4_c1.sanity_tests()
#bgs_y3_pzp_2_4.basic_stats()
#bgs_y3_pzp_2_4.sanity_tests()
bgs_y3_pzp_2_4_c1.basic_stats()
bgs_y3_pzp_2_4_c1.sanity_tests()

In [None]:
pp.plots(bgs_sv3_pz_2_4_10p, bgs_y1_pzp_2_4, bgs_y3_pzp_2_4)

In [None]:

pp.plots(bgs_sv3_pz_2_4_10p_c1, bgs_y3_like_sv3_pz_2_4_c1, bgs_y1_pzp_2_4_c1, bgs_y3_pzp_2_4_c1, sdss_colors_chi_v2)

In [None]:
bgs_y1_pzp_2_4_c1.name = "BGS Y1"
bgs_y1_pzp_2_4_c1.color = 'k'
bgs_y1_pzp_2_4_c1.marker = '-'
sdss_published.name = "SDSS"
pp.plots(bgs_y1_pzp_2_4_c1, sdss_published)

In [None]:
pp.qf_cen_plot(bgs_sv3_pz_2_4_10p, test_methods=True)

In [None]:
bgs_y1_pzp_2_4.all_data.sort_values('L_GAL', ascending=False).loc[:, ['RA', 'DEC', 'Z', 'Z_PHOT', 'Z_ASSIGNED_FLAG']].head(30)

In [None]:
bgs_y1_pzp_2_4.color = 'red'
bgs_y3_pzp_2_4.color = 'blue'
pp.plots(bgs_sv3_pz_2_4_10p, bgs_y1_pzp_2_4, bgs_y3_pzp_2_4)

In [None]:
pp.plots(sdss_colors_chi_v2)

# Fiber Incompleteness Study

### SV3 'Truth' Handling

In [None]:
#bgs_sv3_pz_2_4_10p.calculate_projected_clustering(with_extra_randoms=True)
#bgs_sv3_pz_2_4_10p.calculate_projected_clustering_in_magbins(with_extra_randoms=True)

# TODO run this again
bgs_sv3_pz_2_4_10p.add_jackknife_err_to_proj_clustering(with_extra_randoms=True, for_mag_bins=False)
#bgs_sv3_pz_2_4_10p.add_jackknife_err_to_proj_clustering(with_extra_randoms=False, for_mag_bins=True) # BUG Broken
serialize(bgs_sv3_pz_2_4_10p)

In [None]:
with np.printoptions(precision=2, suppress=True, linewidth=200):
    print(bgs_sv3_pz_2_4_10p.wp_all_extra[1])
    print(bgs_sv3_pz_2_4_10p.wp_err)

    std_devs = np.sqrt(np.diag(bgs_sv3_pz_2_4_10p.wp_cov))
    print(std_devs)

    print(bgs_sv3_pz_2_4_10p.wp_cov)

In [None]:
# Question: does w_p computed with the small set of randoms differ from the full (extra) set?
f = bgs_sv3_pz_2_4_10p

# Absolute percent difference for the all / red / blue columns (indices 1, 2, 3).
percent_diff = np.abs(f.wp_all[1] - f.wp_all_extra[1]) / f.wp_all[1] * 100
red_p_diff = np.abs(f.wp_all[2] - f.wp_all_extra[2]) / f.wp_all[2] * 100
blue_p_diff = np.abs(f.wp_all[3] - f.wp_all_extra[3]) / f.wp_all[3] * 100
for pdiff in (percent_diff, red_p_diff, blue_p_diff):
    print(pdiff)
# Answer - Less than 1% generally

colors = ['k', 'r', 'b']
columns = (1, 2, 3)
plt.figure(figsize=(5, 5))

# Standard-randoms curves: with error bars when jackknife errors exist, plain lines otherwise.
rp = f.wp_all[0][:-1]
if f.wp_err is not None:
    errs = (f.wp_err, f.wp_r_err, f.wp_b_err)
    for col, label, c, err in zip(columns, ('All', 'Red', 'Blue'), colors, errs):
        plt.errorbar(rp, f.wp_all[col], yerr=err, marker='o', linestyle='-', label=label, color=c, alpha=0.5)
else:
    for col, label, c in zip(columns, ('All', 'Red', 'Blue'), colors):
        plt.plot(rp, f.wp_all[col], marker='o', linestyle='-', label=label, color=c)

# Extra-randoms curves, drawn dashed in the matching colors.
rp_extra = f.wp_all_extra[0][:-1]
for col, label, c in zip(columns, ('All Extra Rands', 'Red Extra Rands', 'Blue Extra Rands'), colors):
    plt.plot(rp_extra, f.wp_all_extra[col], marker='x', linestyle='--', label=label, color=c)

plt.xscale('log')
plt.ylim(8, 2000)
plt.yscale('log')
plt.xlabel(r'$r_p$ [Mpc/h]')
plt.ylabel(r'$w_p(r_p)$')
plt.legend()
plt.title('Full Sample $w_p(r_p)$ ')
plt.grid(True)
plt.show()

### Main Study

In [None]:
# Catalogs compared in the fiber-incompleteness study (Y3 cut to the SV3 footprint).
sets = [bgs_y3_like_sv3_fiberonly, bgs_y3_like_sv3_nn, bgs_y3_like_sv3_pz_2_0, bgs_y3_like_sv3_pz_2_4]
#sets = [bgs_y3_like_sv3_pz_2_4_c1]

# Human-readable names / colors used by the plotting helpers.
bgs_y3_like_sv3_pz_2_4.name = "New Technique"
bgs_y3_like_sv3_pz_2_4_c1.name = "New Technique"
bgs_y3_like_sv3_nn.name = "Use Nearest Neighbor"
bgs_y3_like_sv3_pz_2_0.name = "Use Photo-z"
bgs_y3_like_sv3_fiberonly.name = "Drop Lost Galaxies"
bgs_sv3_pz_2_4_10p.name = "SV3 ~Truth"
bgs_sv3_pz_2_4_10p_c1.name = "SV3 ~Truth C1"
bgs_sv3_pz_2_4_10p.color = 'k'
bgs_sv3_pz_2_4_10p_c1.color = 'k'

for s in [bgs_sv3_pz_2_4_10p_c1, *sets]:
    print(f"--- {s.name} ---")
    print(f"Has extra randoms: {s.wp_all_extra is not None}")
    # True only when the container exists AND none of its slices are None.
    # (The previous len()/np.repeat comparison raised a TypeError when
    # wp_slices_extra itself was None and did not yield a single yes/no answer.)
    slices_extra = s.wp_slices_extra
    flag = slices_extra is not None and all(x is not None for x in slices_extra)
    print(f"Has extra randoms for slices: {flag}")


In [None]:
pp.plots(*sets, show_err=bgs_sv3_pz_2_4_10p)

In [None]:
truth_catalog = bgs_sv3_pz_2_4_10p
truth_df = truth_catalog.all_data
for s in sets:
    print(s.name)
    s.get_true_z_from(truth_df)
    s.refresh_df_views()

In [None]:
# TODO debug all this
pp.Lfunc_compare(bgs_y3_like_sv3_fiberonly, bgs_sv3_pz_2_4_10p)

In [None]:
pp.correct_redshifts_assigned_plot(bgs_y3_like_sv3_nn, bgs_y3_like_sv3_pz_2_0, bgs_y3_like_sv3_pz_2_4)

In [None]:
pp.luminosity_function_plots(bgs_y3_like_sv3_nn, bgs_y3_like_sv3_pz_2_0, bgs_y3_like_sv3_pz_2_4)

In [None]:
for s in sets:
    pp.single_plots(s)

pp.single_plots(bgs_sv3_pz_2_4_10p)

In [None]:
for s in sets:
    data = s.all_data.loc[z_flag_is_not_spectro_z(s.all_data['Z_ASSIGNED_FLAG'])]
    delta_red = data['Z'] - data['Z_T'] # I used to do z_obs for SV3 dropping passes... TODO
    plt.hist(delta_red, bins=100, range=(-0.05, 0.05), histtype='step', label=s.name, color=s.color)
    plt.yscale('log')
    plt.legend()

In [None]:
# TODO BUG - issue. Targets in the 'truth' catalog that are NOT IN the other one aren't accounted for in the purity and completeness calculations.
# This becomes obvious issue for the fiberonly runs.
pp.test_purity_and_completeness(*sets, truth_catalog=truth_catalog, lost_only=False)
pp.purity_complete_plots(*sets)

In [None]:
# Make a bar plot of the z_assigned_flag values for each set
for s in sets:
    j=plt.hist(s.all_data['Z_ASSIGNED_FLAG'], bins=[-3,-2,-1,0,1,2,3,4,5,6,7,8,9,10,11,12], histtype='step', label=s.name)

#plt.yscale('log')
plt.ylim(0, 10000)
plt.legend()

# Popmock Lsat / Clustering Results
Must have run popmock and corrfunc.

In [None]:
# Toggle: the SDSS list on the next line is immediately overwritten by the BGS list below,
# so only the BGS catalogs are actually processed. Reorder/comment out to switch.
popmock_sets = [sdss_vanilla_v1, sdss_vanilla_v2, sdss_colors_v1, sdss_colors_v2, sdss_colors_chi_v1, sdss_colors_chi_v2]
popmock_sets = [bgs_sv3_10p_mcmc, bgs_y1_pzp_2_4, bgs_y1_pzp_2_4_c1]

# Calls chisqr() on each selected catalog, printing its name first.
for s in popmock_sets:
    print(f"--- {s.name} ---")
    s.chisqr()

In [None]:
for s in popmock_sets:
    pp.proj_clustering_plot(s)

for s in popmock_sets:
    pp.lsat_data_compare_plot(s)

# Clustering

In [None]:
import pickle  # explicit import: previously only in scope via a star-import, if at all

def _load_pickle(filename):
    """Unpickle OUTPUT_FOLDER + filename and close the file handle afterwards.

    NOTE: unpickling can execute arbitrary code; only load files produced by this project.
    """
    with open(OUTPUT_FOLDER + filename, 'rb') as fh:
        return pickle.load(fh)

# Pre-computed PIP clustering results.
sv3_pip_bad = _load_pickle('sv3_pip_clustering.pkl')
sv3_pip_clustering = _load_pickle('sv3_pip_clustering_proper.pkl')
y3_likesv3_clustering = _load_pickle('y3_likesv3_pip_clustering_proper.pkl')

In [None]:
# Compare Clustering from processed ones to ~Truth
for s in sets:
    pp.wp_rp(s)
    pp.compare_wp_rp(s, bgs_sv3_pz_2_4_10p)

In [None]:
pp.wp_rp(bgs_sv3_pz_2_4_10p)
pp.wp_rp(sv3_pip_bad)
pp.wp_rp(sv3_pip_clustering)
pp.wp_rp(y3_likesv3_clustering)

In [None]:
# Load up SV3 PIP Clustering as created by the LS

In [None]:
# Does using randoms from full vs clustering with weights matter?
pp.compare_wp_rp(sv3_pip_bad, sv3_pip_clustering)
# BUG how is overall not average of red and blue?


In [None]:
# This plot really should be close to 0% difference
# It's SV3 10p, so 2% filled with my method, compared to the PIP method
pp.compare_wp_rp(bgs_sv3_pz_2_4_10p, sv3_pip_clustering)

In [None]:
# If LSS team is calculating PIP weights right and I'm doing nothing wrong,
# then these two should also largely agree
pp.compare_wp_rp(y3_likesv3_clustering, sv3_pip_clustering)

In [None]:
# Rough check that the footprints all match up
for s in sets:
    fig=pp.make_map(s.all_data['RA'].to_numpy(), s.all_data['DEC'].to_numpy())

# Other Plots

## Assorted

In [None]:
sdss_colors_chi_v2.Mr_gal_labels = Mr_gal_labels[15:]
sdss_colors_chi_v1.Mr_gal_labels = Mr_gal_labels[15:]
sdss_colors_chi_v2.color = 'darkred'

In [None]:
pp.plots(sdss_colors_chi_v2, sdss_colors_chi_v1, deserialize(cat.bgs_sv3_10p_mcmc))

In [None]:
sets = [bgs_y3_like_sv3_fiberonly, bgs_y3_like_sv3_nn, bgs_y3_like_sv3_pz_2_0, bgs_y3_like_sv3_pz_2_4] 


In [None]:
# Targets in SV3 region observed in main survey got new redshift measurements
# Q: How different are those z's compared to SV3 z's? 
# A: They are similar, but not identical. The difference is less than 0.001 for 99.7% so it's OK for us I think.
#    (Subdominant to v_peculiar)

from astropy.coordinates import SkyCoord
from astropy import units as u
from astropy.coordinates import match_coordinates_sky

def find_unique_and_matched_objects(cat1, cat2, max_sep=None):
    """Cross-match the spectroscopic galaxies of two catalogs by sky position.

    Each spectroscopic row of ``cat1`` is paired with its nearest spectroscopic
    neighbour in ``cat2``; pairs separated by more than ``max_sep`` are treated
    as unmatched ("unique" to ``cat1``).

    Parameters
    ----------
    cat1, cat2 : objects exposing an ``all_data`` DataFrame with ``RA``, ``DEC``
        (interpreted as degrees) and ``Z_ASSIGNED_FLAG`` columns.
    max_sep : angular quantity, optional
        Maximum separation for a pair to count as a match. Defaults to 1 arcsec,
        the previously hard-coded threshold.

    Returns
    -------
    (unique, matched) : tuple of DataFrames
        ``unique`` holds the ``cat1`` rows without a counterpart within ``max_sep``.
        ``matched`` holds the remaining ``cat1`` rows joined to their ``cat2``
        counterpart; overlapping ``cat2`` columns carry the suffix ``_2``
        (e.g. ``Z_2``).
    """
    if max_sep is None:
        max_sep = 1*u.arcsec

    # Restrict both catalogs to spectroscopic redshifts; reset_index gives positional row labels.
    df1 = cat1.all_data.loc[z_flag_is_spectro_z(cat1.all_data['Z_ASSIGNED_FLAG'])].reset_index()
    df2 = cat2.all_data.loc[z_flag_is_spectro_z(cat2.all_data['Z_ASSIGNED_FLAG'])].reset_index()

    # Extract RA and Dec from the catalogs
    ra1, dec1 = df1['RA'].to_numpy(), df1['DEC'].to_numpy()
    ra2, dec2 = df2['RA'].to_numpy(), df2['DEC'].to_numpy()

    # Create SkyCoord objects
    coords1 = SkyCoord(ra=ra1*u.degree, dec=dec1*u.degree)
    coords2 = SkyCoord(ra=ra2*u.degree, dec=dec2*u.degree)

    # Nearest cat2 neighbour (positional index into df2) for every cat1 object
    idx, d2d, _ = match_coordinates_sky(coords1, coords2)

    # FID is the join key: for df1 the matched df2 row, for df2 its own row position.
    df1['FID'] = idx
    df2['FID'] = df2.index

    # Objects in df1 whose nearest df2 neighbour is too far away to be the same target
    unique_mask = d2d > max_sep

    # join with df2 for matched_objects on the FID
    matched_objects = df1.join(df2.set_index('FID'), on='FID', rsuffix='_2')
    matched_objects = matched_objects[~unique_mask]

    print(f"Total spectroscopic galaxies in cat1: {len(df1)}, cat2: {len(df2)}")
    print(f'Unique objects in cat1: {unique_mask.sum()}, Matched objects in cat1: {len(matched_objects)}')

    return df1[unique_mask], matched_objects

# Example usage
unique_objects, matched_objects = find_unique_and_matched_objects(bgs_sv3_pz_2_4_10p, bgs_y3_like_sv3_pz_2_4)

# The join in find_unique_and_matched_objects uses rsuffix='_2', so the second catalog's
# redshift column is 'Z_2' (pandas column labels are case-sensitive; 'z_2' does not exist).
# Fraction of matched objects whose two redshifts agree to within 0.001:
print(np.isclose(matched_objects['Z'], matched_objects['Z_2'], atol=0.001, rtol=0).sum() / len(matched_objects))
#fig=pp.make_map(unique_objects.RA.to_numpy(), unique_objects['DEC'].to_numpy())

plt.hist(matched_objects['Z'] - matched_objects['Z_2'], bins=np.linspace(-0.005, 0.005, 100))
plt.yscale('log')

# Draw vertical lines at +/- 0.005 (the edges of the histogram range)
plt.axvline(x=0.005, color='r', linestyle='--')
plt.axvline(x=-0.005, color='r', linestyle='--')

In [None]:
# SV3 10p and SDSS BGS-cut are very similar!
bgs_sv3_pz_2_4_10p.color = 'k'
pp.plots(bgs_sv3_pz_2_4_10p, sdss_bgscut)

In [None]:
# Cutting SDSS to remove regions with poor BGS overlap barely improves the completeness
pp.plots(sdss_bgscut, sdss_vanilla_v2)
print(f"{spectroscopic_complete_percent(sdss_bgscut.all_data['Z_ASSIGNED_FLAG']):.2f}% spectroscopic complete for BGS cut")
print(f"{spectroscopic_complete_percent(sdss_vanilla_v2.all_data['Z_ASSIGNED_FLAG']):.2f}% spectroscopic complete for Vanilla")

In [None]:
# Overlay BGS Y1 / SV3 against the SDSS v2 runs.
# NOTE(review): this cell previously referenced `sdss_colors_mine`, which is not defined
# anywhere in this notebook (NameError on a fresh kernel). `sdss_colors_v2` is used
# instead, matching the other v2 catalogs in this comparison — confirm this was the intent.
sdss_vanilla_v2.marker = '--'
sdss_colors_v2.color = 'navy'
sdss_colors_v2.marker = '--'
sdss_colors_chi_v2.color = 'deeppink'
sdss_colors_chi_v2.marker = '--'
bgs_y1_pzp_2_4.color = 'brown'
bgs_y1_pzp_2_4.marker = '-'
pp.plots(bgs_y1_pzp_2_4, bgs_sv3_pz_2_4_10p, sdss_vanilla_v2, sdss_colors_v2, sdss_colors_chi_v2)
#pp.plots(cat.sdss_published, sdss_colors_chi)

In [None]:
# Why doesn't mstar missing % exactly match z_assigned_flag? 
# Probably redshift failures. Still have a spectra so still have mstar
print(np.sum(np.isnan(bgs_y1_pzp_2_4.all_data['MSTAR'])) / len(bgs_y1_pzp_2_4.all_data['MSTAR']))
print(np.sum(bgs_y1_pzp_2_4.all_data['Z_ASSIGNED_FLAG'] != 0) / len(bgs_y1_pzp_2_4.all_data['Z_ASSIGNED_FLAG']))

In [None]:
pp.compare_fsat_color_split(sdss_vanilla_v1, sdss_vanilla_v2, project_percent=0.52)


In [None]:
pp.compare_fsat_color_split(bgs_sv3_pz_2_4_10p, sdss_vanilla_v2)


In [None]:
# NOTE(review): other cells in this notebook call pp.qf_cen_plot(...); confirm that the
# plotting module really exposes a `qf` attribute with a `centered_plot` function — TODO confirm.
pp.qf.centered_plot(bgs_y1_pzp_2_4)
pp.qf.centered_plot(sdss_published)

In [None]:
pp.fsat_by_z_bins(bgs_y1_pzp_2_4, z_bins=np.array([0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5, 1.0]), show_plots=True)
#pp.fsat_by_z_bins(mxxl_simple_4, z_bins=np.array([0.0, 0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5, 1.0]), show_plots=False, aggregation=pp.fsat_truth_vmax_weighted)


In [None]:
plt.hist(bgs_y1_pzp_2_4.all_data['MSTAR'].dropna(), np.logspace(6, 13, 100))
plt.xlabel('Stellar Mass')
plt.ylabel('Frequency')
plt.title('Distribution of Stellar Masses for bgs_y1_pzp_2_4.all_data')
plt.yscale('log')
plt.xscale('log')
plt.show()

In [None]:
bgs_y1_pzp_2_4.all_data['Mstar_bin'].value_counts()

In [None]:
#bgs_y1_pz_2_4.postprocess()
#bgs_y1_pz_2_4.all_data['Mstar_bin'] = pd.cut(x = bgs_y1_pz_2_4.all_data['MSTAR'], bins = mstar_bins, labels = mstar_labels, include_lowest = True)
pp.qf_cen_plot(bgs_y1_pzp_2_4, mstar=True)

In [None]:
pp.fsat_by_z_bins(bgs_y1_pzp_2_4, z_bins=np.array([0.0, 0.2, 1.0]))


In [None]:
# Print out biggest group size
for dataset in [bgs_y1_pzp_2_4, sdss_vanilla_v2]:
    print(dataset.name)
    print(dataset.all_data.groupby('IGRP').size().max())


In [None]:
# SDSS Examine Bimodality

z=sdss_vanilla_v2.all_data['Z']
gmr=sdss_vanilla_v2.all_data['MAG_G'] - sdss_vanilla_v2.all_data['MAG_R']
junk=plt.hist(gmr, bins=np.linspace(-1,3,300), alpha=0.4)
#junk=plt.hist(k_correct(sdss_vanilla.all_data['MAG_G'], z, gmr, band='g')  - k_correct(sdss_vanilla.all_data['MAG_R'], z, gmr, band='r'), bins=500, alpha=0.4)
junk=plt.hist(sdss_vanilla_v2.all_data['DN4000'], bins=np.linspace(0,4,300), alpha=0.4)
plt.xlim(-1, 3)

In [None]:
# Investigate changes in halo mass function from wcen
m1=np.log10(sdss_vanilla_v2.all_data['M_HALO'])
m2=np.log10(sdss_colors_v2.all_data['M_HALO'])
m3=np.log10(sdss_colors_chi_v2.all_data['M_HALO'])

# bin m1,m2,m3 the same way
# np.digitize returns i for bins[i-1] <= x < bins[i]; index n_bins collects x >= bins[-1].
n_bins = 20
bins = np.linspace(10.8, 15.0, n_bins)
d1 = np.digitize(m1, bins)
d2 = np.digitize(m2, bins)
d3 = np.digitize(m3, bins)

# count the number of galaxies in each bin (values below bins[0] fall in index 0 and are ignored)
n1 = np.array([np.sum(d1==i) for i in range(1, n_bins+1)])
n2 = np.array([np.sum(d2==i) for i in range(1, n_bins+1)])
n3 = np.array([np.sum(d3==i) for i in range(1, n_bins+1)])

# Do the same but for log10(counts); empty bins become -inf
n1 = np.log10(n1)
n2 = np.log10(n2)
n3 = np.log10(n3)
# Fixed: this used to print the literal 3 instead of the third array.
print(n1,n2,n3)

# Log difference relative to the vanilla run
p1 = np.abs(n1-n2)
p2 = np.abs(n1-n3)

# Each count is plotted at the left edge of its bin.
plt.plot(bins, p1, label='SDSS Colors vs Vanilla')
plt.plot(bins, p2, label='SDSS Colors+Chi vs Vanilla')

plt.xlabel('log10(M_halo)')
plt.ylabel('Log10 Difference in Counts')
plt.legend()

## Make single group CSV for legacysurvey.org/viewer visualization

In [None]:
df = pd.read_csv(OUTPUT_FOLDER + 'NERSC_BGS_1pass_v1.out')
centrals_of_big_groups = df['N_SAT'] > 0
group_ids = df.loc[centrals_of_big_groups]['IGRP'].unique()

In [None]:
df[df['IGRP'] == 1644058]

In [None]:
print(group_ids[0:10])

In [None]:
#for i in group_ids[0:10]:
for i in [1644058, 1644051]:
    #df.loc[df['IGRP'] == i, ['RA', 'DEC']].to_csv(OUTPUT_FOLDER + f'group{i}.csv', index=False)
    print(df.loc[df['IGRP'] == i, ['RA', 'DEC', 'Z', 'Z_ASSIGNED_FLAG']])

## Study z_phot vs z_spectra

In [None]:
df = bgs_sv3_pz_2_4_10p.all_data

low_cut = 0.0001 # not a dramatic shift when moving from here to 0.1
quality = (df['Z_PHOT'] != NO_PHOTO_Z) & z_flag_is_spectro_z(df['Z_ASSIGNED_FLAG']) & (df['Z_OBS'] > low_cut) & (df['L_GAL'] > 3E10)
#quality = (df['Z_PHOT'] != NO_PHOTO_Z) & z_flag_is_spectro_z(df['Z_ASSIGNED_FLAG']) & (df['Z_OBS'] > low_cut)

# Investigate the photo-z error distribution for red and blue galaxies
# Blue exhibits and offset and a less peaked distribution than red
# Red does have some skew
data = df.loc[np.logical_and(df['QUIESCENT'], quality)]
blue = df.loc[np.logical_and(~df['QUIESCENT'], quality)]
delta_red = data['Z_PHOT'] - data['Z_OBS']
delta_blue = blue['Z_PHOT'] - blue['Z_OBS']
delta_all = df.loc[quality, 'Z_PHOT'] - df.loc[quality, 'Z_OBS']

x = 0.1
plt.figure(figsize=(10, 5))
plt.hist(delta_red, bins=50, range=(-x, x), histtype='step', color='red', label='Red')
plt.hist(delta_blue, bins=50, range=(-x, x), histtype='step', color='blue', label='Blue')
plt.hist(delta_all, bins=50, range=(-x, x), histtype='step', color='k', label='All')
#plt.yscale('log')
plt.xlabel('Photo-z - Spectro-z')
plt.legend()

# draw a vertical line at 0
plt.axvline(0, color='black', lw=1)
plt.axvline(-SIM_Z_THRESH, color='green')
plt.axvline(SIM_Z_THRESH, color='green')

percentiles = np.percentile(delta_all, [16, 50, 84])
print(f"Median delta z: {percentiles[1]:.4f}, 16th percentile: {percentiles[0]:.4f}, 84th percentile: {percentiles[2]:.4f}")
# add bars for the percentiles
#plt.axvline(percentiles[0], color='green')
#plt.axvline(percentiles[2], color='green')

# What % fall within 0.005 of the true redshift?
within_5_milli = np.abs(delta_all) < SIM_Z_THRESH
print(f"{np.sum(within_5_milli) / len(delta_all) * 100:.2f}% of galaxies have a photometric redshift within {SIM_Z_THRESH} of the spectroscopic redshift.")
print(f"For red: {np.sum(np.abs(delta_red) < SIM_Z_THRESH) / len(delta_red) * 100:.2f}%")
print(f"For blue: {np.sum(np.abs(delta_blue) < SIM_Z_THRESH) / len(delta_blue) * 100:.2f}%")

# Find the +/- that gives 95% of the data
percentiles = np.percentile(delta_all, [2.5, 97.5])
print(f"2.5th percentile: {percentiles[0]:.4f}, 97.5th percentile: {percentiles[1]:.4f}")


## BGS and SDSS Target Overlap Analysis

TODO: SDSS magnitudes have e-corrections applied to them that the fastspecfit magnitudes for BGS do not have.

In [None]:
# Sloan BGS overlap - are their abs mag's the same?
# Control for different z fits

bgs_to_use = bgs_y3_pzp_2_4_c1.all_data
#lost_bgs = bgs_to_use.loc[z_flag_is_not_spectro_z(bgs_to_use['Z_ASSIGNED_FLAG'])]
# Keep only galaxies with a spectroscopic redshift from the respective survey.
sdss_obs = sdss_vanilla_v1.all_data.loc[sdss_vanilla_v1.all_data['Z_ASSIGNED_FLAG'] == AssignedRedshiftFlag.SDSS_SPEC.value]
bgs_obs = bgs_to_use.loc[bgs_to_use['Z_ASSIGNED_FLAG'] == AssignedRedshiftFlag.DESI_SPEC.value].reset_index()

catalog = coord.SkyCoord(ra=sdss_obs.RA.to_numpy()*u.degree, dec=sdss_obs['DEC'].to_numpy()*u.degree, frame='icrs')
to_match = coord.SkyCoord(ra=bgs_obs.RA.to_numpy()*u.degree, dec=bgs_obs['DEC'].to_numpy()*u.degree, frame='icrs')

# For each BGS galaxy, find the nearest SDSS galaxy; accept the pair if within 1 arcsec.
idx, d2d, d3d = coord.match_coordinates_sky(to_match, catalog, nthneighbor=1, storekdtree=False)
matched = d2d < 1*u.arcsec

# Copy the SDSS values onto the BGS rows (NaN where there is no match).
bgs_obs['SDSS_LOGLGAL'] = np.where(matched, sdss_obs['LOGLGAL'].to_numpy()[idx], np.nan)
bgs_obs['SDSS_Z'] = np.where(matched, sdss_obs['Z'].to_numpy()[idx], np.nan)
# .copy() so the column assignments below write to an independent frame rather than
# to a filtered slice (avoids pandas' SettingWithCopyWarning).
bgs_obs = bgs_obs.loc[matched].copy()
# Note: the denominator is the full BGS catalog, not just its spectroscopic subset.
print(f"Matched {len(bgs_obs)} out of {len(bgs_to_use)}")

bgs_obs['M_R'] = log_solar_L_to_abs_mag_r(bgs_obs['LOGLGAL'])
# Linear correction in (z - 0.1) with slope Q, subtracted from the SDSS magnitudes only.
Q=1.6
e_corr = Q*(bgs_obs['SDSS_Z'] - 0.1)
bgs_obs['SDSS_M_R'] = log_solar_L_to_abs_mag_r(bgs_obs['SDSS_LOGLGAL']) - e_corr

In [None]:
# Histograms to compare
# Restrict to galaxies brighter than M_r = -22 in either survey.
#sample = bgs_obs  # alternative: use the full matched sample
sample = bgs_obs.loc[np.logical_or(bgs_obs['M_R'] < -22, bgs_obs['SDSS_M_R'] < -22)]

# plot the difference
diff = sample['M_R'] - sample['SDSS_M_R']
delta_z = sample['Z'] - sample['SDSS_Z']
zagreed = np.abs(delta_z) < 0.0001
print(f"Catastrophic z mismatch: {np.sum(np.abs(delta_z) > 0.005) / len(delta_z) * 100:.2f}%")
# Only compare magnitudes where the two surveys agree on the redshift.
diff = diff[zagreed]
print(f"Keeping {len(diff)} out of {len(sample)}")
print(f"Catastrophic Mr mismatch: {np.sum(np.abs(diff) > 0.5) / len(diff) * 100:.2f}%")

plt.figure()
plt.hist(diff, bins=np.linspace(-1, 1, 100), histtype='step')
plt.xlabel('Difference in M_R')
plt.ylabel('Number of Galaxies')
plt.title('BGS $M_r$ - SDSS $M_r$')
plt.axvline(0, color='red', linestyle='--')
plt.yscale('log')
plt.tight_layout()
plt.show()  # was an argument-less plt.plot(), which draws nothing

plt.figure()
plt.hist(delta_z, bins=np.linspace(-0.005, 0.005, 200), histtype='step')
plt.xlabel('Difference in z')
plt.ylabel('Number of Galaxies')
plt.title('BGS z - SDSS z')
plt.axvline(0, color='red', linestyle='--')
plt.yscale('log')
plt.tight_layout()
plt.show()  # was an argument-less plt.plot(), which draws nothing

# For 1,2,3 sigma
percentages = np.percentile(np.abs(diff), [68, 95, 99.7])
print(f"68% of galaxies have an absolute difference within {percentages[0]:.2f}")
print(f"95% of galaxies have an absolute difference within {percentages[1]:.2f}")
print(f"99.7% of galaxies have an absolute difference within {percentages[2]:.2f}")

In [None]:
# difference in abs mag as a function of BGS mag
delta_z = bgs_obs['Z'] - bgs_obs['SDSS_Z']
zagreed = np.abs(delta_z) < 0.0001
# .copy() so the new columns below are written to an independent frame rather than
# to a filtered slice of bgs_obs (avoids pandas' SettingWithCopyWarning).
bgs_obs_zagreed = bgs_obs.loc[zagreed].copy()

# make abs mag bins; each bin is labelled by its left edge
bins = np.linspace(-23.5, -16.5, 29)
bgs_obs_zagreed['M_R_BIN'] = pd.cut(bgs_obs_zagreed['M_R'], bins=bins, labels=bins[:-1], include_lowest=True)
bgs_obs_zagreed['M_R_DIFF'] = bgs_obs_zagreed['M_R'] - bgs_obs_zagreed['SDSS_M_R']
bgs_obs_zagreed['M_R_ABS_DIFF'] = np.abs(bgs_obs_zagreed['M_R'] - bgs_obs_zagreed['SDSS_M_R'])

# Counts in each bin (uncomment the print to inspect)
counts = bgs_obs_zagreed.groupby('M_R_BIN').size()
#print(counts)


def _plot_mr_interval(frame, column, ylabel, ylim, title):
    """Plot, per M_R_BIN, the median of `column` with its 2.5-97.5 percentile range.

    Percentiles are used instead of a standard deviation so the error bars can be
    asymmetric. Returns the per-bin percentile table.
    """
    pct = frame.groupby('M_R_BIN')[column].apply(lambda x: np.percentile(x, [2.5, 50, 97.5]))
    pct = pd.DataFrame(pct.tolist(), index=pct.index, columns=['2.5', 'median', '97.5'])

    plt.figure()
    plt.errorbar(pct.index, pct['median'], yerr=[pct['median'] - pct['2.5'], pct['97.5'] - pct['median']], fmt='o', capsize=3)
    plt.xlabel('$M_r^{BGS}$')
    plt.xticks(rotation=45)
    plt.ylabel(ylabel)
    plt.ylim(*ylim)
    plt.title(title)
    plt.axhline(0, color='red', linestyle='--')
    plt.tight_layout()
    plt.grid()
    plt.show()  # was an argument-less plt.plot(), which draws nothing
    return pct


# Signed difference: median and 95% interval per bin
binned = _plot_mr_interval(bgs_obs_zagreed, 'M_R_DIFF', '95% Interval of $M_r^{BGS} - M_r^{SDSS}$',
                           (-0.8, 0.8), 'BGS vs SDSS Difference in $M_r$')

# Absolute difference: median and 95% interval per bin
binned = _plot_mr_interval(bgs_obs_zagreed, 'M_R_ABS_DIFF', '95% Interval of |$M_r^{BGS} - M_r^{SDSS}$|',
                           (-0.05, 1.0), 'BGS vs SDSS Absolute Difference in $M_r$')


## Info for clustering

In [None]:
bgs_sv3_pz_2_4_10p.all_data

In [None]:
bgs_sv3_pz_2_4_10p.all_data

tbl = Table.read(IAN_BGS_SV3_MERGED_NOY3_FILE)
tbl.keep_columns(['TARGETID', 'ABSMAG01_SDSS_R'])
df = tbl.to_pandas()

df = bgs_sv3_pz_2_4_10p.all_data.join(df.set_index('TARGETID'), on='TARGETID')

df['R'] = log_solar_L_to_abs_mag_r(df['LOGLGAL'])


In [None]:
# Why are some observed redshifts higher than my theoretical max?
# Peculiar velocity of course. Since it should be symmetric in the redshift direction, it's okay to use the theoretical max.
mags = np.array([-14, -15, -16, -17, -18, -19, -20, -21, -22, -23])

# One table per (flux limit, banner) pair; LIM is left at the last value (19.5) afterwards.
# A (20.0, "BGS FAINT FLUXLIM") table was disabled in the original cell and is omitted here.
for LIM, banner in ((17.6, "SDSS FLUXLIM"), (19.5, "BGS BRIGHT FLUXLIM")):
    print(banner)
    print("DIM")
    for m in mags:
        print(f"M_R={m}: THEORY MAX={get_max_observable_z(m, LIM).value:.5f}")
        #print(f"M_R={m}: THEORY MAX={get_max_observable_z_m30(m, LIM).value:.5f}")
    print("BRIGHT")

In [None]:
# Figure out abs mag bins redshift maxes to use
#df = bgs_sv3_pz_2_4_10p.all_data
#df['MAG'] = log_solar_L_to_abs_mag_r(np.log10(df['L_GAL']))

df = df.loc[df['Z_ASSIGNED_FLAG'] == AssignedRedshiftFlag.DESI_SPEC.value]

mags = np.array([-14, -15, -16, -17, -18, -19, -20, -21, -22, -23])

# Why are some observed redshifts higher than my theoretical max? 
# Peculiar velocity of course. Since it should be symmetric in the redshift direction, it's okay to use the theoretical max.
print("DIM")
for m in mags:
    # This column is k-corr to 0.1 and uses h=1.0
    # Not sure why OBS MAX is so high for some of them (beyond vpec...)
    print(f"ABSMAG01_SDSS_R > {m}: THEORY MAX={get_max_observable_z(m, 19.5).value:.5f}  OBS MAX={df.loc[df['ABSMAG01_SDSS_R'] > m, 'Z'].max()}")
print("BRIGHT")

print("DIM")
for m in mags:
    # This is k-corr to 0.1 as well using my method I think
    print(f"R^0.1 > {m}: THEORY MAX={get_max_observable_z(m, 19.5).value:.5f}  OBS MAX={df.loc[df['R'] > m, 'Z'].max()}")
print("BRIGHT")

# For noncumulative mag ranges now, using the theory values
print("\n Generic Cosmology")
mags = np.array([-14, -15, -16, -17, -18, -19, -20, -21, -22, -23])
zmaxes = np.array([get_max_observable_z(m, 19.5).value for m in mags])
zmins = np.array([get_max_observable_z(m+1, 19.5).value for m in mags])
for m, zmin, zmax in zip(mags, zmins, zmaxes):
    print(f"{m} < Mag-5log(h) <= {m+1}:  zmin={zmin:.5f}  zmax={zmax:.5f}")

print("\n MXXL Cosmology")
zmaxes = np.array([get_max_observable_z_mxxlcosmo(m, 19.5).value for m in mags])
zmins = np.array([get_max_observable_z_mxxlcosmo(m+1, 19.5).value for m in mags])
for m, zmin, zmax in zip(mags, zmins, zmaxes):
    print(f"{m} < Mag-{5*np.log10(0.7):.2f} <= {m+1}:  zmin={zmin:.5f}  zmax={zmax:.5f}")

In [None]:
bgs_sv3_pz_2_4_10p.calculate_projected_clustering()
# why is n=0? # BUG
pp.wp_rp(bgs_sv3_pz_2_4_10p.wp_all[0], bgs_sv3_pz_2_4_10p.wp_all[1])#, bgs_sv3_pz_2_4_10p.wp_all[2], bgs_sv3_pz_2_4_10p.wp_all[3])

In [None]:
print(f"Co-moving Dist:  {get_cosmology().comoving_distance([0.01, 0.1, 0.2, 0.4]).value}") # / Mpc/h
print(f"Co-moving Dist:  {get_cosmology().luminosity_distance([0.01, 0.1, 0.2, 0.4]).value / np.array([1.01, 1.1, 1.2, 1.4])}") # / Mpc/h
print(f"Luminosity Dist: {get_cosmology().luminosity_distance([0.01, 0.1, 0.2, 0.4]).value}") # / Mpc/h

### SDSS Tutorial of Corrfunc

In [None]:
# NOTE(review): Corrfunc, pjoin, dirname, abspath, read_catalog and calculate_wp are not
# imported anywhere visible in this notebook; presumably they arrive via one of the
# star-imports — confirm, or import them explicitly.
# Mock catalog (SDSS-North) supplied with Corrfunc
mock_catalog = pjoin(dirname(abspath(Corrfunc.__file__)), "../mocks/tests/data/", "Mr19_mock_northonly.rdcz.ff")
RA, DEC, CZ = read_catalog(mock_catalog)

# Randoms catalog (SDSS-North) supplied with Corrfunc
randoms_catalog = pjoin(dirname(abspath(Corrfunc.__file__)), "../mocks/tests/data/", "Mr19_randoms_northonly.rdcz.ff")
RAND_RA, RAND_DEC, RAND_CZ = read_catalog(randoms_catalog)

# Compute w_p for the mock against its randoms.
rbins, wp = calculate_wp(RA, DEC, CZ, RAND_RA, RAND_DEC, RAND_CZ)

# NOTE(review): here pp.wp_rp receives (rbins, wp), while other cells pass a single
# catalog object — confirm which call signature pp.wp_rp currently supports.
pp.wp_rp(rbins, wp)

In [None]:
j=plt.hist(RAND_CZ, bins=100, histtype='step', density=True)
j=plt.hist(CZ, bins=100, histtype='step', density=True)

## Code to support others' projects

### Bernie

In [None]:
# Massive-halo selection from the BGS Y3 catalog: keep rows above the halo-mass
# cut, then apply a declination cut.
MASS_CUT = 10**15.0
df = bgs_y3_pzp_2_4.all_data.loc[lambda d: d['M_HALO'] > MASS_CUT]
df = df.loc[df['DEC'] < 30.0] # Rough for ACT footprint matching

print("\nBGS Y3")
#print(df['Z_ASSIGNED_FLAG'].value_counts())
# Ten most luminous rows of the selection.
print(df.sort_values('L_GAL', ascending=False)[['RA', 'DEC', 'L_GAL', 'Z', 'Z_PHOT', 'Z_OBS', 'Z_ASSIGNED_FLAG']].head(10))

# Count 1: rows whose IGRP equals their own index, with N_SAT >= 14.
centrals = df.loc[df['IGRP'] == df.index]
clusters = centrals.loc[centrals['N_SAT'] >= 14]
print(f"Clusters with 15+ members: {len(clusters)}")

# Count 2: IGRP values that appear more than 14 times among the rows left in df.
x = df['IGRP'].value_counts().gt(14)
print(f"Clusters with 15+ members: {x.sum()}")

# Why do the above 2 not agree?
# NOTE(review): count 1 relies on the stored N_SAT column, while count 2 only counts
# rows that survived the M_HALO and DEC cuts above. Presumably members (or the
# central itself) dropped by those cuts explain the difference -- TODO confirm.

# Same as count 2, restricted to rows flagged as spectroscopic redshifts.
df_spec = df.loc[z_flag_is_spectro_z(df['Z_ASSIGNED_FLAG'])]
y = df_spec['IGRP'].value_counts().gt(14)
print(f"Clusters with 15+ spectroscopic members: {y.sum()}")



### Viraj Targets

In [None]:
# Viraj Compare
# Read Viraj's FITS catalog and convert it to a DataFrame indexed by TARGETID.
path = DATA_FOLDER + 'VIRAJ/jura_bgs_bright_catalog_for_ian.fits'
table = Table.read(path)
viraj_df = table.to_pandas().set_index('TARGETID')

In [None]:
def merge_viraj_ian(viraj_df: pd.DataFrame, gc: GroupCatalog):
    """Join Viraj's target table with selected columns of a group catalog.

    Args:
        viraj_df: DataFrame that is joined on its index (the caller indexes it by TARGETID).
        gc: catalog object exposing ``all_data`` (a DataFrame with a 'TARGETID' column
            plus the columns selected below) and ``name``.

    Returns:
        The inner join of ``viraj_df`` with the selected catalog columns, validated as
        one-to-one on the index.

    Prints the IGRP dtype before and after the join, plus a one-line match summary.
    """
    catalog_columns = [
        'Z', 'L_GAL', 'VMAX', 'P_SAT', 'M_HALO', 'N_SAT', 'L_TOT',
        'IGRP', 'WEIGHT', 'app_mag', 'Z_ASSIGNED_FLAG', 'G_R', 'Z_PHOT', 'IS_SAT', 'QUIESCENT',
    ]
    ian_df = gc.all_data.set_index('TARGETID').loc[:, catalog_columns]
    print(ian_df['IGRP'].dtype)
    #ian_df['QUIESCENT'] = ian_df['QUIESCENT'].astype(float)
    ian_df['N_SAT'] = ian_df['N_SAT'].astype(int)

    together = viraj_df.join(ian_df, how='inner', validate='one_to_one')
    print(together['IGRP'].dtype)

    # NOTE(review): with an inner join every returned row has a catalog match, so this
    # count only differs from len(together) if IS_SAT itself is NaN in the catalog.
    n_found = (~np.isnan(together['IS_SAT'])).sum()
    print(f"Viraj targets: {len(viraj_df):,}, Ian {gc.name} Catalog: {len(ian_df):,}, # of Viraj Targets found in Ian's: {n_found:,}")
    return together

In [None]:
# Match Viraj's targets against three group catalogs, in this order.
together1, together2, together3 = [
    merge_viraj_ian(viraj_df, catalog)
    for catalog in (bgs_simple_4_1pass, simple4_BGS, bgs_y3_simple_5)
]

In [None]:
# Rows of the third merged table whose IS_SAT value is NaN, displayed as the cell output.
# NOTE(review): merge_viraj_ian uses an inner join, so this is presumably empty unless
# the catalog itself contains NaN IS_SAT values -- confirm that is the intent.
missing=together3.loc[np.isnan(together3['IS_SAT'])]
missing

In [None]:
# Compare the IGRP dtype in the source catalog with the dtype after the join.
print(bgs_y3_simple_5.all_data['IGRP'].dtype)
print(together3['IGRP'].dtype)

In [None]:
# Convert the merged frame back to an astropy Table (keeping the index as a column)
# and write it next to Viraj's input file, replacing any existing file.
to_write = Table.from_pandas(together3, index=True)
to_write.write(DATA_FOLDER + 'VIRAJ/jura_bgs_bright_catalog_for_ian_matched.fits', overwrite=True)

In [None]:
# Read the matched FITS file back in as a round-trip check.
# Note: this rebinds the notebook-level name `df`, which other cells also assign.
combined = Table.read(DATA_FOLDER + 'VIRAJ/jura_bgs_bright_catalog_for_ian_matched.fits', format='fits')
df = combined.to_pandas()

In [None]:
# Display the re-read matched table as the cell output.
df

## Mock and SV3 Analysis

### UCHUU Issues

In [None]:
# Overlay two halo-mass histograms on shared bins: the assigned M_HALO column and the
# 'uchuu_halo_mass' column scaled by 10**10.
# (presumably the scaling converts catalog units to the units of M_HALO -- TODO confirm)
plt.hist(all_u.all_data['M_HALO'], bins=pp.Mhalo_bins, alpha=0.4)
plt.hist(all_u.all_data['uchuu_halo_mass']*10**10, bins=pp.Mhalo_bins, alpha=0.4)
plt.loglog()

# TODO do we expect the mass distribution of halos to be so different from the UCHUU SHAM catalog and our assigned halo?

In [None]:
# HOD plots for the UCHUU catalog held in `all_u`.
# TODO 1 / VMax corrections do odd thing to UCHUU Truth. Why?
pp.hod_plots(all_u)

### What effect does Fiber Assignment have on the luminosity function?

In [None]:
# Compare centrals' halo masses between the SV3 10-pass catalog and two 7-pass variants.
# NOTE(review): the load of bgs_sv3_simple_5_7p is commented out in the dataset-loading
# cell near the top of the notebook, and bgs_sv3_pz_1_7p does not appear there either --
# confirm both are defined before running this cell.
#pp.group_finder_centrals_halo_masses_plots(mxxl_all, [mxxl_fiberonly, mxxl_simple_4])
pp.group_finder_centrals_halo_masses_plots(bgs_sv3_pz_2_4_10p, [bgs_sv3_pz_1_7p, bgs_sv3_simple_5_7p])

### Compare halos to truth

In [None]:
# Run the assigned-halo analysis plots on the MXXL "simple 4" catalog.
pp.assigned_halo_analysis(mxxl_simple_4)

### Compare assigned implied abs mags to truth from MXXL

In [None]:
# Per-LGAL_BIN row counts. The first line keeps only rows with Z_ASSIGNED_FLAG != 0;
# the second counts every row of mxxl_simple_4.
# NOTE(review): the load of mxxl_all is commented out in the dataset-loading cell at the
# top of the notebook -- confirm it is defined. Also, the second name suggests an
# "unobserved" subset, but no Z_ASSIGNED_FLAG filter is applied -- confirm intent.
all_unobs_counts = mxxl_all.all_data[mxxl_all.all_data['Z_ASSIGNED_FLAG'] != 0].groupby('LGAL_BIN').RA.count()
simple_4_ubobs_counts = mxxl_simple_4.all_data.groupby('LGAL_BIN').RA.count()


In [None]:
# Luminosity-function plot for both MXXL catalogs, passing the per-bin counts
# computed in the previous cell.
pp.L_func_plot([mxxl_all, mxxl_simple_4], [all_unobs_counts, simple_4_ubobs_counts])



#pp.L_func_plot([all, simple_4], [all.all_data.L_gal[all.all_data['Z_ASSIGNED_FLAG'] == 0], simple_4.all_data.L_gal[simple_4.all_data['Z_ASSIGNED_FLAG'] == 0]])


## SV3 Edge Effects Quantification

In [None]:
# Use the SV3 10-pass catalog as the reference (`gc`) for this section, build an
# edge-trimmed version with the default cut, and plot the two together.
gc = bgs_sv3_pz_2_4_10p
inner_galaxies = filter_SV3_to_avoid_edges(gc)
inner_galaxies.color = 'k'
inner_galaxies.name = 'SV3 Inner Galaxies'
pp.plots(inner_galaxies, gc)

In [None]:
# Sky map of the full catalog, then the edge-trimmed subset drawn onto the same figure.
fig = pp.make_map(gc.all_data.RA.to_numpy(), gc.all_data['DEC'].to_numpy())
fig = pp.make_map(inner_galaxies.all_data.RA.to_numpy(), inner_galaxies.all_data['DEC'].to_numpy(), fig=fig)

In [None]:
# Median value of the 'Z' column within each LGAL_BIN.
gc.all_data.groupby('LGAL_BIN')['Z'].median()

In [None]:
# Build one edge-trimmed copy of `gc` per center-cut value, from 1.5 down to 0.9
# (these are labelled as degrees where the versions are plotted), then cache the list
# on disk so it can be reloaded without recomputing.
centering_versions = [
    filter_SV3_to_avoid_edges(gc, center_cut)
    for center_cut in (1.5, 1.4, 1.3, 1.2, 1.1, 1.0, 0.9)
]
# A context manager guarantees the pickle file is flushed and closed; the previous
# bare open() left the handle to be closed whenever it was garbage collected.
with open('centering_versions.pkl', 'wb') as f:
    pickle.dump(centering_versions, f)

In [None]:
import copy

# Low-redshift subsample (Z < 0.03) of the reference catalog. The redshift column is
# addressed as 'Z', matching how the same catalog is indexed elsewhere in this notebook.
lowz = gc.all_data.loc[gc.all_data['Z'] < 0.03]
lowz_gc = copy.deepcopy(gc)
lowz_gc.all_data = lowz
lowz_gc.refresh_df_views()

# Same sequence of center cuts as the full-sample versions, applied to the low-z catalog.
centering_versions_lowz = [
    filter_SV3_to_avoid_edges(lowz_gc, center_cut)
    for center_cut in (1.5, 1.4, 1.3, 1.2, 1.1, 1.0, 0.9)
]

In [None]:
# Reload the cached center-cut catalogs. The result must be bound to a name: the
# previous bare pickle.load(...) call discarded what it read, so this cell only worked
# if `centering_versions` was still in memory from the cell that built it.
# Only load this file if it was written by this notebook; pickle can execute arbitrary code.
with open('centering_versions.pkl', 'rb') as f:
    centering_versions = pickle.load(f)

# Shade each version from black towards green and label it with its center cut.
for i, d in enumerate(centering_versions):
    d.color = [0, i/len(centering_versions), 0]
    d.name = f'SV3 10p, {1.5-i*0.1:.1f} deg center cut'

# Legends are switched off for these plots and switched back on even if a plot call fails.
pp.LEGENDS_ON = False
try:
    gc.color = 'blue'
    pp.fsat_by_zbins_sv3_centers(gc, *centering_versions, z_bins=np.array([0.0, 0.03, 1.0]))
    pp.single_plots(gc)
    pp.single_plots(centering_versions[2])
    pp.single_plots(centering_versions[4])
    pp.single_plots(centering_versions[6])
finally:
    pp.LEGENDS_ON = True

#pp.fsat_by_z_bins(gc, z_bins=np.array([0.0, 0.03, 1.0]))
#for d in centering_versions:
#    pp.fsat_by_z_bins(d, z_bins=np.array([0.0, 0.03, 1.0]))


In [None]:
# Single-catalog plots for the low-z catalog and for its 3rd, 5th and 7th
# center-cut versions (list indices 2, 4 and 6).
lowz_gc.color = 'blue'
pp.single_plots(lowz_gc)
pp.single_plots(centering_versions_lowz[2])
pp.single_plots(centering_versions_lowz[4])
pp.single_plots(centering_versions_lowz[6])


In [None]:
#fig = pp.make_map(gc.all_data.RA.to_numpy(), gc.all_data['DEC'].to_numpy())

#for i, gc in enumerate(centering_versions):
#    fig = pp.make_map(gc.all_data.RA.to_numpy(), gc.all_data['DEC'].to_numpy(), fig=fig)

#plot_positions(gc, *centering_versions, tiles_df=None, split=False, DEG_LONG=7, ra_min = 186.5, dec_min = 60)
# BUG pass in all_data, not the GroupCatalog object
# NOTE(review): only the first argument was switched to `.all_data`; the entries of
# `centering_versions` are still passed as catalog objects. If plot_positions expects
# DataFrames for every positional argument, those need `.all_data` too -- TODO confirm.
plot_positions(gc.all_data, *centering_versions, tiles_df=None, split=False, DEG_LONG=6, ra_min = 147, dec_min = -1)

## Lost Galaxy Luminosity Function

Take a cut of SV3 whose completeness is similar to Y1 BGS.

Question: Is the luminosity function of lost galaxies (that were later observed) different from the luminosity function of observed galaxies?

They seem similar; perhaps a mild slant. Overall it seems that trying to match the observed luminosity function with the lost ones is ok.

Now for lost galaxies in 6pass for which we later obtained redshifts.

Question: What did our processing do to the luminosity function for lost galaxies?

Our processing squeezes the luminosity function. We move galaxies from the wings towards the middle.

# Galaxy Neighborhood Examiner

In [None]:
def add_bsat_column(catalog: GroupCatalog):
    """Add a 'BSAT' column to ``catalog.all_data`` in place.

    If ``catalog.GF_props`` contains 'beta0q', each row gets a value linear in
    ``LOGLGAL - 9.5``: rows with a truthy 'QUIESCENT' use ``beta0q + betaLq * dlogL``,
    all other rows use ``beta0sf + betaLsf * dlogL``. Values below 0.001 are raised to
    0.001. If 'beta0q' is absent, every row gets the constant 10.

    Args:
        catalog: object exposing ``GF_props`` (a mapping of group-finder parameters) and
            ``all_data`` (a DataFrame with 'QUIESCENT' and 'LOGLGAL' columns).

    Returns:
        None. ``catalog.all_data`` is modified in place.

    Raises:
        KeyError: if 'beta0q' is present but one of 'beta0sf', 'betaLq' or 'betaLsf'
            is missing.
    """
    DEFAULT_BSAT = 10   # value used when no beta parameters are available
    LOGL_PIVOT = 9.5    # log-luminosity at which the linear relation equals beta0
    MIN_BSAT = 0.001    # floor applied to the computed values

    props = catalog.GF_props
    if 'beta0q' not in props:
        catalog.all_data['BSAT'] = DEFAULT_BSAT
        return

    galaxies = catalog.all_data
    dlogl = galaxies['LOGLGAL'] - LOGL_PIVOT
    bsat = np.where(
        galaxies['QUIESCENT'],
        props['beta0q'] + props['betaLq']*dlogl,
        props['beta0sf'] + props['betaLsf']*dlogl,
    )
    galaxies['BSAT'] = np.maximum(bsat, MIN_BSAT)

In [None]:
# Augment the SV3 10-pass "c1" catalog with the halo columns and the BSAT column
# used by the neighborhood examiner below.
add_halo_columns(bgs_sv3_pz_2_4_10p_c1)
add_bsat_column(bgs_sv3_pz_2_4_10p_c1)


In [None]:
df = bgs_sv3_pz_2_4_10p_c1.all_data

# Rows that are their own group (IGRP equals the row index) with N_SAT > 1, restricted
# to a thin redshift shell and a thin halo-mass shell (both bounds exclusive).
groups = (
    df.loc[(df['N_SAT'] > 1) & (df['IGRP'] == df.index)]
      .loc[lambda g: (g['Z'] > 0.19) & (g['Z'] < 0.21)]
      .loc[lambda g: (g['M_HALO'] > 10**12.9) & (g['M_HALO'] < 10**13.1)]
)
print(len(groups))

# The 60 rows with the largest halo mass and the 60 most luminous rows.
bighalos = df.sort_values('M_HALO', ascending=False).iloc[:60]
brightest_gals = df.sort_values('L_GAL', ascending=False).iloc[:60]

# Split by whether the assigned redshift is flagged as spectroscopic.
obs_galaxies = df.loc[z_flag_is_spectro_z(df['Z_ASSIGNED_FLAG'])]
lost_galaxies = df.loc[z_flag_is_not_spectro_z(df['Z_ASSIGNED_FLAG'])]
#print("Lost galaxies: ", len(lost_galaxies), "Observed Galaxies: ", len(obs_galaxies))

In [None]:
# Display the selected groups as the cell output.
groups

In [None]:
# Draw PLOTS_TO_MAKE random rows (with replacement) from the pool and plot the
# neighborhood of each one.
PLOTS_TO_MAKE = 10
GALAXY_POOL = groups.reset_index()

#START_INDEX = 777
#for i in range(START_INDEX, START_INDEX + PLOTS_TO_MAKE):
#    index = lost_galaxies.index[i]
#    examine_around(index)
print("Number of galaxies to choose from: ", len(GALAXY_POOL))
# np.random.randint excludes its upper bound, so the bound must be len(GALAXY_POOL):
# the previous len(GALAXY_POOL)-1 could never select the last row and raised for a
# pool of one row.
indexes = np.random.randint(0, len(GALAXY_POOL), size=PLOTS_TO_MAKE)
#indexes = np.arange(0, PLOTS_TO_MAKE)
for i in indexes:
    target = GALAXY_POOL.iloc[i]
    # NOTE(review): `data` is not defined in this section of the notebook -- confirm it
    # refers to the intended galaxy table before running.
    pp.examine_groups_near(target, data, nearby_angle=coord.Angle('7m'), zfilt=0.07)
    #deg = coord.Angle('5m').to(u.degree).value
    #pp.examine_area(target.RA - deg, target.RA + deg, target.DEC - deg, target.DEC + deg, data)

# Tests

In [None]:
# How many halos were assigned below a certain cutoff?
df = bgs_y1_pzp_2_4.all_data
M_HALL_CUT = 10**11
small_halo_df = df[df['M_HALO'] < M_HALL_CUT]

print(len(small_halo_df), len(df))

junk=plt.hist(small_halo_df.z, bins=100)
plt.xlabel('Redshift')
plt.ylabel('Count')
plt.tight_layout()
