In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as c
import h5py
import astropy.coordinates as coord
import astropy.units as u
import numpy.ma as ma
from random import randint
from ctypes import c_uint64
import pickle
from astropy.table import Table
import sys

if './SelfCalGroupFinder/py/' not in sys.path:
    sys.path.append('./SelfCalGroupFinder/py/')
from pyutils import *
from plotting import *
from dataloc import *
import k_correction as kcorr
import kcorr.k_corrections as desikc
#import k_corr_new.k_corrections as desikc2
from nnanalysis import *


In [2]:
%load_ext autoreload
%autoreload 2

# MXXL Experiments

In [3]:
# Which fiber-assignment realization to treat as "observed": bit BIT_CHOICE of
# the BITWORD weight column.
BITWORD = 'bitweight0'
BIT_CHOICE = 0
FIBER_ASSIGNED_SELECTOR = 1 << BIT_CHOICE

# NOTE: the HDF5 handle is not closed in this cell.
infile = h5py.File(MXXL_DATA_DIR + 'weights_3pass.hdf5', 'r')
_mxxl_data = infile['Data']

# Read every per-galaxy column fully into memory.
dec = _mxxl_data['dec'][:]
ra = _mxxl_data['ra'][:]
z_obs = _mxxl_data['z_obs'][:]
app_mag = _mxxl_data['app_mag'][:]
g_r = _mxxl_data['g_r'][:]
abs_mag = _mxxl_data['abs_mag'][:] # We aren't using these; computing ourselves.
galaxy_type = _mxxl_data['galaxy_type'][:]
mxxl_halo_mass = _mxxl_data['halo_mass'][:]
mxxl_halo_id = _mxxl_data['mxxl_id'][:]

# A galaxy counts as observed when the selected bit of its weight word is set.
_fiber_bits = infile['Weight/' + BITWORD][:]
observed = (_fiber_bits & FIBER_ASSIGNED_SELECTOR).astype(bool)


In [4]:
# One row per galaxy, built from the column arrays read from the HDF5 file.
_mxxl_columns = {
    'dec': dec,
    'ra': ra,
    'z_obs': z_obs,
    'app_mag': app_mag,
    'g_r': g_r,
    'abs_mag': abs_mag,
    'galaxy_type': galaxy_type,
    'mxxl_halo_mass': mxxl_halo_mass,
    'mxxl_halo_id': mxxl_halo_id,
    'observed': observed,
}
df = pd.DataFrame(data=_mxxl_columns)

# Magnitude-limited views: app_mag < 19.5 and app_mag < 20.0.
bright_df = df.loc[df['app_mag'] < 19.5]
df20 = df.loc[df['app_mag'] < 20.0]


## Various Simple Plots

In [None]:
def plots_by_app_mag_bins(df):
    """Plot colour and redshift distributions split into apparent-magnitude bins.

    Draws six figures, each with one curve per magnitude bin (5 bins spanning
    16 to 19.5): the g-r histogram, the z_obs histograms of quiescent,
    star-forming, observed and lost (not observed) galaxies, and the
    difference between the lost and observed z_obs densities.

    Side effect: adds (or overwrites) the columns ``mag_bin`` and
    ``quiescent`` on the DataFrame that is passed in.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``app_mag``, ``g_r``, ``z_obs`` and a boolean
        ``observed``.
    """
    # Makes bins of app_mag
    mag_bins = np.linspace(16, 19.5, 6)
    df['mag_bin'] = pd.cut(df.app_mag, mag_bins, labels=False)

    color_bins = np.linspace(0.0, 2.0, 200)
    z_bins = np.linspace(0, 0.5, 50)
    bin_numbers = range(len(mag_bins) - 1)

    def _mag_label(bin_num, suffix=''):
        """Legend text for one apparent-magnitude bin."""
        return f'{mag_bins[bin_num]:.1f} < mag < {mag_bins[bin_num+1]:.1f}{suffix}'

    def _z_hist_by_mag_bin(selection, suffix, title):
        """New figure: normalised z_obs histogram per magnitude bin for rows where `selection` is True."""
        plt.figure()
        for bin_num in bin_numbers:
            in_bin = df.mag_bin == bin_num
            plt.hist(df.z_obs[in_bin & selection], bins=z_bins, histtype='step',
                     label=_mag_label(bin_num, suffix), density=True)
        plt.xlabel('z_obs')
        plt.legend()
        plt.xlim(0, 0.5)
        plt.title(title)

    # Make histogram of g_r by the app mag bins
    plt.figure()
    for bin_num in bin_numbers:
        in_bin = df.mag_bin == bin_num
        plt.hist(df.g_r[in_bin], bins=color_bins, histtype='step', label=_mag_label(bin_num), density=True)
    plt.xlabel('g-r')
    plt.legend()
    plt.xlim(0.5, 1.0)

    # Make quiescent cut in df
    df['quiescent'] = is_quiescent_BGS_gmr(None, df.g_r)

    # z_obs by app mag bin, first for quiescent and then for non-quiescent galaxies
    _z_hist_by_mag_bin(df.quiescent, ' quiescent', 'Red galaxies')
    _z_hist_by_mag_bin(~df.quiescent, ' star-forming', 'Blue galaxies')

    # z_obs by app mag bin for the observed galaxies, then for the lost ones
    _z_hist_by_mag_bin(df.observed, '', 'Observed galaxies')
    _z_hist_by_mag_bin(~df.observed, ' lost', 'Lost galaxies')

    # Bin the observed galaxies and the lost ones separately and then subtract them to see the difference
    plt.figure()
    for bin_num in bin_numbers:
        in_bin = df.mag_bin == bin_num
        obs_density, _ = np.histogram(df.z_obs[in_bin & df.observed], bins=z_bins, density=True)
        lost_density, _ = np.histogram(df.z_obs[in_bin & ~df.observed], bins=z_bins, density=True)
        # Plot against the left bin edges; derived from z_bins rather than a hard-coded 49.
        plt.plot(z_bins[:-1], lost_density - obs_density, label=_mag_label(bin_num))
    plt.xlabel('z_obs')
    plt.legend()
    #plt.yscale('log')
    plt.title("Compare Lost-Observed Distributions")

plots_by_app_mag_bins(df)

In [None]:

# Split the bright sample once, then overlay the normalised distributions.
_obs_subsets = (
    ('Observed', bright_df[bright_df.observed]),
    ('Unobserved', bright_df[~bright_df.observed]),
)

# abs_mag distribution for observed and unobserved galaxies
bins = np.linspace(-24, -12, 50)
plt.figure()
for _label, _subset in _obs_subsets:
    plt.hist(_subset.abs_mag, bins=bins, histtype='step', label=_label, density=True)
plt.xlabel('abs_mag')
#plt.yscale('log')
plt.legend()

# app_mag distribution for observed and unobserved galaxies
bins = np.linspace(15, 19.5, 50)
plt.figure()
for _label, _subset in _obs_subsets:
    plt.hist(_subset.app_mag, bins=bins, histtype='step', label=_label, density=True)
plt.xlabel('app_mag')
#plt.yscale('log')
plt.legend()

In [None]:
# Restrict df to app_mag < 19.5 and renumber the rows from 0.
# Note: this rebinds `df`, so every later cell sees only the bright sample.
keep = df.app_mag < 19.5
df = df[keep].reset_index(drop=True)

# Row positions of the galaxies flagged as observed.
indexes_assigned = np.argwhere(df.observed.to_numpy())

# Valid positions run 0..len(df)-1, so the bound must be strict; the size
# guard keeps np.max from raising when no galaxy is flagged as observed.
assert indexes_assigned.size == 0 or np.max(indexes_assigned) < len(df), "Assigned indexes are out of range"

In [None]:
# The g-r distribution looks a lot like BGS, so 0.76 is fine for the color cut.
junk = plt.hist(df['g_r'], bins=100)
plt.xlim(0.5, 0.9)

In [None]:
# Number of leading rows the quick-look cells below read from the file.
DATA_CUT_INDEX = 1000000 #21201544 #3000000 

weights = h5py.File(MXXL_DATA_DIR + 'weights_3pass.hdf5', 'r')

# List the top-level groups, then the members of the 'Data' and 'Weight' groups.
for _h5_node in (weights, weights['Data'], weights['Weight']):
    print(list(_h5_node))

In [None]:
# Quick look at the redshift distribution of the first DATA_CUT_INDEX rows.
small_z_obs = weights['Data/z_obs'][0:DATA_CUT_INDEX]
# Store the (counts, edges, patches) tuple under its own name; it used to be
# assigned to `angular_bins`, which other cells use for angular-separation bins.
z_obs_hist = plt.hist(small_z_obs, bins=50)
plt.xlabel("$z_{obs}$")
plt.title("Histogram of Observed Redshifts")


In [None]:
# First DATA_CUT_INDEX rows of the position and magnitude columns.
_head = slice(0, DATA_CUT_INDEX)
mxxl_ra = weights['Data/ra'][_head]
mxxl_dec = weights['Data/dec'][_head]
mxxl_app_mag = weights['Data/app_mag'][_head]

# Same three arrays restricted to app_mag < 19.5.
bright_filter = mxxl_app_mag < 19.5
mxxl_ra_bright, mxxl_dec_bright, mxxl_app_mag_bright = (
    column[bright_filter] for column in (mxxl_ra, mxxl_dec, mxxl_app_mag)
)

In [None]:
# Area estimate for the truncated sample, then for its app_mag < 19.5 subset.
for _sample_ra, _sample_dec in ((mxxl_ra, mxxl_dec), (mxxl_ra_bright, mxxl_dec_bright)):
    print(estimate_frac_area(_sample_ra, _sample_dec))

In [None]:
# Map a random 2% subsample (without replacement) of the truncated catalogue.
n_mxxl = len(mxxl_ra)
rnd_indices = np.random.choice(n_mxxl, n_mxxl//50, replace=False)
# No trailing comma here: it used to wrap make_map's return value in a 1-tuple.
fig = make_map(mxxl_ra[rnd_indices], mxxl_dec[rnd_indices])  # This looks like Alex' paper, good


In [None]:

# Use a separate name for the truncated slice so the full-length
# `mxxl_halo_id` array read earlier is not overwritten.
small_halo_id = weights['Data/mxxl_id'][0:DATA_CUT_INDEX]
small_gal_type = weights['Data/galaxy_type'][0:DATA_CUT_INDEX]

# Print the fraction explicitly; as a bare mid-cell expression it was never shown.
no_halo_id = small_halo_id == 0
print(f"Fraction of galaxies with MXXL halo ID 0: {np.mean(no_halo_id):.4f}")
# 2.5% of galaxies have 0 for the MXXL Halo ID because they are in halos that were added by hand post-simulation
# This was done because the small halos were not resolved in the simulation
# Gal type 2 and 3 are central and satellite galaxies that were unresolved

# Galaxy types of the galaxies that have no halo ID.
weird_indexes = np.argwhere(no_halo_id)
weird_types = small_gal_type[weird_indexes]
trash = plt.hist(weird_types)

In [None]:
app_mag = df.app_mag.to_numpy()
# Keep the histogram tuple out of `angular_bins`, which other cells use for
# angular-separation bin edges.
app_mag_hist = plt.hist(app_mag, bins=50)
plt.xlabel("Apparent Mag")
plt.title("Histogram of Apparent Mags")

In [None]:
# Density of galaxies per sq degree
# Count from df20 (app_mag < 20.0): `df` has already been cut to app_mag < 19.5
# by an earlier cell, which made the 19.5-20.0 count always zero.
SKY_AREA_SQ_DEG = 14000  # area used by the original estimate — TODO confirm footprint
n_below_19p5 = np.sum(df20.app_mag < 19.5)
n_19p5_to_20 = np.sum((df20.app_mag > 19.5) & (df20.app_mag < 20.0))
print(f"There are ~{n_below_19p5 / SKY_AREA_SQ_DEG:.0f} galaxies/deg^2 < 19.5 mag")
print(f"There are ~{n_19p5_to_20 / SKY_AREA_SQ_DEG:.0f} galaxies/deg^2 between 19.5 and 20.0 mag")

## Abs Mag, K correction Analysis

In [None]:
# Plain arrays of the current df columns for the k-correction code.
z_obs = df['z_obs'].to_numpy()
g_r = df['g_r'].to_numpy()

# Absolute magnitude from apparent magnitude and redshift
# (presumably without any k-correction — confirm in pyutils).
R = app_mag_to_abs_mag(df['app_mag'].to_numpy(), z_obs)

# Subtract the GAMA r-band k-correction evaluated at (z, g-r).
kcorr_r_gama = kcorr.GAMA_KCorrection(band='R')
_k_gama = kcorr_r_gama.k(z_obs, g_r)
R_k_GAMA = R - _k_gama

In [None]:
# Same correction using the DESI BGS k-correction tables, photsys 'N'.
kcorr_r_bgs = desikc.DESI_KCorrection(band='R', file='jmext', photsys='N')
_k_bgs_n = kcorr_r_bgs.k(z_obs, g_r)
R_k_BGS = R - _k_bgs_n

In [None]:
# DESI BGS k-correction with photsys 'S', for comparison against the 'N' version.
kcorr_r_bgs2  = desikc.DESI_KCorrection(band='R', file='jmext', photsys='S')
# Apply the 'S' corrector built on the line above. This used to call
# kcorr_r_bgs (the 'N' corrector), so R_k_BGS2 was identical to R_k_BGS by construction.
R_k_BGS2 = R - kcorr_r_bgs2.k(z_obs, g_r)

In [None]:
# N vs S doesn't matter: fraction of galaxies whose two k-corrected magnitudes
# agree within rtol=1e-5.
# NOTE(review): only meaningful if R_k_BGS2 really came from the 'S' corrector — confirm.
_n_s_agree = np.isclose(R_k_BGS, R_k_BGS2, rtol=1e-5)
_n_s_agree.sum() / len(R_k_BGS)

In [None]:
# Overlay the k-corrected absolute-magnitude distributions (GAMA vs. BGS 'N' tables).
bins = np.linspace(-25, -10, 100)
_k_corrected = (
    (R_k_GAMA, "my GAMA k abs_mag"),
    (R_k_BGS, "my BGS k abs_mag N"),
)
for _mags, _tag in _k_corrected:
    # my_k_* keep the values from the last histogram drawn (BGS N).
    my_k_counts, my_k_bins, my_k_p = plt.hist(_mags, label=_tag, bins=bins, alpha=0.5)
plt.xlabel("Absolute Mag")
plt.ylabel("Count")
plt.title("Compare Absolute Mags")
plt.yscale('log')
plt.legend()

## What is a reasonable z fudge factor for 'close enough' redshifts, given galaxies' peculiar velocities $v_{\mathrm{pec}}$?

Galaxies move at hundreds of km/s usually, or thousands in a rich cluster.

Two galaxies moving at 750 km/s towards each other along the line of sight (LOS), but at the same cosmological redshift, would have a total redshift difference of $\Delta z \approx \Delta v / c = 1500 / (3 \times 10^{5}) = 0.005$.

Adopting z +/- 0.005 is a generous definition then that works for somewhat extremal cases in richer areas.

In [None]:
# What is a reasonable z +/- fudge factor for 'close enough' redshifts?
# Consider peculiar velocities: convert a few candidate redshift offsets to km/s.
z_test = [0.001, 0.002, 0.003, 0.005, 0.01] * u.dimensionless_unscaled
v_pec = z_test.to(u.km / u.s, u.equivalencies.doppler_redshift())
for _z_val, _v_val in zip(z_test, v_pec):
    print(f"z={_z_val:.3f} is {_v_val:.0f}")



## Get Truth Abs Mag for Correcting

This is for the 'fancy' approach that we don't use.

In [None]:
APP_MAG_CUT = 19.5

# Full-length columns straight from the file.
app_mag = weights['Data/app_mag'][:]
z_obs = weights['Data/z_obs'][:]

# Keep galaxies brighter than the cut that also have a positive redshift.
bright_filter = app_mag < APP_MAG_CUT
redshift_filter = z_obs > 0
keep = bright_filter & redshift_filter

app_mag, z_obs = app_mag[keep], z_obs[keep]

my_abs_mag = app_mag_to_abs_mag(app_mag, z_obs)

In [None]:
# Normalised absolute-magnitude distribution over 99 equal-width bins.
# The edges get their own name; they used to overwrite `angular_bins`, which
# other cells use for angular-separation bins.
abs_mag_bins = np.linspace(np.min(my_abs_mag), np.max(my_abs_mag), 100)
densities, abs_mag_bins = np.histogram(my_abs_mag, bins=abs_mag_bins, density=True)
t = plt.hist(my_abs_mag, abs_mag_bins, density=True)

# Two arrays are written back to back into one file: densities, then bin edges.
with open('bin/abs_mag_weight.npy', 'wb') as f:
    np.save(f, densities, allow_pickle=False)
    np.save(f, abs_mag_bins, allow_pickle=False)

In [None]:
# Read the arrays back in the order they were written: densities, then bin edges.
# The edges are kept out of `angular_bins`, which other cells use for
# angular-separation bins.
with open('bin/abs_mag_weight.npy', 'rb') as f:
    densities = np.load(f)
    abs_mag_bins = np.load(f)

# Plot against the left bin edges; derived from the array rather than a hard-coded 99.
plt.plot(abs_mag_bins[:-1], densities)
#plt.yscale('log')
#plt.yscale('log')

## Build map of apparent mag to z distribution

In [None]:
# Map built from every galaxy with app_mag < 20.0 ...
app_mag_bins, the_map = build_app_mag_to_z_map(df20['app_mag'], df20['z_obs'])

# ... and the same map built only from the lost (not observed) galaxies.
lost_df = df20.loc[~df20['observed']]
app_mag_bins2, the_map2 = build_app_mag_to_z_map(lost_df['app_mag'], lost_df['z_obs'])

In [None]:
# Compare the redshift distributions stored under three map keys (10, 50, 90)
# for the all-galaxy map and for the lost-galaxy map.
plt.figure()
_map_styles = (
    (the_map, ('blue', 'red', 'green')),
    (the_map2, ('cyan', 'orange', 'lightgreen')),
)
for _z_map, _colors in _map_styles:
    for _key, _color in zip((10, 50, 90), _colors):
        trash = plt.hist(_z_map[_key], bins=30, color=_color, density=True, histtype='step')

In [None]:
# Sample sizes: lost galaxies, then all galaxies with app_mag < 20.0.
for _table in (lost_df, df20):
    print(len(_table))

# Persist the lost-galaxy map as a (bins, map) tuple.
_payload = (app_mag_bins2, the_map2)
with open(IAN_MXXL_LOST_APP_TO_Z_FILE, 'wb') as f:
    pickle.dump(_payload, f)


In [None]:
# Round-trip check: reload the pickled (bins, map) tuple and compare one entry's length.
with open(IAN_MXXL_LOST_APP_TO_Z_FILE, 'rb') as f:
    _loaded = pickle.load(f)
app_mag_bins_read, the_map_read = _loaded

assert len(the_map_read[5]) == len(the_map2[5])

## Nearest Neighbor Angular Separation and Same-Halo Analysis

In [5]:
APP_MAG_CUT = 20.0

# Re-read the full-length columns from the file. Several of these names
# (app_mag, z_obs, g_r, mxxl_halo_id) are rebound to filtered or truncated
# arrays by earlier cells, which made this cell fail or mis-align when the
# notebook was run top to bottom. Reading them here also makes the cell safe
# to re-run.
dec = infile['Data/dec'][:]
ra = infile['Data/ra'][:]
z_obs = infile['Data/z_obs'][:]
app_mag = infile['Data/app_mag'][:]
g_r = infile['Data/g_r'][:]
mxxl_halo_id = infile['Data/mxxl_id'][:]
observed = (infile['Weight/'+BITWORD][:] & FIBER_ASSIGNED_SELECTOR).astype(bool)

bright_filter = app_mag < APP_MAG_CUT # makes a filter array (True/False values)
redshift_filter = z_obs > 0 # makes a filter array (True/False values)
keep = np.all([bright_filter, redshift_filter], axis=0)

dec = dec[keep]
ra = ra[keep]
z_obs = z_obs[keep]
app_mag = app_mag[keep]
mxxl_halo_id = mxxl_halo_id[keep]
g_r = g_r[keep]
quiescent = is_quiescent_BGS_gmr(None, g_r)
observed = observed[keep]
unobserved = np.invert(observed)

# prob_obs is stored for the full catalogue, so apply the same mask.
with open(MXXL_PROB_OBS_FILE, 'rb') as f:
    prob_obs = np.load(f)
prob_obs_cut = prob_obs[keep]

# Load the cached k-corrected absolute magnitudes, recomputing them only when
# the cache is missing or unreadable. Other errors are no longer swallowed.
# NOTE(review): loading a pickle executes code from the file — only use trusted caches.
try:
    with open(MXXL_ABS_MAG_R_FILE, 'rb') as f:
        abs_mag = pickle.load(f)
except (OSError, EOFError, pickle.UnpicklingError) as err:
    print(f"Error getting abs mag ({err!r}); recomputing")
    abs_mag = app_mag_to_abs_mag_k(app_mag, z_obs, g_r, band='r')
    with open(MXXL_ABS_MAG_R_FILE, 'wb') as f:
        pickle.dump(abs_mag, f)

# A cache written under a different cut would silently mis-align with the other arrays.
assert len(abs_mag) == len(app_mag), "Cached abs_mag does not match the current selection; delete the cache file"

In [None]:
# Nearest-neighbour analysis on the MXXL sample, restricted to lost galaxies.
obj = NNAnalyzer(dec, ra, z_obs, app_mag, abs_mag, mxxl_halo_id, g_r, quiescent, observed, prob_obs_cut)
_bright_rows = app_mag < 19.5
obj.set_row_locator(_bright_rows) # BRIGHT only
obj.find_nn_properties(LOST_GALAXIES_ONLY=True)
obj.make_bins()

# Totals of the three bin-count arrays.
for _bincounts in (obj.all_ang_bincounts, obj.all_same_halo_bincounts, obj.all_sim_z_bincounts):
    print(np.sum(_bincounts))

In [None]:
# Same analysis with NNAnalyzer_cic; this variant is built without halo IDs.
newobj = NNAnalyzer_cic.from_data(dec, ra, z_obs, app_mag, abs_mag, g_r, quiescent, observed, prob_obs_cut)
#newobj.set_row_locator(app_mag < 19.5) # BRIGHT only
newobj.find_nn_properties(LOST_GALAXIES_ONLY=True) 
newobj.make_bins()
newobj.save(NEIGHBOR_ANALYSIS_MXXL_BINS_FILE)

# Report the totals of the object built in this cell. These prints used to
# read from `obj`, the analyzer created in a different cell.
# NOTE(review): assumes NNAnalyzer_cic exposes the same bincount attributes — confirm.
print(np.sum(newobj.all_ang_bincounts))
print(np.sum(newobj.all_sim_z_bincounts))

In [None]:
# This shows that there is little information in NN ABS MAG
obj.plot_angdist_absmag_per_zbin_cc()

In [None]:
# There is good information in the lost galaxy's apparent r-mag because it is, statistically, a distance proxy!
obj.plot_angdist_appmag_per_zbin_cc()

In [None]:
obj.plot_angdist_pobs_per_zbin_cc()

In [None]:
newobj.plot_angdist_pobs_per_zbin_cc()

In [None]:
obj.plot_angdist_pobs_per_zbin_cc(simz=False)

In [None]:
#'pobs_bin', 'nn_quiescent', 'quiescent', 'nn1_z_bin', 'app_mag_bin', 'nn_ang_dist_bin']
def value_to_key(pobs, nn_q, q, z, mag, dist, nn_abs_mag):
    """Turn physical values into the tuple of bin indexes used as a lookup key.

    The continuous inputs are digitized against the module-level bin edges
    (POBS_BINS, Z_BINS, APP_MAG_BINS, ANGULAR_BINS, ABS_MAG_BINS); the two
    quiescent flags ``nn_q`` and ``q`` are passed through unchanged.

    Returns
    -------
    tuple
        (pobs bin, nn_q, q, z bin, app-mag bin, angular-distance bin, nn abs-mag bin)
    """
    pobs_bin = np.digitize(pobs, POBS_BINS)
    z_bin = np.digitize(z, Z_BINS)
    mag_bin = np.digitize(mag, APP_MAG_BINS)
    dist_bin = np.digitize(dist, ANGULAR_BINS)
    nn_abs_mag_bin = np.digitize(nn_abs_mag, ABS_MAG_BINS)
    return (pobs_bin, nn_q, q, z_bin, mag_bin, dist_bin, nn_abs_mag_bin)

#print(obj.pt[( 1, True, True, 7.0, 1, 11)])
print(obj.pt[value_to_key(0.9, True, True, 0.2, 19.0, 25.0, -20.0)])
print(obj.pt[value_to_key(0.9, False, False, 0.2, 19.0, 25.0, -20.0)])

### Calculate P_obs

In [None]:
def bitsum(bitstring):
    """Count the set bits of ``bitstring`` interpreted as an unsigned 64-bit integer."""
    as_unsigned = c_uint64(bitstring).value
    return format(as_unsigned, 'b').count("1")

# Element-wise version of bitsum for arrays.
v_bitsum = np.vectorize(bitsum)

def summate(a):
    """Total number of set bits over all elements of ``a``."""
    per_element_bits = v_bitsum(a)
    return np.sum(per_element_bits)


In [None]:
# Skip this if iips were loaded OK. Takes ~8 minutes.

# Read all 32 64-bitstrings into memory from the file.
# `infile` is the open HDF5 handle; the previous code indexed the Python
# builtin `input`, which fails with a TypeError.
num_bitstrings = 32
BITS_PER_WORD = 64
galaxy_count = len(infile['Weight/bitweight0'])
bitweights = np.empty((num_bitstrings, galaxy_count), dtype='i8')

for i in range(num_bitstrings):
    bitweights[i] = infile['Weight/bitweight{0}'.format(i)][:]

# Per galaxy: fraction of set bits across all 32 * 64 = 2048 bits.
prob_obs = np.apply_along_axis(summate, 0, bitweights) / (num_bitstrings * BITS_PER_WORD)

# NOTE(review): another cell loads prob_obs from MXXL_PROB_OBS_FILE — confirm that it points at this file.
with open('bin/prob_obs.npy', 'wb') as f:
    np.save(f, prob_obs)


In [None]:
# Dump all bit words of one galaxy, with the per-word bit count and whether
# the bit chosen by bit_selector is set.
specimen = 123
bit_selector = c_uint64(2).value
print(f'{bit_selector:064b}')
print('')
for i in range(num_bitstrings):
    value = bitweights[i, specimen]
    converted = c_uint64(value).value
    is_selected = bool(converted & bit_selector)
    print(f'{converted:064b}', f'{bitsum(value):2.0f}', is_selected)

print("Averaged Probability of being targetted: ", prob_obs[specimen])

In [None]:
# P_obs for the rows selected by `keep`, compared against the whole catalogue.
prob_obs_cut = prob_obs[keep]

pobs_bins_temp = np.linspace(0, 1, 50)
trash = plt.hist(prob_obs, label="All galaxies", bins=pobs_bins_temp)
trash2 = plt.hist(prob_obs_cut, label=f"Galaxies below {APP_MAG_CUT} mag", bins=pobs_bins_temp)
plt.yscale('log')
plt.legend()

In [None]:
# P_obs for the rows rejected by `keep` versus the rows it selects.
prob_obs_dim = prob_obs[~keep]
_overlay = dict(bins=pobs_bins_temp, alpha=0.5)
trash = plt.hist(prob_obs_dim, label=f"Galaxies above {APP_MAG_CUT} mag", **_overlay)
trash2 = plt.hist(prob_obs_cut, label=f"Galaxies below {APP_MAG_CUT} mag", **_overlay)
plt.yscale('log')
plt.xlabel('$P_{obs}$')
plt.ylabel("Count")
plt.legend()

### Trying to fit the 40% NN success curve

In [None]:
def get_prob_nn_same_halo(ang_dist, nn_z, my_app_mag, my_pobs):
    """Look up ``obj.frac_same_halo_full`` for the bin that the given values fall in.

    Each value is digitized against its bin edges (``obj.angular_bins``, the
    global ``z_bins``, ``obj.app_mag_bins``, ``obj.POBS_bins``); the second
    axis of the table is fixed at index 0.
    """
    bin_index = (
        np.digitize(my_pobs, obj.POBS_bins),
        0,
        np.digitize(nn_z, z_bins),
        np.digitize(my_app_mag, obj.app_mag_bins),
        np.digitize(ang_dist, obj.angular_bins),
    )
    return obj.frac_same_halo_full[bin_index]

In [None]:
get_prob_nn_same_halo(13, 0.13, 16.7, 0.4)

In [None]:
def get_prob_nn_same_halo_index(my_ang_bin, nn_z_bin, my_app_mag_bin, my_pobs_bin):
    """Return ``obj.frac_same_halo_full`` at the given bin indexes (second axis fixed at 0)."""
    return obj.frac_same_halo_full[my_pobs_bin,0,nn_z_bin,my_app_mag_bin, my_ang_bin]

#all_ang_bincounts_2 = np.ones((POBS_BIN_COUNT, len(nn_bins), len(z_bins), APP_MAG_BIN_COUNT, BIN_COUNT))
#all_same_halo_bincounts_2 = np.zeros((POBS_BIN_COUNT, len(nn_bins), len(z_bins), APP_MAG_BIN_COUNT, BIN_COUNT))
#all_sim_z_bincounts_2 = np.zeros((POBS_BIN_COUNT, len(nn_bins), len(z_bins), APP_MAG_BIN_COUNT, BIN_COUNT))

from scipy.interpolate import interpn

# Build the index grid from the analyzer's own bin edges, matching the bins
# get_prob_nn_same_halo digitizes against. The bare globals `angular_bins` and
# `app_mag_bins` are rebound by unrelated cells and need not describe this table.
# NOTE(review): `z_bins` is still a bare global here, as in get_prob_nn_same_halo — confirm its origin.
points = (range(len(obj.angular_bins)), range(len(z_bins)), range(len(obj.app_mag_bins)), range(len(obj.POBS_bins)))
values = get_prob_nn_same_halo_index(*np.meshgrid(*points, indexing='ij'))

point = np.array([25, 0.13, 16.7, 0.4])

In [None]:
from scipy.optimize import curve_fit
def func(x, a, b, c):
    """Exponential decay with offset: ``a * exp(-b * x) + c``."""
    decay = np.exp(-b * x)
    return a * decay + c

# Synthetic data: a known exponential plus Gaussian noise. The generator is
# seeded so the fitted parameters are the same on every run.
xdata = np.linspace(0, 4, 50)
y = func(xdata, 2.5, 1.3, 0.5)
rng = np.random.default_rng(42)
y_noise = 0.2 * rng.normal(size=xdata.size)
ydata = y + y_noise
plt.plot(xdata, ydata, 'b-', label='data')

# Unconstrained fit; popt should land near the true (2.5, 1.3, 0.5).
popt, pcov = curve_fit(func, xdata, ydata)
print(popt)
print(pcov)
plt.plot(xdata, func(xdata, *popt), 'r-', label='fit: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt))

#popt, pcov = curve_fit(func, xdata, ydata, bounds=(0, [3., 1., 0.5]))
#np.array([2.43736712, 1.        , 0.34463856])
#plt.plot(xdata, func(xdata, *popt), 'g--', label='fit: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt))

plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()

### Fancy Algorithm

In [None]:
NUM_NEIGHBORS = 20

# Sky positions of the unobserved galaxies, i.e. the ones that need a redshift guess.
_lost_ra = ra[unobserved] * u.degree
_lost_dec = dec[unobserved] * u.degree
fancy_to_match = coord.SkyCoord(ra=_lost_ra, dec=_lost_dec, frame='icrs')

In [None]:
# For every galaxy in fancy_to_match, record its NUM_NEIGHBORS nearest
# catalogue entries: row n of each array holds the (n+1)-th nearest neighbour.
# NOTE(review): `catalog` and `treename` are not defined anywhere in this
# notebook — this cell relies on kernel state from elsewhere; confirm their origin.
neighbor_indexes = np.zeros(shape=(NUM_NEIGHBORS, len(fancy_to_match)), dtype=np.int32) # indexes point to CATALOG locations
ang_distances = np.zeros(shape=(NUM_NEIGHBORS, len(fancy_to_match)))

print(f"Finding nearest {NUM_NEIGHBORS} neighbors... ", end='\r')   
for n in range(0, NUM_NEIGHBORS):
    # nthneighbor is 1-based, hence n+1; the 3D distance d3d is not used.
    idx, d2d, d3d = coord.match_coordinates_sky(fancy_to_match, catalog, nthneighbor=n+1, storekdtree=treename)
    neighbor_indexes[n] = idx # TODO is that right?
    # Angular separations are stored in arcseconds.
    ang_distances[n] = d2d.to(u.arcsec).value
print(f"Finding nearest {NUM_NEIGHBORS} neighbors... done!")   

In [None]:
# Let FancyRedshiftGuesser pick one of the NUM_NEIGHBORS neighbours for each
# lost galaxy, and count how often the chosen neighbour has a similar redshift
# or the same halo ID as the truth.
# NOTE(review): `indexes_not_assigned`, `z_obs_catalog`, `mxxl_halo_id_catalog`
# and `SIM_Z_THRESH` are not defined in this notebook — they must come from
# kernel state or the star imports; confirm before running on a fresh kernel.
with FancyRedshiftGuesser(NUM_NEIGHBORS, debug=False) as scorer:
    halo_matches = 0
    z_matches = 0

    print(f"Assinging missing redshifts... ")   
    # TODO don't loop?
    j = 0 # index of the fancy_to_match sized arrays
    
    #for i in special_id:
    for i in indexes_not_assigned: # index of the master arrays

        #if i not in [7793057, 11425052]:
        #    j+=1
        #    continue

        # Progress report every 10000 galaxies.
        if j%10000==0:
            print(f"{j}/{len(fancy_to_match)} complete", end='\r')

        # Column j of the neighbour tables belongs to the j-th lost galaxy.
        neighbors = neighbor_indexes[:,j]
        neighbors_z = z_obs_catalog[neighbors]
        neighbors_ang_dist = ang_distances[:,j]
        my_prob_obs = prob_obs_cut[i]
        my_app_mag = app_mag[i]

        # The true redshift z_obs[i] is passed to the scorer as well
        # (presumably for its own success bookkeeping — confirm in FancyRedshiftGuesser).
        winning_num = scorer.choose_winner(neighbors_z, neighbors_ang_dist, my_prob_obs, my_app_mag, z_obs[i])
        winner_index = neighbors[winning_num]

        # Track total correct
        z_chosen = z_obs_catalog[winner_index] 
        # A redshift match means an absolute difference within SIM_Z_THRESH.
        if np.isclose(z_chosen, z_obs[i], rtol=0, atol=SIM_Z_THRESH):
            z_matches += 1
        halo_chosen = mxxl_halo_id_catalog[winner_index]
        if halo_chosen == mxxl_halo_id[i]:
            halo_matches += 1

        j += 1 

    print(f"{j}/{len(fancy_to_match)} complete")



In [None]:
# Success rates of the guesser as a fraction of all lost galaxies.
_n_lost = len(fancy_to_match)
print(f"Halo matches: {halo_matches / _n_lost}")
print(f"z matches: {z_matches / _n_lost}")

In [None]:
# View results from a run of the FancyRedshiftGuesser. Must put in the right filename (number)
filename = 'bin/redshift_guesser_1691466513.171286.npy'
with open(filename, 'rb') as f:
    # Four arrays stored back to back; they are read in file order.
    quick_nn, quick_correct, nn_used, nn_correct = (np.load(f) for _ in range(4))

# The +1 in each denominator guards against division by zero for unused slots.
_quick_rate = quick_correct / (quick_nn+1)
_nn_rate = nn_correct / (nn_used+1)
print(f"Quick NN uses: {quick_nn}. Success: {_quick_rate}")
print(f"NN bin uses: {nn_used}. Success: {_nn_rate}")

In [None]:
# THIS IS N^2 CALCULATION do not run on full sky.
# Counts start at 1 so the final division never divides by zero.
total_bincounts = np.ones((len(z_bins), BIN_COUNT))
total_same_halo_bincounts = np.zeros((len(z_bins), BIN_COUNT))

# Examine each galaxy in the sample pair once
for i in range(len(ra)-1):
    # Separation between galaxy i and every later galaxy, in arcsec.
    ang_distance = coord.angular_separation(ra[i]*u.degree, dec[i]*u.degree, ra[i+1:]*u.degree, dec[i+1:]*u.degree).to(u.arcsec)

    same_halo = mxxl_halo_id[i] == mxxl_halo_id[i+1:]

    angdist_bin_ind = np.digitize(ang_distance.value, angular_bins)
    # minlength pads the histogram to BIN_COUNT entries. Without it np.bincount
    # returns only max(index)+1 entries, which breaks the accumulation below
    # whenever a galaxy has no partner in the outermost bins.
    bincounts = np.bincount(angdist_bin_ind, minlength=BIN_COUNT)[0:BIN_COUNT]
    same_halo_bincounts = np.bincount(angdist_bin_ind, weights=same_halo.astype(int), minlength=BIN_COUNT)[0:BIN_COUNT]

    # NOTE(review): np.digitize returns len(z_bins) for z above the last edge,
    # which would index past the end of the totals — confirm z_bins covers the sample.
    z_bin = np.digitize(z_obs[i], z_bins)
    total_bincounts[z_bin] = total_bincounts[z_bin] + bincounts
    total_same_halo_bincounts[z_bin] = total_same_halo_bincounts[z_bin] + same_halo_bincounts

fraction_same_halo = total_same_halo_bincounts / total_bincounts

In [None]:
# Plots for galaxy pairs
def _z_bin_label(i):
    """Legend text for redshift bin i: open-ended below the first edge, a range otherwise."""
    if i == 0:
        return "< {0}".format(z_bins[i])
    return "{0} - {1}".format(z_bins[i-1], z_bins[i])

# Pair counts per angular-separation bin, one curve per redshift bin.
plt.figure()
for i in range(len(z_bins)):
    plt.plot(angular_bins, total_bincounts[i], label=_z_bin_label(i))
plt.legend()
plt.xscale('log')
plt.yscale('log')
plt.xlabel('Angular Separation (arcsec)')
plt.ylabel('Count of Galaxies Pairs')
plt.title("Galaxy Pair Counts (by ang separation and z)")
plt.draw()

# Fraction of those pairs that share a halo, same layout.
plt.figure()
for i in range(len(z_bins)):
    plt.plot(angular_bins, fraction_same_halo[i], label=_z_bin_label(i))
plt.legend()
plt.xscale('log')
plt.xlabel('Angular Separation (arcsec)')
plt.ylabel('Fraction Pair in Same Halo')
plt.ylim(-0.01, 1.0)
plt.title("Fraction Pair in Same Halo (by ang separation and z)")
plt.draw()

# UCHUU Experiments


### Experiments

In [None]:
# Column layout of the UCHUU table, kept for reference
# (presumably copied from the FITS dtype — TODO confirm against u_table.columns):
#[('R_MAG_APP', '>f4'), ('R_MAG_ABS', '>f4'), ('G_R_REST', '>f4'), ('G_R_OBS', '>f4'), ('DEC', '>f8'), ('HALO_MASS', '>f4'), ('CEN', '>i4'), ('RES', '>i4'), ('RA', '>f8'), ('Z_COSMO', '>f4'), ('Z', '>f4'), ('STATUS', '>i4'), ('FIRST_ACC_SCALE', '>f4'), ('M_ACC', '>f4'), ('M_VIR_ALL', '>f4'), ('R_VIR', '>f4'), ('V_PEAK', '>f4'), ('R_S', '>f4'), ('V_RMS', '>f4'), ('NGC', '>f4'), ('SGC', '>f4'), ('HALO_ID', '>i8'), ('PID', '>i8')]))
# NOTE(review): absolute, machine-specific path; consider a constant alongside
# the other data locations so the notebook runs elsewhere.
filename='/export/sirocco2/tinker/DESI/UCHUU_MOCKS/BGS_LC_Uchuu.fits'
# Read the whole FITS table into memory.
u_table = Table.read(filename, format='fits')

In [None]:
APP_MAG_CUT = 19.5

In [None]:
u_table.columns
#G_R_OBS

In [None]:
# Pull the UCHUU columns used below out of the table.
# Note: this rebinds dec, ra, z_obs, app_mag, abs_mag and g_r, which also hold
# MXXL data earlier in the notebook.
dec = u_table['DEC']
ra = u_table['RA']
z_obs = u_table['Z']
app_mag = u_table['R_MAG_APP']
abs_mag = u_table['R_MAG_ABS']
g_r = u_table['G_R_REST'] # TODO before using ensure it should be rest and not observed
g_r_obs = u_table['G_R_OBS']
central = u_table['CEN']
uchuu_halo_mass = u_table['HALO_MASS']
uchuu_halo_id = u_table['HALO_ID']

bright_filter = app_mag < APP_MAG_CUT 
redshift_filter = z_obs > 0 
# `mass_filter` used to be part of this selection but is never defined in the
# notebook, so the cell raised a NameError on a fresh kernel. The selection now
# matches the MXXL cells (bright and z > 0).
# NOTE(review): reinstate a halo-mass cut here if one was intended.
keep = np.all([bright_filter, redshift_filter], axis=0)

dec = dec[keep]
ra = ra[keep]
z_obs = z_obs[keep]
app_mag = app_mag[keep]
abs_mag = abs_mag[keep]
g_r = g_r[keep]
g_r_obs = g_r_obs[keep]
central = central[keep]
uchuu_halo_mass = uchuu_halo_mass[keep]
uchuu_halo_id = uchuu_halo_id[keep]

# Absolute magnitude recomputed from apparent magnitude and redshift, for comparison.
abs_mag_me = app_mag_to_abs_mag(app_mag, z_obs)


In [None]:
abs_mag_me_k = k_correct(app_mag, z_obs, g_r)
# using true g-r instead of the observed g-r gives the reported distribution as shown by plot below


In [None]:
# Compare the catalogue's absolute magnitudes with the k-corrected ones computed here.
# The bin edges get their own name; they used to overwrite `angular_bins`, which
# other cells use for angular-separation bins.
abs_mag_bins = np.linspace(-25, -10, 100)
#my_counts, my_bins, my_p = plt.hist(abs_mag_me, label="my abs_mag", bins=abs_mag_bins, alpha=0.5)
alex_counts, alex_bins, alex_p = plt.hist(abs_mag, label="UCHUU abs_mag", bins=abs_mag_bins, alpha=0.5)
my_k_counts, my_k_bins, my_k_p = plt.hist(abs_mag_me_k, label="my k abs_mag", bins=abs_mag_bins, alpha=0.5)
plt.xlabel("Absolute Mag")
plt.ylabel("Count")
plt.title("Compare Absolute Mags")
plt.yscale('log')
plt.legend()

#print(f"The peaks are UCHUU: {alex_bins[np.argmax(alex_counts)]:.1f}, My {my_bins[np.argmax(my_counts)]:.1f}, My K {my_k_bins[np.argmax(my_k_counts)]:.1f}")

In [None]:
# UCHUU
print(len(ra))
estimate_frac_area(ra, dec)


In [None]:
# Map a random 1% subsample (without replacement) of the UCHUU selection.
n_uchuu = len(ra)
rnd_indices = np.random.choice(n_uchuu, n_uchuu//100, replace=False)
# No trailing comma here: it used to wrap make_map's return value in a 1-tuple.
fig = make_map(ra[rnd_indices], dec[rnd_indices])


In [None]:
# TODO only centrals...
# Normalised log10 halo-mass distributions. Both catalogues are multiplied by
# 10**10 before the log (presumably they are stored in units of 1e10 — confirm).
plt.hist(np.log10(uchuu_halo_mass*10**10), bins=30, alpha=0.5, density=True, label="UCHUU")
# The MXXL masses are read from the notebook's own `df`. The previous
# expression `all.all_data[...]` dereferenced the Python builtin `all`.
# NOTE(review): confirm `df` holds the MXXL sample you want to compare.
plt.hist(np.log10(df['mxxl_halo_mass']*10**10), bins=30, alpha=0.5, density=True, label="MXXL")
#plt.yscale('log')
plt.title("MXXL vs UCHUU Truth Halo Masses")
plt.xlabel('log(M_halo)')
plt.ylabel('Density')
plt.legend()

# Tests

In [None]:
# Test estimate_frac_area for a completely filled sky
# A 1000 x 1000 grid of (ra, dec) pairs is built and flattened to 1-D arrays.
# NOTE(review): _dec spans 0.01..179.9; if estimate_frac_area expects
# declination in [-90, 90] this grid is out of range for half its points —
# confirm the convention in pyutils.
_ra = np.linspace(0.01, 359.9, 1000)
_dec = np.linspace(0.01, 179.9, 1000)
_ra, _dec = np.meshgrid(_ra, _dec)
_ra = _ra.flatten()
_dec = _dec.flatten()

# Displayed as the cell output (presumably close to 1 for full coverage).
estimate_frac_area(_ra, _dec)

In [None]:
# Check that the smooth redshift-similarity score behaves as desired over the
# relevant range of redshift differences (reference redshift 0.2).
x = np.arange(-0.01, 0.0105, 0.0005)
plt.plot(x, sim_z_score(0.2, 0.2 + x))
plt.tight_layout()

# Far apart: the score is 0.
for _far_z in (0.3, 0.25):
    assert np.isclose(sim_z_score(0.2, _far_z), 0.0)

# Transition region: the score rises as the redshift difference shrinks.
_score = sim_z_score(0.2, 0.210)
assert _score < 0.1, _score
_score = sim_z_score(0.2, 0.207)
assert _score < 0.3, _score
_score = sim_z_score(0.2, 0.205)
assert _score > 0.8, _score
_score = sim_z_score(0.2, 0.203)
assert _score > 0.99, _score

# Essentially identical redshifts: the score is 1.
assert np.isclose(sim_z_score(0.2, 0.2001), 1.0)