In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as c
import h5py
from astropy.wcs import WCS
import astropy.coordinates as coord
import astropy.units as u
from pyutils import *
import types
import numpy.ma as ma
import sys
from random import randint
from matplotlib.patches import Circle
from ctypes import c_uint64
import pickle
from astropy.table import Table
from postprocess import *
import k_correction as kcorr

# Alternative data roots (commented out, kept for reference):
#ROOT_FOLDER = "/Volumes/Seagate Backup Plus Drive/galaxy-groups-data/"
#ROOT_FOLDER = "/mnt/f/galaxy-groups-data/"
# NOTE(review): ROOT_FOLDER is not referenced by the cells below, which hard-code 'bin/' or '../bin/' paths.
ROOT_FOLDER = "../bin/"
# Folder holding the large mock catalog file (weights_3pass.hdf5 is opened from here).
BIG_FILES_FOLDER="/export/sirocco2/tinker/DESI/MXXL_MOCKS/"


In [None]:
%load_ext autoreload
%autoreload 2

# MXXL Experiments

## Simple plots of basic data

In [None]:
# Pick one fiber-assignment realization: bit number BIT_CHOICE of the BITWORD column.
BITWORD = 'bitweight0'
BIT_CHOICE = 0
FIBER_ASSIGNED_SELECTOR = 1 << BIT_CHOICE

infile = h5py.File(BIG_FILES_FOLDER + 'weights_3pass.hdf5', 'r')

# Read each per-galaxy column fully into memory.
data_group = infile['Data']
dec = data_group['dec'][:]
ra = data_group['ra'][:]
z_obs = data_group['z_obs'][:]
app_mag = data_group['app_mag'][:]
g_r = data_group['g_r'][:]
#abs_mag = infile['Data/abs_mag'][:] # We aren't using these; computing ourselves.
galaxy_type = data_group['galaxy_type'][:]
mxxl_halo_mass = data_group['halo_mass'][:]
mxxl_halo_id = data_group['mxxl_id'][:]

# True where the selected bit is set in the bitweight word.
selected_bits = infile['Weight/'+BITWORD][:] & FIBER_ASSIGNED_SELECTOR
observed = selected_bits.astype(bool)


In [None]:
# Gather the loaded columns into one table, one row per galaxy.
catalog_columns = dict(
    dec=dec,
    ra=ra,
    z_obs=z_obs,
    app_mag=app_mag,
    g_r=g_r,
    galaxy_type=galaxy_type,
    mxxl_halo_mass=mxxl_halo_mass,
    mxxl_halo_id=mxxl_halo_id,
    observed=observed,
)
df = pd.DataFrame(data=catalog_columns)


In [None]:
# Keep only galaxies brighter than the 17.9 apparent-magnitude limit and renumber the rows.
keep = df.app_mag < 17.9
df = df[keep].reset_index(drop=True)
# Row positions (shape (N, 1), as returned by np.argwhere) of galaxies flagged as observed.
indexes_assigned = np.argwhere(df.observed)
# Valid row positions run from 0 to len(df)-1, so the largest one must be strictly below len(df).
# The size check avoids np.max raising on an empty selection.
assert indexes_assigned.size == 0 or np.max(indexes_assigned) < len(df), "Indexes assigned are out of range"

In [None]:
# Start the effective redshift as an independent copy of the observed redshift.
# A new column must be created with item assignment: `df.z_eff = ...` only sets a Python
# attribute on the DataFrame object (pandas warns about it) and no column is added.
df['z_eff'] = np.copy(df.z_obs)

In [None]:

# First 10 observed redshifts as a plain numpy array (small sample for inspection).
z_obs_catalog = df.z_obs[0:10].to_numpy()


In [None]:
# Spot-check a single entry of the sample.
z_obs_catalog[3]

In [None]:
# Number of leading rows read by the quick-look cells below.
DATA_CUT_INDEX = 1000000 #21201544 #3000000

weights = h5py.File(BIG_FILES_FOLDER + 'weights_3pass.hdf5', 'r')
# Show the top-level groups, then the members of the 'Data' and 'Weight' groups.
for listing in (weights, weights['Data'], weights['Weight']):
    print(list(listing))

In [None]:
# Histogram of the observed redshift for the first DATA_CUT_INDEX rows.
small_z_obs = weights['Data/z_obs'][:DATA_CUT_INDEX]
bins = plt.hist(x=small_z_obs, bins=50)
plt.xlabel("$z_{obs}$")
plt.title("Histogram of Observed Redshifts")


In [None]:
# Read positions and apparent magnitudes for the first DATA_CUT_INDEX rows.
head_rows = slice(0, DATA_CUT_INDEX)
mxxl_ra = weights['Data/ra'][head_rows]
mxxl_dec = weights['Data/dec'][head_rows]
mxxl_app_mag = weights['Data/app_mag'][head_rows]

# Boolean mask of rows brighter than 19.5 mag, applied to every column.
bright_filter = mxxl_app_mag < 19.5
mxxl_ra_bright, mxxl_dec_bright, mxxl_app_mag_bright = (
    mxxl_ra[bright_filter],
    mxxl_dec[bright_filter],
    mxxl_app_mag[bright_filter],
)

In [None]:
# Area-fraction estimate for the full head sample, then for its bright subset.
for sample_ra, sample_dec in ((mxxl_ra, mxxl_dec), (mxxl_ra_bright, mxxl_dec_bright)):
    print(estimate_frac_area(sample_ra, sample_dec))

In [None]:
# Map a random 1-in-50 subsample (drawn without replacement) of the head sample.
rnd_indices = np.random.choice(len(mxxl_ra), len(mxxl_ra)//50, replace=False)
# No trailing comma here: with it, `fig` was a 1-tuple wrapping make_map's return value.
fig = make_map(mxxl_ra[rnd_indices], mxxl_dec[rnd_indices]) # This looks like Alex' paper, good


In [None]:

mxxl_halo_id = weights['Data/mxxl_id'][0:DATA_CUT_INDEX]
# Fraction of galaxies whose MXXL halo ID is 0. A bare expression in the middle of a cell is
# never displayed, so print it explicitly.
print(np.sum(mxxl_halo_id == 0) / len(mxxl_halo_id))
# 2.5% of galaxies have 0 for the MXXL Halo ID because they are in halos that were added by hand post-simulation.
# This was done because the small halos were not resolved in the simulation.
# Gal type 2 and 3 are central and satellite galaxies that were unresolved.

small_gal_type = weights['Data/galaxy_type'][0:DATA_CUT_INDEX]

# Positions where the halo ID is 0 (astype(bool) is False exactly for ID 0).
weird_indexes = np.argwhere(np.invert(mxxl_halo_id.astype(bool)))
weird_types = small_gal_type[weird_indexes]
# Histogram of the galaxy types of those ID-0 galaxies.
trash = plt.hist(weird_types)

In [None]:
# Histogram of apparent magnitude for the first DATA_CUT_INDEX rows.
small_app_mag = weights['Data/app_mag'][:DATA_CUT_INDEX]
bins = plt.hist(x=small_app_mag, bins=50)
plt.xlabel("Apparent Mag")
plt.title("Histogram of Apparent Mags")

In [None]:
DATA_CUT_INDEX = 1000000
head_rows = slice(0, DATA_CUT_INDEX)

# The mask is built from the unfiltered magnitudes, so it is computed before any column is cut.
small_app_mag = weights['Data/app_mag'][head_rows]
bright_filter = small_app_mag < 19.5

# Read each column for the head rows and keep only the bright (< 19.5 mag) entries.
ra = weights['Data/ra'][head_rows][bright_filter]
dec = weights['Data/dec'][head_rows][bright_filter]
small_abs_mag = weights['Data/abs_mag'][head_rows][bright_filter]
small_colours = weights['Data/g_r'][head_rows][bright_filter]
small_z_obs = weights['Data/z_obs'][head_rows][bright_filter]
small_z_cos = weights['Data/z_cos'][head_rows][bright_filter]
small_app_mag = small_app_mag[bright_filter]



In [None]:
# Distribution of the difference between the z_obs and z_cos columns for the bright sample.
plt.hist(small_z_obs - small_z_cos, bins=50)

In [None]:
# Range of the g_r column in the bright sample (max first, then min).
print(np.max(small_colours), np.min(small_colours))

In [None]:
# Absolute magnitude from apparent magnitude and z_obs.
# NOTE(review): app_mag_to_abs_mag is not defined in this notebook; presumably it comes from a star import — confirm.
my_abs_mag = app_mag_to_abs_mag(small_app_mag, small_z_obs)
#my_abs_mag2 = app_mag_to_abs_mag(small_app_mag, small_z_cos)


In [None]:
# Same conversion, additionally given the g_r colours.
# NOTE(review): the name suggests a k-correction is applied; confirm in the helper.
my_abs_mag_k = app_mag_to_abs_mag_k(small_app_mag, small_z_obs, small_colours)
#my_abs_mag_k2 = app_mag_to_abs_mag_k(small_app_mag, small_z_cos, small_colours)

# z cos vs z obs does not make a difference


In [None]:
# Compare my_abs_mag to abs_mag. 
# Shared bin edges so the overlaid histograms are directly comparable.
bins = np.linspace(-25, -10, 100)
#my_counts, my_bins, my_p = plt.hist(my_abs_mag, label="my abs_mag", bins=bins, alpha=0.5)
#my_counts, my_bins, my_p = plt.hist(my_abs_mag2, label="my abs_mag 2", bins=bins, alpha=0.5)
# Catalog-provided absolute magnitudes versus the ones computed in this notebook with colours.
alex_counts, alex_bins, alex_p = plt.hist(small_abs_mag, label="alex abs_mag", bins=bins, alpha=0.5)
my_k_counts, my_k_bins, my_k_p = plt.hist(my_abs_mag_k, label="my k abs_mag", bins=bins, alpha=0.5)
#z = plt.hist(my_abs_mag_k, label="my k abs_mag", bins=50)
plt.xlabel("Absolute Mag")
plt.ylabel("Count")
plt.title("Compare Absolute Mags")
plt.yscale('log')
plt.legend()

# The print below needs my_counts/my_bins, which are only produced by the commented-out hist above.
#print(f"The peaks are Alex: {alex_bins[np.argmax(alex_counts)]:.1f}, My {my_bins[np.argmax(my_counts)]:.1f}, My K {my_k_bins[np.argmax(my_k_counts)]:.1f}")

In [None]:
# At what distance (luminosity distance) would the objects appear to be 19.5 mag?
# One V_max estimate per flavour of absolute magnitude.
v_max = get_max_observable_volume_est(my_abs_mag, small_z_obs, 19.5, ra, dec)
v_max2 = get_max_observable_volume_est(small_abs_mag, small_z_obs, 19.5, ra, dec)
v_max3 = get_max_observable_volume_est(my_abs_mag_k, small_z_obs, 19.5, ra, dec)

# Overlay the log10(V_max) histograms; `bins` ends up holding the result of the last call.
for volumes, series_label in (
    (v_max, "my abs_mag"),
    (v_max2, "alex abs_mag"),
    (v_max3, "my k abs_mag"),
):
    bins = plt.hist(np.log10(volumes), label=series_label, bins=100, alpha=0.5)
plt.title("Compare V_max")
plt.legend()
plt.xlabel("log(V_max) [Mpc]")
plt.ylabel("Count")

### What is a reasonable z fudge factor for 'close enough' redshifts, given galaxies' peculiar velocities $v_{\mathrm{pec}}$?

Galaxies move at hundreds of km/s usually, or thousands in a rich cluster.

Two galaxies each moving at 600 km/s towards each other along the line of sight (LOS), but at the same cosmological redshift, would have a total redshift difference of about 0.004 (relative velocity 1200 km/s divided by $c$). This suggests that z +/- 0.002 is totally reasonable. In richer areas this could be as high as z +/- 0.010.

Adopting z +/- 0.003 for now seems fine. Can refine later.

In [None]:
# What is a reasonable z +/- fudge factor for 'close enough' redshifts?
# Consider peculiar velocities: convert trial redshift offsets to velocities via the Doppler equivalency.
z_test = [0.001, 0.002, 0.003, 0.005, 0.01] * u.dimensionless_unscaled
v_pec = z_test.to(u.km / u.s, u.equivalencies.doppler_redshift())
for z_value, velocity in zip(z_test, v_pec):
    print(f"z={z_value:.3f} is {velocity:.0f}")



## Get Truth Abs Mag for Correcting

This is for the 'fancy' approach that we don't use.

In [None]:
APP_MAG_CUT = 19.5

# Full columns this time (no DATA_CUT_INDEX limit).
app_mag = weights['Data/app_mag'][:]
z_obs = weights['Data/z_obs'][:]

# Keep galaxies that are both brighter than the cut and at positive redshift.
bright_filter = app_mag < APP_MAG_CUT
redshift_filter = z_obs > 0
keep = bright_filter & redshift_filter

app_mag, z_obs = app_mag[keep], z_obs[keep]

my_abs_mag = app_mag_to_abs_mag(app_mag, z_obs)

In [None]:
# 100 equally spaced edges (99 bins) spanning the full range of my_abs_mag.
bins = np.linspace(min(my_abs_mag), max(my_abs_mag), 100)
# Normalized histogram (density=True) of the absolute magnitudes.
densities, bins  = np.histogram(my_abs_mag, bins=bins, density=True)
t = plt.hist(my_abs_mag, bins, density=True)

# Two arrays are written back-to-back into one file: densities first, then bin edges.
# A reader must call np.load twice on the same handle, in that order.
with open('bin/abs_mag_weight.npy', 'wb') as f:
    np.save(f, densities, allow_pickle=False)
    np.save(f, bins, allow_pickle=False)

In [None]:
# Read back the two arrays in the order they were written: densities, then bin edges.
with open('bin/abs_mag_weight.npy', 'rb') as f:
    densities = np.load(f)
    bins = np.load(f)

# There is one more edge than there are densities; plot each density against its left edge.
# bins[:-1] keeps working if the number of bins is changed (the old bins[0:99] assumed 100 edges).
plt.plot(bins[:-1], densities)
#plt.yscale('log')

## Examine map of apparent mag to z distribution

In [None]:
# Builds a map of apparent mags to a pdf of redshifts.
# Build the map all the way to 20th mag
APP_MAG_CUT = 20.0

app_mag = weights['Data/app_mag'][:]
z_obs = weights['Data/z_obs'][:]

# Keep galaxies that are both brighter than the cut and at positive redshift.
bright_filter = app_mag < APP_MAG_CUT
redshift_filter = z_obs > 0
keep = bright_filter & redshift_filter
app_mag, z_obs = app_mag[keep], z_obs[keep]

In [None]:
app_mag_bins, the_map = build_app_mag_to_z_map(app_mag, z_obs)

# Galaxy counts per apparent-magnitude bin, using the bins returned by the map builder.
counts, app_mag_bins_2 = np.histogram(app_mag, bins=app_mag_bins, density=False)
plt.figure()
t = plt.hist(app_mag, app_mag_bins, density=False)
plt.yscale('log')

# Normalized histograms of three entries of the map (indexes 0, 50 and 100), overlaid.
plt.figure()
for map_index in (0, 50, 100):
    trash = plt.hist(the_map[map_index], bins=30, density=True)

### Density of Galaxies per square degree

In [None]:
app_mag = weights['Data/app_mag'][:]

# Counts are divided by 14000 — presumably the footprint area in deg^2; confirm.
n_bright = np.sum(app_mag < 19.5)
n_faint_shell = np.sum((app_mag > 19.5) & (app_mag < 20.0))
print(f"There are ~{n_bright / 14000:.0f} galaxies/deg^2 < 19.5 mag")
print(f"There are ~{n_faint_shell / 14000:.0f} galaxies/deg^2 between 19.5 and 20.0 mag")

## Nearest Neighbor Angular Separation and Same-Halo Analysis


In [None]:
# NOTE(review): this shadows the builtin input(); later cells read from this name, so renaming it
# would have to be done in every cell that uses it.
input = weights
# Read the full columns (no DATA_CUT_INDEX limit).
dec = input['Data/dec'][:]
ra = input['Data/ra'][:]
z_obs = input['Data/z_obs'][:]
app_mag = input['Data/app_mag'][:]

APP_MAG_CUT = 19.5
bright_filter = app_mag < APP_MAG_CUT # makes a filter array (True/False values)
redshift_filter = z_obs > 0 # makes a filter array (True/False values)
#location_filter_1 = ra < 270.0
#location_filter_2 = ra > 120.0
#location_filter_3 = dec > 0.0
#location_filter_4 = dec < 45.0
# Element-wise AND of the active filters; `keep` indexes the full-length columns.
keep = np.all([bright_filter, redshift_filter], axis=0)
#keep = np.all([bright_filter, redshift_filter, location_filter_1, location_filter_2, location_filter_3, location_filter_4], axis=0)

# From here on these names refer to the cut sample, not the full columns.
dec = dec[keep]
ra = ra[keep]
z_obs = z_obs[keep]
app_mag = app_mag[keep]
sim_halo_id = input['Data/mxxl_id'][:]
sim_halo_id = sim_halo_id[keep]


# Number of galaxies that survive the cut.
len(dec)

In [None]:
BIT_CHOICE = 0
FIBER_ASSIGNED_SELECTOR = 2**BIT_CHOICE
# choose 1 of the 2048 fiber assignment realizations with this bitstring
realization_bits = input['Weight/bitweight0'][:] & FIBER_ASSIGNED_SELECTOR

# Assigned / not-assigned masks, restricted to the cut sample.
fassigned = realization_bits.astype(bool)[keep]
fnotassigned = ~fassigned
indexes_not_assigned = np.argwhere(fnotassigned)

# Fraction of the cut sample that is assigned in this realization.
print(np.sum(fassigned) / len(dec))

# Load the per-galaxy P_obs array (full length) and cut it like the other columns.
with open('../bin/prob_obs.npy', 'rb') as f:
    prob_obs = np.load(f)
prob_obs_cut = prob_obs[keep]

### Calculate P_obs

In [None]:
def bitsum(bitstring):
    """Return the number of 1-bits in ``bitstring`` reinterpreted as an unsigned 64-bit integer."""
    as_unsigned = c_uint64(bitstring).value
    return format(as_unsigned, 'b').count("1")

# Element-wise version of bitsum for arrays.
v_bitsum = np.vectorize(bitsum)

def summate(a):
    """Return the total number of 1-bits over every element of ``a``."""
    per_element_counts = v_bitsum(a)
    return np.sum(per_element_counts)


In [None]:
# Skip this if prob_obs was already loaded OK from '../bin/prob_obs.npy'. Takes ~8 minutes.

# Read all 32 64-bitstrings into memory from the file
num_bitstrings = 32
galaxy_count = len(input['Weight/bitweight0'])
bitweights = np.empty((num_bitstrings, galaxy_count), dtype='i8')

for i in range(num_bitstrings):
    bitweights[i] = input['Weight/bitweight{0}'.format(i)][:]

# For each galaxy (column), count the set bits across all 32 words and divide by the
# 32 * 64 = 2048 available bits.
prob_obs = np.apply_along_axis(summate, 0, bitweights) / 2048

# Write the cache to the same path the loading cell reads ('../bin/prob_obs.npy').
# It used to be written to 'bin/prob_obs.npy', which the loader never looks at.
with open('../bin/prob_obs.npy', 'wb') as f:
    np.save(f, prob_obs)


In [None]:
# Column index (into the full, uncut arrays) of the galaxy whose bitweights are dumped below.
specimen = 123
# Mask with value 2, i.e. only bit 1 set; printed first as a 64-character binary reference row.
bit_selector = c_uint64(2).value
print('{:064b}'.format(bit_selector))
print('')
# Requires `bitweights` and `num_bitstrings` from the P_obs computation cell.
for i in range(num_bitstrings):
    value = bitweights[(i,specimen)]
    # Reinterpret the signed 64-bit word as unsigned so it prints as 64 binary digits.
    converted = c_uint64(value).value
    # Columns: the word in binary, its number of set bits, and whether the selected bit is set.
    print('{:064b}'.format(converted), '{:2.0f}'.format(bitsum(value)), bool(converted & bit_selector))

print("Averaged Probability of being targetted: ", prob_obs[specimen])

In [None]:
# P_obs restricted to the cut sample (`keep` indexes the full-length arrays).
prob_obs_cut = prob_obs[keep]

# np.linspace defaults to 50 points, i.e. 49 equal-width bins on [0, 1].
pobs_bins_temp = np.linspace(0,1)
trash=plt.hist(prob_obs, bins=pobs_bins_temp, label="All galaxies")
# NOTE(review): `keep` also includes the z_obs > 0 filter, not only the magnitude cut named in the label.
trash2=plt.hist(prob_obs_cut, bins=pobs_bins_temp, label=f"Galaxies below {APP_MAG_CUT} mag")
plt.yscale('log')
plt.legend()

In [None]:
# P_obs of everything excluded by `keep`.
# NOTE(review): that is galaxies failing the magnitude cut OR the z_obs > 0 filter, not only the fainter ones.
prob_obs_dim = prob_obs[np.invert(keep)]
trash=plt.hist(prob_obs_dim, bins=pobs_bins_temp, alpha=0.5, label=f"Galaxies above {APP_MAG_CUT} mag")
trash2=plt.hist(prob_obs_cut, bins=pobs_bins_temp, alpha=0.5, label=f"Galaxies below {APP_MAG_CUT} mag")
plt.yscale('log')
plt.xlabel('$P_{obs}$')
plt.ylabel("Count")
plt.legend()

### Same Halo / Similar z Analysis

What fraction of the time are nearest neighbors in the same halo?

What is the distribution of Angular distances?

What fraction of the time are nearest neighbors at a similar enough redshift?

In [None]:
# Now bin so that things with ang distances higher than the max we care about are thrown out
BIN_COUNT = 25
# Log-spaced angular-distance bin markers from 3 arcsec to 3600 arcsec.
bins = np.logspace(np.log10(3), np.log10(60*60), BIN_COUNT)
print("Angular Distance Bin Markers", bins)

# Redshift bin markers are taken from SimpleRedshiftGuesser (not defined in this notebook).
z_bins = SimpleRedshiftGuesser.z_bins
print("Redshift Bin Markers", z_bins)

POBS_BIN_COUNT = 25
POBS_bins = np.linspace(0.01, 1.0, POBS_BIN_COUNT)
print("Pobs Bin Markers", POBS_bins)

APP_MAG_BIN_COUNT = 25
app_mag_bins = np.linspace(15.0, 20.01, APP_MAG_BIN_COUNT)
print("App mag bin markers", app_mag_bins)

# True: the match catalog holds only fiber-assigned galaxies.
# False: the match catalog holds every galaxy in the cut sample.
LOST_GALAXIES_ONLY = True

# `treename` is passed as storekdtree= to match_coordinates_sky in later cells.
if LOST_GALAXIES_ONLY:
    treename = 'mxxl_same_halo_analysis_fiberassigned_b' + str(BIT_CHOICE)
    catalog = coord.SkyCoord(ra=ra[fassigned]*u.degree, dec=dec[fassigned]*u.degree, frame='icrs')
    sim_halo_id_catalog = sim_halo_id[fassigned]
    z_obs_catalog = z_obs[fassigned]
else:
    treename = 'mxxl_same_halo_analysis_all'
    catalog = coord.SkyCoord(ra=ra*u.degree, dec=dec*u.degree, frame='icrs')
    sim_halo_id_catalog = sim_halo_id
    z_obs_catalog = z_obs


In [None]:
# Get NN's angular distance distribution and same halo truth from MXXL

# Though this is binned by z of the target and not the NN, it shouldn't be able to affect results
# by reciprocity of NN
z_bin = np.digitize(z_obs, z_bins)

if LOST_GALAXIES_ONLY:
    #nn_bins = np.arange(5)+1
    nn_bins=[1]
else:
    nn_bins = [2]#[2,3,4] # this means closest 3. '1' will find the same object.

n_ang_bins = len(bins)

# Result arrays indexed as [z bin of target][nn_bins index][angular-distance bin].
all_ang_bincounts = np.ones((len(z_bins), len(nn_bins), n_ang_bins))
all_same_halo_bincounts = np.zeros((len(z_bins), len(nn_bins), n_ang_bins))
all_same_z_bincounts = np.zeros((len(z_bins), len(nn_bins), n_ang_bins))
all_sim_z_bincounts = np.zeros((len(z_bins), len(nn_bins), n_ang_bins))

for i in range(len(z_bins)):
    for j in range(len(nn_bins)):
        # Targets: galaxies in z bin i (only the unassigned ones when LOST_GALAXIES_ONLY).
        if LOST_GALAXIES_ONLY:
            target_filter = np.all([z_bin == i, fnotassigned], axis=0)
        else:
            target_filter = z_bin == i
        to_match = coord.SkyCoord(ra=ra[target_filter]*u.degree, dec=dec[target_filter]*u.degree, frame='icrs')
        idx, d2d, d3d = coord.match_coordinates_sky(to_match, catalog, nthneighbor=nn_bins[j], storekdtree=treename)

        # Per-target truth flags comparing each target to its matched catalog neighbor.
        same_halo = sim_halo_id[target_filter] == sim_halo_id_catalog[idx]
        same_z = np.isclose(z_obs[target_filter], z_obs_catalog[idx], rtol=0, atol=0.000001)
        sim_z = np.isclose(z_obs[target_filter], z_obs_catalog[idx], rtol=0, atol=SIM_Z_THRESH)

        angdist_bin_ind = np.digitize(d2d.to(u.arcsec).value, bins)

        # np.digitize returns len(bins) for separations >= bins[-1]; np.bincount would then return
        # len(bins)+1 entries and the assignments below would raise on the shape mismatch.
        # Truncating drops that overflow bin (same approach as the galaxy-pairs cell).
        bincounts = np.bincount(angdist_bin_ind, minlength=n_ang_bins)[:n_ang_bins] + 1 # avoids divide by 0, won't hurt statistics
        all_ang_bincounts[i][j] = bincounts

        bincounts2 = np.bincount(angdist_bin_ind, minlength=n_ang_bins, weights=same_halo.astype(int))[:n_ang_bins]
        all_same_halo_bincounts[i][j] = bincounts2

        bincount3 = np.bincount(angdist_bin_ind, minlength=n_ang_bins, weights=same_z.astype(int))[:n_ang_bins]
        all_same_z_bincounts[i][j] = bincount3

        bincount4 = np.bincount(angdist_bin_ind, minlength=n_ang_bins, weights=sim_z.astype(int))[:n_ang_bins]
        all_sim_z_bincounts[i][j] = bincount4
    

In [None]:
# Totals over all bins. NOTE: the angular counts carry a +1 per bin (added to avoid division by zero),
# so "Galaxies studied" is slightly larger than the true number of targets.
print("Galaxies studied: {0}. Same halo: {1}. Similar z: {2}".format(np.sum(all_ang_bincounts), np.sum(all_same_halo_bincounts), np.sum(all_sim_z_bincounts)))

In [None]:
# TODO 
#for b in range(len(all_same_halo_bincounts)):
#    print(all_same_halo_bincounts[b], all_same_z_bincounts[b], len(all_same_z_bincounts))

# True when the same-halo counts equal the same-z counts (atol=0.000001 match) in every bin.
np.all(np.isclose(all_same_halo_bincounts, all_same_z_bincounts))

In [None]:
def getlabel(index, z_bins):
    """Build the legend label for redshift bin ``index``.

    Bin 0 is labelled "< first marker"; every other bin is labelled
    "previous marker - this marker".
    """
    if index == 0:
        return f"< {z_bins[index]}"
    lower, upper = z_bins[index-1], z_bins[index]
    return f"{lower} - {upper}"

In [None]:
# Plots for nearest-neighbor angular distances and same-halo analysis
# For each nn_bins entry (at most the first 5) three figures are drawn, each with one line per
# redshift bin: raw counts, same-halo fraction and similar-z fraction versus angular distance.

for j in range(len(nn_bins)):
    if j < 5:
        plt.figure()
        for i in range(len(z_bins)):
            label = getlabel(i, z_bins)
            # This call must sit inside the z loop; when dedented, only the last z bin was drawn.
            plt.plot(bins, all_ang_bincounts[i][j], label=label, color=get_color(i))

        plt.title(f"Nearest Neighbor {j} Ang. Distance Distribution")
        plt.ylabel("Count")
        plt.xlabel("Angular Distance (arcsec)")
        plt.yscale('log')
        plt.xscale('log')
        plt.legend()
        plt.draw()

        plt.figure()
        for i in range(len(z_bins)):
            label = getlabel(i, z_bins)
            plt.plot(bins, all_same_halo_bincounts[i][j]/all_ang_bincounts[i][j], label=label, color=get_color(i))
            print("Total fraction of nearest neighbors in same halo (z {0}, NN-{1}): {2:.3f}".format(label, j+1, np.sum(all_same_halo_bincounts[i][j]) / np.sum(all_ang_bincounts[i][j])))

        plt.title(f"Nearest Neighbor {j} Same Halo Fraction")
        plt.ylabel("NN Same Halo Fraction")
        plt.xlabel("Angular Distance (arcsec)")
        plt.xscale('log')
        plt.legend()
        plt.draw()

        plt.figure()
        for i in range(len(z_bins)):
            label = getlabel(i, z_bins)
            plt.plot(bins, all_sim_z_bincounts[i][j]/all_ang_bincounts[i][j], label=label, color=get_color(i))

            print("Total fraction of nearest neighbors at sim z (z {0}, NN-{1}): {2:.3f}".format(label, j+1, np.sum(all_sim_z_bincounts[i][j]) / np.sum(all_ang_bincounts[i][j])))

        plt.title(f"Nearest Neighbor {j} Sim z Fraction")
        plt.ylabel("NN Sim z Fraction")
        plt.xlabel("Angular Distance (arcsec)")
        plt.xscale('log')
        plt.legend()
        plt.draw()

#print("What fraction of the time is the NN >19.5 mag?")

### Color plots of NN Same Halo in z / ang distance / P_obs space

In [None]:
# TODO consider:
# Why do I treat z differently? 
# Should I move the loops to where the z loop is? 
# Have fewer calls to match_coord_sky and post-process? Is that possible?
# I think it's a lot faster to do that
POBS_bin = np.digitize(prob_obs_cut, POBS_bins)
app_mag_bin = np.digitize(app_mag, app_mag_bins)

if LOST_GALAXIES_ONLY: 
    nn_bins = [1]
else:
    nn_bins = [2] # since catalog includes the targets in this case

# Result arrays indexed as [P_obs bin][nn_bins index][z bin of the NN][app mag bin][ang. distance bin].
all_ang_bincounts_2 = np.ones((POBS_BIN_COUNT, len(nn_bins), len(z_bins), APP_MAG_BIN_COUNT, BIN_COUNT))
all_same_halo_bincounts_2 = np.zeros((POBS_BIN_COUNT, len(nn_bins), len(z_bins), APP_MAG_BIN_COUNT, BIN_COUNT))
all_sim_z_bincounts_2 = np.zeros((POBS_BIN_COUNT, len(nn_bins), len(z_bins), APP_MAG_BIN_COUNT, BIN_COUNT))

n_ang_bins = len(bins)

for i in range(len(POBS_bins)):
    for j in range(len(nn_bins)):
        for k in range(len(app_mag_bins)):

            # TODO app mag bins?

            # Targets: galaxies in P_obs bin i and app-mag bin k (only unassigned ones when LOST_GALAXIES_ONLY).
            if LOST_GALAXIES_ONLY:
                target_filter = np.all([POBS_bin == i, app_mag_bin == k, fnotassigned], axis=0)
            else:
                target_filter = np.all([POBS_bin == i, app_mag_bin == k], axis=0)

            to_match = coord.SkyCoord(ra=ra[target_filter]*u.degree, dec=dec[target_filter]*u.degree, frame='icrs')
            idx, d2d, d3d = coord.match_coordinates_sky(to_match, catalog, nthneighbor=nn_bins[j], storekdtree=treename)
            same_halo = sim_halo_id[target_filter] == sim_halo_id_catalog[idx]
            sim_z = np.isclose(z_obs[target_filter], z_obs_catalog[idx], rtol=0, atol=SIM_Z_THRESH)

            # Here the redshift bin is that of the matched neighbor, not of the target.
            nn_z_bin_ind = np.digitize(z_obs_catalog[idx], z_bins)
            angdist_bin_ind = np.digitize(d2d.to(u.arcsec).value, bins)

            for zb in range(len(z_bins)):
                right_z_bin = nn_z_bin_ind == zb

                # np.digitize returns len(bins) for separations >= bins[-1]; np.bincount would then
                # return len(bins)+1 entries and the assignments below would raise on the shape
                # mismatch. Truncating drops that overflow bin.
                bincounts = np.bincount(angdist_bin_ind, minlength=n_ang_bins, weights=right_z_bin.astype(int))[:n_ang_bins] + 1 # avoids divide by 0, won't hurt statistics
                all_ang_bincounts_2[i][j][zb][k] = bincounts

                bincounts2 = np.bincount(angdist_bin_ind, minlength=n_ang_bins, weights=np.all([same_halo, right_z_bin], axis=0).astype(int))[:n_ang_bins]
                all_same_halo_bincounts_2[i][j][zb][k] = bincounts2

                bincounts3 = np.bincount(angdist_bin_ind, minlength=n_ang_bins, weights=np.all([sim_z, right_z_bin], axis=0).astype(int))[:n_ang_bins]
                all_sim_z_bincounts_2[i][j][zb][k] = bincounts3

In [None]:
# To visualize things we need to reduce dimensionality. 
# Aggregate of either Pobs or app mag, or choose a single value from them to examine.
# The below always picks one z per plot.

# The 5-D count arrays are indexed [P_obs, nn, z, app_mag, ang_dist].
# axis 3 will sum over app mag. Axis 0 will sum over P_obs
axis_to_sumover = 3
# TODO why I am not not exactly reproducing previous results? Close, but not exact...

# Use this to aggregate
all_same_halo_bincounts_reduced = np.sum(all_same_halo_bincounts_2, axis=axis_to_sumover)
all_ang_bincounts_reduced = np.sum(all_ang_bincounts_2, axis=axis_to_sumover)
all_sim_z_bincounts_reduced = np.sum(all_sim_z_bincounts_2, axis=axis_to_sumover)

# Use this instead to pick out a value
#index_to_use = 20
#all_same_halo_bincounts_reduced = np.take(all_same_halo_bincounts_2, index_to_use, axis=axis_to_sumover)
#all_ang_bincounts_reduced = np.take(all_ang_bincounts_2, index_to_use, axis=axis_to_sumover)
#all_sim_z_bincounts_reduced = np.take(all_sim_z_bincounts_2, index_to_use, axis=axis_to_sumover)

# After summing over axis 0 the layout is [nn, z, app_mag, ang_dist]. The two swaps turn it into
# [app_mag, nn, z, ang_dist], matching the [P_obs, nn, z, ang_dist] layout obtained when summing
# over axis 3, so the plotting code can index both cases the same way.
if axis_to_sumover == 0:
    all_same_halo_bincounts_reduced = np.swapaxes(all_same_halo_bincounts_reduced, 0,1)
    all_same_halo_bincounts_reduced = np.swapaxes(all_same_halo_bincounts_reduced, 0,2)
    all_ang_bincounts_reduced = np.swapaxes(all_ang_bincounts_reduced, 0,1)
    all_ang_bincounts_reduced = np.swapaxes(all_ang_bincounts_reduced, 0,2)
    all_sim_z_bincounts_reduced = np.swapaxes(all_sim_z_bincounts_reduced, 0,1)
    all_sim_z_bincounts_reduced = np.swapaxes(all_sim_z_bincounts_reduced, 0,2)

# Per-bin fractions; the angular counts start from ones and get +1 per fill, so no division by zero.
frac_same = all_same_halo_bincounts_reduced / all_ang_bincounts_reduced
frac_sim_z = all_sim_z_bincounts_reduced / all_ang_bincounts_reduced

# Make rough bins of just over a threshold or not
nn_success_thresh = 0.4 # change fit lines below if you change this!
success_bins = [0,nn_success_thresh,1.0]
frac_same_binned = np.digitize(frac_same, bins=success_bins)

# Resultant shape must be consistent
print(np.shape(all_ang_bincounts_reduced))

In [None]:
ncols = 1 # there is code for 4 plots per row (z), but can make a subplot of it
z_bin_numbers_to_plot = range(len(z_bins))
z_bin_numbers_to_plot = [1]

# squeeze=False always returns a 2-D array of Axes, so axes[row][col] works for any number of
# rows and columns (no special-casing of the single-plot layout).
fig, axes = plt.subplots(nrows=len(z_bin_numbers_to_plot), ncols=ncols, figsize=(6*ncols, 4*len(z_bin_numbers_to_plot)), squeeze=False)

# The y axis is whichever of P_obs / app mag was NOT summed over in the reduction cell.
if axis_to_sumover == 3:
    y_axis_bins = POBS_bins
    y_axis_label = "Lost Galaxy $P_{obs}$"
elif axis_to_sumover == 0:
    y_axis_bins = app_mag_bins
    y_axis_label = "Lost Galaxy app r-mag"
else:
    raise ValueError(f"axis_to_sumover must be 0 or 3, got {axis_to_sumover}")

for row, zb in enumerate(z_bin_numbers_to_plot):
    density = all_ang_bincounts_reduced[:,0,zb,:] #/ np.sum(all_ang_bincounts_reduced[:,0,zb,:])
    print(f"Galaxies in this z-bin: {np.sum(density)}")

    axrow = axes[row]
    ax = axrow[0]

    # Same-halo fraction as a colour map over (angular distance, y-axis quantity) for this NN z bin.
    cplot = ax.pcolor(bins, y_axis_bins, frac_same[:,0,zb,:], shading='auto', cmap='RdYlGn', norm=c.Normalize(vmin=0, vmax=0.8))
    fig.colorbar(cplot, ax=ax)
    ax.set_title(f"NN Same Halo Fraction (NN z {getlabel(zb, z_bins)})")
    ax.set_ylabel(y_axis_label)
    ax.set_xlabel("Angular Distance (arcsec) to NN")
    ax.set_xscale('log')
    ax.set_xlim(2.5,250)
    
    # The string blocks below are disabled code for the extra panels (kept for reference).
    """
    cplot = axrow[1].pcolor(bins, y_axis_bins, frac_same_binned[:,0,zb,:], shading='auto', cmap='RdYlGn')
    fig.colorbar(cplot, ax=axrow[1])
    axrow[1].set_title(f"NN Same Halo Over 40% (NN z {getlabel(zb, z_bins)})")
    if (axis_to_sumover == 3):
        axrow[1].set_ylabel("Lost Galaxy $P_{obs}$")
    if (axis_to_sumover == 0):
        axrow[1].set_ylabel("Lost Galaxy app r-mag")    
    axrow[1].set_xlabel("Angular Distance (arcsec) to NN")
    axrow[1].set_xscale('log')
    axrow[1].set_xlim(2.5,250)
    """
    """
    cplot = axes[zb][2].pcolor(bins, y_axis_bins, frac_sim_z[:,0,zb,:], shading='auto', cmap='RdYlGn', norm=c.Normalize(vmin=0, vmax=0.8))
    fig.colorbar(cplot, ax=axes[zb][2])
    axrow[2].set_title(f"Nearest Neighbor Sim z Fraction (NN z {getlabel(zb, z_bins)})")
    if (axis_to_sumover == 3):
        axrow[2].set_ylabel("Lost Galaxy $P_{obs}$")
    if (axis_to_sumover == 0):
        axrow[2].set_ylabel("Lost Galaxy app r-mag")    
    axrow[2].set_xlabel("Angular Distance (arcsec) to NN")
    axrow[2].set_xscale('log')
    axrow[2].set_xlim(2.5,250)
    """
    #cplot = axrow[2].pcolor(bins, y_axis_bins, density, shading='auto', cmap='YlGn', norm=c.LogNorm(vmin=0.0001, vmax=0.1))
    """
    cplot = axrow[2].pcolor(bins, y_axis_bins, density, shading='auto', cmap='YlGn', norm=c.LogNorm(vmin=10, vmax=5000))
    fig.colorbar(cplot, ax=axrow[2])
    axrow[2].set_title(f"Counts (NN z {getlabel(zb, z_bins)})")
    if (axis_to_sumover == 3):
        axrow[2].set_ylabel("Lost Galaxy $P_{obs}$")
    if (axis_to_sumover == 0):
        axrow[2].set_ylabel("Lost Galaxy app r-mag")    
    axrow[2].set_xlabel("Angular Distance (arcsec) to NN")
    axrow[2].set_xscale('log')
    axrow[2].set_xlim(2.5,250)
    """
    # Overlay the get_NN_40_line points on the P_obs panel. They are drawn once, on the panel's
    # own Axes: the old multi-panel branch indexed an Axes object (ax[1]), which always raised,
    # and the single-panel branch drew the same points twice.
    if axis_to_sumover == 3:
        ax.scatter(get_NN_40_line(z_bins[zb]-0.01, POBS_bins), POBS_bins)
    
    #axes[plot_index][2].scatter(get_NN_40_line(z_bins[zb]-0.01, POBS_bins), POBS_bins)
    
fig.tight_layout() 


### Fancy Algorithm

In [None]:
# Number of nearest catalog neighbors gathered per target.
NUM_NEIGHBORS = 20
# Targets: sky positions of the galaxies not assigned a fiber in the chosen realization.
fancy_to_match = coord.SkyCoord(ra=ra[fnotassigned]*u.degree, dec=dec[fnotassigned]*u.degree, frame='icrs')

In [None]:
# Row n holds, for every target, the catalog index / angular distance (arcsec) of its (n+1)-th nearest neighbor.
neighbor_indexes = np.zeros(shape=(NUM_NEIGHBORS, len(fancy_to_match)), dtype=np.int32) # indexes point to CATALOG locations
ang_distances = np.zeros(shape=(NUM_NEIGHBORS, len(fancy_to_match)))

print(f"Finding nearest {NUM_NEIGHBORS} neighbors... ", end='\r')   
# One match_coordinates_sky call per neighbor rank; nthneighbor is 1-based, hence n+1.
for n in range(0, NUM_NEIGHBORS):
    idx, d2d, d3d = coord.match_coordinates_sky(fancy_to_match, catalog, nthneighbor=n+1, storekdtree=treename)
    neighbor_indexes[n] = idx # TODO is that right?
    ang_distances[n] = d2d.to(u.arcsec).value
print(f"Finding nearest {NUM_NEIGHBORS} neighbors... done!")   

In [None]:
# Run the FancyRedshiftGuesser over every unassigned galaxy and count how often the neighbor it
# picks has a similar redshift / the same halo as the target's truth values.
with FancyRedshiftGuesser(NUM_NEIGHBORS, debug=False) as scorer:
    halo_matches = 0
    z_matches = 0

    print(f"Assinging missing redshifts... ")   
    # TODO don't loop?
    j = 0 # index of the fancy_to_match sized arrays
    
    #for i in special_id:
    # indexes_not_assigned comes from np.argwhere, so each `i` is a length-1 index array rather
    # than a plain int; the lookups below therefore yield length-1 arrays.
    for i in indexes_not_assigned: # index of the master arrays

        #if i not in [7793057, 11425052]:
        #    j+=1
        #    continue

        if j%10000==0:
            print(f"{j}/{len(fancy_to_match)} complete", end='\r')

        # Column j of the neighbor tables: this target's NUM_NEIGHBORS nearest catalog entries.
        neighbors = neighbor_indexes[:,j]
        neighbors_z = z_obs_catalog[neighbors]
        neighbors_ang_dist = ang_distances[:,j]
        my_prob_obs = prob_obs_cut[i]
        my_app_mag = app_mag[i]

        # The target's own z_obs is passed as the last argument.
        # NOTE(review): presumably used only for the guesser's own bookkeeping — confirm.
        winning_num = scorer.choose_winner(neighbors_z, neighbors_ang_dist, my_prob_obs, my_app_mag, z_obs[i])
        winner_index = neighbors[winning_num]

        # Track total correct
        z_chosen = z_obs_catalog[winner_index] 
        if np.isclose(z_chosen, z_obs[i], rtol=0, atol=SIM_Z_THRESH):
            z_matches += 1
        halo_chosen = sim_halo_id_catalog[winner_index]
        if halo_chosen == sim_halo_id[i]:
            halo_matches += 1

        j += 1 

    print(f"{j}/{len(fancy_to_match)} complete")



In [None]:
# Fractions of targets whose chosen neighbor is in the same halo / at a similar redshift.
print(f"Halo matches: {halo_matches / len(fancy_to_match)}")
print(f"z matches: {z_matches / len(fancy_to_match)}")

In [None]:
# View results from a run of the FancyRedshiftGuesser. Must put in the right filename (number)
filename = 'bin/redshift_guesser_1691466513.171286.npy'
# Four arrays are read from the same file, in this order.
with open(filename, 'rb') as f:
    quick_nn = np.load(f)
    quick_correct = np.load(f)
    nn_used = np.load(f)
    nn_correct = np.load(f)

# The +1 in each denominator avoids division by zero for unused entries, so the success rates
# are slightly underestimated.
print(f"Quick NN uses: {quick_nn}. Success: {quick_correct / (quick_nn+1)}")
print(f"NN bin uses: {nn_used}. Success: {nn_correct / (nn_used+1)}")

### Galaxy Pairs Angular Separation and Same-Halo Analysis
Continuation of the above.

THIS IS AN N^2 CALCULATION: do not run it on the full sky. Cut the data down to a small sample first.


In [None]:
# THIS IS N^2 CALCULATION do not run on full sky.
# Rows are the z bin of the first galaxy of each pair; columns are angular-separation bins.
# The pair counts start at one so the final division never divides by zero.
total_bincounts = np.ones((len(z_bins), BIN_COUNT))
total_same_halo_bincounts = np.zeros((len(z_bins), BIN_COUNT))

# Examine each galaxy in the sample pair once
for i in range(len(ra)-1):
    ang_distance = coord.angular_separation(ra[i]*u.degree, dec[i]*u.degree, ra[i+1:len(ra)]*u.degree, dec[i+1:len(ra)]*u.degree).to(u.arcsec)
        
    same_halo = sim_halo_id[i] == sim_halo_id[i+1:len(ra)]
    #print("Same halo fraction for {0}:".format(i), np.sum(same_halo) / len(same_halo))

    angdist_bin_ind = np.digitize(ang_distance.value, bins)
    #print(bin_ind)
    # minlength pads the result to BIN_COUNT entries when no pair lands in the upper bins;
    # without it the additions below fail with a shape mismatch. The slice then drops the
    # overflow bin (separations >= bins[-1]).
    bincounts = np.bincount(angdist_bin_ind, minlength=BIN_COUNT)[0:BIN_COUNT]
    same_halo_bincounts = np.bincount(angdist_bin_ind, weights=same_halo.astype(int), minlength=BIN_COUNT)[0:BIN_COUNT]

    # NOTE(review): np.digitize returns len(z_bins) for a redshift at or above the last marker,
    # which would be out of range for the row index below — confirm z_bins covers the sample.
    z_bin = np.digitize(z_obs[i], z_bins)
    total_bincounts[z_bin] = total_bincounts[z_bin] + bincounts
    total_same_halo_bincounts[z_bin] = total_same_halo_bincounts[z_bin] + same_halo_bincounts
    #print(total_same_halo_bincounts)

#print("Total counts in each bin:", total_bincounts)

fraction_same_halo = total_same_halo_bincounts / total_bincounts
#print(fraction_same_halo)

In [None]:
# Plots for galaxy pairs
# One legend label per redshift bin: "< first marker" for bin 0, "lower - upper" otherwise.
pair_labels = [
    "< {0}".format(z_bins[k]) if k == 0 else "{0} - {1}".format(z_bins[k-1], z_bins[k])
    for k in range(len(z_bins))
]

# Figure 1: pair counts per angular-separation bin, one line per redshift bin.
plt.figure()
for k, pair_label in enumerate(pair_labels):
    plt.plot(bins, total_bincounts[k], label=pair_label)
plt.legend()
plt.xscale('log')
plt.yscale('log')
plt.xlabel('Angular Separation (arcsec)')
plt.ylabel('Count of Galaxies Pairs')
plt.title("Galaxy Pair Counts (by ang separation and z)")
plt.draw()

# Figure 2: fraction of pairs sharing a halo, one line per redshift bin.
plt.figure()
for k, pair_label in enumerate(pair_labels):
    plt.plot(bins, fraction_same_halo[k], label=pair_label)
plt.legend()
plt.xscale('log')
plt.xlabel('Angular Separation (arcsec)')
plt.ylabel('Fraction Pair in Same Halo')
plt.ylim(-0.01, 1.0)
plt.title("Fraction Pair in Same Halo (by ang separation and z)")
plt.draw()

# UCHUU Experiments


### Experiments

In [None]:
# Column names and dtypes of the table (kept for reference):
#[('R_MAG_APP', '>f4'), ('R_MAG_ABS', '>f4'), ('G_R_REST', '>f4'), ('G_R_OBS', '>f4'), ('DEC', '>f8'), ('HALO_MASS', '>f4'), ('CEN', '>i4'), ('RES', '>i4'), ('RA', '>f8'), ('Z_COSMO', '>f4'), ('Z', '>f4'), ('STATUS', '>i4'), ('FIRST_ACC_SCALE', '>f4'), ('M_ACC', '>f4'), ('M_VIR_ALL', '>f4'), ('R_VIR', '>f4'), ('V_PEAK', '>f4'), ('R_S', '>f4'), ('V_RMS', '>f4'), ('NGC', '>f4'), ('SGC', '>f4'), ('HALO_ID', '>i8'), ('PID', '>i8')]))
filename='/export/sirocco2/tinker/DESI/UCHUU_MOCKS/BGS_LC_Uchuu.fits'
# Read the whole FITS table into an astropy Table.
u_table = Table.read(filename, format='fits')

In [None]:
# Apparent-magnitude limit applied to the UCHUU sample below.
APP_MAG_CUT = 19.5

In [None]:
# List the columns available in the UCHUU table.
u_table.columns
#G_R_OBS

In [None]:
# Pull the needed columns out of the UCHUU table (these names overwrite the MXXL arrays above).
dec = u_table['DEC']
ra = u_table['RA']
z_obs = u_table['Z']
app_mag = u_table['R_MAG_APP']
abs_mag = u_table['R_MAG_ABS']
g_r = u_table['G_R_REST'] # TODO before using ensure it should be rest and not observed
g_r_obs = u_table['G_R_OBS']
central = u_table['CEN']
uchuu_halo_mass = u_table['HALO_MASS']
uchuu_halo_id = u_table['HALO_ID']

bright_filter = app_mag < APP_MAG_CUT 
redshift_filter = z_obs > 0 
# NOTE(review): mass_filter is not defined anywhere in the visible notebook, so on a fresh kernel
# this line raises NameError unless one of the star imports provides it — confirm and define it here.
keep = np.all([mass_filter, bright_filter, redshift_filter], axis=0)

# Apply the combined mask to every column.
dec = dec[keep]
ra = ra[keep]
z_obs = z_obs[keep]
app_mag = app_mag[keep]
abs_mag = abs_mag[keep]
g_r = g_r[keep]
g_r_obs = g_r_obs[keep]
central = central[keep]
uchuu_halo_mass = uchuu_halo_mass[keep]
uchuu_halo_id = uchuu_halo_id[keep]

# Absolute magnitudes recomputed from apparent magnitude and redshift.
abs_mag_me = app_mag_to_abs_mag(app_mag, z_obs)


In [None]:
# NOTE(review): k_correct is not defined in this notebook (the MXXL cells call app_mag_to_abs_mag_k
# instead); presumably it comes from a star import — confirm.
abs_mag_me_k = k_correct(app_mag, z_obs, g_r)
# using true g-r instead of the observed g-r gives the reported distribution as shown by plot below


In [None]:
# Compare my_abs_mag to abs_mag. 
# Shared bin edges so the two overlaid histograms are directly comparable.
bins = np.linspace(-25, -10, 100)
#my_counts, my_bins, my_p = plt.hist(abs_mag_me, label="my abs_mag", bins=bins, alpha=0.5)
# Catalog-provided absolute magnitudes versus the ones computed above with g_r.
alex_counts, alex_bins, alex_p = plt.hist(abs_mag, label="UCHUU abs_mag", bins=bins, alpha=0.5)
my_k_counts, my_k_bins, my_k_p = plt.hist(abs_mag_me_k, label="my k abs_mag", bins=bins, alpha=0.5)
#z = plt.hist(my_abs_mag_k, label="my k abs_mag", bins=50)
plt.xlabel("Absolute Mag")
plt.ylabel("Count")
plt.title("Compare Absolute Mags")
plt.yscale('log')
plt.legend()

# The print below needs my_counts/my_bins, which are only produced by the commented-out hist above.
#print(f"The peaks are UCHUU: {alex_bins[np.argmax(alex_counts)]:.1f}, My {my_bins[np.argmax(my_counts)]:.1f}, My K {my_k_bins[np.argmax(my_k_counts)]:.1f}")

In [None]:
# UCHUU
# Sample size after the cuts, then the area-fraction estimate for the sample.
print(len(ra))
estimate_frac_area(ra, dec)


In [None]:
# Map a random 1-in-100 subsample (drawn without replacement) of the UCHUU sample.
rnd_indices = np.random.choice(len(ra), len(ra)//100, replace=False)
# No trailing comma here: with it, `fig` was a 1-tuple wrapping make_map's return value.
fig = make_map(ra[rnd_indices], dec[rnd_indices])


In [None]:
# TODO only centrals...
# Both mass columns are multiplied by 10**10 before taking log10.
plt.hist(np.log10(uchuu_halo_mass*10**10), bins=30, alpha=0.5, density=True, label="UCHUU")
# NOTE(review): `all` is not assigned anywhere in the visible notebook, so this resolves to the
# builtin all() and `all.all_data` raises AttributeError on a fresh kernel unless a star import
# rebinds the name — confirm which object was meant (the MXXL halo masses are in df['mxxl_halo_mass']).
plt.hist(np.log10(all.all_data['mxxl_halo_mass']*10**10), bins=30, alpha=0.5, density=True, label="MXXL")
#plt.yscale('log')
plt.title("MXXL vs UCHUU Truth Halo Masses")
plt.xlabel('log(M_halo)')
plt.ylabel('Density')
plt.legend()

# Tests

In [None]:
# Test estimate_frac_area for a completely filled sky
# Build a 1000 x 1000 grid of (ra, dec) points and flatten it into two coordinate lists.
_ra_axis = np.linspace(0.01, 359.9, 1000)
_dec_axis = np.linspace(0.01, 179.9, 1000)
_ra, _dec = (grid.flatten() for grid in np.meshgrid(_ra_axis, _dec_axis))

estimate_frac_area(_ra, _dec)