In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as c
import h5py
import astropy.coordinates as coord
import astropy.units as u
from pyutils import *
import numpy.ma as ma
from random import randint
from ctypes import c_uint64
import pickle
from astropy.table import Table
from postprocess import *
from dataloc import *
import k_correction as kcorr
import kcorr.k_corrections as desikc


In [None]:
# IPython autoreload, mode 2: re-import modules that changed on disk before each cell executes.
%load_ext autoreload
%autoreload 2

# MXXL Experiments

In [None]:
# Which dataset under 'Weight/' to read, and which bit of each word in it to test.
BITWORD = 'bitweight0'
BIT_CHOICE = 0
# Integer mask with only bit BIT_CHOICE set.
FIBER_ASSIGNED_SELECTOR = 2**BIT_CHOICE

# Opened read-only and left open: later cells keep reading datasets from this handle.
infile = h5py.File(MXXL_DATA_DIR + 'weights_3pass.hdf5', 'r')

# [:] reads each dataset fully into an in-memory NumPy array.
dec = infile['Data/dec'][:]
ra = infile['Data/ra'][:]
z_obs = infile['Data/z_obs'][:]
app_mag = infile['Data/app_mag'][:]
g_r = infile['Data/g_r'][:]
abs_mag = infile['Data/abs_mag'][:] # We aren't using these; computing ourselves. 
galaxy_type = infile['Data/galaxy_type'][:]
mxxl_halo_mass = infile['Data/halo_mass'][:]
mxxl_halo_id = infile['Data/mxxl_id'][:]
# True where the selected bit is set in the chosen bitweight word.
# NOTE(review): presumably "received a fiber in realization BIT_CHOICE" -- confirm against the file's documentation.
observed = (infile['Weight/'+BITWORD][:] & FIBER_ASSIGNED_SELECTOR ).astype(bool)


In [None]:
# Gather the per-galaxy arrays loaded above into one table, one row per galaxy.
# Column order follows the keyword order below.
df = pd.DataFrame(dict(
    dec=dec,
    ra=ra,
    z_obs=z_obs,
    app_mag=app_mag,
    g_r=g_r,
    abs_mag=abs_mag,
    galaxy_type=galaxy_type,
    mxxl_halo_mass=mxxl_halo_mass,
    mxxl_halo_id=mxxl_halo_id,
    observed=observed,
))


## Various Simple Plots

In [None]:
def _hist_by_mag_bin(df, mag_bins, column, hist_bins, xlabel, xlim, selector=None, label_suffix='', title=None):
    """Open a new figure and draw one normalized step histogram of ``df[column]`` per apparent-magnitude bin.

    Parameters
    ----------
    df : DataFrame that already has an integer ``mag_bin`` column (as produced by ``pd.cut(..., labels=False)``).
    mag_bins : bin edges used to build ``mag_bin``; only used here for the legend labels.
    column : name of the column to histogram.
    hist_bins : histogram bin edges.
    xlabel : x-axis label.
    xlim : ``(low, high)`` x-axis limits.
    selector : optional boolean Series aligned with ``df``; when given, only rows where it is True are drawn.
    label_suffix : text appended to each legend label (include the leading space).
    title : optional figure title.
    """
    plt.figure()
    for bin_num in range(len(mag_bins) - 1):
        rows = df.mag_bin == bin_num
        if selector is not None:
            rows = rows & selector
        label = f'{mag_bins[bin_num]:.1f} < mag < {mag_bins[bin_num+1]:.1f}{label_suffix}'
        plt.hist(df[column][rows], bins=hist_bins, histtype='step', label=label, density=True)
    plt.xlabel(xlabel)
    plt.legend()
    plt.xlim(*xlim)
    if title is not None:
        plt.title(title)


def plots_by_app_mag_bins(df):
    """Plot g-r and redshift distributions split into five apparent-magnitude bins between 16 and 19.5.

    Produces five figures: g-r (all galaxies), then z_obs for observed, quiescent ("red"),
    non-quiescent ("blue") and unobserved ("lost") galaxies.

    Side effect: adds/overwrites the ``mag_bin`` and ``quiescent`` columns on the passed-in ``df``.
    """
    # Makes bins of app_mag
    mag_bins = np.linspace(16, 19.5, 6)
    df['mag_bin'] = pd.cut(df.app_mag, mag_bins, labels=False)

    color_bins = np.linspace(0.0, 2.0, 200)
    z_bins = np.linspace(0, 0.5, 50)
    z_xlim = (0, 0.5)

    # g-r by app mag bin (all galaxies)
    _hist_by_mag_bin(df, mag_bins, 'g_r', color_bins, 'g-r', (0.5, 1.0))

    # z_obs by app mag bin, observed galaxies only
    _hist_by_mag_bin(df, mag_bins, 'z_obs', z_bins, 'z_obs', z_xlim,
                     selector=df.observed, title='Observed galaxies')

    # Make quiescent cut in df
    df['quiescent'] = is_quiescent_BGS_gmr(None, df.g_r)

    # z_obs by app mag bin for quiescent, then non-quiescent galaxies
    _hist_by_mag_bin(df, mag_bins, 'z_obs', z_bins, 'z_obs', z_xlim,
                     selector=df.quiescent, label_suffix=' quiescent', title='Red galaxies')
    _hist_by_mag_bin(df, mag_bins, 'z_obs', z_bins, 'z_obs', z_xlim,
                     selector=~df.quiescent, label_suffix=' star-forming', title='Blue galaxies')

    # z_obs by app mag bin for the lost (unobserved) galaxies
    _hist_by_mag_bin(df, mag_bins, 'z_obs', z_bins, 'z_obs', z_xlim,
                     selector=~df.observed, label_suffix=' lost', title='Lost galaxies')

plots_by_app_mag_bins(df)

In [None]:
# Keep only galaxies brighter than 19.5 mag and renumber rows 0..N-1.
keep = df.app_mag < 19.5
df = df[keep].reset_index(drop=True)
# Positional indexes of the observed galaxies (np.argwhere returns an (N, 1) array).
indexes_assigned = np.argwhere(df.observed)
# Valid positions are 0 .. len(df)-1, so the largest index must be strictly below len(df).
assert np.max(indexes_assigned) < len(df), "Assigned indexes are out of range"

In [None]:
# Looks a lot like BGS, 0.76 is fine for color cut
# Histogram of df.g_r zoomed to 0.5 < g-r < 0.9; the return value is bound only to keep it out of the cell output.
junk=plt.hist(df.g_r, bins=100)
plt.xlim(0.5, 0.9)

In [None]:
# Number of leading rows the quick-look cells below read from the file.
DATA_CUT_INDEX = 1000000 #21201544 #3000000 

# Second read-only handle on the same file that `infile` already has open.
weights = h5py.File(MXXL_DATA_DIR + 'weights_3pass.hdf5', 'r')
# Show the top-level groups and the dataset names inside 'Data' and 'Weight'.
print(list(weights))
print(list(weights['Data']))
print(list(weights['Weight']))

In [None]:
# Redshift histogram of the first DATA_CUT_INDEX rows only.
small_z_obs = weights['Data/z_obs'][0:DATA_CUT_INDEX]
# NOTE: despite its name, `angular_bins` here holds plt.hist's return tuple; it is reassigned in later cells.
angular_bins = plt.hist(small_z_obs, bins=50)
plt.xlabel("$z_{obs}$")
plt.title("Histogram of Observed Redshifts")


In [None]:
# Positions and magnitudes for the first DATA_CUT_INDEX rows.
mxxl_ra = weights['Data/ra'][0:DATA_CUT_INDEX]
mxxl_dec = weights['Data/dec'][0:DATA_CUT_INDEX]
mxxl_app_mag = weights['Data/app_mag'][0:DATA_CUT_INDEX]
# Boolean mask selecting the galaxies brighter than 19.5 mag, applied to each array.
bright_filter = mxxl_app_mag < 19.5 
mxxl_ra_bright = mxxl_ra[bright_filter]
mxxl_dec_bright = mxxl_dec[bright_filter]  
mxxl_app_mag_bright = mxxl_app_mag[bright_filter]

In [None]:
# Compare the helper's estimate for the full subset and for the bright subset.
# NOTE(review): estimate_frac_area comes from a star-import; presumably it returns the sky fraction covered -- confirm.
print(estimate_frac_area(mxxl_ra, mxxl_dec))
print(estimate_frac_area(mxxl_ra_bright, mxxl_dec_bright))

In [None]:
# Draw a random 1-in-50 subsample (without replacement) of positions on a sky map.
rnd_indices = np.random.choice(len(mxxl_ra), len(mxxl_ra)//50, replace=False)
# No trailing comma here: it previously wrapped make_map's return value in a 1-tuple.
fig = make_map(mxxl_ra[rnd_indices], mxxl_dec[rnd_indices]) # This looks like Alex' paper, good


In [None]:

# NOTE: this rebinds the global `mxxl_halo_id` to only the first DATA_CUT_INDEX rows.
mxxl_halo_id = weights['Data/mxxl_id'][0:DATA_CUT_INDEX]
# Print the fraction explicitly: a bare expression in the middle of a cell is never displayed.
frac_no_halo_id = np.sum(mxxl_halo_id == 0) / len(mxxl_halo_id)
print(f"Fraction of galaxies with MXXL halo ID 0: {frac_no_halo_id:.3f}")
# 2.5% of galaxies have 0 for the MXXL Halo ID because they are in halos that were added by hand post-simulation
# This was done because the small halos were not resolved in the simulation
# Gal type 2 and 3 are central and satellite galaxies that were unresolved

small_gal_type = weights['Data/galaxy_type'][0:DATA_CUT_INDEX]

# Galaxy types of the ID-0 galaxies (np.argwhere yields an (N, 1) index array).
weird_indexes = np.argwhere(np.invert(mxxl_halo_id.astype(bool)))
weird_types = small_gal_type[weird_indexes]
trash = plt.hist(weird_types)

In [None]:
# Apparent-magnitude histogram of the current `df`; rebinds the global `app_mag` to df's column.
app_mag = df.app_mag.to_numpy()
# NOTE: `angular_bins` again just holds plt.hist's return tuple here.
angular_bins = plt.hist(app_mag, bins=50)
plt.xlabel("Apparent Mag")
plt.title("Histogram of Apparent Mags")

## Abs Mag, K correction Analysis

In [None]:
# Rebinds the globals `z_obs` and `g_r` to the columns of the current `df`.
z_obs = df.z_obs.to_numpy()
g_r = df.g_r.to_numpy()

# Absolute magnitude from apparent magnitude and redshift, before any k-correction.
R = app_mag_to_abs_mag(df.app_mag.to_numpy(), z_obs)

# Subtract the GAMA r-band k-correction evaluated at each galaxy's redshift and colour.
kcorr_r_gama = kcorr.GAMA_KCorrection(band='R')
R_k_GAMA = R - kcorr_r_gama.k(z_obs, g_r)

In [None]:
# Same correction using the DESI k-correction object built with photsys='N'.
kcorr_r_bgs  = desikc.DESI_KCorrection(band='R', file='jmext', photsys='N')
R_k_BGS = R - kcorr_r_bgs.k(z_obs, g_r)

In [None]:
# DESI k-correction built with photsys='S', for comparison against the 'N' version.
kcorr_r_bgs2  = desikc.DESI_KCorrection(band='R', file='jmext', photsys='S')
# Must use kcorr_r_bgs2 here; using kcorr_r_bgs (the 'N' object) made R_k_BGS2 identical to R_k_BGS by construction.
R_k_BGS2 = R - kcorr_r_bgs2.k(z_obs, g_r)

In [None]:
# N vs S doesn't matter
# Fraction of galaxies whose two k-corrected magnitudes agree within rtol = 10E-6 (i.e. 1e-5) plus np.isclose's default atol.
np.sum(np.isclose(R_k_BGS, R_k_BGS2, rtol=10E-6)) / len(R_k_BGS)

In [None]:
# Compare my_abs_mag to abs_mag. 
# Overlays the GAMA- and BGS(N)-corrected absolute-magnitude histograms on shared bins with a log count axis.
bins = np.linspace(-25, -10, 100)
#my_counts, my_bins, my_p = plt.hist(R, label="my abs_mag", bins=bins, alpha=0.5)
#alex_counts, alex_bins, alex_p = plt.hist(df.abs_mag.to_numpy(), label="alex abs_mag", bins=bins, alpha=0.5)
# NOTE: the second call rebinds my_k_counts/my_k_bins/my_k_p, so only the BGS results remain afterwards.
my_k_counts, my_k_bins, my_k_p = plt.hist(R_k_GAMA, label="my GAMA k abs_mag", bins=bins, alpha=0.5)
my_k_counts, my_k_bins, my_k_p = plt.hist(R_k_BGS, label="my BGS k abs_mag N", bins=bins, alpha=0.5)
#my_k_counts, my_k_bins, my_k_p = plt.hist(R_k_BGS2, label="my BGS k abs_mag S", bins=bins, alpha=0.5)
#z = plt.hist(my_abs_mag_k, label="my k abs_mag", bins=50)
plt.xlabel("Absolute Mag")
plt.ylabel("Count")
plt.title("Compare Absolute Mags")
plt.yscale('log')
plt.legend()

#print(f"The peaks are Alex: {alex_bins[np.argmax(alex_counts)]:.1f}, My {my_bins[np.argmax(my_counts)]:.1f}, My K {my_k_bins[np.argmax(my_k_counts)]:.1f}")

## What is a reasonable z fudge factor for 'close enough' redshifts, given galaxies' peculiar velocities $v_{\mathrm{pec}}$?

Galaxies move at hundreds of km/s usually, or thousands in a rich cluster.

Two galaxies at the same cosmological redshift, each moving at 600 km/s towards the other along the line of sight (LOS), have a relative velocity of 1200 km/s, which corresponds to a redshift difference of about 0.004. This suggests a tolerance of z +/- 0.002 is totally reasonable. In richer areas this could be as high as z +/- 0.010. 

Adopting z +/- 0.003 for now seems fine. Can refine later.

In [None]:
# What is a reasonable z +/- fudge factor for 'close enough' redshifts? 
# Consider peculiar velocities.
# Convert a few candidate redshift offsets to velocities with astropy's doppler_redshift equivalency and print each pair.
z_test = [0.001, 0.002, 0.003, 0.005, 0.01] * u.dimensionless_unscaled
v_pec = z_test.to(u.km / u.s, u.equivalencies.doppler_redshift())
for i in range(len(z_test)):
    print(f"z={z_test[i]:.3f} is {v_pec[i]:.0f}")



## Get Truth Abs Mag for Correcting

This is for the 'fancy' approach that we don't use.

In [None]:
# Reload the full-length magnitude and redshift columns (rebinding the globals `app_mag` and `z_obs`).
app_mag = weights['Data/app_mag'][:]
z_obs = weights['Data/z_obs'][:]
APP_MAG_CUT = 19.5
bright_filter = app_mag < APP_MAG_CUT 
redshift_filter = z_obs > 0 
# Element-wise AND of the two masks.
keep = np.all([bright_filter, redshift_filter], axis=0)

# From here on `app_mag` and `z_obs` are the filtered (shorter) arrays.
app_mag = app_mag[keep]
z_obs = z_obs[keep]

my_abs_mag = app_mag_to_abs_mag(app_mag, z_obs)

In [None]:
# 100 evenly spaced edges spanning the absolute-magnitude range (99 bins); `angular_bins` is reused as a generic name.
angular_bins = np.linspace(min(my_abs_mag), max(my_abs_mag), 100)
densities, angular_bins  = np.histogram(my_abs_mag, bins=angular_bins, density=True)
t = plt.hist(my_abs_mag, angular_bins, density=True)

# Two arrays are written back to back into one file; they must be read with two np.load calls in the same order.
with open('bin/abs_mag_weight.npy', 'wb') as f:
    np.save(f, densities, allow_pickle=False)
    np.save(f, angular_bins, allow_pickle=False)

In [None]:
# Read back the two arrays in the order they were written: densities first, then bin edges.
with open('bin/abs_mag_weight.npy', 'rb') as f:
    densities = np.load(f)
    angular_bins = np.load(f)

# There is one density per bin, i.e. one fewer than the number of edges; plot against the left edges.
# Slicing with [:-1] keeps this correct if the number of bins is ever changed.
plt.plot(angular_bins[:-1], densities)
#plt.yscale('log')

## Examine map of apparent mag to z distribution

In [None]:
# Builds a map of apparent mags to a pdf of redshifts.
# Build the map all the way to 20th mag
# Reloads the full columns, then keeps galaxies brighter than APP_MAG_CUT with positive redshift.
app_mag = infile['Data/app_mag'][:]
z_obs = infile['Data/z_obs'][:]
APP_MAG_CUT = 20.0
bright_filter = app_mag < APP_MAG_CUT 
redshift_filter = z_obs > 0 
keep = np.all([bright_filter, redshift_filter], axis=0)
# After these two lines the globals `app_mag` and `z_obs` are the filtered arrays.
app_mag = app_mag[keep]
z_obs = z_obs[keep]

In [None]:
# NOTE(review): build_app_mag_to_z_map comes from a star-import; the_map is presumably indexable by
# app-mag bin and yields the redshifts in that bin -- confirm against its definition.
app_mag_bins, the_map = build_app_mag_to_z_map(app_mag, z_obs)

# Galaxy counts per apparent-magnitude bin, on a log axis.
counts, app_mag_bins_2  = np.histogram(app_mag, bins=app_mag_bins, density=False)
plt.figure()
t = plt.hist(app_mag, app_mag_bins, density=False)
plt.yscale('log')

# Normalized histograms of the_map entries 0, 50 and 100, overlaid on one figure.
plt.figure()
trash=plt.hist(the_map[0],bins=30, density=True)
trash=plt.hist(the_map[50],bins=30, density=True)
trash=plt.hist(the_map[100],bins=30, density=True)

In [None]:
# Density of galaxies per sq degree
# Reloads the full-length `app_mag`. The divisor 14000 is presumably the footprint area in deg^2 -- confirm.
app_mag = weights['Data/app_mag'][:]
print(f"There are ~{np.sum(app_mag < 19.5) / 14000:.0f} galaxies/deg^2 < 19.5 mag")
print(f"There are ~{np.sum(np.all([app_mag > 19.5, app_mag < 20.0], axis=0)) / 14000:.0f} galaxies/deg^2 between 19.5 and 20.0 mag")

## Nearest Neighbor Angular Separation and Same-Halo Analysis

In [None]:
APP_MAG_CUT = 19.5

# Reload every per-galaxy array from the file before filtering. Earlier cells rebind several of these
# globals to filtered or truncated versions (z_obs, app_mag, g_r, mxxl_halo_id), so their lengths no
# longer match each other or `prob_obs`. Reloading also makes this cell safe to re-run.
dec = infile['Data/dec'][:]
ra = infile['Data/ra'][:]
z_obs = infile['Data/z_obs'][:]
app_mag = infile['Data/app_mag'][:]
g_r = infile['Data/g_r'][:]
mxxl_halo_id = infile['Data/mxxl_id'][:]
observed = (infile['Weight/'+BITWORD][:] & FIBER_ASSIGNED_SELECTOR).astype(bool)

bright_filter = app_mag < APP_MAG_CUT # makes a filter array (True/False values)
redshift_filter = z_obs > 0 # makes a filter array (True/False values)
#location_filter_1 = ra < 270.0
#location_filter_2 = ra > 120.0
#location_filter_3 = dec > 0.0
#location_filter_4 = dec < 45.0
# Full-length boolean mask: element-wise AND of the individual filters.
keep = np.all([bright_filter, redshift_filter], axis=0)
#keep = np.all([bright_filter, redshift_filter, location_filter_1, location_filter_2, location_filter_3, location_filter_4], axis=0)

# From here on these globals hold only the kept galaxies.
dec = dec[keep]
ra = ra[keep]
z_obs = z_obs[keep]
app_mag = app_mag[keep]
mxxl_halo_id = mxxl_halo_id[keep]
g_r = g_r[keep]
quiescent = is_quiescent_BGS_gmr(None, g_r)

observed = observed[keep]
unobserved = np.invert(observed)

# NOTE(review): this reads '../bin/prob_obs.npy' while the cell that computes P_obs writes
# 'bin/prob_obs.npy' -- confirm which path is intended.
with open('../bin/prob_obs.npy', 'rb') as f:
    prob_obs = np.load(f)
# prob_obs is full length (one entry per galaxy in the file); cut it with the same mask.
prob_obs_cut = prob_obs[keep]

### Calculate P_obs

In [None]:
def bitsum(bitstring):
    """Return the number of set bits in ``bitstring`` viewed as an unsigned 64-bit word.

    Negative inputs wrap modulo 2**64 (so -1 has 64 set bits).
    """
    as_unsigned = c_uint64(bitstring).value
    return f"{as_unsigned:b}".count("1")

# Element-wise version of bitsum for arrays.
v_bitsum = np.vectorize(bitsum)

def summate(a):
    """Return the total number of set bits across every element of ``a``."""
    per_element = v_bitsum(a)
    return per_element.sum()


In [None]:
# Skip this if iips were loaded OK. Takes ~8 minutes.

# Read all 32 64-bitstrings into memory from the file
num_bitstrings = 32
BITS_PER_WORD = 64
# Read from the open HDF5 handle `infile`; the bare name `input` is Python's builtin function
# and cannot be subscripted.
galaxy_count = len(infile['Weight/bitweight0'])
bitweights = np.empty((num_bitstrings, galaxy_count), dtype='i8')

for i in range(num_bitstrings):
    bitweights[i] = infile['Weight/bitweight{0}'.format(i)][:]

# Per galaxy (column): set bits summed over all words, divided by the total number of bits
# (32 * 64 = 2048), giving the fraction of bits that are set.
prob_obs = np.apply_along_axis(summate, 0, bitweights) / (num_bitstrings * BITS_PER_WORD)

with open('bin/prob_obs.npy', 'wb') as f:
    np.save(f, prob_obs)


In [None]:
# Debug print for a single galaxy: show each of its bitweight words in binary.
specimen = 123
# Mask with value 2 (only bit 1 set), printed first as a 64-character binary string for reference.
bit_selector = c_uint64(2).value
print('{:064b}'.format(bit_selector))
print('')
for i in range(num_bitstrings):
    value = bitweights[(i,specimen)]
    # Reinterpret the signed 64-bit value as unsigned so the binary format has no minus sign.
    converted = c_uint64(value).value
    # Columns: the word in binary, its set-bit count, and whether the selector bit is set.
    print('{:064b}'.format(converted), '{:2.0f}'.format(bitsum(value)), bool(converted & bit_selector))

print("Averaged Probability of being targetted: ", prob_obs[specimen])

In [None]:
# P_obs restricted to the galaxies selected by `keep`.
prob_obs_cut = prob_obs[keep]

# np.linspace's default of 50 points gives 49 equal-width bins on [0, 1].
pobs_bins_temp = np.linspace(0,1)
trash=plt.hist(prob_obs, bins=pobs_bins_temp, label="All galaxies")
trash2=plt.hist(prob_obs_cut, bins=pobs_bins_temp, label=f"Galaxies below {APP_MAG_CUT} mag")
plt.yscale('log')
plt.legend()

In [None]:
# Complement of `keep`. NOTE: `keep` also requires z_obs > 0, so this set is "failed either cut",
# not strictly "fainter than APP_MAG_CUT" as the legend label says.
prob_obs_dim = prob_obs[np.invert(keep)]
trash=plt.hist(prob_obs_dim, bins=pobs_bins_temp, alpha=0.5, label=f"Galaxies above {APP_MAG_CUT} mag")
trash2=plt.hist(prob_obs_cut, bins=pobs_bins_temp, alpha=0.5, label=f"Galaxies below {APP_MAG_CUT} mag")
plt.yscale('log')
plt.xlabel('$P_{obs}$')
plt.ylabel("Count")
plt.legend()

### Setup Bins

In [None]:
# Now bin so that things with ang distances higher than the max we care about are thrown out
BIN_COUNT = 20
BIGGER_THAN_ANY_NN_DIST = 3600
# BIN_COUNT edges in arcsec: BIN_COUNT-1 log-spaced from 3 to 900, plus one catch-all upper edge.
angular_bins = np.append(np.logspace(np.log10(3), np.log10(900), BIN_COUNT - 1), BIGGER_THAN_ANY_NN_DIST)
print("Angular Distance Bin Markers", angular_bins)

# Redshift bin edges are taken from SimpleRedshiftGuesser.
z_bins = np.array(SimpleRedshiftGuesser.z_bins)
print("Redshift Bin Markers", z_bins)

POBS_BIN_COUNT = 15
POBS_bins = np.linspace(0.01, 1.0, POBS_BIN_COUNT)
print("Pobs Bin Markers", POBS_bins)

# Two edges only, so np.digitize puts everything in [15.0, 20.01) into bin index 1.
APP_MAG_BIN_COUNT = 2
app_mag_bins = np.linspace(15.0, 20.01, APP_MAG_BIN_COUNT)
print("App mag bin markers", app_mag_bins)

# True: match unobserved galaxies against the observed ones only.
# False: match every galaxy against the full catalog.
LOST_GALAXIES_ONLY = True

# The *_catalog arrays are index-aligned with `catalog`, so indexes returned by the sky match
# can be used to look up the neighbor's halo ID, redshift and colour.
if LOST_GALAXIES_ONLY:
    treename = 'mxxl_same_halo_analysis_fiberassigned_b' + str(BIT_CHOICE)
    catalog = coord.SkyCoord(ra=ra[observed]*u.degree, dec=dec[observed]*u.degree, frame='icrs')
    mxxl_halo_id_catalog = mxxl_halo_id[observed]
    z_obs_catalog = z_obs[observed]
    color_catalog = quiescent[observed].astype(int)
else:
    treename = 'mxxl_same_halo_analysis_all'
    catalog = coord.SkyCoord(ra=ra*u.degree, dec=dec*u.degree, frame='icrs')
    mxxl_halo_id_catalog = mxxl_halo_id
    z_obs_catalog = z_obs
    color_catalog = quiescent.astype(int)


def getlabel(index, z_bins):
    """Return a legend label for bin ``index`` of the edge list ``z_bins``.

    Bin 0 is labelled "< first_edge"; any other bin is labelled "previous_edge - this_edge".
    """
    if index != 0:
        return f"{z_bins[index-1]} - {z_bins[index]}"
    return f"< {z_bins[index]}"


### Examine Same-Halo as function of angular distance to NN only

In [None]:
# Get NN's angular distance distribution and same halo truth from MXXL

# Though this is binned by z of the target and not the NN, it shouldn't be able to affect results
# by reciprocity of NN
# Redshift bin index of every target galaxy.
z_bin = np.digitize(z_obs, z_bins)

if LOST_GALAXIES_ONLY:
    #nn_bins = np.arange(5)+1
    nn_bins=[1]
else:
    nn_bins = [2]#[2,3,4] # this means closest 3. '1' will find the same object.

# Accumulators indexed [target z bin][neighbor rank][angular-distance bin].
all_ang_bincounts = np.ones((len(z_bins), len(nn_bins), len(angular_bins)))
all_same_halo_bincounts = np.zeros((len(z_bins), len(nn_bins), len(angular_bins)))
all_same_z_bincounts = np.zeros((len(z_bins), len(nn_bins), len(angular_bins)))
all_sim_z_bincounts = np.zeros((len(z_bins), len(nn_bins), len(angular_bins)))

for i in range(len(z_bins)):
    for j in range(len(nn_bins)):
        # Targets: galaxies in redshift bin i (and, in lost-only mode, unobserved).
        if LOST_GALAXIES_ONLY:
            target_filter = np.all([z_bin == i, unobserved], axis=0)
        else:
            target_filter = z_bin == i
        to_match = coord.SkyCoord(ra=ra[target_filter]*u.degree, dec=dec[target_filter]*u.degree, frame='icrs')
        # idx indexes into `catalog`; storekdtree lets astropy cache and reuse the KD-tree under `treename`.
        idx, d2d, d3d = coord.match_coordinates_sky(to_match, catalog, nthneighbor=nn_bins[j], storekdtree=treename)

        # Per-target truth flags: same halo ID, (nearly) identical redshift, redshift within SIM_Z_THRESH.
        same_halo = mxxl_halo_id[target_filter] == mxxl_halo_id_catalog[idx]
        same_z = np.isclose(z_obs[target_filter], z_obs_catalog[idx], rtol=0, atol=0.000001)
        sim_z = np.isclose(z_obs[target_filter], z_obs_catalog[idx], rtol=0, atol=SIM_Z_THRESH)
        
        # Angular-distance bin (arcsec) of each target's neighbor.
        angdist_bin_ind = np.digitize(d2d.to(u.arcsec).value, angular_bins)
        
        bincounts = np.bincount(angdist_bin_ind, minlength=len(angular_bins)) + 1 # avoids divide by 0, won't hurt statistics
        all_ang_bincounts[i][j] = bincounts

        # Weighted bincounts count only the targets for which the flag is True.
        bincounts2 = np.bincount(angdist_bin_ind, minlength=len(angular_bins), weights=same_halo.astype(int))
        all_same_halo_bincounts[i][j] = bincounts2

        bincount3 = np.bincount(angdist_bin_ind, minlength=len(angular_bins), weights=same_z.astype(int))
        all_same_z_bincounts[i][j] = bincount3

        bincount4 = np.bincount(angdist_bin_ind, minlength=len(angular_bins), weights=sim_z.astype(int))
        all_sim_z_bincounts[i][j] = bincount4
    

In [None]:
# NOTE: all_ang_bincounts carries a +1 per bin (added to avoid division by zero), so "Galaxies studied" is slightly inflated.
print("Galaxies studied: {0}. Same halo: {1}. Similar z: {2}".format(np.sum(all_ang_bincounts), np.sum(all_same_halo_bincounts), np.sum(all_sim_z_bincounts)))

In [None]:
# Plots for nearest-neighbor angular distances and same-halo analysis
# For each neighbor rank: three figures with one curve per target redshift bin.

for j in range(len(nn_bins)):
    if j < 5:
        # Figure 1: neighbor counts per angular-distance bin.
        plt.figure()
        for i in range(len(z_bins)):
            label = getlabel(i, z_bins)
            # This call must be inside the loop; it used to sit outside, so only the last z bin was drawn.
            plt.plot(angular_bins, all_ang_bincounts[i][j], label=label, color=get_color(i))

        plt.title(f"Nearest Neighbor {j} Ang. Distance Distribution")
        plt.ylabel("Count")
        plt.xlabel("Angular Distance (arcsec)")
        plt.yscale('log')
        plt.xscale('log')
        plt.legend()
        plt.draw()

        # Figure 2: fraction of neighbors that share the target's halo.
        plt.figure()
        for i in range(len(z_bins)):
            label = getlabel(i, z_bins)
            plt.plot(angular_bins, all_same_halo_bincounts[i][j]/all_ang_bincounts[i][j], label=label, color=get_color(i))
            print("Total fraction of nearest neighbors in same halo (z {0}, NN-{1}): {2:.3f}".format(label, j+1, np.sum(all_same_halo_bincounts[i][j]) / np.sum(all_ang_bincounts[i][j])))

        plt.title(f"Nearest Neighbor {j} Same Halo Fraction")
        plt.ylabel("NN Same Halo Fraction")
        plt.xlabel("Angular Distance (arcsec)")
        plt.xscale('log')
        plt.legend()
        plt.draw()

        # Figure 3: fraction of neighbors whose redshift is within SIM_Z_THRESH of the target's.
        plt.figure()
        for i in range(len(z_bins)):
            label = getlabel(i, z_bins)
            plt.plot(angular_bins, all_sim_z_bincounts[i][j]/all_ang_bincounts[i][j], label=label, color=get_color(i))

            print("Total fraction of nearest neighbors at sim z (z {0}, NN-{1}): {2:.3f}".format(label, j+1, np.sum(all_sim_z_bincounts[i][j]) / np.sum(all_ang_bincounts[i][j])))

        plt.title(f"Nearest Neighbor {j} Sim z Fraction")
        plt.ylabel("NN Sim z Fraction")
        plt.xlabel("Angular Distance (arcsec)")
        plt.xscale('log')
        plt.legend()
        plt.draw()

#print("What fraction of the time is the NN >19.5 mag?")

### Color plots of NN Same Halo in z / ang distance / P_obs space

In [None]:
# TODO consider:
# Why do I treat z differently? 
# Should I move the looks to where the z loop is? 
# Have less calls to match_coord_sky and post-process? Is that possible?
# I think its a lot faster to do that
# Bin index of every target galaxy in P_obs and in apparent magnitude.
POBS_bin = np.digitize(prob_obs_cut, POBS_bins)
app_mag_bin = np.digitize(app_mag, app_mag_bins)

if LOST_GALAXIES_ONLY: 
    nthneighbor = 1
else:
    nthneighbor = 2 # since catalog includes the targets in this case

# Accumulators indexed [target P_obs bin][target colour][NN colour][NN z bin][target app-mag bin][angular bin].
# NOTE(review): the np.ones initial value is overwritten wherever the loops below assign; empty bins
# then hold 0 and produce NaN on division (handled with nan_to_num in the next cell).
all_ang_bincounts_2 = np.ones((POBS_BIN_COUNT, 2, 2, len(z_bins), APP_MAG_BIN_COUNT, BIN_COUNT))
all_same_halo_bincounts_2 = np.zeros((POBS_BIN_COUNT, 2, 2, len(z_bins), APP_MAG_BIN_COUNT, BIN_COUNT))
all_sim_z_bincounts_2 = np.zeros((POBS_BIN_COUNT, 2, 2, len(z_bins), APP_MAG_BIN_COUNT, BIN_COUNT))

# Loop through properties of the lost galaxy
for i in range(len(POBS_bins)):
    for j in [0,1]: # 0 blue, 1 quiescent
        for k in range(len(app_mag_bins)):

            # Properties of the lost galaxy we need to filter down to 
            if LOST_GALAXIES_ONLY:
                target_filter = np.all([POBS_bin == i, app_mag_bin == k, unobserved, quiescent == j], axis=0)
            else:
                target_filter = np.all([POBS_bin == i, app_mag_bin == k, quiescent == j], axis=0)

            to_match = coord.SkyCoord(ra=ra[target_filter]*u.degree, dec=dec[target_filter]*u.degree, frame='icrs')
            idx, d2d, d3d = coord.match_coordinates_sky(to_match, catalog, nthneighbor=nthneighbor, storekdtree=treename)
            # NOTE(review): this first same_halo is a dead store; it is recomputed two lines below.
            same_halo = mxxl_halo_id[target_filter] == mxxl_halo_id_catalog[idx]
            sim_z = np.isclose(z_obs[target_filter], z_obs_catalog[idx], rtol=0, atol=SIM_Z_THRESH)
            # Targets with halo ID 0 have no usable halo ID, so fall back to "similar redshift" for them.
            same_halo = np.where(mxxl_halo_id[target_filter] == 0, sim_z, mxxl_halo_id[target_filter] == mxxl_halo_id_catalog[idx])

            # Properties of the nearest neighbor galaxy we are binning
            nn_z_bin_ind = np.digitize(z_obs_catalog[idx], z_bins)
            angdist_bin_ind = np.digitize(d2d.to(u.arcsec).value, angular_bins)
            
            for t_c in [0,1]:
                for zb in range(len(z_bins)):
                    # Targets whose neighbor falls in z bin zb and has colour t_c.
                    right_bin = np.all([nn_z_bin_ind == zb, color_catalog[idx] == t_c], axis=0)
                    
                    # Weighted bincount = number of such targets per angular-distance bin.
                    bincounts = np.bincount(angdist_bin_ind, minlength=len(angular_bins), weights=right_bin.astype(int))
                    all_ang_bincounts_2[i][j][t_c][zb][k] = bincounts

                    bincounts2 = np.bincount(angdist_bin_ind, minlength=len(angular_bins), weights=np.all([same_halo, right_bin], axis=0).astype(int))
                    all_same_halo_bincounts_2[i][j][t_c][zb][k] = bincounts2

                    bincounts3 = np.bincount(angdist_bin_ind, minlength=len(angular_bins), weights=np.all([sim_z, right_bin], axis=0).astype(int))
                    all_sim_z_bincounts_2[i][j][t_c][zb][k] = bincounts3

In [None]:
# Per-cell fractions over the full 6-D grid; cells with zero counts produce NaN (0/0).
frac_same_halo_full = all_same_halo_bincounts_2 / (all_ang_bincounts_2)
frac_sim_z_full = all_sim_z_bincounts_2 / (all_ang_bincounts_2)

# copy=False makes nan_to_num modify the arrays in place; NaN becomes 0.0 by default.
frac_same_halo_full=np.nan_to_num(frac_same_halo_full, copy=False) # empty bins we call 0% TODO
frac_sim_z_full=np.nan_to_num(frac_sim_z_full, copy=False)

In [None]:
# To visualize things we need to reduce dimensionality. 
# Aggregate of either Pobs or app mag, or choose a single value from them to examine.
# The below always picks one z per plot.

# axis 4 will sum over app mag. Axis 0 will sum over P_obs
axis_to_sumover = 4

# Use this to aggregate
all_same_halo_bincounts_reduced = np.sum(all_same_halo_bincounts_2, axis=axis_to_sumover)
all_ang_bincounts_reduced = np.sum(all_ang_bincounts_2, axis=axis_to_sumover)
all_sim_z_bincounts_reduced = np.sum(all_sim_z_bincounts_2, axis=axis_to_sumover)

# Use this instead to pick out a value
index_to_use = 3
#all_same_halo_bincounts_reduced = np.take(all_same_halo_bincounts_2, index_to_use, axis=axis_to_sumover)
#all_ang_bincounts_reduced = np.take(all_ang_bincounts_2, index_to_use, axis=axis_to_sumover)
#all_sim_z_bincounts_reduced = np.take(all_sim_z_bincounts_2, index_to_use, axis=axis_to_sumover)

# After summing over P_obs (axis 0) the app-mag axis sits at position 3. These two swaps move it
# to position 0 so the plotting cell can index the result the same way in both modes.
if axis_to_sumover == 0:
    all_same_halo_bincounts_reduced = np.swapaxes(all_same_halo_bincounts_reduced, 0,1)
    all_same_halo_bincounts_reduced = np.swapaxes(all_same_halo_bincounts_reduced, 0,2)
    all_ang_bincounts_reduced = np.swapaxes(all_ang_bincounts_reduced, 0,1)
    all_ang_bincounts_reduced = np.swapaxes(all_ang_bincounts_reduced, 0,2)
    all_sim_z_bincounts_reduced = np.swapaxes(all_sim_z_bincounts_reduced, 0,1)
    all_sim_z_bincounts_reduced = np.swapaxes(all_sim_z_bincounts_reduced, 0,2)

# Empty cells give 0/0 = NaN, which is replaced by 0.0.
frac_same = all_same_halo_bincounts_reduced / (all_ang_bincounts_reduced)
frac_sim_z = all_sim_z_bincounts_reduced / (all_ang_bincounts_reduced)
frac_same = np.nan_to_num(frac_same, copy=False, nan=0.0)
# Assign back to frac_sim_z; the previous spelling `frac_zim_z` was a typo that created a stray name.
frac_sim_z = np.nan_to_num(frac_sim_z, copy=False, nan=0.0)

# Make rough bins of just over a threshold or not
nn_success_thresh = 0.4 # change fit lines below if you change this!
success_bins = [0,nn_success_thresh,1.01]
# 1 where the fraction is below the threshold, 2 where it is at or above it.
frac_same_binned = np.digitize(frac_same, bins=success_bins)

# Resultant shape must be consistent
print(np.shape(all_ang_bincounts_reduced))

In [None]:
def get_color_label(target_q, nn_q):
    """Return a human-readable title for a (target colour, neighbor colour) combination.

    Parameters
    ----------
    target_q : 1 if the target galaxy is quiescent (red), 0 if it is star-forming (blue).
    nn_q : 1 if the nearest neighbor is quiescent (red), 0 if it is star-forming (blue).

    Returns
    -------
    str such as "Red target, Blue NN".

    Raises
    ------
    ValueError if either flag is not 0 or 1.
    """
    # Look up by the arguments themselves; the function used to read the globals
    # `color_index` / `NN_color_index` and ignore its parameters.
    labels = {
        (1, 1): "Red target, Red NN",
        (0, 1): "Blue target, Red NN",
        (1, 0): "Red target, Blue NN",
        (0, 0): "Blue target, Blue NN",
    }
    try:
        return labels[(target_q, nn_q)]
    except KeyError:
        raise ValueError(f"colour flags must be 0 or 1, got target_q={target_q!r}, nn_q={nn_q!r}") from None

In [None]:
# One figure per (target colour, NN colour) pair; each figure has one row per NN redshift bin and two panels:
# left = same-halo fraction, right = the same fraction thresholded at nn_success_thresh.
for color_index in [1,0]:
    for NN_color_index in [1,0]:

        print(get_color_label(color_index, NN_color_index))

        ncols = 2 # there is code for 4 plots per row (z), but can make a subplot of it
        z_bin_numbers_to_plot = range(len(z_bins))
        #z_bin_numbers_to_plot = [1,2]

        fig, axes = plt.subplots(nrows=len(z_bin_numbers_to_plot), ncols=ncols, figsize=(6*ncols, 4*len(z_bin_numbers_to_plot)))

        # The y axis is whichever lost-galaxy property was NOT summed over in the reduction cell.
        if (axis_to_sumover == 4):
            y_axis_bins = POBS_bins
        if (axis_to_sumover == 0):
            y_axis_bins = app_mag_bins

        row=-1
        for zb in z_bin_numbers_to_plot:
            
            row+=1
            # Only used by the disabled "Counts" panel kept in the string literal further down.
            density = all_ang_bincounts_reduced[:,color_index,NN_color_index,zb,:]
            #print(f"Galaxies in this z-bin: {np.sum(density)}")

            # plt.subplots returns a 1-D axes array when there is a single row.
            if len(z_bin_numbers_to_plot) == 1:
                axrow = axes
            else:
                axrow = axes[row]
            
            if (ncols != 1):
                ax=axrow[0]
            else:
                ax = axrow
            
            # Left panel: continuous same-halo fraction, colour scale clipped to [0, 0.8].
            cplot = ax.pcolor(angular_bins, y_axis_bins, frac_same[:,color_index,NN_color_index,zb,:], shading='auto', cmap='RdYlGn', norm=c.Normalize(vmin=0, vmax=0.8))
            fig.colorbar(cplot, ax=ax)
            ax.set_title(f"NN Same Halo Fraction (NN z {getlabel(zb, z_bins)})")
            if (axis_to_sumover == 4):
                ax.set_ylabel("Lost Galaxy $P_{obs}$")
            if (axis_to_sumover == 0):
                ax.set_ylabel("Lost Galaxy app r-mag")
            ax.set_xlabel("Angular Distance (arcsec) to NN")
            ax.set_xscale('log')
            
            
            # Right panel: thresholded version. NOTE: the "40%" in the title is hard-coded, not derived from nn_success_thresh.
            cplot = axrow[1].pcolor(angular_bins, y_axis_bins, frac_same_binned[:,color_index,NN_color_index,zb,:], shading='auto', cmap='RdYlGn')
            fig.colorbar(cplot, ax=axrow[1])
            axrow[1].set_title(f"NN Same Halo Over 40% (NN z {getlabel(zb, z_bins)})")
            if (axis_to_sumover == 4):
                axrow[1].set_ylabel("Lost Galaxy $P_{obs}$")
            if (axis_to_sumover == 0):
                axrow[1].set_ylabel("Lost Galaxy app r-mag")    
            axrow[1].set_xlabel("Angular Distance (arcsec) to NN")
            axrow[1].set_xscale('log')
            
            """
            cplot = axrow[2].pcolor(angular_bins, y_axis_bins, density, shading='auto', cmap='YlGn', norm=c.LogNorm(vmin=1, vmax=5000))
            fig.colorbar(cplot, ax=axrow[2])
            axrow[2].set_title(f"Counts (NN z {getlabel(zb, z_bins)})")
            if (axis_to_sumover == 4):
                axrow[2].set_ylabel("Lost Galaxy $P_{obs}$")
            if (axis_to_sumover == 0):
                axrow[2].set_ylabel("Lost Galaxy app r-mag")    
            axrow[2].set_xlabel("Angular Distance (arcsec) to NN")
            axrow[2].set_xscale('log')
            """
            # Overlay the fitted threshold curve, evaluated just inside the upper edge of this z bin.
            # NOTE(review): get_NN_40_line comes from a star-import; presumably it returns the angular
            # distance of the 40% line for each P_obs value -- confirm.
            if axis_to_sumover == 4:
                if ncols == 1 and len(z_bin_numbers_to_plot) == 1:
                    ax.scatter(get_NN_40_line(z_bins[zb]-0.01, POBS_bins, color_index, NN_color_index), POBS_bins)
                else:
                    for i in range(len(axrow)):
                        axrow[i].scatter(get_NN_40_line(z_bins[zb]-0.01, POBS_bins, color_index, NN_color_index), POBS_bins)
                

        #fig.suptitle(title)
        fig.tight_layout() 


In [None]:
# Make a plot comparing Simple v4 and v2 thresholds

# make array of midpoints of z_bins
# The first bin has no lower edge in z_bins, so its midpoint is taken between 0 and the first edge.
z_bin_midpoints = np.zeros(len(z_bins))
for i in range(len(z_bins)):
    if i == 0:
        z_bin_midpoints[i] = z_bins[i] / 2
    else:
        z_bin_midpoints[i] = (z_bins[i] + z_bins[i-1]) / 2

# Make a plot with 4 lines, one for each color combination, where x axis is the z bin midpoint and y axis is the 40% line angular separation value
# The P_obs value is fixed at 0.5 for every line

fig, ax = plt.subplots()
for color_index in [0,1]:
    for NN_color_index in [0,1]:
        # Threshold evaluated just inside the upper edge of each z bin.
        dat = np.zeros(len(z_bins))
        for i in range(len(z_bins)):
            dat[i] = get_NN_40_line(z_bins[i]-0.01, 0.5, color_index, NN_color_index)

        # Line colour encodes the target colour; line style encodes the NN colour (solid = red NN, dashed = blue NN).
        if color_index == 1:
            color = 'r'
        else:
            color = 'b'
        if NN_color_index == 1:
            marker = '-'
        else:
            marker = '--'
        ax.plot(z_bin_midpoints, dat, marker, label=get_color_label(color_index, NN_color_index), color=color)

# The v2 threshold takes no colour arguments, so it is a single line.
dat = np.zeros(len(z_bins))
for i in range(len(z_bins)):
    dat[i] = get_NN_40_line_v2(z_bins[i]-0.01, 0.5)
ax.plot(z_bin_midpoints, dat, label="v2", color='black')

plt.xlabel("z")
plt.ylabel("40% Threshold Distance [arcsec]")
plt.legend()

### Trying to fit the 40% NN success curve

In [None]:
def get_prob_nn_same_halo(ang_dist, nn_z, my_app_mag, my_pobs, my_quiescent=0, nn_quiescent=0):
    """Look up the binned same-halo fraction for a lost galaxy and its nearest neighbor.

    Parameters
    ----------
    ang_dist : angular distance to the neighbor, in the units of ``angular_bins`` (arcsec).
    nn_z : redshift of the neighbor.
    my_app_mag : apparent magnitude of the lost galaxy.
    my_pobs : P_obs of the lost galaxy.
    my_quiescent : colour flag of the lost galaxy (0 blue, 1 quiescent). Defaults to 0.
    nn_quiescent : colour flag of the neighbor (0 blue, 1 quiescent). Defaults to 0.

    Returns
    -------
    The entry of ``frac_same_halo_full`` for the corresponding bin.

    Notes
    -----
    ``frac_same_halo_full`` has six axes:
    [P_obs bin, target colour, NN colour, NN z bin, app-mag bin, angular bin].
    The previous five-index lookup predated the two colour axes and put the z bin on the NN-colour axis.
    Values beyond the last bin edge digitize to an index one past the end and raise IndexError.
    """
    my_ang_bin = np.digitize(ang_dist, angular_bins)
    nn_z_bin = np.digitize(nn_z, z_bins)
    my_app_mag_bin = np.digitize(my_app_mag, app_mag_bins)
    my_pobs_bin = np.digitize(my_pobs, POBS_bins)

    #print(f"There are {all_ang_bincounts_2[my_pobs_bin,my_quiescent,nn_quiescent,nn_z_bin,my_app_mag_bin,my_ang_bin]} galaxies in this bin")
    return frac_same_halo_full[my_pobs_bin, my_quiescent, nn_quiescent, nn_z_bin, my_app_mag_bin, my_ang_bin]

In [None]:
# Spot check: neighbor at 13 arcsec and z = 0.13, lost galaxy with app mag 16.7 and P_obs 0.4.
get_prob_nn_same_halo(13, 0.13, 16.7, 0.4)

In [None]:
def get_prob_nn_same_halo_index(my_ang_bin, nn_z_bin, my_app_mag_bin, my_pobs_bin, my_quiescent=0, nn_quiescent=0):
    """Return ``frac_same_halo_full`` at the given bin indexes (scalars or broadcastable index arrays).

    ``frac_same_halo_full`` has six axes:
    [P_obs bin, target colour, NN colour, NN z bin, app-mag bin, angular bin].
    ``my_quiescent`` / ``nn_quiescent`` select the two colour axes (0 blue, 1 quiescent) and default to 0.
    The previous five-index lookup predated the colour axes and put the z bin on the NN-colour axis.
    """
    return frac_same_halo_full[my_pobs_bin, my_quiescent, nn_quiescent, nn_z_bin, my_app_mag_bin, my_ang_bin]

# Shape of the count/fraction arrays this indexes into:
# (POBS_BIN_COUNT, 2, 2, len(z_bins), APP_MAG_BIN_COUNT, BIN_COUNT)

from scipy.interpolate import interpn

# Regular grid of bin indexes in the function's argument order (angular, z, app mag, P_obs),
# and the same-halo fraction sampled on it (blue target, blue NN by default).
points = (range(len(angular_bins)), range(len(z_bins)), range(len(app_mag_bins)), range(len(POBS_bins)))
values = get_prob_nn_same_halo_index(*np.meshgrid(*points, indexing='ij'))

point = np.array([25, 0.13, 16.7, 0.4])

In [None]:
from scipy.optimize import curve_fit
def func(x, a, b, c):
    """Exponential decay with a constant offset: a * exp(-b * x) + c."""
    decay = np.exp(-b * x)
    return a * decay + c

# Toy example: generate noisy samples of func with known parameters (a=2.5, b=1.3, c=0.5) and fit them back.
xdata = np.linspace(0, 4, 50)
y = func(xdata, 2.5, 1.3, 0.5)
# Fixed seed so the noise, and therefore the fitted parameters, are the same on every run.
rng = np.random.default_rng(0)
y_noise = 0.2 * rng.normal(size=xdata.size)
ydata = y + y_noise
plt.plot(xdata, ydata, 'b-', label='data')

# popt holds the best-fit (a, b, c); pcov is their estimated covariance matrix.
popt, pcov = curve_fit(func, xdata, ydata)
print(popt)
print(pcov)
plt.plot(xdata, func(xdata, *popt), 'r-', label='fit: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt))

#popt, pcov = curve_fit(func, xdata, ydata, bounds=(0, [3., 1., 0.5]))
#np.array([2.43736712, 1.        , 0.34463856])
#plt.plot(xdata, func(xdata, *popt), 'g--', label='fit: a=%5.3f, b=%5.3f, c=%5.3f' % tuple(popt))

plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.show()

### Fancy Algorithm

In [None]:
# How many nearest neighbors to look up for each target
NUM_NEIGHBORS = 20

# Sky positions of the galaxies selected by the `unobserved` mask
fancy_to_match = coord.SkyCoord(
    ra=ra[unobserved] * u.degree,
    dec=dec[unobserved] * u.degree,
    frame='icrs',
)

In [None]:
# Both arrays have one row per neighbor rank and one column per target in fancy_to_match.
# neighbor_indexes holds positions in CATALOG; ang_distances holds separations in arcsec.
neighbor_indexes = np.zeros((NUM_NEIGHBORS, len(fancy_to_match)), dtype=np.int32)
ang_distances = np.zeros((NUM_NEIGHBORS, len(fancy_to_match)))

print(f"Finding nearest {NUM_NEIGHBORS} neighbors... ", end='\r')
for n in range(NUM_NEIGHBORS):
    # nthneighbor is 1-based, so row n stores the (n+1)-th closest catalog entry
    idx, d2d, d3d = coord.match_coordinates_sky(
        fancy_to_match, catalog, nthneighbor=n + 1, storekdtree=treename
    )
    # idx and d2d are filled into row n, one entry per target  (TODO is that right?)
    neighbor_indexes[n, :] = idx
    ang_distances[n, :] = d2d.to(u.arcsec).value
print(f"Finding nearest {NUM_NEIGHBORS} neighbors... done!")

In [None]:
with FancyRedshiftGuesser(NUM_NEIGHBORS, debug=False) as scorer:
    # Running tallies of how often the chosen neighbor is "right"
    halo_matches = 0
    z_matches = 0

    print("Assigning missing redshifts... ")
    # TODO don't loop?
    # j indexes the fancy_to_match sized arrays (columns of neighbor_indexes / ang_distances);
    # i indexes the master arrays. Starting at -1 makes the final count below 0 when
    # there is nothing to assign.
    j = -1

    #for i in special_id:
    for j, i in enumerate(indexes_not_assigned):

        # To debug specific galaxies, skip all others here (enumerate keeps j in step):
        #if i not in [7793057, 11425052]:
        #    continue

        if j%10000==0:
            print(f"{j}/{len(fancy_to_match)} complete", end='\r')

        # Candidate neighbors of this galaxy and their properties
        neighbors = neighbor_indexes[:,j]
        neighbors_z = z_obs_catalog[neighbors]
        neighbors_ang_dist = ang_distances[:,j]
        my_prob_obs = prob_obs_cut[i]
        my_app_mag = app_mag[i]

        # choose_winner returns a position within `neighbors`; the true z_obs[i] is passed
        # as well (presumably for the scorer's own bookkeeping - TODO confirm)
        winning_num = scorer.choose_winner(neighbors_z, neighbors_ang_dist, my_prob_obs, my_app_mag, z_obs[i])
        winner_index = neighbors[winning_num]

        # Track total correct
        z_chosen = z_obs_catalog[winner_index]
        if np.isclose(z_chosen, z_obs[i], rtol=0, atol=SIM_Z_THRESH):
            z_matches += 1
        halo_chosen = mxxl_halo_id_catalog[winner_index]
        if halo_chosen == mxxl_halo_id[i]:
            halo_matches += 1

    j += 1 # convert the last index into the number of galaxies processed
    print(f"{j}/{len(fancy_to_match)} complete")



In [None]:
# Fractions of the fancy_to_match galaxies whose chosen neighbor has the same
# halo id, and whose chosen neighbor's redshift is within SIM_Z_THRESH of the truth.
print(f"Halo matches: {halo_matches / len(fancy_to_match)}")
print(f"z matches: {z_matches / len(fancy_to_match)}")

In [None]:
# Inspect the statistics saved by a FancyRedshiftGuesser run.
# The filename contains the run's number, so it must be edited to point at the run of interest.
filename = 'bin/redshift_guesser_1691466513.171286.npy'
with open(filename, 'rb') as f:
    # The file holds four arrays stored back-to-back; read them in order.
    quick_nn, quick_correct, nn_used, nn_correct = (np.load(f) for _ in range(4))

# The +1 in each denominator presumably guards against division by zero for unused bins.
print(f"Quick NN uses: {quick_nn}. Success: {quick_correct / (quick_nn+1)}")
print(f"NN bin uses: {nn_used}. Success: {nn_correct / (nn_used+1)}")

### Galaxy Pairs Angular Separation and Same-Halo Analysis
Continuation of the above.

**Warning:** this is an O(N²) calculation. Do not run it on the full sky; cut the data down to a small region first.


In [None]:
# THIS IS N^2 CALCULATION do not run on full sky.
# Accumulators have one row per redshift bin and one column per angular-separation bin.
# Pair counts start at 1 rather than 0, presumably so the ratio at the end never divides by zero.
total_bincounts = np.ones((len(z_bins), BIN_COUNT))
total_same_halo_bincounts = np.zeros((len(z_bins), BIN_COUNT))

# Examine each galaxy in the sample pair once: galaxy i is paired with every later galaxy
for i in range(len(ra)-1):
    ang_distance = coord.angular_separation(ra[i]*u.degree, dec[i]*u.degree, ra[i+1:len(ra)]*u.degree, dec[i+1:len(ra)]*u.degree).to(u.arcsec)

    same_halo = mxxl_halo_id[i] == mxxl_halo_id[i+1:len(ra)]

    angdist_bin_ind = np.digitize(ang_distance.value, angular_bins)
    # minlength pads the histogram to BIN_COUNT entries. Without it np.bincount stops at the
    # largest index present, so a galaxy whose remaining pairs all fall in low bins would
    # yield a shorter array that cannot be added to a BIN_COUNT-wide row.
    bincounts = np.bincount(angdist_bin_ind, minlength=BIN_COUNT)[0:BIN_COUNT]
    same_halo_bincounts = np.bincount(angdist_bin_ind, weights=same_halo.astype(int), minlength=BIN_COUNT)[0:BIN_COUNT]

    # Pairs are filed under the redshift bin of galaxy i.
    # NOTE(review): np.digitize returns len(z_bins) for a redshift above the last edge, which
    # would be out of range for these arrays - confirm z_bins covers the whole sample.
    z_bin = np.digitize(z_obs[i], z_bins)
    total_bincounts[z_bin] += bincounts
    total_same_halo_bincounts[z_bin] += same_halo_bincounts

fraction_same_halo = total_same_halo_bincounts / total_bincounts

In [None]:
# Plots for galaxy pairs: one curve per redshift bin in each figure.
# The first bin is labelled "< edge"; every other bin is labelled "lower edge - upper edge".

# Figure 1: raw pair counts versus angular separation
plt.figure()
for i in range(len(z_bins)):
    label = "< {0}".format(z_bins[i]) if i == 0 else "{0} - {1}".format(z_bins[i-1], z_bins[i])
    plt.plot(angular_bins, total_bincounts[i], label=label)
plt.legend()
plt.xscale('log')
plt.yscale('log')
plt.title("Galaxy Pair Counts (by ang separation and z)")
plt.xlabel('Angular Separation (arcsec)')
plt.ylabel('Count of Galaxies Pairs')
plt.draw()

# Figure 2: fraction of pairs that share a halo versus angular separation
plt.figure()
for i in range(len(z_bins)):
    label = "< {0}".format(z_bins[i]) if i == 0 else "{0} - {1}".format(z_bins[i-1], z_bins[i])
    plt.plot(angular_bins, fraction_same_halo[i], label=label)
plt.legend()
plt.xscale('log')
plt.ylim(-0.01, 1.0)
plt.title("Fraction Pair in Same Halo (by ang separation and z)")
plt.xlabel('Angular Separation (arcsec)')
plt.ylabel('Fraction Pair in Same Halo')
plt.draw()

# UCHUU Experiments


### Experiments

In [None]:
# For reference, this appears to be the column dtype listing of the UCHUU table:
# [('R_MAG_APP', '>f4'), ('R_MAG_ABS', '>f4'), ('G_R_REST', '>f4'), ('G_R_OBS', '>f4'),
#  ('DEC', '>f8'), ('HALO_MASS', '>f4'), ('CEN', '>i4'), ('RES', '>i4'), ('RA', '>f8'),
#  ('Z_COSMO', '>f4'), ('Z', '>f4'), ('STATUS', '>i4'), ('FIRST_ACC_SCALE', '>f4'),
#  ('M_ACC', '>f4'), ('M_VIR_ALL', '>f4'), ('R_VIR', '>f4'), ('V_PEAK', '>f4'),
#  ('R_S', '>f4'), ('V_RMS', '>f4'), ('NGC', '>f4'), ('SGC', '>f4'), ('HALO_ID', '>i8'),
#  ('PID', '>i8')]
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
filename='/export/sirocco2/tinker/DESI/UCHUU_MOCKS/BGS_LC_Uchuu.fits'
# Read the whole FITS file into an astropy Table
u_table = Table.read(filename, format='fits')

In [None]:
# Apparent-magnitude limit: only galaxies with app_mag below this value are kept
APP_MAG_CUT = 19.5

In [None]:
# Show the columns available in the UCHUU table
u_table.columns
# Column of interest: G_R_OBS

In [None]:
# Pull the columns used below out of the UCHUU table.
# NOTE: this rebinds dec, ra, z_obs, app_mag, abs_mag and g_r, which the MXXL cells at the top
# of the notebook also use.
dec = u_table['DEC']
ra = u_table['RA']
z_obs = u_table['Z']
app_mag = u_table['R_MAG_APP']
abs_mag = u_table['R_MAG_ABS']
g_r = u_table['G_R_REST'] # TODO before using ensure it should be rest and not observed
g_r_obs = u_table['G_R_OBS']
central = u_table['CEN']
uchuu_halo_mass = u_table['HALO_MASS']
uchuu_halo_id = u_table['HALO_ID']

# Keep galaxies brighter than the apparent-magnitude cut that have a positive redshift.
# A `mass_filter` used to be part of this mask, but it is never computed from the UCHUU
# columns: on a fresh kernel it raised NameError, and otherwise it would have been a stale
# mask left over from a different catalogue. Define an explicit halo-mass cut here and add
# it to the list if one is wanted.
bright_filter = app_mag < APP_MAG_CUT
redshift_filter = z_obs > 0
keep = np.all([bright_filter, redshift_filter], axis=0)

# Apply the same selection to every column so they stay aligned
dec = dec[keep]
ra = ra[keep]
z_obs = z_obs[keep]
app_mag = app_mag[keep]
abs_mag = abs_mag[keep]
g_r = g_r[keep]
g_r_obs = g_r_obs[keep]
central = central[keep]
uchuu_halo_mass = uchuu_halo_mass[keep]
uchuu_halo_id = uchuu_halo_id[keep]

# Absolute magnitudes computed here from apparent magnitude and redshift,
# for comparison with the catalogue's abs_mag
abs_mag_me = app_mag_to_abs_mag(app_mag, z_obs)


In [None]:
# k-corrected absolute magnitudes from apparent magnitude, redshift and g-r colour
# (g_r is taken from the G_R_REST column here).
abs_mag_me_k = k_correct(app_mag, z_obs, g_r)
# using true g-r instead of the observed g-r gives the reported distribution as shown by plot below


In [None]:
# Compare the catalogue's abs_mag with the k-corrected magnitudes computed here (abs_mag_me_k).
# The histogram edges get their own name: they were previously stored in `angular_bins`,
# which overwrote the angular-separation bin edges that other cells in this notebook use.
abs_mag_bins = np.linspace(-25, -10, 100)
#my_counts, my_bins, my_p = plt.hist(abs_mag_me, label="my abs_mag", bins=abs_mag_bins, alpha=0.5)
alex_counts, alex_bins, alex_p = plt.hist(abs_mag, label="UCHUU abs_mag", bins=abs_mag_bins, alpha=0.5)
my_k_counts, my_k_bins, my_k_p = plt.hist(abs_mag_me_k, label="my k abs_mag", bins=abs_mag_bins, alpha=0.5)
#z = plt.hist(my_abs_mag_k, label="my k abs_mag", bins=50)
plt.xlabel("Absolute Mag")
plt.ylabel("Count")
plt.title("Compare Absolute Mags")
plt.yscale('log')  # log scale so the sparsely populated tails are visible
plt.legend()

#print(f"The peaks are UCHUU: {alex_bins[np.argmax(alex_counts)]:.1f}, My {my_bins[np.argmax(my_counts)]:.1f}, My K {my_k_bins[np.argmax(my_k_counts)]:.1f}")

In [None]:
# UCHUU: number of galaxies left after the cuts, then the estimated sky-area
# fraction computed from their positions
print(len(ra))
estimate_frac_area(ra, dec)


In [None]:
# Map a random 1% subsample (drawn without replacement) of the galaxies.
# NOTE(review): the draw is unseeded, so the subsample differs from run to run.
rnd_indices = np.random.choice(len(ra), len(ra)//100, replace=False)
# No trailing comma here: it used to wrap make_map's return value in a 1-tuple
fig = make_map(ra[rnd_indices], dec[rnd_indices])


In [None]:
# TODO only centrals...
# Overlay the normalised log10 halo-mass distributions of the two simulations.
# Masses are multiplied by 10**10 before taking the log (presumably they are stored in
# units of 1e10 - TODO confirm).
# NOTE(review): `all` here must be a notebook object with an `all_data` mapping that shadows
# the Python builtin - confirm it is defined before this cell runs.
hist_kwargs = dict(bins=30, alpha=0.5, density=True)
plt.hist(np.log10(uchuu_halo_mass*10**10), label="UCHUU", **hist_kwargs)
plt.hist(np.log10(all.all_data['mxxl_halo_mass']*10**10), label="MXXL", **hist_kwargs)
#plt.yscale('log')
plt.xlabel('log(M_halo)')
plt.ylabel('Density')
plt.title("MXXL vs UCHUU Truth Halo Masses")
plt.legend()

# Tests

In [None]:
# Test estimate_frac_area for a completely filled sky
# Build a dense 1000 x 1000 grid of positions and flatten it into coordinate lists.
# NOTE(review): the second axis spans 0.01-179.9 rather than -90 to +90 - confirm this
# matches the declination convention estimate_frac_area expects.
_ra, _dec = (
    grid.flatten()
    for grid in np.meshgrid(
        np.linspace(0.01, 359.9, 1000),
        np.linspace(0.01, 179.9, 1000),
    )
)

estimate_frac_area(_ra, _dec)