In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import special
import h5py
from astropy.wcs import WCS
import astropy.coordinates as coord
import astropy.units as u
from pyutils import *
import types
import numpy.ma as ma
import sys

# Directory that holds the HDF5 weights file and the group-finder output files
# read throughout this notebook.
# NOTE(review): this is a machine-specific absolute path; pick one of the
# commented alternatives (or your own location) before running elsewhere.
ROOT_FOLDER = "/Volumes/Seagate Backup Plus Drive/galaxy-groups-data/"
#ROOT_FOLDER = "D:\\galaxy-groups-data\\"
#ROOT_FOLDER = "bin/"


## Basic read-in of HDF5 data from MXXL


In [None]:
# Number of leading rows sliced from each HDF5 dataset below
# (the commented numbers are presumably other cut sizes that were tried).
DATA_CUT_INDEX = 100000 #21201544 #3000000 

In [None]:
# Open the weights file read-only and list the members of the root group,
# then of the 'Data' and 'Weight' groups.
weights = h5py.File(ROOT_FOLDER + 'weights_3pass.hdf5', 'r')
for group_path in ('/', 'Data', 'Weight'):
    print(list(weights[group_path]))


In [None]:
# Bit 0 of each bitweight word picks 1 of the 2048 fiber assignment
# realizations encoded in the bitstring.
bitweight_words = weights['Weight/bitweight0'][:DATA_CUT_INDEX]
assigned = bitweight_words & 1
n_with_fiber = np.sum(assigned == 1)
n_without_fiber = np.sum(assigned == 0)
print(n_with_fiber, "galaxies were assigned a fiber")
print(n_without_fiber, "galaxies were NOT assigned a fiber")


# Test of nearest neighbors implementations


In [None]:

# Load the first DATA_CUT_INDEX galaxies straight from the weights file.
# (Indexing `weights` directly avoids rebinding the builtin name `input`.)
dec = weights['Data/dec'][0:DATA_CUT_INDEX]
ra = weights['Data/ra'][0:DATA_CUT_INDEX]
z_obs = weights['Data/z_obs'][0:DATA_CUT_INDEX]
app_mag = weights['Data/app_mag'][0:DATA_CUT_INDEX]
sim_halo_mass = weights['Data/halo_mass'][0:DATA_CUT_INDEX]
sim_halo_id = weights['Data/mxxl_id'][0:DATA_CUT_INDEX]

# Boolean masks: apparent magnitude below 19.5 and positive observed redshift.
bright_filter = app_mag < 19.5
redshift_filter = z_obs > 0
keep = bright_filter & redshift_filter
# Note: app_mag itself stays at full length; only the arrays below are filtered.
dec = dec[keep]
ra = ra[keep]
z_obs = z_obs[keep]
sim_halo_mass = sim_halo_mass[keep]
sim_halo_id = sim_halo_id[keep]

count = len(dec)
print(count, "galaxies in HDF5 file")

# choose 1 of the 2048 fiber assignment realizations with this bitstring
fiber_assigned_0 = assigned.astype(bool)[keep]
fiber_not_assigned_0 = np.invert(fiber_assigned_0)
# 1-D integer positions of the galaxies without a fiber. np.flatnonzero is
# used instead of np.argwhere, which would return an (N, 1) array and make
# every `arr[i]` lookup in a loop produce a 1-element array, not a scalar.
indexes_not_assigned = np.flatnonzero(fiber_not_assigned_0)
print(np.sum(fiber_assigned_0), "galaxies were assigned a fiber")

In [None]:
# Astropy NN Search with kdtrees
# The catalog is every galaxy that received a fiber; to_match is every galaxy
# that did not. Both are built from the same boolean masks used below.
catalog = coord.SkyCoord(ra=ra[fiber_assigned_0]*u.degree, dec=dec[fiber_assigned_0]*u.degree, frame='icrs')
z_cat = z_obs[fiber_assigned_0]
halo_mass_cat = sim_halo_mass[fiber_assigned_0]
to_match = coord.SkyCoord(ra=ra[fiber_not_assigned_0]*u.degree, dec=dec[fiber_not_assigned_0]*u.degree, frame='icrs')

# idx[j] is the position in `catalog` of the nearest neighbor of to_match[j].
idx, d2d, d3d = coord.match_coordinates_sky(to_match, catalog, storekdtree=False)

# Row j of to_match / idx corresponds to the j-th True entry of
# fiber_not_assigned_0; verify that ordering once for the whole array instead
# of slicing the SkyCoord row by row.
assert np.allclose(ra[fiber_not_assigned_0], to_match.ra.value)

nn_z = z_cat[idx]                       # redshift borrowed from the neighbor
true_z = z_obs[fiber_not_assigned_0]    # simulation redshift of the lost galaxy

# z_eff_a equals z_obs where a fiber was assigned and the neighbor's redshift
# elsewhere; z_err_a is the fractional error of that borrowed redshift.
z_eff_a = np.copy(z_obs)
z_err_a = np.zeros(len(z_obs))
z_err_a[fiber_not_assigned_0] = np.abs(true_z - nn_z) / true_z
z_eff_a[fiber_not_assigned_0] = nn_z

# Mhalo - (Mhalo of the NN galaxy), one entry per lost galaxy
halo_delta = np.asarray(sim_halo_mass[fiber_not_assigned_0] - halo_mass_cat[idx], dtype=float)


In [None]:
# Fractional redshift error (astropy neighbor search) for the galaxies that
# received a neighbor's redshift, shown on a log10 axis.
log_frac_err_a = np.log10(z_err_a[fiber_not_assigned_0])
plt.hist(log_frac_err_a, bins=50)
plt.yscale('log')
plt.xlabel('log(fractional error) from known simulation value')
plt.ylabel('Count')
total_err_a = np.sum(z_err_a)
n_nonzero_err_a = np.count_nonzero(z_err_a)
print(total_err_a)
print(n_nonzero_err_a)

# convert to km/s and think about velocity dispersions of galaxies


In [None]:
# halo_delta holds (true halo mass) - (halo mass of the assigned neighbor), so
# a non-zero entry means the neighbor's halo mass differs from the truth.
print("Fraction of NN-assigned halos that were different (using simulations halos)", np.count_nonzero(halo_delta) / len(halo_delta))
# np.isclose requires two operands; compare against zero explicitly
# (the one-argument call raised TypeError).
np.isclose(halo_delta, 0.0)


In [None]:
# My naive NN
# NOTE(review): the NearestNeighbor catalog is built from ra/dec in degrees
# while the queries below are made in radians -- confirm which unit the
# helper expects.
z_cat = z_obs[fiber_assigned_0]
nno = NearestNeighbor(ra[fiber_assigned_0], dec[fiber_assigned_0], z_cat)

# z_eff keeps z_obs wherever a fiber was assigned (a real measurement exists);
# every other galaxy takes the redshift of its nearest measured neighbor.
# z_err records the fractional change that substitution introduces.
z_eff = np.copy(z_obs)
z_err = np.zeros(len(z_obs))
for i in range(0, count):
    if fiber_assigned_0[i]:
        continue
    ra_rad = coord.Angle(ra[i]*u.degree).radian
    dec_rad = coord.Angle(dec[i]*u.degree).radian
    ind = nno.get_closest_index(ra_rad, dec_rad)
    new_z = z_cat[ind]
    z_err[i] = abs(z_eff[i] - new_z) / z_eff[i]
    z_eff[i] = new_z

In [None]:
# Fractional redshift error (naive neighbor search) for the galaxies that
# received a neighbor's redshift, shown on a log10 axis.
log_frac_err = np.log10(z_err[fiber_not_assigned_0])
plt.hist(log_frac_err, bins=50)
plt.yscale('log')
plt.xlabel('log(fractional error) from known simulation value')
plt.ylabel('Count')
total_err = np.sum(z_err)
n_nonzero_err = np.count_nonzero(z_err)
print(total_err)
print(n_nonzero_err)

In [None]:
# Compare the effective redshifts from the naive search (z_eff) and the
# astropy search (z_eff_a) using a purely relative tolerance.
tolerance = dict(rtol=1e-05, atol=0.0)
observed_same = np.isclose(z_eff[fiber_assigned_0], z_eff_a[fiber_assigned_0], **tolerance)
borrowed_same = np.isclose(z_eff[fiber_not_assigned_0], z_eff_a[fiber_not_assigned_0], **tolerance)

print("Are all galaxies that should just use z_obs the same?", np.all(observed_same))
print("Do all galaxies that had nearest neighbors assigned match up (me vs astropy)?", np.all(borrowed_same))

print("  Fraction of NN with same value (want 1.0):", np.sum(borrowed_same) / len(borrowed_same))

# TODO: why do astropy and I end up with a different assigned redshift ~25% of the time?

In [None]:
# Positions where the two implementations assign different redshifts;
# print up to the first 20 side by side with the simulation value.
mismatch = np.invert(np.isclose(z_eff, z_eff_a, rtol=1e-05, atol=0.0))
ind_different = np.argwhere(mismatch)
print(len(ind_different), "galaxies don't agree between implementations")

print("MINE         ASTROPY      SIMULATION")
for row in ind_different[:20]:
    print(z_eff[row], z_eff_a[row], z_obs[row])

# Experiments on MXXL Data Directly

In [None]:
# Histogram of the galaxy_type flag for the first DATA_CUT_INDEX galaxies.
small_gal_type = weights['Data/galaxy_type'][0:DATA_CUT_INDEX] # 0 1 2 3 possible
# `bins` receives whatever plt.hist returns (counts, bin edges, patches).
bins = plt.hist(small_gal_type, bins=50)

In [None]:
# Histogram of observed redshift for the first DATA_CUT_INDEX galaxies
# (no magnitude or redshift filter applied here).
small_z_obs = weights['Data/z_obs'][0:DATA_CUT_INDEX]
bins = plt.hist(small_z_obs, bins=50)
plt.xlabel("$z_{obs}$")
plt.title("Histogram of Observed Redshifts")


In [None]:
# Reload unfiltered RA/Dec slices. This rebinds `ra` and `dec`, replacing the
# magnitude/redshift-filtered arrays used in the nearest-neighbor cells above.
ra = weights['Data/ra'][0:DATA_CUT_INDEX]
dec = weights['Data/dec'][0:DATA_CUT_INDEX]

In [None]:
# Build a map of the galaxies

# Wrap RA at 180 degrees and pass both coordinates to the Mollweide axes in
# radians.
ra_angles = coord.Angle(ra*u.degree)
ra_angles = ra_angles.wrap_at(180*u.degree)
dec_angles = coord.Angle(dec*u.degree)

fig = plt.figure(figsize=(12,9))
ax = fig.add_subplot(111, projection="mollweide")
# Very low alpha so that dense regions remain distinguishable.
ax.scatter(ra_angles.radian, dec_angles.radian, alpha=0.002)
# This looks like Alex' paper, good
# TODO how to get frac_area from this?


In [None]:
# Plot Dec against RA in file order using plt.plot's default style.
# NOTE(review): consecutive points are joined by lines here -- confirm that a
# scatter plot was not intended.
plt.plot(ra, dec)

In [None]:
# Read the 'mxxl_id' column for the first DATA_CUT_INDEX galaxies and display it.
mxxl_halo_id = weights['Data/mxxl_id'][0:DATA_CUT_INDEX]
mxxl_halo_id

In [None]:
# Histogram of apparent magnitude for the first DATA_CUT_INDEX galaxies
# (unfiltered, unlike `app_mag < 19.5` used elsewhere in this notebook).
small_app_mag = weights['Data/app_mag'][0:DATA_CUT_INDEX]
bins = plt.hist(small_app_mag, bins=50)
plt.xlabel("Apparent Mag")
plt.title("Histogram of Apparent Mags")

In [None]:
# Absolute magnitudes stored in the file, used below as the reference that
# the locally computed values are compared with.
small_abs_mag = weights['Data/abs_mag'][0:DATA_CUT_INDEX]

In [None]:
# Calculating luminosity distances from the cosmology is a bit slow
# app_mag_to_abs_mag is not defined in this notebook; presumably it is
# provided by `from pyutils import *`.
my_abs_mag = app_mag_to_abs_mag(small_app_mag, small_z_obs)


In [None]:
# Compare my_abs_mag to abs_mag. 
# Both histograms are drawn on the same axes; each call picks its own 50 bins,
# so the bin edges of the two series are not necessarily identical.
x = plt.hist(my_abs_mag, label="my abs_mag", bins=50)
y = plt.hist(small_abs_mag, label="alex abs_mag", bins=50)
plt.xlabel("Absolute Mag")
plt.ylabel("Count")
plt.title("Compare Absolute Mags")
plt.legend()

In [None]:
# At what distance (luminosity distance) would the objects appear to be 19.5 mag?
# get_max_observable_volume is not defined in this notebook; presumably it is
# provided by `from pyutils import *`. It is evaluated once with the locally
# computed absolute magnitudes and once with the ones stored in the file.
v_max = get_max_observable_volume(my_abs_mag, small_z_obs, 19.5)
v_max2 = get_max_observable_volume(small_abs_mag, small_z_obs, 19.5)

bins = plt.hist(np.log10(v_max), label="my abs_mag", bins=50)
bins = plt.hist(np.log10(v_max2), label="alex abs_mag", bins=50)
plt.title("Compare V_max")
plt.legend()
# NOTE(review): the axis label gives [Mpc] although the quantity is named a
# volume -- confirm the units returned by get_max_observable_volume.
plt.xlabel("log(V_max) [Mpc]")
plt.ylabel("Count")

In [None]:
# What fraction of the time are galaxy pairs in the same halo?
# As a function of angular separation and redshift.

# Do calculation on a small patch of 100,000 galaxies for now
# TODO need to load all data and then manually cut it down into a region of the sky instead of this

# Read directly from `weights` so the builtin name `input` is not rebound.
dec = weights['Data/dec'][0:DATA_CUT_INDEX]
ra = weights['Data/ra'][0:DATA_CUT_INDEX]
z_obs = weights['Data/z_obs'][0:DATA_CUT_INDEX]
app_mag = weights['Data/app_mag'][0:DATA_CUT_INDEX]
sim_halo_mass = weights['Data/halo_mass'][0:DATA_CUT_INDEX]
sim_halo_id = weights['Data/mxxl_id'][0:DATA_CUT_INDEX]

# Boolean masks: apparent magnitude below 19.5 and positive observed redshift.
bright_filter = app_mag < 19.5
redshift_filter = z_obs > 0
keep = bright_filter & redshift_filter
dec = dec[keep]
ra = ra[keep]
z_obs = z_obs[keep]
sim_halo_mass = sim_halo_mass[keep]
sim_halo_id = sim_halo_id[keep]

# Here the catalog is every kept galaxy, not just those with a fiber.
catalog = coord.SkyCoord(ra=ra*u.degree, dec=dec*u.degree, frame='icrs')
#z_cat = z_obs[fiber_assigned_0]
#halo_mass_cat = sim_halo_mass[fiber_assigned_0]
# fiber_not_assigned_0 comes from the nearest-neighbor cells earlier in the
# notebook; it lines up with these arrays only because the same cut and
# filters were applied there.
to_match = coord.SkyCoord(ra=ra[fiber_not_assigned_0]*u.degree, dec=dec[fiber_not_assigned_0]*u.degree, frame='icrs')

# TODO 
#coord.angular_separation(ra*u.degree, dec*u.degree, catalog.ra, catalog.dec)




# Plots on data after running group finder

In [None]:
def process(filename):
    """Load one group-finder output file and derive the binned summary statistics.

    Reads `filename` together with its companion properties file (same path
    with the extension replaced by "_galprops.dat"), joins them row by row,
    drops rows whose M_halo is 0 and adds the derived columns `is_sat`,
    `is_sat_truth`, `logLgal`, `Mh_bin` and `Lgal_bin`.

    Parameters
    ----------
    filename : str
        Path to the space-delimited group-finder output (e.g. ".../x.out").

    Returns
    -------
    types.SimpleNamespace
        With attributes filename (base name without extension), all_data,
        bins, labels, centrals, loglcen_means, loglcen_scatter, bins_Lgal,
        labels_Lgal, f_sat and Lgal_counts.
    """
    import os  # local import so this cell does not depend on an extra top-level import

    # Split off only the final extension; a plain str.replace would also
    # rewrite any ".out" occurring earlier in the path.
    root, _ext = os.path.splitext(filename)
    filename_props = root + "_galprops.dat"

    # TODO add in mxxl_halo_id, assigned_halo_id
    df = pd.read_csv(filename, delimiter=' ', names=('RA', 'Dec', 'z', 'L_gal', 'V_max', 'P_sat', 'M_halo', 'N_sat', 'L_tot', 'igrp', 'unknown'))
    galprops = pd.read_csv(filename_props, delimiter=' ', names=('app_mag', 'g_r', 'galaxy_type', 'mxxl_halo_mass', 'fiber_assigned_0', 'assigned_halo_mass', 'z_obs', 'mxxl_halo_id', 'assigned_halo_id'), dtype={'mxxl_halo_id': np.int32, 'assigned_halo_id': np.int32})
    all_data = pd.merge(df, galprops, left_index=True, right_index=True)

    # Drop bad data, should have been cleaned up earlier though!
    # .copy() gives an independent frame so the column assignments below do
    # not write into a slice of the merged frame.
    orig_count = len(all_data)
    all_data = all_data[all_data.M_halo != 0].copy()
    new_count = len(all_data)
    if orig_count != new_count:
        print("Dropped {0} bad galaxies".format(orig_count - new_count))

    # The original row index survives the filtering above; a galaxy whose
    # index equals its igrp value is treated as the central of its group.
    all_data['is_sat'] = (all_data.index != all_data.igrp).astype(int)
    # Simulation truth: galaxy_type 1 or 3 is counted as a satellite.
    all_data['is_sat_truth'] = np.logical_or(all_data.galaxy_type == 1, all_data.galaxy_type == 3).astype(int)
    all_data['logLgal'] = np.log10(all_data.L_gal)

    # 30 log-spaced edges -> 29 halo-mass bins, each labelled by its lower edge.
    bins = np.logspace(np.log10(all_data.M_halo.min()), np.log10(all_data.M_halo.max()), 30)
    labels = bins[0:len(bins)-1]
    all_data['Mh_bin'] = pd.cut(x = all_data['M_halo'], bins = bins, labels = labels, include_lowest = True)

    centrals = all_data[all_data.index == all_data.igrp]
    # observed=False keeps one entry per bin (empty bins become NaN) so the
    # results stay the same length as `labels` when plotted against it.
    centrals_by_mass = centrals.groupby('Mh_bin', observed=False)
    loglcen_means = centrals_by_mass.logLgal.mean()
    loglcen_scatter = centrals_by_mass.logLgal.std()

    # Compute f_sat(Lgal) with the same binning scheme applied to luminosity.
    bins_Lgal = np.logspace(np.log10(all_data.L_gal.min()), np.log10(all_data.L_gal.max()), 30)
    labels_Lgal = bins_Lgal[0:len(bins_Lgal)-1]
    all_data['Lgal_bin'] = pd.cut(x = all_data['L_gal'], bins = bins_Lgal, labels = labels_Lgal, include_lowest = True)

    by_luminosity = all_data.groupby('Lgal_bin', observed=False)
    f_sat = by_luminosity.is_sat.mean()
    Lgal_counts = by_luminosity.RA.count()

    dataset = types.SimpleNamespace()
    dataset.filename = os.path.basename(root)
    dataset.all_data = all_data
    dataset.bins = bins
    dataset.labels = labels
    dataset.centrals = centrals
    dataset.loglcen_means = loglcen_means
    dataset.loglcen_scatter = loglcen_scatter
    dataset.bins_Lgal = bins_Lgal
    dataset.labels_Lgal = labels_Lgal
    dataset.f_sat = f_sat
    dataset.Lgal_counts = Lgal_counts

    return dataset

def plots(*frames):
    """Draw the comparison figures for one or more datasets built by process().

    One curve per dataset is drawn in each figure (labelled with the
    dataset's `filename`); the satellite-fraction figure additionally shows
    the simulation truth taken from the first dataset. The overall satellite
    fractions are printed at the end.

    Parameters
    ----------
    *frames : types.SimpleNamespace
        Datasets exposing labels, loglcen_means, loglcen_scatter, all_data,
        labels_Lgal, f_sat, Lgal_counts and filename.
    """

    # Mean log central luminosity per halo-mass bin, with the scatter as error bars.
    plt.figure()
    for f in frames:
        plt.errorbar(f.labels, f.loglcen_means, yerr=f.loglcen_scatter, label=f.filename)
    plt.xscale('log')
    plt.xlabel('$M_{halo}$')
    plt.ylabel('$log(L_{cen})$')
    plt.title("Central Luminosity vs. Halo Mass")
    plt.legend()
    plt.draw()

    # The scatter on its own.
    plt.figure()
    for f in frames:
        plt.errorbar(f.labels, f.loglcen_scatter, label=f.filename)
    plt.xscale('log')
    plt.xlabel('$M_{halo}$')
    plt.ylabel('$\\sigma(\\log(L_{cen})$')
    plt.title("Central Luminosity Scatter vs. Halo Mass")
    plt.legend()
    plt.draw()

    # Disabled: raw scatter of central luminosity against halo mass.
    # plt.figure()
    # for f in frames:
    #     plt.scatter(f.centrals.M_halo, f.centrals.L_gal, alpha=0.002)
    # plt.loglog()
    # plt.xlabel('M_halo / h')
    # plt.ylabel('L_gal / $h^2$)')
    # plt.draw()

    # Mean N_sat per halo-mass bin. observed=False keeps empty bins so the
    # result stays aligned with f.labels.
    plt.figure()
    for f in frames:
        Nsat_means = f.all_data.groupby('Mh_bin', observed=False).N_sat.mean()
        plt.plot(f.labels, Nsat_means, label=f.filename)
    plt.loglog()
    plt.ylabel("$<N_{sat}>$")
    plt.xlabel('$M_{halo}$')
    plt.title("Mean Number of Satellites by Halo Mass")
    plt.legend()
    plt.draw()

    # Satellite fraction per luminosity bin; the black curve is the truth
    # from the first dataset's is_sat_truth column.
    plt.figure()
    for f in frames:
        plt.plot(f.labels_Lgal, f.f_sat, label=f.filename)
    truth_f_sat = frames[0].all_data.groupby('Lgal_bin', observed=False).is_sat_truth.mean()
    plt.plot(frames[0].labels_Lgal, truth_f_sat, 'k', label="MXXL Truth")
    plt.xscale('log')
    plt.xlabel("$L_{gal}$")
    plt.ylabel("$f_{sat}$")
    plt.title("Satellite fraction vs Galaxy Luminosity")
    plt.legend()
    plt.draw()

    # Number of galaxies per luminosity bin.
    plt.figure()
    for f in frames:
        plt.plot(f.labels_Lgal, f.Lgal_counts, label=f.filename)
    plt.xscale('log')
    plt.yscale('log')
    plt.xlabel("$L_{gal}$")
    plt.ylabel("Count of Galaxies")
    plt.title("Galaxy Luminosity Counts")
    plt.legend()
    plt.draw()

    print("TOTAL f_sat: ")
    for f in frames:
        print(f.filename, f.all_data['is_sat'].sum() / f.all_data['is_sat'].count())

    # Numerator and denominator both come from the first dataset; previously
    # the count was taken from whichever dataset the loop above ended on.
    truth_flags = frames[0].all_data['is_sat_truth']
    print("MXXL Truth", truth_flags.sum() / truth_flags.count())

In [None]:
# Group-finder run on the "all" sample.
# NOTE(review): the name `all` shadows Python's builtin all() for the rest of
# the session; renaming it also requires updating the later plots(all, ...) call.
all = process(ROOT_FOLDER + "mxxl_3pass_all.out")


In [None]:
# Group-finder run on the "fiberonly" sample (labelled "FIBER ASSIGNED ONLY"
# in the plot legend comment further down).
fiberonly = process(ROOT_FOLDER + "mxxl_3pass_fiberonly.out")


In [None]:
# This one is no good
#nn = process(ROOT_FOLDER + "mxxl_3pass_nn.out")

In [None]:
# Group-finder run on the "nn_kd" sample (labelled "NEAREST NEIGHBOR" in the
# plot legend comment further down).
nn_kd = process(ROOT_FOLDER + "mxxl_3pass_nn_kd.out")


In [None]:
# Display the merged per-galaxy table of the nn_kd run.
nn_kd.all_data

In [None]:
# Draw every comparison figure for the three runs; the curve order in each
# figure follows the argument order.
plots(all, fiberonly, nn_kd)
# BLUE: ALL     ORANGE: FIBER ASSIGNED ONLY     GREEN: NEAREST NEIGHBOR

In the above plots, the NN ones have some galaxies at higher $L_{gal}$ than the 'all' sample. This is because some of the assigned redshifts imply a larger luminosity than any galaxy seen in MXXL.

## Compare NN-assigned implied abs mags to truth from MXXL

In [None]:
# For galaxies without a fiber, convert the apparent magnitude to an absolute
# magnitude twice: once with the assigned redshift (column z) and once with
# the simulation redshift (column z_obs).
nn_kd_rows = nn_kd.all_data
not_assigned = np.invert(nn_kd_rows.fiber_assigned_0.astype(bool))
app_mags = nn_kd_rows.app_mag[not_assigned].to_numpy()
z_assigned = nn_kd_rows.z[not_assigned].to_numpy()
z_truth = nn_kd_rows.z_obs[not_assigned].to_numpy()
my_assigned_abs_mag = app_mag_to_abs_mag(app_mags, z_assigned)
my_raw_abs_mag = app_mag_to_abs_mag(app_mags, z_truth)

print(len(my_raw_abs_mag), len(my_assigned_abs_mag))

In [None]:
# Compare absolute mags. Using my way of computing for both.
# Semi-transparent histograms on a log count axis so both series stay visible
# where they overlap.
x = plt.hist(my_raw_abs_mag, label="Truth", bins=50, alpha=0.5)
y = plt.hist(my_assigned_abs_mag, label="NN Assigned", bins=50, alpha=0.5)
plt.xlabel("Absolute Mag")
plt.ylabel("Count")
plt.yscale('log')
plt.title("Compare NN-assigned Abs Mags")
plt.legend()

In [None]:
# First ensure the non-assigned stuff is exactly the same
# NOTE: `nn` (the naive nearest-neighbor run) only exists if the commented-out
# process(ROOT_FOLDER + "mxxl_3pass_nn.out") line earlier in the notebook is
# re-enabled; otherwise this cell raises NameError.
print("NEAREST NEIGHBOR COMPARISON\n")
nn_fiber_assigned_0 = nn.all_data.fiber_assigned_0.astype(bool)
nn_fiber_not_assigned_0 = np.invert(nn_fiber_assigned_0)
# Build the kd-tree run's masks from nn_kd itself rather than from nn.
nnkd_fiber_assigned_0 = nn_kd.all_data.fiber_assigned_0.astype(bool)
nnkd_fiber_not_assigned_0 = np.invert(nnkd_fiber_assigned_0)

print("Are all galaxies that should just use z_obs the same?", np.all(np.isclose(nn.all_data.z[nn_fiber_assigned_0],nn_kd.all_data.z[nnkd_fiber_assigned_0],rtol=1e-03,atol=0.0)))
# `compare` is True where both runs assigned (relatively) the same redshift.
compare = np.isclose(nn.all_data.z[nn_fiber_not_assigned_0],nn_kd.all_data.z[nnkd_fiber_not_assigned_0],rtol=1e-03,atol=0.0)
print("Do all galaxies that had nearest neighbors assigned match up (me vs astropy)?", np.all(compare))
print("  Fraction of NN with same value (expect 1.0):", np.sum(compare) / len(nn.all_data.z[nn_fiber_not_assigned_0]))

# TODO why do astropy and I result in a different assigned redshift ~38% of the time
# Now it's 99.9% of the time?!



In [None]:
# Plot points that I differ from astropy
# `compare` comes from the comparison cell above; its inverse selects the
# unassigned galaxies whose redshift differs between the two runs.
disagree = np.invert(compare)
ra_angles = coord.Angle(nn.all_data.RA[nn_fiber_not_assigned_0][disagree]*u.degree)
ra_angles = ra_angles.wrap_at(180*u.degree)
dec_angles = coord.Angle(nn.all_data.Dec[nn_fiber_not_assigned_0][disagree]*u.degree)

# Counts: all galaxies, galaxies without a fiber, and those that disagree.
print(len(nn.all_data.RA), len(nn.all_data.RA[nn_fiber_not_assigned_0]), len(nn.all_data.RA[nn_fiber_not_assigned_0][disagree]))

In [None]:
# Mollweide map of the disagreeing galaxies, using the angles prepared in the
# previous cell.
fig = plt.figure(figsize=(12,9))
ax = fig.add_subplot(111, projection="mollweide")
ax.scatter(ra_angles.radian, dec_angles.radian, alpha=0.002)
# There appears to be no special area that we disagree in

## Find fraction of time the NN is in the same halo

The answer is basically never!

In [None]:
#x=plt.hist(nn.all_data['assigned_halo_mass'], bins=50)
#x=plt.hist(nn.all_data['mxxl_halo_mass'], bins=50)
#x=plt.hist(all.all_data['mxxl_halo_mass'], bins=30) # should be exact same as above
#plt.yscale('log')

def nn_halo_analysis(*sets):
    """Print how often the assigned halo agrees with the true MXXL halo.

    For every dataset, only rows with fiber_assigned_0 == 0 are considered.
    Two fractions are printed: exact agreement of assigned_halo_id with
    mxxl_halo_id, and agreement of assigned_halo_mass with mxxl_halo_mass to
    a relative tolerance of 1e-03.

    Parameters
    ----------
    *sets : objects exposing an `all_data` DataFrame with the columns above.
    """
    for dataset in sets:
        table = dataset.all_data
        no_fiber = table[table.fiber_assigned_0 == 0]
        n_no_fiber = len(no_fiber)

        # Exact match on the halo identifier.
        id_matches = no_fiber['assigned_halo_id'] == no_fiber['mxxl_halo_id']
        id_fraction = id_matches.sum() / n_no_fiber
        print("Fraction of time NN-assigned halo ID is the same as the galaxy's actual halo ID:", id_fraction)

        # Match on halo mass within a relative tolerance only (atol disabled).
        mass_matches = np.isclose(
            no_fiber['assigned_halo_mass'],
            no_fiber['mxxl_halo_mass'],
            atol=0.0,
            rtol=1e-03,
        )
        mass_fraction = mass_matches.sum() / len(mass_matches)
        print("Fraction of time NN-assigned halo mass is \'the same\' as the galaxy's actual halo mass:", mass_fraction)

        # Unfinished idea kept for reference: bin by assigned redshift and
        # plot the matching fraction per bin.
        #z_bins = np.linspace(min(data.all_data.z), max(data.all_data.z), 20)
        #z_labels = z_bins[0:len(z_bins)-1]
        #data.all_data['z_bin'] = pd.cut(x = data.all_data['z'], bins = z_bins, labels = z_labels, include_lowest = True)
        #groupby_z = no_fiber.groupby('z_bin')['same_halo_mass'].sum() / no_fiber.groupby('z_bin')['same_halo_mass'].count()
        #plt.plot(z_labels, groupby_z)
        #plt.xlabel('$z_{eff}$ (effective/assigned redshift)')
        #plt.ylabel('Fraction Assigned Halo = True Host Halo')
        


In [None]:
# Print the halo-agreement fractions for the nn_kd run.
nn_halo_analysis(nn_kd)

# TODO could compare z_cos instead


## Lost galaxy Examiner

In [None]:
# Table examined by the lost-galaxy cells below (a reference to the nn_kd
# run's frame, not a copy).
data = nn_kd.all_data


In [None]:
# Split the table by fiber status: "lost" galaxies have no fiber (flag 0),
# "observed" galaxies have one (flag 1).
fiber_flag = data['fiber_assigned_0']
lost_galaxies = data.loc[fiber_flag == 0]
#lost_galaxies_2 = nn.all_data.loc[nn.all_data['fiber_assigned_0'] == 0]
obs_galaxies = data.loc[fiber_flag == 1]
#obs_galaxies_2 = nn.all_data.loc[nn.all_data['fiber_assigned_0'] == 1]
n_lost, n_observed = len(lost_galaxies), len(obs_galaxies)
print(n_lost, n_observed)

# TODO could use angular size / redshift relation as part of this :-)
def getsize(z):
    """Return the scatter-marker size used for a galaxy at redshift `z`.

    Lower redshifts get larger markers. The breakpoints step down through
    nine sizes; anything at or above 0.6 (or not comparable, e.g. NaN)
    gets the smallest marker.

    The original chain tested `z < 0.2` twice, so the size-30 step could never
    be reached; the first of the two is taken to be 0.15 so that every size is
    reachable.

    Parameters
    ----------
    z : float
        Redshift.

    Returns
    -------
    int
        Marker size to pass as `s` to a scatter call.
    """
    # (exclusive upper redshift bound, marker size), in ascending redshift order
    size_steps = (
        (0.05, 100),
        (0.1, 70),
        (0.15, 45),
        (0.2, 30),
        (0.3, 20),
        (0.4, 13),
        (0.5, 8),
        (0.6, 4),
    )
    for upper_z, marker_size in size_steps:
        if z < upper_z:
            return marker_size
    return 2

def examine_plot(index):
    """Plot the neighborhood of one lost galaxy when its assigned redshift is far off.

    Looks up the galaxy in the module-level `lost_galaxies` table, collects the
    observed galaxies (`obs_galaxies`) inside a box of +/- coord.Angle('5m')
    in RA and Dec around it, and identifies the one whose z equals the
    galaxy's assigned redshift. A figure is drawn only when the assigned and
    true redshifts differ by more than 0.1.

    Parameters
    ----------
    index : row label
        Label of the galaxy in `lost_galaxies.index` (the driver loop passes
        values taken from that index, so the lookup is label-based).

    Raises
    ------
    AssertionError
        If the box does not contain exactly one observed galaxy with that z.
    """
    target = lost_galaxies.loc[index]
    #target2 = lost_galaxies_2.iloc[index]
    #assert np.isclose(target.RA, target2.RA)

    z_eff = target.z
    #z_eff2 = target2.z

    # Search box around the target. The local names ra_max/ra_min/dec_max/
    # dec_min are referenced by the query string below via '@'.
    half_width = coord.Angle('5m')
    ra_center = coord.Angle(target.RA*u.degree)
    dec_center = coord.Angle(target.Dec*u.degree)
    ra_max = (ra_center + half_width).value
    ra_min = (ra_center - half_width).value
    dec_max = (dec_center + half_width).value
    dec_min = (dec_center - half_width).value

    nearby = obs_galaxies.query('RA < @ra_max and RA > @ra_min and Dec < @dec_max and Dec > @dec_min')
    #nearby2 = obs_galaxies_2.query('RA < @ra_max and RA > @ra_min and Dec < @dec_max and Dec > @dec_min')

    # The donor is recognised by an exact redshift match inside the box.
    z_match = nearby.query('z == @z_eff')
    #z_match2 = nearby2.query('z == @z_eff2')

    assert len(z_match) == 1, len(z_match) # or we need a better way to verify which row is the one that we assigned the z from
    #assert len(z_match2) == 1, len(z_match2) # or we need a better way to verify which row is the one that we assigned the z from

    # Which ones to bother plotting?
    if abs(target.z_obs - target.z) > 0.1:
        plt.figure(figsize=(5,5))
        s = list(map(getsize, nearby.z))
        plt.scatter(nearby.RA, nearby.Dec, s=s)
        plt.scatter(target.RA, target.Dec, s=getsize(z_eff))
        #plt.scatter(target.RA, target.Dec, s=getsize(target.z_obs), facecolors='none', edgecolors='g')
        #plt.scatter(z_match2.RA, z_match2.Dec, marker='^', s=list(map(getsize, z_match2.z)), alpha=0.5)
        plt.scatter(z_match.RA, z_match.Dec, color='r', s=list(map(getsize, z_match.z)))
        plt.xlim(ra_min, ra_max)
        plt.ylim(dec_min, dec_max)
        plt.xlabel('RA')
        plt.ylabel('Dec')
        plt.title("Lost Galaxy {0}: z_true={1:.2f}, z_eff={2:.2f}".format(index, target.z_obs, target.z))
        plt.draw()

# Examine up to the first 30 lost galaxies. Slicing the index (instead of
# indexing positions 0..29) avoids an IndexError when fewer than 30 exist.
# The value handed to examine_plot is the row label, as before.
for index in lost_galaxies.index[:30]:
    examine_plot(index)

Astropy's NN is working as expected. My version is not selecting the nearest neighbor.