In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import astropy.coordinates as coord
import astropy.units as u
from pyutils import *
import types
import numpy.ma as ma
from matplotlib.patches import Circle
import postprocess as pp
from postprocess import save_dataset, load_dataset
from dataloc import *

%load_ext autoreload
%autoreload 2

In [None]:
# Figure resolution for plots made directly in this notebook; the same value
# is written onto the postprocess module's DPI attribute.
DPI = 80
pp.DPI = DPI


After the group finder is run, this notebook is used to post-process the results, generating plots and other output for analysis.


## Loading existing datasets 

In [None]:
# Placeholder objects for every dataset this notebook knows about. Each one
# only carries a .name; the loading cells below pass these placeholders to
# load_dataset (presumably the name is what identifies the saved dataset --
# confirm in postprocess.load_dataset).

# SDSS
sdss_vanilla = types.SimpleNamespace()
sdss_vanilla.name = "SDSS Vanilla"
sdss_colors = types.SimpleNamespace()
sdss_colors.name = "SDSS Colors"
sdss_colors_chi = types.SimpleNamespace()
sdss_colors_chi.name = "SDSS Colors Chi"
sdss_stripped = types.SimpleNamespace()
sdss_stripped.name = "SDSS Stripped"

# MXXL
# NOTE: `all` shadows the builtin all() while it is bound; the name is kept
# because later cells refer to this dataset as `all`.
all = types.SimpleNamespace()
all.name = "All MXXL <19.5"
all_c = types.SimpleNamespace()
all_c.name = "All MXXL <19.5 c"
all20 = types.SimpleNamespace()
all20.name = "All MXXL <20"
fiberonly = types.SimpleNamespace()
fiberonly.name = "Fiber Only MXXL <19.5"
fiberonly20 = types.SimpleNamespace()
fiberonly20.name = "Fiber Only MXXL <20"
nn_kd = types.SimpleNamespace()
nn_kd.name = "Nearest Neighbor MXXL <19.5"
nn_kd_c = types.SimpleNamespace()
nn_kd_c.name = "Nearest Neighbor MXXL <19.5 c"
nn_kd20 = types.SimpleNamespace()
nn_kd20.name = "Nearest Neighbor MXXL <20"
simple_2 = types.SimpleNamespace()
simple_2.name = "Simple v2 MXXL <19.5"
simple_2_c = types.SimpleNamespace()
simple_2_c.name = "Simple v2 MXXL <19.5 c"
simple_2_20 = types.SimpleNamespace()
# Previously missing: same name the simple_2_20 processing cell assigns.
simple_2_20.name = "Simple v2 MXXL <20"
simple_4 = types.SimpleNamespace()
simple_4.name = "Simple v4 MXXL <19.5"
simple_4_c = types.SimpleNamespace()
simple_4_c.name = "Simple v4 MXXL <19.5 c"

# UCHUU
all_u = types.SimpleNamespace()
all_u.name = "All UCHUU <19.5"

# DESI BGS
fiberonly_BGS = types.SimpleNamespace()
fiberonly_BGS.name = "Fiber Only BGS <19.5"
fiberonly_1pass_BGS = types.SimpleNamespace()
fiberonly_1pass_BGS.name = "Observed 1pass+ BGS <19.5"
nn_BGS = types.SimpleNamespace()
nn_BGS.name = "Nearest Neighbor BGS <19.5"
simple2_BGS = types.SimpleNamespace()
simple2_BGS.name = "Simple v2 BGS <19.5"
simple2_BGS_c = types.SimpleNamespace()
simple2_BGS_c.name = "Simple v2 BGS <19.5 C"
simple4_BGS = types.SimpleNamespace()
simple4_BGS.name = "Simple v4 BGS <19.5"
simple4_BGS_c = types.SimpleNamespace()
simple4_BGS_c.name = "Simple v4 BGS <19.5 C"
simple4_BGS_SDSSlike = types.SimpleNamespace()
simple4_BGS_SDSSlike.name = "Simple v4 BGS SDSS-like"
simple4_BGS_SDSSlike_c = types.SimpleNamespace()
simple4_BGS_SDSSlike_c.name = "Simple v4 BGS SDSS-like C"


In [None]:
# Swap the MXXL placeholders for whatever load_dataset returns for them.
# Only the runs needed by the analysis cells below are loaded; the
# commented-out lines are the remaining MXXL runs.
all=load_dataset(all)
#all_c=load_dataset(all_c)
#fiberonly=load_dataset(fiberonly)
nn_kd=load_dataset(nn_kd)
#nn_kd_c=load_dataset(nn_kd_c)
simple_2=load_dataset(simple_2)
#simple_2_c=load_dataset(simple_2_c)

In [None]:
# Load the SDSS runs (sdss_stripped is not loaded here).
sdss_vanilla = load_dataset(sdss_vanilla)
sdss_colors = load_dataset(sdss_colors)
sdss_colors_chi = load_dataset(sdss_colors_chi)

In [None]:
# Load the DESI BGS runs used by the plotting cells.
#fiberonly_BGS=load_dataset(fiberonly_BGS)
#fiberonly_1pass_BGS=load_dataset(fiberonly_1pass_BGS)

simple2_BGS=load_dataset(simple2_BGS)
# The SDSS-like runs are "Simple v4" datasets, so the result is bound back to
# the simple4_* placeholder it was loaded from (it used to land only on a
# simple2_* name, leaving simple4_BGS_SDSSlike as an empty placeholder).
simple4_BGS_SDSSlike=load_dataset(simple4_BGS_SDSSlike)
simple2_BGS_c=load_dataset(simple2_BGS_c)
simple4_BGS_SDSSlike_c=load_dataset(simple4_BGS_SDSSlike_c)

# Legacy aliases: the old (misleading) names, kept so existing references work.
simple2_BGS_SDSSlike=simple4_BGS_SDSSlike
simple2_BGS_SDSSlike_c=simple4_BGS_SDSSlike_c

## Process New Datasets

### SDSS

In [None]:
# Build the "SDSS Stripped" dataset from its group finder output file, tag it
# with the name/color/marker attributes, and save it.
sdss_stripped = pp.process_sdss(ROOT_FOLDER + "sdss_ian_stripped.out")
sdss_stripped.name = "SDSS Stripped"
sdss_stripped.color = pp.get_color(4)
sdss_stripped.marker = '<'
save_dataset(sdss_stripped)

In [None]:
# Build and save the "SDSS Vanilla" dataset (sdss_ian_1.out).
sdss_vanilla = pp.process_sdss(ROOT_FOLDER + "sdss_ian_1.out")
sdss_vanilla.name = "SDSS Vanilla"
sdss_vanilla.color = pp.get_color(4)
sdss_vanilla.marker = '-'
save_dataset(sdss_vanilla)

In [None]:
# Build and save the "SDSS Colors" dataset (sdss_ian_2.out).
sdss_colors = pp.process_sdss(ROOT_FOLDER + "sdss_ian_2.out")
sdss_colors.name = "SDSS Colors"
sdss_colors.color = pp.get_color(4)
sdss_colors.marker = '--'
save_dataset(sdss_colors)

In [None]:
# Build and save the "SDSS Colors Chi" dataset (sdss_ian_3.out).
sdss_colors_chi = pp.process_sdss(ROOT_FOLDER + "sdss_ian_3.out")
sdss_colors_chi.name = "SDSS Colors Chi"
sdss_colors_chi.color = pp.get_color(4)
sdss_colors_chi.marker = '.'
save_dataset(sdss_colors_chi)

### MXXL

In [None]:
# Build, post-process and save the "All MXXL <19.5" dataset.
# NOTE: `all` shadows the builtin until the del below; after the del the name
# refers to the builtin again, so re-run the loading cell before using the
# dataset in later cells.
all = pp.process_MXXL(ROOT_FOLDER + "mxxl_3pass_all.out")
all.name = "All MXXL <19.5"
all.color = pp.get_color(0)
all.marker = '-'
pp.post_process(all)
save_dataset(all)
del(all)

In [None]:
# Build, post-process and save the "All MXXL <19.5 c" dataset, then drop the
# in-memory reference.
all_c = pp.process_MXXL(ROOT_FOLDER + "mxxl_3pass_all_c.out")
all_c.name = "All MXXL <19.5 c"
all_c.color = pp.get_color(0)
all_c.marker = '--'
pp.post_process(all_c)
save_dataset(all_c)
del(all_c)

In [None]:
# Build, post-process and save the "All MXXL <20" dataset, then drop the
# in-memory reference.
all20 = pp.process_MXXL(ROOT_FOLDER + "mxxl_3pass_all20.out")
all20.name = "All MXXL <20"
all20.color = pp.get_color(0)
all20.marker = '--'
pp.post_process(all20)
save_dataset(all20)
del(all20)

In [None]:
# Build, post-process and save the "Fiber Only MXXL <19.5" dataset, then drop
# the in-memory reference.
fiberonly = pp.process_MXXL(ROOT_FOLDER + "mxxl_3pass_fiberonly.out")
fiberonly.name = "Fiber Only MXXL <19.5"
fiberonly.color = pp.get_color(1)
fiberonly.marker = '-'
pp.post_process(fiberonly)
save_dataset(fiberonly)
del(fiberonly)

In [None]:
# Build, post-process and save the "Fiber Only MXXL <20" dataset, then drop
# the in-memory reference.
fiberonly20 = pp.process_MXXL(ROOT_FOLDER + "mxxl_3pass_fiberonly20.out")
fiberonly20.name = "Fiber Only MXXL <20"
fiberonly20.color = pp.get_color(1)
fiberonly20.marker = '--'
pp.post_process(fiberonly20)
save_dataset(fiberonly20)
del(fiberonly20)

In [None]:
# Build, post-process and save the "Nearest Neighbor MXXL <19.5" dataset, then
# drop the in-memory reference.
nn_kd = pp.process_MXXL(ROOT_FOLDER + "mxxl_3pass_nn_kd.out")
nn_kd.name = "Nearest Neighbor MXXL <19.5"
nn_kd.color = pp.get_color(2)
nn_kd.marker = '-'
pp.post_process(nn_kd)
save_dataset(nn_kd)
del(nn_kd)

In [None]:
# Build, post-process and save the "Nearest Neighbor MXXL <19.5 c" dataset,
# then drop the in-memory reference.
nn_kd_c = pp.process_MXXL(ROOT_FOLDER + "mxxl_3pass_nn_kd_c.out")
nn_kd_c.name = "Nearest Neighbor MXXL <19.5 c"
nn_kd_c.color = pp.get_color(2)
nn_kd_c.marker = '--'
pp.post_process(nn_kd_c)
save_dataset(nn_kd_c)
del(nn_kd_c)

In [None]:
# Build, post-process and save the "Nearest Neighbor MXXL <20" dataset, then
# drop the in-memory reference.
nn_kd20 = pp.process_MXXL(ROOT_FOLDER + "mxxl_3pass_nn_kd20.out")
nn_kd20.name = "Nearest Neighbor MXXL <20"
nn_kd20.color = pp.get_color(2)
nn_kd20.marker = '--'
pp.post_process(nn_kd20)
save_dataset(nn_kd20)
del(nn_kd20)

In [None]:
# Build, post-process and save the "Simple v2 MXXL <19.5" dataset, then drop
# the in-memory reference.
# NOTE(review): unlike the cells above, post_process runs before the
# name/color/marker are set -- confirm post_process does not need them.
simple_2 = pp.process_MXXL(ROOT_FOLDER + "mxxl_3pass_simple_2.out")
pp.post_process(simple_2)
simple_2.name = "Simple v2 MXXL <19.5"
simple_2.color = pp.get_color(6)
simple_2.marker = '-'
save_dataset(simple_2)
del(simple_2)

In [None]:
# Build, post-process and save the "Simple v2 MXXL <19.5 c" dataset, then drop
# the in-memory reference.
simple_2_c = pp.process_MXXL(ROOT_FOLDER + "mxxl_3pass_simple_2_c.out")
pp.post_process(simple_2_c)
simple_2_c.name = "Simple v2 MXXL <19.5 c"
simple_2_c.color = pp.get_color(6)
simple_2_c.marker = '--'
save_dataset(simple_2_c)
del(simple_2_c)

In [None]:
# Build, post-process and save the "Simple v2 MXXL <20" dataset, then drop the
# in-memory reference.
simple_2_20 = pp.process_MXXL(ROOT_FOLDER + "mxxl_3pass_simple_2_20.out")
pp.post_process(simple_2_20)
simple_2_20.name = "Simple v2 MXXL <20"
simple_2_20.color = pp.get_color(6)
simple_2_20.marker = '--'
save_dataset(simple_2_20)
del(simple_2_20)

In [None]:
# Build, post-process and save the "Simple v4 MXXL <19.5" dataset.
# This previously read "mxxl_3pass_simple_4_c.out", i.e. the same file as the
# simple_4_c cell, so the two datasets were identical. Every other run pairs
# "<run>.out" with "<run>_c.out", so the non-"c" file is used here.
simple_4 = pp.process_MXXL(ROOT_FOLDER + "mxxl_3pass_simple_4.out")
pp.post_process(simple_4)
simple_4.name = "Simple v4 MXXL <19.5"
simple_4.color = pp.get_color(7)
simple_4.marker = '-'
save_dataset(simple_4)

In [None]:
# Build, post-process and save the "Simple v4 MXXL <19.5 c" dataset.
simple_4_c = pp.process_MXXL(ROOT_FOLDER + "mxxl_3pass_simple_4_c.out")
pp.post_process(simple_4_c)
simple_4_c.name = "Simple v4 MXXL <19.5 c"
simple_4_c.color = pp.get_color(7)
# '--' like every other "c" variant in this notebook; it used to be '-', which
# made this run indistinguishable from simple_4 (same color, same marker).
simple_4_c.marker = '--'
save_dataset(simple_4_c)

### UCHUU

In [None]:
# Build and save the "All UCHUU <19.5" dataset, then drop the in-memory
# reference. NOTE: no loading cell reloads all_u, so the UCHUU analysis cells
# further down need it loaded or recreated first.
all_u = pp.process_uchuu(ROOT_FOLDER + "uchuu_all.out")
all_u.name = "All UCHUU <19.5"
all_u.color = pp.get_color(9)
all_u.marker = '-'
save_dataset(all_u)
del(all_u)

### DESI BGS

In [None]:
# Build and save the "Fiber Only BGS <19.5" dataset.
fiberonly_BGS = pp.process_BGS(ROOT_FOLDER + "BGS_fiberonly_1.out")
fiberonly_BGS.name = "Fiber Only BGS <19.5"
fiberonly_BGS.color = pp.get_color(1)
fiberonly_BGS.marker = '-'
save_dataset(fiberonly_BGS)

In [None]:
# Build and save the "Observed 1pass+ BGS <19.5" dataset.
fiberonly_1pass_BGS = pp.process_BGS(ROOT_FOLDER + "BGS_fiberonly_1passok_1.out")
fiberonly_1pass_BGS.name = "Observed 1pass+ BGS <19.5"
fiberonly_1pass_BGS.color = pp.get_color(3)
fiberonly_1pass_BGS.marker = '-'
save_dataset(fiberonly_1pass_BGS)

In [None]:
# Build and save the "Nearest Neighbor BGS <19.5" dataset.
nn_BGS = pp.process_BGS(ROOT_FOLDER + "BGS_nn.out")
nn_BGS.name = "Nearest Neighbor BGS <19.5"
nn_BGS.color = pp.get_color(2)
nn_BGS.marker = '-'
save_dataset(nn_BGS)

In [None]:
# Build and save the "Simple v2 BGS <19.5" dataset.
simple2_BGS = pp.process_BGS(ROOT_FOLDER + "BGS_simple_2.out")
simple2_BGS.name = "Simple v2 BGS <19.5"
simple2_BGS.color = pp.get_color(6)
simple2_BGS.marker = '-'
save_dataset(simple2_BGS)

In [None]:
# Build and save the "Simple v2 BGS <19.5 C" dataset.
simple2_BGS_c = pp.process_BGS(ROOT_FOLDER + "BGS_simple_2_c.out")
simple2_BGS_c.name = "Simple v2 BGS <19.5 C"
simple2_BGS_c.color = pp.get_color(6)
simple2_BGS_c.marker = '--'
save_dataset(simple2_BGS_c)

In [None]:
# Build and save the "Simple v4 BGS <19.5" dataset.
simple4_BGS = pp.process_BGS(ROOT_FOLDER + "BGS_simple_4.out")
simple4_BGS.name = "Simple v4 BGS <19.5"
simple4_BGS.color = pp.get_color(7)
simple4_BGS.marker = '-'
save_dataset(simple4_BGS)

In [None]:
# Build and save the "Simple v4 BGS <19.5 C" dataset.
# JUST RAN THIS ONLY WITH g-r < 1.0 instead of NN's z k corr
simple4_BGS_c = pp.process_BGS(ROOT_FOLDER + "BGS_simple_4_c.out")
simple4_BGS_c.name = "Simple v4 BGS <19.5 C"
simple4_BGS_c.color = pp.get_color(7)
simple4_BGS_c.marker = '--'
save_dataset(simple4_BGS_c)

In [None]:
# Build and save the "Simple v4 BGS SDSS-like" dataset.
simple4_BGS_SDSSlike = pp.process_BGS(ROOT_FOLDER + "BGS_simple_4_sdsslike.out")
simple4_BGS_SDSSlike.name = "Simple v4 BGS SDSS-like"
simple4_BGS_SDSSlike.color = pp.get_color(5)
simple4_BGS_SDSSlike.marker = '-'
save_dataset(simple4_BGS_SDSSlike)

In [None]:
# Build and save the "Simple v4 BGS SDSS-like C" dataset.
simple4_BGS_SDSSlike_c = pp.process_BGS(ROOT_FOLDER + "BGS_simple_4_sdsslike_c.out")
simple4_BGS_SDSSlike_c.name = "Simple v4 BGS SDSS-like C"
simple4_BGS_SDSSlike_c.color = pp.get_color(5)
simple4_BGS_SDSSlike_c.marker = '--'
save_dataset(simple4_BGS_SDSSlike_c)

## View Plots

In [None]:
# Compare SDSS Vanilla against the BGS nearest-neighbor run.
# NOTE: nn_BGS has no load_dataset call in the loading cells, so it is only
# populated if its processing cell above has been run in this session.
pp.compare_fsat_color_split(sdss_vanilla, nn_BGS)


In [None]:
# Color-split plots for each run next to its "c" variant.
# NOTE: simple4_BGS / simple4_BGS_c are not loaded by the loading cells; run
# their processing cells first.
#pp.plots_color_split(sdss_vanilla, sdss_stripped)
#pp.plots_color_split(sdss_vanilla, sdss_colors)
pp.plots_color_split(simple2_BGS, simple2_BGS_c)
pp.plots_color_split(simple4_BGS, simple4_BGS_c)
#pp.plots_color_split(simple2_BGS_SDSSlike, simple2_BGS_SDSSlike_c)


In [None]:
# pp.qf_cen_plot for the Simple v2 BGS run and its "C" variant.
pp.qf_cen_plot(simple2_BGS)
pp.qf_cen_plot(simple2_BGS_c)

In [None]:
# Standard comparison plots across four BGS runs. simple4_BGS, nn_BGS and
# fiberonly_BGS are not loaded by the loading cells (the fiberonly_BGS load is
# commented out), so their processing cells must have been run first.
pp.plots(simple4_BGS, simple2_BGS, nn_BGS, fiberonly_BGS)



In [None]:
# For each catalog, report how many galaxies are in its largest group
# (rows of all_data sharing one 'igrp' value).
for catalog_run in (simple2_BGS, fiberonly_BGS, fiberonly_1pass_BGS):
    print(catalog_run.name)
    members_per_group = catalog_run.all_data.groupby('igrp').size()
    print(members_per_group.max())


In [None]:
# z_assigned_flag is inverted with np.invert, so it is expected to be a
# boolean column: True where the redshift was assigned rather than observed.
redshift_was_assigned = simple2_BGS.all_data['z_assigned_flag']
n_truly_observed = np.sum(np.invert(redshift_was_assigned))
n_redshift_assigned = np.sum(redshift_was_assigned)
print(f"The catalog contains {n_truly_observed} truly observed galaxies and {n_redshift_assigned} galaxies with redshifts assigned to them.")


In [None]:
# Sky map of every galaxy in the Simple v2 BGS catalog, from its RA/Dec
# columns. The low alpha keeps dense regions readable.
fig=make_map(simple2_BGS.all_data['RA'].to_numpy(), simple2_BGS.all_data['Dec'].to_numpy(), alpha=0.05, dpi=500)

In [None]:
# SDSS Examine Bimodality
# Overlays two histograms on the same axes: the g-r color (Mag_g - Mag_r) and
# the Dn4000 column of the SDSS Vanilla catalog.

z=sdss_vanilla.all_data['z']
gmr=sdss_vanilla.all_data['Mag_g'] - sdss_vanilla.all_data['Mag_r']
junk=plt.hist(gmr, bins=np.linspace(-1,3,300), alpha=0.4)
# Alternative: histogram of the k-corrected g-r instead (this is the only use of z).
#junk=plt.hist(k_correct(sdss_vanilla.all_data['Mag_g'], z, gmr, band='g')  - k_correct(sdss_vanilla.all_data['Mag_r'], z, gmr, band='r'), bins=500, alpha=0.4)
junk=plt.hist(sdss_vanilla.all_data['Dn4000'], bins=np.linspace(0,4,300), alpha=0.4)
# The Dn4000 bins extend to 4 but the view is clipped to the g-r range.
plt.xlim(-1, 3)

## Simulations-Only Analysis

### UCHUU Issues

In [None]:
# Compare the M_halo column with the catalog's own uchuu_halo_mass column,
# using the same bins on log-log axes.
# NOTE(review): the 10**10 factor suggests uchuu_halo_mass is stored in units
# of 1e10 -- confirm against the catalog definition.
plt.hist(all_u.all_data['M_halo'], bins=pp.Mhalo_bins, alpha=0.4)
plt.hist(all_u.all_data['uchuu_halo_mass']*10**10, bins=pp.Mhalo_bins, alpha=0.4)
plt.loglog()

# TODO do we expect the mass distribution of halos to be so different from the UCHUU SHAM catalog and our assigned halo?

In [None]:
# TODO: 1/Vmax corrections do odd things to the UCHUU truth. Why?
pp.hod_plots(all_u)

### What effect does Fiber Assignment have on the luminosity function?

In [None]:
# Compare the all-galaxies MXXL run with the fiber-only run.
# NOTE: the fiberonly load is commented out in the loading cell above; load
# it (or run its processing cell without the del) before running this.
pp.group_finder_centrals_halo_masses_plots(all, [fiberonly])

### Purity and Completeness

In [None]:
# Purity/completeness check over the "All", nearest-neighbor and Simple v2
# MXXL runs (presumably `all` serves as the truth reference -- confirm in
# postprocess).
pp.test_purity_and_completeness(all, nn_kd, simple_2)

In [None]:
# Purity/completeness plots for the same three MXXL runs.
pp.purity_complete_plots(all, nn_kd, simple_2)

### Compare halos to truth

In [None]:
# Halo comparison for the Simple v2 MXXL run.
pp.assigned_halo_analysis(simple_2)

### Compare assigned implied abs mags to truth from MXXL

In [None]:
# TODO not sure this all makes sense
run_to_check = simple_2

# Rows whose fiber_assigned_0 flag is falsy, i.e. galaxies without a fiber.
not_assigned = np.invert(run_to_check.all_data.fiber_assigned_0.astype(bool))
app_mags = run_to_check.all_data.app_mag[not_assigned].to_numpy()
# Same apparent magnitudes converted with two different redshift columns:
# `z` (used as the assigned redshift) and `z_obs` (plotted as "Truth" in the
# next cell).
my_assigned_abs_mag = app_mag_to_abs_mag(app_mags, run_to_check.all_data.z[not_assigned].to_numpy())
my_raw_abs_mag = app_mag_to_abs_mag(app_mags, run_to_check.all_data.z_obs[not_assigned].to_numpy())

# Both arrays come from the same mask, so the two lengths should match.
print(len(my_raw_abs_mag), len(my_assigned_abs_mag))

In [None]:
# Compare absolute mags. Using my way of computing for both.
# Needs my_raw_abs_mag / my_assigned_abs_mag from the previous cell.
abs_mag_bins = np.linspace(-25.5, -16, num=50)
plt.figure(dpi=DPI)
# x and y hold plt.hist's return values; they are not used afterwards.
x = plt.hist(my_raw_abs_mag, bins=abs_mag_bins, label="Truth", alpha=0.5)
y = plt.hist(my_assigned_abs_mag, bins=abs_mag_bins, label="Our Algorithm", alpha=0.5)
plt.xlabel("Absolute Mag")
plt.ylabel("Count")
# Fixed count range; bins with fewer than 400 galaxies fall below the axis.
plt.ylim(400,400000)
plt.yscale('log')
plt.title("Compare Lost Galaxies Abs Mags")
plt.legend()

## Galaxy Neighborhood Examiner

In [None]:
# Galaxy table used by the neighborhood examiner below: the "All MXXL" run.
# Requires `all` to be the loaded dataset (not the builtin).
data = all.all_data


In [None]:
# Split the table on the fiber_assigned_0 flag: 0 -> lost, 1 -> observed.
lost_galaxies = data.loc[data['fiber_assigned_0'] == 0]
#lost_galaxies_2 = nn.all_data.loc[nn.all_data['fiber_assigned_0'] == 0]
obs_galaxies = data.loc[data['fiber_assigned_0'] == 1]
#obs_galaxies_2 = nn.all_data.loc[nn.all_data['fiber_assigned_0'] == 1]
print("Lost galaxies: ", len(lost_galaxies), "Observed Galaxies: ", len(obs_galaxies))

# TODO could use angular size / redshift relation as part of this :-)
def getsize(z):
    """Return the scatter-marker size for a galaxy at redshift ``z``.

    Sizes shrink step-wise with redshift, so nearer galaxies are drawn larger.
    Anything at ``z >= 0.6`` (and any value that compares false against every
    threshold, such as NaN) gets the smallest size, 3.

    The original if/elif chain tested ``z < 0.2`` twice, which made the
    size-75 step unreachable. The first of the two thresholds is taken to be
    0.15 so that all nine sizes are used.
    """
    # (exclusive upper redshift bound, marker size), in ascending redshift order.
    size_steps = (
        (0.05, 300),
        (0.1, 200),
        (0.15, 120),
        (0.2, 75),
        (0.3, 45),
        (0.4, 25),
        (0.5, 15),
        (0.6, 8),
    )
    for z_upper, marker_size in size_steps:
        if z < z_upper:
            return marker_size
    return 3

# Half-width of the square RA/Dec search window placed around a target
# ('5m' should parse as 5 arcminutes in astropy's d/m/s notation -- confirm).
nearby_angle = coord.Angle('5m')

def neighbor_exam(target):
    """Count observed galaxies near ``target`` that sit at a similar true distance.

    ``target`` is a row with ``RA``, ``Dec`` (degrees) and ``z_obs``. Observed
    galaxies are taken from the module-level ``obs_galaxies`` frame, selected
    with a simple RA/Dec box of half-width ``nearby_angle``.
    NOTE(review): the RA half-width is not scaled by cos(Dec), so the box
    narrows on the sky away from the equator.

    Returns a tuple ``(n_close, n_nearby, fraction)`` where ``n_close`` is the
    number of nearby observed galaxies whose ``ldist_true`` is within 20 (in
    the units of ``ldist_true``) of the target's distance, ``n_nearby`` is the
    number of observed galaxies in the box, and ``fraction`` is their ratio.
    ``fraction`` is NaN when the box is empty.
    """
    target_dist_true = z_to_ldist(target.z_obs)

    ra_max = (coord.Angle(target.RA*u.degree) + nearby_angle).value
    ra_min = (coord.Angle(target.RA*u.degree) - nearby_angle).value
    dec_max = (coord.Angle(target.Dec*u.degree) + nearby_angle).value
    dec_min = (coord.Angle(target.Dec*u.degree) - nearby_angle).value

    nearby = obs_galaxies.query('RA < @ra_max and RA > @ra_min and Dec < @dec_max and Dec > @dec_min')

    n_nearby = len(nearby)
    if n_nearby == 0:
        # Nothing in the window: report NaN explicitly rather than dividing
        # 0 by 0 (which gave the same NaN plus a RuntimeWarning).
        return (0, 0, np.nan)

    close_neighbors = np.isclose(nearby.ldist_true.to_numpy(), target_dist_true, rtol=0.0, atol=20)
    n_close = np.sum(close_neighbors)

    return (n_close, n_nearby, n_close/n_nearby)
    


In [None]:
# Sky positions of every row of `data`, as one ICRS SkyCoord array (RA/Dec in degrees).
catalog = coord.SkyCoord(ra=data.RA.to_numpy()*u.degree, dec=data.Dec.to_numpy()*u.degree, frame='icrs')

# This is too slow when called 1 at a time, not using. 
# TODO Could be faster when batched for the whole sample?
def neighbors_within(max_angle: coord.Angle, to_match: coord.SkyCoord, catalog: coord.SkyCoord, treekey: str):
    """Collect the catalog neighbors of ``to_match`` closer than ``max_angle``.

    Walks outward one neighbor at a time (1st nearest, 2nd nearest, ...) with
    ``match_coordinates_sky`` and stops at the first neighbor whose on-sky
    separation is not below ``max_angle``. That neighbor is NOT included in
    the result (it used to be appended). At most 99 neighbors are examined.

    Parameters:
        max_angle: maximum on-sky separation.
        to_match: the single coordinate to search around.
        catalog: coordinates to search in.
        treekey: passed as ``storekdtree`` so the KD-tree is cached between calls.

    Returns:
        ``(neighbor_ind, neighbor_dist)``: parallel lists of the catalog index
        and separation of each neighbor found, nearest first.
    """
    max_nth = 100 # cap at 100 for now, TODO remove when safe
    neighbor_ind = []
    neighbor_dist = []

    for nth in range(1, max_nth):
        idx, d2d, _ = coord.match_coordinates_sky(to_match, catalog, nthneighbor=nth, storekdtree=treekey)
        if not (d2d < max_angle):
            # Neighbors come back in order of distance, so every later one is
            # at least this far away.
            break
        neighbor_ind.append(idx)
        neighbor_dist.append(d2d)

    return neighbor_ind, neighbor_dist

def examine_around(target):
    """Plot the galaxies in a small RA/Dec window around ``target``.

    ``target`` is a row of the module-level ``data`` frame (its ``.name`` must
    be its index label there). Neighbors are every other row of ``data``
    inside a box of half-width ``nearby_angle`` around the target. They are
    split by:

    * observed (``fiber_assigned_0 == 1``, dots) vs unobserved (``== 0``, 'x');
    * "good z" (``close_enough(target.z_obs, neighbor.z)`` is true) vs other;
    * bright vs "dim" (``app_mag > 19.5``, drawn semi-transparent).

    Each neighbor is labelled with its ``z``, and observed neighbors get a
    translucent circle of radius ``mxxl_halo_vir_radius_guess_arcsec``. The
    first observed neighbor whose ``z`` equals the target's ``z`` is ringed
    as "Assigned". Nothing is plotted (a message is printed instead) when
    there are fewer than two neighbors.
    """
    dim_mag_limit = 19.5 # apparent magnitudes above this are drawn as "dim"

    def marker_sizes(frame):
        # One scatter size per row, from the row's redshift.
        return [getsize(z) for z in frame.z]

    target_observed = target.fiber_assigned_0
    #target = data.loc[index]

    z_eff = target.z # referenced as @z_eff in the query below
    #target_dist_true = z_to_ldist(target.z_obs)

    ra_max = (coord.Angle(target.RA*u.degree) + nearby_angle).value
    ra_min = (coord.Angle(target.RA*u.degree) - nearby_angle).value
    dec_max = (coord.Angle(target.Dec*u.degree) + nearby_angle).value
    dec_min = (coord.Angle(target.Dec*u.degree) - nearby_angle).value

    # TODO replace with a non-angular search so all redshifts are treated equally
    #target_pos = coord.SkyCoord(ra=target.RA*u.degree, dec=target.Dec*u.degree, frame='icrs')
    #indexes, angular_distances = neighbors_within(nearby_angle, target_pos, catalog, 'treekey_nnkd')
    #nearby = data.iloc[indexes]
    nearby = data.query('RA < @ra_max and RA > @ra_min and Dec < @dec_max and Dec > @dec_min')
    nearby = nearby.drop(target.name) # drop the target itself from this df

    nearby_obs = nearby.loc[nearby['fiber_assigned_0'] == 1]
    nearby_unobs = nearby.loc[nearby['fiber_assigned_0'] == 0]

    # Observed neighbor whose z is exactly the z given to the target. Becomes
    # a Series (one row) when found; stays an empty DataFrame otherwise.
    z_match = nearby_obs.query('z == @z_eff')
    #assert len(z_match) == 1, len(z_match) # TODO need a better way to verify which row is the one that we assigned the z from
    if len(z_match) > 0:
        z_match = z_match.iloc[0]
    #nearby_obs = nearby_obs.drop(z_match.name)

    # --- observed neighbors: good z / other, each split into bright / dim ---
    good_obs_z_filter = [close_enough(target.z_obs, z) for z in nearby_obs.z]
    nearby_obs_good_z = nearby_obs.loc[good_obs_z_filter]
    nearby_obs_good_z_dim = nearby_obs_good_z.loc[nearby_obs_good_z.app_mag > dim_mag_limit]
    nearby_obs_good_z = nearby_obs_good_z.loc[np.invert(nearby_obs_good_z.app_mag > dim_mag_limit)]

    if len(good_obs_z_filter) > 0:
        nearby_obs_other = nearby_obs.loc[np.invert(good_obs_z_filter)]
    else:
        nearby_obs_other = nearby_obs
    nearby_obs_other_dim = nearby_obs_other.loc[nearby_obs_other.app_mag > dim_mag_limit]
    nearby_obs_other = nearby_obs_other.loc[np.invert(nearby_obs_other.app_mag > dim_mag_limit)]

    # --- unobserved neighbors: same four-way split ---
    good_unobs_z_filter = [close_enough(target.z_obs, z) for z in nearby_unobs.z]

    nearby_unobs_good_z = nearby_unobs.loc[good_unobs_z_filter]
    if good_unobs_z_filter:
        nearby_unobs_other = nearby_unobs.loc[np.invert(good_unobs_z_filter)]
        nearby_unobs_other_dim = nearby_unobs_other.loc[nearby_unobs_other.app_mag > dim_mag_limit]
        nearby_unobs_other = nearby_unobs_other.loc[np.invert(nearby_unobs_other.app_mag > dim_mag_limit)]
    else:
        nearby_unobs_other = nearby_unobs_good_z # empty df
        nearby_unobs_other_dim = nearby_unobs_good_z

    nearby_unobs_good_z_dim = nearby_unobs_good_z.loc[nearby_unobs_good_z.app_mag > dim_mag_limit]
    nearby_unobs_good_z = nearby_unobs_good_z.loc[np.invert(nearby_unobs_good_z.app_mag > dim_mag_limit)]

    if target_observed:
        title = "Observed Galaxy {0}: z_true={1:.3f}, z_NN={2:.3f}".format(target.name, target.z_obs, target.z)
    else:
        title = "Lost Galaxy {0}: z_true={1:.3f}, z_NN={2:.3f}".format(target.name, target.z_obs, target.z)

    if len(nearby) > 1:

        fig,ax = plt.subplots(1)
        fig.set_size_inches(10,10)
        ax.set_aspect('equal')

        # Add virial radii of MXXL halos to the observed galaxies
        for k in range(len(nearby_obs)):
            current = nearby_obs.iloc[k]
            radius = current.mxxl_halo_vir_radius_guess_arcsec / 3600 # arcsec to degrees, like the plot
            circ = Circle((current.RA,current.Dec), radius, color=get_color(0), alpha=0.10)
            ax.add_patch(circ)

        textsize = 9
        dimalpha = 0.4

        # Color convention: get_color(0) = other z, get_color(2) = good z.
        # A dim series uses the same color as its bright counterpart, only
        # with alpha. (The two observed dim series previously had these
        # colors swapped.)
        plt.scatter(nearby_obs_other.RA, nearby_obs_other.Dec, s=marker_sizes(nearby_obs_other), color=get_color(0), label="Obs ({0})".format(len(nearby_obs_other)))
        if len(nearby_obs_other_dim) > 0:
            plt.scatter(nearby_obs_other_dim.RA, nearby_obs_other_dim.Dec, s=marker_sizes(nearby_obs_other_dim), color=get_color(0), alpha=dimalpha, label="Obs dim ({0})".format(len(nearby_obs_other_dim)))

        plt.scatter(nearby_obs_good_z.RA, nearby_obs_good_z.Dec, s=marker_sizes(nearby_obs_good_z), color=get_color(2), label="Obs good z ({0})".format(len(nearby_obs_good_z)))
        if len(nearby_obs_good_z_dim) > 0:
            plt.scatter(nearby_obs_good_z_dim.RA, nearby_obs_good_z_dim.Dec, s=marker_sizes(nearby_obs_good_z_dim), color=get_color(2), alpha=dimalpha, label="Obs good z dim ({0})".format(len(nearby_obs_good_z_dim)))

        plt.scatter(nearby_unobs_other.RA, nearby_unobs_other.Dec, marker='x', s=marker_sizes(nearby_unobs_other), color=get_color(0), label="Unobs ({0})".format(len(nearby_unobs_other)))
        if len(nearby_unobs_other_dim) > 0:
            plt.scatter(nearby_unobs_other_dim.RA, nearby_unobs_other_dim.Dec, marker='x', s=marker_sizes(nearby_unobs_other_dim), color=get_color(0), alpha=dimalpha, label="Unobs dim ({0})".format(len(nearby_unobs_other_dim)))

        plt.scatter(nearby_unobs_good_z.RA, nearby_unobs_good_z.Dec, marker='x', s=marker_sizes(nearby_unobs_good_z), color=get_color(2), label="Unobs good z ({0})".format(len(nearby_unobs_good_z)))
        if len(nearby_unobs_good_z_dim) > 0:
            plt.scatter(nearby_unobs_good_z_dim.RA, nearby_unobs_good_z_dim.Dec, marker='x', s=marker_sizes(nearby_unobs_good_z_dim), color=get_color(2), alpha=dimalpha, label="Unobs good z dim ({0})".format(len(nearby_unobs_good_z_dim)))

        # redshift data labels
        for k in range(len(nearby_obs)):
            plt.text(nearby_obs.iloc[k].RA, nearby_obs.iloc[k].Dec, "{0:.3f}".format(nearby_obs.iloc[k].z), size=textsize)
        for k in range(len(nearby_unobs)):
            plt.text(nearby_unobs.iloc[k].RA, nearby_unobs.iloc[k].Dec, "{0:.3f}".format(nearby_unobs.iloc[k].z), size=textsize)

        # Circle assigned one
        if len(z_match) > 0:
            plt.scatter(z_match.RA, z_match.Dec, color=get_color(3), facecolors='none', s=getsize(z_match.z)*2, label="Assigned")
            plt.text(z_match.RA, z_match.Dec, "{0:.3f}".format(z_match.z), size=textsize)

        # Target galaxy
        if target_observed:
            plt.scatter(target.RA, target.Dec, s=getsize(target.z_obs), color=get_color(1), label="Target")
        else:
            plt.scatter(target.RA, target.Dec, s=getsize(target.z_obs), marker='X', color=get_color(1), label="Target")
        plt.text(target.RA, target.Dec, "{0:.3f}".format(target.z_obs), size=textsize)

        plt.xlim(ra_min, ra_max)
        plt.ylim(dec_min, dec_max)
        plt.xlabel('RA')
        # Was a second plt.xlabel call, which replaced 'RA' and left y unlabeled.
        plt.ylabel('Dec')
        plt.legend()
        plt.title(title)
        plt.draw()

    else:
        print("Skipping empty plot for {0}".format(title))

In [None]:
# Draw neighborhood plots for a random sample of galaxies from GALAXY_POOL.
PLOTS_TO_MAKE = 10
GALAXY_POOL = lost_galaxies

#START_INDEX = 777
#for i in range(START_INDEX, START_INDEX + PLOTS_TO_MAKE):
#    index = lost_galaxies.index[i]
#    examine_around(index)
print("Number of galaxies to choose from: ", len(GALAXY_POOL))
# np.random.randint excludes its upper bound, so len(GALAXY_POOL) is the right
# limit; the previous len(...)-1 could never pick the last galaxy and raised
# for a pool of one. Sampling is with replacement, so repeats are possible.
indexes = np.random.randint(0, len(GALAXY_POOL), size=PLOTS_TO_MAKE)
for i in indexes:
    target = GALAXY_POOL.iloc[i]
    examine_around(target)

### Idea: analyze entire neighborhood and look for groups of similar z galaxies, choose a z from the biggest group

In [None]:
# Run neighbor_exam on the first MAX lost galaxies and collect the three
# returned values (close count, total count, fraction) per galaxy.
MAX = 300
close = np.empty(MAX)
total = np.empty(MAX)
frac = np.empty(MAX)
for i in range(0,MAX):
    target = lost_galaxies.iloc[i]
    close[i], total[i], frac[i] = neighbor_exam(target)

In [None]:
# Save the first max_finished entries of the close/total counts to .npy files.
# NOTE: this overwrites the files on disk. If close/total come from the cell
# above with MAX = 300, the slices simply return all 300 entries, because
# numpy clips a slice that runs past the end of the array.
max_finished = 81408
finished_close = close[0:max_finished]
finished_total = total[0:max_finished]
with open(ROOT_FOLDER + 'mxxl_lostgal_neighborhood_close.npy', 'wb') as f:
    np.save(f, finished_close)
with open(ROOT_FOLDER + 'mxxl_lostgal_neighborhood_total.npy', 'wb') as f:
    np.save(f, finished_total)

In [None]:
# Reload the saved counts and recompute the fraction from them.
with open(ROOT_FOLDER + 'mxxl_lostgal_neighborhood_close.npy', 'rb') as f:
    close = np.load(f)

with open(ROOT_FOLDER + 'mxxl_lostgal_neighborhood_total.npy', 'rb') as f:
    total = np.load(f)

# Entries with total == 0 divide by zero here and come out as NaN.
frac = close / total

In [None]:
# Histogram of how many similar-distance observed neighbors each lost galaxy
# has, in unit-width bins from 0 to 30.
bins = np.linspace(0,30,31)
trash = plt.hist(close, bins=bins)
plt.title("Lost Galaxies Neighbors at ~Correct z")
plt.xlabel("Count of Similar z Neighbors")
plt.ylabel("Count of Lost Galaxies")
# "Hopeless": no close neighbor at all; "essentially hopeless": at most one.
print("Hopeless Fraction: ", np.sum(close==0) / len(close))
print("Essentially Hopeless Fraction: ", (np.sum(close==0) + np.sum(close==1)) / len(close))

In [None]:
# "Viable" lost galaxies: those with more than one close neighbor.
viable = close > 1
# The bare expression below has no effect: it is not the last expression in
# the cell, so Jupyter does not display it.
frac[viable]
trash=plt.hist(frac[viable], bins=30)
plt.title("Viable Lost Galaxies: Correct z Neighbor Fraction")
plt.xlabel("Fraction with Similar z")
plt.ylabel("Count of Viable Lost Galaxies")

# Tests

In [None]:
# Sanity check for is_quiescent_BGS_smart with three hand-built galaxies.
# Colors sit just either side of the global red cut; the *_dn values are
# deliberately extreme (presumably Dn4000 -- confirm against the function's
# signature), and one entry is NaN.
blue_color, red_color = GLOBAL_RED_COLOR_CUT - 0.1, GLOBAL_RED_COLOR_CUT + 0.1
blue_dn, red_dn = -1, 3

dn_inputs = np.array([red_dn, np.nan, blue_dn])
color_inputs = np.array([blue_color, blue_color, red_color])
results = is_quiescent_BGS_smart(np.array([7,8,9]), dn_inputs, color_inputs)

# Expected: red dn + blue color -> True; NaN dn + blue color -> False;
# blue dn + red color -> False.
for case_index, expected in enumerate((True, False, False)):
    assert results[case_index] == expected