In [None]:
import numpy as np
import astropy.coordinates as coord
import sys
import matplotlib.pyplot as plt
from astropy.table import Table,join

if './SelfCalGroupFinder/py/' not in sys.path:
    sys.path.append('./SelfCalGroupFinder/py/')
from groupcatalog import *
import catalog_definitions as cat
from pyutils import *
from dataloc import *
import plotting as pp
%load_ext autoreload
%autoreload 2

## Running Group Finder

This notebook calls the functions that preprocess the input, run the group finder, and postprocess its output to build up a GroupCatalog object, which mostly wraps a pandas DataFrame containing the resulting group catalog data.

After running this on a given GroupCatalog definition, a serialized (via pickle) version of the GroupCatalog object will exist which can be deserialized elsewhere for analysis. See post_plots.ipynb for that.

In [None]:
# Select the catalog families to process by (un)commenting entries in this list.
datasets_to_run: list[GroupCatalog] = [
    #*cat.sdss_list,
    #*cat.uchuu_list,
    #*cat.mxxl_list,
    *cat.bgs_sv3_list,
    #*bgs_y1_list,  # NOTE(review): written without the `cat.` prefix - confirm where this list is defined
    #*bgs_y3_list,  # NOTE(review): same as above
]

# Each selected catalog is run through the pipeline in turn and then pickled
# with serialize() so it can be reloaded later via deserialize().
# To just run postprocessing on GF output, comment out run_group_finder()
for d in datasets_to_run:
    #d = deserialize(d)
    #d.preprocess()
    d.run_group_finder(popmock=False)
    d.postprocess()
    #d.run_corrfunc()
    serialize(d)
    #del(d)


In [54]:
# Reload previously serialized catalogs for the comparisons below.
# map() is consumed left to right, so deserialize() runs in the listed order.
bgs_sv3_pz_2_4_7p, bgs_y3_like_sv3_pz_2_4, bgs_sv3_pz_2_4_10p = map(
    deserialize,
    (
        cat.bgs_sv3_pz_2_4_7p,
        cat.bgs_y3_like_sv3_pz_2_4,
        cat.bgs_sv3_pz_2_4_10p,
    ),
)

In [None]:
import plotting as pp
import astropy.units as u
# Hand the RA/Dec columns of the 7p SV3 catalog to pp.make_map as plain numpy
# arrays; whatever make_map returns is kept in `fig`.
sv3_7p_galaxies = bgs_sv3_pz_2_4_7p.all_data
fig = pp.make_map(
    sv3_7p_galaxies.RA.to_numpy(),
    sv3_7p_galaxies.Dec.to_numpy(),
)

In [None]:
# Plot the positions of the SV3 (7p) and Y3-like-SV3 catalogs together.
# NOTE(review): presumably DEG_LONG/ra_min/dec_min select a 4-degree window
# starting at RA=148, Dec=0 - confirm against plotting.plot_positions.
pp.plot_positions(
    bgs_sv3_pz_2_4_7p.all_data,
    bgs_y3_like_sv3_pz_2_4.all_data,
    DEG_LONG=4,
    split=True,
    ra_min=148,
    dec_min=0,
)

In [None]:
from astropy.coordinates import SkyCoord
from astropy import units as u
from astropy.coordinates import match_coordinates_sky

def find_unique_objects(cat1, cat2, max_sep_arcsec=1.0):
    """Cross-match the spectroscopic galaxies of two catalogs on the sky.

    Rows of each catalog's ``all_data`` are kept when
    ``z_flag_is_spectro_z`` accepts their ``z_assigned_flag``. Every kept row
    of ``cat1`` is then paired with its nearest kept row of ``cat2`` using
    ``match_coordinates_sky``.

    Parameters
    ----------
    cat1, cat2
        Objects exposing an ``all_data`` DataFrame with ``RA``, ``Dec``
        (interpreted here as degrees) and ``z_assigned_flag`` columns.
    max_sep_arcsec : float, default 1.0
        Largest on-sky separation, in arcseconds, at which the nearest
        ``cat2`` row still counts as the same object.

    Returns
    -------
    unique_objects : pandas.DataFrame
        Rows of the filtered ``cat1`` table whose nearest ``cat2`` neighbour
        is farther than ``max_sep_arcsec``. The ``FID`` column still holds the
        position of that (rejected) nearest neighbour in the filtered
        ``cat2`` table.
    matched_objects : pandas.DataFrame
        Rows of the filtered ``cat1`` table with a neighbour within
        ``max_sep_arcsec``, left-joined to that neighbour's ``cat2`` row.
        Overlapping ``cat2`` column names carry the suffix ``_2``.
    """
    df1 = cat1.all_data.loc[z_flag_is_spectro_z(cat1.all_data['z_assigned_flag'])].reset_index()
    df2 = cat2.all_data.loc[z_flag_is_spectro_z(cat2.all_data['z_assigned_flag'])].reset_index()

    # Build sky coordinates from the RA/Dec columns (degrees).
    coords1 = SkyCoord(ra=df1['RA'].to_numpy()*u.degree, dec=df1['Dec'].to_numpy()*u.degree)
    coords2 = SkyCoord(ra=df2['RA'].to_numpy()*u.degree, dec=df2['Dec'].to_numpy()*u.degree)

    # idx[i] is the position in coords2 of the nearest neighbour of coords1[i];
    # d2d[i] is the on-sky separation to that neighbour.
    idx, d2d, _ = match_coordinates_sky(coords1, coords2)

    # FID is the join key: for df1 it is the matched df2 row, and for df2 it
    # is the row's own positional index (valid because of reset_index above).
    df1['FID'] = idx
    df2['FID'] = df2.index

    # True where the nearest cat2 neighbour is too far away to be the same object.
    unique_mask = d2d > max_sep_arcsec*u.arcsec

    # Objects in cat1 with no counterpart in cat2. The original code returned
    # this name without ever assigning it, which raised NameError.
    unique_objects = df1.loc[unique_mask]

    # Filter before joining so that only the matched rows are joined.
    matched_objects = df1.loc[~unique_mask].join(df2.set_index('FID'), on='FID', rsuffix='_2')

    print(f"Total spectroscopic galaxies in cat1: {len(df1)}, cat2: {len(df2)}")
    print(f'Unique objects in cat1: {unique_mask.sum()}, Matched objects in cat1: {len(matched_objects)}')

    return unique_objects, matched_objects

# Cross-match the 10p SV3 catalog against the Y3-like-SV3 catalog.
unique_objects, matched_objects = find_unique_objects(
    bgs_sv3_pz_2_4_10p,
    bgs_y3_like_sv3_pz_2_4,
)

# Fraction of matched pairs whose two redshifts agree to within an absolute
# tolerance of 1e-4 (rtol=0 disables the relative term).
z_agrees = np.isclose(matched_objects['z'], matched_objects['z_2'], atol=0.0001, rtol=0)
z_agrees.sum() / len(matched_objects)
#fig=pp.make_map(unique_objects.RA.to_numpy(), unique_objects.Dec.to_numpy())

In [None]:
# Histogram of the redshift difference between matched pairs; the bin edges
# span [-z_window, +z_window], so larger differences are not shown.
z_window = 0.005
delta_z = matched_objects['z'] - matched_objects['z_2']
plt.hist(delta_z, bins=np.linspace(-z_window, z_window, 100))
plt.yscale('log')

# Draw dashed vertical lines at +/-0.005, the edges of the histogram range.
plt.axvline(x=z_window, color='r', linestyle='--')
plt.axvline(x=-z_window, color='r', linestyle='--')