## Running Group Finder

This notebook calls the functions that preprocess the input data, run the group finder, and postprocess its output to build up a GroupCatalog object, which mostly wraps a pandas DataFrame containing the resulting group catalog data.

After this notebook has been run on a given GroupCatalog definition, a serialized (pickled) copy of the GroupCatalog object exists on disk and can be deserialized elsewhere for analysis. See post_plots.ipynb for an example of that analysis.

In [None]:
import numpy as np
import astropy.coordinates as coord
import sys
import matplotlib.pyplot as plt
from astropy.table import Table,join
import astropy.io.fits as fits

if './SelfCalGroupFinder/py/' not in sys.path:
    sys.path.append('./SelfCalGroupFinder/py/')
from groupcatalog import *
import catalog_definitions as cat
from pyutils import *
from dataloc import *
import plotting as pp
%load_ext autoreload
%autoreload 2

In [None]:
# Catalog definitions to process in this run. Uncomment a whole list below,
# and/or add individual definitions to the explicit list further down.
datasets_to_run: list[GroupCatalog] = []
#datasets_to_run.extend(cat.sdss_list)
#datasets_to_run.extend(cat.uchuu_list)
#datasets_to_run.extend(cat.mxxl_list)
#datasets_to_run.extend(cat.bgs_sv3_list)
#datasets_to_run.extend(cat.bgs_y1_list)
#datasets_to_run.extend(cat.bgs_y3_list)
#datasets_to_run.extend(cat.bgs_aux_list)

# TODO LOA columns not same as KIBO I guess...

datasets_to_run.extend([
    cat.bgs_y3_pzp_2_4,
    #cat.bgs_sv3_pz_2_4_10p_c1,
    #cat.sdss_colors_chi,
])

# Set to False to skip the group finder and only redo the later steps on
# existing group finder output. Using a flag (rather than commenting out the
# run_group_finder() call) keeps the `success` check below from referencing an
# undefined or stale variable.
# NOTE(review): a postprocess-only run may also need `d = deserialize(d)`
# uncommented in the loop - confirm.
RUN_GROUP_FINDER = True

for d in datasets_to_run:
    #d = deserialize(d)
    if RUN_GROUP_FINDER:
        success = d.run_group_finder(popmock=True)
        if not success:
            # Report the failure and move on to the next catalog instead of
            # running the later steps on this one.
            print(f"Group finder failed for {d.name}")
            continue
    d.calc_wp_for_mock()
    d.postprocess()
    d.dump()

    #d = deserialize(d)
    #d.calculate_projected_clustering(with_extra_randoms=True) # 15m
    #d.calculate_projected_clustering_in_magbins(with_extra_randoms=True) # 45m
    #serialize(d)

    #pp.proj_clustering_plot(d)
    #pp.lsat_data_compare_plot(d)


In [None]:
# Print each catalog's `hod` attribute in full, with numpy print options
# applied only inside this block:
#   - floats as fixed-width, 3-decimal values with no scientific notation,
#   - very wide lines so rows are not wrapped,
#   - an effectively unlimited threshold so arrays are never summarized.
with np.printoptions(
    precision=3,
    suppress=True,
    linewidth=300,
    threshold=sys.maxsize,
    formatter={'float_kind':'{:6.3f}'.format},
):
    for d in datasets_to_run:
        print(d.hod)

In [None]:
# Load the catalog object for this definition, add jackknife errors to its
# projected clustering results (extra randoms, not split into magnitude bins),
# and serialize the updated object again.
bgs_sv3_pz_2_4_10p = deserialize(cat.bgs_sv3_pz_2_4_10p)
bgs_sv3_pz_2_4_10p.add_jackknife_err_to_proj_clustering(
    for_mag_bins=False,
    with_extra_randoms=True,
)
serialize(bgs_sv3_pz_2_4_10p)

## Test of writing


In [None]:
# Import the units module explicitly: `u` is otherwise only defined if one of
# the star-imports in the import cell happens to re-export astropy.units.
import astropy.units as u

catalog = deserialize(cat.bgs_sv3_pz_2_4_10p)

# Cast the flag column to int32 on the catalog itself (not on a copy), so the
# change is visible to anything that later reads catalog.all_data.
# NOTE(review): presumably done so the column has a FITS-friendly dtype - confirm.
catalog.all_data['Z_ASSIGNED_FLAG'] = catalog.all_data['Z_ASSIGNED_FLAG'].astype('int32')

# Columns to carry over from the catalog DataFrame into the astropy Table.
columns_to_write = [
    'TARGETID',
    'RA',
    'DEC',
    'Z',
    'L_GAL',
    'VMAX',
    'P_SAT',
    'M_HALO',
    'N_SAT',
    'L_TOT',
    'IGRP',
    'WEIGHT',
    'APP_MAG_R',
    'Z_ASSIGNED_FLAG',
    'G_R',
    'IS_SAT',
    'QUIESCENT',
    'MSTAR',
]

# Build the Table from the selected columns, attaching physical units where
# they apply.
table = Table.from_pandas(
    catalog.all_data.loc[:, columns_to_write],
    units={
        'RA': u.degree,
        'DEC': u.degree,
        'L_GAL': u.solLum,
        'VMAX': u.Mpc**3,
        'M_HALO': u.solMass,
        'L_TOT': u.solLum,
        'MSTAR': u.solMass
    } # Others are dimensionless
)

# NOTE(review): unclear whether this name ends up as the FITS extension name;
# a later cell renames the HDU explicitly - confirm whether both are needed.
table.info.name = "GALAXIES"
table.info

In [None]:
# Write the catalog's sharable output file and keep the returned path; the
# same path is used below to read the file back.
frompath = catalog.write_sharable_output_file()

# Read the file back as an astropy Table and display it as the cell output to
# inspect what was actually written.
read = Table.read(frompath)
read

In [None]:

# Rename the first extension HDU to "GALAXIES" in the file on disk, then
# re-open the file to confirm the new name was persisted.
#
# The file is opened in 'update' mode inside a `with` block so the change is
# flushed and the handle is closed on exit. This replaces overwriting the file
# via writeto() while it is still open and memory-mapped, which also left both
# file handles unclosed. After this cell, `hdul` refers to a closed HDUList.
with fits.open(frompath, mode='update', memmap=True) as hdul:
    hdul.info()  # HDU listing before the rename
    hdul[1].name = "GALAXIES"
    hdul.info()  # HDU listing after the rename (in memory)

# Fresh read-only open: shows what is actually stored on disk now.
with fits.open(frompath, memmap=True) as hdul:
    hdul.info()