## Running Group Finder

This notebook calls the functions that preprocess the input data, run the group finder, and postprocess the results to build up a GroupCatalog object, which mostly wraps a pandas DataFrame containing the resulting group catalog data.

After running this on a given GroupCatalog definition, a serialized (via pickle) version of the GroupCatalog object will exist which can be deserialized elsewhere for analysis. See post_plots.ipynb for that.

In [None]:
import sys
import matplotlib.pyplot as plt
from astropy.table import Table,join
import astropy.io.fits as fits

if './SelfCalGroupFinder/py/' not in sys.path:
    sys.path.append('./SelfCalGroupFinder/py/')
from groupcatalog import *
import catalog_definitions as cat
from pyutils import *
from dataloc import *
import plotting as pp
%load_ext autoreload
%autoreload 2

In [None]:
# Queue of GroupCatalog definitions (taken from catalog_definitions, imported
# as `cat`) that the run loop further down in this cell iterates over.
datasets_to_run: list[GroupCatalog] = []

# Uncomment any of these lines to queue an entire predefined list of catalogs.
#datasets_to_run.extend(cat.sdss_list)
#datasets_to_run.extend(cat.uchuu_list)
#datasets_to_run.extend(cat.mxxl_list)
#datasets_to_run.extend(cat.bgs_sv3_list)
#datasets_to_run.extend(cat.bgs_y1_list)
#datasets_to_run.extend(cat.bgs_y3_list)
#datasets_to_run.extend(cat.bgs_aux_list)

#gc = cat.bgs_y1_hybrid8_mcmc

# Individually selected catalogs. Only the uncommented entries are queued; the
# trailing notes on the commented entries record past groupfind() timings.
# When enabling more than one entry, make sure consecutive entries are
# separated by a comma.
datasets_to_run.extend([
    cat.bgs_y1_hybrid8_v1_mcmc
    #cat.bgs_y1_pzp_2_6_c2_serial
    #cat.bgs_sv3_pz_2_6_10p_c2, # groupfind() took 46.82 sec. (47.11)
    #cat.bgs_y1_pzp_2_6_c2, # groupfind() took 46.82 sec. (47.11). After 2nd RC fixes, 50.5 sec.
    #cat.bgs_y3_pzp_2_6_c2, # groupfind() took 334.34 339.94
])


# 7/16 fix to use fsf abs mags made C2 go from 199 to 239 chi squared

# Keyword options handed to run_group_finder() for every queued catalog.
group_finder_options = dict(popmock=True, profile=False, silent=False, serial=False)

# Process each queued catalog in turn. A catalog whose group finder run reports
# failure is announced and skipped, and the loop moves on to the next one.
for d in datasets_to_run:
    #d = deserialize(d)
    #d.preprocess()
    success = d.run_group_finder(**group_finder_options)
    if success:
        # These steps only run after a successful group finder run, in this order.
        d.calc_wp_for_mock()
        d.postprocess()
        d.dump()
        d.chisqr()
        pp.plots(d)
    else:
        print(f"Group finder failed for {d.name}")

In [None]:
# Read a whitespace-delimited galaxy table from disk and split it on the
# QUIESCENT flag.
# NOTE(review): `pd` is not imported explicitly in this notebook; presumably it
# arrives through one of the star imports in the first cell -- confirm.
path = '/mount/sirocco1/imw2293/GROUP_CAT/OUTPUT/BGS_Y3_C2/BGS Y3 C2.dat'

# `names=` is passed without `header=`, so pandas treats every row (including
# the first) as data and labels the columns with the names given here.
# `sep=r'\s+'` is the documented replacement for `delim_whitespace=True`, which
# is deprecated since pandas 2.2; both split fields on runs of whitespace.
df_from_pre = pd.read_csv(
    path,
    sep=r'\s+',
    names=['RA', 'DEC', 'Z', 'LOGLGAL', 'VMAX', 'QUIESCENT', 'CHI'],
)

# Row subsets by flag value: QUIESCENT == 0 (`_sf`) and QUIESCENT == 1 (`_q`).
df_from_pre_sf = df_from_pre[df_from_pre['QUIESCENT'] == 0]
df_from_pre_q = df_from_pre[df_from_pre['QUIESCENT'] == 1]

In [None]:
#bgs_sv3_pz_2_4_10p = deserialize(cat.bgs_sv3_pz_2_4_10p)
#bgs_sv3_pz_2_4_10p.add_jackknife_err_to_proj_clustering(with_extra_randoms=True, for_mag_bins=False)
#serialize(bgs_sv3_pz_2_4_10p)

In [None]:
# Load a group catalog FITS table, verify that no column contains NaN, and
# plot two quick-look histograms.
filepath = '/mount/sirocco1/imw2293/GROUP_CAT/GROUP_CATALOG_BGS_Y3_v0.5.fits'
tbl = Table.read(filepath)

# Optionally convert to a pandas Dataframe
df = tbl.to_pandas()

print(tbl.colnames)
# Stop at the first column that contains at least one NaN.
for c in tbl.colnames:
    nan_count = np.isnan(tbl[c]).sum()
    assert nan_count == 0, f"Column {c} has NaN values!"
print("All columns checked, no NaN values found.")

# Histogram of Z_ASSIGNED_FLAG: unit-wide bins from -3 to 9, one tick per bin edge.
flag_bins = np.arange(-3, 10, 1)
flag_ax = plt.gca()
j = flag_ax.hist(df['Z_ASSIGNED_FLAG'], bins=flag_bins, align='left')
flag_ax.set_xlabel('Z_ASSIGNED_FLAG')
flag_ax.set_xticks(flag_bins)
#flag_ax.set_yscale('log')
plt.show()

# Histogram of log10(L_GAL) in 50 bins, with counts on a logarithmic y axis.
log_l_gal = np.log10(df['L_GAL'])
lum_ax = plt.gca()
j = lum_ax.hist(log_l_gal, bins=50)
lum_ax.set_xlabel('log($L_{gal}$)')
lum_ax.set_yscale('log')
plt.show()
