In [2]:
import numpy as np
import astropy.coordinates as coord
import sys
import matplotlib.pyplot as plt
from astropy.table import Table,join

if './SelfCalGroupFinder/py/' not in sys.path:
    sys.path.append('./SelfCalGroupFinder/py/')
from groupcatalog import GroupCatalog, BGSGroupCatalog, TestGroupCatalog, serialize, deserialize, SDSSGroupCatalog
import catalog_definitions as cat
from dataloc import *
import pyutils as pyutils
import plotting as pp
%load_ext autoreload
%autoreload 2

## Running Group Finder

This notebook calls the functions that preprocess the input data, run the group finder, and postprocess its output to build up a GroupCatalog object, which mostly wraps a pandas DataFrame containing the resulting group catalog data.

After running this on a given GroupCatalog definition, a serialized (via pickle) version of the GroupCatalog object will exist which can be deserialized elsewhere for analysis. See post_plots.ipynb for that.

In [None]:
# Named collections of GroupCatalog definitions from catalog_definitions (`cat`).
# The run cell picks which of these collections actually get processed.

# SDSS: three definitions followed by their "_v2"-named counterparts.
sdss_list: list[GroupCatalog] = [
    cat.sdss_vanilla, cat.sdss_colors, cat.sdss_colors_chi,
    cat.sdss_vanilla_v2, cat.sdss_colors_v2, cat.sdss_colors_chi_v2,
]

uchuu_list: list[GroupCatalog] = [cat.uchuu_all]

# TODO NEED TO move MXXL file to make available to re-run these.
# Every MXXL definition is immediately followed by its "_c"-suffixed sibling.
mxxl_list: list[GroupCatalog] = [
    getattr(cat, f"mxxl_{variant}{suffix}")
    for variant in ("all", "fiberonly", "nn", "simple_2", "simple_4")
    for suffix in ("", "_c")
]

# SV3: two one-off definitions, then the simple_4 and simple_5 families,
# each listed from the 10p definition down to the 1p definition.
bgs_sv3_list: list[GroupCatalog] = [
    cat.bgs_sv3_nn_6p,
    cat.bgs_sv3_fiberonly_10p,
    *(
        getattr(cat, f"bgs_sv3_simple_{version}_{passes}p")
        for version in (4, 5)
        for passes in range(10, 0, -1)
    ),
]

# Y1: names follow no regular pattern, so they are spelled out explicitly.
bgs_y1_list: list[GroupCatalog] = [
    cat.bgs_simple_4_old, cat.bgs_simple_4, cat.bgs_simple_4_1pass,
    cat.bgs_simple_4_no_sdss, cat.bgs_simple_4_4p, cat.bgs_simple_4_c,
    cat.bgs_fiberonly, cat.bgs_fiberonly_1pass,
    cat.bgs_nn, cat.bgs_nn_sdsslike,
    cat.bgs_simple_2, cat.bgs_simple_2_c,
    cat.bgs_simple_5,
]

# Y3: uncomment an entry to include it in the Y3 collection.
bgs_y3_list: list[GroupCatalog] = [
    cat.bgs_y3_simple_4,
    #cat.bgs_y3_simple_4_4p,
    #cat.bgs_y3_fiberonly_1pass,
    #cat.bgs_y3_fiberonly,
    cat.bgs_y3_simple_5,
]

# Choose which collections to process by (un)commenting entries below.
datasets_to_run: list[GroupCatalog] = [
    #*sdss_list,
    #*uchuu_list,
    #*mxxl_list,
    #*bgs_sv3_list,
    #*bgs_y1_list,
    *bgs_y3_list,
]

# For each selected catalog: run the group finder, postprocess, and serialize it.
# To just run postprocessing on GF output, comment out run_group_finder()
for d in datasets_to_run:
    # Optional steps, disabled by default:
    #d = deserialize(d)
    d.run_group_finder(popmock=False)
    d.postprocess()
    #d.run_corrfunc()
    serialize(d)
    #del(d)


In [None]:
import pickle

# The pickle file holds a 2-tuple: (app_mag_bins, the_map).
# Path is relative to the notebook's working directory.
with open('SimpleRedshiftGuesserMap.pkl', 'rb') as f:
    app_mag_bins, the_map = pickle.loads(f.read())

# Display the loaded map as the cell output
the_map

In [None]:
# Entries of the_map to inspect, one histogram figure per entry
indexes = [10, 11, 30, 45]

for i in indexes:
    fig, ax = plt.subplots()
    ax.hist(the_map[i], bins=20)
    # Title uses the bin value at i-1 as the approximate apparent magnitude
    ax.set_title(f'app mag ~ {app_mag_bins[i-1]}')
    plt.show()

# Tests

## Python Unit Tests

In [None]:
import pyutils as pyu

# Build an app-mag -> z map from synthetic data and print it for inspection.
# NOTE: this rebinds app_mag_bins / the_map, replacing the values loaded from
# SimpleRedshiftGuesserMap.pkl in the earlier cell.

# Seeded generator so the synthetic redshifts (and therefore the printed map)
# are identical on every Restart & Run All.
test_rng = np.random.default_rng(42)

test_mags = np.linspace(12.0, 20.0, 10000)
# Linear ramp 0..0.5 scaled by uniform [0, 1) noise, one value per magnitude
test_z = np.linspace(0.0, 0.5, 10000) * test_rng.random(test_mags.size)
app_mag_bins, the_map = pyu.build_app_mag_to_z_map_new(test_mags, test_z)

print(the_map)

In [None]:
def _print_head(path, num_lines=10):
    """Print the first `num_lines` lines of the text file at `path`, unmodified."""
    with open(path, 'r') as handle:
        for _, line in zip(range(num_lines), handle):
            print(line, end='')


SV3_test = BGSGroupCatalog(
    "SV3 Test",
    pyutils.Mode.SIMPLE_v4,
    19.5,
    21.0,
    num_passes=10,
    drop_passes=3,
    data_cut='sv3',
    sdss_fill=False,
)
# Copy so that the shared cat.GF_PROPS_VANILLA dict is never mutated
SV3_test.GF_props = cat.GF_PROPS_VANILLA.copy()

SV3_test.preprocess()

# Precision check: compare RA values in the merged FITS table (IAN_BGS_SV3_MERGED_FILE)
# against what ends up in SV3_test.preprocess_file and the galprops file.
merged_table = Table.read(IAN_BGS_SV3_MERGED_FILE, format='fits')
print(merged_table['RA'][0:10])

# First few lines of the preprocess file
_print_head(SV3_test.preprocess_file)
# Same preview for the path with a trailing "~" (disabled):
#_print_head(SV3_test.preprocess_file + "~")

# The galprops file path is GF_outfile with ".out" replaced by "_galprops.dat"
galprops_file = str.replace(SV3_test.GF_outfile, ".out", "_galprops.dat")
_print_head(galprops_file)
# Same preview for the path with a trailing "~" (disabled):
#_print_head(galprops_file + "~")


## GF Tests

In [None]:
# Only needs to be run once, unless you want to change the test data
#catalog = TestGroupCatalog("Test")
#catalog.create_test_dat_files() 

In [None]:
np.set_printoptions(threshold=sys.maxsize)


def _run_test_catalog(gf_overrides=None):
    """Run the group finder and postprocessing on a fresh "Test" catalog.

    gf_overrides: optional mapping of GF_props keys to values, written into the
        catalog's GF_props (in iteration order) before the group finder runs.
    Returns the processed TestGroupCatalog.
    """
    fresh = TestGroupCatalog("Test")
    for key, value in (gf_overrides or {}).items():
        fresh.GF_props[key] = value
    fresh.run_group_finder(silent=True)
    fresh.postprocess()
    return fresh


# --- Scenario 1: baseline vanilla group finder test (default GF_props) ---
catalog = _run_test_catalog()
df = catalog.all_data
baseline_total_mass = df['M_halo'].sum()
assert len(np.unique(df['igrp'])) == 200
assert len(df) == 246
assert df['quiescent'].sum() == 129
assert np.isclose(df['weight'].sum(), 246 * 1.0) # no weights, just 1 per gal
m1 = df['M_halo'].to_numpy()

# --- Scenario 2: when both omega0 values are 0, the other parameters don't matter ---
catalog = _run_test_catalog({
    'omegaL_sf': 123,
    'sigma_sf': 345,
    'omegaL_q': 456,
    'sigma_q': 678,
    'omega0_sf': 0.0,
    'omega0_q': 0.0,
})
df = catalog.all_data
assert len(np.unique(df['igrp'])) == 200
assert len(df) == 246
assert df['quiescent'].sum() == 129
assert np.isclose(df['weight'].sum(), 246 * 1.0) # no weights, just 1 per gal
assert np.isclose(df['M_halo'].sum(), baseline_total_mass)
m2 = df['M_halo'].to_numpy()

# --- Scenario 3: non-zero omega0_sf, so galaxy weights drop below 1 per galaxy ---
catalog = _run_test_catalog({
    #'colors': 1,
    'omegaL_sf': 10.0,
    'sigma_sf': 3.0,
    'omegaL_q': 0.0,
    'sigma_q': 0.0,
    'omega0_sf': 10.0,
    'omega0_q': 0.0,
})
df = catalog.all_data
assert len(np.unique(df['igrp'])) >= 200 # these parameters make assigned halos smaller
assert len(df) == 246
assert df['quiescent'].sum() == 129
assert df['weight'].sum() < 246
# TODO BUG I feel like this should be true, but it's not. Weighting doesn't preserve the halo mass function
#assert np.isclose(df['M_halo'].sum(), baseline_total_mass)
m3 = df['M_halo'].to_numpy()

# Side-by-side histogram of log10(M_halo) for the three scenarios (one column each)
plt.hist(np.log10(np.stack([m1, m2, m3], axis=-1)))


print("All tests passed")

In [None]:
# Hand pp.examine_area the RA/Dec bounding box of `df` plus `df` itself.
# `df` is whatever the most recently run cell bound to that name.
pp.examine_area(
    df.RA.min(),
    df.RA.max(),
    df.Dec.min(),
    df.Dec.max(),
    df,
)
