In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sys
import astropy.constants as const

if './SelfCalGroupFinder/py/' not in sys.path:
    sys.path.append('./SelfCalGroupFinder/py/')
from pyutils import *
from dataloc import *
from bgs_helpers import *
from plotting import *
import wp
import catalog_definitions as cat

%load_ext autoreload
%autoreload 2

This notebook is for calculating PIP clustering using the clustering catalogs and randoms.

For group catalog purposes, its goal is to produce reference clustering results to compare against the clustering computed from the full sample, which has our redshift assignments in it.

In [None]:
def get_clustering_catalog(filename, year):
    """Load a clustering catalog from a FITS file and return it as a pandas DataFrame.

    Steps, in order:
      1. Read the FITS table and print its full column list.
      2. Keep only the columns listed in ``wanted_columns``.
      3. Call ``add_NTILE_MINE_to_table(table, year)``; the ``NEAREST_TILEIDS``
         column is read immediately afterwards, so that helper is expected to
         provide it (presumably along with a tile-count column — confirm in
         the helper).
      4. Store the first entry of each ``NEAREST_TILEIDS`` row as ``NTID`` and
         drop ``NEAREST_TILEIDS``.
      5. Recompute the tile count against the tiles returned by
         ``read_tiles_Y3_sv3()`` and store it as ``NTILE_MINE_SV3``, replacing
         any existing column with that name.

    Parameters
    ----------
    filename : path to the FITS clustering catalog.
    year : passed straight through to ``add_NTILE_MINE_to_table``.

    Returns
    -------
    pandas.DataFrame built from the final table via ``Table.to_pandas()``.
    """
    wanted_columns = ['TARGETID', 'DEC', 'RA', 'Z','NTILE', 'WEIGHT', 'WEIGHT_ZFAIL']
    sv3_count_column = "NTILE_MINE_SV3"

    catalog = Table.read(filename, format='fits')
    print(catalog.colnames)
    catalog.keep_columns(wanted_columns)

    # Tile bookkeeping for the requested year; only the first nearest tile id is retained.
    add_NTILE_MINE_to_table(catalog, year)
    catalog['NTID'] = catalog['NEAREST_TILEIDS'][:, 0]
    catalog.remove_columns(['NEAREST_TILEIDS'])

    # Tile counts against the SV3 tiling. The nearest tile ids returned here are
    # intentionally not stored on the table.
    # NOTE(review): the meaning of the trailing 10 is defined by find_tiles_for_galaxies — confirm.
    sv3_tiles = read_tiles_Y3_sv3()
    ntiles_inside, _unused_nearest_ids = find_tiles_for_galaxies(sv3_tiles, table_to_df(catalog), 10)
    if sv3_count_column in catalog.columns:
        catalog.remove_columns([sv3_count_column])
    catalog.add_column(ntiles_inside, name=sv3_count_column)

    return catalog.to_pandas()


def prep_for_clustering(df: pd.DataFrame):
    """Reduce a clustering catalog to the rows used for the clustering measurement.

    The input frame is not modified; every step below produces a new frame.

    Steps:
      1. Drop repeated ``TARGETID`` rows (first occurrence wins) and print the
         remaining row count.
      2. Rename ``DEC`` -> ``Dec`` and ``Z`` -> ``z``.
      3. Add a ``REGION`` column computed by ``tile_to_region`` from ``NTID``.
      4. Keep rows with ``NTILE_MINE_SV3 >= 10`` and print that row count.
      5. Remove rows whose ``REGION`` is listed in ``sv3_poor_y3overlap``.
      6. Inner-join (one-to-one on ``TARGETID``) with the ``quiescent`` column
         of the group catalog deserialized from ``cat.bgs_sv3_fiberonly_10p``.

    Parameters
    ----------
    df : DataFrame with at least ``TARGETID``, ``DEC``, ``Z``, ``NTID`` and
        ``NTILE_MINE_SV3`` columns.

    Returns
    -------
    DataFrame with a fresh 0..n-1 index containing only rows that survive the
    cuts and are present in the group catalog.

    Raises
    ------
    pandas.errors.MergeError if ``TARGETID`` is not unique on either side of
    the merge (enforced by ``validate='one_to_one'``).
    """
    # check for duplicate targetid
    # rename() hands back a new frame, so the column assignment below never
    # writes into a filtered slice of the caller's data (no SettingWithCopyWarning).
    df = (
        df.drop_duplicates(subset='TARGETID', keep='first')
        .rename(columns={'DEC': 'Dec', 'Z': 'z'})
    )
    print(len(df))
    df['REGION'] = tile_to_region(df['NTID'])

    innerdf = df.loc[df['NTILE_MINE_SV3'] >= 10]
    print(len(innerdf))

    # Drop the bad two regions for equal comparison
    to_remove = np.isin(innerdf['REGION'], sv3_poor_y3overlap)
    innerdf = innerdf.loc[~to_remove].reset_index(drop=True)

    # Pull only the two needed columns out of the group catalog and rename on
    # that small copy, leaving group_catalog.all_data itself untouched.
    group_catalog = deserialize(cat.bgs_sv3_fiberonly_10p)
    all_data = group_catalog.all_data
    id_column = 'target_id' if 'target_id' in all_data.columns else 'TARGETID'
    quiescent_flags = all_data[[id_column, 'quiescent']].rename(columns={id_column: 'TARGETID'})

    innerdf = pd.merge(innerdf, quiescent_flags, on='TARGETID', how='inner', validate='one_to_one')

    return innerdf


In [None]:
# SV3 PIP Clustering Calculation
#
# Two runs over the same SV3 BRIGHT clustering catalogs (N + S):
#   1. with the clustering randoms and their WEIGHT column  -> sv3_pip_clustering_proper.pkl
#   2. with the other SV3 randoms file and no random weights -> sv3_pip_clustering.pkl
# Files are opened in `with` blocks so the handles are closed (and the output
# pickles fully flushed) as soon as each load/dump finishes.

# --- Run 1: clustering randoms, weighted ---
with open(MY_RANDOMS_SV3_CLUSTERING, 'rb') as randoms_file:
    randoms = pickle.load(randoms_file)
randoms = randoms[randoms['NTILE_MINE'] >= 10] # Match footprint of data
dfN = get_clustering_catalog(BGS_SV3_CLUSTERING_N_BRIGHT_FILE, 'sv3')
dfS = get_clustering_catalog(BGS_SV3_CLUSTERING_S_BRIGHT_FILE, 'sv3')
df = pd.concat([dfN, dfS])
innerdf = prep_for_clustering(df)
results = wp.calculate_wp_from_df(innerdf, randoms, data_weights=innerdf['WEIGHT'], rand_weights=randoms['WEIGHT'])
with open(OUTPUT_FOLDER + 'sv3_pip_clustering_proper.pkl', 'wb') as results_file:
    pickle.dump(results, results_file)

# --- Run 2: other randoms file, unweighted randoms ---
# NOTE(review): the data catalog is rebuilt from the same files as in run 1.
# If wp.calculate_wp_from_df does not modify its inputs, `innerdf` from run 1
# could be reused instead — confirm before removing the reload.
with open(MY_RANDOMS_SV3, 'rb') as randoms_file:
    randoms = pickle.load(randoms_file)
randoms = randoms[randoms['NTILE_MINE'] >= 10] # Match footprint of data
dfN = get_clustering_catalog(BGS_SV3_CLUSTERING_N_BRIGHT_FILE, 'sv3')
dfS = get_clustering_catalog(BGS_SV3_CLUSTERING_S_BRIGHT_FILE, 'sv3')
df = pd.concat([dfN, dfS])
innerdf = prep_for_clustering(df)
results = wp.calculate_wp_from_df(innerdf, randoms, data_weights=innerdf['WEIGHT'])
with open(OUTPUT_FOLDER + 'sv3_pip_clustering.pkl', 'wb') as results_file:
    pickle.dump(results, results_file) # no random weights, bad

In [None]:
# Y3 Cut to SV3 Clustering Calculation
#
# Build the Y3 clustering catalog and reduce it with the same cuts as the SV3 one.
# Caveat on randoms: SV3 randoms must NOT be paired with this catalog, because
# the weights would be all wrong. Y3 randoms (cut to SV3) are needed instead.

y3_likesv3_df = get_clustering_catalog(filename=BGS_Y3_CLUSTERING_FILE, year='3')
y3_innerdf = prep_for_clustering(df=y3_likesv3_df)

In [None]:
# Clustering of the Y3-cut-to-SV3 catalog using the matching randoms and their weights.
# Files are opened in `with` blocks so the handles are closed (and the output
# pickle fully flushed) as soon as the load/dump finishes.
with open(MY_RANDOMS_Y3_LIKESV3_CLUSTERING, 'rb') as randoms_file:
    randoms = pickle.load(randoms_file)
results = wp.calculate_wp_from_df(y3_innerdf, randoms, data_weights=y3_innerdf['WEIGHT'], rand_weights=randoms['WEIGHT'])
with open(OUTPUT_FOLDER + 'y3_likesv3_pip_clustering_proper.pkl', 'wb') as results_file:
    pickle.dump(results, results_file)

In [None]:
# Inspect difference between full and clustering randoms
# The "X" in each path constant is substituted with the index of the random
# file to open; index 0 of each set is enough to compare the available columns.
random_file_index = "0"
rtable = Table.read(BGS_SV3_CLUSTERING_RAND_FILE.replace("X", random_file_index), format='fits')
rtable2 = Table.read(BGS_SV3_RAND_FILE.replace("X", random_file_index), format='fits')

# Clustering randoms first, then the full randoms.
for random_table in (rtable, rtable2):
    print(random_table.colnames)

In [None]:
# What do SV3 Weights look like?
# Load via a `with` block so the file handle is closed right after reading.
with open(MY_RANDOMS_SV3_CLUSTERING, 'rb') as randoms_file:
    randoms = pickle.load(randoms_file)

# Fraction of randoms whose weight is (numerically) 1.0, then the mean weight.
print(np.isclose(randoms.WEIGHT, 1.0).sum() / len(randoms))
print(np.average(randoms.WEIGHT))

# Log-spaced bins from 10**-0.01 to 10**1.15, shown on log-log axes.
plt.hist(randoms.WEIGHT, bins=np.logspace(-.01, 1.15, 40))
plt.yscale('log')
plt.xscale('log')
plt.xlabel('WEIGHT')
plt.ylabel('Number of randoms')
plt.title('SV3 clustering randoms: WEIGHT distribution');