In [1]:
%load_ext autoreload
%autoreload 2

import itertools
import os
import sys

import astropy.units as u
import fitsio
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from astropy.coordinates import SkyCoord
from astropy.table import Table
from LSS.common_tools import mknz
from pycorr import TwoPointCorrelationFunction, TwoPointEstimator, project_to_multipoles, project_to_wp, utils, setup_logging
from scipy.optimize import curve_fit

from dataloc import *

# Global matplotlib styling, applied in a single update:
#   - larger default text,
#   - a slightly smaller legend font,
#   - a higher figure DPI.
plt.rcParams.update({
    'font.size': 15,
    'legend.fontsize': 12,
    'figure.dpi': 150,
})


In [None]:
# Look at the prepared data: the data catalog and random catalog #0 found in
# the '19-20_Q_i1' folder of the new results tree.
# The path is stored in `lss_dir` rather than `dir` so that the builtin `dir()`
# is not shadowed for the rest of the kernel session.
# (An earlier variant of this cell pointed at
#  CUSTOM_CLUSTERING_RESULTS_FOLDER/Y1/LSS/iron/LSScats/v1.5pip instead.)
lss_dir = os.path.join(CUSTOM_CLUSTERING_RESULTS_FOLDER_NEW, 'Y1', 'LSS', 'iron', 'LSScats', 'v1.5pip', '19-20_Q_i1')
tbl1 = Table(fitsio.read(os.path.join(lss_dir, 'BGS_BRIGHT_clustering.dat.fits')))
tbl2 = Table(fitsio.read(os.path.join(lss_dir, 'BGS_BRIGHT_0_clustering.ran.fits')))
print(len(tbl1), len(tbl2))

In [None]:
# Show the column listing of the data catalog, then of the random catalog.
for catalog in (tbl1, tbl2):
    print(catalog.columns)

In [2]:
def get_new_wp(weights, survey, verspec, ver, zmax, nran, njack, region, mag: int, imagingver, quiescent=None):
    """Load the w_p table and its covariance for one magnitude bin of the new runs.

    The results are looked up under
    ``CUSTOM_CLUSTERING_RESULTS_FOLDER_NEW/<survey>/LSS/<verspec>/LSScats/<ver>/<folder>/rppi``
    where ``<folder>`` is ``'{|mag|}-{|mag|+1}[_Q|_SF]_{imagingver}'``.

    Parameters
    ----------
    weights, zmax, nran, njack, region
        Values interpolated into the result file names.
    survey, verspec, ver
        Path components of the results directory.
    mag : int
        Magnitude bin edge; its absolute value and that value plus one name the folder.
    imagingver
        Suffix of the folder name (the notebook passes "i0" / "i1").
    quiescent : bool or None
        Truthy -> '_Q' folder, None -> no colour tag, any other falsy value -> '_SF' folder.

    Returns
    -------
    (wp, cov)
        ``wp`` is the array returned by ``np.loadtxt`` on the wp text file.
        ``cov`` is the third value returned by ``TwoPointEstimator.get_corr(...,
        return_cov=True, mode='wp')``, or None when the allcounts file could not
        be loaded or the covariance could not be computed.
        ``(None, None)`` is returned when the directory or the wp file is missing.
    """
    lo_edge = np.abs(mag)
    if quiescent is None:
        colour_tag = ''
    elif quiescent:
        colour_tag = '_Q'
    else:
        colour_tag = '_SF'
    folder = f'{lo_edge}-{lo_edge + 1}{colour_tag}_{imagingver}'

    rppi_dir = os.path.join(CUSTOM_CLUSTERING_RESULTS_FOLDER_NEW, survey, 'LSS', verspec, 'LSScats', ver, folder, 'rppi')

    if not os.path.exists(rppi_dir):
        print("WARNING: Directory does not exist: ", rppi_dir)
        return None, None

    # The wp text file and the allcounts file share the same name stem.
    stem = f'BGS_BRIGHT_{region}_0.001_{zmax}_{weights}_custom_njack{njack}_nran{nran}_split20'
    wp_fn = os.path.join(rppi_dir, f'wp_{stem}.txt')

    if not os.path.exists(wp_fn):
        print("WARNING: File does not exist: ", wp_fn)
        return None, None

    # The covariance is best effort: a failure here must not hide the wp table,
    # but the reason is reported so a missing file can be told apart from an
    # estimator that cannot provide a covariance.
    allcounts_fn = os.path.join(rppi_dir, f'allcounts_{stem}.npy')
    cov = None
    try:
        tpc = TwoPointEstimator.load(allcounts_fn)
        _, _, cov = tpc.get_corr(return_sep=True, return_cov=True, mode='wp')
    except Exception as exc:
        print(f"WARNING: Could not load TwoPointEstimator covariance from {allcounts_fn}: {exc!r}")
        cov = None

    return np.loadtxt(wp_fn), cov

def get_wp_for(tracer, weights, survey, verspec, ver, bins, zmax, nran, njack, region, quiescent=None):
    """Load a w_p text file from the old results tree.

    The file is looked up in
    ``CUSTOM_CLUSTERING_RESULTS_FOLDER/<survey>/LSS/<verspec>/LSScats/<ver>/rppi``.
    When ``quiescent`` is not None the file name gets a ``_QUIESCENT1`` (truthy)
    or ``_QUIESCENT0`` (falsy) suffix; with None there is no suffix.

    Returns the array from ``np.loadtxt``, or None (after printing a warning)
    when the directory or the file does not exist.
    """
    rppi_dir = os.path.join(CUSTOM_CLUSTERING_RESULTS_FOLDER, survey, 'LSS', verspec, 'LSScats', ver, 'rppi')
    if not os.path.exists(rppi_dir):
        print("WARNING: Directory does not exist: ", rppi_dir)
        return None

    addon = '' if quiescent is None else '_QUIESCENT' + ('1' if quiescent else '0')

    wp_fn = os.path.join(rppi_dir, f'wp_{tracer}_{region}_0.001_{zmax}_{weights}_{bins}_njack{njack}_nran{nran}_split20{addon}.txt')
    if os.path.exists(wp_fn):
        return np.loadtxt(wp_fn)

    print("WARNING: File does not exist: ", wp_fn)
    return None

def get_fn_for(weights, survey, verspec, ver, zmax, nran, njack, region, quiescent=None):
    """Build the path of the BGS_BRIGHT allcounts ``.npy`` file in the old results tree.

    Only the existence of the ``rppi`` directory is checked (None is returned,
    with a warning, when it is missing); the returned file path itself is not
    checked. ``quiescent`` adds a ``_QUIESCENT1`` / ``_QUIESCENT0`` suffix when
    it is not None.
    """
    rppi_dir = os.path.join(CUSTOM_CLUSTERING_RESULTS_FOLDER, survey, 'LSS', verspec, 'LSScats', ver, 'rppi')
    if not os.path.exists(rppi_dir):
        print("WARNING: Directory does not exist: ", rppi_dir)
        return None

    addon = '' if quiescent is None else '_QUIESCENT' + ('1' if quiescent else '0')

    return os.path.join(rppi_dir, f'allcounts_BGS_BRIGHT_{region}_0.001_{zmax}_{weights}_custom_njack{njack}_nran{nran}_split20{addon}.npy')


def plot_wp(red_results, blue_results, all_results, zmaxes, magbins):
    """Plot quiescent and star-forming w_p side by side, one panel per magnitude bin.

    ``red_results`` and ``blue_results`` are lists of ``(wp, cov)`` pairs; only
    ``wp`` is drawn (column 0 on x, column 2 on y, column 3 as error bar when the
    table has more than three columns). ``all_results`` is accepted but not drawn.
    Panels are laid out on a 3x3 grid, one per entry of ``red_results``.
    """
    plt.figure(figsize=(10, 10))
    for panel in range(len(red_results)):
        ax = plt.subplot(3, 3, panel + 1)
        red_wp, _ = red_results[panel]
        blue_wp, _ = blue_results[panel]

        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_ylim(2, 4000)
        ax.set_ylabel(r'$w_p(r_p)$')
        ax.set_xlabel(r'$r_p$ [Mpc/h]')
        ax.set_title(f'${magbins[panel]} > M_r > {magbins[panel+1]}$')
        ax.text(0.75, 0.9, f'$z<{zmaxes[panel]}$', transform=ax.transAxes, ha='center', va='center', fontsize=10)

        # Same drawing rule for both populations; only label and colour differ.
        for wp, name, colour in ((red_wp, 'Quiescent', 'r'), (blue_wp, 'Star-Forming', 'b')):
            if wp is None:
                continue
            series_label = f'{name} {zmaxes[panel]}'
            if wp.shape[1] > 3:
                ax.errorbar(wp[:,0], wp[:,2], yerr=wp[:,3], label=series_label, fmt=f'{colour}.', capsize=3)
            else:
                ax.plot(wp[:,0], wp[:,2], label=series_label, color=colour)

    plt.tight_layout()
    plt.show()

In [3]:
zmaxes = [0.02595, 0.04067, 0.06336, 0.09792, 0.14977, 0.22620, 0.33694, 0.49523]
magbins = [-15, -16, -17, -18, -19, -20, -21, -22, -23]
tracer = "BGS_BRIGHT"
jack_official = 0


def _load_red_blue_wp(imagingver, njack):
    """Load the new-method (wp, cov) pairs for every magnitude bin.

    Pairs each lower magnitude edge in ``magbins[:-1]`` with the matching entry
    of ``zmaxes`` and calls ``get_new_wp`` once with quiescent=True and once
    with quiescent=False. Returns ``(red, blue)``: two lists of ``(wp, cov)``
    tuples, one entry per magnitude bin.
    """
    red, blue = [], []
    for m, z in zip(magbins[:-1], zmaxes):
        red.append(get_new_wp('pip_angular_bitwise', 'Y1', 'iron', 'v1.5pip', z, 9, njack, 'GCcomb', m, imagingver, quiescent=True))
        blue.append(get_new_wp('pip_angular_bitwise', 'Y1', 'iron', 'v1.5pip', z, 9, njack, 'GCcomb', m, imagingver, quiescent=False))
    return red, blue


# Only the quiescent / star-forming splits are loaded for the new method, so
# the "all galaxies" lists stay empty (the previous loop had an unreachable
# branch for them).
red_results_i1, blue_results_i1 = _load_red_blue_wp("i1", jack_official)
main_all_results = []

# NOTE: `red_result_i0` (singular) is the name the comparison cells further down use.
red_result_i0, blue_results_i0 = _load_red_blue_wp("i0", jack_official)
main_all_results_i0 = []




In [None]:
# Quiescent vs star-forming w_p from the new i1 runs, one panel per magnitude bin.
# main_all_results is passed to satisfy the signature; plot_wp does not draw it.
plot_wp(red_results_i1, blue_results_i1, main_all_results, zmaxes, magbins)

In [4]:
# Load the equivalent old results from the other NERSC clustering view notebook.
# Assumes the old results are stored in a similar directory structure but under
# CUSTOM_CLUSTERING_RESULTS_FOLDER.
# NOTE: this cell re-assigns `jack_official` (128 here); cells that load the
# new results set their own value before using it.
zmaxes = [0.02595, 0.04067, 0.06336, 0.09792, 0.14977, 0.22620, 0.33694, 0.49523]
magbins = [-15, -16, -17, -18, -19, -20, -21, -22, -23]
tracer = "BGS_BRIGHT"
jack_official = 128
old_red_results = []
old_blue_results = []
old_all_results = []
for z, q in itertools.product(zmaxes, [True, False, None]):

    wp = get_wp_for(tracer, 'pip_angular_bitwise', 'Y1', 'iron', 'v1.5pip', 'custom', z, 18, jack_official, 'GCcomb', quiescent=q)
    cov = None
    if wp is not None:
        allcounts_fn = get_fn_for('pip_angular_bitwise', 'Y1', 'iron', 'v1.5pip', z, 18, jack_official, 'GCcomb', quiescent=q)
        # get_fn_for only checks the directory, so check the file itself: a
        # missing allcounts file should leave cov=None, not abort the whole loop.
        if allcounts_fn is not None and os.path.exists(allcounts_fn):
            tpc = TwoPointEstimator.load(allcounts_fn)
            _, _, cov = tpc.get_corr(return_sep=True, return_cov=True, mode='wp')
        else:
            print("WARNING: File does not exist: ", allcounts_fn)

    # One list per population, each ending up with one entry per zmax.
    if q is True:
        old_red_results.append((wp, cov))
    elif q is False:
        old_blue_results.append((wp, cov))
    else:
        old_all_results.append((wp, cov))



In [5]:
def _wp_table_at(results, i):
    """Return the wp table of ``results[i]``, or None when ``results`` is empty or too short."""
    if not results or i >= len(results):
        return None
    return results[i][0]


def _draw_wp_series(wp, label, color, marker_fmt, alpha=None, linestyle=None):
    """Draw one w_p table on the current axes; does nothing when ``wp`` is None.

    Column 0 is used for x and column 2 for y. When the table has more than
    three columns, column 3 is drawn as the error bar using ``marker_fmt``;
    otherwise a plain line in ``color`` (with ``linestyle`` if given) is drawn.
    """
    if wp is None:
        return
    # Keep the column indexing valid for a 1-D table (e.g. a single-row text file).
    wp = np.atleast_2d(wp)
    extra = {} if alpha is None else {'alpha': alpha}
    if wp.shape[1] > 3:
        plt.errorbar(wp[:, 0], wp[:, 2], yerr=wp[:, 3], label=label, fmt=marker_fmt, capsize=3, **extra)
    else:
        if linestyle is not None:
            extra['linestyle'] = linestyle
        plt.plot(wp[:, 0], wp[:, 2], label=label, color=color, **extra)


def plot_new_vs_old_comparison(new_results, new_i0_results, old_results, zmaxes, magbins, title_suffix, color='r'):
    """
    Plot comparison between new and old clustering results

    One panel is drawn per entry of ``old_results`` on a 3x3 grid. Entries that
    are missing (None table, or a new-results list that is empty or shorter
    than ``old_results``) are simply not drawn.

    Parameters:
    -----------
    new_results : list
        List of (wp, cov) tuples for new results (labelled "New i1")
    new_i0_results : list
        List of (wp, cov) tuples for new i0 results (labelled "New i0"); may be empty
    old_results : list
        List of (wp, cov) tuples for old results (labelled "Old", drawn in black)
    zmaxes : list
        Redshift limits for each magnitude bin
    magbins : list
        Magnitude bin edges
    title_suffix : str
        Suffix to add to plot title (e.g., "Red Galaxies", "Blue Galaxies")
    color : str
        Color for new results plots
    """
    plt.figure(figsize=(10, 10))
    for i in range(len(old_results)):
        plt.subplot(3, 3, i+1)
        new_wp = _wp_table_at(new_results, i)
        new_i0 = _wp_table_at(new_i0_results, i)
        old_wp = _wp_table_at(old_results, i)

        plt.xscale('log')
        plt.yscale('log')
        plt.ylim(2, 4000)
        plt.ylabel(r'$w_p(r_p)$')
        plt.xlabel(r'$r_p$ [Mpc/h]')
        plt.title(f'${magbins[i]} > M_r > {magbins[i+1]}$')
        plt.text(0.75, 0.9, f'$z<{zmaxes[i]}$', transform=plt.gca().transAxes, ha='center', va='center', fontsize=10)

        _draw_wp_series(new_wp, 'New i1', color, f'{color}.', alpha=0.7)
        _draw_wp_series(new_i0, 'New i0', color, f'{color}--', linestyle='--')
        _draw_wp_series(old_wp, 'Old', 'k', 'k.', alpha=0.7)

        if i == 4:  # Legend on a single panel only (the fifth, index 4) to limit clutter
            plt.legend()

    plt.suptitle(f'New vs Old Results - {title_suffix}')
    plt.tight_layout()
    plt.show()

In [None]:
# Overlay new (i1 and i0) vs old results for the quiescent ("red") galaxies.
# Note: the i0 list really is named `red_result_i0` (singular) where it is built.
plot_new_vs_old_comparison(red_results_i1, red_result_i0, old_red_results, 
                          zmaxes, magbins, "Red Galaxies (Quiescent)", color='r')

In [None]:
# Overlay new (i1 and i0) vs old results for the star-forming ("blue") galaxies.
plot_new_vs_old_comparison(blue_results_i1, blue_results_i0, old_blue_results,
                          zmaxes, magbins, "Blue Galaxies (Star-Forming)", color='b')

In [6]:
def get_FSF(indata,fsf_cols,fsf_dir='/pscratch/sd/i/ioannis/fastspecfit/data/loa/catalogs/',prog='bright'):
    """Join fastspecfit columns onto ``indata`` by TARGETID.

    Reads the ``SPECPHOT`` extension of the twelve files
    ``fastspec-iron-main-{prog}-nside1-hp00.fits`` ... ``hp11.fits`` found in
    ``fsf_dir``, keeping only ``fsf_cols``, concatenates them and joins the
    result with ``indata`` on ``TARGETID``.

    Parameters
    ----------
    indata : astropy Table (or anything ``astropy.table.join`` accepts)
        Catalog to extend; must have a ``TARGETID`` column.
    fsf_cols : list of str
        Columns to read from the fastspecfit files; must include ``'TARGETID'``
        for the join to work.
    fsf_dir : str
        Directory holding the fastspecfit catalogs (a trailing slash is optional).
        NOTE(review): the default path contains 'loa' while the file names are
        hard-coded to 'iron' -- confirm the default is still meaningful.
    prog : str
        Program name interpolated into the file names.

    Returns
    -------
    The joined table. The join uses astropy's default join type, so rows of
    ``indata`` missing from fastspecfit are dropped.
    """
    from astropy.table import join  # only `Table` is imported at file level

    # os.path.join instead of string concatenation: works with or without a
    # trailing slash on fsf_dir.
    pieces = [
        fitsio.read(
            os.path.join(fsf_dir, f'fastspec-iron-main-{prog}-nside1-hp{hp:02d}.fits'),
            ext='SPECPHOT',
            columns=fsf_cols,
        )
        for hp in range(12)
    ]
    fs = np.concatenate(pieces)
    del pieces  # release the per-file arrays before the join

    # note, anything missing from fastspecfit will now be missing
    indata = join(indata, fs, keys=['TARGETID'])
    del fs
    return indata
    

In [13]:
# It appears with new method i0 vs i1 does not matter
# BUT Old method vs this is totally different
# Let's inspect samples
# Look at the prepared data
# (`newdir` instead of `dir` so the builtin `dir()` is not shadowed.)
olddir = os.path.join(CUSTOM_CLUSTERING_RESULTS_FOLDER, 'Y1', 'LSS', 'iron', 'LSScats', 'v1.5pip')
newdir = os.path.join(CUSTOM_CLUSTERING_RESULTS_FOLDER_NEW, 'Y1', 'LSS', 'iron', 'LSScats', 'v1.5pip', '19-20_Q_i0')
df_old = Table(fitsio.read(os.path.join(olddir, 'BGS_BRIGHT_NGC_clustering.dat.fits')))
df_new = Table(fitsio.read(os.path.join(newdir, 'BGS_BRIGHT_NGC_clustering.dat.fits')))
# The new catalog is joined with the fastspecfit absolute magnitude by TARGETID.
df_new = get_FSF(df_new, ['TARGETID', 'ABSMAG01_SDSS_R'], fsf_dir='/global/cfs/cdirs/desi/public/dr1/vac/dr1/fastspecfit/iron/v3.0/catalogs/', prog='bright')

# Drop the BITWEIGHTS column before converting to pandas
# (presumably it is multi-dimensional, which to_pandas cannot handle -- TODO confirm),
# then index both frames by TARGETID so they can be aligned row by row.
df_old.remove_columns(['BITWEIGHTS'])
df_new.remove_columns(['BITWEIGHTS'])
df_old = df_old.to_pandas().set_index('TARGETID')
df_new = df_new.to_pandas().set_index('TARGETID')

In [14]:
# Filter both frames to the same z and abs mag range:
#   0.001 < z < 0.14977
#   -20 < M_r < -19, quiescent only (applied to df_old; df_new gets the z cut only)
# NOTE: this cell overwrites df_old / df_new. Re-run the loading cell first if
# you re-run it, otherwise old_size / new_size report the already-filtered sizes.
Z_MIN, Z_MAX = 0.001, 0.14977
old_size = len(df_old)
new_size = len(df_new)

# Every comparison is parenthesised: `&` binds tighter than `==`, so an
# unparenthesised `... & df_old['QUIESCENT'] == 1` is evaluated as
# `(... & df_old['QUIESCENT']) == 1`.
old_mask = (
    (df_old['Z'] > Z_MIN)
    & (df_old['Z'] < Z_MAX)
    & (df_old['ABSMAG_R'] < -19)
    & (df_old['ABSMAG_R'] > -20)
    & (df_old['QUIESCENT'] == 1)
)
new_mask = (df_new['Z'] > Z_MIN) & (df_new['Z'] < Z_MAX)

df_old = df_old.loc[old_mask]
df_new = df_new.loc[new_mask]
print(f"Old size: {old_size:,}, New size: {new_size:,}")
print(f"Filtered Old size: {len(df_old):,}, Filtered New size: {len(df_new):,}")

Old size: 2,909,876, New size: 206,125
Filtered Old size: 100,097, Filtered New size: 100,073


In [15]:
# Compare df_old and df_new in several ways

# 1. Print basic info
print("df_old: rows =", len(df_old), "columns =", len(df_old.columns))
print("df_new: rows =", len(df_new), "columns =", len(df_new.columns))

# 2. Print column names
print("\ndf_old columns:", df_old.columns)
print("df_new columns:", df_new.columns)

# 3. Find columns in one but not the other
print("\nColumns in df_old but not df_new:", set(df_old.columns) - set(df_new.columns))
print("Columns in df_new but not df_old:", set(df_new.columns) - set(df_old.columns))

# 4. Compare summary statistics for the integer/float columns in common.
# The names are sorted so the report order is the same on every run; iterating
# the set directly gives an order that can change between kernel restarts.
common_cols = set(df_old.columns) & set(df_new.columns)
for col in sorted(common_cols):
    if df_old[col].dtype.kind in 'if' and df_new[col].dtype.kind in 'if':
        print(f"\nColumn: {col}")
        print("  df_old: mean =", df_old[col].mean(), "std =", df_old[col].std())
        print("  df_new: mean =", df_new[col].mean(), "std =", df_new[col].std())


df_old: rows = 100097 columns = 20
df_new: rows = 100073 columns = 20

df_old columns: Index(['Z', 'NTILE', 'RA', 'DEC', 'PHOTSYS', 'FRAC_TLOBS_TILES',
       'WEIGHT_ZFAIL', 'PROB_OBS', 'WEIGHT', 'WEIGHT_COMP', 'WEIGHT_SYS',
       'flux_g_dered', 'flux_r_dered', 'flux_z_dered', 'flux_w1_dered',
       'flux_w2_dered', 'NX', 'WEIGHT_FKP', 'ABSMAG_R', 'QUIESCENT'],
      dtype='object')
df_new columns: Index(['TILEID', 'Z', 'NTILE', 'RA', 'DEC', 'PHOTSYS', 'FRAC_TLOBS_TILES',
       'WEIGHT_ZFAIL', 'PROB_OBS', 'WEIGHT', 'WEIGHT_COMP', 'WEIGHT_SYS',
       'flux_g_dered', 'flux_r_dered', 'flux_z_dered', 'flux_w1_dered',
       'flux_w2_dered', 'NX', 'WEIGHT_FKP', 'ABSMAG01_SDSS_R'],
      dtype='object')

Columns in df_old but not df_new: {'ABSMAG_R', 'QUIESCENT'}
Columns in df_new but not df_old: {'ABSMAG01_SDSS_R', 'TILEID'}

Column: RA
  df_old: mean = 194.590712081077 std = 47.17363451738618
  df_new: mean = 194.59979895560846 std = 47.17771299389695

Column: WEIGHT
  df_old: mean =

In [28]:
# Find rows in df_old but not in df_new, and vice versa (both are indexed by TARGETID)
missing_in_new = df_old.loc[~df_old.index.isin(df_new.index)]
missing_in_old = df_new.loc[~df_new.index.isin(df_old.index)]
print(f"Rows in df_old but not in df_new: {len(missing_in_new)}")
print(f"Rows in df_new but not in df_old: {len(missing_in_old)}")

# Find common TARGETIDs (intersection of indexes)
common_ids = df_old.index.intersection(df_new.index)
print(f"Number of matched TARGETIDs: {len(common_ids)}")

# Align old and new values on the common TARGETIDs. Every id in common_ids is
# present in both frames, so the reindex itself introduces no NaN.
old_absmag = df_old['ABSMAG_R'].reindex(common_ids).values
old_weight = df_old['WEIGHT'].reindex(common_ids).values
new_absmag = df_new['ABSMAG01_SDSS_R'].reindex(common_ids).values
new_weight = df_new['WEIGHT'].reindex(common_ids).values

# Percent difference relative to the old value.
# NOTE: a negative old value (the sampled magnitudes are around -19) flips the
# sign of its percent difference.
absmag_diff = 100 * (new_absmag - old_absmag) / old_absmag
weight_diff = 100 * (new_weight - old_weight) / old_weight

# Store differences in a DataFrame for summary
diff_df = pd.DataFrame({
    'old_ABSMAG_R': old_absmag,
    'new_ABSMAG01_SDSS_R': new_absmag,
    'ABSMAG_R_diff': absmag_diff,
    'old_WEIGHT': old_weight,
    'new_WEIGHT': new_weight,
    'WEIGHT_diff': weight_diff
}, index=common_ids)

# Summarize the differences: median as the central value, standard deviation as the spread
median_diff_absmag = diff_df['ABSMAG_R_diff'].median()
std_diff_absmag = diff_df['ABSMAG_R_diff'].std()
median_diff_weight = diff_df['WEIGHT_diff'].median()
std_diff_weight = diff_df['WEIGHT_diff'].std()

print("\nSummary of differences:")
print(f"ABSMAG_R: Median = {median_diff_absmag:.3f}, 1σ = {std_diff_absmag:.3f}, 2σ = {2*std_diff_absmag:.3f}, 3σ = {3*std_diff_absmag:.3f}")
print(f"WEIGHT: Median = {median_diff_weight:.3f}, 1σ = {std_diff_weight:.3f}, 2σ = {2*std_diff_weight:.3f}, 3σ = {3*std_diff_weight:.3f}")

Rows in df_old but not in df_new: 230
Rows in df_new but not in df_old: 206
Number of matched TARGETIDs: 99867

Summary of differences:
ABSMAG_R: Median = -0.000, 1σ = 0.021, 2σ = 0.042, 3σ = 0.064
WEIGHT: Median = -5.462, 1σ = 1.654, 2σ = 3.309, 3σ = 4.963


In [None]:
# Debug the weight differences: NaN counts, percentiles, histograms.
nan_mask = np.isnan(weight_diff)
print("Debugging weight differences...")
print(f"Number of common IDs: {len(common_ids)}")
print(f"Number of non-NaN weight differences: {(~nan_mask).sum()}")
print(f"Number of NaN weight differences: {nan_mask.sum()}")

# Percentiles (NaN-aware) to spot extreme outliers in the weight differences
print("\nWeight difference percentiles:")
percentile_rows = (
    (5, "5th percentile"),
    (25, "25th percentile"),
    (50, "50th percentile (median)"),
    (75, "75th percentile"),
    (95, "95th percentile"),
)
for pct, pct_name in percentile_rows:
    print(f"  {pct_name}: {np.nanpercentile(weight_diff, pct):.3f}%")

# Histograms: full distribution on the left, central region on the right
valid_weight_diff = weight_diff[~nan_mask]
median_value = np.median(valid_weight_diff)
median_label = f'Median: {median_value:.1f}%'

fig, (ax_full, ax_zoom) = plt.subplots(1, 2, figsize=(10, 6))

ax_full.hist(valid_weight_diff, bins=50, alpha=0.7, edgecolor='black')
ax_full.set_xlabel('Weight % Difference')
ax_full.set_ylabel('Frequency')
ax_full.set_title('Distribution of Weight Differences')
ax_full.axvline(median_value, color='red', linestyle='--', label=median_label)
ax_full.legend()

# Right panel: only differences within ±50%; left empty when nothing falls in range
central_range = np.abs(valid_weight_diff) < 50
if central_range.sum() > 0:
    ax_zoom.hist(valid_weight_diff[central_range], bins=30, alpha=0.7, edgecolor='black')
    ax_zoom.set_xlabel('Weight % Difference')
    ax_zoom.set_ylabel('Frequency')
    ax_zoom.set_title('Weight Differences (±50% range)')
    ax_zoom.axvline(median_value, color='red', linestyle='--', label=median_label)
    ax_zoom.legend()

plt.tight_layout()
plt.show()

# Check whether the aligned weight arrays themselves contain NaN
print("\nChecking reindex operation...")
for side, weights_arr in (("Old", old_weight), ("New", new_weight)):
    print(f"{side} weight array has {np.isnan(weights_arr).sum()} NaN values")

In [None]:
# Alternative approach: merge dataframes to avoid reindex issues
print("Alternative analysis using merge...")

# Create temporary dataframes with just the columns we need
df_old_temp = df_old.reset_index()[['TARGETID', 'WEIGHT', 'ABSMAG_R']].copy()
df_new_temp = df_new.reset_index()[['TARGETID', 'WEIGHT', 'ABSMAG01_SDSS_R']].copy()

# Merge on TARGETID; the shared WEIGHT column becomes WEIGHT_old / WEIGHT_new
merged_df = df_old_temp.merge(df_new_temp, on='TARGETID', suffixes=('_old', '_new'))
print(f"Successfully merged {len(merged_df)} rows")

# Calculate percent differences relative to the old value
merged_df['weight_diff_pct'] = 100 * (merged_df['WEIGHT_new'] - merged_df['WEIGHT_old']) / merged_df['WEIGHT_old']
merged_df['absmag_diff_pct'] = 100 * (merged_df['ABSMAG01_SDSS_R'] - merged_df['ABSMAG_R']) / merged_df['ABSMAG_R']

# Print summary statistics
print(f"\nMerged dataframe statistics:")
print(f"Weight differences - Mean: {merged_df['weight_diff_pct'].mean():.3f}%, Median: {merged_df['weight_diff_pct'].median():.3f}%, Std: {merged_df['weight_diff_pct'].std():.3f}%")
print(f"AbsMag differences - Mean: {merged_df['absmag_diff_pct'].mean():.3f}%, Median: {merged_df['absmag_diff_pct'].median():.3f}%, Std: {merged_df['absmag_diff_pct'].std():.3f}%")

# Check for extreme outliers
extreme_weight_outliers = np.abs(merged_df['weight_diff_pct']) > 100
print(f"\nNumber of extreme weight outliers (>100% difference): {extreme_weight_outliers.sum()}")

# itertuples (not iterrows) is used for the row printouts below: iterrows
# converts each row to one common dtype, which here turns the integer TARGETID
# into a float64 and prints a rounded, wrong id for values above 2**53.
if extreme_weight_outliers.sum() > 0:
    print("Examples of extreme weight outliers:")
    outlier_examples = merged_df[extreme_weight_outliers].head(5)
    for row in outlier_examples.itertuples(index=False):
        print(f"  TARGETID {row.TARGETID}: Old weight {row.WEIGHT_old:.3f}, New weight {row.WEIGHT_new:.3f}, Diff: {row.weight_diff_pct:.1f}%")

# Show random sample from merged data (unseeded: different rows on each run)
print(f"\nRandom sample from merged data:")
sample_merged = merged_df.sample(min(10, len(merged_df)))
for row in sample_merged.itertuples(index=False):
    print(f"TARGETID {row.TARGETID}: Weight diff: {row.weight_diff_pct:.1f}%, AbsMag diff: {row.absmag_diff_pct:.1f}%")

In [29]:
# Spot check: for a random handful of TARGETIDs present in both frames, print
# the new (ABSMAG01_SDSS_R, WEIGHT) next to the old (ABSMAG_R, WEIGHT).
print("TARGETID: New (ABSMAG01_SDSS_R and WEIGHT) vs Old (ABSMAG_R and WEIGHT)")
sample_ids = pd.Index(df_new.index).intersection(df_old.index)
if len(sample_ids) > 0:
    n_examples = min(15, len(sample_ids))
    target_ids = np.random.choice(sample_ids, size=n_examples, replace=False)
    for target_id in target_ids:
        new_row = df_new.loc[[target_id]]
        old_row = df_old.loc[[target_id]]
        new_mag = new_row['ABSMAG01_SDSS_R'].values[0]
        new_wgt = new_row['WEIGHT'].values[0]
        old_mag = old_row['ABSMAG_R'].values[0]
        old_wgt = old_row['WEIGHT'].values[0]
        print(f"{target_id}: New: ({new_mag:.3f}, {new_wgt:.3f}), Old: ({old_mag:.3f}, {old_wgt:.3f})")


TARGETID: New (ABSMAG01_SDSS_R and WEIGHT) vs Old (ABSMAG_R and WEIGHT)
39627643326301868: New: (-19.713, 0.451), Old: (-19.714, 0.488)
39627945723039818: New: (-19.675, 0.925), Old: (-19.675, 0.962)
39627637152288046: New: (-19.098, 0.860), Old: (-19.098, 0.895)
39627745189170797: New: (-19.863, 1.077), Old: (-19.862, 1.166)
39633531260898442: New: (-19.187, 1.662), Old: (-19.189, 1.799)
39628406454747551: New: (-19.137, 1.385), Old: (-19.137, 1.499)
39627654256656977: New: (-19.414, 0.881), Old: (-19.416, 0.916)
39633538068253443: New: (-19.667, 0.895), Old: (-19.667, 0.968)
39627745046561759: New: (-19.584, 0.895), Old: (-19.582, 0.931)
39627703044806340: New: (-19.216, 0.991), Old: (-19.216, 1.030)
39627782552030456: New: (-19.093, 0.874), Old: (-19.090, 0.909)
39633182366107009: New: (-19.386, 1.711), Old: (-19.388, 1.852)
39627860050185016: New: (-19.105, 0.860), Old: (-19.094, 0.895)
39633148711012077: New: (-19.128, 1.058), Old: (-19.124, 1.145)
39633099574740464: New: (-19.378

In [None]:
# Positional cross-match of the old sample against the new one.
# Create SkyCoord objects for both dataframes
coords_old = SkyCoord(ra=df_old['RA'].values * u.deg, dec=df_old['DEC'].values * u.deg)
coords_new = SkyCoord(ra=df_new['RA'].values * u.deg, dec=df_new['DEC'].values * u.deg)

# Match old to new: idx[i] is the position in df_new of the nearest neighbour
# of old row i, d2d[i] the on-sky separation (the 3-D distance is not used).
idx, d2d, _ = coords_old.match_to_catalog_sky(coords_new)

matched = d2d.to(u.arcsec) < 1.0 * u.arcsec  # 1 arcsec match threshold

# Add columns to df_old for matched new TARGETID and separation in arcsec.
# TARGETID is the *index* of df_new (not a column), so it is read from the index.
# assign() builds a new frame rather than writing into a filtered one.
df_old = df_old.assign(
    matched_new_TARGETID=df_new.index[idx].values,
    sep_arcsec=d2d.arcsec,
    matched=matched,
)

# Show a summary of the matching
print(f"Matched {matched.sum()} out of {len(df_old)} old entries to new entries within 1 arcsec")

# View Imaging Systematics

In [None]:
# TODO look at imaging systematics
# For each listed fit plot found in the '19-20_Q_i1' folder: display the image,
# then print the companion '_linfitparam.txt' file stored next to it.
systematics_dir = os.path.join(CUSTOM_CLUSTERING_RESULTS_FOLDER_NEW, 'Y1', 'LSS', 'iron', 'LSScats', 'v1.5pip', '19-20_Q_i1')

for fn in ['BGS_BRIGHT_N_0.0010.5_linclusimsysfit.png']:
    path = os.path.join(systematics_dir, fn)
    if not os.path.exists(path):
        # Missing images are skipped silently.
        continue

    plt.figure(figsize=(10, 10))
    img = plt.imread(path)
    plt.imshow(img)
    plt.axis('off')
    plt.title(fn)
    plt.show()

    # Also print off the .txt file. Same thing but _linfitparam.txt
    txt_fn = path.replace('_linclusimsysfit.png', '_linfitparam.txt')
    if not os.path.exists(txt_fn):
        print("No text file found for", txt_fn)
    else:
        with open(txt_fn, 'r') as f:
            print(f.read())
