In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.colors as c
import astropy.coordinates as coord
import astropy.units as u
import astropy.io.fits as fits
import healpy as hp
from astropy.table import Table,join
import types
import sys

# Make the modules under ./SelfCalGroupFinder/py/ importable. The membership
# check keeps repeated runs of this cell from appending the same entry again.
if './SelfCalGroupFinder/py/' not in sys.path:
    sys.path.append('./SelfCalGroupFinder/py/')
from pyutils import *
from dataloc import *
import groupcatalog as gc
import catalog_definitions as cat

%load_ext autoreload
%autoreload 2

In [None]:
# Read the DR7B ID file and keep only the 'indx' column.
# The delimiter is written as a raw string: '\s' in a normal string literal is an
# invalid escape sequence (SyntaxWarning on recent Python versions). The runtime
# value passed to the reader is unchanged.
id_table = Table.read(
    SDSS_DR7B_ID_FILE,
    names=['indx', 'run', 'rerun', 'camcol', 'field', 'id'],
    delimiter=r'\s',
    format='ascii',
)
id_table.keep_columns(['indx'])
print(len(id_table))

In [None]:
# <indx> <sector> <mregion> <ra> <dec> <cz> <fgotten> <selection fn>
#lss_table = Table.read(SDSS_DR7B_LSS_FILE, names=['indx', 'sector', 'mregion', 'ra', 'dec', 'cz', 'fgotten', 'selection_fn'], guess=False, delimiter='\s', format='ascii')

In [None]:
# <indx> <M_u> <M_g> <M_r> <M_i> <M_z> <mu_{50}> <r50/r90>
#phot_table = Table.read(SDSS_DR7B_PHOTO_FILE, names=['indx', 'M_u', 'M_g', 'M_r', 'M_i', 'M_z', 'mu_50', 'r50_r90'], guess=False, delimiter='\s', format='ascii')

In [None]:
# Convert the ID table to a pandas DataFrame; this is the starting point for df_good.
df_good = id_table.to_pandas()

In [None]:
# Load the four DR7 FITS tables, then report how many rows each one holds.
obj_table = Table.read(SDSS_DR7_OBJECT_FILE, format='fits')
spec_table = Table.read(SDSS_DR7_SPECTRO_FILE, format='fits')
collisions_table = Table.read(SDSS_DR7_COLLISIONS_FILE, format='fits')
imaging_table = Table.read(SDSS_DR7_IMAGING_FILE, format='fits')

for dr7_table in (obj_table, spec_table, collisions_table, imaging_table):
    print(len(dr7_table))

In [None]:
# Trim each table to the columns used below (done in place on the astropy tables).
obj_table.keep_columns(['RA', 'DEC'])
spec_table.keep_columns(['OBJTYPE', 'CLASS', 'SUBCLASS'])
collisions_table.keep_columns(['Z','Z_SDSS','Z_SPECTRO','Z_FIXED','FIXED','ZTYPE','GOT'])
#imaging_table.keep_columns(['OBJC_TYPE', ])

# Combine the three tables side by side; the joins are on the row index.
df = (
    obj_table.to_pandas()
    .join(spec_table.to_pandas(), how='left')
    .join(collisions_table.to_pandas(), how='left')
)
print(len(df))

In [None]:
# Attach the DR7 columns to the good-galaxy list by looking up each 'indx' value
# in df's row index. Rebuilding from id_table (instead of re-joining df_good onto
# itself) keeps this cell re-runnable: a second df_good.join(df) would fail
# because the joined columns already exist.
# NOTE(review): presumes 'indx' is a row position into the DR7 tables — the RA
# comparison against the Tinker catalog further down acts as the cross-check.
df_good = id_table.to_pandas().join(df, how='inner', on='indx')

In [None]:
# Fractions of the good sample: FIXED, GOT, per-ZTYPE, and rows with a Z_FIXED value.
n_good = len(df_good)
has_z_fixed = df_good['Z_FIXED'] != -1.0

print(df_good.FIXED.sum() / n_good)
print(df_good.GOT.sum() / n_good)
print(df_good.groupby('ZTYPE').RA.count() / n_good)
print(df_good.loc[has_z_fixed, 'RA'].count() / n_good) # Weird, this is not the same percent of FIXED

In [None]:
# The .DAT file Jeremy built as input to the group finder
tinker_catalog_df = pd.read_csv(
    SDSS_v1_DAT_FILE,
    delimiter=' ',
    names=('RA', 'Dec', 'z', 'logLgal', 'V_max', 'color', 'chi'),
)

# Jeremy's group catalog
catalog = cat.sdss_published
catalog.postprocess()

# All three tables must have the same length ...
assert len(df_good) == len(tinker_catalog_df)
assert len(df_good) == len(catalog.all_data)

# ... and agree on RA row by row.
for reference in (tinker_catalog_df, catalog.all_data):
    assert np.isclose(df_good.RA, reference.RA, rtol=1E-6, atol=1E-5).all()

# Eyeball the first few RA values from each source.
for frame in (df_good, tinker_catalog_df, catalog.all_data):
    print(frame['RA'][0:5])

In [None]:
# Attach the FIXED flag from df_good (index-aligned inner join). Any FIXED column
# left over from a previous run of this cell is dropped first, so the cell can be
# re-executed; without that, the second join fails on the overlapping column.
catalog.all_data = (
    catalog.all_data
    .drop(columns=['FIXED'], errors='ignore')
    .join(df_good.FIXED, how='inner')
)
#catalog.all_data = catalog.all_data.join(df_good.Z_SPECTRO, how='inner')

# Placeholder columns for the DESI redshifts, filled in by the matching step.
catalog.all_data['z_DESI1'] = np.nan
catalog.all_data['z_DESI3'] = np.nan

In [None]:
# DESI Y1: sky positions (degrees, ICRS) and redshifts of the observed galaxies
desi_Y1_observed_galaxies = gc.deserialize(cat.bgs_fiberonly_1pass)
desi_Y1_data = desi_Y1_observed_galaxies.all_data
desi_Y1_catalog = coord.SkyCoord(
    ra=desi_Y1_data.RA.to_numpy()*u.degree,
    dec=desi_Y1_data.Dec.to_numpy()*u.degree,
    frame='icrs',
)
desi_Y1_z = desi_Y1_data['z'].to_numpy()

# DESI Y3: same quantities from the Y3 catalog
desi_Y3_observed_galaxies = gc.deserialize(cat.bgs_y3_fiberonly_1pass)
desi_Y3_data = desi_Y3_observed_galaxies.all_data
desi_Y3_catalog = coord.SkyCoord(
    ra=desi_Y3_data.RA.to_numpy()*u.degree,
    dec=desi_Y3_data.Dec.to_numpy()*u.degree,
    frame='icrs',
)
desi_Y3_z = desi_Y3_data['z'].to_numpy()

def match_redshifts_from_DESI(df: pd.DataFrame, desi_catalog, desi_z, colname, max_sep_arcsec=3.0) -> None:
    """Copy DESI redshifts onto the rows of ``df`` that have ``FIXED == 1``.

    Each such row (called "lost" here) is matched on the sky to its nearest
    neighbour in ``desi_catalog``. If that neighbour is closer than
    ``max_sep_arcsec`` its redshift is written to ``df[colname]``; lost rows
    without such a neighbour get NaN. Rows with ``FIXED != 1`` are not touched.
    ``df`` is modified in place and a one-line summary is printed.

    Parameters
    ----------
    df : pd.DataFrame
        Needs 'RA' and 'Dec' columns (interpreted as degrees, ICRS) and 'FIXED'.
    desi_catalog : astropy.coordinates.SkyCoord
        Positions to match against.
    desi_z : array-like
        Redshifts indexed the same way as ``desi_catalog``.
    colname : str
        Column of ``df`` that receives the matched redshifts.
    max_sep_arcsec : float, optional
        Maximum separation, in arcseconds, for a nearest neighbour to count
        as a match. Defaults to 3, the value previously hard-coded.
    """
    lost = df.FIXED == 1
    n_lost = int(lost.sum())

    if n_lost == 0:
        # Nothing to match: skip the sky match and the 0/0 in the summary line.
        if colname not in df.columns:
            df[colname] = np.nan
        print(f"No lost SDSS galaxies (FIXED == 1) to match for {colname}")
        return

    to_match = coord.SkyCoord(
        ra=df.loc[lost, 'RA'].to_numpy()*u.degree,
        dec=df.loc[lost, 'Dec'].to_numpy()*u.degree,
        frame='icrs',
    )
    idx, d2d, d3d = coord.match_coordinates_sky(to_match, desi_catalog, nthneighbor=1)
    ang_distances = d2d.to(u.arcsec).value
    # np.asarray guarantees positional indexing even if a pandas Series is passed.
    z_to_steal = np.asarray(desi_z)[idx]

    # A nearest neighbour closer than the threshold is treated as the same object,
    # and its z is copied over to the SDSS row.
    matched = ang_distances < max_sep_arcsec
    n_matched = matched.sum()
    print(f"Matched {n_matched} out of {n_lost} ({n_matched/n_lost:.1%}) lost SDSS with DESI redshifts")

    df.loc[lost, colname] = np.where(matched, z_to_steal, np.nan)


# Fill z_DESI1 from the Y1 catalog, then z_DESI3 from the Y3 catalog.
for desi_catalog, desi_z, target_column in (
    (desi_Y1_catalog, desi_Y1_z, 'z_DESI1'),
    (desi_Y3_catalog, desi_Y3_z, 'z_DESI3'),
):
    match_redshifts_from_DESI(catalog.all_data, desi_catalog, desi_z, target_column)

In [None]:
# Compare the SDSS redshift of each lost (FIXED == 1) galaxy with the DESI
# redshift matched to it, once per DESI column.
# NOTE: this reads the current 'z' column. Once 'z' has been overwritten with
# z_DESI3 (done further down), re-running this cell gives dz == 0 for z_DESI3.
CLOSE_ENOUGH = 0.005 # cz = 1500 km/s
QUITE_CLOSE = 0.001

lost_gals = catalog.all_data.loc[catalog.all_data.FIXED == 1]

for colname in ['z_DESI1', 'z_DESI3']:
    matched_gals = lost_gals[~np.isnan(lost_gals[colname])].copy()
    print(f"{len(matched_gals)} galaxies with DESI redshifts matched to SDSS with {colname}")
    if matched_gals.empty:
        # Nothing to compare: skip the 0/0 fractions and the empty histogram.
        continue

    dz = matched_gals['z'] - matched_gals[colname]

    # The tolerance in the message comes from the constant, so the two cannot drift apart.
    for tolerance in (CLOSE_ENOUGH, QUITE_CLOSE):
        n_within = np.isclose(matched_gals['z'], matched_gals[colname], rtol=0.0, atol=tolerance).sum()
        print(f"{n_within / len(matched_gals):.3f} of galaxies are within {tolerance} of DESI redshift")

    fig, ax = plt.subplots()
    ax.hist(dz, bins=200, range=(-0.1, 0.1))
    ax.set_xlim(-0.1, 0.1)
    ax.set_yscale('log')
    ax.set_ylabel('Number of Galaxies')
    ax.set_xlabel(f'z_SDSS - {colname}')
    # Vertical guide lines at +/- CLOSE_ENOUGH
    ax.axvline(CLOSE_ENOUGH, color='r')
    ax.axvline(-CLOSE_ENOUGH, color='r')
    #ax.axvline(QUITE_CLOSE, color='g')
    #ax.axvline(-QUITE_CLOSE, color='g')
    plt.draw()

In [None]:
# Rows that received a DESI Y3 redshift (z_DESI3 is not NaN)
has_desi_z = ~np.isnan(catalog.all_data['z_DESI3'])

# Use the DESI redshift wherever one exists; otherwise keep the current z
catalog.all_data['z'] = np.where(has_desi_z, catalog.all_data['z_DESI3'], catalog.all_data['z'])

# 1 when the row is FIXED and has a DESI redshift, 0 otherwise
catalog.all_data['z_assigned_flag'] = ((catalog.all_data.FIXED == 1) & has_desi_z).astype(int)


In [None]:
# Cast the 'quiescent' column to float.
catalog.all_data['quiescent'] = catalog.all_data['quiescent'].astype(float)

# TODO BUG for the copied over from DESI ones, we may have incorrect color / chi info
# Carry 'color' (stored as 'color_flag') and 'chi' over from the v1 DAT frame.
# NOTE(review): assigning a Series aligns on index labels, so this presumes both
# frames share the same index — confirm.
catalog.all_data['color_flag'] = tinker_catalog_df.color
catalog.all_data['chi'] = tinker_catalog_df.chi

In [None]:
# Write the new version of the fluxlim file to go into group finder.
# Output is space-separated with no header row and no index column; the column
# order is fixed by the list below.
catalog.all_data[['RA', 'Dec', 'z', 'logLgal', 'V_max', 'color_flag', 'chi']].to_csv(SDSS_v2_DAT_FILE, sep=' ', header=False, index=False)

In [None]:
# Completeness here is the fraction of rows whose z_assigned_flag is 0.
n_total = len(catalog.all_data)
n_unassigned = (catalog.all_data.z_assigned_flag == 0).sum()
print(f"SDSS v2 Completeness: {n_unassigned / n_total}")

In [None]:
# Load the v1 galprops file and append z_assigned_flag (index-aligned left join)
galprops_columns = ('Mag_g', 'Mag_r', 'sigma_v', 'Dn4000', 'concentration', 'log_M_star')
tinker_galprops = (
    pd.read_csv(SDSS_v1_GALPROPS_FILE, delimiter=' ', names=galprops_columns)
    .join(catalog.all_data[['z_assigned_flag']], how='left')
)

# TODO the rows we updated still have old copied over values here

# Save the result as the v2 galprops file
tinker_galprops.to_csv(SDSS_v2_GALPROPS_FILE, sep=' ', header=False, index=False)