In [None]:
import numpy as np
import pandas as pd
from pathlib import Path

from astropy import units as u
from astropy.coordinates import SkyCoord
from astropy.table import Table, hstack
import matplotlib.pyplot as plt

### NYU-VAGC (k-correction / Sérsic index) matching

In [None]:
# Root directory for every catalogue this notebook reads or writes.
# NOTE(review): absolute, machine-specific path — adjust it before running elsewhere.
data_path = Path("/bgfs/jnewman/bid13/photoZ/data/pasquet2019/")

In [None]:
# Load the NYU-VAGC k-correct FITS table.
kcorr = Table.read(data_path / "nyu_vagc" / "kcorrect.none.model.z0.00.fits")

In [None]:
# Load the NYU-VAGC Sersic-fit FITS catalogue.
sersic = Table.read(data_path / "nyu_vagc" / "sersic_catalog.fits")

In [None]:
# Load the FITS table holding the *_cnn redshift columns used below.
inference_file = "SDSS_inference_johanna.fits"
psqt_updated = Table.read(data_path / inference_file)

In [None]:
# Keep only the *_cnn columns plus the specObjID join key, then convert the
# astropy table into a pandas frame with pandas-inferred (nullable) dtypes.
psqt_updated = (
    psqt_updated[["zmean_cnn", "zmed_cnn", "zpeak_cnn", "specObjID"]]
    .to_pandas()
    .convert_dtypes()
)

In [None]:
# Extra per-object columns from the CSV, keyed on specObjID (see the merge below).
sdss_extra = pd.read_csv(data_path / "nyu_vagc" / "pasquet_vagc.csv").convert_dtypes()


In [None]:
# Column-wise concatenation of the k-correct and Sersic tables (rows are paired
# by position, not by key).
# NOTE(review): this re-binds `kcorr`, so re-running the cell without reloading
# `kcorr` stacks `sersic` onto the already-stacked table.
kcorr = hstack([kcorr, sersic], join_type="exact")

In [None]:
# Read the label records from sdss.npz.  The archive is opened in a context
# manager so its file handle is released as soon as the labels are in memory.
with np.load(data_path / "sdss.npz", allow_pickle=True) as sdss_npz:
    psqt = pd.DataFrame(sdss_npz["labels"])

# Left-join the extra catalogue columns and the CNN redshift columns on
# specObjID.  validate="many_to_one" makes pandas raise a MergeError if a key
# is repeated on the right-hand side; without it such a repeat would silently
# duplicate label rows and shift every positional index used further down.
psqt = psqt.merge(sdss_extra, how="left", on="specObjID", validate="many_to_one")
psqt = psqt.merge(psqt_updated, how="left", on="specObjID", validate="many_to_one")
# psqt = psqt.drop("index", axis=1)

In [None]:
# Sky positions for every row of `psqt`; ra/dec are interpreted as degrees (ICRS).
psqt_coord = SkyCoord(
    ra=np.array(psqt["ra"]) * u.deg,
    dec=np.array(psqt["dec"]) * u.deg,
    frame="icrs",
)

In [None]:
# Sky positions for every row of the stacked VAGC table (degrees, ICRS).
kcorr_coord = SkyCoord(
    ra=kcorr["RA"] * u.deg,
    dec=kcorr["DEC"] * u.deg,
    frame="icrs",
)

In [None]:
# Find every (psqt, kcorr) pair separated by less than 0.95 arcsec on the sky.
# The first index array points into psqt_coord (the argument), the second into
# kcorr_coord (the calling object); both are used that way in the cells below.
# A psqt source can appear in several pairs — duplicates are handled later.
# d3d is returned by the call but not used afterwards in this notebook.
idx_psqt, idx_Kcorr, d2d, d3d = kcorr_coord.search_around_sky(psqt_coord, 0.95*u.arcsecond)

In [None]:
# Distribution of match separations (log-scaled counts).  Uses the explicit
# Axes interface and labels both axes so the figure can be read on its own.
fig, ax = plt.subplots(figsize=(16, 8))
ax.hist(d2d.arcsec, bins=20, histtype="step")
ax.set_xlabel("Match separation [arcsec]")
ax.set_ylabel("Number of matched pairs")
ax.set_yscale("log")

In [None]:
# Fraction of psqt sources with at least one VAGC match.  idx_psqt holds one
# entry per matched *pair*, so a source with several matches would be counted
# more than once; count distinct source indices instead.
len(np.unique(idx_psqt)) / len(psqt)

In [None]:
# One row per matched pair.  idx_psqt holds *positions* in `psqt`, whereas the
# later join uses psqt's own "imageID" column as key, so translate positions
# into the imageID values of the rows they point at.  (Identical to the raw
# positions whenever imageID is simply 0..N-1 in row order.)
cross_match_cat = pd.DataFrame(
    {
        "imageID": psqt["imageID"].to_numpy()[idx_psqt],
        "idx_Kcorr": idx_Kcorr,
        "d2d": d2d.arcsec,
    }
)

In [None]:
# ABSMAG columns 0..4 are taken as the u, g, r, i, z bands (per the variable
# names); each is restricted to the matched VAGC rows.
absMag_u, absMag_g, absMag_r, absMag_i, absMag_z = (
    kcorr["ABSMAG"][:, band][idx_Kcorr] for band in range(5)
)

In [None]:
# Convert the inverse variances into 1-sigma errors (1 / sqrt(ivar)) per band,
# then keep the matched VAGC rows.  Zero inverse variance yields inf, which the
# duplicate check further down filters with np.isfinite.
absMagErr_u, absMagErr_g, absMagErr_r, absMagErr_i, absMagErr_z = (
    1 / np.sqrt(kcorr["ABSMAG_IVAR"][:, band])[idx_Kcorr] for band in range(5)
)

# ra = kcorr["RA"][idx_Kcorr]
# dec = kcorr["DEC"][idx_Kcorr]

In [None]:
# SERSIC_N columns 0..4 are taken as the u, g, r, i, z bands (per the variable
# names), restricted to the matched VAGC rows.
sersicN_u, sersicN_g, sersicN_r, sersicN_i, sersicN_z = (
    kcorr["SERSIC_N"][:, band][idx_Kcorr] for band in range(5)
)

In [None]:
# Attach the per-band VAGC quantities to the match table.  The dict preserves
# insertion order, so columns are added as absMag_*, absMagErr_*, sersicN_*.
vagc_columns = {
    "absMag_u": absMag_u,
    "absMag_g": absMag_g,
    "absMag_r": absMag_r,
    "absMag_i": absMag_i,
    "absMag_z": absMag_z,
    "absMagErr_u": absMagErr_u,
    "absMagErr_g": absMagErr_g,
    "absMagErr_r": absMagErr_r,
    "absMagErr_i": absMagErr_i,
    "absMagErr_z": absMagErr_z,
    "sersicN_u": sersicN_u,
    "sersicN_g": sersicN_g,
    "sersicN_r": sersicN_r,
    "sersicN_i": sersicN_i,
    "sersicN_z": sersicN_z,
}
for column_name, column_values in vagc_columns.items():
    cross_match_cat[column_name] = column_values
# cross_match_cat["ra"] = ra
# cross_match_cat["dec"] = dec

In [None]:
# Cross-check the sources with more than one match: among them, keep the
# matches with a finite u-band error and a negative r-band absolute magnitude,
# then show the sources that are still matched more than once.
dups = cross_match_cat.loc[cross_match_cat.duplicated(subset="imageID", keep=False)]
dups = dups.loc[np.isfinite(dups["absMagErr_u"]) & (dups["absMag_r"] < 0)]
dups.loc[dups.duplicated(subset="imageID", keep=False)]

In [None]:
# Discard every source that matched more than one VAGC entry (keep=False flags
# all members of a duplicate group, so none of the ambiguous matches survive).
keep_bool = ~cross_match_cat.duplicated(subset="imageID", keep=False)
cross_match_cat = cross_match_cat.loc[keep_bool]

In [None]:
# Left join: every psqt row is kept; rows without a unique VAGC match get NaN.
psqt_merged = pd.merge(psqt, cross_match_cat, how="left", on="imageID")
# psqt_merged.to_csv(data_path/"psqt_vagc_kcorr.csv", index=False)

# Galaxy Zoo 1 (GZ1) morphology matching

In [None]:
def agreement(probability):
    """Return the agreement measure defined in Dieleman et al. 2015.

    The agreement is ``1 - H / log(n)``, where ``H`` is the Shannon entropy of
    each probability vector and ``n`` the number of classes: 1 when all the
    probability sits on one class, 0 for a uniform distribution.

    Args:
        probability (array-like): class probabilities with the classes along
            the last axis, e.g. shape ``(num_data, num_class)``.  A single
            1-D vector of shape ``(num_class,)`` is accepted as well.

    Returns:
        numpy.ndarray or float: agreement per probability vector, shape
        ``probability.shape[:-1]``.
    """
    probability = np.asarray(probability, dtype=float)
    # Classes live on the last axis, the same axis the entropy is summed over.
    n = probability.shape[-1]
    # log(0) = -inf is expected for empty classes; nan_to_num makes it finite
    # so that 0 * log(0) contributes 0.  Silence the floating-point warnings
    # this deliberate evaluation would otherwise emit.
    with np.errstate(divide="ignore", invalid="ignore"):
        log_p = np.nan_to_num(np.log(probability))
    H = -np.sum(probability * log_p, axis=-1)

    return 1 - (H / np.log(n))

In [None]:
# Merge debiased classifications
gz1 = pd.read_csv(data_path / "pasquet_gz1_bid13.csv")

# Two-column probability matrix: column 0 = p_el_debiased, column 1 = p_cs_debiased.
debiased_cols = ["p_el_debiased", "p_cs_debiased"]
prob = gz1[debiased_cols].to_numpy()

# Label = index of the larger probability (0 -> p_el_debiased, 1 -> p_cs_debiased).
gz1["gz1Morpho"] = prob.argmax(axis=-1)

# Only objects where one of the two probabilities reaches 0.8 get a label.
gz1_mask = gz1[debiased_cols].ge(0.8).any(axis=1)

gz1_morph = gz1.loc[
    gz1_mask, ["specObjID", "gz1Morpho", "p_el_debiased", "p_cs_debiased"]
]

psqt_merged = pd.merge(psqt_merged, gz1_morph, how="left", on="specObjID")

In [None]:
# Merge raw classifications.
# NOTE: this re-binds `gz1`; from here on it refers to the table read from
# pasquet_gz1_all_bid13.csv, not the debiased one loaded earlier.
gz1 = pd.read_csv(data_path / "pasquet_gz1_all_bid13.csv")
# gz1['p_un_debiased'] = 1-(gz1['p_el_debiased'] + gz1["p_cs_debiased"])

# prob = np.array(
#     [
#         gz1["p_el_debiased"],
#         gz1["p_cs_debiased"],
#         gz1["p_un_debiased"],
#     ]
# ).T

# agreed = agreement(prob)>=0.8

In [None]:
# Probability matrix with one column per raw vote-fraction column.
raw_vote_cols = ["p_el", "p_cs", "p_mg", "p_dk"]
prob = gz1[raw_vote_cols].to_numpy()

# gz1['gz1Morpho'] = np.argmax(prob, axis=-1)


# Columns carried into the merged catalogue: the join key plus the raw fractions.
gz1_morph = gz1[["specObjID"] + raw_vote_cols]

In [None]:
# Left join of the raw GZ1 fractions; objects absent from gz1_morph get NaN.
psqt_merged = pd.merge(psqt_merged, gz1_morph, how="left", on="specObjID")

### Merge with the Domínguez Sánchez morphology catalogue

In [None]:
ml_morph = Table.read(data_path / "ZOO_model_full_catalogue.fit")
# The pandas conversion hands the object IDs back as floats, but they must be
# integers to serve as a join key: drop the converted column and rebuild it
# from the astropy column instead.
ml_morph_df = ml_morph.to_pandas()
ml_morph_df = ml_morph_df.drop(["dr7objid", "galcount"], axis=1)
# Cast with a NumPy dtype: "Int64" (capital I) is the name of the pandas
# nullable dtype, and NumPy >= 1.20 rejects it with
# "TypeError: data type 'Int64' not understood".  convert_dtypes() below still
# promotes the column to pandas' nullable Int64.
ml_morph_df["dr7ObjID"] = ml_morph["dr7objid"].astype(np.int64)
ml_morph_df = ml_morph_df.convert_dtypes()

In [None]:
# Left join of the morphology catalogue on the DR7 object ID.
psqt_merged = pd.merge(psqt_merged, ml_morph_df, how="left", on="dr7ObjID")

## Save the data

In [None]:
# Convert the merged frame into a NumPy record array (one field per column,
# the pandas index is dropped) so it can be stored in the .npz archive below.
labels = psqt_merged.to_records(index=False)

In [None]:
# Write the augmented archive: the original image cube plus the new labels.
# The source archive is opened in a context manager so its file handle is
# closed once the cube has been written out.
with np.load(data_path / "sdss.npz", allow_pickle=True) as sdss_npz:
    np.savez(data_path / "sdss_vagc", cube=sdss_npz["cube"], labels=labels)