# Rejecting bad objects and duplicates

In this notebook I compose the final catalog of LSBGs by rejecting the bad objects (flagged by the group of people who did the visual inspection) and the duplicates, which are identified by looking for objects in our catalog that lie within 2" of each other.

In [1]:
# First, import some libraries we are going to need
import numpy as np
import pandas as pd
import scipy as sp

Let's import the full catalog now.

In [2]:
from astropy.io import fits
# Open the GALFIT results catalogue. The returned HDU list is kept in
# `Galfit_res`; the next cell reads its columns from extension 1.
# NOTE(review): the HDU list is not closed in any of the visible cells.
Galfit_res = fits.open('y3_gold_2_2_lsbg_galfit_v3.2.fits')
#Galfit_res[1].header.keys

Now extract the photometric and structural properties of the objects.

In [3]:
# Pull the per-object columns out of the table stored in HDU 1.
# Related columns are unpacked together; the access order is unchanged.

# Identifiers (TODO: the meaning of OBJECT_NUMBER is still unclear)
coadd_object_id, object_num = (
    Galfit_res[1].data[column] for column in ("COADD_OBJECT_ID", "OBJECT_NUMBER")
)
# Sky coordinates
RA, DEC = (Galfit_res[1].data[column] for column in ("RA", "DEC"))
# A_IMAGE / B_IMAGE columns
A_IMAGE, B_IMAGE = (Galfit_res[1].data[column] for column in ("A_IMAGE", "B_IMAGE"))
# Effective radius and its error, each multiplied by 0.263
# (presumably the pixel scale in arcsec/pixel -- TODO confirm)
R_eff, R_eff_err = (
    0.263 * Galfit_res[1].data[column] for column in ("RE_G", "RE_ERR_G")
)
# Sersic index and its error
n_ser, n_ser_err = (Galfit_res[1].data[column] for column in ("N", "N_ERR"))
# Magnitudes in g, r, i
mag_g, mag_r, mag_i = (
    Galfit_res[1].data[column] for column in ("MAG_G", "MAG_R", "MAG_I")
)
# Errors on the g, r, i magnitudes
mag_g_err, mag_r_err, mag_i_err = (
    Galfit_res[1].data[column] for column in ("MAG_ERR_G", "MAG_ERR_R", "MAG_ERR_I")
)

Now import the coadd IDs of the bad objects.

In [4]:
# Load the table of rejected objects; its 'Coadd_id' column is what the
# following cells use to filter the catalogue.
df_bad = pd.read_csv("Coadd_bad.csv")
#df_bad.head()

Get the coadd_ids of the "bad" objects.

In [5]:
# Coadd IDs of the rejected objects as a plain array, plus a quick count.
bad_coadd = df_bad.Coadd_id.values
print(bad_coadd.shape[0])

99


Now create a dataframe that contains the coadd_ids of all objects. That way we can easily exclude the bad `coadd_ids` from the list of ids.

In [6]:
# Wrap the coadd IDs of all objects in a DataFrame.
# The IDs come from a FITS table, whose columns astropy exposes in big-endian
# byte order; they are converted to the machine's native order first because
# pandas does not reliably support non-native arrays.
# `astype(dtype.newbyteorder('='))` is used instead of
# `ndarray.byteswap().newbyteorder()`: the latter method was removed in
# NumPy 2.0, and the former also leaves already-native data untouched.
_coadd_ids = np.asarray(coadd_object_id)
df_all = pd.DataFrame(
    {'all_coadds': _coadd_ids.astype(_coadd_ids.dtype.newbyteorder('='))}
)

In [7]:
# Flag every catalogue ID that appears in the bad list, then keep the rest
# and report how many IDs survive.
mask = df_all['all_coadds'].isin(bad_coadd)
df_new = df_all.loc[~mask]
print(df_new.shape[0])

21324


Similarly, we can drop all properties (and not just the coadds) of those objects that have coadd_ids belonging in the bad coadd_id list.

In [8]:
from astropy.table import Table

# Read the same FITS catalogue again, this time as an astropy Table, and
# convert it to a pandas DataFrame so that complete rows (all columns) can be
# filtered in the next cell.
table = Table.read('y3_gold_2_2_lsbg_galfit_v3.2.fits')
pandas_df = table.to_pandas()

In [9]:
# Keep only the rows whose COADD_OBJECT_ID is absent from the bad list,
# then print the number of remaining rows.
df_new = pandas_df.loc[~pandas_df['COADD_OBJECT_ID'].isin(df_bad['Coadd_id'])]
print(df_new.shape[0])

21324


### Now, let's exclude duplicates and objects that are very close to each other.

First, exclude the duplicate coadds (we have three of them).

In [10]:
Coadds = df_new.COADD_OBJECT_ID.values
# True for every repeated COADD_OBJECT_ID except its first occurrence
mask_dupl = df_new.duplicated(subset=['COADD_OBJECT_ID'], keep='first')
# Drop the flagged repeats so that each ID is left exactly once
df_unique = df_new.loc[~mask_dupl]
#df_unique.head()
print(df_unique.shape[0])

21321


Get COADD_IDs, RAs and DECs of the  unique objects.

In [11]:
# IDs and sky positions of the de-duplicated rows, as plain arrays.
Coadds, RAs, DECs = (
    df_unique[column].values for column in ('COADD_OBJECT_ID', 'RA', 'DEC')
)

In [12]:
from astropy import units as u
from astropy.coordinates import SkyCoord

In [13]:
# Find, for every object, all *other* catalogue objects closer than `max_sep`.
#
# Outputs (one entry per neighbour found, in catalogue order of the reference
# object):
#   sep_dupls   -- separations in arcsec
#   Coadds_dupl -- COADD_OBJECT_IDs of the neighbours
#   RAs_dupl    -- RAs of the neighbours
#   DECs_dupl   -- DECs of the neighbours
#
# A close pair (a, b) is recorded twice: b when a is the reference object and
# a when b is.  The hard-coded index lists used further down (Bad_IDs /
# Good_IDs) index into Coadds_dupl, so the output order must not change.

n_size = len(RAs)
max_sep = 4. # Maximum allowed separation in arcsec
# NOTE(review): the notebook introduction quotes a 2" threshold while this
# value is 4" -- confirm which one is intended.

# Define the full catalog here
C_full = SkyCoord(ra=RAs*u.degree, dec=DECs*u.degree, frame='icrs')

# Per-object results are gathered in lists and concatenated once at the end.
# This keeps the original dtypes (concatenating onto an empty Python list
# would silently turn the integer IDs into floats).
sep_chunks, coadd_chunks, ra_chunks, dec_chunks = [], [], [], []

for i in range(n_size):
    C_loc = SkyCoord(ra=RAs[i]*u.degree, dec=DECs[i]*u.degree, frame='icrs')
    separ = C_full.separation(C_loc).arcsec #Separations in arcsec

    # Neighbours within max_sep.  `separ > 0` removes the object itself;
    # NOTE(review): it also hides objects at exactly identical coordinates.
    close = (separ < max_sep) & (separ > 0.0)
    if close.any():
        sep_chunks.append(separ[close])
        coadd_chunks.append(Coadds[close])
        ra_chunks.append(RAs[close])
        dec_chunks.append(DECs[close])

# Fall back to empty arrays of the right dtype when no close pair exists, so
# that the results can always be indexed like arrays.
sep_dupls = np.concatenate(sep_chunks) if sep_chunks else np.array([], dtype=float)
Coadds_dupl = np.concatenate(coadd_chunks) if coadd_chunks else Coadds[:0]
RAs_dupl = np.concatenate(ra_chunks) if ra_chunks else RAs[:0]
DECs_dupl = np.concatenate(dec_chunks) if dec_chunks else DECs[:0]

Get cutouts of all the duplicates to check which of them to keep.

In [14]:
import urllib
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

#print(Coadds_dupl.astype(int))

In [15]:
#N_cuts = len(DECs_dupl)
#zoom = 15
# ==================================
# ==================================
#for i in range(N_cuts):
    # Give a name to the figure. Name them as "Image_cand_(i).jpg",
    # where i is the number of the candidate
    # This is easy to change to ra, dec or coadd ID or whatever...
#    fig_name = "Image_cand_{0}.jpg".format(i)
    
    #Create now the name of the URL
    # This needs as (changing) inputs the RA, DEC of each object and the zoom
#    RA_loc = RAs_dupl[i] #The RA of the i-th object
#    DEC_loc = DECs_dupl[i] # The DEC of the i-th object
#    Coadd_loc = Coadds_dupl[i]
#    Coadd_loc = int(Coadd_loc)
#    url_name = "https://data.darkenergysurvey.org/fnalmisc/lsbgalfit/v3/segmap_{0}.png".format(Coadd_loc)
    
#    urllib.urlretrieve(url_name, fig_name) #Retreaves and saves each image

Now display the bad objects

In [16]:
#from IPython.display import Image
#from PIL import Image

In [17]:
#n_cols = 3
#n_rows = int(N_cuts/n_cols)+1

#fig,ax = plt.subplots(n_rows,n_cols, figsize=(16,3*n_rows))

#for i in range(N_cuts):
#    fig_name = "Image_cand_{0}.jpg".format(i)
#    image = Image.open(fig_name)
#    ax[i//3][i%3].imshow(image)
#    ax[i//3][i%3].set_title("{0}".format(i), fontsize=18)
#    ax[i//3][i%3].set_xticks([]) # Remove x ticks
#    ax[i//3][i%3].set_yticks([]) # Remove y ticks
    
    
#fig.show()

In [18]:
# Positions of the "bad" duplicates, i.e. the close-pair members to reject.
# They are used as indices into the duplicates array (Coadds_dupl).
Bad_IDs = np.array([
    2, 3, 5, 7, 9, 11, 14, 15, 17, 20,
    22, 24, 26, 29, 30, 33, 34, 37, 38, 39,
    42, 45, 46, 48, 52, 54, 56, 58, 60, 63,
    64, 66, 68, 70, 73,
])

In [24]:
# Pick the coadd IDs of the rejected duplicates: the entries of Coadds_dupl
# at the positions listed in Bad_IDs
Bad_dupls_coadd = Coadds_dupl[Bad_IDs]
# Cast them to integers so they can be matched against COADD_OBJECT_ID
Bad_coadd_dupls = Bad_dupls_coadd.astype(int)

In [None]:
# Create a new dataframe where we exclude the bad objects

In [28]:
# Final catalogue: every unique row whose ID is not a rejected duplicate,
# followed by the resulting row count.
df_final = df_unique.loc[~df_unique['COADD_OBJECT_ID'].isin(Bad_coadd_dupls)]
print(df_final.shape[0])

21286


In [34]:
# Save the final catalogue (all columns of df_final) as csv.
# With pandas' defaults the DataFrame index is written as the first column.
df_final.to_csv("Galfit_final_IDs.csv")

In [55]:
# Build the table of coadd IDs of the duplicate objects that stay in the
# catalogue (saved as csv in the next cell).
# Good_IDs are positions within Coadds_dupl, like Bad_IDs.
# NOTE(review): index 6 is listed in neither Bad_IDs nor Good_IDs -- confirm
# whether that omission is intentional.
Good_IDs = np.asarray([0,1,4,8,10,12,13,16,18,19,21,23,25,27,28,31,32,35,36,
           40,41,43,44,47,49,50,51,53,55,57,59,61,62,65,67,69,71,72])

Good_dupl_coadds = Coadds_dupl[Good_IDs]
Good_dupl_coadds = Good_dupl_coadds.astype(int)
# Create a data frame.
# `astype(int)` already returns a native-byte-order array, so it goes into the
# DataFrame as is.  A `.byteswap().newbyteorder()` round-trip here would make
# the array non-native again, and `ndarray.newbyteorder` does not exist in
# NumPy >= 2.0.
df_dupls = pd.DataFrame({'Coadd_IDs_dupls': Good_dupl_coadds})

In [57]:
# Save the coadd IDs of the retained duplicates as csv.
# With pandas' defaults the DataFrame index is written as the first column.
df_dupls.to_csv("Dupls_coadd_IDs.csv")

In [59]:
# Convert the final pandas catalogue back into an astropy Table.
table_final = Table.from_pandas(df_final)

In [None]:
# Write the final catalogue table (`table_final`) to a FITS file.
table_final.write('test_table.fits')