In [None]:
%matplotlib inline
from astropy.table import Table, Column
from matplotlib import pyplot as plt
from skimage import io
import numpy as np
from IPython import display
import os

In [None]:
# Load the full results catalog and build a DataFrame view of it in which
# every bytes value has been decoded to str; all other values pass through.
# NOTE(review): DataFrame.applymap is deprecated in pandas >= 2.1 in favour of
# DataFrame.map -- switch once the minimum supported pandas version allows it.
results = Table.read('./catalogs/results_table_full_0.5_2.0.fits')
results_df = results.to_pandas().applymap(
    lambda value: value.decode() if isinstance(value, bytes) else value
)

In [None]:
# Boolean mask of the rows whose NAME occurs more than once.
# keep=False flags every occurrence, not only the repeats.
name_and_ra = results_df[['NAME', 'RA_detc']]
dup_mask = name_and_ra.duplicated(subset='NAME', keep=False)

In [None]:
# Sub-catalog holding only the rows flagged by dup_mask; the original row
# labels of results_df are retained.
dup_cat = results_df.loc[dup_mask]

In [None]:
# Convert the duplicate rows back into an astropy table and save them to their
# own FITS file, filling masked entries with -1.0 first.
dup_cat_tab = Table.from_pandas(dup_cat)
dup_cat_filled = dup_cat_tab.filled(-1.0)
dup_cat_filled.write('./catalogs/results_table_dups_0.5_2.0.fits', overwrite=True)

## Column for Duplicates

Add a column to the results table indicating whether or not the source is a duplicate.

Then we mark all the current duplicates as `True` and loop through them.

In [None]:
# Create the boolean DUPLICATE column, initialised to False for every row.
# Item assignment with an explicit per-row array is used instead of
# add_column(...) with a scalar so that this cell can be re-run safely: an
# existing DUPLICATE column is replaced (reset to all False) rather than
# raising a duplicate-column error, and an empty table is handled as well.
results['DUPLICATE'] = np.zeros(len(results), dtype=bool)

In [None]:
# Flag the duplicated rows in the astropy table. The row labels carried by
# dup_cat are used as positions into the table column.
# NOTE(review): assumes results_df still has its default 0..N-1 index -- confirm.
dup_rows = dup_cat.index.to_numpy()
results['DUPLICATE'][dup_rows] = True

## Try to read previous results

In [None]:
# Re-use the outcome of an earlier visual inspection, if one was saved.
#
# The DUPLICATE flags stored in the inspected catalog are merged onto the
# current results by (NAME, INDEX). Rows that get no flag from the merge are
# "new". If a new row shares its NAME with any other row of the merged catalog
# (new or previously inspected) the field has to be inspected by eye, so
# nothing is written and `results` / `results_df` are left untouched.
# Otherwise the new rows are marked as non-duplicates and the merged catalog
# replaces `results` / `results_df` and is written back to disk.
inspected_path = './catalogs/results_table_inspected_0.5_2.0.fits'

if os.path.isfile(inspected_path):
    results_previous = Table.read(inspected_path)
    results_previous_df = results_previous.to_pandas()
    results_previous_df = results_previous_df.applymap(
        lambda value: value.decode() if isinstance(value, bytes) else value
    )
    columns = ['NAME', 'INDEX', 'DUPLICATE']
    results_p_df_small = results_previous_df[columns]

    # merge the previous results into the new results
    merged = results_df.merge(results_p_df_small, how='outer', on=['NAME', 'INDEX'])

    # rows without a stored flag were not part of the previous inspection
    is_new = merged.DUPLICATE.isna()
    has_new_rows = bool(is_new.any())

    # A new row is a duplicate when its NAME occurs more than once anywhere in
    # the merged catalog -- comparing the new rows only with each other would
    # miss a new detection in a field that is already catalogued.
    new_names = merged.loc[is_new, 'NAME']
    clashes = merged.NAME.isin(new_names) & merged.duplicated('NAME', keep=False)

    if clashes.any():
        print('THERE ARE DUPLICATES!!! Need to inspect.')
    else:
        merged.loc[is_new, 'DUPLICATE'] = False
        # the NaN placeholders leave a non-bool dtype behind, which causes
        # problems when the table is written out
        merged['DUPLICATE'] = merged['DUPLICATE'].astype(bool)

        results_df = merged  # write the merged df back
        results = Table.from_pandas(results_df)
        results.filled(-1.0).write(inspected_path, overwrite=True)

        if has_new_rows:
            print('THERE ARE NO DUPLICATES!!! Catalog updated.')

## Now we visually inspect the duplicate fields

This lets us identify which detection is the "primary" source.

In [None]:
# Work on a DataFrame copy of the table: the inspection loop below iterates
# with pandas' iterrows() rather than with astropy's own row iteration.
results_df = Table.to_pandas(results)

In [None]:
# Interactive inspection of the fields that contain duplicated sources.
#
# For every row still flagged as DUPLICATE the zoomed XRT image of its field
# is shown (re-drawn only when the field changes between consecutive rows) and
# the user is asked whether that row is the main source of the field.
# Answering "y" clears the DUPLICATE flag of the row in results_df.

fig = plt.figure(1, figsize=(10, 10))
ax = fig.add_subplot()

plt.ion()


def _field_name(raw_name):
    """Return the field name as text, with spaces replaced by underscores.

    NAME holds bytes when the table comes straight from the FITS file, but
    str once the table has been rebuilt from a decoded DataFrame, so both
    types are accepted.
    """
    if isinstance(raw_name, bytes):
        raw_name = raw_name.decode()
    return raw_name.replace(' ', '_')


def _show_field(name):
    """Replace the current cell output with the zoomed XRT image of `name`."""
    display.clear_output(wait=True)
    jpg_array = io.imread(f'../data/{name}/{name}_XRT_vtp_zoom.png')

    # drop the previous field's image so that images do not pile up on the axes
    ax.clear()
    ax.imshow(jpg_array, origin='upper', interpolation='bicubic')
    ax.set_title(f'{name}')

    # show the bare image: no frame and no ticks
    for side in ('right', 'left', 'top', 'bottom'):
        ax.spines[side].set_visible(False)
    ax.yaxis.set_ticks([])
    ax.xaxis.set_ticks([])

    plt.show()
    display.display(fig)


name_old = ''
for index, row in results_df.iterrows():
    if not row.DUPLICATE:
        continue

    name = _field_name(row.NAME)
    if name != name_old:
        _show_field(name)
        name_old = name

    answer = input(f"Is {row.INDEX} the main source? [n]y.")

    # anything other than "y" (case-insensitive) keeps the row as a duplicate
    if answer.strip().lower() == 'y':
        print('updated')
        results_df.loc[index, 'DUPLICATE'] = False

# Save the results!

This way we don't have to look at all of them again.

Uncomment the next cell if you are going to redo the inspections!

In [None]:
def _bytes_to_text(value):
    """Decode a bytes value to str; return any other value unchanged."""
    if isinstance(value, bytes):
        return value.decode()
    return value


# Decode the remaining bytes values element-wise before the frame is turned
# back into a table.
# NOTE(review): DataFrame.applymap is deprecated in pandas >= 2.1 in favour of
# DataFrame.map -- switch once the minimum supported pandas version allows it.
results_df = results_df.applymap(_bytes_to_text)

In [None]:
# Rebuild the astropy table from the inspected DataFrame and save it, filling
# masked entries with -1.0, so the inspection does not have to be repeated.
results = Table.from_pandas(results_df)
results_filled = results.filled(-1.0)
results_filled.write('./catalogs/results_table_inspected_0.5_2.0.fits', overwrite=True)