In [None]:
from astropy.coordinates import SkyCoord
from astropy.io import ascii
from astropy.table import Column, hstack, setdiff, Table, TableMergeError, vstack
import astropy.units as u
import os

%run Utility.ipynb

def build_catalog(main_cat_path, cat_folder, arc_range, save_path, format_cat, ow):
    """Build the super-catalog: all catalogs cross-matched and merged together.

    The main catalog is read first. Every other file found in ``cat_folder``
    is then cross-matched against the catalog accumulated so far: matched
    rows are stacked horizontally (columns of both catalogs side by side) and
    the unmatched rows of both catalogs are appended below them. The result
    is written to ``MegaCatalog.cat`` inside ``save_path``.

    Parameters
    ----------
    main_cat_path : str
        Path of the main catalog file. Its base name (without extension) is
        used to look up its RA/Dec column names and to skip the same file
        when it is found inside ``cat_folder``.
    cat_folder : str
        Folder containing the catalogs to merge. Sub-directories are ignored.
    arc_range : float
        Maximum separation, in arc-seconds, for two sources to be considered
        a match.
    save_path : str
        Folder in which ``MegaCatalog.cat`` is written. It is created when it
        does not exist.
    format_cat : str
        Value forwarded as ``format`` to ``astropy.io.ascii.write``.
    ow : bool
        Value forwarded as ``overwrite`` to ``astropy.io.ascii.write``.

    Notes
    -----
    ``get_ra_dec_names``, ``reset_main_catalog_columns`` and
    ``same_name_diff_types`` are defined outside this block (presumably
    loaded by ``%run Utility.ipynb``). Coordinates are interpreted as degrees.
    """
    # Creates the save_path folder if it does not already exist
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # Reads the main catalog file into a Table object
    main_cat = ascii.read(main_cat_path)

    # The name of the main catalog (file name without its extension)
    main_cat_name = os.path.splitext(os.path.basename(main_cat_path))[0]

    # Names of the right ascension ([0]) and declination ([1]) columns of the main catalog
    main_ra_dec = get_ra_dec_names(main_cat_name)

    # Max range at which a pair is considered to be properly matched
    max_sep = arc_range * u.arcsec

    # Snapshot of the catalog as it was at the start of the current iteration;
    # it is handed to reset_main_catalog_columns to restore renamed columns
    temp_cat = main_cat

    # The resulting catalog, a compilation of all the catalogs in cat_folder
    final_cat = main_cat

    for file_name in os.listdir(cat_folder):

        # os.path.join works whether or not cat_folder ends with a separator
        item = os.path.join(cat_folder, file_name)

        # Sub-directories (e.g. notebook checkpoint folders) are not catalogs
        if not os.path.isfile(item):
            continue

        # Name of the secondary catalog
        item_name = os.path.splitext(file_name)[0]

        print("Secondary Catalog is: " + item_name)

        print("Main Catalog Length is:", len(final_cat))

        # The main catalog must not be cross-matched with itself
        if item_name == main_cat_name:
            continue

        # Resets the column names of final_cat that were changed by hstack
        final_cat = reset_main_catalog_columns(temp_cat, final_cat)

        # Sets the catalog as it is by the start of this iteration
        temp_cat = final_cat

        # Reads the secondary catalog into a Table object
        secondary_cat = ascii.read(item)

        # Names of the right ascension and declination columns of the secondary catalog
        secondary_ra_dec = get_ra_dec_names(item_name)

        # Sky coordinates of the accumulated (main) catalog
        c_1 = SkyCoord(
            ra=final_cat[main_ra_dec[0]].astype(float) * u.degree,
            dec=final_cat[main_ra_dec[1]].astype(float) * u.degree,
        )

        # Sky coordinates of the secondary catalog
        c_2 = SkyCoord(
            ra=secondary_cat[secondary_ra_dec[0]].astype(float) * u.degree,
            dec=secondary_cat[secondary_ra_dec[1]].astype(float) * u.degree,
        )

        # idx: for each main row, the index of the closest secondary row
        # d2d: the on-sky separation of that pair (the 3-D distance is unused)
        idx, d2d, _ = c_1.match_to_catalog_sky(c_2)

        # Boolean mask of the main rows whose closest neighbour is within max_sep
        idx_sep = d2d < max_sep

        # DENIS names its coordinate columns differently from the main catalog,
        # so after a vertical stack its rows would be empty in the main RA/Dec
        # columns and SkyCoord would fail on the next iteration. Copying the
        # values into columns named like the main ones avoids that.
        # NOTE(review): only DENIS is handled; other catalogs with different
        # coordinate column names would presumably need the same treatment.
        if item_name == "DENIS":
            secondary_cat[main_ra_dec[0]] = secondary_cat[secondary_ra_dec[0]].copy()
            secondary_cat[main_ra_dec[1]] = secondary_cat[secondary_ra_dec[1]].copy()

        # The rows of the main catalog with a match
        main_cat_match = final_cat[idx_sep]

        # The rows of the secondary catalog with a match
        secondary_cat_match = secondary_cat[idx[idx_sep]]

        if len(main_cat_match) != 0:

            # Rows of the main catalog with no match
            main_cat_no_match = setdiff(
                final_cat, main_cat_match, keys=[main_ra_dec[0], main_ra_dec[1]]
            )

            # Rows of the secondary catalog with no match
            secondary_cat_no_match = setdiff(
                secondary_cat, secondary_cat_match, keys=secondary_ra_dec
            )

            # Stacks the matched rows horizontally, keeping the columns of both catalogs
            final_cat = hstack(
                [main_cat_match, secondary_cat_match], table_names=["", item_name]
            )

            # Resets the names of the main-catalog columns renamed by hstack
            final_cat = reset_main_catalog_columns(temp_cat, final_cat)

            print("Match!")

            # Appends the unmatched main rows exactly once. This is done
            # outside the try block so that a failure while appending the
            # secondary rows cannot cause these rows to be appended twice.
            final_cat = vstack([final_cat, main_cat_no_match])

            # Some catalogs have columns with the same name but different
            # dtypes; same_name_diff_types renames the clashing columns of the
            # secondary catalog so they are not stacked onto each other.
            try:
                final_cat = vstack([final_cat, secondary_cat_no_match])
            except TableMergeError:
                secondary_cat_no_match = same_name_diff_types(
                    final_cat, secondary_cat_no_match, item_name
                )
                final_cat = vstack([final_cat, secondary_cat_no_match])

        else:

            # No match at all: the whole secondary catalog is appended, with
            # the same handling of same-name/different-dtype columns
            try:
                final_cat = vstack([final_cat, secondary_cat])
            except TableMergeError:
                secondary_cat = same_name_diff_types(final_cat, secondary_cat, item_name)
                final_cat = vstack([final_cat, secondary_cat])

    # Saves the final catalog inside save_path, with format format_cat
    ascii.write(
        final_cat,
        os.path.join(save_path, "MegaCatalog.cat"),
        format=format_cat,
        overwrite=ow,
    )

def _to_skycoord(cat, ra_name, dec_name):
    """Return a SkyCoord array built from the RA/Dec columns of ``cat``."""
    try:
        # Usual case: both columns hold numbers expressed in degrees
        return SkyCoord(
            ra=cat[ra_name].astype(float) * u.degree,
            dec=cat[dec_name].astype(float) * u.degree,
        )
    except (TypeError, ValueError):
        # Some catalogs have right ascension written in hours instead of
        # degrees. The float conversion of text values raises ValueError, so
        # it is caught together with TypeError before retrying with the
        # columns interpreted as (hour angle, degrees).
        return SkyCoord(
            ra=cat[ra_name],
            dec=cat[dec_name],
            unit=(u.hourangle, u.deg),
            frame='icrs',
        )


def cross_match(cat_1, cat_1_ra, cat_1_dec, cat_2, cat_2_ra, cat_2_dec, k = 2, nthneighbor = 1, sep = False):
    """Cross-match two catalogs and return only the matched sources.

    Parameters
    ----------
    cat_1 : astropy.table.Table
        The first catalog.
    cat_1_ra, cat_1_dec : str
        Names of the right-ascension and declination columns of ``cat_1``.
    cat_2 : astropy.table.Table
        The second catalog.
    cat_2_ra, cat_2_dec : str
        Names of the right-ascension and declination columns of ``cat_2``.
    k : float, optional
        The cross-matching radius, in arc-seconds.
    nthneighbor : int, optional
        Which closest neighbour to keep. When a catalog is cross-matched with
        itself in order to strip repeated sources, use ``nthneighbor = 2`` so
        that a source is not matched with itself.
    sep : bool, optional
        When true, a ``d2d`` column holding the separation of each matched
        pair, in degrees, is added to the result (an existing ``d2d`` column
        is overwritten).

    Returns
    -------
    astropy.table.Table
        A new table with one row per matched pair and all the columns of both
        catalogs. A column name common to both catalogs, e.g. ``EXAMPLE``, is
        renamed to ``EXAMPLE_`` (first catalog) and ``EXAMPLE_2`` (second
        catalog). When these names are already taken, because an input is
        itself the product of a previous cross-match, the suffix is increased
        to 3, 4, ... until the names are unique.
    """
    c_1 = _to_skycoord(cat_1, cat_1_ra, cat_1_dec)
    c_2 = _to_skycoord(cat_2, cat_2_ra, cat_2_dec)

    # idx: for each row of cat_1, the index of the matched row of cat_2
    # d2d: the on-sky separation of that pair (the 3-D distance is unused)
    idx, d2d, _ = c_1.match_to_catalog_sky(c_2, nthneighbor = nthneighbor)

    # Max range at which a pair is considered to be properly matched
    max_sep = k * u.arcsec

    # Boolean mask of the cat_1 rows whose match lies within max_sep
    idx_sep = d2d < max_sep

    # The rows of the first catalog with a match
    cat_1_m = cat_1[idx_sep]

    # The rows of the second catalog with a match
    cat_2_m = cat_2[idx[idx_sep]]

    # Stacks the matched rows horizontally; iter_col_name starts with the
    # suffix "2" and increases it while the resulting column names clash
    tabular_f = iter_col_name(cat_1_m, cat_2_m, 2)

    if sep:
        tabular_f['d2d'] = d2d[idx_sep].degree

    return tabular_f

def stack_catalogs(path, save_path):
    """Stack all catalogs in a target folder into a single CSV catalog.

    Parameters
    ----------
    path : str
        Folder containing the catalog files. Sub-directories are ignored.
    save_path : str
        Path of the output file. It is written in CSV format and overwritten
        if it already exists.
    """
    tables = []

    for file_name in os.listdir(path):

        # os.path.join works whether or not path ends with a separator
        item = os.path.join(path, file_name)

        # Sub-directories (e.g. notebook checkpoint folders) are not catalogs
        if not os.path.isfile(item):
            continue

        print(item)

        tables.append(ascii.read(item))

    # Stacks everything in a single call instead of re-copying a growing
    # table once per file; an empty folder yields an empty table
    full_cat = vstack(tables) if tables else Table()

    ascii.write(full_cat, save_path, format = 'csv', overwrite = True)

def iter_col_name(cat_1, cat_2, i):
    """Horizontally stack two tables, retrying with a larger suffix on clashes.

    Used by the cross-matching function. ``i`` is the suffix given to the
    columns of ``cat_2`` whose names also exist in ``cat_1``. Whenever the
    stack fails with a TableMergeError the suffix is increased by one and
    the stack is attempted again.
    """
    suffix = str(i)

    try:
        return hstack([cat_1, cat_2], table_names = ["", suffix])
    except TableMergeError:
        # Names produced with this suffix are already taken: try the next one
        return iter_col_name(cat_1, cat_2, int(suffix) + 1)