# `sewpy` Tutorial

#### This notebook will go through how to generate, for every cluster, the table that merges the GoGreen catalogs with the output from SExtractor (in particular we want the FWHM in pixels and arcseconds, as well as the flags and `FLUX_RADIUS`). First, here are the imports I use. Additionally, this can be executed in a script called `sextract_light_v2.py`

In [None]:
# This just ensures that I am in the folder that contains my scripts. I tend to keep the scripts at the highest level of my working folder. Execute this first to ensure the custom imports work properly

In [None]:
cd /path/to/custom/module

In [4]:
import os
import sys
import platform
import pandas as pd 
import math
import glob
import csv
import sewpy
import cat_match
import matplotlib.pyplot as plt
#from cat_match import main, phot_match, load_catalog, plot_catalog, gg_table_main, show_gg # <-- this is a custom module I made that has routines to do catalog matching
import cat_match 
from astropy.io import ascii,fits
from astropy.table import Table, Column
from astropy.utils.data import get_pkg_data_filename
from astropy.visualization import simple_norm
from astropy.wcs import WCS
import numpy as np 
from astropy.coordinates import SkyCoord
from astropy import units as u
import time

#### Here is some code that makes the notebook agnostic with respect to directory structure (tested on Pop!_OS, Manjaro, and macOS). You can modify this code to suit your system


In [5]:
# First five characters of the terse platform string; this is compared against
# 'macOS', and every other value is treated as Linux.
os_check = platform.platform(terse=True)[:5]

# Only the machine-specific prefix differs between systems; the project layout
# underneath it is the same, so the data roots are derived once.
preamble = '/path/to/mac/' if os_check == 'macOS' else '/path/to/linux/'
root2 = preamble + 'Deconvolution/'
root1 = root2 + 'Data/PHOTOMETRY/PHOTOM_CATS/'

#### Now, here are the functions that we will use to help run the script

In [6]:
def match_catalogs(gg_df, catalogs_dict, ra_key='ra_x', dec_key='dec_x'):
    """
    Matches a main catalog (gg_df) to multiple catalogs in a dictionary using astropy's
    match_to_catalog_sky, and appends the matched rows as extra columns.

    Every row of the main catalog receives its nearest neighbour from each catalog,
    whatever the distance; the separation is stored so callers can apply their own cut.
    Rows are paired by position, so the main catalog may carry any index (filtered,
    unsorted, non-unique); that index is restored on the returned frame. The input
    frame is not modified.

    Parameters:
    gg_df (pandas.DataFrame): The main catalog with columns for RA and Dec (degrees).
    catalogs_dict (dict): A dictionary where keys are catalog names and values are
        pandas DataFrames to be matched. The catalog stored under key ``name`` must
        provide the columns ``ra_{name}`` and ``dec_{name}`` (degrees).
    ra_key (str): The key for RA in the main catalog. Default is 'ra_x'.
    dec_key (str): The key for Dec in the main catalog. Default is 'dec_x'.

    Returns:
    pandas.DataFrame: The main catalog plus, for each catalog, its matched columns
        prefixed with ``{name}_`` and a ``{name}_separation_2d`` column in arcseconds.
    """

    # Create SkyCoord object for the main catalog
    gg_coords = SkyCoord(ra=gg_df[ra_key], dec=gg_df[dec_key], unit='deg')

    # `idx` returned by the match is positional, so do all the joining on a plain
    # 0..n-1 index and put the caller's index back at the very end.
    original_index = gg_df.index
    merged = gg_df.reset_index(drop=True)

    for name, catalog in catalogs_dict.items():
        # Create SkyCoord object for the catalog to match
        cat_coords = SkyCoord(ra=catalog[f'ra_{name}'], dec=catalog[f'dec_{name}'], unit='deg')

        # Nearest neighbour in `catalog` for every main-catalog source
        idx, d2d, _ = gg_coords.match_to_catalog_sky(cat_coords)

        # One matched row per main-catalog row, in main-catalog order
        matched_df = catalog.iloc[idx].reset_index(drop=True)

        # Rename columns to avoid conflicts
        matched_df = matched_df.add_prefix(f"{name}_")

        # Add separation columns to matched_df
        matched_df[f'{name}_separation_2d'] = d2d.arcsec

        # Both frames share the same 0..n-1 index, so this is a row-by-row join
        merged = pd.merge(merged, matched_df, left_index=True, right_index=True, how='outer')

    merged.index = original_index
    return merged


def print_band(cluster, file_list, prefix, suffix):
    """Record the cluster, band label and path of every file in ``file_list``.

    The band label is whatever remains of the path once ``prefix`` and ``suffix``
    have been stripped by ``trim_mos_fits``. Nothing is printed or returned: one
    entry per file is appended to the module-level lists ``clust_list``,
    ``bands_list`` and ``files_list``, which must already exist.
    """
    for path in file_list:
        band = trim_mos_fits(path, prefix, suffix)
        clust_list.append(cluster)
        bands_list.append(band)
        files_list.append(path)


def trim_mos_fits(input_string, prefix, suffix):
    """Return ``input_string`` with its leading ``prefix`` and trailing ``suffix`` removed.

    Only the leading prefix and the trailing suffix are cut; further occurrences
    of either inside the remaining text are left untouched.

    Parameters:
    input_string (str): The full string, e.g. a file path.
    prefix (str): Text the string must start with.
    suffix (str): Text the string must end with.

    Returns:
    str: The part of ``input_string`` between ``prefix`` and ``suffix``.

    Raises:
    ValueError: If ``input_string`` does not start with ``prefix`` and end with ``suffix``.
    """
    if not (input_string.startswith(prefix) and input_string.endswith(suffix)):
        raise ValueError(
            f"The input string {input_string!r} does not start with {prefix!r} "
            f"and end with {suffix!r}."
        )
    # An explicit end position (rather than a negative slice) keeps an empty suffix working.
    return input_string[len(prefix):len(input_string) - len(suffix)]


def file_table():
    """Fill the module-level ``df`` with one row per mosaic image of every cluster.

    For each name in the module-level ``clusters`` list, the ``*.fits`` files in
    ``{root2}{cluster}/GOGREEN_IMAGES/native/images/`` are listed in sorted order
    and handed to ``print_band``, which derives the band label from a path of the
    form ``.../mos_<band>_3.fits``. The accumulated module-level lists are then
    stored in the 'Cluster', 'Band' and 'File' columns of ``df``.

    NOTE(review): ``df``, ``clust_list``, ``bands_list`` and ``files_list`` are not
    created here; they have to exist before this function is called.
    """
    suffix = '_3.fits'

    for cluster in clusters:
        data_path = f'{root2}{cluster}/GOGREEN_IMAGES/native/images/'
        fits_paths = glob.glob(os.path.join(data_path, '*.fits'))
        fits_paths.sort()
        print_band(cluster, fits_paths, f'{data_path}mos_', suffix)

    df['Cluster'] = clust_list
    df['Band'] = bands_list
    df['File'] = files_list







In [None]:
# Table listing the image files per cluster; main() below reads its 'Cluster',
# 'Band' and 'Data File' columns. This can also be the final_merge table made
# in Tutorial 1.
cluster_df = pd.read_csv('final_file_merge.csv')
cluster_df

#### Below are the parameters we want to obtain using `sewpy`

In [8]:
# SExtractor output columns requested through sewpy
params = [
    "NUMBER",
    "X_IMAGE",
    "Y_IMAGE",
    "FWHM_IMAGE",
    "FWHM_WORLD",
    "FLUX_RADIUS",
    "FLUX_AUTO",
    "FLUX_APER",
    "FLAGS",
]

# Path of the SExtractor executable handed to sewpy
sex_path = '/usr/local/bin/sex'

# <-- the clusters
clusters = [
    'SpARCS0035',
    'SpARCS0219',
    'SpARCS0335',
    'SpARCS1034',
    'SpARCS1051',
    'SpARCS1616',
    'SpARCS1634',
    'SpARCS1638',
    'SPT0546',
    'SPT0205',
    'SPT2106',
]



#### Now here is the main code to execute. The input is a list containing the cluster names

In [9]:
def _sextract_band(data_file, band, work_dir):
    """Run SExtractor on one image and return its catalog with sky coordinates.

    Parameters:
    data_file (str): Path of the FITS image to analyse.
    band (str): Band label; used for the sewpy sub-directory and the column names.
    work_dir (str): Parent working directory for sewpy.

    Returns:
    pandas.DataFrame: The SExtractor catalog with X_IMAGE, Y_IMAGE and FLUX_RADIUS
        renamed to ``X_IMAGE_{band}``, ``Y_IMAGE_{band}`` and ``R_e{band}``, plus
        ``ra_{band}`` / ``dec_{band}`` in degrees. Rows without an RA are dropped.
    """
    # Only the header is needed for the WCS; close the file again right away.
    with fits.open(data_file) as hdul:
        wcs = WCS(hdul[0].header)

    sew = sewpy.SEW(workdir=f'{work_dir}/{band}', params=params, sexpath=sex_path)
    catalog = sew(data_file)['table'].to_pandas()
    catalog = catalog.rename(columns={
        'X_IMAGE': f'X_IMAGE_{band}',
        'Y_IMAGE': f'Y_IMAGE_{band}',
        'FLUX_RADIUS': f'R_e{band}',
    })

    # SExtractor pixel positions follow the FITS convention (first pixel centred
    # on 1,1) whereas pixel_to_world expects 0-based pixels, hence the "- 1".
    # The conversion is done on whole arrays instead of one source at a time.
    sky = wcs.pixel_to_world(
        catalog[f'X_IMAGE_{band}'].to_numpy() - 1,
        catalog[f'Y_IMAGE_{band}'].to_numpy() - 1,
    )
    catalog[f'ra_{band}'] = sky.ra.deg
    catalog[f'dec_{band}'] = sky.dec.deg

    return catalog.dropna(subset=[f'ra_{band}']).reset_index(drop=True)


def main(clusters):
    """Run SExtractor on every image of each cluster and match the results to GoGreen.

    For each cluster the merged GoGreen table
    ``{root2}{cluster}/{cluster}_gal_table_photomcats_merged.csv`` is read, SExtractor
    is run once per row of ``cluster_df`` belonging to that cluster (the image in
    'Data File', labelled with the row's own 'Band'), and all band catalogs are
    matched to the GoGreen table with ``match_catalogs``. The matched table is displayed.

    Relies on the notebook-level names ``cluster_df``, ``root2``, ``params`` and
    ``sex_path``.

    Parameters:
    clusters (iterable of str): Cluster names to process.

    Returns:
    None
    """
    for cluster in clusters:
        cat_file_gg_csv = f'{root2}{cluster}/{cluster}_gal_table_photomcats_merged.csv'
        cat_file_gg = pd.read_csv(cat_file_gg_csv)

        filtered_df = cluster_df.loc[cluster_df['Cluster'] == cluster]
        if filtered_df.empty:
            # Nothing to extract; avoids showing a stale table from the previous cluster.
            print(f'No images listed for {cluster}; skipping.')
            continue

        work_dir = f'{root2}{cluster}/sewpy'

        # Each row pairs one band with its own image, so SExtractor runs exactly
        # once per image and every band keeps the catalog of its own file.
        catalog_dic = {}
        for band, data_file in zip(filtered_df['Band'], filtered_df['Data File']):
            catalog_dic[band] = _sextract_band(data_file, band, work_dir)

        # Match once, after all band catalogs for this cluster are available.
        matched_table = match_catalogs(cat_file_gg, catalog_dic)
        #matched_table.to_csv(f'{root2}/merged_catalogs/{cluster}_gg_sextractor_merged.csv')
        display(matched_table)

In [None]:
# Start of the wall-clock timing for the main() run in the next cell
t0 = time.time()

In [None]:
# Run the SExtractor + catalog-matching pipeline for every cluster in `clusters`
main(clusters)

In [None]:
# Stop the timer started before main() and report the elapsed wall-clock time
t1 = time.time()
total_time = (t1 - t0)/60  # seconds -> minutes
print(f'Total time: {total_time} minutes')

# Measure the FWHM of the PSFs
#### Here, we will measure the FWHM of all the PSFs we generated earlier, plot each PSF, and show the measured FWHM on each plot

In [16]:
# Folder searched for the PSF FITS images in the next cell
data_path = f'{root2}PSFs'
# Working directory handed to sewpy for the PSF runs
work_dir = f'{root2}sewpy_psf/'

In [None]:
psf_files = sorted(glob.glob(os.path.join(data_path, '*.fits')))

# The SExtractor configuration is identical for every PSF image, so one SEW
# instance is built up front instead of one per file.
sew = sewpy.SEW(workdir=work_dir, params=params, sexpath=sex_path)

for file in psf_files:
    # Plot title: file name without its directory and extension
    updated_string = os.path.splitext(os.path.basename(file))[0]

    sex_output = sew(file)
    catalog = sex_output['table'].to_pandas()
    if catalog.empty:
        print(f'No sources detected in {updated_string}; skipping.')
        continue

    # The brightest detection is taken to be the PSF itself. idxmax picks exactly
    # one row, so the values below are plain scalars even if fluxes tie.
    new_cat = catalog.loc[catalog['FLUX_AUTO'].idxmax()]

    # SExtractor positions are 1-based (FITS convention); imshow pixels are 0-based.
    pixel_x = new_cat['X_IMAGE'] - 1
    pixel_y = new_cat['Y_IMAGE'] - 1
    size = new_cat['FWHM_IMAGE']
    # NOTE(review): .2 is presumably the pixel scale in arcsec/pixel and the /2
    # turns the FWHM into a half-width -- confirm both before quoting this number.
    size_arc = np.round((float(size) * .2)/2, 3)

    with fits.open(file) as hdul:
        image_data = hdul[0].data

        fig, ax = plt.subplots()
        ax.imshow(image_data, origin='lower', cmap='viridis')
        ax.text(40, 100, f'PSF Size {size_arc} arcseconds', fontsize=12, color='white')
        ax.scatter(pixel_x, pixel_y, s=size*2, edgecolor='red', facecolor='none', label='FWHM')
        ax.set_title(updated_string)
        plt.show()
        # Release the figure so a long list of PSFs does not pile up in memory.
        plt.close(fig)