In [1]:
import os
import pandas as pd
import numpy as np
from astropy.io import fits
import matplotlib.pyplot as plt

directory: str = '/data/HSC/HSC_v6/step1/g_band_sextractor/test_set_subset'
window: str = '30px_diameter'
data = []

for filename in os.listdir(directory):
    if filename.endswith(".cat"):
        file_path = os.path.join(directory, filename)
        with open(file_path, "r") as f:
            
            object_id = filename.split("_")[2].split(".")[0]
            
            segmented_filepath = f'{directory}/test_segmented_{object_id}.fits'
            img = fits.getdata(segmented_filepath)
            img_size = img.shape
            
            center_i = 63
            center_j = 63
            radius = int(int(window[:2])/2)
            in_center = []
            
            for i in range(img_size[0]):
                for j in range(img_size[1]):
                    dist = np.sqrt((i - center_i)**2 + (j - center_j)**2)
                    if dist <= radius:
                        if img[i, j] not in in_center:
                            if img[i, j] != 0:
                                in_center.append(img[i, j])
            num_in_center = len(in_center)
            
            param_names = []
            
            for line in f:
                if line.startswith('#'):
                    param_line = line.strip().split()
                    param_names.append(param_line[2])
                if not line.startswith('#'):
                    val_line = line.strip().split()
                    row_data = []
                    for i in val_line:
                        row_data.append(i)
                    row_data.insert(0, num_in_center)
                    row_data.insert(0, object_id)
                    data.append(row_data)
            param_names.insert(0, 'NUMBER_IN_CENTER')
            param_names.insert(0, 'object_id')

In [2]:
df = pd.DataFrame(data, columns=param_names)

In [3]:
if not os.path.exists(f'/data/HSC/HSC_v6/step1/g_band_sextractor/{window}_test_set_subset/'):
    os.makedirs(f'/data/HSC/HSC_v6/step1/g_band_sextractor/{window}_test_set_subset/')
df.to_csv(f'/data/HSC/HSC_v6/step1/g_band_sextractor/{window}_test_set_subset/segmented_image_data_v1.csv', index=False)

In [5]:
param_names

['object_id',
 'NUMBER_IN_CENTER',
 'NUMBER',
 'PETRO_RADIUS',
 'X_IMAGE',
 'Y_IMAGE',
 'XMIN_IMAGE',
 'XMAX_IMAGE',
 'YMIN_IMAGE',
 'YMAX_IMAGE',
 'ISOAREA_IMAGE',
 'ISOAREA_WORLD',
 'A_IMAGE',
 'B_IMAGE',
 'THETA_IMAGE',
 'THETA_WORLD',
 'MU_MAX',
 'ELLIPTICITY',
 'FLUX_RADIUS',
 'SPHEROID_SERSICN']

In [6]:
df

Unnamed: 0,object_id,NUMBER_IN_CENTER,NUMBER,PETRO_RADIUS,X_IMAGE,Y_IMAGE,XMIN_IMAGE,XMAX_IMAGE,YMIN_IMAGE,YMAX_IMAGE,ISOAREA_IMAGE,ISOAREA_WORLD,A_IMAGE,B_IMAGE,THETA_IMAGE,THETA_WORLD,MU_MAX,ELLIPTICITY,FLUX_RADIUS,SPHEROID_SERSICN
0,44165732820345011,1,1,5.28,60.4379,61.0973,48,73,45,77,600,1.337963e-06,5.471,3.402,-62.45,62.44,-6.3732,0.378,4.681,1.112
1,44165732820345011,1,2,7.26,71.2864,13.4610,67,75,8,19,71,1.583256e-07,2.607,1.727,-74.76,74.75,-3.0533,0.338,3.610,0.666
2,44165732820345011,1,3,5.28,42.7207,5.0004,36,50,1,12,147,3.278009e-07,2.380,2.183,13.15,-13.14,-5.9365,0.083,2.563,0.919
3,44165732820345011,1,4,10.56,36.0194,107.9593,35,37,107,109,9,2.006944e-08,0.801,0.785,-25.36,25.28,-2.2926,0.019,2.801,2.271
4,44165732820345011,1,5,9.90,83.8493,101.0536,81,88,96,106,65,1.44946e-07,2.689,1.740,-86.72,86.71,-2.5707,0.353,4.744,0.538
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
171479,70338112470928978,1,3,10.56,60.5057,60.5368,54,67,55,67,119,2.653627e-07,3.026,2.385,-33.49,33.20,-3.6846,0.212,5.249,1.134
171480,41135775126940447,1,1,5.94,61.0488,60.7739,18,103,28,81,2547,5.679653e-06,13.506,6.877,5.22,-5.22,-6.6053,0.491,11.765,1.550
171481,41135775126940447,1,2,6.60,23.7846,7.6093,21,26,5,10,29,6.466821e-08,1.390,1.353,-33.02,33.04,-3.6483,0.027,2.483,0.868
171482,41135775126940447,1,3,5.94,83.4133,106.9226,74,93,96,118,285,6.355324e-07,5.566,2.806,51.34,-51.34,-4.3739,0.496,5.911,0.591


In [7]:
# from astropy.io import fits
# import matplotlib.pyplot as plt

# for filename in os.listdir(directory)[:50]:
#     if filename.startswith("test_segmented"):
#         file_path = os.path.join(directory, filename)
#         print(filename)
#         img = fits.getdata(file_path)
#         plt.figure()
#         plt.imshow(img)
#         plt.show()

In [8]:
# obj_ids = np.unique(df['object_id'])
# for object_id in obj_ids[:30]:
#     segmented_filepath = f'{directory}/test_segmented_{object_id}_step1.fits'
#     original_filepath = f'{directory}/{object_id}_step1.fits'
#     segmented_img = fits.getdata(segmented_filepath)
#     original_img = fits.getdata(original_filepath)
#     fig, (segmented_ax, original_ax) = plt.subplots(1, 2)
#     segmented_ax.imshow(segmented_img)
#     original_ax.imshow(original_img)

In [9]:
# must clear prevous segmented and cat files if you want to make a new batch

In [10]:
# detailed galaxies might be seen as multiple galaxies, as we need to consider the separation between galaxies
# it might be hard to keep track of

# run num_in_center script on segmented files and match magnitudes by object id to the df

# plot magnitude vs petro rad 
# also make histograms when the df is full