In [1]:
import os
import pandas as pd
import numpy as np
from astropy.io import fits
import matplotlib.pyplot as plt

In [2]:
# Tag interpolated into the names of the catalogue input/output folders.
window = '10px_diameter'
# Folder the per-object segmented FITS images are read from.
directory = '/data/HSC/HSC_v6/step1/g_band_sextractor/test_set_subset'

In [3]:
# Catalogue for the selected window, read from its own sub-folder.
catalog_csv = f'/data/HSC/HSC_v6/step1/g_band_sextractor/{window}_test_set_subset/segmented_mag_data_v1.csv'
df = pd.read_csv(catalog_csv)

# np.unique de-duplicates the ids and returns them sorted.
obj_ids = np.unique(df['object_id'])

In [4]:
obj_ids # already in ascending order: np.unique returns its result sorted

array([36407046198803509, 36407046198804043, 36407050493759629, ...,
       74649160124502454, 74649164419466433, 74649168714406416])

In [5]:
def count_label_pixels(img):
    """Count the pixels carrying each non-background label in a label image.

    All labels are tallied in a single ``np.unique(..., return_counts=True)``
    pass instead of rescanning the whole image once per label.

    Parameters
    ----------
    img : array-like
        Label image; the value 0 is treated as background and skipped.
        Assumes integer labels (no NaNs) -- TODO confirm for these FITS files.

    Returns
    -------
    list of (label, count) tuples
        One entry per distinct non-zero value in ``img``, in ascending label
        order, with the number of pixels equal to that value.
    """
    labels, counts = np.unique(img, return_counts=True)
    not_background = labels != 0
    return list(zip(labels[not_background], counts[not_background]))


# one (object_id, label, pixel count) record per labelled region
data = []

# loop through each object id
for obj_id in obj_ids:
    # load the segmented image for this object
    img_filename = f'{directory}/test_segmented_{obj_id}.fits'
    img = fits.getdata(img_filename)

    # add one record for every non-background label found in this image
    data.extend(
        (obj_id, obj_label, num_pixels)
        for obj_label, num_pixels in count_label_pixels(img)
    )

# convert the data list to a Pandas DataFrame
area_df = pd.DataFrame(data, columns=['object_id', 'NUMBER', 'PIXEL_AREA'])

In [6]:
# Preview the table: one row per (object_id, NUMBER) label with its pixel count.
area_df

Unnamed: 0,object_id,NUMBER,PIXEL_AREA
0,36407046198803509,1,327
1,36407046198803509,2,8
2,36407046198803509,3,73
3,36407046198803509,4,179
4,36407046198804043,1,978
...,...,...,...
171443,74649164419466433,5,68
171444,74649168714406416,1,49
171445,74649168714406416,2,19
171446,74649168714406416,3,9


In [7]:
# Attach PIXEL_AREA to the catalogue rows. This is an inner join, so a row
# whose (object_id, NUMBER) pair is missing from either table is dropped.
merge_keys = ['object_id', 'NUMBER']
full_df = df.merge(area_df, on=merge_keys, how='inner')

In [8]:
# Check the merged schema: the catalogue columns plus PIXEL_AREA.
full_df.columns

Index(['object_id', 'NUMBER_IN_CENTER', 'NUMBER', 'PETRO_RADIUS', 'X_IMAGE',
       'Y_IMAGE', 'XMIN_IMAGE', 'XMAX_IMAGE', 'YMIN_IMAGE', 'YMAX_IMAGE',
       'ISOAREA_IMAGE', 'ISOAREA_WORLD', 'A_IMAGE', 'B_IMAGE', 'THETA_IMAGE',
       'THETA_WORLD', 'MU_MAX', 'ELLIPTICITY', 'FLUX_RADIUS',
       'SPHEROID_SERSICN', 'g_cmodel_mag', 'PIXEL_AREA'],
      dtype='object')

In [9]:
# Write the merged table to the window-specific folder, without the row index.
processed_csv = f'/data/HSC/HSC_v6/step1/g_band_sextractor/{window}_test_set_subset/processed_data_v1.csv'
full_df.to_csv(processed_csv, index=False)