In [1]:
import csv
import os
import random
import re

import galsim
import h5py
import numpy as np
import pandas as pd
from astropy.io import fits

In [2]:
def trunc_rayleigh(sigma, max_val):
    """Draw one Rayleigh-distributed value that does not exceed ``max_val``.

    Values are drawn from ``np.random.rayleigh(sigma)`` and redrawn until one
    is not greater than ``max_val`` (rejection sampling).

    Parameters
    ----------
    sigma : float
        Scale passed to ``np.random.rayleigh``.
    max_val : float
        Upper cut-off; must be strictly greater than ``sigma``.

    Returns
    -------
    float
        The first accepted draw.

    Raises
    ------
    AssertionError
        If ``max_val`` is not greater than ``sigma``.
    """
    assert max_val > sigma
    # Always draw at least once. Seeding the loop with ``max_val + 1.0`` is not
    # safe: for ``inf`` or very large floats that value equals ``max_val`` and
    # would be returned without any sampling.
    while True:
        draw = np.random.rayleigh(sigma)
        if not (draw > max_val):
            return draw

In [3]:
# Quick sanity check: one truncated-Rayleigh draw, shown with its Python type.
test = trunc_rayleigh(.2, .6)
print(test, type(test), sep="\n")

0.11181068014837506
<class 'float'>


In [4]:
def trunc_gaussian(mean, std, min_val):
    """Draw one normally distributed value that is not below ``min_val``.

    Values are drawn from ``np.random.normal(mean, std)`` and redrawn until one
    is not less than ``min_val`` (rejection sampling).

    Parameters
    ----------
    mean : float
        Mean passed to ``np.random.normal``.
    std : float
        Standard deviation passed to ``np.random.normal``.
    min_val : float
        Lower cut-off for accepted draws.

    Returns
    -------
    float
        The first accepted draw.
    """
    # Always draw at least once. The previous version seeded the loop with
    # ``min_val + 1.0`` (already accepted, so nothing was ever sampled) and
    # referenced an undefined ``std_dev`` instead of ``std``.
    while True:
        draw = np.random.normal(mean, std)
        if not (draw < min_val):
            return draw

In [5]:
def get_psf(x,y):
    """Build a sheared Gaussian PSF whose size and shear depend on ``(x, y)``.

    Returns the tuple ``(psf, sigma, g1, g2)``: the profile obtained from
    ``galsim.Gaussian(...).shear(...)`` followed by the three numbers used to
    build it.
    """
    # Size grows linearly with x + y; g1 depends only on x, g2 only on y.
    sigma = 2.0 + 0.25 * ((x+y)-1)
    g1 = 0.25 * 2. * (x-0.5)
    g2 = 0.25 * y

    sheared = galsim.Gaussian(sigma=sigma).shear(g1=g1, g2=g2)
    return sheared, sigma, g1, g2

In [6]:
def get_gal_angle():
    """Return an angle in radians drawn uniformly from [0, 2*pi)."""
    full_turn = 2.0 * np.pi
    return full_turn * np.random.uniform(0.0, 1)

In [7]:
# Quick sanity check: show a single random orientation angle.
print (get_gal_angle())

5.576883551722925


In [8]:
def get_gal_ellipticity(theta):
    """Return ellipticity components ``(e1, e2)`` for orientation ``theta``.

    The magnitude comes from ``trunc_rayleigh(.2, .6)``; it is projected onto
    the two components with ``cos(2*theta)`` and ``sin(2*theta)``.
    """
    magnitude = trunc_rayleigh(.2, .6)
    doubled_angle = 2 * theta
    return magnitude * np.cos(doubled_angle), magnitude * np.sin(doubled_angle)

In [9]:
# Quick sanity check: one random ellipticity pair and the types of its parts.
e1, e2 = get_gal_ellipticity(get_gal_angle())
print(e1, e2)
print(*map(type, (e1, e2)))

-0.24900957446340594 0.37468872082974086
<class 'numpy.float64'> <class 'numpy.float64'>


In [10]:
def get_shear(shear_range):
    """Return one value drawn uniformly between ``shear_range[0]`` and ``shear_range[1]``."""
    low, high = shear_range[0], shear_range[1]
    return np.random.uniform(low, high)

In [11]:
def get_flux(S_range, R, pixel_scale=.263):
    """Return a flux for a random per-pixel brightness spread over radius ``R``.

    A value ``S`` is drawn uniformly between ``S_range[0]`` and ``S_range[1]``
    and multiplied by the area of a circle of radius ``R / pixel_scale``.

    Parameters
    ----------
    S_range : sequence of two floats
        Lower and upper bound of the uniform draw for ``S``.
    R : float
        Radius, in the same length unit as ``pixel_scale``.
    pixel_scale : float, optional
        Length of one pixel, used to convert ``R`` to pixels. Defaults to
        ``.263``, the value that was previously hard-coded here.

    Returns
    -------
    float
        ``S * pi * (R / pixel_scale)**2``.
    """
    S = np.random.uniform(S_range[0], S_range[1])
    F = S * np.pi * (R/pixel_scale)**2
    return F

In [12]:
def get_gal(R_range, sersic_n_range, S_range):
    """Create a single-component ``galsim.Sersic`` profile with random parameters.

    The half-light radius and the Sersic index are drawn uniformly from
    ``R_range`` and ``sersic_n_range``; the flux comes from
    ``get_flux(S_range, radius)``.

    Returns the tuple ``(profile, radius, sersic_index, flux)``.
    """
    radius = np.random.uniform(R_range[0], R_range[1])
    sersic_index = np.random.uniform(sersic_n_range[0], sersic_n_range[1])
    flux = get_flux(S_range, radius)
    profile = galsim.Sersic(n=sersic_index, half_light_radius=radius, flux=flux)
    return profile, radius, sersic_index, flux

In [13]:
def get_spiral(R_range, sersic_bulge, sersic_disk_range, S_range):
    """Create a two-component (disk + bulge) ``galsim.Sersic`` profile.

    A random fraction of the total flux is assigned to the disk and the rest
    to the bulge. The disk radius and index are drawn uniformly from
    ``R_range`` and ``sersic_disk_range``; the bulge uses the fixed index
    ``sersic_bulge`` and a radius of 0.4-0.6 times the disk radius.

    Returns the tuple
    ``(profile, bulge_radius, disk_radius, disk_index, total_flux)``.
    """
    disk_fraction = np.random.uniform()
    bulge_fraction = 1-disk_fraction

    # Disk: the brightness bounds are scaled by the disk's share of the flux.
    disk_radius = np.random.uniform(R_range[0], R_range[1])
    scaled_S_range = tuple(bound * disk_fraction for bound in S_range)
    disk_S = np.random.uniform(scaled_S_range[0], scaled_S_range[1])
    disk_flux = disk_S * np.pi * (disk_radius/.263)**2
    disk_index = np.random.uniform(sersic_disk_range[0], sersic_disk_range[1])

    # Bulge: smaller than the disk, carrying the remaining share of the flux.
    bulge_radius = np.random.uniform(.4, .6) * disk_radius
    bulge_flux = disk_flux/disk_fraction * bulge_fraction

    bulge_profile = galsim.Sersic(n=sersic_bulge, half_light_radius=bulge_radius, flux=bulge_flux)
    disk_profile = galsim.Sersic(n=disk_index, half_light_radius=disk_radius, flux=disk_flux)

    return disk_profile + bulge_profile, bulge_radius, disk_radius, disk_index, bulge_flux+disk_flux

In [14]:
def get_snr():
    """Return a value drawn from a normal(60, 30) distribution, floored at 0.1."""
    return max(np.random.normal(60, 30), 0.1)

In [22]:
# Quick sanity check: show a single random SNR value.
# NOTE(review): this cell carries execution count 22 although it sits between
# cells 14 and 15, i.e. it was run out of order.
print (get_snr())

56.129092391648335


In [15]:
# ---- Mosaic layout: nx * ny postage stamps in one large image ----
nx = 25
ny = 25
stamp_xsize = 127 # pixel
stamp_ysize = 127 # pixel
pixel_scale = .263 # arcsec / pixel


noise_sigma = 1.

# ---- Galaxy centroid shift ----
shift_radius = .263 # arcsec
shift_radius_sq = shift_radius ** 2

# ---- Applied shear ----
shear_range = (-0.1, 0.1) # uniform distribution

# ---- Sersic indices ----
sersic_bulge = 4.
sersic_disk_range = (1., 1.5)

sersic_n_range = (1.0, 6.0) #uniform distribution

# ---- Sizes and brightness ----
# half-light radius
R_range = (.526, 2.104) # arcsec, half-light radius, uniform distribution

R_disk_range = (.526, 2.104) # arcsec

S_range = (1., 15.) # pix^-2, uniform distribution

# ---- Random number generators ----
seed_1 = 98237
rng_1 = galsim.BaseDeviate(seed_1)
random_seed = rng_1.raw() # used for gal simulation

# Every sampling helper in this notebook draws from NumPy's global generator,
# which the GalSim deviate above does not control. Seed it as well so that
# Restart & Run All reproduces the same catalogue.
np.random.seed(seed_1)

In [16]:

# Directory tree that receives one FITS file per galaxy stamp and per PSF stamp.
fit_files_directory = os.path.join('/data3/shear_simulated_galaxy', 'fits_files_TEST_noise_free')
galaxy_path = os.path.join(fit_files_directory, 'galaxy_images')
psf_path = os.path.join(fit_files_directory, 'psf_images')

# makedirs also creates missing parents and is a no-op for existing
# directories, so this cell can be re-run safely.
for directory in (fit_files_directory, galaxy_path, psf_path):
    os.makedirs(directory, exist_ok=True)

hdf5_path = '/data3/shear_simulated_galaxy/NonUniformPsf_image127x127_with_Metadata_TEST_noise_free.hdf5'
# Opening in 'w' mode creates/truncates the file so later cells start from an
# empty container. Close the handle straight away instead of leaving it open
# for the rest of the session.
hf = h5py.File(hdf5_path, 'w')
hf.close()

In [17]:
def _flush_metadata(rows, path):
    """Append ``rows`` (a list of dicts) to the header-less CSV at ``path``."""
    pd.DataFrame(rows).to_csv(path, mode='a', header=False, index=False)


metadata = []
batch_size = 5000
csv_file_path = os.path.join('/data3/shear_simulated_galaxy', 'Metadata_TEST_noise_free.csv')

# Rows are appended to the CSV below and stamp ids restart at 0 on every run,
# so drop any file left by a previous run instead of stacking duplicate ids.
# NOTE(review): FITS stamps left over from a previous run are not removed here.
if os.path.exists(csv_file_path):
    os.remove(csv_file_path)

image_id_counter = 0

# One large mosaic per image type; each stamp is drawn into its own sub-region.
gal_image = galsim.ImageF(stamp_xsize * nx, stamp_ysize * ny, scale = pixel_scale)
psf_image = galsim.ImageF(stamp_xsize * nx, stamp_ysize * ny, scale = pixel_scale)

for iy in range(ny):
    for ix in range(nx):

        # No noise is added in this (noise-free) run; snr only decides below
        # whether the stamp is kept.
        snr = get_snr()

        # create galaxy in this subimage: half single Sersic, half disk + bulge
        isSpiral = np.random.uniform() > 0.5
        if isSpiral:
            gal, R_bulge, R_disk, n, F = get_spiral(R_range, sersic_bulge, sersic_disk_range, S_range)
        else:
            gal, R, n, F = get_gal(R_range, sersic_n_range, S_range)

        # add ellipticity to galaxy
        theta = get_gal_angle()
        e1, e2 = get_gal_ellipticity(theta)
        gal = gal.shear(g1=e1, g2=e2)

        # shear the galaxy
        g1 = get_shear(shear_range)
        g2 = get_shear(shear_range)
        shear_gal = gal.lens(g1 = g1, g2 = g2, mu = 1.)

        # shift galaxy
        # Each offset is at most shift_radius/2 in magnitude, so rsq never
        # exceeds shift_radius_sq/2 and the first draw is always accepted.
        rsq = 2 * shift_radius_sq
        while (rsq > shift_radius_sq):
            dx = (2*np.random.random()-1) * shift_radius/2
            dy = (2*np.random.random()-1) * shift_radius/2
            rsq = dx**2 + dy**2

        this_gal = shear_gal.shift(dx,dy)

        # create psf
        psf_x = np.random.uniform(0.0, 1.0)
        psf_y = np.random.uniform(0.0, 1.0)
        this_psf, psf_sigma, psf_e1, psf_e2 = get_psf(psf_x, psf_y)

        # convolve psf with gal
        final_gal = galsim.Convolve([this_psf, this_gal])

        # +1  and -1 to create one pixel border between stamps
        # create subimage
        b = galsim.BoundsI(ix * stamp_xsize + 1,(ix+1) * stamp_xsize -1,
                            iy * stamp_ysize + 1, (iy+1) * stamp_ysize -1)
        sub_gal_image = gal_image[b]
        sub_psf_image = psf_image[b]
        subim = final_gal.drawImage(sub_gal_image)
        subpsf_im = this_psf.drawImage(sub_psf_image)

        # The id advances for every stamp, kept or not, so ids of kept stamps
        # may have gaps. Galaxy and PSF of one stamp share the same id.
        object_id_g = image_id_counter
        object_id_p = image_id_counter
        image_id_counter +=1

        if snr>10:
            # write images to fits files
            psf_name = str(object_id_p) + '.fits'
            psf_file_name = os.path.join(psf_path, psf_name)

            gal_name = str(object_id_g) + '.fits'
            gal_file_name = os.path.join(galaxy_path, gal_name)

            subpsf_im.write(psf_file_name)
            subim.write(gal_file_name)

            # One record layout for both galaxy types; fields that do not apply
            # to the current type are stored as 0. The key order defines the
            # column order of the header-less CSV, so keep it stable.
            metadata.append({
                'object_id': object_id_g,
                'e1': e1,
                'e2': e2,
                'g1': g1,
                'g2': g2,
                'psf_e1': psf_e1,
                'psf_e2': psf_e2,
                'psf_sigma': psf_sigma,
                'sersic_n': 0. if isSpiral else n,
                'sersic_bulge_n': sersic_bulge if isSpiral else 0.,
                'sersic_disk_n': n if isSpiral else 0.,
                'half_light_radius': 0. if isSpiral else R,
                'half_light_radius(bulge)': R_bulge if isSpiral else 0.,
                'half_light_radius(disk)': R_disk if isSpiral else 0.,
                'flux': F,
                'shift_radius_dx': dx,
                'shift_radius_dy': dy,
            })

        if len(metadata) >= batch_size:
            _flush_metadata(metadata, csv_file_path)
            metadata = []

# Write whatever is still buffered. This must not depend on the last stamp
# passing the snr cut, otherwise the final batch would be silently lost.
if metadata:
    _flush_metadata(metadata, csv_file_path)
    metadata = []

In [18]:
# Peek at the first CSV row to see whether the metadata file already carries a
# header line (any field starting with an ASCII letter is taken as a header).
csv_file = '/data3/shear_simulated_galaxy/Metadata_TEST_noise_free.csv'
with open(csv_file, 'r', newline='') as f:
    reader = csv.reader(f)
    first_row = next(reader, None)  # None instead of StopIteration for an empty file
    print(first_row)

if first_row is None:
    header_detected = False
    print("File is empty")
else:
    header_detected = any(re.match(r'^[A-Za-z]', value) for value in first_row)

    if header_detected:
        print("File likely has headers")
    else:
        print("File likely does not have headers")

['0', '-0.10804144009563121', '-0.5249494413649045', '0.015583199691669838', '-0.06941239647790914', '0.2117934048370203', '0.20042974674013736', '2.1813264491586475', '0.0', '4.0', '1.2187613152368861', '0.0', '0.9561269268314272', '1.8032807176415349', '1108.231394520936', '-0.07077429628152049', '0.06394819343738009']
File likely does not have headers


In [23]:
# Source (header-less) and destination (with header) metadata files.
# NOTE(review): the two names differ only in the case of the first letter;
# on a case-insensitive filesystem they would be the same file.
input_csv_file = '/data3/shear_simulated_galaxy/Metadata_TEST_noise_free.csv'
output_csv_file = os.path.join('/data3/shear_simulated_galaxy', 'metadata_TEST_noise_free.csv')

# Column names, in the order the simulation wrote the fields.
header = [
    'object_id',
    'e1', 'e2',
    'g1', 'g2',
    'psf_e1', 'psf_e2', 'psf_sigma',
    'sersic_n', 'sersic_bulge_n', 'sersic_disk_n',
    'half_light_radius', 'half_light_radius(bulge)', 'half_light_radius(disk)',
    'flux',
    'shift_radius_dx', 'shift_radius_dy',
]

# Copy the data into a new file that starts with the header row.
with open(input_csv_file, 'r', newline='') as infile:
    with open(output_csv_file, 'w', newline='') as outfile:
        reader = csv.reader(infile)
        writer = csv.writer(outfile)
        writer.writerow(header)
        writer.writerows(reader)

In [24]:
def _fits_names_by_id(directory):
    """Return the ``.fits`` file names in ``directory`` ordered by integer stem.

    A plain string sort would give '0', '1', '10', '100', ... and would not
    line up with metadata sorted by numeric ``object_id``.
    """
    names = [name for name in os.listdir(directory) if name.endswith('.fits')]
    return sorted(names, key=lambda name: int(os.path.splitext(name)[0]))


def _read_padded_stamp(path, size=127):
    """Read the primary HDU of ``path`` and zero-pad it to ``size`` x ``size``."""
    with fits.open(path) as hdul:
        data = hdul[0].data
        n_rows, n_cols = len(data), len(data[0])
        top = (size - n_rows) // 2
        left = (size - n_cols) // 2
        # np.pad returns a new array, so nothing refers to the file after close.
        return np.pad(data,
                      ((top, size - n_rows - top), (left, size - n_cols - left)),
                      "constant", constant_values=0)


def _store_stamp(hf, name, stamp, is_first):
    """Create dataset ``name`` from the first stamp, or append ``stamp`` to it."""
    if is_first:
        hf.create_dataset(name, data=stamp[np.newaxis, ...], chunks=True,
                          maxshape=(None, 127, 127))
    else:
        dataset = hf[name]
        dataset.resize((dataset.shape[0] + 1), axis=0)
        dataset[dataset.shape[0] - 1, :, :] = stamp


def make_hdf5_from_raw_images():
    """Pack the metadata CSV and all galaxy/PSF FITS stamps into the HDF5 file.

    Every metadata column becomes a 1-D dataset, and the stamps are stored in
    ``galaxy_image`` and ``psf_image`` with one 127x127 image per row. Images
    and metadata rows are both ordered by numeric object id so that row ``i``
    of every dataset refers to the same object.

    Returns
    -------
    None or str
        ``None`` on success. A short message string, with nothing written, if
        the galaxy and PSF files do not pair up or if the metadata ids do not
        match the galaxy files on disk.

    Raises
    ------
    Exception
        Any error raised while reading a FITS file is reported and re-raised;
        continuing would store stale or missing image data.
    """
    galaxy_dir = "/data3/shear_simulated_galaxy/fits_files_TEST_noise_free/galaxy_images"
    psf_dir = "/data3/shear_simulated_galaxy/fits_files_TEST_noise_free/psf_images"

    # object id is the filename of the galaxy images
    image_name_list = _fits_names_by_id(galaxy_dir)
    psf_name_list = _fits_names_by_id(psf_dir)

    # check that every galaxy has a PSF with the same id
    if image_name_list != psf_name_list:
        return "not every galaxy has coresponding psf"

    # load metadata, ordered the same way as the image files
    sheardata = pd.read_csv('/data3/shear_simulated_galaxy/metadata_TEST_noise_free.csv')
    sheardata = sheardata.sort_values('object_id')

    image_ids = [int(os.path.splitext(name)[0]) for name in image_name_list]
    if sheardata['object_id'].tolist() != image_ids:
        return "metadata rows do not match the galaxy images on disk"

    # 'a' keeps whatever is already in the file; the context manager closes
    # the handle even if one of the steps below fails.
    with h5py.File('/data3/shear_simulated_galaxy/NonUniformPsf_image127x127_with_Metadata_TEST_noise_free.hdf5', 'a') as hf:

        # create metadata's corresponding dataset in hdf5 file
        # (DataFrame.iteritems() was removed in pandas 2.0; items() is the replacement)
        for columnName, columnData in sheardata.items():
            print(columnName)
            hf.create_dataset(columnName, data=columnData.to_numpy())

        for i, (gal_name, psf_name) in enumerate(zip(image_name_list, psf_name_list)):
            try:
                g_im = _read_padded_stamp(os.path.join(galaxy_dir, gal_name))
                p_im = _read_padded_stamp(os.path.join(psf_dir, psf_name))
            except fits.verify.VerifyError:
                print(f"Corrupt FITS file: {gal_name}")
                raise
            except Exception as e:
                print(f"Error processing FITS file: {gal_name} - Error: {e}")
                raise

            _store_stamp(hf, "galaxy_image", g_im, i == 0)
            _store_stamp(hf, "psf_image", p_im, i == 0)


In [25]:
# Build the HDF5 file from the FITS stamps and the metadata CSV written above.
make_hdf5_from_raw_images()

  for (columnName, columnData) in sheardata.iteritems():


object_id
e1
e2
g1
g2
psf_e1
psf_e2
psf_sigma
sersic_n
sersic_bulge_n
sersic_disk_n
half_light_radius
half_light_radius(bulge)
half_light_radius(disk)
flux
shift_radius_dx
shift_radius_dy


In [26]:
# List every top-level entry of the HDF5 file with its shape/dtype summary.
# The context manager closes the file even if printing fails.
with h5py.File('/data3/shear_simulated_galaxy/NonUniformPsf_image127x127_with_Metadata_TEST_noise_free.hdf5', 'r') as hf:
    for name, item in hf.items():
        print(name)
        print(item)  # This will print the metadata about the group or dataset
        print("-----------")

e1
<HDF5 dataset "e1": shape (600,), type "<f8">
-----------
e2
<HDF5 dataset "e2": shape (600,), type "<f8">
-----------
flux
<HDF5 dataset "flux": shape (600,), type "<f8">
-----------
g1
<HDF5 dataset "g1": shape (600,), type "<f8">
-----------
g2
<HDF5 dataset "g2": shape (600,), type "<f8">
-----------
galaxy_image
<HDF5 dataset "galaxy_image": shape (600, 127, 127), type ">f4">
-----------
half_light_radius
<HDF5 dataset "half_light_radius": shape (600,), type "<f8">
-----------
half_light_radius(bulge)
<HDF5 dataset "half_light_radius(bulge)": shape (600,), type "<f8">
-----------
half_light_radius(disk)
<HDF5 dataset "half_light_radius(disk)": shape (600,), type "<f8">
-----------
object_id
<HDF5 dataset "object_id": shape (600,), type "<i8">
-----------
psf_e1
<HDF5 dataset "psf_e1": shape (600,), type "<f8">
-----------
psf_e2
<HDF5 dataset "psf_e2": shape (600,), type "<f8">
-----------
psf_image
<HDF5 dataset "psf_image": shape (600, 127, 127), type ">f4">
-----------
psf_s