In [1]:
import copy
import os

import cv2
import galsim
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

# Galsim arguments

In [2]:
# Side length, in pixels, that Process_galaxy targets for every galaxy stamp.
image_size = 64
# Target pixel scale (presumably arcsec/pixel -- TODO confirm; not read in the visible cells).
pixel_size = 0.03
# PSF stamp size (not read in the visible cells).
psf_size = 49
# PSF pixel scale; Convolve_with_PSF divides it by 0.074 to get the PSF dilation factor.
psf_pixel_size = 0.074
# Forwarded to cat.makeGalaxy as gal_type.
galaxy_type = 'real'
# PSF flavour (not read in the visible cells).
psf_type = 'real'

# Side length, in pixels, of each corner patch used by Manual_noise_extraction.
noise_border_size = 8
# Catalogue that every galaxy in this notebook is drawn from.
cat = galsim.COSMOSCatalog(sample='23.5')

# Processing galaxy

In [3]:
def Convolve_with_PSF(gal):
    """Convolve a galaxy with its own PSF, rescaled and normalised to unit flux.

    Args:
        gal: galaxy object exposing an ``original_psf`` attribute (in this
            notebook, the object returned by ``cat.makeGalaxy``).

    Returns:
        The ``galsim.Convolve`` of ``gal`` with the dilated, unit-flux PSF.
    """
    # 0.074 is the reference scale the original author attributes to the
    # HST F814W band; the ratio stretches the PSF to the requested resolution.
    dilation = psf_pixel_size / 0.074
    unit_flux_psf = gal.original_psf.dilate(dilation).withFlux(1.)

    return galsim.Convolve(gal, unit_flux_psf)

def Manual_noise_extraction(image_64x64, border_size=None):
    """Estimate the background noise sigma from the four corners of an image.

    The standard deviation is computed separately on each square corner patch
    and the smallest value is returned. Taking the minimum keeps the estimate
    tied to pure background: a corner that is touched by the galaxy itself
    (e.g. a large diagonal spiral arm) has an inflated std and is ignored.

    Args:
        image_64x64: 2-D array holding the image.
        border_size: side length, in pixels, of each corner patch. Defaults to
            the module-level ``noise_border_size`` when omitted.

    Returns:
        The smallest of the four per-corner standard deviations.

    Raises:
        ValueError: if the patch size is smaller than 1 pixel.
    """
    if border_size is None:
        border_size = noise_border_size
    if border_size < 1:
        # A size of 0 would turn ``-0:`` into a full-axis slice and yield
        # patches of different shapes, so reject it explicitly.
        raise ValueError("border_size must be a positive number of pixels")

    b = border_size
    corners = np.array([
        image_64x64[:b, :b],    # top-left
        image_64x64[:b, -b:],   # top-right
        image_64x64[-b:, :b],   # bottom-left
        image_64x64[-b:, -b:],  # bottom-right
    ])

    # One std per patch (axes 1 and 2 are the patch rows/columns).
    return corners.std(axis=(1, 2)).min()

def Process_galaxy(index):
    """Build the PSF-convolved stamp and parameter record for one catalogue entry.

    Args:
        index: index of the galaxy in the module-level catalogue ``cat``.

    Returns:
        ``False`` when the galaxy's good image size is smaller than
        ``image_size - 2``. Otherwise a ``(parameters, image)`` tuple:
        ``parameters`` is the catalogue's parametric record extended with the
        keys ``COSMOS_noise_sigma``, ``flux``, ``Manual_noise_sigma``,
        ``maxI_4_CV`` and ``Original_size``; ``image`` is the drawn pixel array.
    """
    # The PSF is applied before anything is measured because, per the
    # original author's note, convolution enlarges the image.
    convolved = Convolve_with_PSF(cat.makeGalaxy(index, gal_type=galaxy_type))

    # Only the pixel scale (typically 0.03" according to the author) and the
    # noise variance are used from the real-galaxy parameters.
    _, _, _, pixel_scale, noise_variance = cat.getRealParams(index)

    # Author's note: side length of the picture that contains 99.5% of the flux.
    native_size = convolved.getGoodImageSize(pixel_scale)

    # Author's note: galsim ends up adding 2 pixels to the requested size, so
    # image_size - 2 is used both as the threshold and as the scaling target.
    target_size = image_size - 2
    if native_size < target_size:
        # Galaxy is too small for the requested stamp size.
        return False

    # pixel_scale * native_size is the extent galsim would cover natively;
    # dividing by the target size rescales it onto the desired grid.
    draw_scale = pixel_scale * native_size / target_size
    stamp = convolved.drawImage(scale=draw_scale,
                                use_true_center=True, method='auto').array

    # Peak of a 16x16 cubic down-sampling of the stamp; needed to define SNR.
    downsampled = cv2.resize(stamp, dsize=(16, 16), interpolation=cv2.INTER_CUBIC)
    peak_downsampled = downsampled.max()

    # Noise sigma measured on the corners of the stamp.
    measured_sigma = Manual_noise_extraction(stamp)

    # Catalogue record plus the quantities derived above.
    record = cat.getParametricRecord(index)
    record.update({
        'COSMOS_noise_sigma': np.sqrt(noise_variance),
        'flux': convolved.flux,
        'Manual_noise_sigma': measured_sigma,
        'maxI_4_CV': peak_downsampled,
        'Original_size': native_size,
    })

    return record, stamp

def Show_100(galaxies):
    """Display up to the first 100 images of ``galaxies`` on a 10x10 grid.

    Args:
        galaxies: sliceable, indexable collection of 2-D images.
    """
    plt.figure(figsize=(20, 20))
    shown = galaxies[:100]
    for position in range(len(shown)):
        # Fill the grid row by row: ten panels per row.
        row, col = divmod(position, 10)
        panel = plt.subplot2grid((10, 10), (row, col))
        panel.imshow(shown[position], cmap='gray_r')
        panel.axis('off')
    plt.show()

# Get Data

In [5]:
# Catalogue index range to process: start is inclusive, stop is exclusive.
start = 30000
stop = 40000

# Results are gathered in plain lists and assembled once after the loop.
# Growing a DataFrame / ndarray inside the loop copies everything on every
# iteration, and DataFrame.append no longer exists in pandas >= 2.0.
records = []
images = []
for index in tqdm(range(start, stop)):
    result = Process_galaxy(index)

    # Process_galaxy returns False for galaxies too small for the stamp size.
    if result is False:
        continue

    parameters, image = result

    # Push results to storage
    records.append(parameters)
    images.append(image)

# One row per accepted galaxy. Rows get a 0..n-1 index (the former per-row
# append gave every row index 0).
df = pd.DataFrame(records)

# Stack into (n_galaxies, height, width). float64 matches what appending to a
# np.zeros array used to produce; the empty case keeps the 3-D shape.
if images:
    images_galaxy = np.stack(images).astype(np.float64)
else:
    images_galaxy = np.zeros((0, image_size, image_size))

100%|██████████| 10000/10000 [54:30<00:00,  2.57it/s] 


In [None]:
# Save collected data

In [6]:
# Write the per-galaxy parameter table to CSV. The output directory is created
# first so a missing folder cannot make the save fail after the long run.
labels_path = 'Dataset_23_5/Dataset_labels_20.csv'
os.makedirs(os.path.dirname(labels_path), exist_ok=True)
df.to_csv(labels_path)

In [7]:
# Write the stacked galaxy images to a .npy file. The output directory is
# created first so a missing folder cannot make the save fail.
images_path = 'Dataset_23_5/Dataset_images_20.npy'
os.makedirs(os.path.dirname(images_path), exist_ok=True)
np.save(images_path, images_galaxy)