In [1]:
import os
import h5py
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sys import stdout
from datetime import datetime
# import neptune.new as neptune
# from neptune.new.integrations.tensorflow_keras import NeptuneCallback
from astropy.io import fits

2023-05-03 17:04:46.418550: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Cropping2D, ZeroPadding2D
from tensorflow.keras.layers import Conv2D, Conv2DTranspose
from tensorflow.keras.layers import Input, Flatten, Dense, Reshape
import tensorflow_probability as tfp
from tensorflow_probability import layers as tfpl
from tensorflow_probability import distributions as tfd
from tensorflow.nn import leaky_relu
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau

In [3]:
LATENT_DIM = 128
BASE_DEPTH = 8
IMAGE_SHAPE = (5, 127, 127)
EPOCHS = 200
CHECKPOINTS_TO_SAVE = 4
KL_WEIGHT = 1e-5
# Good value: 1e-6

In [4]:
gpus = tf.config.list_physical_devices('GPU');
if gpus:
    try:
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit = 20000)]);
        logical_gpus = tf.config.experimental.list_logical_devices('GPU');
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        print(e)

1 Physical GPUs, 1 Logical GPUs


2023-05-03 17:04:49.398792: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-05-03 17:04:49.430267: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-05-03 17:04:49.430580: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [5]:
now = datetime.now()

DATASET_NAME = "HSC_v6_small"
MODEL_TYPE = "VAE"
MODEL_VERSION = "v1.0"
MODEL_SUBVERSION = "v1.0.20221118_14_03_25"

model_id = '_'.join([DATASET_NAME, MODEL_TYPE, MODEL_VERSION])

model_path = os.path.join('/models', model_id, MODEL_SUBVERSION, 'model')

weights_path = model_path + '/weights.h5'

In [6]:
hf_train = h5py.File('/data/HSC/HSC_v6/step2A/127x127/5x127x127_training_small.hdf5', 'r')
hf_test = h5py.File('/data/HSC/HSC_v6/step2A/127x127/5x127x127_testing_small.hdf5', 'r')
hf_validation = h5py.File('/data/HSC/HSC_v6/step2A/127x127/5x127x127_validation_small.hdf5', 'r')
x_train = np.asarray(hf_train['image'][0:])
x_test = np.asarray(hf_test['image'][0:])
x_validation = np.asarray(hf_validation['image'][0:])
max_value = 4.16
x_train = np.true_divide(x_train, max_value)
x_test = np.true_divide(x_test, max_value)
x_validation = np.true_divide(x_validation, max_value)
y_train = np.asarray(hf_train['specz_redshift'][0:])[..., None]
y_test = np.asarray(hf_test['specz_redshift'][0:])[..., None]
y_validation = np.asarray(hf_validation['specz_redshift'][0:])[..., None]
# object_id_train = np.asarray(hf_train['object_id'][0:])
# object_id = np.asarray(hf_test['object_id'][0:])
# object_id_validation = np.asarray(hf_validation['object_id'][0:])
hf_train.close()
hf_test.close()
hf_validation.close()

In [7]:
prior = tfd.Independent(tfd.Normal(loc=tf.zeros(LATENT_DIM), scale = 0.1), reinterpreted_batch_ndims = 1)

class VAE(Model):
    def __init__(self):
        super(VAE, self).__init__()
        
        images = Input(shape = IMAGE_SHAPE)
        x = Conv2D(BASE_DEPTH, 3, strides = 2, activation = leaky_relu,
                   padding = 'same', data_format = 'channels_first')(images)
        x = Conv2D(BASE_DEPTH, 3, strides = 2, activation = leaky_relu,
                   padding = 'same', data_format = 'channels_first')(x)
        x = Flatten()(x)
        x = Dense(tfpl.MultivariateNormalTriL.params_size(LATENT_DIM), activation = None)(x)
        z = tfpl.MultivariateNormalTriL(LATENT_DIM,
                  activity_regularizer=tfpl.KLDivergenceRegularizer(prior, weight = KL_WEIGHT))(x)
        self.encoder = Model(images, z, name = 'encoder')

        latents = Input(shape = LATENT_DIM)
        x = Dense(8 * LATENT_DIM * 32 * 32, activation = None)(latents)
        x = Reshape((8 * LATENT_DIM, 32, 32))(x)
        x = Conv2DTranspose(BASE_DEPTH, 3, strides = 2, activation = leaky_relu,
                            padding = 'same', data_format = 'channels_first')(x)
        x = Conv2DTranspose(BASE_DEPTH, 3, strides = 2, activation = leaky_relu,
                            padding = 'same', data_format = 'channels_first')(x)
        x = Conv2DTranspose(BASE_DEPTH, 3, strides = 1, activation = leaky_relu,
                            padding = 'same', data_format = 'channels_first')(x)
        x = Conv2D(IMAGE_SHAPE[0], 3, strides = 1, activation = None, 
                   padding = 'same', data_format = 'channels_first')(x)
        outputs = Cropping2D(cropping=((0, 1), (0, 1)), data_format = 'channels_first')(x)
        self.decoder = Model(latents, outputs, name = 'decoder')

    def call(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x
    
    def summary(self):
        print(self.encoder.summary())
        print(self.decoder.summary())

In [8]:
model = VAE()
model.compile(optimizer = 'adam', loss = 'mae')
model.summary()

Model: "encoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 5, 127, 127)]     0         
                                                                 
 conv2d (Conv2D)             (None, 8, 64, 64)         368       
                                                                 
 conv2d_1 (Conv2D)           (None, 8, 32, 32)         584       
                                                                 
 flatten (Flatten)           (None, 8192)              0         
                                                                 
 dense (Dense)               (None, 8384)              68690112  
                                                                 
 multivariate_normal_tri_l (  ((None, 128),            0         
 MultivariateNormalTriL)      (None, 128))                       
                                                           

In [9]:
run = neptune.init(
    name = MODEL_SUBVERSION,
    project = "astro-data-lab/VAE",
    api_token = "eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiIxOGFlZGMxOC04MWU5LTQ2NDctYjlhZS05NGE2NGQ0NmIzMmEifQ==",
)  # your credentials

# logs_callback = TensorBoard(log_dir = logs_path)

LR_callback = ReduceLROnPlateau()

# tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir = logs_path, histogram_freq = 1)

neptune_callback = NeptuneCallback(run = run)

NameError: name 'neptune' is not defined

In [11]:
history = model.fit(x = x_train, y = x_train, epochs = EPOCHS, validation_data = (x_validation, x_validation), verbose = 1)

2023-05-03 17:05:44.022132: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 3225800000 exceeds 10% of free system memory.
2023-05-03 17:05:45.695331: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 3225800000 exceeds 10% of free system memory.
2023-05-03 17:05:47.312302: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 3225800000 exceeds 10% of free system memory.
2023-05-03 17:05:48.305810: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 3225800000 exceeds 10% of free system memory.


Epoch 1/200


2023-05-03 17:05:51.366996: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:429] Could not create cudnn handle: CUDNN_STATUS_NOT_INITIALIZED
2023-05-03 17:05:51.367051: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:438] Possibly insufficient driver version: 520.61.5
2023-05-03 17:05:51.367203: W tensorflow/core/framework/op_kernel.cc:1830] OP_REQUIRES failed at conv_ops.cc:1068 : UNIMPLEMENTED: DNN library is not found.
2023-05-03 17:05:51.367221: I tensorflow/core/common_runtime/executor.cc:1197] [/job:localhost/replica:0/task:0/device:GPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): UNIMPLEMENTED: DNN library is not found.
	 [[{{node vae/encoder/conv2d/Conv2D}}]]


UnimplementedError: Graph execution error:

Detected at node 'vae/encoder/conv2d/Conv2D' defined at (most recent call last):
    File "/opt/anaconda3/envs/jupyterhub/lib/python3.10/runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/opt/anaconda3/envs/jupyterhub/lib/python3.10/runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "/opt/anaconda3/envs/jupyterhub/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/opt/anaconda3/envs/jupyterhub/lib/python3.10/site-packages/traitlets/config/application.py", line 1043, in launch_instance
      app.start()
    File "/opt/anaconda3/envs/jupyterhub/lib/python3.10/site-packages/ipykernel/kernelapp.py", line 712, in start
      self.io_loop.start()
    File "/opt/anaconda3/envs/jupyterhub/lib/python3.10/site-packages/tornado/platform/asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "/opt/anaconda3/envs/jupyterhub/lib/python3.10/asyncio/base_events.py", line 603, in run_forever
      self._run_once()
    File "/opt/anaconda3/envs/jupyterhub/lib/python3.10/asyncio/base_events.py", line 1909, in _run_once
      handle._run()
    File "/opt/anaconda3/envs/jupyterhub/lib/python3.10/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/opt/anaconda3/envs/jupyterhub/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "/opt/anaconda3/envs/jupyterhub/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "/opt/anaconda3/envs/jupyterhub/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell
      await result
    File "/opt/anaconda3/envs/jupyterhub/lib/python3.10/site-packages/ipykernel/kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "/opt/anaconda3/envs/jupyterhub/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 383, in do_execute
      res = shell.run_cell(
    File "/opt/anaconda3/envs/jupyterhub/lib/python3.10/site-packages/ipykernel/zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/opt/anaconda3/envs/jupyterhub/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3006, in run_cell
      result = self._run_cell(
    File "/opt/anaconda3/envs/jupyterhub/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3061, in _run_cell
      result = runner(coro)
    File "/opt/anaconda3/envs/jupyterhub/lib/python3.10/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/opt/anaconda3/envs/jupyterhub/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3266, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/opt/anaconda3/envs/jupyterhub/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3445, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/opt/anaconda3/envs/jupyterhub/lib/python3.10/site-packages/IPython/core/interactiveshell.py", line 3505, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_4142807/1110390625.py", line 1, in <module>
      history = model.fit(x = x_train, y = x_train, epochs = EPOCHS, validation_data = (x_validation, x_validation), verbose = 1)
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/engine/training.py", line 1685, in fit
      tmp_logs = self.train_function(iterator)
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/engine/training.py", line 1284, in train_function
      return step_function(self, iterator)
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/engine/training.py", line 1268, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/engine/training.py", line 1249, in run_step
      outputs = model.train_step(data)
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/engine/training.py", line 1050, in train_step
      y_pred = self(x, training=True)
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/engine/training.py", line 558, in __call__
      return super().__call__(*args, **kwargs)
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/engine/base_layer.py", line 1145, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/tmp/ipykernel_4142807/4289805732.py", line 33, in call
      x = self.encoder(x)
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/engine/training.py", line 558, in __call__
      return super().__call__(*args, **kwargs)
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/engine/base_layer.py", line 1145, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/engine/functional.py", line 512, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/engine/functional.py", line 669, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/engine/base_layer.py", line 1145, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/layers/convolutional/base_conv.py", line 290, in call
      outputs = self.convolution_op(inputs, self.kernel)
    File "/home/billyli/.local/lib/python3.10/site-packages/keras/layers/convolutional/base_conv.py", line 262, in convolution_op
      return tf.nn.convolution(
Node: 'vae/encoder/conv2d/Conv2D'
DNN library is not found.
	 [[{{node vae/encoder/conv2d/Conv2D}}]] [Op:__inference_train_function_4388]

In [None]:
def generate_example_galaxies(num_to_generate = 10):
    z = prior.sample(num_to_generate)
    xhat = model.decoder(z)
    fig, axes = plt.subplots(nrows = num_to_generate, ncols = 5, figsize = (4 * 5, 4 * num_to_generate))
    for i in range(num_to_generate):
        for j in range(0,5):
            axes[i][j].imshow(xhat[i][j], cmap = 'afmhot')
            axes[i][j].set_title(f'Generated image {i} band {j}')

In [None]:
generate_example_galaxies(10)

In [None]:
image_list_g = sorted(os.listdir("/mnt/data/HSC/HSC_v6/step1/g_band"))
image_list_r = sorted(os.listdir("/mnt/data/HSC/HSC_v6/step1/r_band"))
image_list_i = sorted(os.listdir("/mnt/data/HSC/HSC_v6/step1/i_band"))
image_list_z = sorted(os.listdir("/mnt/data/HSC/HSC_v6/step1/z_band"))
image_list_y = sorted(os.listdir("/mnt/data/HSC/HSC_v6/step1/y_band"))

photozdata = pd.read_csv('/mnt/data/HSC/HSC_v6/HSC_v6.csv')
photozdata.describe()

b = np.argsort(photozdata['object_id'])
sorted_photozdata = photozdata.iloc[b][:]
photozdata = sorted_photozdata

hf_in = h5py.File('/mnt/data/HSC/HSC_v6/step3/127x127_mae_in.hdf5', 'a')
hf_out = h5py.File('/mnt/data/HSC/HSC_v6/step3/127x127_mae_out.hdf5', 'a')

In [None]:
count_in = 0
count_out = 0
size = len(image_list_g)
for i in range(size):
    
    stdout.write("\rChecking %d samples of " % (i + 1) + str(size))
    
    object_id = image_list_g[i][0:17]

    five_band_image = []

    image_g = fits.open("/mnt/data/HSC/HSC_v6/step1/g_band/" + image_list_g[i])
    image_r = fits.open("/mnt/data/HSC/HSC_v6/step1/r_band/" + image_list_r[i])
    image_i = fits.open("/mnt/data/HSC/HSC_v6/step1/i_band/" + image_list_i[i])
    image_z = fits.open("/mnt/data/HSC/HSC_v6/step1/z_band/" + image_list_z[i])
    image_y = fits.open("/mnt/data/HSC/HSC_v6/step1/y_band/" + image_list_y[i])

    image_g_data = image_g[1].data
    image_r_data = image_r[1].data
    image_i_data = image_i[1].data
    image_z_data = image_z[1].data
    image_y_data = image_y[1].data

    pad1 = int((127 - len(image_g_data)) / 2)
    pad2 = 127 - len(image_g_data) - pad1
    pad3 = int((127 - len(image_g_data[0])) / 2)
    pad4 = 127 - len(image_g_data[0]) - pad3


    im_g = np.pad(image_g_data, ((pad1, pad2), (pad3, pad4)), "constant", constant_values = ((0, 0), (0, 0)))
    im_r = np.pad(image_r_data, ((pad1, pad2), (pad3, pad4)), "constant", constant_values = ((0, 0), (0, 0)))
    im_i = np.pad(image_i_data, ((pad1, pad2), (pad3, pad4)), "constant", constant_values = ((0, 0), (0, 0)))
    im_z = np.pad(image_z_data, ((pad1, pad2), (pad3, pad4)), "constant", constant_values = ((0, 0), (0, 0)))
    im_y = np.pad(image_y_data, ((pad1, pad2), (pad3, pad4)), "constant", constant_values = ((0, 0), (0, 0)))

    im = np.true_divide(np.array([im_g, im_r, im_i, im_z, im_y]), max_value)
    loss = model.evaluate(np.array([im]), np.array([im]), verbose = 0)
    
    if loss <= 0.05:
    
        five_band_image.append(im_g)
        five_band_image.append(im_r)
        five_band_image.append(im_i)
        five_band_image.append(im_z)
        five_band_image.append(im_y)

        five_band_image_reshape = np.reshape(np.array(five_band_image), [1, 5, 127, 127])

        photozdata_subset = photozdata.iloc[i]

        specz = photozdata_subset['specz_redshift']
        specz_reshape = np.reshape(specz, [1, 1])

        if count_in == 0:
        
            for (columnName, columnData) in photozdata.iteritems():
                
                if columnName == 'specz_name' or columnName == 'coord':
                    
                    hf_in.create_dataset(columnName, data = np.reshape(np.array(photozdata[columnName]).astype(str), [286401, 1]).astype('S')[i], maxshape = (None, ))
                    
                else:
                    
                    hf_in.create_dataset(columnName, data = photozdata[columnName][i : i + 1], maxshape = (None, ))
            
            hf_in.create_dataset('image', data = five_band_image_reshape, chunks = True, maxshape = (None, 5, 127, 127))

        else:
        
            for (columnName, columnData) in photozdata.iteritems():
        
                hf_in[columnName].resize((hf_in[columnName].shape[0] + 1), axis = 0)
            
                if columnName == 'specz_name' or columnName == 'coord':
                    
                    hf_in[columnName][hf_in[columnName].shape[0] - 1] = np.reshape(np.array(photozdata[columnName]).astype(str), [286401, 1]).astype('S')[i]
                    
                else:
                    
                    hf_in[columnName][hf_in[columnName].shape[0] - 1] = photozdata[columnName][i : i + 1]
            
            hf_in['image'].resize((hf_in['image'].shape[0] + 1), axis = 0)
            hf_in['image'][hf_in['image'].shape[0] - 1, :, :, :] = five_band_image
        
        count_in = count_in + 1
    
    else:
        
        five_band_image.append(im_g)
        five_band_image.append(im_r)
        five_band_image.append(im_i)
        five_band_image.append(im_z)
        five_band_image.append(im_y)

        five_band_image_reshape = np.reshape(np.array(five_band_image), [1, 5, 127, 127])

        photozdata_subset = photozdata.iloc[i]

        specz = photozdata_subset["specz_redshift"]
        specz_reshape = np.reshape(specz, [1, 1])

        if count_out == 0:
            
            for (columnName, columnData) in photozdata.iteritems():
                
                if columnName == 'specz_name' or columnName == 'coord':
                    
                    hf_out.create_dataset(columnName, data = np.reshape(np.array(photozdata[columnName]).astype(str), [286401, 1]).astype('S')[i], maxshape = (None, ))
                    
                else:
                    
                    hf_out.create_dataset(columnName, data = photozdata[columnName][i : i + 1], maxshape = (None, ))
                
            hf_out.create_dataset('image', data = five_band_image_reshape, chunks = True, maxshape = (None, 5, 127, 127))

        else:
            
            for (columnName, columnData) in photozdata.iteritems():
        
                hf_out[columnName].resize((hf_out[columnName].shape[0] + 1), axis = 0)
            
                if columnName == 'specz_name' or columnName == 'coord':
                    
                    hf_out[columnName][hf_out[columnName].shape[0] - 1] = np.reshape(np.array(photozdata[columnName]).astype(str), [286401, 1]).astype('S')[i]
                    
                else:
                    
                    hf_out[columnName][hf_out[columnName].shape[0] - 1] = photozdata[columnName][i : i + 1]
                
            hf_out['image'].resize((hf_out['image'].shape[0] + 1), axis = 0)
            hf_out['image'][hf_out["image"].shape[0] - 1, :, :, :] = five_band_image
        
        count_out = count_out + 1

    image_g.close()
    image_r.close()
    image_i.close()
    image_z.close()
    image_y.close()

hf_in.close()
hf_out.close()

In [None]:
print(count_in)

In [None]:
print(count_out)

In [None]:
hf = h5py.File('/data/HSC/HSC_v6/step3/127x127_mae_in.hdf5', 'r')
y_array = np.asarray(hf['specz_redshift'][0:])[..., None]

In [None]:
hf.close()

In [None]:
plt.hist(y_array, bins = 100)
plt.show()

In [None]:
# END