In [9]:
import h5py
import pandas as pd
import numpy as np
import shutil
import tensorflow as tf
from sklearn.preprocessing import StandardScaler

In [2]:
# Limit TensorFlow to a fixed amount of memory on the first GPU by creating a
# single logical device with an explicit memory limit.
GPU_MEMORY_LIMIT_MB = 10 * 1000  # `memory_limit` is expressed in megabytes

gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # Stable `tf.config` names; the `experimental` virtual-device functions
        # are aliases of these, and the cell already uses the stable
        # `list_physical_devices`.
        tf.config.set_logical_device_configuration(
            gpus[0],
            [tf.config.LogicalDeviceConfiguration(memory_limit=GPU_MEMORY_LIMIT_MB)])
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Per the TensorFlow GPU guide, logical devices must be configured
        # before the GPU has been initialized; report the problem and continue.
        print(e)

1 Physical GPUs, 1 Logical GPUs


In [4]:
# Directory that holds the input HDF5 files and receives the copies.
DATA_DIR = '/data/HSC/HSC_v6/step2A/127x127'

# Original (read-only) split files.
TRAIN_OLD = f'{DATA_DIR}/5x127x127_training.hdf5'
VAL_OLD = f'{DATA_DIR}/5x127x127_validation.hdf5'
TEST_OLD = f'{DATA_DIR}/5x127x127_testing.hdf5'

# Copies that the normalized magnitude datasets are added to.
TRAIN_NEW = f'{DATA_DIR}/5x127x127_training_with_mags_normalized.hdf5'
VAL_NEW = f'{DATA_DIR}/5x127x127_validation_with_mags_normalized.hdf5'
TEST_NEW = f'{DATA_DIR}/5x127x127_testing_with_mags_normalized.hdf5'

# Duplicate each split so the originals are never modified. `copyfile`
# overwrites an existing destination, so re-running this cell resets the
# copies to the original contents. The source files are not opened with h5py
# here: the handle was never used and the copy works on the raw file.
for src_path, dst_path in (
    (TRAIN_OLD, TRAIN_NEW),
    (VAL_OLD, VAL_NEW),
    (TEST_OLD, TEST_NEW),
):
    shutil.copyfile(src_path, dst_path)

In [5]:
# Count the entries of the 'object_id' dataset in each original split file.
split_sizes = []
for split_path in (TRAIN_OLD, VAL_OLD, TEST_OLD):
    with h5py.File(split_path, 'r') as h5_split:
        split_sizes.append(len(h5_split['object_id']))

train_len, val_len, test_len = split_sizes

In [6]:
# Show the number of 'object_id' entries found in the training file.
train_len

200481

In [7]:
# One StandardScaler shared by the following cells: it is fitted on the
# training magnitudes and then reused, unchanged, to transform the validation
# and testing magnitudes.
scaler = StandardScaler()

In [19]:
# Fit the shared scaler on the training magnitudes and store the standardized
# values in the training copy, one '<band>_cmodel_mag_normalized' dataset per band.
with h5py.File(TRAIN_NEW, 'a') as f:
    bands = ('g', 'r', 'i', 'z', 'y')
    # One column per band, in the order listed in `bands`.
    mags_train = np.column_stack([f[f'{band}_cmodel_mag'][:] for band in bands])
    scaler.fit(mags_train)
    # Transform once and slice per band instead of re-transforming for every column.
    mags_train_normalized = scaler.transform(mags_train)
    for column, band in enumerate(bands):
        dataset_name = f'{band}_cmodel_mag_normalized'
        # Assigning to an existing HDF5 name raises, so drop any dataset left
        # by a previous run to keep this cell re-runnable.
        if dataset_name in f:
            del f[dataset_name]
        f[dataset_name] = mags_train_normalized[:, column]
  

In [20]:
# Apply the already-fitted scaler to the validation magnitudes (no refit) and
# store the result in the validation copy, one dataset per band.
with h5py.File(VAL_NEW, 'a') as f:
    bands = ('g', 'r', 'i', 'z', 'y')
    # One column per band, in the order listed in `bands`.
    mags_val = np.column_stack([f[f'{band}_cmodel_mag'][:] for band in bands])
    # Transform once and slice per band instead of re-transforming for every column.
    mags_val_normalized = scaler.transform(mags_val)
    for column, band in enumerate(bands):
        dataset_name = f'{band}_cmodel_mag_normalized'
        # Assigning to an existing HDF5 name raises, so drop any dataset left
        # by a previous run to keep this cell re-runnable.
        if dataset_name in f:
            del f[dataset_name]
        f[dataset_name] = mags_val_normalized[:, column]
  

In [22]:
# Apply the already-fitted scaler to the testing magnitudes (no refit) and
# store the result in the testing copy, one dataset per band.
with h5py.File(TEST_NEW, 'a') as f:
    bands = ('g', 'r', 'i', 'z', 'y')
    # One column per band, in the order listed in `bands`.
    mags_test = np.column_stack([f[f'{band}_cmodel_mag'][:] for band in bands])
    # Transform once and slice per band instead of re-transforming for every column.
    mags_test_normalized = scaler.transform(mags_test)
    for column, band in enumerate(bands):
        dataset_name = f'{band}_cmodel_mag_normalized'
        # Assigning to an existing HDF5 name raises, so drop any dataset left
        # by a previous run to keep this cell re-runnable.
        if dataset_name in f:
            del f[dataset_name]
        f[dataset_name] = mags_test_normalized[:, column]
  

In [23]:
# Read back one of the datasets written to the testing copy and print it as a
# quick check of the stored values.
with h5py.File(TEST_NEW, 'r') as h5_test:
    g_normalized_test = h5_test['g_cmodel_mag_normalized'][:]
    print(g_normalized_test)

[-0.49655788 -0.93345986  0.1983927  ...  0.40052469  0.24482443
  0.66267636]
