# construct Training/Testing datasets of central subhalos
Use the subhalos and central/satellite information compiled in `compile_subhalos.ipynb` and `groupcat.ipynb` to build the training and testing datasets for neural posterior estimation (NPE).

In [1]:
import os, h5py, glob
import numpy as np
from tqdm.notebook import tqdm, trange
from astropy.table import Table, vstack

In [2]:
# snapshot number of the simulation output used throughout this notebook
snapshot = 91
# root directory that the subhalo catalogs are read from and written to
dat_dir = '/scratch/gpfs/chhahn/haloflow/'
# group catalog directory for this snapshot, derived from dat_dir so the two
# cannot drift apart; %03i zero-pads the snapshot number to three digits
# (identical to the previous hard-coded '0%i' for snapshot 91, but also
# correct for snapshots < 10 or >= 100)
grp_dir = os.path.join(dat_dir, 'groupcat/idark.ipmu.jp/hsc405/GroupCats/groups_%03i/' % snapshot)

In [3]:
# presumably the dimensionless Hubble parameter of the simulation (TODO confirm);
# the luminosity cell divides catalog stellar masses by it (mass * 10**10 / h)
h = 0.6773358287273804

Read in all compiled subhalos and keep only those at `snapshot = 91`, which corresponds to $z=0.1$.

In [4]:
# load the compiled morphology catalog
fcatalog = os.path.join(dat_dir, 'subhalos_morph.csv')
subhalo = Table.read(fcatalog)

# keep only the rows that belong to the configured snapshot
is_snap = (subhalo['snapshot'] == snapshot)
subhalo = subhalo[is_snap]

n_subhalo = len(subhalo)
print('%i subhalos' % n_subhalo)

12188 subhalos


Read in the subhalo and group catalog data.

In [5]:
# compile subhalos and groups
def _read_hdf5_group(fhdf5, name):
    """Return every dataset stored under HDF5 group `name` as columns of a new Table."""
    tab = Table()
    for key in fhdf5[name].keys():
        tab[key] = fhdf5[name][key][...]
    return tab

# the catalog is split over numbered chunk files (..., <snapshot>.<chunk>.hdf5);
# the number of chunks is inferred from the largest chunk index on disk
chunk_files = glob.glob(os.path.join(grp_dir, '*.hdf5'))
if len(chunk_files) == 0:
    # fail with a clear message instead of an opaque error from max() on an empty list
    raise FileNotFoundError('no group catalog chunks (*.hdf5) found in %s' % grp_dir)
n_chunk = max(int(fchunk.split('.')[-2]) for fchunk in chunk_files) + 1

# chunks are read in increasing chunk index because later cells use
# subhalo_id as a row index into the stacked subhalo table
tab_sub, tab_grp = [], []
for i in range(n_chunk):
    # %03i zero-pads the snapshot number to three digits (same name as before for snapshot 91)
    with h5py.File(os.path.join(grp_dir, 'fof_subhalo_tab_%03i.%i.hdf5' % (snapshot, i)), 'r') as fsub:
        tab_sub.append(_read_hdf5_group(fsub, 'Subhalo'))
        tab_grp.append(_read_hdf5_group(fsub, 'Group'))

tab_sub = vstack(tab_sub)
tab_grp = vstack(tab_grp)

# compile central subhalos

In [6]:
# IDs of the first subhalo of every group; -1 entries are dropped
# (presumably groups that contain no subhalos -- TODO confirm)
central_subid = tab_grp['GroupFirstSub'][tab_grp['GroupFirstSub'] != -1]

# vectorized membership test: a subhalo is a central if its ID is the first
# subhalo of some group. np.isin replaces a Python-level `in` check per row
# and always returns a boolean mask, even for an empty catalog.
is_central = np.isin(np.asarray(subhalo['subhalo_id']), np.asarray(central_subid))
print('%i centrals out of %i subhalos' % (np.sum(is_central), len(is_central)))

subhalo = subhalo[is_central]

7468 centrals out of 12188 subhalos


In [8]:
# For every central, sum the g, r, i, z fluxes (10**(-0.4 * magnitude)) of all
# *other* subhalos in its group, once for subhalos with any stellar mass and
# once for subhalos above the stellar mass limit.
lum_has_stars = np.zeros((len(subhalo), 4))
lum_above_mlim = np.zeros((len(subhalo), 4))

# column 4 of SubhaloMassType is used as the stellar mass
mstar = np.asarray(tab_sub['SubhaloMassType'][:,4])
has_stars = mstar > 0
# log10(0) = -inf for subhalos without stars; the comparison is still correct
# (False), so only the divide-by-zero RuntimeWarning is silenced here
with np.errstate(divide='ignore'):
    above_mlim = np.log10(mstar * 10**10 / h) > 9.

# loop-invariant quantities, computed once instead of on every iteration
grp_nr = np.asarray(tab_sub['SubhaloGrNr'])
i_all = np.arange(len(tab_sub))
# g, r, i, z (columns 4: of SubhaloStellarPhotometrics)
flux_griz = 10**(-0.4 * np.asarray(tab_sub['SubhaloStellarPhotometrics'])[:,4:])

for i_sub in tqdm(np.unique(subhalo['subhalo_id'])):
    # subhalo_id is used as a row index into tab_sub; select every other
    # subhalo that shares this subhalo's group number
    in_group = (grp_nr == grp_nr[i_sub]) & (i_all != i_sub)

    # all rows of this subhalo receive the same 4-vector; the assignment
    # broadcasts it over the selected rows
    is_sub = (subhalo['subhalo_id'] == i_sub)
    lum_has_stars[is_sub,:] = np.sum(flux_griz[in_group & has_stars], axis=0)
    lum_above_mlim[is_sub,:] = np.sum(flux_griz[in_group & above_mlim], axis=0)

  """


  0%|          | 0/1867 [00:00<?, ?it/s]

In [12]:
# attach the summed group luminosities (one 4-vector per row) to the catalog
for _colname, _lum in [('lum_has_stars', lum_has_stars), ('lum_above_mlim', lum_above_mlim)]:
    subhalo[_colname] = _lum

In [13]:
# display the central-subhalo catalog, now including the lum_has_stars and lum_above_mlim columns
subhalo

g_Sersic_Reff,g_Sersic_mag,g_Sersic_dmag_m,g_Sersic_dmag_p,g_CAS_C,g_CAS_A,snapshot,subhalo_id,version,i_Sersic_Reff,i_Sersic_mag,i_Sersic_dmag_m,i_Sersic_dmag_p,i_CAS_C,i_CAS_A,r_Sersic_Reff,r_Sersic_mag,r_Sersic_dmag_m,r_Sersic_dmag_p,r_CAS_C,r_CAS_A,y_Sersic_Reff,y_Sersic_mag,y_Sersic_dmag_m,y_Sersic_dmag_p,y_CAS_C,y_CAS_A,z_Sersic_Reff,z_Sersic_mag,z_Sersic_dmag_m,z_Sersic_dmag_p,z_CAS_C,z_CAS_A,SubhaloMassType_stars,SubhaloMassType_dm,lum_has_stars [4],lum_above_mlim [4]
float64,float64,float64,float64,float64,float64,int64,int64,str2,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64
19.3399,13.3532,-0.000235766,0.000968579,5.48404,0.128438,91,0,v3,19.0344,11.9149,-0.00614352,0.0441109,5.5885,0.151465,19.3865,12.4089,-0.00406867,0.000176909,5.48404,0.135967,18.6958,11.5054,-0.017918,0.25913,5.5386,0.135583,18.4421,11.6366,-0.0187289,0.160165,5.64329,0.1419,12.7342,14.2415,7794981376.0 .. 25328717824.0,7279218176.0 .. 24038508544.0
17.8423,13.3935,-0.00436186,0.00183683,5.38695,0.0904381,91,0,v2,15.5692,12.0187,-0.009137,0.0291452,5.55585,0.0861324,15.5731,12.5123,-0.000620076,0.00299924,5.38695,0.0893994,14.8731,11.6315,-0.0169273,0.0366591,5.45017,0.0698951,15.6444,11.7254,-0.000748302,0.00353526,5.55585,0.0778021,12.7342,14.2415,7794981376.0 .. 25328717824.0,7279218176.0 .. 24038508544.0
19.4988,13.3941,-0.0634623,0.0803728,5.53764,0.103165,91,0,v1,18.8773,11.9666,-0.058396,0.0774409,5.69275,0.129235,16.3905,12.5511,-0.167439,0.0408123,5.53764,0.106361,20.3895,11.5183,-0.0408704,0.0943793,5.64329,0.0945187,21.5618,11.5932,-0.00577006,0.00813687,5.69275,0.119082,12.7342,14.2415,7794981376.0 .. 25328717824.0,7279218176.0 .. 24038508544.0
17.4816,13.4667,-0.00148941,0.00169452,5.5386,0.0655124,91,0,v0,17.4199,12.0365,-0.00046291,0.000525119,5.70243,0.0809164,17.6987,12.5213,-0.019676,0.0129239,5.5386,0.0727035,17.8812,11.6088,-0.00314681,0.00136466,5.43369,0.0659247,17.103,11.7548,-0.00131203,0.00228339,5.70243,0.0771571,12.7342,14.2415,7794981376.0 .. 25328717824.0,7279218176.0 .. 24038508544.0
8.35114,14.371,-0.000231679,0.000353278,3.76913,0.156349,91,70250,v0,4.53555,13.381,-6.8082e-05,8.51091e-05,4.22041,0.119823,5.25201,13.7941,-0.00479618,0.002363,4.02414,0.132992,4.35746,12.9981,-0.000605105,0.000244437,3.98999,0.0989417,4.71643,13.0892,-0.000294769,0.000150397,4.1056,0.10153,12.1915,13.8559,7043611136.0 .. 19042433024.0,6673797120.0 .. 18302830592.0
5.72622,15.4516,-0.00124161,0.00518067,3.08117,0.132677,91,70250,v1,6.164,14.0962,-0.000137111,0.000157175,3.1908,0.174494,5.74311,14.6235,-0.000986357,0.00377013,3.1336,0.147059,5.74302,13.7465,-0.000279723,0.000285364,3.07244,0.16041,6.05386,13.8397,-0.000305054,0.000210211,3.1908,0.167985,12.1915,13.8559,7043611136.0 .. 19042433024.0,6673797120.0 .. 18302830592.0
10.927,14.9119,-0.000545817,0.00337444,4.07805,0.311707,91,70250,v2,11.3977,13.6259,-0.0134751,0.00928575,4.20955,0.318754,11.0887,14.1084,-0.00234484,0.000533303,4.3329,0.29462,10.7468,13.2604,-0.00740034,0.00588496,4.09623,0.297498,10.7628,13.4022,-0.0465935,0.0166154,4.26038,0.337333,12.1915,13.8559,7043611136.0 .. 19042433024.0,6673797120.0 .. 18302830592.0
7.91157,14.4359,-0.00102789,0.000354864,3.83602,0.163602,91,70250,v3,3.74139,13.5004,-0.000171076,0.000879333,4.31121,0.146037,4.51638,13.892,-0.00121281,0.000205462,4.22041,0.15837,3.91238,13.0708,-0.000235931,0.000220311,4.19559,0.114799,4.23096,13.1591,-0.000145081,0.00012245,4.19559,0.124879,12.1915,13.8559,7043611136.0 .. 19042433024.0,6673797120.0 .. 18302830592.0
7.45598,14.7677,-0.00578747,0.00685728,4.32941,0.2039,91,101482,v0,4.55396,13.6359,-9.57631e-05,0.000127331,4.69846,0.15264,4.84716,14.11,-0.000610095,0.00267659,4.69846,0.17248,4.13891,13.2776,-0.000663573,0.00141235,4.56045,0.124105,4.63067,13.3358,-0.00737446,0.00284665,4.58762,0.133888,12.195,13.7078,3609828352.0 .. 9767180288.0,3424485376.0 .. 9359472640.0
9.88019,14.5773,-0.000567513,0.00206406,4.43971,0.172794,91,101482,v1,4.1264,13.633,-0.000899041,0.000195064,4.69846,0.125887,7.23143,13.8319,-0.00874284,0.00315115,4.62065,0.131072,4.76644,13.1294,-0.000356725,0.000275218,4.4762,0.0951914,4.89557,13.2385,-0.0055052,0.00162382,4.69846,0.10878,12.195,13.7078,3609828352.0 .. 9767180288.0,3424485376.0 .. 9359472640.0


In [14]:
# save the central-subhalo catalog; path= is passed explicitly (set to the name
# astropy falls back to anyway) so the file layout is unchanged but the
# "using default path" warning is no longer raised
subhalo.write(os.path.join(dat_dir, 'subhalos.central.snapshot%i.hdf5' % snapshot),
              path='__astropy_table__', overwrite=True)

  "using default path {}".format(path))


## select test subhalos
We will select 125 subhalos based on `subhalo_id`. Since there are 4 angles per subhalo, this will provide a test set of 500. 

We'll also stick with subhalos with $\log M_* > 9.5$ to mitigate complications with the stellar mass lower bound.

In [15]:
# fixed seed so the train/test split is reproducible after a kernel restart
test_seed = 0
n_test = 125
rng = np.random.default_rng(test_seed)

# candidates: unique subhalo IDs with SubhaloMassType_stars > 9.5
candidates = np.asarray(np.unique(subhalo['subhalo_id'][subhalo['SubhaloMassType_stars'] > 9.5]))
uid = rng.choice(candidates, replace=False, size=n_test)

# flag every row that belongs to one of the selected subhalo IDs
i_test = np.isin(np.asarray(subhalo['subhalo_id']), uid)

In [16]:
# report how many catalog rows ended up in the test set
print('%s test subhalos' % i_test.sum())

500 test subhalos


In [17]:
# rows flagged by i_test form the test set; all remaining rows are used for training
test_subhalos = subhalo[i_test]
train_subhalos = subhalo[~i_test]

# path= is passed explicitly (set to the name astropy falls back to anyway) so
# the file layout is unchanged but the "using default path" warning is avoided
test_subhalos.write(os.path.join(dat_dir, 'subhalos.central.snapshot%i.test.hdf5' % snapshot),
                    path='__astropy_table__', overwrite=True)
train_subhalos.write(os.path.join(dat_dir, 'subhalos.central.snapshot%i.train.hdf5' % snapshot),
                     path='__astropy_table__', overwrite=True)