# construct Training/Testing datasets of central subhalos
Use subhalos and central/satellite information compiled in `compile_subhalos.ipynb` and `groupcat.ipynb` to compile training and testing datasets for the NPE

In [1]:
import os, h5py
import numpy as np
from astropy import table as aTable

Read in all compiled subhalos and keep only those at `snapshot = 91`, which corresponds to $z=0.1$.

In [2]:
# read the full compiled subhalo catalog, then keep only the snapshot-91 rows
subhalo_all = aTable.Table.read('/Users/chahah/data/frb_halos/subhalos_morph.csv')
snap91 = (subhalo_all['snapshot'] == 91)  # boolean row mask over the full catalog
subhalo = subhalo_all[snap91]
print('%i subhalos' % len(subhalo))

12179 subhalos


Let's keep only the central galaxies, based on the central/satellite classification from `groupcat.ipynb`.

In [3]:
# subhalo IDs flagged as centrals (loaded from the snapshot-91 centrals file)
central_id = np.load('/Users/chahah/data/frb_halos/centrals.subfind_id.snapshot91.npy')

# vectorized membership test: True for every row whose subhalo_id appears in
# central_id. This replaces a per-row Python `in` check, which rescans the
# whole central_id array for each row of the table.
is_central = np.isin(np.asarray(subhalo['subhalo_id']), central_id)
subhalo = subhalo[is_central]

# is_central spans the pre-cut table, so its mean is the central fraction
print('%.2f of subhalos are centrals' % np.mean(is_central))
print('%i subhalos' % len(subhalo))

0.61 of subhalos are centrals
7468 subhalos


In [4]:
# preview the first five rows of the table
subhalo[:5]

g_Sersic_Reff,g_Sersic_mag,g_CAS_C,g_CAS_A,snapshot,subhalo_id,version,i_Sersic_Reff,i_Sersic_mag,i_CAS_C,i_CAS_A,r_Sersic_Reff,r_Sersic_mag,r_CAS_C,r_CAS_A,y_Sersic_Reff,y_Sersic_mag,y_CAS_C,y_CAS_A,z_Sersic_Reff,z_Sersic_mag,z_CAS_C,z_CAS_A,SubhaloMassType_stars,SubhaloMassType_dm
float64,float64,float64,float64,int64,int64,str2,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64
16.5282,13.4244,5.23794,0.0904381,91,0,v2,14.4653,12.0606,5.30594,0.0861324,13.0986,12.6109,5.41252,0.0893994,13.5889,11.6764,5.30594,0.0698951,14.0126,11.7837,5.30594,0.0778021,12.7342,14.2415
17.0677,13.4204,5.43369,0.128438,91,0,v3,11.8979,12.1546,5.59752,0.151465,19.8217,12.3975,5.43369,0.135967,11.0848,11.7779,5.49236,0.135583,13.2725,11.8089,5.59752,0.1419,12.7342,14.2415
17.1306,13.4667,5.69275,0.103165,91,0,v1,16.8541,12.0389,5.69275,0.129235,16.8944,12.5384,5.5885,0.106361,17.1656,11.6107,5.64329,0.0945187,16.6774,11.7437,5.74775,0.119082,12.7342,14.2415
17.353,13.4715,5.49236,0.0655124,91,0,v0,15.1905,12.1003,5.66127,0.0809164,17.953,12.5103,5.32853,0.0727035,18.0359,11.6035,5.38695,0.0659247,14.8276,11.8243,5.66127,0.0771571,12.7342,14.2415
8.35147,14.3709,3.76913,0.156349,91,70250,v0,4.53571,13.381,4.1056,0.119823,5.2346,13.7961,4.22041,0.132992,4.3566,12.9982,3.98999,0.0989417,4.71578,13.0893,3.98999,0.10153,12.1915,13.8559


## select test subhalos
We will randomly select 125 unique subhalos by `subhalo_id`. Since there are 4 angles per subhalo, this will provide a test set of 500 entries. 

We'll also restrict the test selection to subhalos with $\log M_* > 9.5$ to mitigate complications with the stellar mass lower bound; all remaining subhalos go into the training set.

In [None]:
# select 125 test subhalos with logM*>9.5 as our test sample
# the rest are training set

# Seeded generator: without a fixed seed every run draws a different test set,
# so the CSVs written below would silently change between runs.
rng = np.random.default_rng(0)

# unique IDs of the subhalos that pass the stellar-mass cut
massive_ids = np.unique(np.asarray(subhalo['subhalo_id'][subhalo['SubhaloMassType_stars'] > 9.5]))
uid = rng.choice(massive_ids, replace=False, size=125)

# flag every row whose subhalo_id was drawn, so that all rows of a selected
# subhalo land in the test set together
i_test = np.isin(np.asarray(subhalo['subhalo_id']), uid)

test_subhalos = subhalo[i_test]
train_subhalos = subhalo[~i_test]

# overwrite=True lets this cell be re-run once the files exist; with the fixed
# seed above, a re-run on the same input rewrites identical files.
test_subhalos.write('subhalos.central.snapshot91.test.csv', format='csv', overwrite=True)
train_subhalos.write('subhalos.central.snapshot91.train.csv', format='csv', overwrite=True)


In [None]:

# tidy up table: start from every column name, then drop the bookkeeping ones
cols = list(subhalo.dtype.names)
for _dropped in ('snapshot', 'subhalo_id', 'version', 'SubhaloMassType_dm'):
    # list.remove raises ValueError if the column is unexpectedly absent
    cols.remove(_dropped)

