# construct Training/Testing datasets of central subhalos
Use subhalos and central/satellite information compiled in `compile_subhalos.ipynb` and `groupcat.ipynb` to compile training and testing datasets for the NPE

In [1]:
import os, h5py
import numpy as np
from astropy import table as aTable

Read in all compiled subhalos and keep only those at `snapshot = 91`, which corresponds to $z=0.1$.

In [2]:
# Load the full compiled catalog, then restrict it to the rows whose
# `snapshot` column equals 91. The unfiltered table is kept under its own
# name so that `subhalo` only ever refers to the snapshot-91 selection.
_all_subhalos = aTable.Table.read('/Users/chahah/data/frb_halos/subhalos_morph.csv')
snap91 = _all_subhalos['snapshot'] == 91
subhalo = _all_subhalos[snap91]
print('%i subhalos' % len(subhalo))

12188 subhalos


Let's keep only central galaxies, based on the central/satellite classification from `groupcat.ipynb`.

In [3]:
# IDs flagged as centrals, loaded from the snapshot-91 `.npy` file.
central_id = np.load('/Users/chahah/data/frb_halos/centrals.subfind_id.snapshot91.npy')

# Vectorized membership test: True for every row whose `subhalo_id` appears
# in `central_id`. This yields the same boolean mask as testing
# `_id in central_id` row by row, without the Python-level loop that rescans
# `central_id` for every row.
is_central = np.isin(np.asarray(subhalo['subhalo_id']), central_id)

# Keep only the rows classified as centrals and report the retained fraction.
subhalo = subhalo[is_central]
print('%.2f of subhalos are centrals' % np.mean(is_central))
print('%i subhalos' % len(subhalo))

0.61 of subhalos are centrals
7468 subhalos


In [4]:
# Display the first five rows of the central-only table as a quick sanity check.
subhalo[:5]

g_Sersic_Reff,g_Sersic_mag,g_Sersic_dmag_m,g_Sersic_dmag_p,g_CAS_C,g_CAS_A,snapshot,subhalo_id,version,i_Sersic_Reff,i_Sersic_mag,i_Sersic_dmag_m,i_Sersic_dmag_p,i_CAS_C,i_CAS_A,r_Sersic_Reff,r_Sersic_mag,r_Sersic_dmag_m,r_Sersic_dmag_p,r_CAS_C,r_CAS_A,y_Sersic_Reff,y_Sersic_mag,y_Sersic_dmag_m,y_Sersic_dmag_p,y_CAS_C,y_CAS_A,z_Sersic_Reff,z_Sersic_mag,z_Sersic_dmag_m,z_Sersic_dmag_p,z_CAS_C,z_CAS_A,SubhaloMassType_stars,SubhaloMassType_dm
float64,float64,float64,float64,float64,float64,int64,int64,str2,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64
19.3399,13.3532,-0.000235766,0.000968579,5.48404,0.128438,91,0,v3,19.0344,11.9149,-0.00614352,0.0441109,5.5885,0.151465,19.3865,12.4089,-0.00406867,0.000176909,5.48404,0.135967,18.6958,11.5054,-0.017918,0.25913,5.5386,0.135583,18.4421,11.6366,-0.0187289,0.160165,5.64329,0.1419,12.7342,14.2415
17.8423,13.3935,-0.00436186,0.00183683,5.38695,0.0904381,91,0,v2,15.5692,12.0187,-0.009137,0.0291452,5.55585,0.0861324,15.5731,12.5123,-0.000620076,0.00299924,5.38695,0.0893994,14.8731,11.6315,-0.0169273,0.0366591,5.45017,0.0698951,15.6444,11.7254,-0.000748302,0.00353526,5.55585,0.0778021,12.7342,14.2415
19.4988,13.3941,-0.0634623,0.0803728,5.53764,0.103165,91,0,v1,18.8773,11.9666,-0.058396,0.0774409,5.69275,0.129235,16.3905,12.5511,-0.167439,0.0408123,5.53764,0.106361,20.3895,11.5183,-0.0408704,0.0943793,5.64329,0.0945187,21.5618,11.5932,-0.00577006,0.00813687,5.69275,0.119082,12.7342,14.2415
17.4816,13.4667,-0.00148941,0.00169452,5.5386,0.0655124,91,0,v0,17.4199,12.0365,-0.00046291,0.000525119,5.70243,0.0809164,17.6987,12.5213,-0.019676,0.0129239,5.5386,0.0727035,17.8812,11.6088,-0.00314681,0.00136466,5.43369,0.0659247,17.103,11.7548,-0.00131203,0.00228339,5.70243,0.0771571,12.7342,14.2415
8.35114,14.371,-0.000231679,0.000353278,3.76913,0.156349,91,70250,v0,4.53555,13.381,-6.8082e-05,8.51091e-05,4.22041,0.119823,5.25201,13.7941,-0.00479618,0.002363,4.02414,0.132992,4.35746,12.9981,-0.000605105,0.000244437,3.98999,0.0989417,4.71643,13.0892,-0.000294769,0.000150397,4.1056,0.10153,12.1915,13.8559


## select test subhalos
We will select 125 subhalos based on `subhalo_id`. Since there are 4 angles per subhalo, this will provide a test set of 500. 

We'll also stick with subhalos with $\log M_* > 9.5$ to mitigate complications with the stellar mass lower bound.

In [5]:
# Use a fixed seed so the train/test split is reproducible: without it, every
# kernel restart draws a different test set and silently overwrites the
# previously written train/test files.
rng = np.random.default_rng(0)

# Unique subhalo ids that pass the stellar-mass cut (> 9.5 in the
# `SubhaloMassType_stars` column). np.unique returns them sorted, so the draw
# depends only on the seed and the catalog contents.
_eligible_ids = np.unique(
    np.asarray(subhalo['subhalo_id'][subhalo['SubhaloMassType_stars'] > 9.5])
)

# Draw 125 distinct subhalo ids for the test set.
uid = rng.choice(_eligible_ids, replace=False, size=125)

# Flag every row that belongs to a selected subhalo. Selecting by id (rather
# than by row) sends all rows of a given subhalo to the same side of the split.
i_test = np.isin(np.asarray(subhalo['subhalo_id']), uid)

In [6]:
# Count the rows flagged for the test set and report the total.
_n_test = np.sum(i_test)
print('%s test subhalos' % _n_test)

500 test subhalos


In [8]:
# Partition the table with the boolean test mask: flagged rows form the test
# set, all remaining rows form the training set.
test_subhalos, train_subhalos = subhalo[i_test], subhalo[~i_test]

# Write each split to its CSV file (test first, then train), replacing any
# existing file at that path.
for _split, _path in (
    (test_subhalos, '/Users/chahah/data/frb_halos/subhalos.central.snapshot91.test.csv'),
    (train_subhalos, '/Users/chahah/data/frb_halos/subhalos.central.snapshot91.train.csv'),
):
    _split.write(_path, format='csv', overwrite=True)