# Using Merger trees

In [None]:
import h5py
import pandas as pd
import numpy as np
samples = pd.read_csv('../data/samples_in_lightcone0.csv')
catalog = pd.read_hdf('../data/halo_properties_in_lightcone0.hdf5')
vr_tree = h5py.File('/data2/FLAMINGO/L1000N1800/HYDRO_FIDUCIAL/merger_trees/vr_trees.hdf5', 'r')

In [None]:
# Matching soap catalogue with merger tree ids!
samples['GalaxyID'] = -1
samples['TopLeafID'] = -1
for i in range(len(samples)):
    snap_num = samples.loc[i, 'snap_num']
    soapid = samples.loc[i, 'SOAPID']
    galaxyid = vr_tree['SOAP/Snapshot00'+str(snap_num)][soapid]
    
    samples.loc[i, 'GalaxyID'] = galaxyid
    samples.loc[i, 'TopLeafID'] = vr_tree['MergerTree/TopLeafID'][galaxyid]

# I doubt it's needed to account for progenitors and descendants,
# because the dynamics is different after mergers.

In [None]:
dup_mask = (np.abs(samples['x_lc']) > 500) | (np.abs(samples['y_lc']) > 500) | (np.abs(samples['z_lc']) > 500)
samples_dup = samples[dup_mask]
samples_near = samples[~dup_mask]

In [None]:
len(samples_dup)

69

In [None]:
# 1 Find samples_dup's near universe counterparts
# Not founded for whatever reason
mergerids = samples_dup['GalaxyID']
near_mergerids = vr_tree['SOAP/Snapshot0075'][:]
print(np.sum(np.isin(mergerids, near_mergerids))) # >>
print(np.sum(np.isin(near_mergerids, mergerids))) # >>

0
0


In [None]:
# 2 Find them from same topleaf
topleafids = samples_dup['TopLeafID']
near_mergerids = vr_tree['SOAP/Snapshot0075'][:]
near_topleafids = vr_tree['MergerTree/TopLeafID'][:][near_mergerids]
print(np.sum(np.isin(topleafids, near_topleafids))) # >> 65
print(np.sum(np.isin(near_topleafids, topleafids))) # >> 58 multiple duplicates -> one near universe counterpart!
# >> 65 out of 69! Not bad

68
61


In [None]:
np.sum(np.isin(samples_dup['TopLeafID'], samples_near['TopLeafID']))

49

In [None]:
np.intersect1d(samples_dup['TopLeafID'], samples_near['TopLeafID'])

array([ 46076833,  46647332, 161039594, 167877905, 170714420, 171851690,
       184580684, 187230211, 282510918, 334103242, 343189958, 345110682,
       357334365, 397671033, 404772937, 457996413, 528526380, 536568968,
       562645652, 566327479, 578365166, 604531732, 623713826, 640990623,
       647147338, 650252744, 712023421, 716546697, 724578024, 743711202,
       785494899, 787706529, 810814802, 826368038, 839275696, 858854857,
       863550888, 863835432, 893661109, 901789918, 902646262, 904230047])

In [None]:
771-len(samples.drop_duplicates(subset=['TopLeafID'])) # This means all 49 clusters are between near and far universes

49

Find from which snapshot and which halo in the near universe counterpart!

In [None]:
# Matching soap catalogue with merger tree ids!
catalog['GalaxyID'] = -1
catalog['TopLeafID'] = -1
for i in range(len(catalog)):
    snap_num = catalog.loc[i, 'snap_num']
    soapid = catalog.loc[i, 'SOAPID']
    galaxyid = vr_tree['SOAP/Snapshot00'+str(snap_num)][soapid]
    
    catalog.loc[i, 'GalaxyID'] = galaxyid
    catalog.loc[i, 'TopLeafID'] = vr_tree['MergerTree/TopLeafID'][galaxyid]

In [None]:
dup_df = samples_dup.merge(catalog, on=['TopLeafID', 'snap_num'], how='inner')

Ones that are found in snapshot0077 but didn't make it into the cut (65-49=16)

In [None]:
# get the topleafids of the 16
mask = np.isin(samples_dup['TopLeafID'], near_topleafids)
abnormal_dup = samples_dup[mask]
mask = np.isin(abnormal_dup['TopLeafID'], samples_near['TopLeafID'])
abnormal_dup = abnormal_dup[~mask]
abnormal_dup.sort_values(by='TopLeafID') # Sort by topleafid so that the final list of near universe counterparts is matched in this order for easy comparison
n = len(abnormal_dup)
print(len(abnormal_dup))

# Find which near universe's topleafids match the 16's
match = np.isin(near_topleafids, abnormal_dup['TopLeafID'])
print(np.sum(match))

# match the 16's topleafids to near universe's galaxyids
match_galaxyids = near_mergerids[match]
assert len(match_galaxyids) == n
soapids = np.array([])
for i in range(len(match_galaxyids)):
    soapids = np.concatenate((soapids, np.where(vr_tree['SOAP/Snapshot0077'][:] == match_galaxyids[i])[0]))
print(soapids)

16
16
[  920389.  1874057.  2600905.  2706639.  4556268.  5329332.  5668871.
  5772921.  5858678.  6560748.  9097199.  9430379. 10712266. 11897190.
 12587038. 13016203.]


In [None]:
np.argwhere(np.array([1,2,3,4,5])==5)[0]

array([4])

In [None]:
catalog['SOAPID']

In [None]:
np.isin(np.array(soapids), catalog['SOAPID'])

array([ True, False, False, False, False, False, False, False, False,
        True, False, False, False, False,  True, False])

In [None]:
catalog.loc[catalog['snap_num'] == 77]

Unnamed: 0,lc_id,redshift,theta_on_lc,phi_on_lc,M_fof_lc,x_lc,y_lc,z_lc,snap_num,MfofSOAP,...,M500,GasMass,LX0InRestframeWithoutRecentAGNHeating,LX0InRestframeWithoutRecentAGNHeatingCoreExcision,GasTemperatureWithoutRecentAGNHeatingCoreExcision,SpectroscopicLikeTemperatureWithoutRecentAGNHeatingCoreExcision,Y5R500WithoutRecentAGNHeating,Vx,Vy,Vz
1589230,13017956,0.021017,1.030311,43.908471,8.583189e+12,66.329352,63.849052,1.655752,77,8.581345e+12,...,5.866925e+12,6.100196e+10,7.001310e+40,2.241610e+40,2813952.0,3756032.0,1.713322e+41,453.860000,12.859985,455.160030
1589231,13017333,0.021250,6.911810,47.844790,1.447236e+13,61.859496,68.328786,11.173146,77,1.446545e+13,...,8.693014e+12,1.683090e+11,2.769898e+41,2.769898e+41,3518464.0,3825664.0,3.106778e+41,423.160030,148.459960,336.059940
1589232,13018171,0.024445,4.466595,43.329278,8.589794e+12,77.667462,73.265082,8.340412,77,8.589935e+12,...,4.269198e+12,5.207648e+10,2.879640e+40,1.439820e+40,1742848.0,2031616.0,6.099561e+40,-21.774536,-406.874570,290.625500
1589233,13017776,0.022881,12.008518,50.984997,9.003344e+12,61.698615,76.150665,20.847555,77,9.002251e+12,...,6.597070e+12,2.248147e+10,1.242031e+40,1.924722e+39,13008896.0,12681216.0,2.807330e+41,90.059940,-163.239990,407.660030
1589234,13017602,0.022794,13.092200,52.013974,1.071530e+13,59.841805,76.632535,22.612075,77,1.072024e+13,...,5.841156e+12,7.730941e+10,3.526176e+40,2.356455e+40,4874240.0,4075520.0,4.260335e+41,303.559940,229.359990,265.160030
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1591021,11899510,0.022797,-5.885697,-46.840753,5.021522e+12,67.926424,-72.437476,-10.236966,77,5.025112e+12,...,2.778844e+12,5.650566e+10,2.360709e+40,2.360709e+40,295936.0,2113536.0,4.644854e+40,-159.504030,-12.404053,34.895996
1591022,11897458,0.013024,-2.161575,-46.702473,4.378664e+13,39.037703,-41.429387,-2.148567,77,4.377431e+13,...,2.642264e+13,1.142461e+12,1.736801e+42,1.377463e+42,9502720.0,10092544.0,4.039832e+42,410.102780,-46.797120,139.902830
1591023,11899156,0.012694,-4.197473,-49.181849,6.492990e+12,36.322263,-42.052807,-4.078157,77,6.493991e+12,...,3.534758e+12,5.408974e+10,3.045527e+40,3.045527e+40,2441216.0,2494464.0,2.739273e+41,436.695920,32.095947,421.795900
1591024,11897740,0.022186,-1.912917,-47.332148,2.287207e+13,65.799154,-71.386186,-3.242556,77,2.288359e+13,...,1.565086e+13,6.055904e+11,6.227167e+41,4.614229e+41,7274496.0,7323648.0,1.516298e+42,117.802860,-48.097168,73.202880


## output from 7duplicate_match.py


1. The 9 halos that are first observed and not duplicated in near universe

In [1]:
import pandas as pd
import numpy as np
import h5py
unmatched_dup = pd.read_csv('../data/unmatched_duplicates.csv')
catalog = pd.read_csv('../data/halo_properties_in_lightcone0_with_tree.csv')
vr_tree = h5py.File('/data2/FLAMINGO/L1000N1800/HYDRO_FIDUCIAL/merger_trees/vr_trees.hdf5', 'r')

In [8]:
# Are there duplicates throughout the catalog? 
match = pd.merge(unmatched_dup, catalog, on=['TopLeafID'], how='inner')

In [13]:
for snap_num in range(72, 77+1):
    topleafids = unmatched_dup['TopLeafID']
    near_mergerids = vr_tree['SOAP/Snapshot00'+str(snap_num)][:]
    near_topleafids = vr_tree['MergerTree/TopLeafID'][:][near_mergerids]
    n_matched = np.sum(np.isin(topleafids, near_topleafids))
    print(n_matched) # >> 65
    assert np.sum(np.isin(near_topleafids, topleafids)) == n_matched # one-to-one match
    for i in range(len(topleafids)):
        topleafid = topleafids[i]
        soapidx = np.argwhere(near_topleafids == topleafid)
        if len(soapidx) > 0:
            unmatched_dup.loc[unmatched_dup['TopLeafID']==topleafid, 'SOAPID_near'] = soapidx[0][0]
            unmatched_dup.loc[unmatched_dup['TopLeafID']==topleafid, 'snap_num_near'] = snap_num
        
        print(snap_num, soapidx, topleafid)

9
72 [[5644094]] 377700723
72 [[4988097]] 419587918
72 [[6301647]] 412589
72 [[2678353]] 123454236
72 [[11390513]] 534273563
72 [[2888993]] 567251456
72 [[9897687]] 680357185
72 [[6301511]] 270568914
72 [[6199227]] 671949215
8
73 [[5564836]] 377700723
73 [[4917583]] 419587918
73 [[6212701]] 412589
73 [[2641832]] 123454236
73 [[11234915]] 534273563
73 [[2756095]] 567251456
73 [] 680357185
73 [[6212795]] 270568914
73 [[6112399]] 671949215
8
74 [[5487083]] 377700723
74 [[4762169]] 419587918
74 [[6035409]] 412589
74 [[2605336]] 123454236
74 [[10987595]] 534273563
74 [[2718119]] 567251456
74 [] 680357185
74 [[6035410]] 270568914
74 [[5928187]] 671949215
8
75 [[5492818]] 377700723
75 [[4779176]] 419587918
75 [[6038154]] 412589
75 [[2660208]] 123454236
75 [[10917194]] 534273563
75 [[2769935]] 567251456
75 [] 680357185
75 [[6038261]] 270568914
75 [[5940819]] 671949215
6
76 [[5413399]] 377700723
76 [[4627880]] 419587918
76 [[5863617]] 412589
76 [[2622487]] 123454236
76 [] 534273563
76 [[2642411

In [23]:
# Extract their soap id and snapshot number
soap = h5py.File('/data2/FLAMINGO/L1000N1800/HYDRO_FIDUCIAL/SOAP/halo_properties_00'+str(snap_num)+'.hdf5', 'r')
soap['FOFSubhaloProperties/CentreOfMass'][5858678]

array([575.47369276, 615.74286276, 347.50427276])

In [24]:
soap['FOFSubhaloProperties/CentreOfMass'][2600905]

array([745.48107876,  82.16690876, 388.35622876])

2. The 11 clusters that was observed a 2nd time but did not make the sample selection

In [1]:
import pandas as pd
import numpy as np
import h5py
matched_dup = pd.read_csv('../data/matched_duplicates.csv')
catalog = pd.read_hdf('../data/halo_properties_in_lightcone0_with_tree.hdf5', key='catalog')
samples = pd.read_csv('../data/samples_in_lightcone0_with_tree.csv')

In [2]:
box_mask = (np.abs(samples['x_lc']) <= 500) | (np.abs(samples['y_lc']) <= 500) | (np.abs(samples['z_lc']) <= 500)
cut = np.isin(matched_dup['GalaxyID_near'], samples['GalaxyID'][box_mask]) # mask nearby universe counterparts that make the cut
cutted_near_dup_info = matched_dup[~cut] # select those that didn't make the cut
print(len(cutted_near_dup_info)) # should be 11
cutted_near_dup_info.reset_index(drop=True, inplace=True) # reset index for easy iteration

box_mask = (np.abs(catalog['x_lc']) <= 500) | (np.abs(catalog['y_lc']) <= 500) | (np.abs(catalog['z_lc']) <= 500)
catalog_near = catalog[box_mask] # consider only the near field duplicates

cutted_near_dup_properties = pd.DataFrame()
for i in range(len(cutted_near_dup_info)):
    soapidx = cutted_near_dup_info.loc[i, 'SOAPID_near']
    snap_num = cutted_near_dup_info.loc[i, 'snap_num_near']
    cutted_near_dup_property = catalog_near.loc[(catalog_near['SOAPID'] == soapidx) & (catalog_near['snap_num'] == snap_num)]
    cutted_near_dup_properties = pd.concat([cutted_near_dup_properties, cutted_near_dup_property])

11


In [3]:
cutted_near_dup_properties.reset_index(drop=True, inplace=True)
cutted_near_dup_properties

Unnamed: 0,lc_id,redshift,theta_on_lc,phi_on_lc,M_fof_lc,x_lc,y_lc,z_lc,snap_num,MfofSOAP,...,LX0InRestframeWithoutRecentAGNHeating,LX0InRestframeWithoutRecentAGNHeatingCoreExcision,GasTemperatureWithoutRecentAGNHeatingCoreExcision,SpectroscopicLikeTemperatureWithoutRecentAGNHeatingCoreExcision,Y5R500WithoutRecentAGNHeating,Vx,Vy,Vz,GalaxyID,TopLeafID
0,5842388,0.11557,55.950588,47.940239,120067700000000.0,185.731693,205.843771,410.27731,75,120121600000000.0,...,9.495239000000001e+42,7.611436000000001e+42,16629760.0,17137664.0,2.678703e+43,286.60236,196.10236,137.9024,9549088,9549159
1,9714207,0.094224,26.902743,63.470385,171987400000000.0,161.601974,323.704308,183.573702,75,172073600000000.0,...,3.197021e+43,2.478345e+43,20709376.0,22806528.0,5.17447e+43,-84.35333,147.04663,-210.35333,633146302,633146375
2,1958771,0.143625,33.405415,76.711787,656987300000000.0,117.214133,496.306707,336.326016,74,657508000000000.0,...,3.1569489999999998e+44,2.526256e+44,50200576.0,55508990.0,5.303396e+44,-57.684387,-248.18439,226.31561,174343007,174343079
3,1958771,0.145055,33.036706,-76.901846,656987300000000.0,117.196817,-503.69683,336.31329,74,657508000000000.0,...,3.1569489999999998e+44,2.526256e+44,50200576.0,55508990.0,5.303396e+44,-57.684387,-248.18439,226.31561,174343007,174343079
4,12080849,0.041231,-16.160838,-80.415476,478892900000000.0,28.747816,-170.246934,-50.03356,76,478837300000000.0,...,7.491657e+43,6.489865e+43,41287680.0,39911424.0,4.25108e+44,325.59204,316.49207,-102.507935,431913939,431914006
5,2828679,0.173707,47.953336,86.923047,446043600000000.0,26.362356,490.420091,544.560475,74,445852000000000.0,...,1.864203e+44,1.641195e+44,39845890.0,40632320.0,2.9339419999999998e+44,329.6106,-146.08942,-491.08942,283194523,283194595
6,2828679,0.158041,-42.823858,86.907184,446043600000000.0,26.506541,490.568316,-455.31402,74,445852000000000.0,...,1.864203e+44,1.641195e+44,39845890.0,40632320.0,2.9339419999999998e+44,329.6106,-146.08942,-491.08942,283194523,283194595
7,2828679,0.161488,-41.753623,-87.026898,446043600000000.0,26.461372,-509.489293,-455.40744,74,445852000000000.0,...,1.864203e+44,1.641195e+44,39845890.0,40632320.0,2.9339419999999998e+44,329.6106,-146.08942,-491.08942,283194523,283194595
8,9371747,0.1011,15.175875,122.139541,1653275000000000.0,-223.151321,355.188931,113.778035,75,1653666000000000.0,...,8.989988e+44,6.286023e+44,81526780.0,89915390.0,2.2049859999999998e+45,190.6521,-284.14795,130.552,806556361,806556436
9,6860788,0.147553,-19.572528,52.093439,1686950000000000.0,362.995047,466.177523,-210.067995,74,1686651000000000.0,...,1.179152e+45,1.0230469999999999e+45,94765060.0,96468990.0,2.915822e+45,342.83765,-234.26233,-234.56232,2326795,2326864


In [4]:
cutted_near_dup_properties['SpectroscopicLikeTemperatureWithoutRecentAGNHeatingCoreExcision'] /= 11605425

In [5]:
pd.set_option('display.max_columns', None)
cutted_near_dup_properties.loc[np.abs(cutted_near_dup_properties['theta_on_lc'])>20]

Unnamed: 0,lc_id,redshift,theta_on_lc,phi_on_lc,M_fof_lc,x_lc,y_lc,z_lc,snap_num,MfofSOAP,SOAPID,M500,GasMass,LX0InRestframeWithoutRecentAGNHeating,LX0InRestframeWithoutRecentAGNHeatingCoreExcision,GasTemperatureWithoutRecentAGNHeatingCoreExcision,SpectroscopicLikeTemperatureWithoutRecentAGNHeatingCoreExcision,Y5R500WithoutRecentAGNHeating,Vx,Vy,Vz,GalaxyID,TopLeafID
0,5842388,0.11557,55.950588,47.940239,120067700000000.0,185.731693,205.843771,410.27731,75,120121600000000.0,5842387,60404420000000.0,3839701000000.0,9.495239000000001e+42,7.611436000000001e+42,16629760.0,1.476694,2.678703e+43,286.60236,196.10236,137.9024,9549088,9549159
1,9714207,0.094224,26.902743,63.470385,171987400000000.0,161.601974,323.704308,183.573702,75,172073600000000.0,9714206,94008240000000.0,8332237000000.0,3.197021e+43,2.478345e+43,20709376.0,1.965161,5.17447e+43,-84.35333,147.04663,-210.35333,633146302,633146375
2,1958771,0.143625,33.405415,76.711787,656987300000000.0,117.214133,496.306707,336.326016,74,657508000000000.0,1958770,391975900000000.0,46179490000000.0,3.1569489999999998e+44,2.526256e+44,50200576.0,4.783021,5.303396e+44,-57.684387,-248.18439,226.31561,174343007,174343079
3,1958771,0.145055,33.036706,-76.901846,656987300000000.0,117.196817,-503.69683,336.31329,74,657508000000000.0,1958770,391975900000000.0,46179490000000.0,3.1569489999999998e+44,2.526256e+44,50200576.0,4.783021,5.303396e+44,-57.684387,-248.18439,226.31561,174343007,174343079
5,2828679,0.173707,47.953336,86.923047,446043600000000.0,26.362356,490.420091,544.560475,74,445852000000000.0,2828678,306763700000000.0,34634620000000.0,1.864203e+44,1.641195e+44,39845890.0,3.501149,2.9339419999999998e+44,329.6106,-146.08942,-491.08942,283194523,283194595
6,2828679,0.158041,-42.823858,86.907184,446043600000000.0,26.506541,490.568316,-455.31402,74,445852000000000.0,2828678,306763700000000.0,34634620000000.0,1.864203e+44,1.641195e+44,39845890.0,3.501149,2.9339419999999998e+44,329.6106,-146.08942,-491.08942,283194523,283194595
7,2828679,0.161488,-41.753623,-87.026898,446043600000000.0,26.461372,-509.489293,-455.40744,74,445852000000000.0,2828678,306763700000000.0,34634620000000.0,1.864203e+44,1.641195e+44,39845890.0,3.501149,2.9339419999999998e+44,329.6106,-146.08942,-491.08942,283194523,283194595
13,963218,0.172488,-30.069626,41.27077,834114900000000.0,473.976605,415.97038,-365.112765,74,834529300000000.0,963217,614627000000000.0,75591420000000.0,4.920102e+44,4.160483e+44,60293120.0,5.104901,9.338437e+44,-90.35358,171.34644,46.846436,360969659,360969731


In [6]:
from unyt import Mpc, cm
r = (cutted_near_dup_properties['x_lc']**2 + cutted_near_dup_properties['y_lc']**2 + cutted_near_dup_properties['z_lc']**2)**0.5
r = (np.array(r)*Mpc).to(cm)
z = cutted_near_dup_properties['redshift']
z = np.array(z)

c = 299792.458                  # the speed of light in km/s

vx = cutted_near_dup_properties['Vx']                 # velocities in km/s
vy = cutted_near_dup_properties['Vy']
vz = cutted_near_dup_properties['Vz']

rx = cutted_near_dup_properties['x_lc']
ry = cutted_near_dup_properties['y_lc']
rz = cutted_near_dup_properties['z_lc']

los_v = (vx*rx + vy*ry + vz*rz) / (rx**2 + ry**2 + rz**2)**0.5
beta = los_v / c
z_pec = (1 + los_v/c) / (1 - beta**2)**0.5 - 1
z_obs = (z + 1) * (z_pec + 1) - 1
z_obs = np.array(z_obs)

r = r * (1 + z_obs)

cutted_near_dup_properties['LX0InRestframeWithoutRecentAGNHeating']/(1+z)**3 / r**2 / 4 / np.pi

0     1.869472e-13
1     1.034927e-12
2     3.616440e-12
3     3.514778e-12
4     1.585546e-11
5     1.304685e-12
6     1.664714e-12
7     1.571319e-12
8     2.462275e-11
9     1.258538e-11
10    1.013800e-11
11    4.091242e-11
12    2.039077e-10
13    3.494519e-12
14    6.326545e-11
Name: LX0InRestframeWithoutRecentAGNHeating, dtype: float64

# Summary
- 69 out of 771 lies outside of one box size and should be duplicates
- None is labeled the same cluster by the merger tree
- 49 out of 69 have the same progenitors with some near universe clusters in our sample
- 11 out of 69 have near universe counterparts that was in lightcone but did not make the cut.
     11 matched 15 near universe duplicates, 7 of which are cut out by galactic latitude, and 8 by flux.
     One cluster in particular becomes dimmer (in LX) by 100 times by 0.1 redshift
- 9 out of 69 have near universe counterparts in soap catalogue but did not get into the lightcone earlier, i.e. they are not duplicated. This is the first time they are observed.
