# Gap Mass Statistics

### Imports

In [3]:
import haccytrees.mergertrees
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import cm
from matplotlib.lines import Line2D
import matplotlib.colors as mcolors
import matplotlib.patches as mpatches
import pandas as pd
from astropy.cosmology import FlatLambdaCDM
import astropy.units as u
from itertools import groupby
from matplotlib.ticker import ScalarFormatter
%load_ext line_profiler
%reload_ext autoreload
%autoreload 1
%aimport help_func_haccytrees 
# on roomba: /data/a/cpac/aurora/fossil_groups/help_func_haccytrees.py

# Notebook-wide plot styling: LaTeX text rendering, base font size, and default figure size.
plt.rcParams["text.usetex"] = True
plt.rcParams["font.size"] = 13
plt.rcParams["figure.figsize"] = (6.25, 3.75)

The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler
pyfftw not available, using numpy fft


### Read in merger trees (may take a few minutes)

In [4]:
%%time
# Full halo sample: read the whole file as a single chunk and load only the
# fields listed in `include_fields`.
forest, progenitor_array = haccytrees.mergertrees.read_forest(
    '/data/a/cpac/mbuehlmann/LastJourney/forest/target_forest_aurora.hdf5',
    'LastJourney',
    nchunks=1,
    chunknum=0,
    include_fields=[
        "tree_node_mass",
        "snapnum",
        "fof_halo_tag",
        "sod_halo_cdelta",
        "fof_halo_center_x",
        "fof_halo_center_y",
        "fof_halo_center_z",
    ],
)

# Fossil-group (FG) sample, read with the reader's default options.
fg_forest, fg_progenitor_array = haccytrees.mergertrees.read_forest(
    "/data/a/cpac/mbuehlmann/LastJourney/forest/fg_forest.hdf5",
    'LastJourney',
    # TODO (open question): mass_threshold=5e11 is left disabled. Is this threshold
    # for mergers, or for "particle size"? (might be the wrong word)
)

CPU times: user 34.9 s, sys: 1min 6s, total: 1min 41s
Wall time: 1min 42s


### Establish globals

In [31]:
# Simulation description for LastJourney, and the redshift of each of its analysis steps.
sim = haccytrees.Simulation.simulations['LastJourney']
cosmotools_steps = np.array(sim.cosmotools_steps)
redshifts = sim.step2z(cosmotools_steps)

# Sanity check: index 71 is the snapshot used as "z = 1" further down.
print(redshifts[71])

1.0059880239520962


### Pick out halos for the FG and full samples, generate indices

In [8]:
"""
About helper function: help_func_haccytrees.make_masks()

Parameters
----------
my_forest: dict
    A forest generated by `haccytrees.mergertrees.read_forest()`
bins: list of lists (opt)
    default: [[1e13, 10**13.05], [10**13.3, 10**13.35], [10**13.6, 10**13.65]]
pre_masked_z0: boolean (opt)
    default: False
    Use if my_forest is already constrained to snapnum of 100 (this is unusual)
    
Returns
-------
masks: list of lists
    default: three masks, one for each narrow mass bin
    Each list is a boolean mask of shape (nhalos,) (that's the same as length of one column of my_forest).
    You can use np.nonzero(masks[i])[0] to return the list of indices (`halo_idxs`) of halos in your sample (for the ith mass bin).
    Then, use something like `forest[key][halo_idxs]` to find the value of any key stored in `forest` for the halos in your index.
        e.g. `forest['tree_node_mass'][halo_idxs]` returns halo masses for the halos in your sample
"""

# One boolean mask per mass bin, for the fossil-group (FG) sample and for the full sample.
fg_masks = help_func_haccytrees.make_masks(fg_forest)
halo_masks = help_func_haccytrees.make_masks(forest) # Forest should already only contain values in these bins

# "idxs" indicates multiple indexes, one for each mass bin.
# Built from every mask that make_masks() returns instead of hard-coding [0], [1], [2],
# so the lists stay correct if a different number of bins is passed to make_masks().
fg_idxs = [np.nonzero(mask)[0] for mask in fg_masks]
halo_idxs = [np.nonzero(mask)[0] for mask in halo_masks]

[array([     4608,      9689,     10341, ..., 612885968, 612886456,
       612888264]), array([    35519,     57347,     75396, ..., 612873149, 612877699,
       612884547]), array([   187728,    298687,    379709, ..., 611818926, 612161467,
       612810998])]


### Create luminous merger table
Here, "luminous mergers" and "major mergers" mean the same thing ("luminous" is the older nomenclature).

In [35]:
# Here "binned" indicates there are multiple mass bins
# "binned" variables are lists of arrays. Length of list = number of mass bins

# Column index used as "z = 1" (redshifts[71] is ~1.006).
Z1_SNAPSHOT_INDEX = 71
# A halo is kept only if its mass at the z = 1 snapshot exceeds this value.
MIN_MASS_AT_Z1 = 10**12

# Column mask selecting the snapshots with z < 1. It depends only on `redshifts`,
# so it is built once here instead of once per mass bin of every sample.
z_less_than_one = redshifts < 1

# Loop over the two subsets ("all halos" first, then fossils)
for i, (this_idx, this_forest, this_progenitor_array) in enumerate(zip([halo_idxs, fg_idxs], [forest, fg_forest], [progenitor_array, fg_progenitor_array])):

    binned_mainbranch_index, binned_masses_table = help_func_haccytrees.get_binned_branches(this_idx, this_forest)
    binned_mainbranch_mergers = help_func_haccytrees.get_binned_mainbranch_mergers(this_forest, this_progenitor_array, binned_mainbranch_index)
    binned_major_mergers_table = help_func_haccytrees.get_binned_major_mergers(binned_mainbranch_mergers)

    # loop over each luminous merger table (one per mass bin)
    for j, (full_merger_table, masses_table) in enumerate(zip(binned_major_mergers_table, binned_masses_table)):
        print(full_merger_table.shape) # each merger table should have shape (nhalos, 101)

        # remove columns where z > 1
        low_z_merger_table = full_merger_table[:, z_less_than_one] # new shape should be (nhalos, 29)

        # remove rows where M(z=1) is not > 10^12
        # is that too restrictive?? (should we just wait to start counting luminous mergers until they reach M(z) > 10^12?)
        Mz1_greater_than_1e12 = masses_table[:, Z1_SNAPSHOT_INDEX] > MIN_MASS_AT_Z1
        merger_table = low_z_merger_table[Mz1_greater_than_1e12] # new shape should be (nhalos - nhalos_with_Mz1_less_than_1e12, 29)
        print(merger_table.shape)

        # calculate gap (in z) between consecutive major mergers
        # ...

        # plot the distribution
        # ...

(269358, 101)
(265532, 29)
(36181, 101)
(36104, 29)
(2454, 101)
(2453, 29)
(269358, 101)
(269358, 29)
(36181, 101)
(36180, 29)
(2454, 101)
(2454, 29)
