In [1]:
import numpy as np
import pandas as pd
import copy
import pickle

In [2]:
## Load the main-branch CSV files ##
# Each file is read as 61 columns named ch00 ... ch60 with dtype=str and
# converted to a NumPy array; the arrays are keyed by file name.
read_cols = 61
col_names = ["ch{:02d}".format(col) for col in range(read_cols)]

mainbranch_list = ["mainbranch_MW039.csv", "mainbranch_MW038.csv"]
df_dict = {
    csv_name: np.array(pd.read_csv(csv_name, names=col_names, dtype=str))
    for csv_name in mainbranch_list
}

In [3]:
## Define a GetInfoTree object ##
class GetInfoTree:
    """Split a table into per-tree blocks and read single columns from them.

    Rows that contain the literal string "tree" act as separators: the rows
    between one separator and the next (or the end of the table) form one
    tree. Rows located before the first separator are never returned.
    """

    def __init__(self, df):
        """Store the table and locate its separator rows.

        Parameters
        ----------
        df : numpy.ndarray
            2-D table whose cells are compared against the string "tree".
        """
        self.df = df
        # np.where reports a row once per matching cell, so a separator row
        # holding "tree" in several columns would be listed repeatedly and
        # produce spurious empty trees. np.unique keeps each row once (and
        # leaves the indices sorted, as np.where already returns them).
        self.tree_index = np.unique(np.where(df == "tree")[0])

    def get_info(self, ch):
        """Return column ``ch`` of every tree, converted to float.

        Parameters
        ----------
        ch : int
            Column number to extract from ``self.df``.

        Returns
        -------
        list of list of float
            One inner list per separator row, in table order. A separator
            that is directly followed by another separator (or by the end of
            the table) yields an empty inner list.

        Raises
        ------
        ValueError
            If a cell of the requested column cannot be parsed as a float.
        """
        # Each tree spans from the row after its separator up to (excluding)
        # the next separator; the last tree runs to the end of the table.
        bounds = list(self.tree_index) + [len(self.df)]
        return [
            self.df[start + 1:stop, ch].astype(float).tolist()
            for start, stop in zip(bounds[:-1], bounds[1:])
        ]

# Wrap every loaded table in its own GetInfoTree, keyed by file name.
getinfo_dict = {
    m_file: GetInfoTree(df_dict[m_file]) for m_file in mainbranch_list
}

In [14]:
## Column numbers of the parameters to extract, given as a list. ##
## See MergerTree for the correspondence between column numbers and parameters. ##
param_list = [0, 1, 5, 10, 11, 17, 20]
param_name_list = ["ScaleFactor", "ID", "pid", "Mvir", "Rvir", "x", "vx"]
# Kept as strings: later cells paste them into file names and convert them
# with float() for the Mvir selection.
range_min, range_max = "1e+6", "1e+18"
# Parameter name -> column number; the two lists are paired by position.
param_dict = dict(zip(param_name_list, param_list))

In [15]:
# File-name suffix: "_<range_min>_<range_max>_<name>_..._<name>.pickle".
# The leading empty string makes join() emit the initial underscore.
pickle_name = "_".join(["", range_min, range_max] + param_name_list) + ".pickle"

In [16]:
## Extract the parameters listed in param_name_list (columns taken from param_dict). ##
# param[name][file] holds what get_info returns for that column:
# one list of floats per tree.
param = {
    p_name: {
        m_file: getinfo_dict[m_file].get_info(param_dict[p_name])
        for m_file in mainbranch_list
    }
    for p_name in param_name_list
}

In [17]:
## Extract Mvir(z=0) of all haloes (the last entry of each tree) to get use_idx_dict. ##
# NOTE(review): the selection cell that follows reads mvir_z0 unconditionally,
# so "Mvir" presumably has to be in param_name_list for the notebook to run
# end to end - confirm before removing it from the parameter list.
if "Mvir" in param_name_list:
    mvir_z0 = {
        m_file: [tree_mvir[-1] for tree_mvir in param["Mvir"][m_file]]
        for m_file in mainbranch_list
    }

In [18]:
def classified_index(classify_list, min_val, max_val):
    """Return the positions of the values that lie within [min_val, max_val].

    Parameters
    ----------
    classify_list : iterable
        Values to test, in order.
    min_val, max_val :
        Inclusive lower and upper bounds.

    Returns
    -------
    list of int
        Zero-based positions of the accepted values, in ascending order.
    """
    return [
        position
        for position, value in enumerate(classify_list)
        if value >= min_val and value <= max_val
    ]

# Per file, the indices of the haloes whose Mvir(z=0) lies inside
# [range_min, range_max].
lower_mvir, upper_mvir = float(range_min), float(range_max)
use_idx_dict = {}
for m_file in mainbranch_list:
    selected = classified_index(mvir_z0[m_file], lower_mvir, upper_mvir)
    use_idx_dict[m_file] = selected
    # Number of haloes used from each MW0XY tree file
    print("m_key : {},  length : {}".format(m_file, len(selected)))

m_key : mainbranch_MW039.csv,  length : 4355
m_key : mainbranch_MW038.csv,  length : 3345


In [19]:
## Extract the host halo (the first tree of every file). ##
host_param = {
    p_name: {
        m_file: np.array(param[p_name][m_file][0]) for m_file in mainbranch_list
    }
    for p_name in param_name_list
}

# Characters 11-15 of each file name ("MW039" in "mainbranch_MW039.csv"),
# each followed by "_"; later cells strip the final "_" with m_str[:-1].
m_str = "".join(m_file[11:16] + "_" for m_file in mainbranch_list)
# pickle_name[:-7] drops the trailing ".pickle" so the tree tags can be
# inserted before the extension.
host_pickle_path = "host_param" + pickle_name[:-7] + "_" + m_str[:-1] + ".pickle"
with open(host_pickle_path, mode="wb") as out_file:
    pickle.dump(host_param, out_file)

In [20]:
## Collect the selected haloes as param_use_idx. ##
# Index 0 is left out: that tree is the one the host-halo cell stores
# separately.
param_use_idx = {
    p_name: {
        m_file: [
            np.array(param[p_name][m_file][halo_idx])
            for halo_idx in use_idx_dict[m_file]
            if halo_idx != 0
        ]
        for m_file in mainbranch_list
    }
    for p_name in param_name_list
}
with open("param" + pickle_name[:-7] + "_" + m_str[:-1] + ".pickle", mode="wb") as out_file:
    pickle.dump(param_use_idx, out_file)

In [21]:
# Append the name of the selected-halo pickle as one line to ../param_list.txt.
with open("../param_list.txt", mode="a") as list_file:
    param_pickle_name = "param" + pickle_name[:-7] + "_" + m_str[:-1] + ".pickle"
    list_file.write(param_pickle_name + "\n")