In [8]:
##Import some modules used for this notebook.
import os, sys, shutil
import numpy as np
import itertools as it
import copy as cp
import pickle

In [28]:
##Print a dir_list which is a list of extracted parameters.
##The parameters are made by ./parameter/MakePickleOfParam.ipynb.
##ndmin = 1 keeps dir_list one-dimensional even when param_list.txt holds a single line
##(otherwise np.loadtxt returns a 0-d array and len()/indexing below fail).
dir_list = np.loadtxt("param_list.txt", dtype = "str", ndmin = 1)
for i, dir_name in enumerate(dir_list):
    print("{}. {}".format(i, dir_name))

##Input index-number of param_list.txt in dir_num to select it.
##Load the parameters of host-halo and sub-halo.
##NOTE: pickle.load can execute arbitrary code, so only load pickle files that you created yourself.
dir_num = int(input("\nSelect index>> "))
selected_file = str(dir_list[dir_num])
with open(os.path.join("parameters", selected_file), mode = "rb") as f:
    param = pickle.load(f)
with open(os.path.join("parameters", "host_" + selected_file), mode = "rb") as f:
    host_param = pickle.load(f)

##The list of used parameters and host-halo.
##param is indexed as param[parameter-name][host-halo-name], so the host-halo names are
##taken from the entry of the first parameter.
param_list = list(param.keys())
mainbranch_list = list(param[param_list[0]].keys())
print(param_list)
print(mainbranch_list)

0. param_1e+6_1e+18_ScaleFactor_ID_pid_Mvir_Rvir_x_vx_MW039_MW038.pickle
1. param_1e+7_1e+18_ScaleFactor_ID_pid_Mvir_Rvir_x_vx_MW039_MW038.pickle

Select index>> 0
['ScaleFactor', 'ID', 'pid', 'Mvir', 'Rvir', 'x', 'vx']
['mainbranch_MW039.csv', 'mainbranch_MW038.csv']


In [106]:
##Get the accretion index of each sub-halo as acc_sf(Data type is dictionary of lists).
##acc_sf[m_key][n] is the first index i of the n-th sub-halo at which its pid equals the ID
##of the host-halo, or -1 when they never match.
##The sub-halo array is aligned with the END of the host-halo array, so index i of a sub-halo
##of size P corresponds to index (host size - P + i) of the host-halo.
acc_sf = {}
for m_key in mainbranch_list:
    host_ids = np.asarray(host_param["ID"][m_key])
    acc_sf[m_key] = []
    for sub_pids in param["pid"][m_key]:
        sub_pids = np.asarray(sub_pids)
        ##Only the overlapping tail can be compared. When the sub-halo is longer than the
        ##host-halo, its leading entries have no host counterpart and are skipped
        ##(a negative host index would otherwise wrap around to the end of the host array).
        n_common = min(sub_pids.size, host_ids.size)
        n_skipped = sub_pids.size - n_common
        host_tail = host_ids[host_ids.size - n_common:]
        hits = np.flatnonzero(host_tail == sub_pids[n_skipped:])
        ##Exactly one entry is appended per sub-halo (also for an empty sub-halo), so that
        ##acc_sf[m_key] stays aligned with the sub-halo index.
        acc_sf[m_key].append(int(hits[0]) + n_skipped if hits.size else -1)

In [107]:
##Describe variables of learning-parameter of a DeepLearningModel in LearnParam.
class LearnParam:
    """Holds the learning settings of a DeepLearningModel as plain attributes.

    Every attribute is assigned a fixed default value in __init__.
    """

    def __init__(self):
        ##--- Dataset scaling ---
        ##Prevent division by zero like 1 / (0 + eps).
        self.eps = 1.0
        ##If bias is True, eps is added to the normalized dataset.
        self.bias = True

        ##--- Model ---
        ##Number of hidden layers and number of neurons in each layer.
        self.hidden = [[100] * 5]
        ##Size of a mini-batch is size(train-dataset) / batch_denominator.
        self.batch_denominator = [200]
        ##Learning rate of back propagation.
        self.learn_rate = ["1e-3"]
        ##Optimizer of the DeepLearningModel.
        self.optimizer = ["Adam"]
        ##Whether to use BatchNormalization or not.
        self.batch_normalization = [True]
        ##Loss function of the DeepLearningModel.
        self.loss_func = ["MSE_RE"]
        ##Activation function of the hidden layers.
        self.activation_func = ["relu"]
        ##Condition for initializing the weights of the hidden layers.
        self.weight_init = ["he"]
        ##Training epoch.
        self.epoch = 200
        ##Size of input-dataset(axis == 1).
        self.input_size = 2
        ##Size of output-dataset(axis == 1).
        self.output_size = 7

        ##--- Dataset ---
        ##Learning parameter.
        self.param_kind = "x"
        ##Percentage of training-dataset to all-dataset.
        self.train_ratio = 0.7
        ##Format of dataset normalization: None, Normalization or Standardization.
        self.normalize_format = "Normalization"
        ##Condition of the dataset.
        ##All is overall, Before/After_acc is the part before/after accretion,
        ##All(_not)_acc is overall of (not)accreted sub-halo.
        self.extract_dataset = "After_acc"

        ##--- Output ---
        ##Distinguishes the directories where training results are saved.
        self.learn_num = 1
        ##Format of the saved figures.
        self.save_fig_type = ".png"

In [108]:
##Extract the data to be used as dataset(data type is double-dictionary).
##Data structure of the dataset is that axis1 is host-halo(ex:MW039, MW038), axis2 is sub-halo.
def make_dataset(m_list, p_kind, h_param, param, ext_data, acc_sf):
    """Extract the per-sub-halo arrays that are used as the dataset.

    Args:
        m_list: Host-halo keys to process.
        p_kind: Name of the parameter to extract (a key of param, and of h_param for coordinates).
        h_param: Host-halo parameters, indexed as h_param[p_kind][m_key].
        param: Sub-halo parameters, indexed as param[p_kind][m_key][sub-halo index].
        ext_data: Extraction condition. "After_acc" keeps only the part from the accretion
            index onward, "All_acc" keeps whole arrays of accreted sub-halos only, and any
            other value keeps every sub-halo in full.
        acc_sf: Accretion index of each sub-halo, acc_sf[m_key][sub-halo index] (-1 == not accreted).

    Returns:
        Dictionary {m_key: [array, ...]}. The returned arrays are new arrays; the arrays in
        param and h_param are never modified.

    Raises:
        ValueError: If a coordinate array of a sub-halo is longer than that of its host-halo.
    """
    ##If p_kind is coordinate, the value of a sub-halo relative to its host-halo is used.
    use_relative = p_kind in ["x", "y", "z"]
    dataset = {}
    for m_key in m_list:
        dataset[m_key] = []
        host_values = np.asarray(h_param[p_kind][m_key]) if use_relative else None
        for idx, parameter in enumerate(param[p_kind][m_key]):
            ##The accretion point is taken from the acc_sf as start_i.
            start_i = 0
            if ext_data in ["After_acc", "All_acc"]:
                if acc_sf[m_key][idx] == -1:
                    ##The acc_sf == -1 is not accretion so skip this halo.
                    continue
                if ext_data == "After_acc":
                    start_i = acc_sf[m_key][idx]
            ##assumes every sub-halo array is one-dimensional -- TODO confirm
            sub_values = np.asarray(parameter)[start_i:]
            if use_relative:
                if sub_values.size > host_values.size:
                    raise ValueError(
                        "sub-halo {} of {} has more entries ({}) than its host-halo ({})".format(
                            idx, m_key, sub_values.size, host_values.size))
                ##Align with the END of the host-halo array. An explicit start index is used
                ##because host_values[-0:] would select the whole array for an empty slice.
                host_tail = host_values[host_values.size - sub_values.size:]
                ##Out-of-place subtraction: sub_values is a view of param, so "-=" would
                ##overwrite the source data and change the result on every re-run.
                data = sub_values - host_tail
            else:
                data = sub_values.copy()
            dataset[m_key].append(data)
    return dataset

In [117]:
class Normalization:
    """Scale a dataset host-halo by host-halo and undo the scaling again.

    The dataset is a dictionary {m_key: [array, ...]}. Statistics (min/max or mean/stddev)
    are computed over all arrays of one m_key and stored on the instance by run(), so that
    inverse() can restore the original scale afterwards.

    Args:
        m_list: Keys of the dataset to process.
        norm_format: "Normalization" (min-max scaling), "Standardization"
            (zero mean, unit standard deviation) or "None" (no scaling).
        bias: If True, eps is added after "Normalization" / "None" scaling.
        eps: Offset that is added when bias is True.
    """

    def __init__(self, m_list, norm_format, bias, eps):
        self.m_list = m_list
        self.norm_format = norm_format
        self.bias = bias
        self.eps = eps
        print("m_list : {}".format(self.m_list))
        print("norm_format : {}".format(self.norm_format))
        print("bias : {}".format(self.bias))
        if self.bias:
            print("eps : {}".format(self.eps))
        ##Statistics of the last run(); None until run() has been called.
        self.param_min, self.param_max = None, None
        self.mean, self.stddev = None, None

    def run(self, dataset):
        """Return a scaled copy of dataset; dataset itself is not modified.

        Raises:
            ValueError: If norm_format is not one of the supported formats.
        """
        working = self.__float_copy(dataset)
        if self.norm_format == "Normalization":
            return self.__normalize(working)
        if self.norm_format == "Standardization":
            return self.__standardize(working)
        if self.norm_format == "None":
            return self.__none(working)
        raise ValueError(self.__unknown_format_message())

    def inverse(self, dataset):
        """Return a copy of a scaled dataset restored to the original scale.

        Uses the statistics stored by the last run().

        Raises:
            RuntimeError: If the statistics needed by norm_format have not been computed yet.
            ValueError: If norm_format is not one of the supported formats.
        """
        working = self.__float_copy(dataset)
        if self.norm_format == "Normalization":
            if self.param_min is None or self.param_max is None:
                raise RuntimeError("run() must be called before inverse()")
            return self.__inv_normalize(working)
        if self.norm_format == "Standardization":
            if self.mean is None or self.stddev is None:
                raise RuntimeError("run() must be called before inverse()")
            return self.__inv_standardize(working)
        if self.norm_format == "None":
            return self.__inv_none(working)
        raise ValueError(self.__unknown_format_message())

    def __unknown_format_message(self):
        ##Build the error text for an unsupported norm_format.
        return ("Unknown norm_format: {!r}. Use 'Normalization', 'Standardization' "
                "or 'None'.".format(self.norm_format))

    def __float_copy(self, dataset):
        ##Deep-copy dataset and make the processed arrays floating point, because the
        ##in-place division used for scaling is not possible on integer arrays.
        working = cp.deepcopy(dataset)
        for m_key in self.m_list:
            working[m_key] = [np.asarray(param, dtype=float) for param in working[m_key]]
        return working

    def __normalize(self, dataset):
        ##Min-max scaling in place: (x - min) / (max - min) [+ eps if bias].
        param_min, param_max = {}, {}
        for m_key in self.m_list:
            param_min[m_key], param_max[m_key] = np.inf, -np.inf
            for param in dataset[m_key]:
                if param.size == 0:
                    ##min()/max() of an empty array raise, and it holds nothing to scale.
                    continue
                param_min[m_key] = min(param_min[m_key], param.min())
                param_max[m_key] = max(param_max[m_key], param.max())
            span = param_max[m_key] - param_min[m_key]
            if span == 0:
                ##Constant data: divide by 1 instead of 0. The inverse multiplies by the
                ##true span (0) and adds min, which restores the constant value.
                span = 1.0
            for param in dataset[m_key]:
                param -= param_min[m_key]
                param /= span
                if self.bias:
                    param += self.eps
        self.param_min, self.param_max = param_min, param_max
        return dataset

    def __inv_normalize(self, dataset):
        ##Undo __normalize in place using the stored min/max.
        for m_key in self.m_list:
            for param in dataset[m_key]:
                if self.bias:
                    param -= self.eps
                param *= (self.param_max[m_key] - self.param_min[m_key])
                param += self.param_min[m_key]
        return dataset

    def __standardize(self, dataset):
        ##Standardization in place: (x - mean) / stddev, with the population statistics
        ##taken over all arrays of one m_key.
        mean, stddev = {}, {}
        for m_key in self.m_list:
            count = sum(param.size for param in dataset[m_key])
            if count == 0:
                ##Nothing to scale; avoid 0 / 0.
                mean[m_key], stddev[m_key] = 0.0, 0.0
                continue
            total = sum(np.sum(param) for param in dataset[m_key])
            mean[m_key] = total / count
            squared_dev = sum(np.sum((param - mean[m_key]) ** 2) for param in dataset[m_key])
            stddev[m_key] = np.sqrt(squared_dev / count)
            ##Constant data: divide by 1 instead of 0 (the inverse multiplies by the true 0).
            divisor = stddev[m_key] if stddev[m_key] != 0 else 1.0
            for param in dataset[m_key]:
                ##In-place operators are required: rebinding "param = ..." would only change
                ##the loop variable and leave the dataset untouched.
                param -= mean[m_key]
                param /= divisor
        self.mean, self.stddev = mean, stddev
        return dataset

    def __inv_standardize(self, dataset):
        ##Undo __standardize in place using the stored mean/stddev.
        for m_key in self.m_list:
            for param in dataset[m_key]:
                param *= self.stddev[m_key]
                param += self.mean[m_key]
        return dataset

    def __none(self, dataset):
        ##No scaling; only the eps offset is added when bias is True.
        if self.bias:
            for m_key in self.m_list:
                for param in dataset[m_key]:
                    param += self.eps
        return dataset

    def __inv_none(self, dataset):
        ##Undo __none in place.
        if self.bias:
            for m_key in self.m_list:
                for param in dataset[m_key]:
                    param -= self.eps
        return dataset

In [118]:
##Create the learning settings and extract the dataset selected by them
##(parameter kind LP.param_kind, extraction condition LP.extract_dataset).
LP = LearnParam()
dataset = make_dataset(mainbranch_list, LP.param_kind, host_param, param, LP.extract_dataset, acc_sf)

In [119]:
##The name Normalization is rebound to the created instance, which hides the class of the
##same name. When this cell is executed again, Normalization is therefore an instance and
##calling it would fail, so the class is recovered from the instance first.
##(Using a separate instance name would be cleaner, but later cells use this name.)
_normalization_cls = Normalization if isinstance(Normalization, type) else type(Normalization)
Normalization = _normalization_cls(mainbranch_list, LP.normalize_format, LP.bias, LP.eps)

m_list : ['mainbranch_MW039.csv', 'mainbranch_MW038.csv']
norm_format : Normalization
bias : True
eps : 1.0


In [120]:
##Scale the dataset with the format chosen at construction. Normalization is the instance
##created in the previous cell; run() works on a deep copy, so dataset itself is not modified.
dataset_normed = Normalization.run(dataset)