In [1]:
import os
import json
import pickle
from collections import defaultdict
from functools import reduce

import numpy as np

from gfos.data.utils import load_layout
from gfos.data.constants import mask_min_max

In [2]:
# Root directory holding the layout .npz files. The original machine-specific
# path stays as the default; set the GFOS_LAYOUT_DIR environment variable to
# run the notebook elsewhere without editing this cell.
LAYOUT_DIR = os.environ.get(
    "GFOS_LAYOUT_DIR",
    r"H:\data\gfos\predict-ai-model-runtime\npz_all\npz\layout",
)
# First-level grouping under LAYOUT_DIR.
sources = ("xla", "nlp")
# Second-level grouping under each source.
searchs = ("default", "random")
# Array keys read from every .npz archive when computing statistics.
features = ("node_feat", "node_config_feat")

In [3]:
# Directory of the first source / first search combination.
# NOTE(review): data_dir is not referenced again in the visible cells — confirm
# it is still needed.
data_dir = os.path.join(LAYOUT_DIR, sources[0], searchs[0])

# Layout files for the xla / random combination.
# Assumes load_layout returns a mapping whose values are sequences of file
# paths — TODO confirm against gfos.data.utils.
files: dict = load_layout(LAYOUT_DIR, compile_type="random", model_type="xla")

# Flatten every group into one list. Unlike reduce() without an initializer,
# this yields [] for an empty mapping instead of raising TypeError, and it does
# not re-copy the accumulated list for each group.
all_files = [path for group in files.values() for path in group]

In [6]:
# Width of one row after flattening "node_config_feat" (was a bare literal 18).
NODE_CONFIG_FEAT_DIM = 18


def _feature_min_max(paths, feature, row_width=None):
    """Return the element-wise (min, max) of ``feature`` over axis 0 of all files.

    The statistics are accumulated file by file, so only one array is held in
    memory at a time; the result equals the min/max of the arrays concatenated
    along axis 0.

    Args:
        paths: iterable of .npz file paths.
        feature: key of the array to read from each archive.
        row_width: when given, each array is first reshaped to
            ``(-1, row_width)`` and the statistics are returned as float32.

    Returns:
        Tuple ``(minimum, maximum)`` of numpy arrays.

    Raises:
        ValueError: if no file contributes any rows.
    """
    running_min = None
    running_max = None
    for path in paths:
        # The context manager closes the archive's file handle right away;
        # indexing reads the array fully, so it stays valid afterwards.
        with np.load(path) as archive:
            values = archive[feature]
        if row_width is not None:
            values = values.reshape(-1, row_width)
        if values.shape[0] == 0:
            # Nothing to reduce; an empty file would not affect the statistics.
            continue

        file_min = values.min(axis=0)
        file_max = values.max(axis=0)
        if row_width is not None:
            # Casting is monotonic, so casting the per-file extrema gives the
            # same result as reducing a float32 copy of the whole array.
            file_min = file_min.astype(np.float32, copy=False)
            file_max = file_max.astype(np.float32, copy=False)

        if running_min is None:
            running_min, running_max = file_min, file_max
        else:
            running_min = np.minimum(running_min, file_min)
            running_max = np.maximum(running_max, file_max)

    if running_min is None:
        raise ValueError(f"no data found for feature {feature!r}")
    return running_min, running_max


# normalizer[source][search] -> {"<feature>_mask" / "_min" / "_max": list}
normalizer = defaultdict(lambda: defaultdict(dict))

for source in sources:
    for search in searchs:
        # Load the files belonging to THIS source/search combination, so each
        # entry is computed from its own data rather than one shared file list.
        # Assumes load_layout returns a mapping of path sequences — TODO confirm.
        layout_groups = load_layout(
            LAYOUT_DIR, compile_type=search, model_type=source
        )
        layout_files = [path for group in layout_groups.values() for path in group]

        for feature in features:
            # Only the config feature is flattened to fixed-width rows.
            row_width = (
                NODE_CONFIG_FEAT_DIM if feature == "node_config_feat" else None
            )
            node_min, node_max = _feature_min_max(layout_files, feature, row_width)
            # True where the column actually varies (max differs from min).
            mask = node_max != node_min

            normalizer[source][search].update({
                f"{feature}_mask": mask.tolist(),
                f"{feature}_min": node_min.tolist(),
                f"{feature}_max": node_max.tolist(),
            })

            print(f"{source} {search} {feature} finished")

xla default node_feat finished
xla default node_config_feat finished
xla random node_feat finished
xla random node_config_feat finished
nlp default node_feat finished
nlp default node_config_feat finished
nlp random node_feat finished
nlp random node_config_feat finished


In [7]:
# Write the statistics as JSON. The context manager guarantees the file is
# flushed and closed even if serialization fails part-way.
with open("../../data/normalizer.json", "w") as normalizer_file:
    json.dump(normalizer, normalizer_file, indent=4)