In [1]:
import numpy as np
import pickle
import json
import ast

# Input pickles, one per target device, all located in the same directory.
# NOTE(review): several names contain unusual spellings ("nono", "embeeded",
# "contex", "adren0"); presumably they mirror the files on disk -- confirm
# before "correcting" any of them.
_pickle_dir = './data/nasbench201/pickle'
_device_names = (
    'desktop-cpu-core-i7-7820x-fp32',
    'desktop-gpu-gtx-1080ti-fp32',
    'desktop-gpu-gtx-1080ti-large',
    'embedded-gpu-jetson-nono-fp16',
    'embedded-tpu-edgetpu-large',
    'embeeded-tpu-edgetpu-int8',
    'mobile-cpu-snapdragon-450-contex-a53-int8',
    'mobile-cpu-snapdragon-675-kryo-460-int8',
    'mobile-cpu-snapdragon-855-kryo-485-int8',
    'mobile-dsp-snapdragon-675-hexagon-685-int8',
    'mobile-dsp-snapdragon-855-hexagon-690-int8',
    'mobile-gpu-snapdragon-450-adreno-506-int8',
    'mobile-gpu-snapdragon-675-adren0-612-int8',
    'mobile-gpu-snapdragon-855-adren0-640-int8',
)
file_paths = [f'{_pickle_dir}/{device}.pickle' for device in _device_names]


# Merge every device pickle into two row-aligned arrays:
#   all_df[i, r] -> the key stored at row r for device i
#   all_y[i, r]  -> the value the pickle maps that key to
# Rows are assigned through key_map (key -> row index), so row r refers to the
# same key for every device. key_map is built from the first file that loads
# completely; later files only look rows up.
# NOTE(review): the 15284 x 6 shape assumes each pickle holds at most 15284
# distinct keys that each broadcast to 6 values -- confirm against the data.
key_map = {}
idx = 0
first = True  # stays True until one file has been processed without error
all_df = np.zeros((len(file_paths), 15284, 6))
all_y = np.zeros((len(file_paths), 15284, 1))
for i, file_path in enumerate(file_paths):
    try:
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # file_paths at files from a trusted source.
        with open(file_path, 'rb') as file:
            loaded_data = pickle.load(file)
            for key, value in loaded_data.items():
                # While the row layout is still being established, unseen keys
                # receive the next free row. Afterwards an unseen key raises
                # KeyError, which is reported below and ends this file.
                if first and key not in key_map:
                    key_map[key] = idx
                    idx += 1
                row = key_map[key]
                all_df[i, row] = key
                all_y[i, row] = value
        # Only reached when the whole file was processed. Previously the flag
        # was cleared even if the first file failed, which left key_map empty
        # and made every later file fail on its first key.
        first = False
    except Exception as e:
        # Best effort: report the problem and continue with the next device.
        # Rows not filled for this device keep their initial zeros.
        print(f"Error loading {file_path}: {e}")

import os

# Write one array per device: the 6 key columns from all_df[i] followed by the
# single value column from all_y[i]. Each output keeps the input file's base
# name with ".pickle" replaced by ".pkl".
# NOTE(review): a device whose pickle failed to load earlier is still written
# here, as an array of zeros -- confirm that is intended.
output_dir = './data/nasbench201/pkl'
os.makedirs(output_dir, exist_ok=True)  # loop-invariant, so done once up front
for i, file_path in enumerate(file_paths):
    x = all_df[i]
    y = all_y[i]
    data = np.concatenate((x, y), axis=1)
    output_name = file_path.split("/")[-1].replace(".pickle", ".pkl")
    output_file_path = f'{output_dir}/{output_name}'

    with open(output_file_path, 'wb') as output_file:
        pickle.dump(data, output_file)

In [4]:
import os
import pickle
import numpy as np
from data_provider.data_control import load_data
from data_provider.data_scaler import get_scaler
from utils.exp_config import get_config

# ===== 1. Load the experiment configuration =====
config = get_config('TransModelConfig')

# The dataset name is the configured dataset's file name without its
# extension; processed splits are stored under data/<dataset_name>.
dataset_file = os.path.basename(config.dst_dataset)
dataset_name = os.path.splitext(dataset_file)[0]
save_dir = os.path.join("data", dataset_name)
os.makedirs(save_dir, exist_ok=True)

# Build the file name used for one cached variable of the current run.
def file_path(var_name):
    """Return the pickle path for ``var_name`` inside ``save_dir``.

    The file name has the form ``<dataset_name>_<var_name>_round<runid>.pkl``,
    where ``runid`` is read from ``config``.
    """
    filename = f"{dataset_name}_{var_name}_round{config.runid}.pkl"
    return os.path.join(save_dir, filename)

# Every cached file that must exist before preprocessing can be skipped.
expected_files = [
    file_path(name)
    for name in ("train_x", "valid_x", "test_x", "train_y", "valid_y", "test_y")
]

# Both helpers are defined unconditionally so that the names available after
# this cell do not depend on whether the cache was hit.
def load_pkl(path):
    """Unpickle and return the object stored at ``path``."""
    with open(path, 'rb') as f:
        return pickle.load(f)


def save_pkl(var_name, data):
    """Pickle ``data`` to the path returned by ``file_path(var_name)``."""
    with open(file_path(var_name), 'wb') as f:
        pickle.dump(data, f)


# ===== 2. If every expected file already exists, load the cached splits =====
if all(os.path.exists(f) for f in expected_files):
    train_x = load_pkl(file_path("train_x"))
    valid_x = load_pkl(file_path("valid_x"))
    test_x  = load_pkl(file_path("test_x"))
    train_y = load_pkl(file_path("train_y"))
    valid_y = load_pkl(file_path("valid_y"))
    test_y  = load_pkl(file_path("test_y"))
    # NOTE(review): the fitted scalers are never written to disk, so x_scaler
    # and y_scaler are NOT defined when this cached branch runs. Anything
    # downstream that needs them (e.g. to undo the y scaling) must handle that.

    print(f"✅ Loaded preprocessed data from {save_dir}")

else:
    # ===== 3. Load the raw data and convert it to float32 =====
    x, y = load_data(config)
    x = np.asarray(x, dtype=np.float32)
    y = np.asarray(y, dtype=np.float32)

    # ===== 4. Split ratios =====
    # spliter_ratio is a colon-separated string of integers. Only the first
    # two entries are turned into fractions; the test split receives whatever
    # remains after train and valid have been taken.
    parts = [int(s) for s in config.spliter_ratio.strip().split(':')]
    total = sum(parts)
    train_ratio, valid_ratio = parts[0] / total, parts[1] / total

    # ===== 5. Shuffle and split =====
    n = len(x)
    train_size = int(n * train_ratio)
    # Without an evaluation set the validation split is empty and its share
    # goes to the test split.
    valid_size = int(n * valid_ratio) if config.eval_set else 0

    # NOTE(review): this draws from NumPy's global RNG and no seed is set in
    # this cell, so the split depends on whatever seeded that RNG earlier (if
    # anything). The saved files are what keep later runs consistent.
    indices = np.random.permutation(n)
    train_idx = indices[:train_size]
    valid_idx = indices[train_size:train_size + valid_size]
    test_idx  = indices[train_size + valid_size:]

    train_x, train_y = x[train_idx], y[train_idx]
    valid_x, valid_y = x[valid_idx], y[valid_idx]
    test_x,  test_y  = x[test_idx],  y[test_idx]

    # ===== 6. Scaling =====
    # Both scalers are built from the training split only and then applied to
    # all three splits.
    x_scaler = get_scaler(train_x, config, 'None')
    y_scaler = get_scaler(train_y, config, 'globalminmax')

    train_x = x_scaler.transform(train_x)
    valid_x = x_scaler.transform(valid_x)
    test_x  = x_scaler.transform(test_x)

    train_y = y_scaler.transform(train_y).astype(np.float32)
    valid_y = y_scaler.transform(valid_y).astype(np.float32)
    test_y  = y_scaler.transform(test_y).astype(np.float32)

    # ===== 7. Save every split =====
    save_pkl("train_x", train_x)
    save_pkl("valid_x", valid_x)
    save_pkl("test_x",  test_x)
    save_pkl("train_y", train_y)
    save_pkl("valid_y", valid_y)
    save_pkl("test_y",  test_y)

    print(f"✅ Data processed and saved to {save_dir}")

<module 'module.name' from '/home/rtx4090/code/python/current/LightNAS/configs/TransModelConfig.py'> TransModelConfig
✅ Data processed and saved to data/desktop-gpu-gtx-1080ti-fp32


In [6]:
import pickle 
from data_process.nas_201_api import NASBench201API as API
import numpy as np
def get_arch_str_from_arch_vector(arch_vector):
    """Turn a vector of operation indices into a cell architecture string.

    Each element is converted with ``int()`` and looked up in a fixed
    index -> operation-name table (0..7). The first six names are inserted, in
    order, into ``|a~0|+|b~0|c~1|+|d~0|e~1|f~2|``.

    Raises:
        KeyError: if an element is not one of the indices 0..7.
        IndexError: if fewer than six elements are supplied.
    """
    op_names = (
        'none',
        'skip_connect',
        'nor_conv_1x1',
        'nor_conv_3x3',
        'avg_pool_3x3',
        'input',
        'output',
        'global',
    )
    # A dict (not the tuple itself) keeps the lookup strict: negative or
    # out-of-range indices raise KeyError instead of wrapping around.
    index_to_name = dict(enumerate(op_names))
    template = '|{}~0|+|{}~0|{}~1|+|{}~0|{}~1|{}~2|'

    ops = []
    for op_index in arch_vector:
        ops.append(index_to_name[int(op_index)])
    return template.format(*ops)

# Load one processed device table into `df` and open the NAS-Bench-201 API
# that is used to look architectures up.
latency_table_path = './data/nasbench201/pkl/desktop-cpu-core-i7-7820x-fp32.pkl'
with open(latency_table_path, 'rb') as table_file:
    df = pickle.load(table_file)

benchmark_path = './data_process/nas_201_api/NAS-Bench-201-v1_0-e61699.pth'
api = API(benchmark_path, verbose=False)

  file_path_or_dict = torch.load(file_path_or_dict, map_location='cpu')


In [None]:
from data_process.create_latency import get_adjacency_and_features, get_matrix_and_ops
import numpy as np 
# Build one record per row of `df`, keyed by row number. Each record holds the
# adjacency matrix, per-node feature indices, flops, params and accuracy
# obtained from the helper functions and the benchmark API.
# Rows that raise are reported and skipped, so `data` may have gaps in its keys.
data = {}

for i in range(len(df)):
    try:
        # The [i, :-1] indexing needs df to be a NumPy array (convert a list
        # with np.array first). The last column is dropped from the key.
        key = np.asarray(df[i, :-1], dtype=np.int32)   # expected length: 6
        arch_str = get_arch_str_from_arch_vector(key)
        index = api.query_index_by_arch(arch_str)

        cost_info = api.get_cost_info(index, dataset='cifar10-valid')

        flops  = float(cost_info['flops'])
        params = float(cost_info['params'])

        # get_matrix_and_ops is called once here; an earlier extra call whose
        # result was immediately overwritten has been removed.
        adjacency_matrix, label = get_matrix_and_ops(key)
        adjacency_matrix, features = get_adjacency_and_features(adjacency_matrix, label)

        # Reduce each feature row to the index of its largest entry.
        features = np.argmax(features, axis=1)

        acc_info = api.get_more_info(
            index,
            dataset='cifar10-valid',
            iepoch=None,
            hp='200',
            is_random=False
        )

        acc = float(acc_info['test-accuracy'])

        # Store the record.
        data[i] = {
            'adjacency_matrix': adjacency_matrix,
            "features": features,
            "flops": flops,
            "params": params,
            "accuracy": acc
        }

        # Optional: simple progress output.
        if (i + 1) % 1000 == 0 or i == len(df) - 1:
            print(f'Processed {i + 1}/{len(df)}')

    except Exception as e:
        print(f"[Warning] Skipped item {i} due to: {e}")

Processed 1000/15284
Processed 2000/15284
Processed 3000/15284
Processed 4000/15284
Processed 5000/15284
Processed 6000/15284
Processed 7000/15284
Processed 8000/15284
Processed 9000/15284
Processed 10000/15284
Processed 11000/15284
Processed 12000/15284
Processed 13000/15284
Processed 14000/15284
Processed 15000/15284
Processed 15284/15284


In [19]:
# Preview only the first two records; displaying the whole dict (one entry per
# processed row) floods the notebook output.
dict(list(data.items())[:2])

{0: {'adjacency_matrix': array([[1., 1., 1., 1., 1., 1., 1., 1., 1.],
         [0., 1., 0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 1., 0., 0., 1.],
         [0., 0., 0., 0., 0., 0., 1., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 1.]]),
  'features': array([5, 3, 0, 0, 0, 0, 0, 0, 4]),
  'flops': 15.64737,
  'params': 0.129306,
  'accuracy': 85.62},
 1: {'adjacency_matrix': array([[1., 1., 1., 1., 1., 1., 1., 1., 1.],
         [0., 1., 0., 0., 0., 1., 1., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 1., 0., 0., 1.],
         [0., 0., 0., 0., 0., 0., 1., 0., 1.],
         [0., 0., 0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 0., 

In [None]:
def get_bench201():
    """Build and return one record per row of the notebook-level ``df``.

    Relies on names defined in earlier cells: ``df`` (indexed as
    ``df[i, :-1]``), ``api`` and ``get_arch_str_from_arch_vector``.

    Returns:
        dict: maps the row number ``i`` to a dict with the keys
        ``adjacency_matrix``, ``features``, ``flops``, ``params`` and
        ``accuracy``. Rows that raise are reported with a warning and left
        out, so the keys may have gaps.
    """
    from data_process.create_latency import get_adjacency_and_features, get_matrix_and_ops
    import numpy as np 
    data = {}

    for i in range(len(df)):
        try:
            # The [i, :-1] indexing needs df to be a NumPy array (convert a
            # list with np.array first). The last column is dropped.
            key = np.asarray(df[i, :-1], dtype=np.int32)   # expected length: 6
            arch_str = get_arch_str_from_arch_vector(key)
            index = api.query_index_by_arch(arch_str)

            cost_info = api.get_cost_info(index, dataset='cifar10-valid')

            flops  = float(cost_info['flops'])
            params = float(cost_info['params'])

            # get_matrix_and_ops is called once here; an earlier extra call
            # whose result was immediately overwritten has been removed.
            adjacency_matrix, label = get_matrix_and_ops(key)
            adjacency_matrix, features = get_adjacency_and_features(adjacency_matrix, label)

            # Reduce each feature row to the index of its largest entry.
            features = np.argmax(features, axis=1)

            acc_info = api.get_more_info(
                index,
                dataset='cifar10-valid',
                iepoch=None,
                hp='200',
                is_random=False
            )

            acc = float(acc_info['test-accuracy'])

            # Store the record.
            data[i] = {
                'adjacency_matrix': adjacency_matrix,
                "features": features,
                "flops": flops,
                "params": params,
                "accuracy": acc
            }

            # Optional: simple progress output.
            if (i + 1) % 1000 == 0 or i == len(df) - 1:
                print(f'Processed {i + 1}/{len(df)}')

        except Exception as e:
            print(f"[Warning] Skipped item {i} due to: {e}")
    return data