In [6]:
import os
import sys
sys.path.append('../../cnn_design-space/cnnbench/')
sys.path.append('../../boshnas/boshnas/')
sys.path.append('../')

import argparse
import numpy as np
import yaml
import random
import tabulate
import subprocess
import time
import json
import hashlib
import random

import torch

from six.moves import cPickle as pickle
from tqdm import tqdm

from boshnas_2inp import BOSHNAS as BOSHCODE
from acq import gosh_acq as acq

from run_boshcode import convert_to_tabular, update_dataset, get_neighbor_hash

from library import GraphLib, Graph
from utils import print_util as pu

In [2]:
# --- Paths and settings used by the rest of this notebook ---
graphlib_file = '../../cnn_design-space/cnnbench/dataset/dataset_mini.json'
new_graphlib_file = './dataset_mini_trained.json'
accel_embeddings_file = '../../accelerator_design-space/accelbench/embeddings/embeddings.pkl'
cnn_config_file = '../../cnn_design-space/cnnbench/configs/CIFAR10/config.yaml'
models_dir = '../../models'
accel_dataset_file = '../accel_dataset/accel_dataset_mini_bkp.pkl'
accel_dataset_file_trained = './accel_dataset_mini_trained.pkl'

# Weights passed to update_dataset() and convert_to_tabular() later in this cell.
# NOTE(review): the meaning of each of the seven entries is defined by run_boshcode — confirm there.
performance_weights = [0, 0.2, 0, 0.2, 0.1, 0.2, 0.3]

# Load the CNN graph library from its JSON dataset
graphLib = GraphLib.load_from_dataset(graphlib_file)

# accel_embeddings_file is not read here: the accelerator embeddings are
# derived from the CNN-Accelerator pairs dataset later in this cell.

# Read the CNN config inside a context manager so the file handle is closed
# deterministically instead of being left to the garbage collector.
with open(cnn_config_file) as cnn_config_fp:
    cnn_config = yaml.safe_load(cnn_config_fp)

# Model directories; trained CNNs are grouped per dataset named in the config
cnn_models_dir = os.path.join(models_dir, 'cnnbench_models', cnn_config['dataset'])
accel_models_dir = os.path.join(models_dir, 'accelbench_models')

# Get trained CNN models and Accelerator architectures
# A CNN counts as trained only when its model directory contains 'model.pt'.
# Testing the file path directly (rather than listing each entry) avoids a
# NotADirectoryError when cnn_models_dir holds stray non-directory entries.
trained_cnn_hashes = [
    cnn_hash for cnn_hash in os.listdir(cnn_models_dir)
    if os.path.isfile(os.path.join(cnn_models_dir, cnn_hash, 'model.pt'))
]

# Recover each pair hash by dropping the file extension, whatever its length,
# instead of assuming the extension is exactly four characters long.
trained_accel_hashes = [os.path.splitext(accel_file)[0] for accel_file in os.listdir(accel_models_dir)]

# Load CNN-Accelerator pairs dataset
# NOTE: unpickling can execute arbitrary code; only load dataset files from a trusted source.
with open(accel_dataset_file, 'rb') as accel_dataset_fp:
    accel_dataset = pickle.load(accel_dataset_fp)

# Collect the distinct accelerator embeddings found in the dataset.
# Tuples are used as hashable keys so no str()/eval() round trip is needed, and
# dict.fromkeys keeps first-seen order, so the row order of accel_embeddings
# (and therefore of anything indexed against it) is reproducible across runs.
# Assumes each 'accel_emb' is a 1-D numpy array — TODO confirm.
unique_accel_embeddings = dict.fromkeys(
    tuple(accel['accel_emb'].tolist()) for accel in accel_dataset.values()
)
accel_embeddings = np.array(list(unique_accel_embeddings))

# Hashes (dataset keys) of all CNN-Accelerator pairs, in dataset order
accel_hashes = list(accel_dataset.keys())

# Check trained_accel_hashes have all respective CNNs trained
# Lookups go against the dataset dict and a set of trained CNN hashes, so each
# check is constant-time instead of a linear scan over a (potentially huge) list.
trained_cnn_hash_set = set(trained_cnn_hashes)
trained_accel_hashes_new = []
for accel_hash in trained_accel_hashes:
    # A model file may exist for a pair that the current dataset does not contain
    if accel_hash not in accel_dataset:
        print(f'Trained CNN-Accelerator pair with hash: {accel_hash}, not in current dataset')
        continue
    cnn_hash = accel_dataset[accel_hash]['cnn_hash']
    if cnn_hash in trained_cnn_hash_set:
        trained_accel_hashes_new.append(accel_hash)
    else:
        print(f"CNN-Accelerator pair with hash: {accel_hash}, doesn't have respective CNN trained (with hash: {cnn_hash})")

# Keep only the pairs that are in the dataset and whose CNN is trained
trained_accel_hashes = trained_accel_hashes_new

# Run update_dataset (imported from run_boshcode) over the library and the pairs
# dataset; its return value is kept as old_best_performance.
# NOTE(review): exact side effects of update_dataset are not visible here — confirm in run_boshcode.
old_best_performance = update_dataset(
    graphLib,
    accel_dataset,
    cnn_models_dir,
    accel_models_dir,
    new_graphlib_file,
    accel_dataset_file_trained,
    performance_weights,
    save_dataset=False,
)

# Stack the embedding of every graph in the library into a single array
cnn_embeddings = np.array([graph.embedding for graph in graphLib.library])

# Element-wise minimum / maximum along axis 0 of each embedding array
min_cnn, max_cnn = cnn_embeddings.min(axis=0), cnn_embeddings.max(axis=0)
min_accel, max_accel = accel_embeddings.min(axis=0), accel_embeddings.max(axis=0)

# Every (CNN embedding, accelerator embedding) combination, with the CNN index
# varying slowest and the accelerator index varying fastest.
X_ds = [
    (cnn_embeddings[i, :], accel_embeddings[j, :])
    for i in range(cnn_embeddings.shape[0])
    for j in range(accel_embeddings.shape[0])
]
        
# Build the two-input surrogate (boshnas_2inp.BOSHNAS, imported as BOSHCODE).
# Input sizes and bounds come from the CNN and accelerator embedding arrays.
surrogate_kwargs = dict(
    input_dim1=cnn_embeddings.shape[1],
    input_dim2=accel_embeddings.shape[1],
    bounds1=(min_cnn, max_cnn),
    bounds2=(min_accel, max_accel),
    trust_region=False,
    second_order=True,
    parallel=True,
    model_aleatoric=True,
    save_path='./surrogate_model/',
    pretrained=False,
)
surrogate_model = BOSHCODE(**surrogate_kwargs)

# Convert the CNN-Accelerator dataset to tabular form and record the largest
# target value seen.
# NOTE(review): presumably X_cnn / X_accel are embeddings and y the weighted
# performance measure — confirm against run_boshcode.convert_to_tabular.
X_cnn, X_accel, y = convert_to_tabular(accel_dataset, graphLib, performance_weights)
max_loss = np.max(y)

Trained CNN-Accelerator pair with hash: 293181b319b0deec6cfc31771b3c1464fe538f6f93d9eb22b2d73891d50e11fc, not in current dataset
CNN-Accelerator pair with hash: d26935273a41e425dbef42f8a61ab57b4fba8483eababe4f044625410030f63a, doesn't have respective CNN trained (with hash: d74c8175e990c815efa1092f254fc65cef658f5051e36b404777a3b3bc31e059)
CNN-Accelerator pair with hash: af95420c27b1e19b0a1fb8d1166fb646c8308cf89a34647765c71a9463db85b6, doesn't have respective CNN trained (with hash: a3c500d1cd395f12812fae0db02e72ff3ec9962a0c0e372dc812541f7ba038a2)
CNN-Accelerator pair with hash: 95bd07e8360817122001ccb7bb73bb58d82c4bc50560d7ea877ccceede95b1d2, doesn't have respective CNN trained (with hash: d6b07ee2297fa76ab5e71507ca95ab73f9e6e95461cb856ac79b34818751aba8)
CNN-Accelerator pair with hash: e883fa4f8988b3f67bd9e26ae334edb44d031a90851d22bb0566d64b0ac6cbec, doesn't have respective CNN trained (with hash: 1abf976cd9b816f2e37532b7cb82dba8732ffea628880c3a716b6f5d5a235b66)
CNN-Accelerator pair wi

Updating CNN-Accelerator library:   0%|          | 0/41 [00:00<?, ?it/s]

[92mDataset saved to:[0m ./dataset_mini_trained.json


Updating CNN-Accelerator library: 100%|██████████| 41/41 [00:01<00:00, 35.24it/s]



[92mTrained CNNs in dataset:[0m 25
[92mSimulated CNN-Accelerator pairs:[0m 39
[92mBest performance:[0m 0.9522054327578187



Converting dataset to tabular: 100%|██████████| 85630034/85630034 [00:50<00:00, 1705259.63it/s]


In [8]:
# Ask the surrogate for the next query (k=1) over the candidate list X_ds
query_indices = surrogate_model.get_queries(x=X_ds, k=1, explore_type='ucb', use_al=False)

# NOTE(review): the indices above are obtained for X_ds, but are used below to
# index accel_hashes (the keys of accel_dataset). Confirm that both sequences
# share the same length and ordering; otherwise the wrong pair is selected.
# Report each distinct queried pair together with a trained neighbor of its CNN
for query_idx in set(query_indices):
    accel_hash = accel_hashes[query_idx]
    queried_pair = accel_dataset[accel_hash]
    accel_emb = queried_pair['accel_emb']  # not used further in this cell

    cnn_model, _ = graphLib.get_graph(model_hash=queried_pair['cnn_hash'])
    chosen_neighbor_hash = get_neighbor_hash(cnn_model, trained_cnn_hashes)

    print(f'Accelerator hash: {accel_hash}\nCNN hash: {cnn_model.hash}\nCNN neighbor: {chosen_neighbor_hash}')

Accelerator hash: 81a9133de2e7f24508d1459f076c42072492e2280088eb77c0167d1d209c3297
CNN hash: e3a979c95f7b0716b4dc7b5f763e6f11ea659d350caff793e75352fe7791441f
CNN neighbor: None
