In [1]:
import sys
import os
import yaml
import json
import numpy as np
import random
import subprocess
import torch
from matplotlib import pyplot as plt
from tqdm import tqdm

sys.path.append('../../embeddings/')
sys.path.append('../../boshnas/')

from library import Graph, GraphLib
from boshnas import BOSHNAS

In [2]:
# Each entry of the GLUE models directory is used further down as a model hash
# (it is passed to graphLib.get_graph and used to build the results path).
glue_models_dir = '/scratch/gpfs/stuli/txf_design-space/models/glue/'
trained_hashes = os.listdir(glue_models_dir)

# Graph library that the trained hashes are looked up in.
dataset_path = '../../dataset/dataset_test_bn.json'
graphLib = GraphLib.load_from_dataset(dataset_path)

def is_homogenous(graphObject):
    """Tell whether all per-layer settings of a model are identical.

    Args:
        graphObject: object exposing a ``model_dict`` mapping with the
            list-valued keys 'h', 'n', 'o', 'f' and 'p'.

    Returns:
        bool: True only when each of those five lists holds exactly one
        distinct value; False otherwise (including when a list is empty).
    """
    spec = graphObject.model_dict

    # Entries under 'f' are compared through the hash of their string form,
    # which also works when the entries are themselves (unhashable) lists.
    distinct_f = {hash(str(entry)) for entry in spec['f']}

    for key in ('h', 'n', 'o'):
        if len(set(spec[key])) != 1:
            return False
    if len(distinct_f) != 1:
        return False
    return len(set(spec['p'])) == 1

# Tally of models whose layers are all identical vs. not (see is_homogenous).
homogenous_models, heterogenous_models = 0, 0

# One row per trained model. The 16 columns must match the length of
# model.embedding, which is written into each row below.
X_ds_total = np.zeros((len(trained_hashes), 16))
# Target stored as 1 - glue_score, so a lower value means a better model.
y_ds_total = np.zeros((len(trained_hashes)))
count = 0

for model_hash in trained_hashes:
    model, _ = graphLib.get_graph(model_hash=model_hash)

    # Read the score inside a context manager so the file handle is closed
    # right away instead of being left to the garbage collector.
    with open(f'/scratch/gpfs/stuli/txf_design-space/models/glue/{model_hash}/all_results.json') as results_file:
        glue_score = json.load(results_file)['glue_score']

    X_ds_total[count, :] = model.embedding
    y_ds_total[count] = 1 - glue_score

    if is_homogenous(model):
        homogenous_models += 1
    else:
        heterogenous_models += 1
    count += 1

print(f'Homogenous models: {homogenous_models}\nHeterogenous models: {heterogenous_models}')

Homogenous models: 13
Heterogenous models: 44


In [3]:
# y_ds_total stores 1 - glue_score, so its minimum marks the best model;
# subtracting from 1 again turns it back into a score for display.
print(f'Best design-space performance: {1 - np.amin(y_ds_total): 0.03f}')

# Position of the best model, shared by the hash list and the target array.
best_index = np.argmin(y_ds_total)
best_model_hash = trained_hashes[best_index]
best_model, _ = graphLib.get_graph(model_hash=best_model_hash)

print(f'Best model hash: {best_model_hash}')
print(f'Best model dict: {best_model.model_dict}')

print(f'Best model is homogenous: {is_homogenous(best_model)}')

Best design-space performance:  0.419
Best model hash: 70d79a6325ce426644aed68b93a6d751bcf4dba5b38ced20024a06604b033da7
Best model dict: {'l': 4, 'h': [256, 256, 256, 256], 'n': [4, 4, 2, 2], 'o': ['l', 'l', 'l', 'l'], 'f': [[1024], [1024], [512, 512, 512], [512, 512, 512]], 'p': ['dct', 'dct', 'dct', 'dct']}
Best model is homogenous: False


In [14]:
# Graph objects of the best-performing models, best first.
top_models = []

# Sort once up front: y_ds_total holds 1 - glue_score, so ascending order
# lists the best models first. Slicing (rather than indexing range(10))
# also avoids an IndexError when fewer than 10 models have been trained.
ranking = np.argsort(y_ds_total)

print('Top 10 performances:')
for idx in ranking[:10]:
    model_hash = trained_hashes[idx]
    model, _ = graphLib.get_graph(model_hash=model_hash)
    top_models.append(model)

    print(f'{model.model_dict}:\n\t{1 - y_ds_total[idx]}')

Top 10 performances:
{'l': 4, 'h': [256, 256, 256, 256], 'n': [4, 4, 2, 2], 'o': ['l', 'l', 'l', 'l'], 'f': [[1024], [1024], [512, 512, 512], [512, 512, 512]], 'p': ['dct', 'dct', 'dct', 'dct']}:
	0.41935307370178576
{'l': 4, 'h': [256, 256, 256, 256], 'n': [4, 4, 2, 2], 'o': ['c', 'c', 'c', 'c'], 'f': [[512, 512, 512], [512, 512, 512], [1024, 1024, 1024], [1024, 1024, 1024]], 'p': [5, 5, 5, 5]}:
	0.41747407511343715
{'l': 2, 'h': [128, 128], 'n': [4, 4], 'o': ['sa', 'sa'], 'f': [[1024], [1024]], 'p': ['sdp', 'sdp']}:
	0.41721479366960335
{'l': 4, 'h': [256, 256, 256, 256], 'n': [2, 2, 2, 2], 'o': ['c', 'c', 'sa', 'sa'], 'f': [[1024, 1024, 1024], [1024, 1024, 1024], [512, 512, 512], [512, 512, 512]], 'p': [9, 9, 'wma', 'wma']}:
	0.41688767101809343
{'l': 4, 'h': [128, 128, 256, 256], 'n': [4, 4, 2, 2], 'o': ['l', 'l', 'l', 'l'], 'f': [[1024], [1024], [512], [512]], 'p': ['dft', 'dft', 'dct', 'dct']}:
	0.41577651438604857
{'l': 4, 'h': [128, 128, 256, 256], 'n': [4, 4, 2, 2], 'o': ['c',

In [None]:
# How many of the best models to expand, and how many neighbors of each to
# interpolate with.
TOP_NUM_MODELS = 5
NUM_NEIGHBORS_FOR_INTERPOLATION = 10

# Empty library that will later receive the newly generated graphs.
graphLib_new = GraphLib('../../design_space/design_space_test.yaml')

# Graphs produced by interpolation; may contain duplicates at this stage.
new_library = []
# (top-model index, neighbor index, error) for every pair that was skipped.
skipped_pairs = []

for i in tqdm(range(TOP_NUM_MODELS), desc='Generating new library'):
    num_neighbors = NUM_NEIGHBORS_FOR_INTERPOLATION # // (i+1)
    for n in range(num_neighbors):
        try:
            # NOTE(review): the meaning of the trailing arguments (2, 1) is
            # defined by GraphLib.interpolate_neighbors - confirm there.
            new_library.extend(graphLib.interpolate_neighbors(top_models[i], \
                graphLib.get_graph(model_hash=top_models[i].neighbors[n])[0], 2, 1))
        except Exception as err:
            # Best effort: one failing pair (e.g. a model with fewer neighbors
            # than requested) must not abort the whole generation, but it is
            # recorded and reported below instead of being silently dropped.
            skipped_pairs.append((i, n, repr(err)))

if skipped_pairs:
    print(f'Skipped {len(skipped_pairs)} model/neighbor pairs:')
    for model_idx, neighbor_idx, reason in skipped_pairs:
        print(f'\ttop model {model_idx}, neighbor {neighbor_idx}: {reason}')

print('Length of new library: ', len(new_library))

In [None]:
# Hashes in first-seen order (kept as a list, as before).
hashes = []
# Same hashes in a set, used only for constant-time membership tests so the
# de-duplication no longer scans the whole list for every graph.
seen_hashes = set()

# new_library with repeated hashes removed; the first occurrence wins.
reduced_library = []

for n in tqdm(new_library, desc='Reducing new library'):
    if n.hash in seen_hashes: continue
    seen_hashes.add(n.hash)
    hashes.append(n.hash)
    reduced_library.append(n)

print('Length of reduced library: ', len(reduced_library))

In [None]:
# Install the de-duplicated graphs as the contents of the new library.
graphLib_new.library = reduced_library

# Build embeddings for the new library.
# NOTE(review): 16 presumably is the embedding dimension (X_ds_total uses
# 16 columns for model.embedding) - confirm against build_embeddings.
embedding_options = dict(algo='GD', kernel='GraphEditDistance', n_jobs=1)
graphLib_new.build_embeddings(16, **embedding_options)

# Persist the new library next to the original dataset.
output_dataset_path = '../../dataset/dataset_test_bn_2.json'
graphLib_new.save_dataset(output_dataset_path)