In [1]:
import sys
import os
import yaml
import json
import numpy as np
import random
import subprocess
import torch
from matplotlib import pyplot as plt
from tqdm import tqdm

sys.path.append('../../embeddings/')
sys.path.append('../../boshnas/')

from library import Graph, GraphLib
from boshnas import BOSHNAS

In [2]:
# Directory listing of the trained GLUE models; each entry name is used
# further down as a model hash when querying the graph library.
GLUE_MODELS_DIR = '/scratch/gpfs/stuli/txf_design-space/models/glue/'
trained_hashes = os.listdir(GLUE_MODELS_DIR)

# Graph library restored from the previously saved dataset file.
DATASET_PATH = '../../dataset/dataset_test_bn_2.json'
graphLib = GraphLib.load_from_dataset(DATASET_PATH)

def is_homogenous(graphObject):
    """Tell whether all per-layer settings of a model are identical.

    Reads ``graphObject.model_dict`` and inspects the lists stored under the
    keys 'h', 'n', 'o', 'f' and 'p'. The model is reported as homogeneous
    only when every one of those lists contains exactly one distinct value.
    An empty list has zero distinct values and therefore yields ``False``.

    Args:
        graphObject: Any object exposing a ``model_dict`` mapping with the
            keys listed above.

    Returns:
        bool: ``True`` if each inspected list holds a single distinct value,
        ``False`` otherwise.
    """
    model_dict = graphObject.model_dict

    # Entries of 'f' are lists (unhashable), so they are compared through
    # their string form. The strings themselves are put in the set rather
    # than hash() of them, so two different entries can never be mistaken
    # for equal because of a hash collision.
    feed_forward_signatures = [str(item) for item in model_dict['f']]

    return (
        len(set(model_dict['h'])) == 1
        and len(set(model_dict['n'])) == 1
        and len(set(model_dict['o'])) == 1
        and len(set(feed_forward_signatures)) == 1
        and len(set(model_dict['p'])) == 1
    )

# Build the dataset of trained models, one row per entry of trained_hashes:
#   X_ds_total[row] <- the model's embedding taken from graphLib
#   y_ds_total[row] <- 1 - 'glue_score' read from that model's all_results.json
# Row order follows trained_hashes, so an index into y_ds_total can be used
# to look up the corresponding hash.
homogenous_models, heterogenous_models = 0, 0
X_ds_total = np.zeros((len(trained_hashes), 16))
y_ds_total = np.zeros((len(trained_hashes)))

for row, model_hash in enumerate(trained_hashes):
    model, _ = graphLib.get_graph(model_hash=model_hash)

    # Use a context manager so the results file is closed right away
    # instead of leaking one open handle per trained model.
    with open(f'/scratch/gpfs/stuli/txf_design-space/models/glue/{model_hash}/all_results.json') as results_file:
        glue_score = json.load(results_file)['glue_score']

    X_ds_total[row, :] = model.embedding
    y_ds_total[row] = 1 - glue_score

    if is_homogenous(model):
        homogenous_models += 1
    else:
        heterogenous_models += 1

# Number of rows filled; kept because the original cell left this name defined.
count = len(trained_hashes)

print(f'Homogenous models: {homogenous_models}\nHeterogenous models: {heterogenous_models}')

Homogenous models: 2
Heterogenous models: 16


In [3]:
# y_ds_total stores 1 - GLUE score, so its minimum belongs to the
# best-performing trained model.
lowest_error = np.amin(y_ds_total)
print(f'Best design-space performance: {1 - lowest_error: 0.03f}')

# Rows of y_ds_total follow the order of trained_hashes, so the position of
# the minimum identifies the winning hash.
best_position = np.argmin(y_ds_total)
best_model_hash = trained_hashes[best_position]
best_model, _ = graphLib.get_graph(model_hash=best_model_hash)

print(f'Best model hash: {best_model_hash}')
print(f'Best model dict: {best_model.model_dict}')

best_is_homogenous = is_homogenous(best_model)
print(f'Best model is homogenous: {best_is_homogenous}')

Best design-space performance:  0.428
Best model hash: 23f3026b5209a1616b04a1fe57875355e59be58ec9adcd39faf3e0648a533cd9
Best model dict: {'l': 4, 'h': [256, 256, 128, 128], 'n': [2, 2, 4, 4], 'o': ['sa', 'sa', 'l', 'l'], 'f': [[512, 512, 512], [512, 512, 512], [1024], [1024]], 'p': ['sdp', 'sdp', 'dct', 'dct']}
Best model is homogenous: False


In [4]:
# How many of the best trained models to list and keep in top_models.
TOP_K_DISPLAYED = 10

# Sort once: ascending 1 - GLUE score means best model first. Sorting inside
# the loop would redo the same work on every iteration.
ranked_indices = np.argsort(y_ds_total)

# Never ask for more models than were actually trained; a fixed range(10)
# raises IndexError when fewer than 10 results are available.
num_shown = min(TOP_K_DISPLAYED, len(ranked_indices))

top_models = []

print(f'Top {num_shown} performances:')
for idx in ranked_indices[:num_shown]:
    model_hash = trained_hashes[idx]
    model, _ = graphLib.get_graph(model_hash=model_hash)
    top_models.append(model)

    print(f'{model.hash}:\n\t{1 - y_ds_total[idx]}')

Top 10 performances:
23f3026b5209a1616b04a1fe57875355e59be58ec9adcd39faf3e0648a533cd9:
	0.4279763024421528
70d79a6325ce426644aed68b93a6d751bcf4dba5b38ced20024a06604b033da7:
	0.41935307370178576
7b9dbcf1b493250d3474e3122a1928709abd568e907e6ffec867abf1f0188921:
	0.41823140219610466
7befea358c6e0bebbb3ef1135856492eb15905e61d15fcc7bfe77f9513dd1c11:
	0.41747407511343715
43b801e44f2b7ea6a2c57ffeb0e6abfef0e68103e3fd088dab6c5bf872c0c680:
	0.41721479366960335
db4f03da8e0d240ae1b2e7a0dd0c2aff630d3cde4b2bed3582bc18b668b529e8:
	0.41688767101809343
e357514481b9448530cf4c9bb3d37c59ca711febb364a850b14910ee0f9bee9e:
	0.41577651438604857
143e2f47222bd5da033216a3adeb65100c63d2848275090be39ece2c321557dd:
	0.41465312160806833
ea481cc86e3d1babcbcba578abd3f353ea51416caa50e99d6196de6f67c90e32:
	0.4098048288389893
fb3b58d322ed68ce187c27f683cc4d712c480347a34561a7a08b9a9047b72b26:
	0.40428287373095917


In [None]:
TOP_NUM_MODELS = 5
NUM_NEIGHBORS_FOR_INTERPOLATION = 5

# Empty library for the new design space; it is populated in later cells.
graphLib_new = GraphLib('../../design_space/design_space_test.yaml')

new_library = []

# Do not index past the end of top_models if fewer models were collected.
num_top_models = min(TOP_NUM_MODELS, len(top_models))

# Stage 1: interpolate between each top model and its first few neighbors.
# NOTE(review): the positional arguments 2 and 1 are passed straight through to
# interpolate_neighbors; their meaning is not visible here -- confirm against
# the GraphLib implementation.
for i in tqdm(range(num_top_models), desc='Generating new library'):
    num_neighbors = NUM_NEIGHBORS_FOR_INTERPOLATION # // (i+1)
    for n in range(num_neighbors):
        neighbor = graphLib.get_graph(model_hash=top_models[i].neighbors[n])[0]
        new_library.extend(graphLib.interpolate_neighbors(
            top_models[i], neighbor, 2, 1, heterogeneous_feed_forward=True))

# Stage 2: interpolate between every unordered pair of top models. This is
# best-effort: a pair that cannot be interpolated is skipped, but the failure
# is recorded and reported instead of being silently discarded. Catching
# Exception (not a bare except) lets KeyboardInterrupt stop the cell.
failed_pairs = []
for i in tqdm(range(num_top_models), desc='Expanding new library'):
    for j in range(i + 1, num_top_models):
        try:
            new_library.extend(graphLib.interpolate_neighbors(
                top_models[i], top_models[j], 2, 1, heterogeneous_feed_forward=True))
        except Exception as err:
            failed_pairs.append((top_models[i].hash, top_models[j].hash, repr(err)))

if failed_pairs:
    print(f'Skipped {len(failed_pairs)} model pair(s) that could not be interpolated:')
    for first_hash, second_hash, reason in failed_pairs:
        print(f'\t{first_hash} <-> {second_hash}: {reason}')

print('Length of new library: ', len(new_library))

In [None]:
# De-duplicate new_library by model hash, keeping the first occurrence of each
# hash and preserving the original order.
# Assumes each element's .hash is hashable (the hashes printed earlier in this
# notebook are hex strings).
seen_hashes = set()   # constant-time membership test (a list scan is O(n) per lookup)
hashes = []           # unique hashes in first-seen order, parallel to reduced_library

reduced_library = []

for n in tqdm(new_library, desc='Reducing new library'):
    if n.hash in seen_hashes:
        continue
    seen_hashes.add(n.hash)
    hashes.append(n.hash)
    reduced_library.append(n)

print('Length of reduced library: ', len(reduced_library))

In [None]:
# First argument handed to build_embeddings; it equals the 16 columns allocated
# for X_ds_total earlier in this notebook (presumably the embedding dimension).
EMBEDDING_DIM = 16

# NOTE(review): this is the same file the notebook loads graphLib from at the
# top, so saving here overwrites the dataset a fresh run starts from --
# confirm that replacing the input dataset is intended.
NEW_DATASET_PATH = '../../dataset/dataset_test_bn_2.json'

# Install the de-duplicated graphs as the content of the new library.
graphLib_new.library = reduced_library

# Build embeddings
graphLib_new.build_embeddings(EMBEDDING_DIM, algo='GD', kernel='GraphEditDistance', n_jobs=1)

# Save dataset
graphLib_new.save_dataset(NEW_DATASET_PATH)