In [1]:
# Environment setup: select TensorFlow 1.x, fetch the NASBench data file and
# source code, install the package, and load the dataset into memory.
#
# This code was written in TF 1.12 but should be supported all the way through
# TF 1.15. Untested in TF 2.0+.
%tensorflow_version 1.x

# Download the raw data (only the 108-epoch data points; for the full dataset,
# uncomment the second line to fetch nasbench_full.tfrecord instead).
# NOTE(review): this is a ~500 MB download that is repeated every time the
# cell runs; consider skipping it when the file is already present.

!curl -O https://storage.googleapis.com/nasbench/nasbench_only108.tfrecord
# !curl -O https://storage.googleapis.com/nasbench/nasbench_full.tfrecord

# Clone and install the code and dependencies.
# NOTE(review): the clone is unpinned (tracks the default branch) and the
# install uses `!pip`; confirm this is acceptable for reproducibility.

!git clone https://github.com/google-research/nasbench
!pip install ./nasbench

# Initialize the NASBench object which parses the raw data into memory (this
# should only be run once as it takes up to a few minutes).
from nasbench import api

# Also expose the cloned package directory on sys.path so that `api` can be
# imported as a top-level module below. The membership check keeps the path
# from being appended again when the cell is re-run.
import sys
if './nasbench/nasbench/' not in sys.path:
  sys.path.append('./nasbench/nasbench/')

# NOTE(review): later cells use `tf`, `json`, `base64` and `model_metrics_pb2`
# without importing them; presumably those names enter the notebook namespace
# through this star import -- confirm, and prefer explicit imports.
from api import *

# Path of the TFRecord file downloaded above; later cells read it directly.
dataset_file = 'nasbench_only108.tfrecord'

# Use nasbench_full.tfrecord for full dataset (run download command above).
nasbench = api.NASBench(dataset_file)

TensorFlow 1.x selected.
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  498M  100  498M    0     0   108M      0  0:00:04  0:00:04 --:--:--  108M
Cloning into 'nasbench'...
remote: Enumerating objects: 96, done.[K
remote: Total 96 (delta 0), reused 0 (delta 0), pack-reused 96[K
Unpacking objects: 100% (96/96), done.
Processing ./nasbench
Collecting gast==0.2.2
  Downloading https://files.pythonhosted.org/packages/4e/35/11749bf99b2d4e3cceb4d55ca22590b0d7c2c62b9de38ac4a4a7f4687421/gast-0.2.2.tar.gz
Building wheels for collected packages: nasbench, gast
  Building wheel for nasbench (setup.py) ... [?25l[?25hdone
  Created wheel for nasbench: filename=nasbench-1.0-cp36-none-any.whl size=46789 sha256=6f34e9d5bbde255c35345c23bbdbe54b49d3805d0eb64c098c9d98fb9141f721
  Stored in directory: /tmp/pip-ephem-wheel-cache-tsmed7bj/wheels/4b/19/99/1d5fdfe30f8b16fab91e900808f4f7e5adc

In [14]:
# Standard imports
import copy
import numpy as np
import matplotlib.pyplot as plt
import random
from scipy.sparse import coo_matrix as compress

# Useful constants describing the NASBench search space.

# Operation labels as they appear in the dataset's operation lists.
INPUT = 'input'
OUTPUT = 'output'
CONV3X3 = 'conv3x3-bn-relu'
CONV1X1 = 'conv1x1-bn-relu'
MAXPOOL3X3 = 'maxpool3x3'

# Size limits of a cell graph.
NUM_VERTICES = 7
MAX_EDGES = 9

# Number of entries strictly above the diagonal of a NUM_VERTICES x
# NUM_VERTICES matrix. Floor division keeps this an int (true division would
# give the float 21.0, which cannot be used as a size, index or range bound).
EDGE_SPOTS = NUM_VERTICES * (NUM_VERTICES - 1) // 2   # Upper triangular matrix
OP_SPOTS = NUM_VERTICES - 2   # Input/output vertices are fixed
ALLOWED_OPS = [CONV3X3, CONV1X1, MAXPOOL3X3]
ALLOWED_EDGES = [0, 1]   # Binary adjacency matrix

In [5]:
# Build the list of unique architectures (adjacency matrix + operation labels)
# by scanning every record of the raw TFRecord file.
#
# Produces:
#   adjacency_list  -- one (dim, dim) int8 numpy array per unique architecture
#   operations_list -- the matching list of operation labels, one per vertex
#
# NOTE(review): `tf` and `json` are not imported explicitly in the visible
# cells; presumably they arrive through `from api import *` in the setup cell.

count = 0
# Expected number of records, used only to scale the progress display.
# NOTE(review): presumably 423624 architectures x 3 training repeats in
# nasbench_only108.tfrecord; the percentage is meaningless for other files.
max_count = 423624 * 3
# Refresh the progress line only every PROGRESS_EVERY records; writing to
# stdout for each of >1M records dominates the loop's running time.
PROGRESS_EVERY = 10000

adjacency_list = []
operations_list = []
# Hashes of the architectures already collected.
seen_hashes = set()

print('Generating dataset')
for serialized_row in tf.python_io.tf_record_iterator(dataset_file):

  count += 1
  if count % PROGRESS_EVERY == 0:
    print('\rCompleted: %0.2f%%' % (count/(max_count)*100), end='')

  # Parse the data from the data file.
  module_hash, epochs, raw_adjacency, raw_operations, raw_metrics = (
      json.loads(serialized_row.decode('utf-8')))

  # Take only unique adjacency matrices. De-duplicating on the module hash
  # (instead of keeping every third record) does not depend on how many times
  # an architecture is repeated in the file or on the order of the records.
  if module_hash in seen_hashes: continue
  seen_hashes.add(module_hash)

  # raw_adjacency is a flattened square matrix, one digit per entry.
  dim = int(np.sqrt(len(raw_adjacency)))
  adjacency = np.array([int(e) for e in raw_adjacency], dtype=np.int8)
  adjacency = np.reshape(adjacency, (dim, dim))
  operations = raw_operations.split(',')

  adjacency_list.append(adjacency)
  operations_list.append(operations)

  # The per-run training statistics are not needed to build the dataset, so
  # raw_metrics is left undecoded. To inspect the evaluation statistics at the
  # end of training, decode it like this:
  #
  #   metrics = model_metrics_pb2.ModelMetrics.FromString(
  #       base64.b64decode(raw_metrics))
  #   final_evaluation = metrics.evaluation_data[2]
  #   training_time = final_evaluation.training_time
  #   train_accuracy = final_evaluation.train_accuracy
  #   validation_accuracy = final_evaluation.validation_accuracy
  #   test_accuracy = final_evaluation.test_accuracy
  #   trainable_params = metrics.trainable_parameters

# Final refresh so the progress line reflects the last record read.
print('\rCompleted: %0.2f%%' % (count/(max_count)*100), end='')

Generating dataset
Completed: 100.00%

In [18]:
# Fold each vertex's operation into its adjacency matrix and store a sparse
# copy of the result.
#
# Row v of every adjacency matrix is multiplied by the 1-based position of
# vertex v's operation in `ordered_operations`, so a non-zero entry says both
# that an edge leaves vertex v and which operation v applies; 0 still means
# "no edge".
#
# Produces:
#   weighted_adjacency_list  -- dense weighted matrices (new arrays; the
#                               matrices in adjacency_list are left untouched)
#   compressed_weighted_list -- the same matrices in sparse COO form
ordered_operations = [INPUT, OUTPUT, MAXPOOL3X3, CONV1X1, CONV3X3]
# Operation label -> 1-based weight, built once instead of searching the list
# for every vertex of every matrix.
operation_codes = {op: code for code, op in enumerate(ordered_operations, start=1)}

weighted_adjacency_list = []
compressed_weighted_list = []

count = 0
# Scale the progress display by the number of matrices actually processed
# rather than by a record count hard-coded in another cell.
total = len(adjacency_list)
# Refresh the progress line only every PROGRESS_EVERY matrices (and on the
# last one) instead of writing to stdout on every iteration.
PROGRESS_EVERY = 10000

print('Augmenting dataset')
for adjacency, operations in zip(adjacency_list, operations_list):

  count += 1
  if count % PROGRESS_EVERY == 0 or count == total:
    print('\rCompleted: %0.2f%%' % (count/total*100), end='')

  vertices = np.shape(adjacency)[0]

  # One weight per row, kept in the matrix's own dtype so the product does
  # not get promoted to a wider integer type.
  row_codes = np.array(
      [operation_codes[op] for op in operations[:vertices]],
      dtype=adjacency.dtype)
  # Broadcasting a column vector scales every row by its vertex's weight and
  # yields a new array, so no deep copy of the input list is needed.
  weighted = adjacency * row_codes[:, np.newaxis]

  weighted_adjacency_list.append(weighted)
  compressed_weighted_list.append(compress(weighted))

Augmenting dataset
Completed: 100.00%