# Introduction

Notebook para análise de dataset NATS-Bench de neural architecture search (NAS).

# Import Libraries

In [1]:
import os
# disable tensorflow log level infos
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # show only errors

In [2]:
import time
import random

from absl import app
from absl import flags
import numpy as np
import pyglove as pg
from typing import Tuple

import nats_bench
import pyglove as pg

from xautodl.models import get_cell_based_tiny_net

# Utilitary Functions

In [33]:
@pg.functor([('ops', pg.typing.List(pg.typing.Str())),('num_nodes', pg.typing.Int())])
def model_tss_spc(ops, num_nodes):
    """The architecture in the topology search space of NATS-Bench."""
    nodes, k = [], 0
    for i in range(1, num_nodes):
        xstrs = []
        for j in range(i):
            xstrs.append('{:}~{:}'.format(ops[k], j))
            k += 1
        nodes.append('|' + '|'.join(xstrs) + '|')
    return '+'.join(nodes)


@pg.functor([('channels', pg.typing.List(pg.typing.Int()))])
def model_sss_spc(channels):
    """The architecture in the size search space of NATS-Bench."""
    return ':'.join(str(x) for x in channels)


def get_algorithm(algorithm_str):
    """Creates algorithm."""
    if algorithm_str == 'random':
        return pg.generators.Random()
    elif algorithm_str == 'evolution':
        return pg.evolution.regularized_evolution(mutator=pg.evolution.mutators.Uniform(), population_size=50, tournament_size=10)
    else:
        return pg.load(algorithm_str)


def get_search_space(ss_indicator):
    """The default search space in NATS-Bench.
  
    Args:
      ss_indicator: tss or sss, indicating the topology or size search space.
  
    Returns:
      A hyper model object that repesents a search space.
    """
    info = nats_bench.search_space_info('nats-bench', ss_indicator)
    #print(info)
    if ss_indicator == 'tss':
        total = info['num_nodes'] * (info['num_nodes'] - 1) // 2
        return model_tss_spc(pg.sublist_of(total, info['op_names'], choices_distinct=False), info['num_nodes'])
    elif ss_indicator == 'sss':
        return model_sss_spc(pg.sublist_of(info['num_layers'], info['candidates'], choices_distinct=False))
    

def get_reporting_epoch(search_space_name):
    # Results in the paper use reporting epochs $H^1$ and $H^2$ for the topology
    # and size search spaces respectively. See section 3.3 of the paper.
    DEFAULT_REPORTING_EPOCH = dict(tss=200, sss=90)
    return DEFAULT_REPORTING_EPOCH[search_space_name]    


def get_default_nats_file(search_space_name):
    DEFAULT_NATS_FILEs = dict(tss=None, sss=None)
    return DEFAULT_NATS_FILEs[search_space_name]

    
def search(nats_api,
           model_search_space_name,
           algo_name,
           dataset='cifar10',
           reporting_epoch=12,
           max_train_hours=2e4):
    """Define the search procedure.
  
    Args:
      nats_api: the NATS-Bench object.
      search_model: which is a `model` object annotated with `one_of`.
      algo: algorithm for search.
      dataset: the target dataset
      reporting_epoch: Use test set results for models trained for this many epochs.
      max_train_hours: max time budget to train the models, which is the sum of training time queried from NAS-Bench.
  
    Returns:
      A tuple of (total time spent at step i for all steps,
                  best validation accuracy at step i for all steps,
                  best test accuracy at step i for all steps)
    """
    
    print(f'Executing NAS: Search Space: {model_search_space_name} | algorithm: {algo_name} | dataset: {dataset} | reporting epoch: {reporting_epoch} | max_train_hours: {max_train_hours}')
    
    model_search_space = get_search_space(model_search_space_name)
    reporting_epoch = get_reporting_epoch(model_search_space_name)
    algorithm = get_algorithm(algo_name)
    
    nats_api.reset_time()
    times, best_valids, best_tests = [0.0], [0.0], [0.0]
    valid_models = 0
    time_spent = 0
    start_time = time.time()
    last_report_time = start_time
    for model, feedback in pg.sample(model_search_space, algorithm):
        spec = model()
        
        #print(f'new cell-spec: {spec}')
        #print(f'new cell-spec ID | DNA: {feedback.id} | {feedback.dna}')
        
        (validation_accuracy, _, _, _) = nats_api.simulate_train_eval(spec, dataset=dataset, hp=reporting_epoch)
        
        time_spent = nats_api.used_time
        
        more_info = nats_api.get_more_info(spec, dataset, hp=reporting_epoch)  # pytype: disable=wrong-arg-types  # dict-kwargs
        
        valid_models += 1
        
        feedback(validation_accuracy)
        
        if validation_accuracy > best_valids[-1]:
            best_valids.append(validation_accuracy)
            best_tests.append(more_info['test-accuracy'])
        else:
            best_valids.append(best_valids[-1])
            best_tests.append(best_tests[-1])

        times.append(time_spent)
        time_spent_in_hours = time_spent / (60 * 60)
        
        if time_spent_in_hours > max_train_hours:
            break # Break the first time we exceed the budget.
        
        if feedback.id % 100 == 0:
            now = time.time()
            print(f'Tried {feedback.id} models, valid {valid_models}, '
                  f'time_spent_in_hours: {int(time_spent_in_hours)}h, '
                  f'time_spent: {round(time_spent,3)}s, '
                  f'elapse since last report: {round(now - last_report_time,3)}s.')
            last_report_time = now
            
    print(f'Total time elapse: {time.time() - start_time} seconds.')
    
    # Remove the first element of each list because these are placeholders
    # used for computing the current max. They don't correspond to
    # actual results from nats_api.
    times,best_valid,best_test = times[1:], best_valids[1:], best_tests[1:]    
        
    print('%15s %15s %15s %15s' % ('# trials', 'best valid (%)', 'best test (%)', 'simulated train hours'))
    print('%15d %15.4f %15.4f %21d' % (len(times), best_valid[-1], best_test[-1], times[-1]))

# Create API and Load Dataset

In [27]:
model_search_space_name = 'tss'

In [28]:
default_nats_file = get_default_nats_file(model_search_space_name)

# Load the dataset.
nats_bench.api_utils.reset_file_system('default')
nats_api = nats_bench.create(default_nats_file, model_search_space_name, fast_mode=True, verbose=False)

[2023-05-30 02:51:18] Try to use the default NATS-Bench (topology) path from fast_mode=True and path=None.


# Run Search

# Compare Algorithms - TSS Search Space

## Random

In [34]:
algorithm = 'random'
dataset = 'cifar10'
max_train_hours = 100

search(nats_api, model_search_space_name, algorithm, dataset, reporting_epoch, max_train_hours)

Executing NAS: Search Space: tss | algorithm: random | dataset: cifar10 | reporting epoch: 200 | max_train_hours: 100
Tried 100 models, valid 100, time_spent_in_hours: 52h, time_spent: 190723.99s, elapse since last report: 2.936s.
Total time elapse: 5.784667253494263 seconds.
       # trials  best valid (%)   best test (%) simulated train hours
            192         91.5400         94.0700                361310


## Evolution

In [35]:
algorithm = 'evolution'
dataset = 'cifar10'
max_train_hours = 100

search(nats_api, model_search_space_name, algorithm, dataset, reporting_epoch, max_train_hours)

Executing NAS: Search Space: tss | algorithm: evolution | dataset: cifar10 | reporting epoch: 200 | max_train_hours: 100
Tried 100 models, valid 100, time_spent_in_hours: 55h, time_spent: 199073.843s, elapse since last report: 2.777s.
Total time elapse: 4.355370998382568 seconds.
       # trials  best valid (%)   best test (%) simulated train hours
            166         91.7200         94.2600                361334
