In [1]:
import os
from typing import Optional

import gym
import numpy as np
from gym import spaces

## NasBench101

In [2]:
from nasbench import api

# Location of the NAS-Bench-101 tfrecord. Set the NASBENCH_TFRECORD environment
# variable to point at a different copy instead of editing this cell.
NASBENCH_TFRECORD = os.environ.get(
    "NASBENCH_TFRECORD", "/scratch2/sem22hs2/nasbench_full.tfrecord"
)

# Loading the full table takes a few minutes (see the loader's own message below).
dataset = api.NASBench(NASBENCH_TFRECORD)

Loading dataset from file... This may take a few minutes...
Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


KeyboardInterrupt: 

In [5]:
def objective_function(adjacency_mat,labeling, budget=108):
    """Look up a cell in the loaded NAS-Bench-101 table.

    The interior ``labeling`` is wrapped with the fixed ``'input'`` and
    ``'output'`` vertices before the model spec is built.

    Args:
        adjacency_mat: Adjacency matrix handed to ``api.ModelSpec``.
        labeling: Operations of the interior vertices (input/output excluded).
        budget: Value forwarded as ``epochs`` to ``dataset.query``.

    Returns:
        ``(validation_accuracy, training_time)`` taken from the query result,
        or ``(0, 0)`` when the query raises ``api.OutOfDomainError``.
    """
    full_labels = ['input', *labeling, 'output']
    spec = api.ModelSpec(adjacency_mat, full_labels)
    try:
        result = dataset.query(spec, epochs=budget)
    except api.OutOfDomainError:
        # The benchmark rejected this spec; report a zero score and zero cost.
        return 0, 0
    return result["validation_accuracy"], result["training_time"]

In [6]:
# TODO: sample an architecture.
# TODO: check architecture validity; give a negative reward if it is invalid.

class NasBench101(gym.Env):
    """Gym environment that edits a NAS-Bench-101 cell one action at a time.

    State:
        adjacency_mat: ``v x v`` integer matrix; only the strictly upper
            triangle is ever modified, so the graph stays a DAG without
            self-loops.
        labeling: list of ``v - 2`` operation names (input and output vertices
            are excluded; ``objective_function`` adds them).

    Actions (``spaces.Discrete``):
        * ``0 .. E-1`` toggle one edge, where ``E = v*(v-1)/2`` is the number
          of strictly-upper-triangular entries (ordering of
          ``np.triu_indices(v, k=1)``).
        * ``E .. E + (v-2)*len(ops) - 1`` set the operation of one interior
          vertex; the offset is decoded as ``(vertex, op)`` in row-major order.

    Observations (``spaces.Dict``):
        * ``"adjacency_mat"``: flattened strictly-upper-triangular entries.
        * ``"labels"``: index into ``ops`` for every interior vertex.

    The reward of a step is the first value returned by
    ``objective_function`` (defined elsewhere in this notebook) for the
    resulting cell.
    """

    metadata = {"render_modes": [], "render_fps": 1}
    DEFAULT_OPS = ('conv1x1-bn-relu', 'conv3x3-bn-relu', 'maxpool3x3')

    def __init__(self, v=7, e=9, ops=None, step_max=1000,
                 render_mode: Optional[str] = None):
        """Create the environment.

        Args:
            v: Number of vertices in the cell, including input and output.
            e: Maximum number of edges. Stored only; not enforced here.
            ops: Sequence of operation names available to interior vertices.
                ``None`` selects ``DEFAULT_OPS``.
            step_max: Number of steps after which an episode is reported done.
            render_mode: Must be ``None``; rendering is not implemented.
        """
        assert render_mode is None # or render_mode in self.metadata["render_modes"]
        # Copy into a fresh list so a caller's sequence (or a shared default)
        # is never aliased by the environment.
        ops = list(self.DEFAULT_OPS if ops is None else ops)

        # Environment definition
        self.max_edges = e
        self.vertices = v
        self.ops = ops

        # Current state
        self.adjacency_mat = np.zeros([v, v], dtype=int)
        self.labeling = (v - 2) * [ops[0]]  # Initial op for all layers that are not input or output

        # Helper: indices of the *strictly* upper triangle (k=1). Diagonal
        # entries would be self-loops, which an upper-triangular DAG cannot have.
        self.idx_upper = np.triu_indices(v, k=1)

        self.num_step = 0
        self.step_max = step_max
        num_indices_triu = len(self.idx_upper[0])
        self.observation_space = spaces.Dict(
            {
                "adjacency_mat": spaces.MultiBinary(num_indices_triu),
                "labels": spaces.MultiDiscrete(np.array((v - 2) * [len(ops)])),
            }
        )
        # One action per toggleable edge plus one per (interior vertex, op) pair.
        self.action_space = spaces.Discrete(num_indices_triu + (v - 2) * len(ops))

    def _get_obs(self):
        """Encode the current cell in the layout declared by ``observation_space``."""
        return {
            "adjacency_mat": self.adjacency_mat[self.idx_upper].astype(np.int8),
            "labels": np.array(
                [self.ops.index(op) for op in self.labeling], dtype=np.int64
            ),
        }

    def step(self, action):
        """Apply one edit to the cell and score the result.

        Args:
            action: Integer action index (see the class docstring).

        Returns:
            ``(observation, reward, done, info)`` where ``reward`` is the
            first value returned by ``objective_function``, ``done`` becomes
            True once ``step_max`` steps were taken since the last reset, and
            ``info["training_time"]`` carries the second returned value.

        Raises:
            ValueError: If ``action`` is outside the action space.
        """
        action = int(action)
        num_edge_actions = len(self.idx_upper[0])
        num_ops = len(self.ops)
        num_label_slots = self.vertices - 2
        num_actions = num_edge_actions + num_label_slots * num_ops
        if not 0 <= action < num_actions:
            raise ValueError(
                "action {} is outside [0, {})".format(action, num_actions)
            )

        if action < num_edge_actions:
            # Toggle the selected edge.
            row = self.idx_upper[0][action]
            col = self.idx_upper[1][action]
            self.adjacency_mat[row, col] = 1 - self.adjacency_mat[row, col]
        else:
            # Decode (interior vertex, op) and store the op *name*, which is
            # what objective_function expects in the labeling.
            label_row, op_idx = np.unravel_index(
                action - num_edge_actions, (num_label_slots, num_ops)
            )
            self.labeling[label_row] = self.ops[op_idx]

        accuracy, training_time = objective_function(self.adjacency_mat, self.labeling)
        reward = accuracy

        self.num_step += 1
        done = self.num_step >= self.step_max

        observation = self._get_obs()
        info = {"training_time": training_time}
        return observation, reward, done, info

    def reset(self):
        """Start a new episode from a random cell and return its observation.

        Edges and interior operations are drawn uniformly with
        ``np.random.randint``; the step counter is set back to zero.
        TODO: the random cell is not checked against ``max_edges``.
        """
        num_edge_slots = len(self.idx_upper[0])
        self.adjacency_mat[:] = 0
        self.adjacency_mat[self.idx_upper] = np.random.randint(0, 2, num_edge_slots)
        op_indices = np.random.randint(0, len(self.ops), len(self.labeling))
        self.labeling = [self.ops[i] for i in op_indices]
        self.num_step = 0
        return self._get_obs()

## NATS Bench (same topology space as NasBench 201)

In [1]:
import gym
import os
import sys
import ray
from ray.tune.integration.wandb import WandbLoggerCallback
# Make the local `nas-bench-envs` directory importable from this kernel.
module_path = os.path.abspath(os.path.join('nas-bench-envs'))
if module_path not in sys.path:
    sys.path.append(module_path)

# Also expose it through PYTHONPATH (presumably so that processes spawned from
# this kernel can import it too). Extend any existing value instead of
# overwriting it, and do so even when sys.path already contained the entry.
_pythonpath_entries = [
    entry for entry in os.environ.get('PYTHONPATH', '').split(os.pathsep) if entry
]
if module_path not in _pythonpath_entries:
    os.environ['PYTHONPATH'] = os.pathsep.join(_pythonpath_entries + [module_path])

In [2]:
from nas_bench_envs.envs.nas_bench_201_envs import NasBench201
from ray.tune.registry import register_env

# Name under which the environment is registered with Ray Tune.
select_env = "nas_bench_envs/NasBench201-v0"


def _make_nas_bench_201(config):
    """Environment factory: build a NasBench201 instance from ``config``."""
    return NasBench201(config)


register_env(select_env, _make_nas_bench_201)

In [3]:
# Show the architecture topology string of the 12-th architecture
# For the topology search space, the string is interpreted as
# arch = '|{}~0|+|{}~0|{}~1|+|{}~0|{}~1|{}~2|'.format(
#         edge_node_0_to_node_1,
#         edge_node_0_to_node_2,
#         edge_node_1_to_node_2,
#         edge_node_0_to_node_3,
#         edge_node_1_to_node_3,
#         edge_node_2_to_node_3,
#         )

In [4]:
from nats_bench import create

# Create the API instance for the topology search space ('tss') in NATS-Bench.
# It is bound to `nats_api` rather than `api` so that it does not shadow the
# `nasbench.api` module that `objective_function` relies on.
nats_api = create("/scratch2/sem22hs2/NATS-tss-v1_0-3ffb9-simple", 'tss', fast_mode=True, verbose=False)

# Architecture string of index 12, followed by its recorded CIFAR-10 metrics.
architecture_str = nats_api.arch(12)
print(architecture_str)
print()
info = nats_api.get_more_info(architecture_str, 'cifar10')
print(info)

|none~0|+|none~0|none~1|+|none~0|nor_conv_3x3~1|avg_pool_3x3~2|

{'train-loss': 2.302620864105225, 'train-accuracy': 9.866, 'train-per-time': 13.317416548728943, 'train-all-time': 159.80899858474731, 'comment': 'In this dict, train-loss/accuracy/time is the metric on the train+valid sets of CIFAR-10. The test-loss/accuracy/time is the performance of the CIFAR-10 test set after training on the train+valid sets by 12 epochs. The per-time and total-time indicate the per epoch and total time costs, respectively.', 'test-loss': 2.3025942649841307, 'test-accuracy': 10.0, 'test-per-time': 1.0880018813269479, 'test-all-time': 13.056022575923373}


In [14]:
N_ITER = 20  # number of training iterations after which Tune stops the trial

# ignore_reinit_error allows re-running this cell without calling
# ray.shutdown() first.
ray.init(runtime_env={"working_dir": "./"}, ignore_reinit_error=True)
ray.tune.run(
    "PPO",
    # Use the constant defined above instead of a separate literal so the two
    # cannot drift apart.
    stop={"training_iteration": N_ITER},
    config={
        "env": NasBench201,
        "record_env": True,
        "framework": "torch",
        "num_cpus_per_worker": 2,
        "num_gpus": 0,
        "num_workers": 4
    },

    #local_dir="logs",
    # SECURITY: never commit the W&B key to the notebook; read it from the
    # environment. The key that used to be hard-coded here must be revoked.
    #callbacks=[WandbLoggerCallback(api_key=os.environ["WANDB_API_KEY"], project="NasBenchV1")],
)

2022-07-28 15:37:19,167	INFO packaging.py:388 -- Creating a file package for local directory './'.
2022-07-28 15:37:19,760	INFO packaging.py:241 -- Pushing file package 'gcs://_ray_pkg_f6f72a63941ab217.zip' (9.29MiB) to Ray cluster...
2022-07-28 15:37:19,962	INFO packaging.py:243 -- Successfully pushed file package 'gcs://_ray_pkg_f6f72a63941ab217.zip'.
2022-07-28 15:37:21,345	INFO trial_runner.py:803 -- starting PPO_NasBench201Env_6757b_00000
[2m[36m(PPOTrainer pid=16583)[0m 2022-07-28 15:37:26,858	INFO ppo.py:268 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(PPOTrainer pid=16583)[0m 2022-07-28 15:37:26,858	INFO trainer.py:864 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(RolloutWorker pid=16643)[0m Setting the path for recording to /home/sem22h2/ray_results/PPO/PPO_NasBench201Env_6757b_00000_0_2022-07-28_15-37-21/


[2m[36m(RolloutWorker pid=16643)[0m   logger.warn(


Trial name,status,loc
PPO_NasBench201Env_6757b_00000,RUNNING,129.132.4.157:16583


[2m[36m(RolloutWorker pid=16642)[0m Setting the path for recording to /home/sem22h2/ray_results/PPO/PPO_NasBench201Env_6757b_00000_0_2022-07-28_15-37-21/


[2m[36m(RolloutWorker pid=16642)[0m   logger.warn(
[2m[36m(RolloutWorker pid=16645)[0m   logger.warn(
[2m[36m(RolloutWorker pid=16644)[0m   logger.warn(


[2m[36m(RolloutWorker pid=16645)[0m Setting the path for recording to /home/sem22h2/ray_results/PPO/PPO_NasBench201Env_6757b_00000_0_2022-07-28_15-37-21/
[2m[36m(RolloutWorker pid=16644)[0m Setting the path for recording to /home/sem22h2/ray_results/PPO/PPO_NasBench201Env_6757b_00000_0_2022-07-28_15-37-21/


2022-07-28 15:37:33,278	ERROR trial_runner.py:876 -- Trial PPO_NasBench201Env_6757b_00000: Error processing event.
NoneType: None


Result for PPO_NasBench201Env_6757b_00000:
  date: 2022-07-28_15-37-32
  experiment_id: 5e65395c3ce14a6284df16b04d9fdd29
  hostname: sassauna4.ee.ethz.ch
  node_ip: 129.132.4.157
  pid: 16583
  timestamp: 1659015452
  trial_id: 6757b_00000
  


Trial name,status,loc
PPO_NasBench201Env_6757b_00000,ERROR,129.132.4.157:16583

Trial name,# failures,error file
PPO_NasBench201Env_6757b_00000,1,/home/sem22h2/ray_results/PPO/PPO_NasBench201Env_6757b_00000_0_2022-07-28_15-37-21/error.txt


Trial name,status,loc
PPO_NasBench201Env_6757b_00000,ERROR,129.132.4.157:16583

Trial name,# failures,error file
PPO_NasBench201Env_6757b_00000,1,/home/sem22h2/ray_results/PPO/PPO_NasBench201Env_6757b_00000_0_2022-07-28_15-37-21/error.txt


[2m[36m(PPOTrainer pid=16583)[0m 2022-07-28 15:37:33,703	ERROR worker.py:92 -- Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): [36mray::RolloutWorker.par_iter_next()[39m (pid=16645, ip=129.132.4.157, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x7f59282dbc70>)
[2m[36m(PPOTrainer pid=16583)[0m   File "/home/sem22h2/.conda/envs/RL/lib/python3.9/site-packages/ray/util/iter.py", line 1186, in par_iter_next
[2m[36m(PPOTrainer pid=16583)[0m     return next(self.local_it)
[2m[36m(PPOTrainer pid=16583)[0m   File "/home/sem22h2/.conda/envs/RL/lib/python3.9/site-packages/ray/rllib/evaluation/rollout_worker.py", line 404, in gen_rollouts
[2m[36m(PPOTrainer pid=16583)[0m     yield self.sample()
[2m[36m(PPOTrainer pid=16583)[0m   File "/home/sem22h2/.conda/envs/RL/lib/python3.9/site-packages/ray/rllib/evaluation/rollout_worker.py", line 815, in sample
[2m[36m(PPOTrainer pid=16583)[0m     batches = [self.input_reader.next()]
[2m[36m(PPO

TuneError: ('Trials did not complete', [PPO_NasBench201Env_6757b_00000])

In [13]:
# Shut down the Ray runtime started by ray.init() in the training cell.
ray.shutdown()

<TimeLimit<NasBench201<nas_bench_envs/NasBench201-v0>>>

## Ray Gym environment checker

In [21]:
# Import the subpackage explicitly: `import ray` alone does not guarantee that
# `ray.rllib.utils` is reachable by attribute access on a fresh kernel.
import ray.rllib.utils

# Run RLlib's environment checker on a freshly constructed environment.
ray.rllib.utils.check_env(NasBench201())

