# Advanced Features of Eve
## eve-mli: making learning interesting

## NAS with RL

Now, let's try some more interesting things with **Eve**.

Let's begin with an example on MNIST, which uses the DDPG method to search for the best bit width for a
quantized network.

First, let's import the necessary packages.

In [1]:
import argparse
import difflib
import importlib
import os
import sys
import uuid

import gym
import numpy as np

import torch
from pprint import pprint
from eve.rl.exp_manager import ExperimentManager
from eve.rl.utils.utils import ALGOS, StoreDict
from eve.rl.common.utils import set_random_seed

Define hyperparameters for experiments

In [2]:

def _str2bool(value):
    """Convert a command-line token to a real boolean.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is True
    because any non-empty string is truthy, so the flag could never be
    switched off from the command line.

    Args:
        value: The raw token (or an actual ``bool``, returned unchanged).

    Returns:
        ``True`` or ``False``. The empty string maps to ``False``, which is
        what ``bool("")`` gave before.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised boolean
            spelling.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in {"1", "true", "t", "yes", "y", "on"}:
        return True
    if text in {"0", "false", "f", "no", "n", "off", ""}:
        return False
    raise argparse.ArgumentTypeError(
        f"expected a boolean value, got {value!r}")


# Command-line options for the experiment. Every option has a default, so
# the parser also works when no arguments are supplied.
parser = argparse.ArgumentParser()

# --- Algorithm and environment -------------------------------------------
parser.add_argument(
    "--algo",
    help="RL Algorithm used to NAS searching.",
    default="ddpg",
    type=str,
    required=False,
    choices=list(ALGOS.keys()),
)
parser.add_argument(
    "--env",
    # Adjacent string literals are concatenated verbatim, so each fragment
    # needs its own trailing space.
    help="The environment used to wrapper trainer. "
    "Different environments will apply different "
    "reward functions and interactive steps.",
    default="mnist-v0",
    type=str,
    required=False,
)
parser.add_argument(
    "-tb",
    "--tensorboard-log",
    help="Tensorboard log dir.",
    default="/media/densechen/data/code/eve-mli/examples/logs/",
    type=str,
)
parser.add_argument(
    "-i",
    "--trained_agent",
    help="Path to a pretrained agent to continue training",
    default="",
    type=str,
)
parser.add_argument(
    "--truncate-last-trajectory",
    help="When using HER with online sampling the last "
    "trajectory in the replay buffer will be truncated "
    "after reloading the replay buffer.",
    default=True,
    # Was `type=bool`, which turned "False" into True.
    type=_str2bool,
)

# --- Training schedule, evaluation and checkpointing ---------------------
parser.add_argument(
    "-n",
    "--n-timesteps",
    help="Overwrite the number of timesteps",
    default=-1,
    type=int,
)
parser.add_argument(
    "--num-threads",
    help="Number of threads for PyTorch (-1 to use default)",
    default=-1,
    type=int,
)
parser.add_argument(
    "--log-interval",
    help="Overwrite log interval (default: -1, no change)",
    default=-1,
    type=int,
)
parser.add_argument(
    "--eval-freq",
    help="Evaluate the agent every n steps (if negative, no evaluation)",
    default=10000,
    type=int,
)
parser.add_argument(
    "--eval-episodes",
    help="Number of episodes to use for evaluation",
    default=5,
    type=int,
)
parser.add_argument(
    "--save-freq",
    help="Save the model every n steps (if negative, no checkpoint)",
    default=-1,
    type=int,
)
parser.add_argument(
    "--save-replay-buffer",
    help="Save the replay buffer too (when applicable)",
    action="store_true",
    default=False,
)
parser.add_argument(
    "-f",
    "--log-folder",
    help="Log folder",
    type=str,
    default="logs",
)
parser.add_argument(
    "--seed",
    help="Random generator seed",
    type=int,
    default=-1,
)
parser.add_argument(
    "--vec-env",
    help="VecEnv type",
    type=str,
    default="dummy",
    choices=["dummy", "subproc"],
)

# --- Hyperparameter optimization -----------------------------------------
parser.add_argument(
    "--n-trials",
    help="Number of trials for optimizing hyperparameters",
    type=int,
    default=10,
)
parser.add_argument(
    "-optimize",
    "--optimize-hyperparameters",
    action="store_true",
    default=False,
    help="Run hyperparameters search",
)
parser.add_argument(
    "--n-jobs",
    help="Number of parallel jobs when optimizing hyperparameters",
    type=int,
    default=1,
)
parser.add_argument(
    "--sampler",
    help="Sampler to use when optimizing hyperparameters",
    type=str,
    default="tpe",
    choices=["random", "tpe", "skopt"],
)
parser.add_argument(
    "--pruner",
    help="Pruner to use when optimizing hyperparameters",
    type=str,
    default="median",
    choices=["halving", "median", "none"],
)
parser.add_argument(
    "--n-startup-trials",
    help="Number of trials before using optuna sampler",
    type=int,
    default=10,
)
parser.add_argument(
    "--n-evaluations",
    help="Number of evaluations for hyperparameter optimization",
    type=int,
    default=20,
)
parser.add_argument(
    "--storage",
    help="Database storage path if distributed optimization should be used",
    type=str,
    default=None,
)
parser.add_argument(
    "--study-name",
    help="Study name for distributed optimization",
    type=str,
    default=None,
)

# --- Miscellaneous -------------------------------------------------------
parser.add_argument("--verbose",
                    help="Verbose mode (0: no output, 1: INFO)",
                    default=1,
                    type=int)
parser.add_argument(
    "--env-kwargs",
    type=str,
    nargs="+",
    action=StoreDict,
    help="Optional keyword argument to pass to the env constructor. "
    "Discard! Manually defined in latter.",
)
parser.add_argument(
    "-params",
    "--hyperparams",
    type=str,
    nargs="+",
    action=StoreDict,
    help="Overwrite hyperparameter (e.g. learning_rate:0.01)",
)
parser.add_argument("-uuid",
                    "--uuid",
                    action="store_true",
                    default=False,
                    help="Ensure that the run has a unique ID.")

# NOTE(review): parse_args() reads sys.argv. Inside Jupyter the kernel is
# presumably launched with its own `-f <connection file>` argument, which this
# parser would accept as --log-folder; args.log_folder is reassigned after
# parsing, so the value should not matter -- confirm when running elsewhere.
args = parser.parse_args()

# Point the log folder at the example's log directory, replacing the parsed
# --log-folder value.
args.log_folder = "/media/densechen/data/code/eve-mli/examples/logs"

# Environment keyword arguments are assembled by hand here; this assignment
# replaces whatever --env-kwargs produced on the command line.
args.env_kwargs = {
    # Settings for the eve network: node, quantizer and encoder, each with
    # its own keyword arguments.
    "eve_net_kwargs": dict(
        node="IfNode",
        node_kwargs=dict(
            voltage_threshold=0.5,
            time_independent=False,
            requires_upgrade=False,
        ),
        quan="SteQuan",
        quan_kwargs=dict(requires_upgrade=True),
        encoder="RateEncoder",
        encoder_kwargs=dict(timesteps=1),
    ),
    "max_bits": 8,
    "root_dir": "/media/densechen/data/code/eve-mli/examples/logs",
    "data_root": "/media/densechen/data/dataset",
    "pretrained": "/media/densechen/data/code/eve-mli/examples/checkpoint/mnist.pth",
    "device": "auto",
}

# Show the final configuration.
pprint(args)

Namespace(algo='ddpg', env='mnist-v0', env_kwargs={'eve_net_kwargs': {'node': 'IfNode', 'node_kwargs': {'voltage_threshold': 0.5, 'time_independent': False, 'requires_upgrade': False}, 'quan': 'SteQuan', 'quan_kwargs': {'requires_upgrade': True}, 'encoder': 'RateEncoder', 'encoder_kwargs': {'timesteps': 1}}, 'max_bits': 8, 'root_dir': '/media/densechen/data/code/eve-mli/examples/logs', 'data_root': '/media/densechen/data/dataset', 'pretrained': '/media/densechen/data/code/eve-mli/examples/checkpoint/mnist.pth', 'device': 'auto'}, eval_episodes=5, eval_freq=10000, hyperparams=None, log_folder='/media/densechen/data/code/eve-mli/examples/logs', log_interval=-1, n_evaluations=20, n_jobs=1, n_startup_trials=10, n_timesteps=-1, n_trials=10, num_threads=-1, optimize_hyperparameters=False, pruner='median', sampler='tpe', save_freq=-1, save_replay_buffer=False, seed=-1, storage=None, study_name=None, tensorboard_log='/media/densechen/data/code/eve-mli/examples/logs/', trained_agent='', truncat

In [3]:

# Check that the requested environment id is registered with gym before any
# expensive setup happens.
env_id = args.env
registered_envs = set(gym.envs.registry.env_specs.keys())
# If the environment is not found, suggest the closest match.
if env_id not in registered_envs:
    try:
        closest_match = difflib.get_close_matches(env_id, registered_envs,
                                                  n=1)[0]
    except IndexError:
        # get_close_matches returned an empty list: nothing similar enough.
        closest_match = "no close match found..."
    # This must be an f-string; the previous raw string printed the
    # "{env_id}" / "{closest_match}" placeholders literally.
    raise ValueError(
        f"{env_id} not found in gym registry, you maybe meant {closest_match}")

# Optional unique suffix so that runs started at the same time do not race
# on folder creation; empty unless --uuid was given.
if args.uuid:
    uuid_str = f"_{uuid.uuid4()}"
else:
    uuid_str = ""

# A negative seed means "draw one at random". The drawn value is written back
# to args so the same seed is used everywhere afterwards.
if args.seed < 0:
    args.seed = np.random.randint(0, 2**32 - 1, dtype="int64").item()

set_random_seed(args.seed)

# Setting num threads to 1 makes things run faster on cpu.
if args.num_threads > 0:
    # Only the message depends on verbosity. The thread count itself must be
    # applied whenever --num-threads is given; it used to be nested under the
    # verbose check and was silently skipped with --verbose 0.
    if args.verbose > 0:
        pprint(f"Setting torch.num_threads to {args.num_threads}")
    torch.set_num_threads(args.num_threads)

# Validate the checkpoint path only when one was supplied. An explicit raise
# is used instead of `assert` so the check still runs under `python -O`; it
# raises ValueError, like the environment check earlier in this cell.
if args.trained_agent != "":
    if not (args.trained_agent.endswith(".zip")
            and os.path.isfile(args.trained_agent)):
        raise ValueError(
            "The trained_agent must be a valid path to a .zip file.")

# Banner with the environment id and the seed actually in use.
print("=" * 10, env_id, "=" * 10)
print(f"Seed: {args.seed}")

Seed: 3759089907


Define the ExperimentManager

In [4]:

# Build the experiment manager from the parsed options.
# The leading arguments are passed positionally, so their order must match the
# ExperimentManager constructor (signature not visible here -- do not reorder
# without checking it).
exp_manager = ExperimentManager(
    args,
    args.algo,
    env_id,
    args.log_folder,
    args.tensorboard_log,
    args.n_timesteps,
    args.eval_freq,
    args.eval_episodes,
    args.save_freq,
    args.hyperparams,
    # Manually assigned after parsing, not taken from the command line.
    args.env_kwargs,
    args.trained_agent,
    args.optimize_hyperparameters,
    args.storage,
    args.study_name,
    args.n_trials,
    args.n_jobs,
    args.sampler,
    args.pruner,
    n_startup_trials=args.n_startup_trials,
    n_evaluations=args.n_evaluations,
    truncate_last_trajectory=args.truncate_last_trajectory,
    # Empty string unless --uuid was given.
    uuid_str=uuid_str,
    # Already replaced by a random value if the parsed seed was negative.
    seed=args.seed,
    log_interval=args.log_interval,
    save_replay_buffer=args.save_replay_buffer,
    verbose=args.verbose,
    vec_env_type=args.vec_env,
    default_hyperparameter_yaml="hyperparams",
)

Launch the trainer

In [5]:
# Prepare the experiment. The returned value may be None; the training cell
# checks for that and runs hyperparameter optimization instead of training.
model = exp_manager.setup_experiment()

OrderedDict([('buffer_size', 2000),
             ('gamma', 0.98),
             ('gradient_steps', -1),
             ('learning_rate', 0.001),
             ('learning_starts', 0),
             ('n_episodes_rollout', 1),
             ('n_timesteps', 1000000.0),
             ('noise_std', 0.1),
             ('noise_type', 'normal'),
             ('policy', 'MlpPolicy'),
             ('policy_kwargs', 'dict(net_arch=[400, 300])')])
Using 1 environments
Creating test environment
Using cuda device
bit_width reset to 8.
set baseline acc as 0.8422666139240507
Using cuda device
bit_width reset to 8.
set baseline acc as 0.8422666139240507
Applying normal noise with std 0.1
Using cuda device
Log path: /media/densechen/data/code/eve-mli/examples/logs/ddpg/mnist-v0_4


In [6]:
# No model was returned by setup: run the hyperparameter search.
# Otherwise do a normal training run and save the trained model.
if model is None:
    exp_manager.hyperparameters_optimization()
else:
    exp_manager.learn(model)
    exp_manager.save_trained_model(model)

Logging to /media/densechen/data/code/eve-mli/examples/logs/mnist-v0/DDPG_4
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.0641  |
| time/              |          |
|    episodes        | 4        |
|    fps             | 1        |
|    time_elapsed    | 2        |
|    total timesteps | 4        |
| train/             |          |
|    actor_loss      | 0.592    |
|    critic_loss     | 0.26     |
|    learning_rate   | 0.001    |
|    n_updates       | 3        |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.0717  |
| time/              |          |
|    episodes        | 8        |
|    fps             | 1        |
|    time_elapsed    | 4        |
|    total timesteps | 8        |
| train/             |          |
|    actor_loss      | 0.0304   |
|    critic_loss     | 0.243    |
|    l

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.0772  |
| time/              |          |
|    episodes        | 68       |
|    fps             | 2        |
|    time_elapsed    | 25       |
|    total timesteps | 68       |
| train/             |          |
|    actor_loss      | 0.0498   |
|    critic_loss     | 0.00155  |
|    learning_rate   | 0.001    |
|    n_updates       | 67       |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.077   |
| time/              |          |
|    episodes        | 72       |
|    fps             | 2        |
|    time_elapsed    | 27       |
|    total timesteps | 72       |
| train/             |          |
|    actor_loss      | 0.0863   |
|    critic_loss     | 0.00124  |
|    learning_rate   | 0.001    |
|    n_updates       | 71       |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.0744  |
| time/              |          |
|    episodes        | 136      |
|    fps             | 2        |
|    time_elapsed    | 50       |
|    total timesteps | 136      |
| train/             |          |
|    actor_loss      | 0.0747   |
|    critic_loss     | 0.00072  |
|    learning_rate   | 0.001    |
|    n_updates       | 135      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.0747  |
| time/              |          |
|    episodes        | 140      |
|    fps             | 2        |
|    time_elapsed    | 51       |
|    total timesteps | 140      |
| train/             |          |
|    actor_loss      | 0.0718   |
|    critic_loss     | 0.00122  |
|    learning_rate   | 0.001    |
|    n_updates       | 139      |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.0743  |
| time/              |          |
|    episodes        | 204      |
|    fps             | 2        |
|    time_elapsed    | 75       |
|    total timesteps | 204      |
| train/             |          |
|    actor_loss      | 0.0695   |
|    critic_loss     | 0.00123  |
|    learning_rate   | 0.001    |
|    n_updates       | 203      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.0747  |
| time/              |          |
|    episodes        | 208      |
|    fps             | 2        |
|    time_elapsed    | 77       |
|    total timesteps | 208      |
| train/             |          |
|    actor_loss      | 0.0711   |
|    critic_loss     | 0.000979 |
|    learning_rate   | 0.001    |
|    n_updates       | 207      |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.0742  |
| time/              |          |
|    episodes        | 272      |
|    fps             | 2        |
|    time_elapsed    | 101      |
|    total timesteps | 272      |
| train/             |          |
|    actor_loss      | 0.0721   |
|    critic_loss     | 0.000892 |
|    learning_rate   | 0.001    |
|    n_updates       | 271      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.0746  |
| time/              |          |
|    episodes        | 276      |
|    fps             | 2        |
|    time_elapsed    | 103      |
|    total timesteps | 276      |
| train/             |          |
|    actor_loss      | 0.071    |
|    critic_loss     | 0.00113  |
|    learning_rate   | 0.001    |
|    n_updates       | 275      |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.0791  |
| time/              |          |
|    episodes        | 340      |
|    fps             | 2        |
|    time_elapsed    | 127      |
|    total timesteps | 340      |
| train/             |          |
|    actor_loss      | 0.0669   |
|    critic_loss     | 0.00101  |
|    learning_rate   | 0.001    |
|    n_updates       | 339      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.0782  |
| time/              |          |
|    episodes        | 344      |
|    fps             | 2        |
|    time_elapsed    | 129      |
|    total timesteps | 344      |
| train/             |          |
|    actor_loss      | 0.0723   |
|    critic_loss     | 0.000941 |
|    learning_rate   | 0.001    |
|    n_updates       | 343      |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.0745  |
| time/              |          |
|    episodes        | 408      |
|    fps             | 2        |
|    time_elapsed    | 153      |
|    total timesteps | 408      |
| train/             |          |
|    actor_loss      | 0.0732   |
|    critic_loss     | 0.000974 |
|    learning_rate   | 0.001    |
|    n_updates       | 407      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.0744  |
| time/              |          |
|    episodes        | 412      |
|    fps             | 2        |
|    time_elapsed    | 155      |
|    total timesteps | 412      |
| train/             |          |
|    actor_loss      | 0.0834   |
|    critic_loss     | 0.000649 |
|    learning_rate   | 0.001    |
|    n_updates       | 411      |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.073   |
| time/              |          |
|    episodes        | 476      |
|    fps             | 2        |
|    time_elapsed    | 179      |
|    total timesteps | 476      |
| train/             |          |
|    actor_loss      | 0.0745   |
|    critic_loss     | 0.000792 |
|    learning_rate   | 0.001    |
|    n_updates       | 475      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.0731  |
| time/              |          |
|    episodes        | 480      |
|    fps             | 2        |
|    time_elapsed    | 181      |
|    total timesteps | 480      |
| train/             |          |
|    actor_loss      | 0.0762   |
|    critic_loss     | 0.000737 |
|    learning_rate   | 0.001    |
|    n_updates       | 479      |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.0752  |
| time/              |          |
|    episodes        | 544      |
|    fps             | 2        |
|    time_elapsed    | 205      |
|    total timesteps | 544      |
| train/             |          |
|    actor_loss      | 0.0875   |
|    critic_loss     | 0.001    |
|    learning_rate   | 0.001    |
|    n_updates       | 543      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.0754  |
| time/              |          |
|    episodes        | 548      |
|    fps             | 2        |
|    time_elapsed    | 206      |
|    total timesteps | 548      |
| train/             |          |
|    actor_loss      | 0.0831   |
|    critic_loss     | 0.00106  |
|    learning_rate   | 0.001    |
|    n_updates       | 547      |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.0769  |
| time/              |          |
|    episodes        | 612      |
|    fps             | 2        |
|    time_elapsed    | 231      |
|    total timesteps | 612      |
| train/             |          |
|    actor_loss      | 0.0662   |
|    critic_loss     | 0.000774 |
|    learning_rate   | 0.001    |
|    n_updates       | 611      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.0754  |
| time/              |          |
|    episodes        | 616      |
|    fps             | 2        |
|    time_elapsed    | 232      |
|    total timesteps | 616      |
| train/             |          |
|    actor_loss      | 0.0578   |
|    critic_loss     | 0.000624 |
|    learning_rate   | 0.001    |
|    n_updates       | 615      |
--------------

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.077   |
| time/              |          |
|    episodes        | 680      |
|    fps             | 2        |
|    time_elapsed    | 257      |
|    total timesteps | 680      |
| train/             |          |
|    actor_loss      | 0.0765   |
|    critic_loss     | 0.00109  |
|    learning_rate   | 0.001    |
|    n_updates       | 679      |
---------------------------------
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1        |
|    ep_rew_mean     | -0.0761  |
| time/              |          |
|    episodes        | 684      |
|    fps             | 2        |
|    time_elapsed    | 258      |
|    total timesteps | 684      |
| train/             |          |
|    actor_loss      | 0.0711   |
|    critic_loss     | 0.000862 |
|    learning_rate   | 0.001    |
|    n_updates       | 683      |
--------------

Go to the TensorBoard log folder and run ```tensorboard --logdir .``` to view the training log.

$rew = len \times (original - current)$