# Hyperparameter selection with Ray Tune

In [1]:
import os
from datetime import datetime
import time
import random
import warnings
import joblib
import warnings
import gc
# My modules
from config import Config
from logger import init_logger
from common_utils import set_seeds, read_csvs, stratify_split, setup, get_data_dfs, get_loaders
from model import Model
from train_loop_functions import train_epoch, valid_epoch

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import accuracy_score

import torch
from torch import nn
from torch.cuda.amp import GradScaler

# hyperparameter tuning
from functools import partial
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler

In [2]:
%load_ext autoreload
%autoreload 2

# Setup

In [3]:
# Apply the project-wide seed from `Config` via `set_seeds` (imported from common_utils)
# before any data splitting or model construction happens.
set_seeds(Config.seed)
# Module-level logger; `train_main` hands it to `train_epoch` / `valid_epoch`.
LOGGER = init_logger() # uses Python's logging framework

# Tuning

In [4]:
# all params are passed in by Tune
def train_main(config, checkpoint_dir=None, data_dir=None):
    """Train and validate one hyperparameter configuration (Ray Tune trainable).

    Only fold 0 of the stratified split is used. After every epoch the
    model/optimizer state is written to a Tune checkpoint and the validation
    loss and accuracy are reported to Tune.

    Args:
        config: Dict supplied by Tune. Reads "batch_size", "lr", "is_amsgrad"
            and "accum_iter". The optional "max_num_epochs" key sets the
            number of epochs; it defaults to 10 when absent.
        checkpoint_dir: Directory of a checkpoint to restore from, or None to
            start from freshly initialised weights. Passed by Ray Tune.
        data_dir: Dataset root handed to `read_csvs`; images are loaded from
            its `train_images` subdirectory.

    Returns:
        None. Results are delivered through `tune.report`.
    """
    assert config is not None
    # -------- DATASETS AND LOADERS --------
    # The second value returned by `read_csvs` (the test frame) is not needed here.
    data_df, _ = read_csvs(data_dir, Config.debug)
    train_folds = stratify_split(data_df, Config.fold_num, Config.seed, Config.target_col)

    # select only one of the folds (fold 0)
    train_df, valid_df = get_data_dfs(train_folds, 0)
    train_dataloader, valid_dataloader = get_loaders(train_df, valid_df,
                                                     config["batch_size"],
                                                     os.path.join(data_dir, 'train_images'))
    # Ground-truth labels do not change between epochs, so fetch them once.
    validation_labels = valid_df[Config.target_col].values

    # -------- MODEL --------
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model, optimizer, scheduler, criterion = setup(Config.model_arch, config["lr"],
                                                   config["is_amsgrad"], data_df.label.nunique(), device)

    # The `checkpoint_dir` parameter gets passed by Ray Tune when a checkpoint should be restored.
    if checkpoint_dir:
        checkpoint = os.path.join(checkpoint_dir, "checkpoint")
        # map_location lets a checkpoint written on GPU be restored on a CPU-only worker.
        model_state, optimizer_state = torch.load(checkpoint, map_location=device)
        model.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)
        # NOTE(review): the epoch index and LR scheduler state are not part of the
        # checkpoint, so a restored trial restarts its epoch loop at 0 — confirm
        # this is acceptable before relying on pause/resume or fault tolerance.

    # One scaler for the whole run: a GradScaler keeps a dynamic loss scale that
    # would be reset if a new instance were created every epoch.
    grad_scaler = GradScaler()
    num_epochs = int(config.get("max_num_epochs", 10))

    # EPOCHS TRAIN
    for e in range(num_epochs):
        # TRAIN (the returned average training loss is not reported to Tune)
        train_epoch(train_dataloader, model,
                    criterion, optimizer,
                    scheduler, grad_scaler,
                    config["accum_iter"], LOGGER,
                    device)
        #print(torch.cuda.memory_summary(device))
        # VALIDATE
        avg_validation_loss, preds = valid_epoch(valid_dataloader, model,
                                                 criterion, config["accum_iter"],
                                                 LOGGER, device)

        accuracy = accuracy_score(y_true=validation_labels, y_pred=preds)

        # SAVE CHECKPOINT.
        # It is automatically registered with Ray Tune and will potentially
        # be passed as the `checkpoint_dir` parameter in future iterations.
        # A distinct name is used so the function argument is not shadowed.
        with tune.checkpoint_dir(step=e) as epoch_checkpoint_dir:
            path = os.path.join(epoch_checkpoint_dir, "checkpoint")
            torch.save((model.state_dict(), optimizer.state_dict()), path)

        tune.report(loss=avg_validation_loss, accuracy=accuracy)
        gc.collect()

    print("Finished Training")

In [5]:
def main(num_samples=20, max_num_epochs=10, gpus_per_trial=1):
    """Run the Ray Tune hyperparameter search and print the best trial.

    Samples `num_samples` configurations over AMSGrad on/off, gradient
    accumulation steps, learning rate and batch size, and schedules them with
    ASHA (minimising the reported validation "loss").

    Args:
        num_samples: Number of configurations Tune draws from the search space.
        max_num_epochs: Upper bound on training iterations per trial (`max_t`
            of the ASHA scheduler). It is also placed in the trial config as
            the constant "max_num_epochs" so the trainable can read the same
            budget.
        gpus_per_trial: GPUs reserved for each trial.

    Returns:
        None. The best configuration and its final metrics are printed.
    """
    data_dir = os.path.abspath('./data')

    hyperconfig = {
        "is_amsgrad": tune.choice([False, True]),
        "accum_iter": tune.choice([1,2,4,8,16]),
        "lr": tune.loguniform(1e-4, 1e-1),
        "batch_size": tune.choice([4, 8, 16]),
        # Constant (not searched): keeps the epoch budget in one place.
        "max_num_epochs": max_num_epochs,
    }

    scheduler = ASHAScheduler(
        metric="loss",
        mode="min",
        max_t=max_num_epochs,
        grace_period=1,
        reduction_factor=2)
    reporter = CLIReporter(metric_columns=["loss", "accuracy", "training_iteration"])
    result = tune.run(
        partial(train_main, data_dir=data_dir),
        # Each trial claims every CPU, so trials run one at a time.
        # os.cpu_count() may return None; fall back to a single CPU then.
        resources_per_trial={"cpu": os.cpu_count() or 1, "gpu": gpus_per_trial},
        config=hyperconfig,
        num_samples=num_samples,
        scheduler=scheduler,
        progress_reporter=reporter)

    best_trial = result.get_best_trial("loss", "min", "last")
    if best_trial is None:
        # Happens when no trial ever reported "loss" (e.g. every trial errored).
        print("No trial reported a loss; cannot determine the best trial.")
        return

    last_result = best_trial.last_result
    print("Best trial config: {}".format(best_trial.config))
    print("Best trial final validation loss: {}".format(
        last_result["loss"]))
    print("Best trial final validation accuracy: {}".format(
        last_result["accuracy"]))

# NOTE(review): disabled follow-up that reloads the best checkpoint and scores it
# on a test set. It was previously parked in a bare string literal; it is kept
# here as comments for reference. Before re-enabling: `device` is used on the
# first line before it is assigned, and `test_accuracy` is not defined in the
# visible part of this notebook.
#
#     best_trained_model = Model(Config.model_arch, Config.num_labels, pretrained=True).to(device)
#     device = "cpu"
#     if torch.cuda.is_available():
#         device = "cuda:0"
#         if gpus_per_trial > 1:
#             best_trained_model = nn.DataParallel(best_trained_model)
#     best_trained_model.to(device)
#
#     checkpoint_path = os.path.join(best_trial.checkpoint.value, "checkpoint")
#
#     model_state, optimizer_state = torch.load(checkpoint_path)
#     best_trained_model.load_state_dict(model_state)
#
#     test_acc = test_accuracy(best_trained_model, device)
#     print("Best trial test set accuracy: {}".format(test_acc))

In [6]:
# Launch the search with the defaults declared on `main`:
# num_samples=20, max_num_epochs=10, gpus_per_trial=1.
main()

2020-12-25 06:27:21,505	INFO services.py:1092 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2020-12-25 06:27:22,129	INFO registry.py:65 -- Detected unknown callable for trainable. Converting to class.
Traceback (most recent call last):
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/reporter.py", line 309, in <module>
    reporter.run()
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/reporter.py", line 245, in run
    port = server.add_insecure_port(f"[::]:{self.port}")
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/grpc/_server.py", line 962, in add_insecure_port
    address, _add_insecure_port(self._state, _common.encode(address)))
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/grpc/_common.py", line 166, in validate_port_binding_result
    raise RuntimeError(_ERROR_MESSAGE_PORT_BINDING_FAILED % address)
RuntimeError: Failed to bind to address [::]:358

== Status ==
Memory usage on this node: 4.9/31.3 GiB
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None | Iter 1.000: None
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials: 1/20 (1 RUNNING)
+---------------------+----------+-------+--------------+--------------+--------------+-----------+
| Trial name          | status   | loc   |   accum_iter |   batch_size | is_amsgrad   |        lr |
|---------------------+----------+-------+--------------+--------------+--------------+-----------|
| DEFAULT_3efa2_00000 | RUNNING  |       |            1 |           16 | False        | 0.0122859 |
+---------------------+----------+-------+--------------+--------------+--------------+-----------+


[2m[36m(pid=20305)[0m Epoch     0: adjusting learning rate of group 0 to 1.2286e-02.
Result for DEFAULT_3ef

Result for DEFAULT_3efa2_00000:
  accuracy: 0.28
  date: 2020-12-25_06-27-59
  done: false
  experiment_id: 1ad1c362cba44654bf6f108b767fbc2f
  experiment_tag: 0_accum_iter=1,batch_size=16,is_amsgrad=False,lr=0.012286
  hostname: Nevsky
  iterations_since_restore: 5
  loss: 7.100030183792114
  node_ip: 10.0.0.200
  pid: 20305
  should_checkpoint: true
  time_since_restore: 36.412052392959595
  time_this_iter_s: 6.529623746871948
  time_total_s: 36.412052392959595
  timestamp: 1608877679
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 3efa2_00000
  
== Status ==
Memory usage on this node: 7.3/31.3 GiB
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: None | Iter 4.000: -9.361285209655762 | Iter 2.000: -4.071831822395325 | Iter 1.000: -2.007357656955719
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials: 2/20 (1 PENDING,

Result for DEFAULT_3efa2_00000:
  accuracy: 0.34
  date: 2020-12-25_06-28-32
  done: true
  experiment_id: 1ad1c362cba44654bf6f108b767fbc2f
  experiment_tag: 0_accum_iter=1,batch_size=16,is_amsgrad=False,lr=0.012286
  hostname: Nevsky
  iterations_since_restore: 10
  loss: 4.92010498046875
  node_ip: 10.0.0.200
  pid: 20305
  should_checkpoint: true
  time_since_restore: 69.57885503768921
  time_this_iter_s: 6.52949595451355
  time_total_s: 69.57885503768921
  timestamp: 1608877712
  timesteps_since_restore: 0
  training_iteration: 10
  trial_id: 3efa2_00000
  
== Status ==
Memory usage on this node: 7.3/31.3 GiB
Using AsyncHyperBand: num_stopped=1
Bracket: Iter 8.000: -6.290502548217773 | Iter 4.000: -9.361285209655762 | Iter 2.000: -4.071831822395325 | Iter 1.000: -2.007357656955719
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials: 2/20 

Result for DEFAULT_3efa2_00001:
  accuracy: 0.68
  date: 2020-12-25_06-29-14
  done: false
  experiment_id: 24b542e496264eed89ae5ceed32b35f3
  experiment_tag: 1_accum_iter=4,batch_size=4,is_amsgrad=False,lr=0.00047924
  hostname: Nevsky
  iterations_since_restore: 4
  loss: 1.3956382870674133
  node_ip: 10.0.0.200
  pid: 20308
  should_checkpoint: true
  time_since_restore: 39.4513099193573
  time_this_iter_s: 8.895934820175171
  time_total_s: 39.4513099193573
  timestamp: 1608877754
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 3efa2_00001
  
== Status ==
Memory usage on this node: 7.1/31.3 GiB
Using AsyncHyperBand: num_stopped=1
Bracket: Iter 8.000: -6.290502548217773 | Iter 4.000: -5.3784617483615875 | Iter 2.000: -2.676600068807602 | Iter 1.000: -1.803763061761856
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials: 3/2

Result for DEFAULT_3efa2_00001:
  accuracy: 0.78
  date: 2020-12-25_06-29-49
  done: false
  experiment_id: 24b542e496264eed89ae5ceed32b35f3
  experiment_tag: 1_accum_iter=4,batch_size=4,is_amsgrad=False,lr=0.00047924
  hostname: Nevsky
  iterations_since_restore: 8
  loss: 1.2177004218101501
  node_ip: 10.0.0.200
  pid: 20308
  should_checkpoint: true
  time_since_restore: 74.35782504081726
  time_this_iter_s: 8.792399883270264
  time_total_s: 74.35782504081726
  timestamp: 1608877789
  timesteps_since_restore: 0
  training_iteration: 8
  trial_id: 3efa2_00001
  
== Status ==
Memory usage on this node: 7.1/31.3 GiB
Using AsyncHyperBand: num_stopped=1
Bracket: Iter 8.000: -3.754101485013962 | Iter 4.000: -5.3784617483615875 | Iter 2.000: -2.676600068807602 | Iter 1.000: -1.803763061761856
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials: 3

Result for DEFAULT_3efa2_00002:
  accuracy: 0.28
  date: 2020-12-25_06-30-26
  done: true
  experiment_id: e8ce337008204237bbe90cc171e7b2f1
  experiment_tag: 2_accum_iter=1,batch_size=8,is_amsgrad=True,lr=0.014401
  hostname: Nevsky
  iterations_since_restore: 2
  loss: 9.421448945999146
  node_ip: 10.0.0.200
  pid: 20303
  should_checkpoint: true
  time_since_restore: 17.33701252937317
  time_this_iter_s: 7.071772336959839
  time_total_s: 17.33701252937317
  timestamp: 1608877826
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 3efa2_00002
  
== Status ==
Memory usage on this node: 7.2/31.3 GiB
Using AsyncHyperBand: num_stopped=3
Bracket: Iter 8.000: -3.754101485013962 | Iter 4.000: -5.3784617483615875 | Iter 2.000: -4.071831822395325 | Iter 1.000: -1.6001684665679932
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials: 4/20 

[2m[36m(pid=20306)[0m Epoch     0: adjusting learning rate of group 0 to 1.0704e-03.
Result for DEFAULT_3efa2_00005:
  accuracy: 0.68
  date: 2020-12-25_06-31-15
  done: false
  experiment_id: b3864becec8f4f99b4533deeb281b2fc
  experiment_tag: 5_accum_iter=2,batch_size=8,is_amsgrad=False,lr=0.0010704
  hostname: Nevsky
  iterations_since_restore: 1
  loss: 1.3604517579078674
  node_ip: 10.0.0.200
  pid: 20306
  should_checkpoint: true
  time_since_restore: 9.34530758857727
  time_this_iter_s: 9.34530758857727
  time_total_s: 9.34530758857727
  timestamp: 1608877875
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 3efa2_00005
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=5
Bracket: Iter 8.000: -3.754101485013962 | Iter 4.000: -5.3784617483615875 | Iter 2.000: -6.746640384197235 | Iter 1.000: -1.6001684665679932
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Resu

Result for DEFAULT_3efa2_00005:
  accuracy: 0.66
  date: 2020-12-25_06-31-35
  done: false
  experiment_id: b3864becec8f4f99b4533deeb281b2fc
  experiment_tag: 5_accum_iter=2,batch_size=8,is_amsgrad=False,lr=0.0010704
  hostname: Nevsky
  iterations_since_restore: 4
  loss: 0.7782346308231354
  node_ip: 10.0.0.200
  pid: 20306
  should_checkpoint: true
  time_since_restore: 29.851088523864746
  time_this_iter_s: 6.871183633804321
  time_total_s: 29.851088523864746
  timestamp: 1608877895
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 3efa2_00005
  
== Status ==
Memory usage on this node: 7.1/31.3 GiB
Using AsyncHyperBand: num_stopped=5
Bracket: Iter 8.000: -3.754101485013962 | Iter 4.000: -1.3956382870674133 | Iter 2.000: -4.071831822395325 | Iter 1.000: -1.6001684665679932
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials:

Result for DEFAULT_3efa2_00005:
  accuracy: 0.7
  date: 2020-12-25_06-31-56
  done: false
  experiment_id: b3864becec8f4f99b4533deeb281b2fc
  experiment_tag: 5_accum_iter=2,batch_size=8,is_amsgrad=False,lr=0.0010704
  hostname: Nevsky
  iterations_since_restore: 7
  loss: 0.6871243417263031
  node_ip: 10.0.0.200
  pid: 20306
  should_checkpoint: true
  time_since_restore: 50.25795245170593
  time_this_iter_s: 6.840797185897827
  time_total_s: 50.25795245170593
  timestamp: 1608877916
  timesteps_since_restore: 0
  training_iteration: 7
  trial_id: 3efa2_00005
  
== Status ==
Memory usage on this node: 7.1/31.3 GiB
Using AsyncHyperBand: num_stopped=5
Bracket: Iter 8.000: -3.754101485013962 | Iter 4.000: -1.3956382870674133 | Iter 2.000: -4.071831822395325 | Iter 1.000: -1.6001684665679932
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials: 7/

Result for DEFAULT_3efa2_00005:
  accuracy: 0.78
  date: 2020-12-25_06-32-17
  done: true
  experiment_id: b3864becec8f4f99b4533deeb281b2fc
  experiment_tag: 5_accum_iter=2,batch_size=8,is_amsgrad=False,lr=0.0010704
  hostname: Nevsky
  iterations_since_restore: 10
  loss: 0.7779440581798553
  node_ip: 10.0.0.200
  pid: 20306
  should_checkpoint: true
  time_since_restore: 70.98855471611023
  time_this_iter_s: 6.869710445404053
  time_total_s: 70.98855471611023
  timestamp: 1608877937
  timesteps_since_restore: 0
  training_iteration: 10
  trial_id: 3efa2_00005
  
== Status ==
Memory usage on this node: 7.1/31.3 GiB
Using AsyncHyperBand: num_stopped=6
Bracket: Iter 8.000: -1.2177004218101501 | Iter 4.000: -1.3956382870674133 | Iter 2.000: -4.071831822395325 | Iter 1.000: -1.6001684665679932
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials:

Result for DEFAULT_3efa2_00007:
  accuracy: 0.68
  date: 2020-12-25_06-32-48
  done: false
  experiment_id: ae439d8c671a42238a759a070062c786
  experiment_tag: 7_accum_iter=1,batch_size=16,is_amsgrad=True,lr=0.0015636
  hostname: Nevsky
  iterations_since_restore: 2
  loss: 0.9857660830020905
  node_ip: 10.0.0.200
  pid: 20302
  should_checkpoint: true
  time_since_restore: 16.501186847686768
  time_this_iter_s: 6.776992559432983
  time_total_s: 16.501186847686768
  timestamp: 1608877968
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 3efa2_00007
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=7
Bracket: Iter 8.000: -1.2177004218101501 | Iter 4.000: -1.3956382870674133 | Iter 2.000: -2.676600068807602 | Iter 1.000: -1.6001684665679932
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials

Result for DEFAULT_3efa2_00007:
  accuracy: 0.68
  date: 2020-12-25_06-33-09
  done: false
  experiment_id: ae439d8c671a42238a759a070062c786
  experiment_tag: 7_accum_iter=1,batch_size=16,is_amsgrad=True,lr=0.0015636
  hostname: Nevsky
  iterations_since_restore: 5
  loss: 1.1482079327106476
  node_ip: 10.0.0.200
  pid: 20302
  should_checkpoint: true
  time_since_restore: 36.84531927108765
  time_this_iter_s: 6.855167627334595
  time_total_s: 36.84531927108765
  timestamp: 1608877989
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 3efa2_00007
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=7
Bracket: Iter 8.000: -1.2177004218101501 | Iter 4.000: -1.1608115583658218 | Iter 2.000: -2.676600068807602 | Iter 1.000: -1.6001684665679932
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials: 

Result for DEFAULT_3efa2_00007:
  accuracy: 0.7
  date: 2020-12-25_06-33-29
  done: false
  experiment_id: ae439d8c671a42238a759a070062c786
  experiment_tag: 7_accum_iter=1,batch_size=16,is_amsgrad=True,lr=0.0015636
  hostname: Nevsky
  iterations_since_restore: 8
  loss: 0.9329634010791779
  node_ip: 10.0.0.200
  pid: 20302
  should_checkpoint: true
  time_since_restore: 57.46419382095337
  time_this_iter_s: 6.859773874282837
  time_total_s: 57.46419382095337
  timestamp: 1608878009
  timesteps_since_restore: 0
  training_iteration: 8
  trial_id: 3efa2_00007
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=7
Bracket: Iter 8.000: -1.075331911444664 | Iter 4.000: -1.1608115583658218 | Iter 2.000: -2.676600068807602 | Iter 1.000: -1.6001684665679932
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials: 9/

[2m[36m(pid=23758)[0m Epoch     0: adjusting learning rate of group 0 to 3.5276e-04.
Result for DEFAULT_3efa2_00008:
  accuracy: 0.68
  date: 2020-12-25_06-33-56
  done: false
  experiment_id: 5965eeb10f164571b48c07ef12229357
  experiment_tag: 8_accum_iter=2,batch_size=16,is_amsgrad=True,lr=0.00035276
  hostname: Nevsky
  iterations_since_restore: 1
  loss: 1.498033881187439
  node_ip: 10.0.0.200
  pid: 23758
  should_checkpoint: true
  time_since_restore: 10.521526098251343
  time_this_iter_s: 10.521526098251343
  time_total_s: 10.521526098251343
  timestamp: 1608878036
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 3efa2_00008
  
== Status ==
Memory usage on this node: 6.9/31.3 GiB
Using AsyncHyperBand: num_stopped=8
Bracket: Iter 8.000: -1.075331911444664 | Iter 4.000: -1.1608115583658218 | Iter 2.000: -2.676600068807602 | Iter 1.000: -1.5788519084453583
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX

Result for DEFAULT_3efa2_00008:
  accuracy: 0.68
  date: 2020-12-25_06-34-16
  done: false
  experiment_id: 5965eeb10f164571b48c07ef12229357
  experiment_tag: 8_accum_iter=2,batch_size=16,is_amsgrad=True,lr=0.00035276
  hostname: Nevsky
  iterations_since_restore: 4
  loss: 0.9935813546180725
  node_ip: 10.0.0.200
  pid: 23758
  should_checkpoint: true
  time_since_restore: 29.918285369873047
  time_this_iter_s: 6.489647150039673
  time_total_s: 29.918285369873047
  timestamp: 1608878056
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 3efa2_00008
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=8
Bracket: Iter 8.000: -1.075331911444664 | Iter 4.000: -0.9935813546180725 | Iter 2.000: -1.3670167326927185 | Iter 1.000: -1.5788519084453583
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trial

Result for DEFAULT_3efa2_00008:
  accuracy: 0.72
  date: 2020-12-25_06-34-35
  done: false
  experiment_id: 5965eeb10f164571b48c07ef12229357
  experiment_tag: 8_accum_iter=2,batch_size=16,is_amsgrad=True,lr=0.00035276
  hostname: Nevsky
  iterations_since_restore: 7
  loss: 0.7294366657733917
  node_ip: 10.0.0.200
  pid: 23758
  should_checkpoint: true
  time_since_restore: 49.532243967056274
  time_this_iter_s: 6.550715923309326
  time_total_s: 49.532243967056274
  timestamp: 1608878075
  timesteps_since_restore: 0
  training_iteration: 7
  trial_id: 3efa2_00008
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=8
Bracket: Iter 8.000: -1.075331911444664 | Iter 4.000: -0.9935813546180725 | Iter 2.000: -1.3670167326927185 | Iter 1.000: -1.5788519084453583
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trial

Result for DEFAULT_3efa2_00008:
  accuracy: 0.8
  date: 2020-12-25_06-34-55
  done: true
  experiment_id: 5965eeb10f164571b48c07ef12229357
  experiment_tag: 8_accum_iter=2,batch_size=16,is_amsgrad=True,lr=0.00035276
  hostname: Nevsky
  iterations_since_restore: 10
  loss: 0.7010703086853027
  node_ip: 10.0.0.200
  pid: 23758
  should_checkpoint: true
  time_since_restore: 69.19451594352722
  time_this_iter_s: 6.487068176269531
  time_total_s: 69.19451594352722
  timestamp: 1608878095
  timesteps_since_restore: 0
  training_iteration: 10
  trial_id: 3efa2_00008
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=9
Bracket: Iter 8.000: -0.9329634010791779 | Iter 4.000: -0.9935813546180725 | Iter 2.000: -1.3670167326927185 | Iter 1.000: -1.5788519084453583
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials

[2m[36m(pid=2158)[0m Epoch     0: adjusting learning rate of group 0 to 8.0021e-03.
Result for DEFAULT_3efa2_00010:
  accuracy: 0.64
  date: 2020-12-25_06-35-29
  done: true
  experiment_id: 07f524f1fe5843dfbff528b9d2d506c7
  experiment_tag: 10_accum_iter=8,batch_size=8,is_amsgrad=False,lr=0.0080021
  hostname: Nevsky
  iterations_since_restore: 1
  loss: 2.2757532596588135
  node_ip: 10.0.0.200
  pid: 2158
  should_checkpoint: true
  time_since_restore: 11.111332654953003
  time_this_iter_s: 11.111332654953003
  time_total_s: 11.111332654953003
  timestamp: 1608878129
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 3efa2_00010
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=11
Bracket: Iter 8.000: -0.9329634010791779 | Iter 4.000: -0.9935813546180725 | Iter 2.000: -1.4782920181751251 | Iter 1.000: -1.5788519084453583
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RT

Result for DEFAULT_3efa2_00012:
  accuracy: 0.64
  date: 2020-12-25_06-35-58
  done: true
  experiment_id: a49895b600f24e3f8934ca67a124446e
  experiment_tag: 12_accum_iter=4,batch_size=16,is_amsgrad=True,lr=0.0147
  hostname: Nevsky
  iterations_since_restore: 2
  loss: 2.205573797225952
  node_ip: 10.0.0.200
  pid: 4518
  should_checkpoint: true
  time_since_restore: 15.905081272125244
  time_this_iter_s: 6.3914594650268555
  time_total_s: 15.905081272125244
  timestamp: 1608878158
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 3efa2_00012
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 8.000: -0.9329634010791779 | Iter 4.000: -0.9935813546180725 | Iter 2.000: -1.5895673036575317 | Iter 1.000: -1.5788519084453583
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials: 

Result for DEFAULT_3efa2_00013:
  accuracy: 0.64
  date: 2020-12-25_06-36-17
  done: false
  experiment_id: d42351e961a244bf9dccba85e1708632
  experiment_tag: 13_accum_iter=8,batch_size=16,is_amsgrad=True,lr=0.0012172
  hostname: Nevsky
  iterations_since_restore: 2
  loss: 1.1593576073646545
  node_ip: 10.0.0.200
  pid: 6454
  should_checkpoint: true
  time_since_restore: 17.405850172042847
  time_this_iter_s: 6.3892340660095215
  time_total_s: 17.405850172042847
  timestamp: 1608878177
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 3efa2_00013
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 8.000: -0.9329634010791779 | Iter 4.000: -0.9935813546180725 | Iter 2.000: -1.4782920181751251 | Iter 1.000: -1.5575353503227234
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of tri

Result for DEFAULT_3efa2_00013:
  accuracy: 0.72
  date: 2020-12-25_06-36-30
  done: false
  experiment_id: d42351e961a244bf9dccba85e1708632
  experiment_tag: 13_accum_iter=8,batch_size=16,is_amsgrad=True,lr=0.0012172
  hostname: Nevsky
  iterations_since_restore: 4
  loss: 0.8208216428756714
  node_ip: 10.0.0.200
  pid: 6454
  should_checkpoint: true
  time_since_restore: 30.31570267677307
  time_this_iter_s: 6.546307563781738
  time_total_s: 30.31570267677307
  timestamp: 1608878190
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 3efa2_00013
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 8.000: -0.9329634010791779 | Iter 4.000: -0.9597830921411514 | Iter 2.000: -1.4782920181751251 | Iter 1.000: -1.5575353503227234
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials

Result for DEFAULT_3efa2_00013:
  accuracy: 0.72
  date: 2020-12-25_06-36-43
  done: false
  experiment_id: d42351e961a244bf9dccba85e1708632
  experiment_tag: 13_accum_iter=8,batch_size=16,is_amsgrad=True,lr=0.0012172
  hostname: Nevsky
  iterations_since_restore: 6
  loss: 0.7778547704219818
  node_ip: 10.0.0.200
  pid: 6454
  should_checkpoint: true
  time_since_restore: 43.52001333236694
  time_this_iter_s: 6.684001207351685
  time_total_s: 43.52001333236694
  timestamp: 1608878203
  timesteps_since_restore: 0
  training_iteration: 6
  trial_id: 3efa2_00013
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 8.000: -0.9329634010791779 | Iter 4.000: -0.9597830921411514 | Iter 2.000: -1.4782920181751251 | Iter 1.000: -1.5575353503227234
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials

Result for DEFAULT_3efa2_00013:
  accuracy: 0.76
  date: 2020-12-25_06-36-56
  done: false
  experiment_id: d42351e961a244bf9dccba85e1708632
  experiment_tag: 13_accum_iter=8,batch_size=16,is_amsgrad=True,lr=0.0012172
  hostname: Nevsky
  iterations_since_restore: 8
  loss: 0.6257586777210236
  node_ip: 10.0.0.200
  pid: 6454
  should_checkpoint: true
  time_since_restore: 56.60866332054138
  time_this_iter_s: 6.557947635650635
  time_total_s: 56.60866332054138
  timestamp: 1608878216
  timesteps_since_restore: 0
  training_iteration: 8
  trial_id: 3efa2_00013
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 8.000: -0.8477224707603455 | Iter 4.000: -0.9597830921411514 | Iter 2.000: -1.4782920181751251 | Iter 1.000: -1.5575353503227234
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials

Result for DEFAULT_3efa2_00013:
  accuracy: 0.74
  date: 2020-12-25_06-37-10
  done: true
  experiment_id: d42351e961a244bf9dccba85e1708632
  experiment_tag: 13_accum_iter=8,batch_size=16,is_amsgrad=True,lr=0.0012172
  hostname: Nevsky
  iterations_since_restore: 10
  loss: 0.7421642541885376
  node_ip: 10.0.0.200
  pid: 6454
  should_checkpoint: true
  time_since_restore: 69.79488182067871
  time_this_iter_s: 6.670412540435791
  time_total_s: 69.79488182067871
  timestamp: 1608878230
  timesteps_since_restore: 0
  training_iteration: 10
  trial_id: 3efa2_00013
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 8.000: -0.8477224707603455 | Iter 4.000: -0.9597830921411514 | Iter 2.000: -1.4782920181751251 | Iter 1.000: -1.5575353503227234
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trial

Result for DEFAULT_3efa2_00014:
  accuracy: 0.74
  date: 2020-12-25_06-37-29
  done: false
  experiment_id: 8a6c2634656c412f84db3b28fea7d93d
  experiment_tag: 14_accum_iter=8,batch_size=8,is_amsgrad=False,lr=0.00076058
  hostname: Nevsky
  iterations_since_restore: 2
  loss: 1.4519417881965637
  node_ip: 10.0.0.200
  pid: 15755
  should_checkpoint: true
  time_since_restore: 17.26995825767517
  time_this_iter_s: 6.711941480636597
  time_total_s: 17.26995825767517
  timestamp: 1608878249
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 3efa2_00014
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 8.000: -0.8477224707603455 | Iter 4.000: -0.9597830921411514 | Iter 2.000: -1.4519417881965637 | Iter 1.000: -1.5277846157550812
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of tria

Result for DEFAULT_3efa2_00014:
  accuracy: 0.68
  date: 2020-12-25_06-37-43
  done: true
  experiment_id: 8a6c2634656c412f84db3b28fea7d93d
  experiment_tag: 14_accum_iter=8,batch_size=8,is_amsgrad=False,lr=0.00076058
  hostname: Nevsky
  iterations_since_restore: 4
  loss: 1.1117188334465027
  node_ip: 10.0.0.200
  pid: 15755
  should_checkpoint: true
  time_since_restore: 30.849124431610107
  time_this_iter_s: 6.816052198410034
  time_total_s: 30.849124431610107
  timestamp: 1608878263
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 3efa2_00014
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 8.000: -0.8477224707603455 | Iter 4.000: -0.9935813546180725 | Iter 2.000: -1.4519417881965637 | Iter 1.000: -1.5277846157550812
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of tri

Result for DEFAULT_3efa2_00015:
  accuracy: 0.64
  date: 2020-12-25_06-38-05
  done: false
  experiment_id: 9fd5a33474d547008639c5b628e5d87f
  experiment_tag: 15_accum_iter=8,batch_size=4,is_amsgrad=False,lr=0.00018894
  hostname: Nevsky
  iterations_since_restore: 2
  loss: 1.3896448612213135
  node_ip: 10.0.0.200
  pid: 19128
  should_checkpoint: true
  time_since_restore: 20.55797028541565
  time_this_iter_s: 8.728576898574829
  time_total_s: 20.55797028541565
  timestamp: 1608878285
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 3efa2_00015
  
== Status ==
Memory usage on this node: 6.9/31.3 GiB
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 8.000: -0.8477224707603455 | Iter 4.000: -0.9935813546180725 | Iter 2.000: -1.4207933247089386 | Iter 1.000: -1.498033881187439
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trial

Result for DEFAULT_3efa2_00015:
  accuracy: 0.62
  date: 2020-12-25_06-38-23
  done: true
  experiment_id: 9fd5a33474d547008639c5b628e5d87f
  experiment_tag: 15_accum_iter=8,batch_size=4,is_amsgrad=False,lr=0.00018894
  hostname: Nevsky
  iterations_since_restore: 4
  loss: 1.4411739110946655
  node_ip: 10.0.0.200
  pid: 19128
  should_checkpoint: true
  time_since_restore: 37.97740960121155
  time_this_iter_s: 8.732038259506226
  time_total_s: 37.97740960121155
  timestamp: 1608878303
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 3efa2_00015
  
== Status ==
Memory usage on this node: 6.9/31.3 GiB
Using AsyncHyperBand: num_stopped=16
Bracket: Iter 8.000: -0.8477224707603455 | Iter 4.000: -1.0526500940322876 | Iter 2.000: -1.4207933247089386 | Iter 1.000: -1.498033881187439
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials

Result for DEFAULT_3efa2_00016:
  accuracy: 0.66
  date: 2020-12-25_06-38-42
  done: true
  experiment_id: 7a20f762e4e549bb837919fec26e750c
  experiment_tag: 16_accum_iter=16,batch_size=8,is_amsgrad=True,lr=0.0019615
  hostname: Nevsky
  iterations_since_restore: 2
  loss: 1.8365841507911682
  node_ip: 10.0.0.200
  pid: 22347
  should_checkpoint: true
  time_since_restore: 16.84336829185486
  time_this_iter_s: 6.946133613586426
  time_total_s: 16.84336829185486
  timestamp: 1608878322
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 3efa2_00016
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=17
Bracket: Iter 8.000: -0.8477224707603455 | Iter 4.000: -1.0526500940322876 | Iter 2.000: -1.4519417881965637 | Iter 1.000: -1.4871947169303894
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials

Result for DEFAULT_3efa2_00017:
  accuracy: 0.64
  date: 2020-12-25_06-39-01
  done: false
  experiment_id: e721e0b398df4fea88af44cb8cd2c859
  experiment_tag: 17_accum_iter=2,batch_size=8,is_amsgrad=False,lr=0.0018945
  hostname: Nevsky
  iterations_since_restore: 2
  loss: 0.9392977654933929
  node_ip: 10.0.0.200
  pid: 24090
  should_checkpoint: true
  time_since_restore: 17.187686920166016
  time_this_iter_s: 6.821173906326294
  time_total_s: 17.187686920166016
  timestamp: 1608878341
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 3efa2_00017
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=17
Bracket: Iter 8.000: -0.8477224707603455 | Iter 4.000: -1.0526500940322876 | Iter 2.000: -1.4207933247089386 | Iter 1.000: -1.4763555526733398
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of tri

Result for DEFAULT_3efa2_00017:
  accuracy: 0.72
  date: 2020-12-25_06-39-15
  done: false
  experiment_id: e721e0b398df4fea88af44cb8cd2c859
  experiment_tag: 17_accum_iter=2,batch_size=8,is_amsgrad=False,lr=0.0018945
  hostname: Nevsky
  iterations_since_restore: 4
  loss: 0.9791465401649475
  node_ip: 10.0.0.200
  pid: 24090
  should_checkpoint: true
  time_since_restore: 30.718802452087402
  time_this_iter_s: 6.778299570083618
  time_total_s: 30.718802452087402
  timestamp: 1608878355
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 3efa2_00017
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=17
Bracket: Iter 8.000: -0.8477224707603455 | Iter 4.000: -0.9935813546180725 | Iter 2.000: -1.4207933247089386 | Iter 1.000: -1.4763555526733398
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of tri

Result for DEFAULT_3efa2_00017:
  accuracy: 0.48
  date: 2020-12-25_06-39-28
  done: false
  experiment_id: e721e0b398df4fea88af44cb8cd2c859
  experiment_tag: 17_accum_iter=2,batch_size=8,is_amsgrad=False,lr=0.0018945
  hostname: Nevsky
  iterations_since_restore: 6
  loss: 1.341333031654358
  node_ip: 10.0.0.200
  pid: 24090
  should_checkpoint: true
  time_since_restore: 44.439547061920166
  time_this_iter_s: 6.955381393432617
  time_total_s: 44.439547061920166
  timestamp: 1608878368
  timesteps_since_restore: 0
  training_iteration: 6
  trial_id: 3efa2_00017
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=17
Bracket: Iter 8.000: -0.8477224707603455 | Iter 4.000: -0.9935813546180725 | Iter 2.000: -1.4207933247089386 | Iter 1.000: -1.4763555526733398
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of tria

Result for DEFAULT_3efa2_00017:
  accuracy: 0.5
  date: 2020-12-25_06-39-42
  done: true
  experiment_id: e721e0b398df4fea88af44cb8cd2c859
  experiment_tag: 17_accum_iter=2,batch_size=8,is_amsgrad=False,lr=0.0018945
  hostname: Nevsky
  iterations_since_restore: 8
  loss: 1.2934342622756958
  node_ip: 10.0.0.200
  pid: 24090
  should_checkpoint: true
  time_since_restore: 58.03996658325195
  time_this_iter_s: 6.8039562702178955
  time_total_s: 58.03996658325195
  timestamp: 1608878382
  timesteps_since_restore: 0
  training_iteration: 8
  trial_id: 3efa2_00017
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=18
Bracket: Iter 8.000: -0.9329634010791779 | Iter 4.000: -0.9935813546180725 | Iter 2.000: -1.4207933247089386 | Iter 1.000: -1.4763555526733398
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials

Result for DEFAULT_3efa2_00018:
  accuracy: 0.7
  date: 2020-12-25_06-40-02
  done: false
  experiment_id: b23a475ffec94e9b986f11a4ca253a5e
  experiment_tag: 18_accum_iter=4,batch_size=8,is_amsgrad=False,lr=0.0019013
  hostname: Nevsky
  iterations_since_restore: 2
  loss: 1.138426959514618
  node_ip: 10.0.0.200
  pid: 30036
  should_checkpoint: true
  time_since_restore: 17.95978832244873
  time_this_iter_s: 6.805258750915527
  time_total_s: 17.95978832244873
  timestamp: 1608878402
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 3efa2_00018
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=18
Bracket: Iter 8.000: -0.9329634010791779 | Iter 4.000: -0.9935813546180725 | Iter 2.000: -1.3896448612213135 | Iter 1.000: -1.4405361711978912
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials:

Result for DEFAULT_3efa2_00018:
  accuracy: 0.72
  date: 2020-12-25_06-40-15
  done: false
  experiment_id: b23a475ffec94e9b986f11a4ca253a5e
  experiment_tag: 18_accum_iter=4,batch_size=8,is_amsgrad=False,lr=0.0019013
  hostname: Nevsky
  iterations_since_restore: 4
  loss: 0.8887990415096283
  node_ip: 10.0.0.200
  pid: 30036
  should_checkpoint: true
  time_since_restore: 31.488162994384766
  time_this_iter_s: 6.731662034988403
  time_total_s: 31.488162994384766
  timestamp: 1608878415
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 3efa2_00018
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=18
Bracket: Iter 8.000: -0.9329634010791779 | Iter 4.000: -0.98636394739151 | Iter 2.000: -1.3896448612213135 | Iter 1.000: -1.4405361711978912
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trial

Result for DEFAULT_3efa2_00018:
  accuracy: 0.72
  date: 2020-12-25_06-40-29
  done: false
  experiment_id: b23a475ffec94e9b986f11a4ca253a5e
  experiment_tag: 18_accum_iter=4,batch_size=8,is_amsgrad=False,lr=0.0019013
  hostname: Nevsky
  iterations_since_restore: 6
  loss: 0.8284388482570648
  node_ip: 10.0.0.200
  pid: 30036
  should_checkpoint: true
  time_since_restore: 44.976085901260376
  time_this_iter_s: 6.780740261077881
  time_total_s: 44.976085901260376
  timestamp: 1608878429
  timesteps_since_restore: 0
  training_iteration: 6
  trial_id: 3efa2_00018
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=18
Bracket: Iter 8.000: -0.9329634010791779 | Iter 4.000: -0.98636394739151 | Iter 2.000: -1.3896448612213135 | Iter 1.000: -1.4405361711978912
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trial

Result for DEFAULT_3efa2_00018:
  accuracy: 0.72
  date: 2020-12-25_06-40-43
  done: false
  experiment_id: b23a475ffec94e9b986f11a4ca253a5e
  experiment_tag: 18_accum_iter=4,batch_size=8,is_amsgrad=False,lr=0.0019013
  hostname: Nevsky
  iterations_since_restore: 8
  loss: 0.8204785883426666
  node_ip: 10.0.0.200
  pid: 30036
  should_checkpoint: true
  time_since_restore: 58.67919158935547
  time_this_iter_s: 6.876724481582642
  time_total_s: 58.67919158935547
  timestamp: 1608878443
  timesteps_since_restore: 0
  training_iteration: 8
  trial_id: 3efa2_00018
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=18
Bracket: Iter 8.000: -0.8767209947109222 | Iter 4.000: -0.98636394739151 | Iter 2.000: -1.3896448612213135 | Iter 1.000: -1.4405361711978912
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials:

Result for DEFAULT_3efa2_00018:
  accuracy: 0.72
  date: 2020-12-25_06-40-56
  done: true
  experiment_id: b23a475ffec94e9b986f11a4ca253a5e
  experiment_tag: 18_accum_iter=4,batch_size=8,is_amsgrad=False,lr=0.0019013
  hostname: Nevsky
  iterations_since_restore: 10
  loss: 0.8445485830307007
  node_ip: 10.0.0.200
  pid: 30036
  should_checkpoint: true
  time_since_restore: 72.36692214012146
  time_this_iter_s: 6.832549810409546
  time_total_s: 72.36692214012146
  timestamp: 1608878456
  timesteps_since_restore: 0
  training_iteration: 10
  trial_id: 3efa2_00018
  
== Status ==
Memory usage on this node: 7.0/31.3 GiB
Using AsyncHyperBand: num_stopped=19
Bracket: Iter 8.000: -0.8767209947109222 | Iter 4.000: -0.98636394739151 | Iter 2.000: -1.3896448612213135 | Iter 1.000: -1.4405361711978912
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials

2020-12-25 06:41:10,687	INFO tune.py:439 -- Total run time: 829.70 seconds (828.50 seconds for the tuning loop).


Result for DEFAULT_3efa2_00019:
  accuracy: 0.54
  date: 2020-12-25_06-41-10
  done: true
  experiment_id: 21950de8df5d49c683d5631d633905a0
  experiment_tag: 19_accum_iter=1,batch_size=4,is_amsgrad=False,lr=0.067995
  hostname: Nevsky
  iterations_since_restore: 1
  loss: 795.1906127929688
  node_ip: 10.0.0.200
  pid: 4989
  should_checkpoint: true
  time_since_restore: 11.869887590408325
  time_this_iter_s: 11.869887590408325
  time_total_s: 11.869887590408325
  timestamp: 1608878470
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 3efa2_00019
  
== Status ==
Memory usage on this node: 6.9/31.3 GiB
Using AsyncHyperBand: num_stopped=20
Bracket: Iter 8.000: -0.8767209947109222 | Iter 4.000: -0.98636394739151 | Iter 2.000: -1.3896448612213135 | Iter 1.000: -1.4763555526733398
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/17.63 GiB heap, 0.0/6.05 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/ray_results/DEFAULT_2020-12-25_06-27-22
Number of trials: 

AttributeError: type object 'Config' has no attribute 'num_labels'

In [None]:
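# NOTE: ignore the AttributeError in the output above ('Config' has no attribute 'num_labels').
# It appears to be raised only after the tuning loop finished (see the "Total run time" log line),
# so the trial results are presumably unaffected -- TODO: confirm, then fix the stale
# `Config.num_labels` reference and clear this output.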