# Hyperparameter selection with Ray Tune

experiment_tag: accum_iter=8,batch_size=16,is_amsgrad=False,lr=0.0012185,middle_fc=False,middle_fc_size=0,weight_decay=0.000125

In [1]:
import os
from datetime import datetime
import time
import random
import warnings
import joblib
import warnings
import gc
# My modules
from config import Config
from logger import init_logger
from common_utils import set_seeds, read_csvs, stratify_split, setup, get_data_dfs, get_loaders
from model import Model
from train_loop_functions import train_epoch, valid_epoch

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import accuracy_score

import torch
from torch import nn
from torch.cuda.amp import GradScaler

# hyperparameter tuning
from functools import partial
import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import ASHAScheduler
from ray.tune.suggest.ax import AxSearch
from ray.tune.suggest.bayesopt import BayesOptSearch

In [2]:
%load_ext autoreload
%autoreload 2

# Setup

In [3]:
# Seed using the project-wide Config.seed (set_seeds is imported from common_utils).
# NOTE(review): this call runs in the notebook (driver) process only. The trial
# results below are reported from other pids, so confirm whether train_main
# needs its own seeding inside each Ray Tune worker.
set_seeds(Config.seed)
# Module-level logger; it is handed to train_epoch/valid_epoch inside train_main.
LOGGER = init_logger() # uses Python's logging framework

# Tuning

In [4]:
# Objective ("trainable") function for Ray Tune. Tune supplies `config` and, when a
# checkpoint should be restored, `checkpoint_dir`; the remaining keyword arguments
# are bound by the caller (e.g. with functools.partial).
def train_main(config, checkpoint_dir=None, data_dir=None, num_epochs=10):
    """Train and validate one hyperparameter configuration on fold 0.

    After every epoch a checkpoint (model + optimizer state) is written through
    ``tune.checkpoint_dir`` and the validation loss/accuracy are sent to Tune
    with ``tune.report``.

    Args:
        config: Mapping of sampled hyperparameters. Keys read here:
            ``batch_size``, ``lr``, ``is_amsgrad``, ``middle_fc``,
            ``middle_fc_size``, ``weight_decay`` and ``accum_iter``.
        checkpoint_dir: Directory holding a file named ``checkpoint`` to
            restore from, or ``None`` to start from scratch.
        data_dir: Directory passed to ``read_csvs``; images are loaded from
            its ``train_images`` sub-directory.
        num_epochs: Number of train/validate epochs to run (default 10, the
            value that used to be hard-coded).

    Raises:
        ValueError: If ``config`` or ``data_dir`` is ``None``.
    """
    # Fail early with a clear message instead of an opaque error further down.
    if config is None:
        raise ValueError("train_main requires a hyperparameter `config`")
    if data_dir is None:
        raise ValueError("train_main requires `data_dir`")

    # -------- DATASETS AND LOADERS --------
    data_df, _, _ = read_csvs(data_dir, Config.debug, num_samples=10000)
    train_folds = stratify_split(data_df, Config.fold_num, Config.seed, Config.target_col)

    # select only one of the folds (fold 0)
    train_df, valid_df = get_data_dfs(train_folds, 0)
    train_dataloader, valid_dataloader = get_loaders(
        train_df,
        valid_df,
        config["batch_size"],
        os.path.join(data_dir, 'train_images'),
    )

    # -------- MODEL --------
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # NOTE(review): setup_model_optimizer and get_schd_crit are not among the
    # imports visible in this notebook -- confirm where they are defined.
    model, optimizer = setup_model_optimizer(
        Config.model_arch,
        config["lr"],
        config["is_amsgrad"],
        num_labels=data_df.label.nunique(),
        fc_layer={
            'middle_fc': config["middle_fc"],
            'middle_fc_size': config["middle_fc_size"],
        },
        weight_decay=config["weight_decay"],
        device=device,
        checkpoint=None,
    )

    scheduler, criterion = get_schd_crit()

    # The `checkpoint_dir` parameter gets passed by Ray Tune when a checkpoint should be restored.
    # NOTE(review): the epoch counter is not stored in the checkpoint, so a
    # restored trial starts counting from epoch 0 again.
    if checkpoint_dir:
        checkpoint = os.path.join(checkpoint_dir, "checkpoint")
        # map_location lets a checkpoint saved on one device load on another.
        model_state, optimizer_state = torch.load(checkpoint, map_location=device)
        model.load_state_dict(model_state)
        optimizer.load_state_dict(optimizer_state)

    # The validation labels do not change between epochs, so fetch them once.
    validation_labels = valid_df[Config.target_col].values

    # One GradScaler for the whole run: it keeps the dynamic loss scale across
    # iterations, which is lost if a fresh scaler is created every epoch.
    grad_scaler = GradScaler()

    # EPOCHS TRAIN
    for epoch in range(num_epochs):
        # TRAIN (the returned average training loss is not used here)
        train_epoch(train_dataloader, model,
                    criterion, optimizer,
                    scheduler, grad_scaler,
                    config["accum_iter"], LOGGER,
                    device)

        # VALIDATE
        avg_validation_loss, preds = valid_epoch(valid_dataloader, model, criterion, LOGGER, device)
        accuracy = accuracy_score(y_true=validation_labels, y_pred=preds)

        # SAVE CHECKPOINT.
        # It is automatically registered with Ray Tune and will potentially
        # be passed as the `checkpoint_dir` parameter in future iterations.
        # A distinct name is used so the function argument is not shadowed.
        with tune.checkpoint_dir(step=epoch) as epoch_checkpoint_dir:
            path = os.path.join(epoch_checkpoint_dir, "checkpoint")
            torch.save((model.state_dict(), optimizer.state_dict()), path)

        tune.report(loss=avg_validation_loss, accuracy=accuracy)
        gc.collect()

    print("Finished Training")

In [5]:
def main(num_samples=50, max_num_epochs=10, gpus_per_trial=1):
    """Run the Ray Tune hyperparameter search and print the best trial.

    Args:
        num_samples: Number of trials handed to ``tune.run``.
        max_num_epochs: Passed to the ASHA scheduler as ``max_t``. The number
            of epochs a trial actually trains for is decided inside
            ``train_main``, not by this argument.
        gpus_per_trial: GPUs reserved for each trial.
    """
    data_root = os.path.abspath('./data')

    # Search space. Plain values are held fixed; tune.* entries are sampled.
    search_space = {
        "is_amsgrad": False,
        "accum_iter": tune.choice([2, 4, 8]),
        "lr": tune.loguniform(1e-3, 1e-1),
        "batch_size": tune.choice([8, 16]),
        # i.e. 1/8 * 0.001, 1/2 * 0.001 and no weight decay
        "weight_decay": tune.choice([0.000125, 0.0005, 0.]),
        # extra FC layer in the classifier head: currently switched off
        "middle_fc": False,
        "middle_fc_size": 0,
    }

    # Only the mode is set on the scheduler; the metric name is supplied
    # through tune.run below.
    asha = ASHAScheduler(
        mode="min",
        max_t=max_num_epochs,
        grace_period=2,
        reduction_factor=2,
    )
    cli_reporter = CLIReporter(metric_columns=["loss", "accuracy", "training_iteration"])

    # Ax drives the search (BayesOptSearch is imported as an alternative).
    searcher = AxSearch(metric="loss", mode="min")

    trainable = partial(train_main, data_dir=data_root)
    run_options = {
        "name": "ax3",
        "metric": "loss",
        # every CPU goes to a single trial
        "resources_per_trial": {"cpu": os.cpu_count(), "gpu": gpus_per_trial},
        "config": search_space,
        "num_samples": num_samples,
        "scheduler": asha,
        "progress_reporter": cli_reporter,
        "search_alg": searcher,
        "local_dir": './ray-results',
        "checkpoint_score_attr": 'min-loss',
    }
    analysis = tune.run(trainable, **run_options)

    # Pick the winner by the last reported validation loss.
    winner = analysis.get_best_trial("loss", "min", "last")
    print("Best trial config: {}".format(winner.config))
    print("Best trial final validation loss: {}".format(winner.last_result["loss"]))
    print("Best trial final validation accuracy: {}".format(winner.last_result["accuracy"]))

In [None]:
# Start the search with main()'s defaults
# (num_samples=50, max_num_epochs=10, gpus_per_trial=1).
main()

2020-12-27 05:08:29,141	INFO services.py:1092 -- View the Ray dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m
2020-12-27 05:08:29,766	INFO registry.py:65 -- Detected unknown callable for trainable. Converting to class.
[INFO 12-27 05:08:29] ax.service.ax_client: Starting optimization with verbose logging. To disable logging, set the `verbose_logging` argument to `False`. Note that float values in the logs are rounded to 2 decimal points.
[INFO 12-27 05:08:29] ax.service.utils.instantiation: Inferred value type of ParameterType.INT for parameter accum_iter. If that is not the expected value type, you can explicity specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 12-27 05:08:29] ax.service.utils.instantiation: Inferred value type of ParameterType.INT for parameter batch_size. If that is not the expected value type, you can explicity specify 'value_type' ('int', 'float', 'bool' or 'str') in parameter dict.
[INFO 12-27 05:08:29] ax.service.utils.instant

== Status ==
Memory usage on this node: 5.3/31.3 GiB
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: None | Iter 4.000: None | Iter 2.000: None
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3
Number of trials: 1/50 (1 RUNNING)
+------------------+----------+-------+--------------+--------------+--------------+-----------+-------------+------------------+----------------+
| Trial name       | status   | loc   |   accum_iter |   batch_size | is_amsgrad   |        lr | middle_fc   |   middle_fc_size |   weight_decay |
|------------------+----------+-------+--------------+--------------+--------------+-----------+-------------+------------------+----------------|
| DEFAULT_8f195b20 | RUNNING  |       |            2 |            8 | False        | 0.0072073 | False       |                0 |         0.0005 |
+------------------+----------+-------+----

Result for DEFAULT_8f195b20:
  accuracy: 0.5532
  date: 2020-12-27_05-34-53
  done: false
  experiment_id: 0d97abcc07a441c2aaaf8b522555c829
  experiment_tag: 1_accum_iter=2,batch_size=8,is_amsgrad=False,lr=0.0072073,middle_fc=False,middle_fc_size=0,weight_decay=0.0005
  hostname: Nevsky
  iterations_since_restore: 4
  loss: 1.2628076981894578
  node_ip: 10.0.0.200
  pid: 3301
  should_checkpoint: true
  time_since_restore: 1582.29887509346
  time_this_iter_s: 395.3860414028168
  time_total_s: 1582.29887509346
  timestamp: 1609047293
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 8f195b20
  
== Status ==
Memory usage on this node: 7.6/31.3 GiB
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: None | Iter 4.000: -1.2628076981894578 | Iter 2.000: -1.243230769905863
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3
Number of trials: 2/5

[2m[36m(pid=3301)[0m Epoch     8: adjusting learning rate of group 0 to 6.8914e-04.
Result for DEFAULT_8f195b20:
  accuracy: 0.564
  date: 2020-12-27_06-01-13
  done: false
  experiment_id: 0d97abcc07a441c2aaaf8b522555c829
  experiment_tag: 1_accum_iter=2,batch_size=8,is_amsgrad=False,lr=0.0072073,middle_fc=False,middle_fc_size=0,weight_decay=0.0005
  hostname: Nevsky
  iterations_since_restore: 8
  loss: 1.2458355705949324
  node_ip: 10.0.0.200
  pid: 3301
  should_checkpoint: true
  time_since_restore: 3162.5517632961273
  time_this_iter_s: 395.01573634147644
  time_total_s: 3162.5517632961273
  timestamp: 1609048873
  timesteps_since_restore: 0
  training_iteration: 8
  trial_id: 8f195b20
  
== Status ==
Memory usage on this node: 7.6/31.3 GiB
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 8.000: -1.2458355705949324 | Iter 4.000: -1.2628076981894578 | Iter 2.000: -1.243230769905863
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 acceler

[INFO 12-27 06:14:23] ax.service.ax_client: Completed trial 0 with data: {'loss': (1.22, 0.0)}.
[INFO 12-27 06:14:23] ax.service.ax_client: Generated new trial 2 with parameters {'lr': 0.0, 'accum_iter': 4, 'batch_size': 8, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.


Result for DEFAULT_8f195b20:
  accuracy: 0.5744
  date: 2020-12-27_06-14-23
  done: true
  experiment_id: 0d97abcc07a441c2aaaf8b522555c829
  experiment_tag: 1_accum_iter=2,batch_size=8,is_amsgrad=False,lr=0.0072073,middle_fc=False,middle_fc_size=0,weight_decay=0.0005
  hostname: Nevsky
  iterations_since_restore: 10
  loss: 1.220927136608317
  node_ip: 10.0.0.200
  pid: 3301
  should_checkpoint: true
  time_since_restore: 3952.848404407501
  time_this_iter_s: 394.88840198516846
  time_total_s: 3952.848404407501
  timestamp: 1609049663
  timesteps_since_restore: 0
  training_iteration: 10
  trial_id: 8f195b20
  
== Status ==
Memory usage on this node: 7.5/31.3 GiB
Using AsyncHyperBand: num_stopped=1
Bracket: Iter 8.000: -1.2458355705949324 | Iter 4.000: -1.2628076981894578 | Iter 2.000: -1.243230769905863
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3
Num

[INFO 12-27 06:26:22] ax.service.ax_client: Completed trial 1 with data: {'loss': (1.36, 0.0)}.
[INFO 12-27 06:26:22] ax.service.ax_client: Generated new trial 3 with parameters {'lr': 0.0, 'accum_iter': 2, 'batch_size': 8, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.


Result for DEFAULT_8f20cbe4:
  accuracy: 0.7036
  date: 2020-12-27_06-26-22
  done: true
  experiment_id: 85ea3c1542c14c2ba43fe4139f93649f
  experiment_tag: 2_accum_iter=2,batch_size=16,is_amsgrad=False,lr=0.001308,middle_fc=False,middle_fc_size=0,weight_decay=0.0
  hostname: Nevsky
  iterations_since_restore: 2
  loss: 1.3565497356879561
  node_ip: 10.0.0.200
  pid: 3302
  should_checkpoint: true
  time_since_restore: 717.16929936409
  time_this_iter_s: 356.84243297576904
  time_total_s: 717.16929936409
  timestamp: 1609050382
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 8f20cbe4
  
== Status ==
Memory usage on this node: 7.6/31.3 GiB
Using AsyncHyperBand: num_stopped=2
Bracket: Iter 8.000: -1.2458355705949324 | Iter 4.000: -1.2628076981894578 | Iter 2.000: -1.2998902527969096
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3
Number of 

[INFO 12-27 06:39:36] ax.service.ax_client: Completed trial 2 with data: {'loss': (1.92, 0.0)}.
[INFO 12-27 06:39:37] ax.service.ax_client: Generated new trial 4 with parameters {'lr': 0.05, 'accum_iter': 8, 'batch_size': 8, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.


Result for DEFAULT_c3e50cc4:
  accuracy: 0.1236
  date: 2020-12-27_06-39-36
  done: true
  experiment_id: bb18d2b809c443859460f2a767e35a5f
  experiment_tag: 3_accum_iter=4,batch_size=8,is_amsgrad=False,lr=0.0026654,middle_fc=False,middle_fc_size=0,weight_decay=0.0
  hostname: Nevsky
  iterations_since_restore: 2
  loss: 1.9213401214985908
  node_ip: 10.0.0.200
  pid: 3299
  should_checkpoint: true
  time_since_restore: 792.4597654342651
  time_this_iter_s: 394.0402584075928
  time_total_s: 792.4597654342651
  timestamp: 1609051176
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: c3e50cc4
  
== Status ==
Memory usage on this node: 7.6/31.3 GiB
Using AsyncHyperBand: num_stopped=3
Bracket: Iter 8.000: -1.2458355705949324 | Iter 4.000: -1.2628076981894578 | Iter 2.000: -1.3565497356879561
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3
Number 

[2m[36m(pid=3295)[0m Epoch     3: adjusting learning rate of group 0 to 7.9996e-04.
Result for DEFAULT_70703db4:
  accuracy: 0.646
  date: 2020-12-27_06-59-27
  done: false
  experiment_id: 10880e7b0f534196826be3a655c74baf
  experiment_tag: 4_accum_iter=2,batch_size=8,is_amsgrad=False,lr=0.0010074,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 3
  loss: 1.325021760373176
  node_ip: 10.0.0.200
  pid: 3295
  should_checkpoint: true
  time_since_restore: 1188.596974849701
  time_this_iter_s: 395.25930619239807
  time_total_s: 1188.596974849701
  timestamp: 1609052367
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 70703db4
  
== Status ==
Memory usage on this node: 7.5/31.3 GiB
Using AsyncHyperBand: num_stopped=3
Bracket: Iter 8.000: -1.2458355705949324 | Iter 4.000: -1.2628076981894578 | Iter 2.000: -1.3433839831548402
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 acceler

[INFO 12-27 07:06:02] ax.service.ax_client: Completed trial 3 with data: {'loss': (1.33, 0.0)}.
[INFO 12-27 07:06:03] ax.service.ax_client: Generated new trial 5 with parameters {'lr': 0.02, 'accum_iter': 4, 'batch_size': 16, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.


Result for DEFAULT_70703db4:
  accuracy: 0.6416
  date: 2020-12-27_07-06-02
  done: true
  experiment_id: 10880e7b0f534196826be3a655c74baf
  experiment_tag: 4_accum_iter=2,batch_size=8,is_amsgrad=False,lr=0.0010074,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 4
  loss: 1.3343894255312183
  node_ip: 10.0.0.200
  pid: 3295
  should_checkpoint: true
  time_since_restore: 1584.278340101242
  time_this_iter_s: 395.68136525154114
  time_total_s: 1584.278340101242
  timestamp: 1609052762
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 70703db4
  
== Status ==
Memory usage on this node: 7.5/31.3 GiB
Using AsyncHyperBand: num_stopped=4
Bracket: Iter 8.000: -1.2458355705949324 | Iter 4.000: -1.298598561860338 | Iter 2.000: -1.3433839831548402
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3
Nu

[INFO 12-27 07:19:15] ax.service.ax_client: Completed trial 4 with data: {'loss': (10.43, 0.0)}.
[INFO 12-27 07:19:15] ax.service.ax_client: Generated new trial 6 with parameters {'lr': 0.01, 'accum_iter': 2, 'batch_size': 16, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.


Result for DEFAULT_49d28a7a:
  accuracy: 0.1588
  date: 2020-12-27_07-19-15
  done: true
  experiment_id: ced18a5c59b9427e926027a6f38c5460
  experiment_tag: 5_accum_iter=8,batch_size=8,is_amsgrad=False,lr=0.052176,middle_fc=False,middle_fc_size=0,weight_decay=0.0005
  hostname: Nevsky
  iterations_since_restore: 2
  loss: 10.425207705437382
  node_ip: 10.0.0.200
  pid: 3296
  should_checkpoint: true
  time_since_restore: 790.5445554256439
  time_this_iter_s: 393.22023367881775
  time_total_s: 790.5445554256439
  timestamp: 1609053555
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 49d28a7a
  
== Status ==
Memory usage on this node: 7.5/31.3 GiB
Using AsyncHyperBand: num_stopped=5
Bracket: Iter 8.000: -1.2458355705949324 | Iter 4.000: -1.298598561860338 | Iter 2.000: -1.3565497356879561
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3
Numbe

[INFO 12-27 07:31:10] ax.service.ax_client: Completed trial 5 with data: {'loss': (9.13, 0.0)}.
[INFO 12-27 07:31:10] ax.service.ax_client: Generated new trial 7 with parameters {'lr': 0.0, 'accum_iter': 2, 'batch_size': 8, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.


Result for DEFAULT_fb20acb4:
  accuracy: 0.0688
  date: 2020-12-27_07-31-10
  done: true
  experiment_id: c44cb9fdccbd4f9db21ca75b33294091
  experiment_tag: 6_accum_iter=4,batch_size=16,is_amsgrad=False,lr=0.023743,middle_fc=False,middle_fc_size=0,weight_decay=0.0
  hostname: Nevsky
  iterations_since_restore: 2
  loss: 9.132312460790706
  node_ip: 10.0.0.200
  pid: 3298
  should_checkpoint: true
  time_since_restore: 713.2389476299286
  time_this_iter_s: 354.8900513648987
  time_total_s: 713.2389476299286
  timestamp: 1609054270
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: fb20acb4
  
== Status ==
Memory usage on this node: 7.5/31.3 GiB
Using AsyncHyperBand: num_stopped=6
Bracket: Iter 8.000: -1.2458355705949324 | Iter 4.000: -1.298598561860338 | Iter 2.000: -1.6389449285932733
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3
Number of

[2m[36m(pid=3300)[0m Epoch     3: adjusting learning rate of group 0 to 4.2050e-03.
Result for DEFAULT_d3595ddc:
  accuracy: 0.616
  date: 2020-12-27_07-48-56
  done: false
  experiment_id: a42c381f18ed43c59ba67588ceb39062
  experiment_tag: 7_accum_iter=2,batch_size=16,is_amsgrad=False,lr=0.0052964,middle_fc=False,middle_fc_size=0,weight_decay=0.0005
  hostname: Nevsky
  iterations_since_restore: 3
  loss: 1.178484043743037
  node_ip: 10.0.0.200
  pid: 3300
  should_checkpoint: true
  time_since_restore: 1064.8042192459106
  time_this_iter_s: 352.9881293773651
  time_total_s: 1064.8042192459106
  timestamp: 1609055336
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: d3595ddc
  
== Status ==
Memory usage on this node: 7.5/31.3 GiB
Using AsyncHyperBand: num_stopped=6
Bracket: Iter 8.000: -1.2458355705949324 | Iter 4.000: -1.298598561860338 | Iter 2.000: -1.3565497356879561
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelera

[2m[36m(pid=3300)[0m Epoch     6: adjusting learning rate of group 0 to 1.8305e-03.
Result for DEFAULT_d3595ddc:
  accuracy: 0.614
  date: 2020-12-27_08-06-36
  done: false
  experiment_id: a42c381f18ed43c59ba67588ceb39062
  experiment_tag: 7_accum_iter=2,batch_size=16,is_amsgrad=False,lr=0.0052964,middle_fc=False,middle_fc_size=0,weight_decay=0.0005
  hostname: Nevsky
  iterations_since_restore: 6
  loss: 1.1857204112825515
  node_ip: 10.0.0.200
  pid: 3300
  should_checkpoint: true
  time_since_restore: 2124.313277721405
  time_this_iter_s: 353.0880787372589
  time_total_s: 2124.313277721405
  timestamp: 1609056396
  timesteps_since_restore: 0
  training_iteration: 6
  trial_id: d3595ddc
  
== Status ==
Memory usage on this node: 7.5/31.3 GiB
Using AsyncHyperBand: num_stopped=6
Bracket: Iter 8.000: -1.2458355705949324 | Iter 4.000: -1.2628076981894578 | Iter 2.000: -1.3565497356879561
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelera

[2m[36m(pid=3300)[0m Epoch     9: adjusting learning rate of group 0 to 1.3059e-04.
Result for DEFAULT_d3595ddc:
  accuracy: 0.6116
  date: 2020-12-27_08-24-15
  done: false
  experiment_id: a42c381f18ed43c59ba67588ceb39062
  experiment_tag: 7_accum_iter=2,batch_size=16,is_amsgrad=False,lr=0.0052964,middle_fc=False,middle_fc_size=0,weight_decay=0.0005
  hostname: Nevsky
  iterations_since_restore: 9
  loss: 1.1828302431710158
  node_ip: 10.0.0.200
  pid: 3300
  should_checkpoint: true
  time_since_restore: 3183.521435022354
  time_this_iter_s: 353.1130692958832
  time_total_s: 3183.521435022354
  timestamp: 1609057455
  timesteps_since_restore: 0
  training_iteration: 9
  trial_id: d3595ddc
  
== Status ==
Memory usage on this node: 7.5/31.3 GiB
Using AsyncHyperBand: num_stopped=6
Bracket: Iter 8.000: -1.213211819340911 | Iter 4.000: -1.2628076981894578 | Iter 2.000: -1.3565497356879561
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelera

[INFO 12-27 08:30:08] ax.service.ax_client: Completed trial 6 with data: {'loss': (1.18, 0.0)}.
[INFO 12-27 08:30:08] ax.service.ax_client: Generated new trial 8 with parameters {'lr': 0.02, 'accum_iter': 8, 'batch_size': 16, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.


Result for DEFAULT_d3595ddc:
  accuracy: 0.6124
  date: 2020-12-27_08-30-08
  done: true
  experiment_id: a42c381f18ed43c59ba67588ceb39062
  experiment_tag: 7_accum_iter=2,batch_size=16,is_amsgrad=False,lr=0.0052964,middle_fc=False,middle_fc_size=0,weight_decay=0.0005
  hostname: Nevsky
  iterations_since_restore: 10
  loss: 1.1846335681178901
  node_ip: 10.0.0.200
  pid: 3300
  should_checkpoint: true
  time_since_restore: 3536.6174466609955
  time_this_iter_s: 353.09601163864136
  time_total_s: 3536.6174466609955
  timestamp: 1609057808
  timesteps_since_restore: 0
  training_iteration: 10
  trial_id: d3595ddc
  
== Status ==
Memory usage on this node: 7.5/31.3 GiB
Using AsyncHyperBand: num_stopped=7
Bracket: Iter 8.000: -1.213211819340911 | Iter 4.000: -1.2628076981894578 | Iter 2.000: -1.3565497356879561
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3

[INFO 12-27 08:43:22] ax.service.ax_client: Completed trial 7 with data: {'loss': (1.42, 0.0)}.
[INFO 12-27 08:43:22] ax.service.ax_client: Generated new trial 9 with parameters {'lr': 0.0, 'accum_iter': 8, 'batch_size': 8, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.


Result for DEFAULT_7d7e5366:
  accuracy: 0.7052
  date: 2020-12-27_08-43-22
  done: true
  experiment_id: e8fff39134ca44deaf9337b6e6908a2c
  experiment_tag: 8_accum_iter=2,batch_size=8,is_amsgrad=False,lr=0.0030966,middle_fc=False,middle_fc_size=0,weight_decay=0.0005
  hostname: Nevsky
  iterations_since_restore: 2
  loss: 1.416282648904414
  node_ip: 10.0.0.200
  pid: 3297
  should_checkpoint: true
  time_since_restore: 792.2271537780762
  time_this_iter_s: 394.2549879550934
  time_total_s: 792.2271537780762
  timestamp: 1609058602
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 7d7e5366
  
== Status ==
Memory usage on this node: 7.4/31.3 GiB
Using AsyncHyperBand: num_stopped=8
Bracket: Iter 8.000: -1.213211819340911 | Iter 4.000: -1.2628076981894578 | Iter 2.000: -1.386416192296185
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3
Number 

[INFO 12-27 08:55:15] ax.service.ax_client: Completed trial 8 with data: {'loss': (408194606.18, 0.0)}.
[INFO 12-27 08:55:16] ax.service.ax_client: Generated new trial 10 with parameters {'lr': 0.0, 'accum_iter': 8, 'batch_size': 16, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.


Result for DEFAULT_ba816430:
  accuracy: 0.6176
  date: 2020-12-27_08-55-15
  done: true
  experiment_id: e6b5520fdf64434b84d311837849e63a
  experiment_tag: 9_accum_iter=8,batch_size=16,is_amsgrad=False,lr=0.015591,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 2
  loss: 408194606.1772152
  node_ip: 10.0.0.200
  pid: 28843
  should_checkpoint: true
  time_since_restore: 711.288970708847
  time_this_iter_s: 353.68032121658325
  time_total_s: 711.288970708847
  timestamp: 1609059315
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: ba816430
  
== Status ==
Memory usage on this node: 7.4/31.3 GiB
Using AsyncHyperBand: num_stopped=9
Bracket: Iter 8.000: -1.213211819340911 | Iter 4.000: -1.2628076981894578 | Iter 2.000: -1.416282648904414
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3
Numbe

[2m[36m(pid=32345)[0m Epoch     2: adjusting learning rate of group 0 to 1.4576e-03.


[INFO 12-27 09:08:26] ax.service.ax_client: Completed trial 9 with data: {'loss': (1.9, 0.0)}.


Result for DEFAULT_93c04788:
  accuracy: 0.67
  date: 2020-12-27_09-08-26
  done: true
  experiment_id: 21f32dc24a954e7ea7ae7a14b37002d1
  experiment_tag: 10_accum_iter=8,batch_size=8,is_amsgrad=False,lr=0.0016113,middle_fc=False,middle_fc_size=0,weight_decay=0.0
  hostname: Nevsky
  iterations_since_restore: 2
  loss: 1.9033971526954747
  node_ip: 10.0.0.200
  pid: 32345
  should_checkpoint: true
  time_since_restore: 788.4827809333801
  time_this_iter_s: 393.0763065814972
  time_total_s: 788.4827809333801
  timestamp: 1609060106
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 93c04788
  
== Status ==
Memory usage on this node: 7.4/31.3 GiB
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 8.000: -1.213211819340911 | Iter 4.000: -1.2628076981894578 | Iter 2.000: -1.6598399007999443
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3
Number 

[INFO 12-27 09:08:26] ax.service.ax_client: Generated new trial 11 with parameters {'lr': 0.09, 'accum_iter': 8, 'batch_size': 16, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.


[2m[36m(pid=3651)[0m Epoch     0: adjusting learning rate of group 0 to 1.2185e-03.
[2m[36m(pid=3651)[0m Epoch     1: adjusting learning rate of group 0 to 1.1887e-03.
Result for DEFAULT_3cf49600:
  accuracy: 0.676
  date: 2020-12-27_09-14-27
  done: false
  experiment_id: 6dd2da43613d4b39b874fc12af52722c
  experiment_tag: 11_accum_iter=8,batch_size=16,is_amsgrad=False,lr=0.0012185,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 1
  loss: 1.121463175815872
  node_ip: 10.0.0.200
  pid: 3651
  should_checkpoint: true
  time_since_restore: 358.8265154361725
  time_this_iter_s: 358.8265154361725
  time_total_s: 358.8265154361725
  timestamp: 1609060467
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 3cf49600
  
== Status ==
Memory usage on this node: 7.4/31.3 GiB
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 8.000: -1.213211819340911 | Iter 4.000: -1.2628076981894578 | Iter 2.000: -1.6598399007999443
Resource

[2m[36m(pid=3651)[0m Epoch     3: adjusting learning rate of group 0 to 9.6758e-04.
Result for DEFAULT_3cf49600:
  accuracy: 0.7244
  date: 2020-12-27_09-26-14
  done: false
  experiment_id: 6dd2da43613d4b39b874fc12af52722c
  experiment_tag: 11_accum_iter=8,batch_size=16,is_amsgrad=False,lr=0.0012185,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 3
  loss: 0.8516630854787706
  node_ip: 10.0.0.200
  pid: 3651
  should_checkpoint: true
  time_since_restore: 1065.9095885753632
  time_this_iter_s: 353.50985455513
  time_total_s: 1065.9095885753632
  timestamp: 1609061174
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 3cf49600
  
== Status ==
Memory usage on this node: 7.4/31.3 GiB
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 8.000: -1.213211819340911 | Iter 4.000: -1.2628076981894578 | Iter 2.000: -1.416282648904414
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accel

[2m[36m(pid=3651)[0m Epoch     5: adjusting learning rate of group 0 to 6.0976e-04.
Result for DEFAULT_3cf49600:
  accuracy: 0.8032
  date: 2020-12-27_09-38-00
  done: false
  experiment_id: 6dd2da43613d4b39b874fc12af52722c
  experiment_tag: 11_accum_iter=8,batch_size=16,is_amsgrad=False,lr=0.0012185,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 5
  loss: 0.5516687375080737
  node_ip: 10.0.0.200
  pid: 3651
  should_checkpoint: true
  time_since_restore: 1772.673003435135
  time_this_iter_s: 353.3779990673065
  time_total_s: 1772.673003435135
  timestamp: 1609061880
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 3cf49600
  
== Status ==
Memory usage on this node: 7.4/31.3 GiB
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 8.000: -1.213211819340911 | Iter 4.000: -1.222523618348037 | Iter 2.000: -1.416282648904414
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accele

[2m[36m(pid=3651)[0m Epoch     7: adjusting learning rate of group 0 to 2.5194e-04.
Result for DEFAULT_3cf49600:
  accuracy: 0.8224
  date: 2020-12-27_09-49-47
  done: false
  experiment_id: 6dd2da43613d4b39b874fc12af52722c
  experiment_tag: 11_accum_iter=8,batch_size=16,is_amsgrad=False,lr=0.0012185,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 7
  loss: 0.49555076941659176
  node_ip: 10.0.0.200
  pid: 3651
  should_checkpoint: true
  time_since_restore: 2479.634341955185
  time_this_iter_s: 353.2731955051422
  time_total_s: 2479.634341955185
  timestamp: 1609062587
  timesteps_since_restore: 0
  training_iteration: 7
  trial_id: 3cf49600
  
== Status ==
Memory usage on this node: 7.4/31.3 GiB
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 8.000: -1.213211819340911 | Iter 4.000: -1.222523618348037 | Iter 2.000: -1.416282648904414
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accel

[2m[36m(pid=3651)[0m Epoch     9: adjusting learning rate of group 0 to 3.0795e-05.
Result for DEFAULT_3cf49600:
  accuracy: 0.8332
  date: 2020-12-27_10-01-35
  done: false
  experiment_id: 6dd2da43613d4b39b874fc12af52722c
  experiment_tag: 11_accum_iter=8,batch_size=16,is_amsgrad=False,lr=0.0012185,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 9
  loss: 0.476251524083222
  node_ip: 10.0.0.200
  pid: 3651
  should_checkpoint: true
  time_since_restore: 3187.15100646019
  time_this_iter_s: 353.7581596374512
  time_total_s: 3187.15100646019
  timestamp: 1609063295
  timesteps_since_restore: 0
  training_iteration: 9
  trial_id: 3cf49600
  
== Status ==
Memory usage on this node: 7.5/31.3 GiB
Using AsyncHyperBand: num_stopped=10
Bracket: Iter 8.000: -1.1805880680868897 | Iter 4.000: -1.222523618348037 | Iter 2.000: -1.416282648904414
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelera

[INFO 12-27 10:07:29] ax.service.ax_client: Completed trial 10 with data: {'loss': (0.47, 0.0)}.


Result for DEFAULT_3cf49600:
  accuracy: 0.8352
  date: 2020-12-27_10-07-29
  done: true
  experiment_id: 6dd2da43613d4b39b874fc12af52722c
  experiment_tag: 11_accum_iter=8,batch_size=16,is_amsgrad=False,lr=0.0012185,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 10
  loss: 0.47487797616403316
  node_ip: 10.0.0.200
  pid: 3651
  should_checkpoint: true
  time_since_restore: 3540.854420900345
  time_this_iter_s: 353.70341444015503
  time_total_s: 3540.854420900345
  timestamp: 1609063649
  timesteps_since_restore: 0
  training_iteration: 10
  trial_id: 3cf49600
  
== Status ==
Memory usage on this node: 7.5/31.3 GiB
Using AsyncHyperBand: num_stopped=11
Bracket: Iter 8.000: -1.1805880680868897 | Iter 4.000: -1.222523618348037 | Iter 2.000: -1.416282648904414
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/a

[INFO 12-27 10:07:29] ax.service.ax_client: Generated new trial 12 with parameters {'lr': 0.01, 'accum_iter': 8, 'batch_size': 8, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.


[2m[36m(pid=20109)[0m Epoch     0: adjusting learning rate of group 0 to 8.6815e-02.
[2m[36m(pid=20109)[0m Epoch     1: adjusting learning rate of group 0 to 8.4691e-02.
Result for DEFAULT_141d88c0:
  accuracy: 0.6176
  date: 2020-12-27_10-13-29
  done: false
  experiment_id: 6d88b8b0fbc248228c6771a36482263a
  experiment_tag: 12_accum_iter=8,batch_size=16,is_amsgrad=False,lr=0.086815,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 1
  loss: 2.0804586100387068e+33
  node_ip: 10.0.0.200
  pid: 20109
  should_checkpoint: true
  time_since_restore: 358.35335326194763
  time_this_iter_s: 358.35335326194763
  time_total_s: 358.35335326194763
  timestamp: 1609064009
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 141d88c0
  
== Status ==
Memory usage on this node: 7.5/31.3 GiB
Using AsyncHyperBand: num_stopped=11
Bracket: Iter 8.000: -1.1805880680868897 | Iter 4.000: -1.222523618348037 | Iter 2.000: -1.41628264890441

[INFO 12-27 10:19:22] ax.service.ax_client: Completed trial 11 with data: {'loss': (2487.68, 0.0)}.


Result for DEFAULT_141d88c0:
  accuracy: 0.3232
  date: 2020-12-27_10-19-22
  done: true
  experiment_id: 6d88b8b0fbc248228c6771a36482263a
  experiment_tag: 12_accum_iter=8,batch_size=16,is_amsgrad=False,lr=0.086815,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 2
  loss: 2487.6783569915383
  node_ip: 10.0.0.200
  pid: 20109
  should_checkpoint: true
  time_since_restore: 711.5356030464172
  time_this_iter_s: 353.1822497844696
  time_total_s: 711.5356030464172
  timestamp: 1609064362
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 141d88c0
  
== Status ==
Memory usage on this node: 7.5/31.3 GiB
Using AsyncHyperBand: num_stopped=12
Bracket: Iter 8.000: -1.1805880680868897 | Iter 4.000: -1.222523618348037 | Iter 2.000: -1.6598399007999443
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3


[INFO 12-27 10:19:22] ax.service.ax_client: Generated new trial 13 with parameters {'lr': 0.01, 'accum_iter': 2, 'batch_size': 16, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.


[2m[36m(pid=23738)[0m Epoch     0: adjusting learning rate of group 0 to 8.6365e-03.
[2m[36m(pid=23738)[0m Epoch     1: adjusting learning rate of group 0 to 8.4252e-03.
Result for DEFAULT_53d549f0:
  accuracy: 0.6176
  date: 2020-12-27_10-26-01
  done: false
  experiment_id: 65e2f5986c4840c5bf577e303dbfdce6
  experiment_tag: 13_accum_iter=8,batch_size=8,is_amsgrad=False,lr=0.0086365,middle_fc=False,middle_fc_size=0,weight_decay=0.0
  hostname: Nevsky
  iterations_since_restore: 1
  loss: 2.585746667053126
  node_ip: 10.0.0.200
  pid: 23738
  should_checkpoint: true
  time_since_restore: 396.1176495552063
  time_this_iter_s: 396.1176495552063
  time_total_s: 396.1176495552063
  timestamp: 1609064761
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 53d549f0
  
== Status ==
Memory usage on this node: 7.4/31.3 GiB
Using AsyncHyperBand: num_stopped=12
Bracket: Iter 8.000: -1.1805880680868897 | Iter 4.000: -1.222523618348037 | Iter 2.000: -1.6598399007999443
Resources 

[INFO 12-27 10:32:34] ax.service.ax_client: Completed trial 12 with data: {'loss': (274.47, 0.0)}.


Result for DEFAULT_53d549f0:
  accuracy: 0.6176
  date: 2020-12-27_10-32-34
  done: true
  experiment_id: 65e2f5986c4840c5bf577e303dbfdce6
  experiment_tag: 13_accum_iter=8,batch_size=8,is_amsgrad=False,lr=0.0086365,middle_fc=False,middle_fc_size=0,weight_decay=0.0
  hostname: Nevsky
  iterations_since_restore: 2
  loss: 274.47403031361256
  node_ip: 10.0.0.200
  pid: 23738
  should_checkpoint: true
  time_since_restore: 789.3358888626099
  time_this_iter_s: 393.21823930740356
  time_total_s: 789.3358888626099
  timestamp: 1609065154
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 53d549f0
  
== Status ==
Memory usage on this node: 7.4/31.3 GiB
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 8.000: -1.1805880680868897 | Iter 4.000: -1.222523618348037 | Iter 2.000: -1.9033971526954747
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3
Numb

[INFO 12-27 10:32:34] ax.service.ax_client: Generated new trial 14 with parameters {'lr': 0.03, 'accum_iter': 8, 'batch_size': 8, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.


[2m[36m(pid=27648)[0m Epoch     0: adjusting learning rate of group 0 to 6.0564e-03.
[2m[36m(pid=27648)[0m Epoch     1: adjusting learning rate of group 0 to 5.9083e-03.
Result for DEFAULT_fd21ad40:
  accuracy: 0.6164
  date: 2020-12-27_10-38-35
  done: false
  experiment_id: 96f04b345c5647768ccd14e91289e910
  experiment_tag: 14_accum_iter=2,batch_size=16,is_amsgrad=False,lr=0.0060564,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 1
  loss: 1.2822914681857145
  node_ip: 10.0.0.200
  pid: 27648
  should_checkpoint: true
  time_since_restore: 358.7603576183319
  time_this_iter_s: 358.7603576183319
  time_total_s: 358.7603576183319
  timestamp: 1609065515
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: fd21ad40
  
== Status ==
Memory usage on this node: 7.4/31.3 GiB
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 8.000: -1.1805880680868897 | Iter 4.000: -1.222523618348037 | Iter 2.000: -1.9033971526954747
Res

[2m[36m(pid=27648)[0m Epoch     3: adjusting learning rate of group 0 to 4.8084e-03.
Result for DEFAULT_fd21ad40:
  accuracy: 0.6088
  date: 2020-12-27_10-50-22
  done: false
  experiment_id: 96f04b345c5647768ccd14e91289e910
  experiment_tag: 14_accum_iter=2,batch_size=16,is_amsgrad=False,lr=0.0060564,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 3
  loss: 1.1354625247701813
  node_ip: 10.0.0.200
  pid: 27648
  should_checkpoint: true
  time_since_restore: 1066.6468515396118
  time_this_iter_s: 353.3251829147339
  time_total_s: 1066.6468515396118
  timestamp: 1609066222
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: fd21ad40
  
== Status ==
Memory usage on this node: 7.5/31.3 GiB
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 8.000: -1.1805880680868897 | Iter 4.000: -1.222523618348037 | Iter 2.000: -1.6598399007999443
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 

[2m[36m(pid=27648)[0m Epoch     5: adjusting learning rate of group 0 to 3.0287e-03.
Result for DEFAULT_fd21ad40:
  accuracy: 0.6052
  date: 2020-12-27_11-02-11
  done: false
  experiment_id: 96f04b345c5647768ccd14e91289e910
  experiment_tag: 14_accum_iter=2,batch_size=16,is_amsgrad=False,lr=0.0060564,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 5
  loss: 1.138121095639241
  node_ip: 10.0.0.200
  pid: 27648
  should_checkpoint: true
  time_since_restore: 1775.2299256324768
  time_this_iter_s: 354.3304615020752
  time_total_s: 1775.2299256324768
  timestamp: 1609066931
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: fd21ad40
  
== Status ==
Memory usage on this node: 7.5/31.3 GiB
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 8.000: -1.1805880680868897 | Iter 4.000: -1.1822395385066164 | Iter 2.000: -1.6598399007999443
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 

[2m[36m(pid=27648)[0m Epoch     7: adjusting learning rate of group 0 to 1.2491e-03.
Result for DEFAULT_fd21ad40:
  accuracy: 0.6076
  date: 2020-12-27_11-14-00
  done: false
  experiment_id: 96f04b345c5647768ccd14e91289e910
  experiment_tag: 14_accum_iter=2,batch_size=16,is_amsgrad=False,lr=0.0060564,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 7
  loss: 1.1350657321229767
  node_ip: 10.0.0.200
  pid: 27648
  should_checkpoint: true
  time_since_restore: 2484.0122632980347
  time_this_iter_s: 354.6464433670044
  time_total_s: 2484.0122632980347
  timestamp: 1609067640
  timesteps_since_restore: 0
  training_iteration: 7
  trial_id: fd21ad40
  
== Status ==
Memory usage on this node: 7.4/31.3 GiB
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 8.000: -1.1805880680868897 | Iter 4.000: -1.1822395385066164 | Iter 2.000: -1.6598399007999443
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0

[2m[36m(pid=27648)[0m Epoch     9: adjusting learning rate of group 0 to 1.4919e-04.
Result for DEFAULT_fd21ad40:
  accuracy: 0.6092
  date: 2020-12-27_11-25-48
  done: false
  experiment_id: 96f04b345c5647768ccd14e91289e910
  experiment_tag: 14_accum_iter=2,batch_size=16,is_amsgrad=False,lr=0.0060564,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 9
  loss: 1.1345606634888468
  node_ip: 10.0.0.200
  pid: 27648
  should_checkpoint: true
  time_since_restore: 3191.9251325130463
  time_this_iter_s: 353.6663992404938
  time_total_s: 3191.9251325130463
  timestamp: 1609068348
  timesteps_since_restore: 0
  training_iteration: 9
  trial_id: fd21ad40
  
== Status ==
Memory usage on this node: 7.5/31.3 GiB
Using AsyncHyperBand: num_stopped=13
Bracket: Iter 8.000: -1.1590363451197176 | Iter 4.000: -1.1822395385066164 | Iter 2.000: -1.6598399007999443
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0

[INFO 12-27 11:31:42] ax.service.ax_client: Completed trial 13 with data: {'loss': (1.14, 0.0)}.


Result for DEFAULT_fd21ad40:
  accuracy: 0.6072
  date: 2020-12-27_11-31-42
  done: true
  experiment_id: 96f04b345c5647768ccd14e91289e910
  experiment_tag: 14_accum_iter=2,batch_size=16,is_amsgrad=False,lr=0.0060564,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 10
  loss: 1.1386608013623878
  node_ip: 10.0.0.200
  pid: 27648
  should_checkpoint: true
  time_since_restore: 3546.382082462311
  time_this_iter_s: 354.4569499492645
  time_total_s: 3546.382082462311
  timestamp: 1609068702
  timesteps_since_restore: 0
  training_iteration: 10
  trial_id: fd21ad40
  
== Status ==
Memory usage on this node: 7.5/31.3 GiB
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 8.000: -1.1590363451197176 | Iter 4.000: -1.1822395385066164 | Iter 2.000: -1.6598399007999443
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/

[INFO 12-27 11:31:43] ax.service.ax_client: Generated new trial 15 with parameters {'lr': 0.0, 'accum_iter': 4, 'batch_size': 16, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.


[2m[36m(pid=13241)[0m Epoch     0: adjusting learning rate of group 0 to 2.6508e-02.
[2m[36m(pid=13241)[0m Epoch     1: adjusting learning rate of group 0 to 2.5860e-02.
Result for DEFAULT_d4f4f60e:
  accuracy: 0.6176
  date: 2020-12-27_11-38-22
  done: false
  experiment_id: 03d388f8a0ab43dbb0beda09920a0195
  experiment_tag: 15_accum_iter=8,batch_size=8,is_amsgrad=False,lr=0.026508,middle_fc=False,middle_fc_size=0,weight_decay=0.0005
  hostname: Nevsky
  iterations_since_restore: 1
  loss: 611.8167589404915
  node_ip: 10.0.0.200
  pid: 13241
  should_checkpoint: true
  time_since_restore: 397.9489619731903
  time_this_iter_s: 397.9489619731903
  time_total_s: 397.9489619731903
  timestamp: 1609069102
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: d4f4f60e
  
== Status ==
Memory usage on this node: 7.4/31.3 GiB
Using AsyncHyperBand: num_stopped=14
Bracket: Iter 8.000: -1.1590363451197176 | Iter 4.000: -1.1822395385066164 | Iter 2.000: -1.6598399007999443
Resourc

[INFO 12-27 11:44:56] ax.service.ax_client: Completed trial 14 with data: {'loss': (15169386881542.48, 0.0)}.


Result for DEFAULT_d4f4f60e:
  accuracy: 0.6176
  date: 2020-12-27_11-44-56
  done: true
  experiment_id: 03d388f8a0ab43dbb0beda09920a0195
  experiment_tag: 15_accum_iter=8,batch_size=8,is_amsgrad=False,lr=0.026508,middle_fc=False,middle_fc_size=0,weight_decay=0.0005
  hostname: Nevsky
  iterations_since_restore: 2
  loss: 15169386881542.48
  node_ip: 10.0.0.200
  pid: 13241
  should_checkpoint: true
  time_since_restore: 791.6164605617523
  time_this_iter_s: 393.667498588562
  time_total_s: 791.6164605617523
  timestamp: 1609069496
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: d4f4f60e
  
== Status ==
Memory usage on this node: 7.4/31.3 GiB
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 8.000: -1.1590363451197176 | Iter 4.000: -1.1822395385066164 | Iter 2.000: -1.9033971526954747
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3
Numb

[INFO 12-27 11:44:56] ax.service.ax_client: Generated new trial 16 with parameters {'lr': 0.0, 'accum_iter': 4, 'batch_size': 16, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.


[2m[36m(pid=17045)[0m Epoch     0: adjusting learning rate of group 0 to 2.1069e-03.
[2m[36m(pid=17045)[0m Epoch     1: adjusting learning rate of group 0 to 2.0554e-03.
Result for DEFAULT_17f235c2:
  accuracy: 0.6176
  date: 2020-12-27_11-50-56
  done: false
  experiment_id: 12742033ebee4e43b5a933a34a8eee22
  experiment_tag: 16_accum_iter=4,batch_size=16,is_amsgrad=False,lr=0.0021069,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 1
  loss: 1.6371306037601037
  node_ip: 10.0.0.200
  pid: 17045
  should_checkpoint: true
  time_since_restore: 357.839209318161
  time_this_iter_s: 357.839209318161
  time_total_s: 357.839209318161
  timestamp: 1609069856
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 17f235c2
  
== Status ==
Memory usage on this node: 7.4/31.3 GiB
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 8.000: -1.1590363451197176 | Iter 4.000: -1.1822395385066164 | Iter 2.000: -1.9033971526954747
Resou

[2m[36m(pid=17045)[0m Epoch     3: adjusting learning rate of group 0 to 1.6729e-03.
Result for DEFAULT_17f235c2:
  accuracy: 0.64
  date: 2020-12-27_12-02-44
  done: false
  experiment_id: 12742033ebee4e43b5a933a34a8eee22
  experiment_tag: 16_accum_iter=4,batch_size=16,is_amsgrad=False,lr=0.0021069,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 3
  loss: 1.068995693061925
  node_ip: 10.0.0.200
  pid: 17045
  should_checkpoint: true
  time_since_restore: 1065.6575989723206
  time_this_iter_s: 354.2875692844391
  time_total_s: 1065.6575989723206
  timestamp: 1609070564
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 17f235c2
  
== Status ==
Memory usage on this node: 7.5/31.3 GiB
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 8.000: -1.1590363451197176 | Iter 4.000: -1.1822395385066164 | Iter 2.000: -1.6598399007999443
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 ac

[2m[36m(pid=17045)[0m Epoch     5: adjusting learning rate of group 0 to 1.0539e-03.
Result for DEFAULT_17f235c2:
  accuracy: 0.6388
  date: 2020-12-27_12-14-32
  done: false
  experiment_id: 12742033ebee4e43b5a933a34a8eee22
  experiment_tag: 16_accum_iter=4,batch_size=16,is_amsgrad=False,lr=0.0021069,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 5
  loss: 1.0683026547673382
  node_ip: 10.0.0.200
  pid: 17045
  should_checkpoint: true
  time_since_restore: 1773.9271471500397
  time_this_iter_s: 354.511483669281
  time_total_s: 1773.9271471500397
  timestamp: 1609071272
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 17f235c2
  
== Status ==
Memory usage on this node: 7.5/31.3 GiB
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 8.000: -1.1590363451197176 | Iter 4.000: -1.1600102838836137 | Iter 2.000: -1.6598399007999443
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 

[2m[36m(pid=17045)[0m Epoch     7: adjusting learning rate of group 0 to 4.3504e-04.
Result for DEFAULT_17f235c2:
  accuracy: 0.638
  date: 2020-12-27_12-26-20
  done: false
  experiment_id: 12742033ebee4e43b5a933a34a8eee22
  experiment_tag: 16_accum_iter=4,batch_size=16,is_amsgrad=False,lr=0.0021069,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 7
  loss: 1.0792706254162365
  node_ip: 10.0.0.200
  pid: 17045
  should_checkpoint: true
  time_since_restore: 2482.4698100090027
  time_this_iter_s: 354.6407940387726
  time_total_s: 2482.4698100090027
  timestamp: 1609071980
  timesteps_since_restore: 0
  training_iteration: 7
  trial_id: 17f235c2
  
== Status ==
Memory usage on this node: 7.4/31.3 GiB
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 8.000: -1.1590363451197176 | Iter 4.000: -1.1600102838836137 | Iter 2.000: -1.6598399007999443
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 

[2m[36m(pid=17045)[0m Epoch     9: adjusting learning rate of group 0 to 5.2535e-05.
Result for DEFAULT_17f235c2:
  accuracy: 0.638
  date: 2020-12-27_12-38-09
  done: false
  experiment_id: 12742033ebee4e43b5a933a34a8eee22
  experiment_tag: 16_accum_iter=4,batch_size=16,is_amsgrad=False,lr=0.0021069,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 9
  loss: 1.0815954351726966
  node_ip: 10.0.0.200
  pid: 17045
  should_checkpoint: true
  time_since_restore: 3190.9972970485687
  time_this_iter_s: 354.45585083961487
  time_total_s: 3190.9972970485687
  timestamp: 1609072689
  timesteps_since_restore: 0
  training_iteration: 9
  trial_id: 17f235c2
  
== Status ==
Memory usage on this node: 7.5/31.3 GiB
Using AsyncHyperBand: num_stopped=15
Bracket: Iter 8.000: -1.1374846221525459 | Iter 4.000: -1.1600102838836137 | Iter 2.000: -1.6598399007999443
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0

[INFO 12-27 12:44:03] ax.service.ax_client: Completed trial 15 with data: {'loss': (1.07, 0.0)}.


Result for DEFAULT_17f235c2:
  accuracy: 0.638
  date: 2020-12-27_12-44-03
  done: true
  experiment_id: 12742033ebee4e43b5a933a34a8eee22
  experiment_tag: 16_accum_iter=4,batch_size=16,is_amsgrad=False,lr=0.0021069,middle_fc=False,middle_fc_size=0,weight_decay=0.000125
  hostname: Nevsky
  iterations_since_restore: 10
  loss: 1.0689068142371843
  node_ip: 10.0.0.200
  pid: 17045
  should_checkpoint: true
  time_since_restore: 3545.15083360672
  time_this_iter_s: 354.15353655815125
  time_total_s: 3545.15083360672
  timestamp: 1609073043
  timesteps_since_restore: 0
  training_iteration: 10
  trial_id: 17f235c2
  
== Status ==
Memory usage on this node: 7.4/31.3 GiB
Using AsyncHyperBand: num_stopped=16
Bracket: Iter 8.000: -1.1374846221525459 | Iter 4.000: -1.1600102838836137 | Iter 2.000: -1.6598399007999443
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax

[INFO 12-27 12:44:03] ax.service.ax_client: Generated new trial 17 with parameters {'lr': 0.09, 'accum_iter': 8, 'batch_size': 16, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.


[2m[36m(pid=2312)[0m Epoch     0: adjusting learning rate of group 0 to 3.3247e-03.
[2m[36m(pid=2312)[0m Epoch     1: adjusting learning rate of group 0 to 3.2434e-03.
Result for DEFAULT_f100d25a:
  accuracy: 0.6352
  date: 2020-12-27_12-50-04
  done: false
  experiment_id: 7c0a2652f6d84083b5ba1ef4ae6f6471
  experiment_tag: 17_accum_iter=4,batch_size=16,is_amsgrad=False,lr=0.0033247,middle_fc=False,middle_fc_size=0,weight_decay=0.0005
  hostname: Nevsky
  iterations_since_restore: 1
  loss: 1.0517287299602847
  node_ip: 10.0.0.200
  pid: 2312
  should_checkpoint: true
  time_since_restore: 359.3742530345917
  time_this_iter_s: 359.3742530345917
  time_total_s: 359.3742530345917
  timestamp: 1609073404
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: f100d25a
  
== Status ==
Memory usage on this node: 7.4/31.3 GiB
Using AsyncHyperBand: num_stopped=16
Bracket: Iter 8.000: -1.1374846221525459 | Iter 4.000: -1.1600102838836137 | Iter 2.000: -1.6598399007999443
Resourc

[2m[36m(pid=2312)[0m Epoch     3: adjusting learning rate of group 0 to 2.6397e-03.
Result for DEFAULT_f100d25a:
  accuracy: 0.6148
  date: 2020-12-27_13-01-54
  done: false
  experiment_id: 7c0a2652f6d84083b5ba1ef4ae6f6471
  experiment_tag: 17_accum_iter=4,batch_size=16,is_amsgrad=False,lr=0.0033247,middle_fc=False,middle_fc_size=0,weight_decay=0.0005
  hostname: Nevsky
  iterations_since_restore: 3
  loss: 1.1214129721062094
  node_ip: 10.0.0.200
  pid: 2312
  should_checkpoint: true
  time_since_restore: 1069.4258484840393
  time_this_iter_s: 355.2141230106354
  time_total_s: 1069.4258484840393
  timestamp: 1609074114
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: f100d25a
  
== Status ==
Memory usage on this node: 7.4/31.3 GiB
Using AsyncHyperBand: num_stopped=16
Bracket: Iter 8.000: -1.1374846221525459 | Iter 4.000: -1.1600102838836137 | Iter 2.000: -1.416282648904414
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 acce

[2m[36m(pid=2312)[0m Epoch     5: adjusting learning rate of group 0 to 1.6629e-03.
Result for DEFAULT_f100d25a:
  accuracy: 0.6204
  date: 2020-12-27_13-13-45
  done: false
  experiment_id: 7c0a2652f6d84083b5ba1ef4ae6f6471
  experiment_tag: 17_accum_iter=4,batch_size=16,is_amsgrad=False,lr=0.0033247,middle_fc=False,middle_fc_size=0,weight_decay=0.0005
  hostname: Nevsky
  iterations_since_restore: 5
  loss: 1.1074678897857666
  node_ip: 10.0.0.200
  pid: 2312
  should_checkpoint: true
  time_since_restore: 1779.7398190498352
  time_this_iter_s: 354.9112915992737
  time_total_s: 1779.7398190498352
  timestamp: 1609074825
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: f100d25a
  
== Status ==
Memory usage on this node: 7.4/31.3 GiB
Using AsyncHyperBand: num_stopped=16
Bracket: Iter 8.000: -1.1374846221525459 | Iter 4.000: -1.1377810292606112 | Iter 2.000: -1.416282648904414
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 acce

[2m[36m(pid=2312)[0m Epoch     7: adjusting learning rate of group 0 to 6.8605e-04.
Result for DEFAULT_f100d25a:
  accuracy: 0.6208
  date: 2020-12-27_13-25-35
  done: false
  experiment_id: 7c0a2652f6d84083b5ba1ef4ae6f6471
  experiment_tag: 17_accum_iter=4,batch_size=16,is_amsgrad=False,lr=0.0033247,middle_fc=False,middle_fc_size=0,weight_decay=0.0005
  hostname: Nevsky
  iterations_since_restore: 7
  loss: 1.1088113482994368
  node_ip: 10.0.0.200
  pid: 2312
  should_checkpoint: true
  time_since_restore: 2490.0439126491547
  time_this_iter_s: 355.2145094871521
  time_total_s: 2490.0439126491547
  timestamp: 1609075535
  timesteps_since_restore: 0
  training_iteration: 7
  trial_id: f100d25a
  
== Status ==
Memory usage on this node: 7.4/31.3 GiB
Using AsyncHyperBand: num_stopped=16
Bracket: Iter 8.000: -1.1374846221525459 | Iter 4.000: -1.1377810292606112 | Iter 2.000: -1.416282648904414
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 acce

[2m[36m(pid=2312)[0m Epoch     9: adjusting learning rate of group 0 to 8.2338e-05.
Result for DEFAULT_f100d25a:
  accuracy: 0.6276
  date: 2020-12-27_13-37-25
  done: false
  experiment_id: 7c0a2652f6d84083b5ba1ef4ae6f6471
  experiment_tag: 17_accum_iter=4,batch_size=16,is_amsgrad=False,lr=0.0033247,middle_fc=False,middle_fc_size=0,weight_decay=0.0005
  hostname: Nevsky
  iterations_since_restore: 9
  loss: 1.100792486456376
  node_ip: 10.0.0.200
  pid: 2312
  should_checkpoint: true
  time_since_restore: 3200.1490099430084
  time_this_iter_s: 355.5687093734741
  time_total_s: 3200.1490099430084
  timestamp: 1609076245
  timesteps_since_restore: 0
  training_iteration: 9
  trial_id: f100d25a
  
== Status ==
Memory usage on this node: 7.4/31.3 GiB
Using AsyncHyperBand: num_stopped=16
Bracket: Iter 8.000: -1.1242831896377514 | Iter 4.000: -1.1377810292606112 | Iter 2.000: -1.416282648904414
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accel

[INFO 12-27 13:43:20] ax.service.ax_client: Completed trial 16 with data: {'loss': (1.13, 0.0)}.


Result for DEFAULT_f100d25a:
  accuracy: 0.6108
  date: 2020-12-27_13-43-20
  done: true
  experiment_id: 7c0a2652f6d84083b5ba1ef4ae6f6471
  experiment_tag: 17_accum_iter=4,batch_size=16,is_amsgrad=False,lr=0.0033247,middle_fc=False,middle_fc_size=0,weight_decay=0.0005
  hostname: Nevsky
  iterations_since_restore: 10
  loss: 1.1331989123851438
  node_ip: 10.0.0.200
  pid: 2312
  should_checkpoint: true
  time_since_restore: 3554.8826320171356
  time_this_iter_s: 354.7336220741272
  time_total_s: 3554.8826320171356
  timestamp: 1609076600
  timesteps_since_restore: 0
  training_iteration: 10
  trial_id: f100d25a
  
== Status ==
Memory usage on this node: 7.4/31.3 GiB
Using AsyncHyperBand: num_stopped=17
Bracket: Iter 8.000: -1.1242831896377514 | Iter 4.000: -1.1377810292606112 | Iter 2.000: -1.416282648904414
Resources requested: 8/8 CPUs, 1/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax

[INFO 12-27 13:43:20] ax.service.ax_client: Generated new trial 18 with parameters {'lr': 0.0, 'accum_iter': 8, 'batch_size': 16, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.
2020-12-27 13:43:22,418	ERROR trial_runner.py:793 -- Trial DEFAULT_334eb200: Error processing event.
Traceback (most recent call last):
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 726, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/tune/ray_trial_executor.py", line 489, in fetch_result
    result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/worker.py", line 1452, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RuntimeError): [36mray::ImplicitFunc.train()[39m (pid=21616, ip=10.0.0.200)
  File "python/ray/

Traceback (most recent call last):
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/function_manager.py", line 493, in _load_actor_class_from_gcs
    actor_class = pickle.loads(pickled_class)
AttributeError: Can't get attribute 'setup' on <module 'common_utils' from '/opt/favordata/AI/Felix/kaggle-cassava/common_utils.py'>

[2m[36m(pid=21926)[0m 2020-12-27 13:43:26,426	ERROR function_manager.py:495 -- Failed to load actor class ImplicitFunc.
[2m[36m(pid=21926)[0m Traceback (most recent call last):
[2m[36m(pid=21926)[0m   File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/function_manager.py", line 493, in _load_actor_class_from_gcs
[2m[36m(pid=21926)[0m     actor_class = pickle.loads(pickled_class)
[2m[36m(pid=21926)[0m AttributeError: Can't get attribute 'setup' on <module 'common_utils' from '/opt/favordata/AI/Felix/kaggle-cassava/common_utils.py'>


== Status ==
Memory usage on this node: 5.1/31.3 GiB
Using AsyncHyperBand: num_stopped=17
Bracket: Iter 8.000: -1.1242831896377514 | Iter 4.000: -1.1377810292606112 | Iter 2.000: -1.416282648904414
Resources requested: 0/8 CPUs, 0/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3
Number of trials: 22/50 (4 ERROR, 1 PENDING, 17 TERMINATED)
+------------------+------------+-------+--------------+--------------+--------------+------------+-------------+------------------+----------------+----------------+------------+----------------------+
| Trial name       | status     | loc   |   accum_iter |   batch_size | is_amsgrad   |         lr | middle_fc   |   middle_fc_size |   weight_decay |           loss |   accuracy |   training_iteration |
|------------------+------------+-------+--------------+--------------+--------------+------------+-------------+------------------+----------------+------

[INFO 12-27 13:43:26] ax.service.ax_client: Generated new trial 22 with parameters {'lr': 0.0, 'accum_iter': 2, 'batch_size': 16, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.
2020-12-27 13:43:27,719	ERROR trial_runner.py:793 -- Trial DEFAULT_7e208472: Error processing event.
Traceback (most recent call last):
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 726, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/tune/ray_trial_executor.py", line 489, in fetch_result
    result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/worker.py", line 1452, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RuntimeError): [36mray::ImplicitFunc.train()[39m (pid=22283, ip=10.0.0.200)
  File "python/ray/

[2m[36m(pid=22472)[0m 2020-12-27 13:43:31,571	ERROR function_manager.py:495 -- Failed to load actor class ImplicitFunc.
[2m[36m(pid=22472)[0m Traceback (most recent call last):
[2m[36m(pid=22472)[0m   File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/function_manager.py", line 493, in _load_actor_class_from_gcs
[2m[36m(pid=22472)[0m     actor_class = pickle.loads(pickled_class)
Traceback (most recent call last):
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/function_manager.py", line 493, in _load_actor_class_from_gcs
    actor_class = pickle.loads(pickled_class)
AttributeError: Can't get attribute 'setup' on <module 'common_utils' from '/opt/favordata/AI/Felix/kaggle-cassava/common_utils.py'>

[2m[36m(pid=22472)[0m AttributeError: Can't get attribute 'setup' on <module 'common_utils' from '/opt/favordata/AI/Felix/kaggle-cassava/common_utils.py'>


== Status ==
Memory usage on this node: 5.0/31.3 GiB
Using AsyncHyperBand: num_stopped=17
Bracket: Iter 8.000: -1.1242831896377514 | Iter 4.000: -1.1377810292606112 | Iter 2.000: -1.416282648904414
Resources requested: 0/8 CPUs, 0/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3
Number of trials: 26/50 (8 ERROR, 1 PENDING, 17 TERMINATED)
+------------------+------------+-------+--------------+--------------+--------------+------------+-------------+------------------+----------------+----------------+------------+----------------------+
| Trial name       | status     | loc   |   accum_iter |   batch_size | is_amsgrad   |         lr | middle_fc   |   middle_fc_size |   weight_decay |           loss |   accuracy |   training_iteration |
|------------------+------------+-------+--------------+--------------+--------------+------------+-------------+------------------+----------------+------

[INFO 12-27 13:43:31] ax.service.ax_client: Generated new trial 26 with parameters {'lr': 0.07, 'accum_iter': 2, 'batch_size': 16, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.
2020-12-27 13:43:32,930	ERROR trial_runner.py:793 -- Trial DEFAULT_813d1b34: Error processing event.
Traceback (most recent call last):
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 726, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/tune/ray_trial_executor.py", line 489, in fetch_result
    result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/worker.py", line 1452, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RuntimeError): [36mray::ImplicitFunc.train()[39m (pid=22498, ip=10.0.0.200)
  File "python/ray

Traceback (most recent call last):
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/function_manager.py", line 493, in _load_actor_class_from_gcs
    actor_class = pickle.loads(pickled_class)
AttributeError: Can't get attribute 'setup' on <module 'common_utils' from '/opt/favordata/AI/Felix/kaggle-cassava/common_utils.py'>

[2m[36m(pid=22656)[0m 2020-12-27 13:43:36,783	ERROR function_manager.py:495 -- Failed to load actor class ImplicitFunc.
[2m[36m(pid=22656)[0m Traceback (most recent call last):
[2m[36m(pid=22656)[0m   File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/function_manager.py", line 493, in _load_actor_class_from_gcs
[2m[36m(pid=22656)[0m     actor_class = pickle.loads(pickled_class)
[2m[36m(pid=22656)[0m AttributeError: Can't get attribute 'setup' on <module 'common_utils' from '/opt/favordata/AI/Felix/kaggle-cassava/common_utils.py'>


== Status ==
Memory usage on this node: 5.1/31.3 GiB
Using AsyncHyperBand: num_stopped=17
Bracket: Iter 8.000: -1.1242831896377514 | Iter 4.000: -1.1377810292606112 | Iter 2.000: -1.416282648904414
Resources requested: 0/8 CPUs, 0/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3
Number of trials: 30/50 (12 ERROR, 1 PENDING, 17 TERMINATED)
+------------------+------------+-------+--------------+--------------+--------------+------------+-------------+------------------+----------------+-------------+------------+----------------------+
| Trial name       | status     | loc   |   accum_iter |   batch_size | is_amsgrad   |         lr | middle_fc   |   middle_fc_size |   weight_decay |        loss |   accuracy |   training_iteration |
|------------------+------------+-------+--------------+--------------+--------------+------------+-------------+------------------+----------------+-----------

[INFO 12-27 13:43:37] ax.service.ax_client: Generated new trial 30 with parameters {'lr': 0.01, 'accum_iter': 2, 'batch_size': 8, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.
Traceback (most recent call last):
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/function_manager.py", line 493, in _load_actor_class_from_gcs
    actor_class = pickle.loads(pickled_class)
AttributeError: Can't get attribute 'setup' on <module 'common_utils' from '/opt/favordata/AI/Felix/kaggle-cassava/common_utils.py'>

2020-12-27 13:43:38,073	ERROR trial_runner.py:793 -- Trial DEFAULT_8457d052: Error processing event.
Traceback (most recent call last):
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 726, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/tune/ray_trial_executor.py", line 489,

2020-12-27 13:43:41,935	ERROR trial_runner.py:793 -- Trial DEFAULT_86a61652: Error processing event.
Traceback (most recent call last):
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 726, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/tune/ray_trial_executor.py", line 489, in fetch_result
    result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/worker.py", line 1452, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RuntimeError): [36mray::ImplicitFunc.train()[39m (pid=22845, ip=10.0.0.200)
  File "python/ray/_raylet.pyx", line 443, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 477, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 481, in ray._raylet.execute_task
  File "python/ray/_

== Status ==
Memory usage on this node: 5.0/31.3 GiB
Using AsyncHyperBand: num_stopped=17
Bracket: Iter 8.000: -1.1242831896377514 | Iter 4.000: -1.1377810292606112 | Iter 2.000: -1.416282648904414
Resources requested: 0/8 CPUs, 0/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3
Number of trials: 34/50 (16 ERROR, 1 PENDING, 17 TERMINATED)
+------------------+------------+-------+--------------+--------------+--------------+------------+-------------+------------------+----------------+-------------+------------+----------------------+
| Trial name       | status     | loc   |   accum_iter |   batch_size | is_amsgrad   |         lr | middle_fc   |   middle_fc_size |   weight_decay |        loss |   accuracy |   training_iteration |
|------------------+------------+-------+--------------+--------------+--------------+------------+-------------+------------------+----------------+-----------

[INFO 12-27 13:43:42] ax.service.ax_client: Generated new trial 34 with parameters {'lr': 0.09, 'accum_iter': 2, 'batch_size': 16, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.
2020-12-27 13:43:43,273	ERROR trial_runner.py:793 -- Trial DEFAULT_8769531a: Error processing event.
Traceback (most recent call last):
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 726, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/tune/ray_trial_executor.py", line 489, in fetch_result
    result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/worker.py", line 1452, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RuntimeError): [36mray::ImplicitFunc.train()[39m (pid=22867, ip=10.0.0.200)
  File "python/ray

Traceback (most recent call last):
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/function_manager.py", line 493, in _load_actor_class_from_gcs
    actor_class = pickle.loads(pickled_class)
AttributeError: Can't get attribute 'setup' on <module 'common_utils' from '/opt/favordata/AI/Felix/kaggle-cassava/common_utils.py'>

[2m[36m(pid=23010)[0m 2020-12-27 13:43:47,161	ERROR function_manager.py:495 -- Failed to load actor class ImplicitFunc.
[2m[36m(pid=23010)[0m Traceback (most recent call last):
[2m[36m(pid=23010)[0m   File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/function_manager.py", line 493, in _load_actor_class_from_gcs
[2m[36m(pid=23010)[0m     actor_class = pickle.loads(pickled_class)
[2m[36m(pid=23010)[0m AttributeError: Can't get attribute 'setup' on <module 'common_utils' from '/opt/favordata/AI/Felix/kaggle-cassava/common_utils.py'>


== Status ==
Memory usage on this node: 5.1/31.3 GiB
Using AsyncHyperBand: num_stopped=17
Bracket: Iter 8.000: -1.1242831896377514 | Iter 4.000: -1.1377810292606112 | Iter 2.000: -1.416282648904414
Resources requested: 0/8 CPUs, 0/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3
Number of trials: 38/50 (20 ERROR, 1 PENDING, 17 TERMINATED)
+------------------+------------+-------+--------------+--------------+--------------+------------+-------------+------------------+----------------+-------------+------------+----------------------+
| Trial name       | status     | loc   |   accum_iter |   batch_size | is_amsgrad   |         lr | middle_fc   |   middle_fc_size |   weight_decay |        loss |   accuracy |   training_iteration |
|------------------+------------+-------+--------------+--------------+--------------+------------+-------------+------------------+----------------+-----------

[INFO 12-27 13:43:47] ax.service.ax_client: Generated new trial 38 with parameters {'lr': 0.03, 'accum_iter': 8, 'batch_size': 8, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.
2020-12-27 13:43:48,467	ERROR trial_runner.py:793 -- Trial DEFAULT_8a848b5a: Error processing event.
Traceback (most recent call last):
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 726, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/tune/ray_trial_executor.py", line 489, in fetch_result
    result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/worker.py", line 1452, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RuntimeError): [36mray::ImplicitFunc.train()[39m (pid=23036, ip=10.0.0.200)
  File "python/ray/

Traceback (most recent call last):
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/function_manager.py", line 493, in _load_actor_class_from_gcs
    actor_class = pickle.loads(pickled_class)
AttributeError: Can't get attribute 'setup' on <module 'common_utils' from '/opt/favordata/AI/Felix/kaggle-cassava/common_utils.py'>

[2m[36m(pid=23161)[0m 2020-12-27 13:43:52,357	ERROR function_manager.py:495 -- Failed to load actor class ImplicitFunc.
[2m[36m(pid=23161)[0m Traceback (most recent call last):
[2m[36m(pid=23161)[0m   File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/function_manager.py", line 493, in _load_actor_class_from_gcs
[2m[36m(pid=23161)[0m     actor_class = pickle.loads(pickled_class)
[2m[36m(pid=23161)[0m AttributeError: Can't get attribute 'setup' on <module 'common_utils' from '/opt/favordata/AI/Felix/kaggle-cassava/common_utils.py'>


== Status ==
Memory usage on this node: 5.0/31.3 GiB
Using AsyncHyperBand: num_stopped=17
Bracket: Iter 8.000: -1.1242831896377514 | Iter 4.000: -1.1377810292606112 | Iter 2.000: -1.416282648904414
Resources requested: 0/8 CPUs, 0/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3
Number of trials: 42/50 (24 ERROR, 1 PENDING, 17 TERMINATED)
+------------------+------------+-------+--------------+--------------+--------------+------------+-------------+------------------+----------------+-------------+------------+----------------------+
| Trial name       | status     | loc   |   accum_iter |   batch_size | is_amsgrad   |         lr | middle_fc   |   middle_fc_size |   weight_decay |        loss |   accuracy |   training_iteration |
|------------------+------------+-------+--------------+--------------+--------------+------------+-------------+------------------+----------------+-----------

[INFO 12-27 13:43:52] ax.service.ax_client: Generated new trial 42 with parameters {'lr': 0.01, 'accum_iter': 4, 'batch_size': 8, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.
2020-12-27 13:43:53,695	ERROR trial_runner.py:793 -- Trial DEFAULT_8d9e2224: Error processing event.
Traceback (most recent call last):
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 726, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/tune/ray_trial_executor.py", line 489, in fetch_result
    result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/worker.py", line 1452, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RuntimeError): [36mray::ImplicitFunc.train()[39m (pid=23187, ip=10.0.0.200)
  File "python/ray/

Traceback (most recent call last):
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/function_manager.py", line 493, in _load_actor_class_from_gcs
    actor_class = pickle.loads(pickled_class)
AttributeError: Can't get attribute 'setup' on <module 'common_utils' from '/opt/favordata/AI/Felix/kaggle-cassava/common_utils.py'>

[2m[36m(pid=23354)[0m 2020-12-27 13:43:57,577	ERROR function_manager.py:495 -- Failed to load actor class ImplicitFunc.
[2m[36m(pid=23354)[0m Traceback (most recent call last):
[2m[36m(pid=23354)[0m   File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/function_manager.py", line 493, in _load_actor_class_from_gcs
[2m[36m(pid=23354)[0m     actor_class = pickle.loads(pickled_class)
[2m[36m(pid=23354)[0m AttributeError: Can't get attribute 'setup' on <module 'common_utils' from '/opt/favordata/AI/Felix/kaggle-cassava/common_utils.py'>


== Status ==
Memory usage on this node: 5.1/31.3 GiB
Using AsyncHyperBand: num_stopped=17
Bracket: Iter 8.000: -1.1242831896377514 | Iter 4.000: -1.1377810292606112 | Iter 2.000: -1.416282648904414
Resources requested: 0/8 CPUs, 0/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3
Number of trials: 46/50 (28 ERROR, 1 PENDING, 17 TERMINATED)
+------------------+------------+-------+--------------+--------------+--------------+------------+-------------+------------------+----------------+-------------+------------+----------------------+
| Trial name       | status     | loc   |   accum_iter |   batch_size | is_amsgrad   |         lr | middle_fc   |   middle_fc_size |   weight_decay |        loss |   accuracy |   training_iteration |
|------------------+------------+-------+--------------+--------------+--------------+------------+-------------+------------------+----------------+-----------

[INFO 12-27 13:43:57] ax.service.ax_client: Generated new trial 46 with parameters {'lr': 0.0, 'accum_iter': 8, 'batch_size': 8, 'weight_decay': 0.0, 'is_amsgrad': False, 'middle_fc': False, 'middle_fc_size': 0}.
2020-12-27 13:43:58,889	ERROR trial_runner.py:793 -- Trial DEFAULT_90ba54be: Error processing event.
Traceback (most recent call last):
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 726, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/tune/ray_trial_executor.py", line 489, in fetch_result
    result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/worker.py", line 1452, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RuntimeError): [36mray::ImplicitFunc.train()[39m (pid=23376, ip=10.0.0.200)
  File "python/ray/_

Traceback (most recent call last):
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/function_manager.py", line 493, in _load_actor_class_from_gcs
    actor_class = pickle.loads(pickled_class)
AttributeError: Can't get attribute 'setup' on <module 'common_utils' from '/opt/favordata/AI/Felix/kaggle-cassava/common_utils.py'>

[2m[36m(pid=23521)[0m 2020-12-27 13:44:02,772	ERROR function_manager.py:495 -- Failed to load actor class ImplicitFunc.
[2m[36m(pid=23521)[0m Traceback (most recent call last):
[2m[36m(pid=23521)[0m   File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/function_manager.py", line 493, in _load_actor_class_from_gcs
[2m[36m(pid=23521)[0m     actor_class = pickle.loads(pickled_class)
[2m[36m(pid=23521)[0m AttributeError: Can't get attribute 'setup' on <module 'common_utils' from '/opt/favordata/AI/Felix/kaggle-cassava/common_utils.py'>


== Status ==
Memory usage on this node: 5.0/31.3 GiB
Using AsyncHyperBand: num_stopped=17
Bracket: Iter 8.000: -1.1242831896377514 | Iter 4.000: -1.1377810292606112 | Iter 2.000: -1.416282648904414
Resources requested: 0/8 CPUs, 0/1 GPUs, 0.0/16.16 GiB heap, 0.0/5.57 GiB objects (0/1.0 accelerator_type:RTX)
Result logdir: /opt/favordata/AI/Felix/kaggle-cassava/ray-results/ax3
Number of trials: 50/50 (32 ERROR, 1 PENDING, 17 TERMINATED)
+------------------+------------+-------+--------------+--------------+--------------+------------+-------------+------------------+----------------+-------------+------------+----------------------+
| Trial name       | status     | loc   |   accum_iter |   batch_size | is_amsgrad   |         lr | middle_fc   |   middle_fc_size |   weight_decay |        loss |   accuracy |   training_iteration |
|------------------+------------+-------+--------------+--------------+--------------+------------+-------------+------------------+----------------+-----------

2020-12-27 13:44:04,132	ERROR trial_runner.py:793 -- Trial DEFAULT_93d398a4: Error processing event.
Traceback (most recent call last):
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/tune/trial_runner.py", line 726, in _process_trial
    result = self.trial_executor.fetch_result(trial)
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/tune/ray_trial_executor.py", line 489, in fetch_result
    result = ray.get(trial_future[0], timeout=DEFAULT_GET_TIMEOUT)
  File "/opt/favordata/anaconda3/envs/kaggle/lib/python3.7/site-packages/ray/worker.py", line 1452, in get
    raise value.as_instanceof_cause()
ray.exceptions.RayTaskError(RuntimeError): [36mray::ImplicitFunc.train()[39m (pid=23543, ip=10.0.0.200)
  File "python/ray/_raylet.pyx", line 443, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 477, in ray._raylet.execute_task
  File "python/ray/_raylet.pyx", line 481, in ray._raylet.execute_task
  File "python/ray/_