https://github.com/woven-planet/l5kit/blob/master/examples/simulation/train.ipynb

## Prepare Data path and load cfg

By setting the `L5KIT_DATA_FOLDER` variable, we can point the script to the folder where the data lies.

Then, we load our config file with relative paths and other configurations (rasteriser, training params...).

### Setup

In [1]:
from pathlib import Path
import os

In [2]:
#NOTE: DONT USE RELATIVE PATHS FOR THE MODELS PROVIDED BY L5
# Layout, rooted two levels above the current working directory:
#   Experiments/data
#   Experiments/planning/training/saved_outputs
experiments_directory = Path(os.path.abspath('')).parent.parent / "Experiments"
data_directory = experiments_directory / "data"
prediction_directory = experiments_directory / "planning"
prediction_training_directory = prediction_directory / "training"
save_directory = prediction_training_directory / "saved_outputs"

# parents=True creates every missing ancestor, so creating the two leaf
# directories materialises the whole tree.
data_directory.mkdir(parents=True, exist_ok=True)
save_directory.mkdir(parents=True, exist_ok=True)

In [3]:
import os
# Make the training directory the working directory so that files written with
# relative paths by later cells (e.g. requirements.txt) end up there.
os.chdir(prediction_training_directory)

In [4]:
%%writefile requirements.txt
l5kit
pyyaml
ray==2.0.0rc1
ray[air]
wandb
optuna

Overwriting requirements.txt


In [5]:
%%capture
# %pip (rather than !pip) installs into the environment of the running kernel.
# Equivalent alternative: %pip install -r requirements.txt
%pip install l5kit pyyaml
%pip install wandb --upgrade
# Pin the release candidate and request the AIR extras in a single requirement
# so the extras are resolved against the pinned version.
%pip install "ray[air]==2.0.0rc1"
%pip install optuna

In [6]:
from typing import Dict

from tempfile import gettempdir
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision.models.resnet import resnet50
from tqdm import tqdm

from l5kit.configs import load_config_data
from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.rasterization import build_rasterizer
from l5kit.evaluation import write_pred_csv, compute_metrics_csv, read_gt_csv, create_chopped_dataset
from l5kit.evaluation.chop_dataset import MIN_FUTURE_STEPS
from l5kit.evaluation.metrics import neg_multi_log_likelihood, time_displace, rmse, prob_true_mode, average_displacement_error_oracle, average_displacement_error_mean, final_displacement_error_oracle, final_displacement_error_mean, detect_collision, distance_to_reference_trajectory
from l5kit.geometry import transform_points
from l5kit.visualization import PREDICTED_POINTS_COLOR, TARGET_POINTS_COLOR, draw_trajectory
from prettytable import PrettyTable
from pathlib import Path

import os

In [7]:
from tempfile import gettempdir
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from tqdm import tqdm

from l5kit.configs import load_config_data
from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import EgoDataset
from l5kit.rasterization import build_rasterizer
from l5kit.geometry import transform_points
from l5kit.visualization import TARGET_POINTS_COLOR, draw_trajectory
from l5kit.planning.rasterized.model import RasterizedPlanningModel
from l5kit.kinematic import AckermanPerturbation
from l5kit.random import GaussianRandomGenerator

import os

### Get Data from Wandb

In [8]:
import wandb
# Authenticate with Weights & Biases before the first run is created.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33ma-sh0ts[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [9]:
# Metadata for the W&B run that downloads the dataset
wandb_entity, project_name = "l5-demo", "l5-planning"
run_name, run_type = "download-l5-data", "download"
run_description = "\nDownload data for the task of training a planning model\n"
tags = ["download", "data"]

In [10]:
#🪄🐝
# Open a dedicated W&B run for the download step; entity, project, job type,
# name, notes and tags all come from the run-information variables.
run = wandb.init(
    entity=wandb_entity,
    project=project_name,
    job_type=run_type,
    name=run_name,
    notes=run_description,
    tags=tags
)

[34m[1mwandb[0m: Currently logged in as: [33ma-sh0ts[0m ([33ml5-demo[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [11]:
# Coordinates of the W&B dataset artifact to fetch: entity/project/name:alias
artifact_entity, artifact_project = "l5-demo", "l5-common"
artifact_name, artifact_alias = "l5-data", "latest"
artifact_type = "dataset"

In [12]:
#🪄🐝
# Declare the dataset artifact ("<entity>/<project>/<name>:<alias>") as an input of the current run.
artifact = run.use_artifact(f"{artifact_entity}/{artifact_project}/{artifact_name}:{artifact_alias}", type=artifact_type)

In [13]:
# Fetch the artifact's files into data_directory; the return value is not needed.
_ = artifact.download(data_directory)

[34m[1mwandb[0m: Downloading large artifact l5-data:latest, 2386.92MB. 517 files... Done. 0:0:0.2


In [14]:
#BUG: need to separate runs into download and training due to issues with routing runs after ray.tune
# Close the download run here, before the Ray Tune section of the notebook.
run.finish()

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

In [15]:
# The dataset itself is located through the L5KIT_DATA_FOLDER environment
# variable; only the configuration file is read here.
cfg = load_config_data(data_directory / "configurations" / "planning_config.yaml")

# TODO: replace the hard-coded key with logic that selects the dataset
cfg["train_data_loader"]["key"] = cfg["val_data_loader"]["key"] = "scenes/sample.zarr"

# Root folder of the downloaded l5kit dataset
l5_data_location = data_directory / "dataset"

In [16]:
# cfg["zarr_dataset_location"] = l5_data_location
# The training function builds LocalDataManager() without arguments, so the
# dataset root is handed over through the L5KIT_DATA_FOLDER environment variable.
os.environ["L5KIT_DATA_FOLDER"] = str(l5_data_location)

## Model

Our baseline is a simple `resnet50` pretrained on `imagenet`. We must replace the input and the final layer to address our requirements.

In [17]:
def build_model(cfg: Dict, num_input_channels, criterion):
    """Create the rasterized planning model described by ``cfg``.

    Args:
        cfg: configuration dict; ``cfg["model_params"]`` supplies
            ``model_architecture`` and ``future_num_frames``.
        num_input_channels: number of channels of the input raster.
        criterion: loss object handed to the model.

    Returns:
        The constructed ``RasterizedPlanningModel``.
    """
    model_params = cfg["model_params"]
    # One (X, Y, Yaw) triple is predicted for every future state.
    values_per_state = 3
    return RasterizedPlanningModel(
        model_arch=model_params["model_architecture"],
        num_input_channels=num_input_channels,
        num_targets=values_per_state * model_params["future_num_frames"],
        weights_scaling=[1., 1., 1.],
        criterion=criterion,
    )

In [18]:
def forward(data, model, criterion):
    """Run ``model`` on ``data`` and pull the loss out of its output.

    Args:
        data: batch passed unchanged to ``model``.
        model: callable returning a mapping that contains a ``"loss"`` entry.
        criterion: not used here; kept so the signature matches the callers.

    Returns:
        Tuple ``(loss, outputs)``, where ``outputs`` is the full model output.
    """
    model_outputs = model(data)
    return model_outputs["loss"], model_outputs

In [19]:
def train_planning_model_epoch(data, model, criterion, optimizer):
    """Perform one optimisation step on a single batch.

    Args:
        data: batch forwarded to ``forward``.
        model: model being trained.
        criterion: passed through to ``forward``.
        optimizer: optimizer whose gradients are reset and which is stepped.

    Returns:
        Tuple ``(loss, outputs)`` exactly as returned by ``forward``.
    """
    step_loss, step_outputs = forward(data, model, criterion)

    # Backward pass: drop stale gradients, backpropagate, then update weights
    optimizer.zero_grad()
    step_loss.backward()
    optimizer.step()

    return step_loss, step_outputs

Our data pipeline maps a raw `.zarr` folder into a multi-processing instance ready for training by:
- loading the `zarr` into a `ChunkedDataset` object. This object has a reference to the different arrays in the zarr (e.g. agents and traffic lights);
- wrapping the `ChunkedDataset` into an `EgoDataset`, which inherits from the torch `Dataset` class;
- passing the `EgoDataset` into a torch `DataLoader`.

# Training

Note: if you're on macOS and using the `py_satellite` rasterizer, you may need to disable OpenCV multithreading by adding
`cv2.setNumThreads(0)` before the following cell. This seems to only affect runs inside a Python notebook and is caused by the `cv2.warpAffine` function.

In [20]:
import ray.train as train
from ray.air import session, Checkpoint

In [21]:
from ray import tune
from ray.tune.tuner import Tuner

In [22]:
def train_planning_model(tuner_cfg : Dict):
    """Per-worker Ray Train loop: fit the planning model for a fixed number of steps.

    Metrics are sent through ``session.report`` after every step. A checkpoint
    is attached every ``max_num_steps // num_checkpoints`` steps (at least every
    step) and always on the final step.

    Args:
        tuner_cfg: training configuration with the keys ``shuffle``,
            ``batch_size``, ``num_workers``, ``lr``, ``max_num_steps``,
            ``dataset_key``, ``perturb_probability`` and ``cfg`` (the l5kit
            configuration used to build the rasterizer, dataset and model).
    """
    dm = LocalDataManager()

    # ==== Configurations
    shuffle = tuner_cfg["shuffle"]
    # Cast to int: the search space may deliver these as floats (e.g. tune.quniform)
    batch_size = int(tuner_cfg["batch_size"])
    num_workers = tuner_cfg["num_workers"]
    lr = tuner_cfg["lr"]
    max_num_steps = int(tuner_cfg["max_num_steps"])
    dataset_key = tuner_cfg["dataset_key"]
    cfg = tuner_cfg["cfg"]
    perturb_prob = tuner_cfg["perturb_probability"]

    # ==== Loading Dataset
    rasterizer = build_rasterizer(cfg, dm)
    mean = np.array([0.0, 0.0, 0.0])  # lateral, longitudinal and angular
    std = np.array([0.5, 1.5, np.pi / 6])
    perturbation = AckermanPerturbation(
        random_offset_generator=GaussianRandomGenerator(mean=mean, std=std), perturb_prob=perturb_prob)

    train_zarr = ChunkedDataset(dm.require(dataset_key)).open()
    train_dataset = EgoDataset(cfg, train_zarr, rasterizer, perturbation)

    # The global batch is split across the workers; keep at least one sample per
    # worker so the DataLoader is never asked for batch_size=0.
    batch_size_per_worker = max(1, batch_size // session.get_world_size())
    train_dataloader = DataLoader(train_dataset, shuffle=shuffle, batch_size=batch_size_per_worker, num_workers=num_workers)
    train_dataloader = train.torch.prepare_data_loader(train_dataloader)

    # ==== Init model
    criterion = nn.MSELoss(reduction="none")
    num_input_channels = rasterizer.num_channels()

    model = build_model(cfg, num_input_channels, criterion)
    model = train.torch.prepare_model(model)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # ==== TRAIN LOOP
    num_checkpoints = 5
    # With fewer steps than checkpoints the integer division yields 0, which
    # would make `step % steps_before_checkpointing` raise ZeroDivisionError.
    steps_before_checkpointing = max(1, max_num_steps // num_checkpoints)
    losses_train = []
    # Only the rank-0 worker prints, so the logs are not duplicated per worker.
    is_rank_zero = session.get_world_rank() == 0

    # Nothing in the loop leaves training mode, so enable it once up front.
    model.train()
    torch.set_grad_enabled(True)

    tr_it = iter(train_dataloader)
    for step in range(max_num_steps):
        try:
            data = next(tr_it)
        except StopIteration:
            # Dataset exhausted before max_num_steps: start another pass.
            tr_it = iter(train_dataloader)
            data = next(tr_it)

        loss, _ = train_planning_model_epoch(data, model, criterion, optimizer)
        step_loss = loss.item()
        losses_train.append(step_loss)
        metrics = {
            "loss": step_loss,
            # Running mean over all steps so far, as a plain Python float
            "avg_loss": float(np.mean(losses_train)),
        }

        if is_rank_zero:
            print(metrics)

        is_last_step = step == max_num_steps - 1
        if step % steps_before_checkpointing == 0 or is_last_step:
            # NOTE(review): the whole (prepared) model object is checkpointed,
            # not its state_dict; kept as-is so existing checkpoint consumers
            # still find a "model" entry of the same kind.
            session.report(
                metrics=metrics,
                checkpoint=Checkpoint.from_dict(dict(step=step, model=model)))
        else:
            session.report(metrics=metrics)

### Distributed Training using Ray

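We detect the hardware available to the current training session and divide it between the training workers: with GPUs, we use one worker per GPU and share the CPUs between them; without GPUs, we split the CPUs evenly between the workers where possible.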

In [23]:
from ray.train.torch import TorchTrainer
from ray.air.config import RunConfig, ScalingConfig
from ray.air.callbacks.wandb import WandbLoggerCallback #🪄🐝

In [24]:
import multiprocessing

In [25]:
# Hardware visible to this process; used below to size the Ray training workers.
USE_GPU = torch.cuda.is_available()
NUM_GPUS = torch.cuda.device_count()
NUM_CPUS = multiprocessing.cpu_count()

In [26]:
if USE_GPU:
    # One training actor per GPU; the CPUs are shared out as data-loading workers
    num_actors = NUM_GPUS
    num_data_workers = NUM_CPUS // num_actors
else:
    # CPU only: at most 4 data-loading workers per actor, and at least one actor
    num_data_workers = min(NUM_CPUS, 4)
    ideal_num_actors = NUM_CPUS // num_data_workers
    num_actors = max(ideal_num_actors, 1)

To use Ray, all we need to do is wrap the training function above. The only addition needed in that function was the calls to `session.report` to log metrics during training.

In [27]:
#NOTE: To figure out if scaling config intuiutin is correct: num_actors divide resources between each actor and within the train func each actor can the utilize the shared resources
trainer = TorchTrainer(
    train_loop_per_worker=train_planning_model,
    scaling_config=ScalingConfig(num_workers=num_actors, use_gpu=USE_GPU),
)

2022-09-08 05:18:47,315	INFO worker.py:1487 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m.


### Distributed Hyperparameter Tuning using Ray

Thanks to Ray's simple interface, we can wrap our normal trainer in Ray's `Tuner`, which lets us run efficient hyperparameter optimization. In our case the search is driven by `optuna`.

In [28]:
from copy import deepcopy

# Search space handed to every trial as `train_loop_config`.
tuner_train_config = {}
##static
tuner_train_config["shuffle"] = cfg["train_data_loader"]["shuffle"]
tuner_train_config["num_workers"] = num_data_workers
tuner_train_config["dataset_key"] = cfg["train_data_loader"]["key"]
tuner_train_config["perturb_probability"] = cfg["train_data_loader"]["perturb_probability"]

##tunable
tuner_train_config["max_num_steps"] = 5
# tuner_train_config["max_num_steps"] = tune.quniform(1000, 5000, 250)
tuner_train_config["lr"] = tune.loguniform(1e-4, 1e-2)
tuner_train_config["batch_size"] = tune.quniform(6, 24, 6)

# Put the map-type sampler into a private copy of the configuration. Writing it
# into the global `cfg` would leave a sampler object where a string is expected,
# breaking any later driver-side use of `cfg`.
tuner_train_config["cfg"] = deepcopy(cfg)
tuner_train_config["cfg"]["raster_params"]["map_type"] = tune.choice(["py_semantic", "py_satellite"])

In [29]:
from ray.tune.logger import LoggerCallback
from typing import Dict, List

In [30]:
from ray.tune.stopper import ExperimentPlateauStopper
from ray.tune.search.optuna import OptunaSearch

In [31]:
# Number of hyperparameter samples requested from the tuner (passed as num_samples)
n_search_attempts = 25

In [32]:
# Optuna-backed search algorithm, built with its defaults and handed to TuneConfig as search_alg
optuna_search = OptunaSearch()

In [33]:
# Hyperparameter search around the trainer: Optuna proposes `n_search_attempts`
# values for `train_loop_config`, and trials are ranked by minimal `avg_loss`.
tuner = Tuner(
        trainer,
        tune_config=tune.TuneConfig(
            metric="avg_loss", #loss or avg_loss here?
            mode="min",
            search_alg=optuna_search,
            num_samples=n_search_attempts,
        ),
        param_space={
            "train_loop_config": tuner_train_config
        },
        run_config=RunConfig(
            # Stop condition driven by `avg_loss` — presumably evaluated across the whole experiment; confirm against the Ray docs
            stop=ExperimentPlateauStopper("avg_loss"),
            # Log each trial, including its checkpoints, to the "<project_name>-trials" W&B project
            callbacks=[WandbLoggerCallback(project=f"{project_name}-trials", save_checkpoints=True),]))  #🪄🐝

  


### Aggregate and Report Metrics from All Trials

In [34]:
# Launch the search; the value returned by fit() is kept for inspecting the trial results.
analysis = tuner.fit()

[32m[I 2022-09-08 05:18:50,373][0m A new study created in memory with name: optuna[0m
2022-09-08 05:18:50,456	INFO wandb.py:119 -- Already logged into W&B.


Trial name,status,loc,train_loop_config...,train_loop_config....1,train_loop_config/lr,iter,total time (s),loss,avg_loss,_timestamp
TorchTrainer_b92ccfec,TERMINATED,10.150.0.3:4679,18,py_satellite,0.000585319,5,85.996,5.88253,9.68202,1662614419
TorchTrainer_bb34233a,TERMINATED,10.150.0.3:4751,12,py_semantic,0.000355264,5,60.1559,7.96299,11.9063,1662614398
TorchTrainer_bb3d4924,TERMINATED,10.150.0.3:6324,18,py_semantic,0.000109337,5,82.5115,14.6648,13.5782,1662614496
TorchTrainer_e91748fe,TERMINATED,10.150.0.3:6621,24,py_satellite,0.00293406,5,111.412,9.45308,9.00326,1662614546
TorchTrainer_f59446cc,TERMINATED,10.150.0.3:7512,24,py_semantic,0.00637506,5,109.832,8.3464,12.2668,1662614621
TorchTrainer_22aeee5a,TERMINATED,10.150.0.3:8344,24,py_satellite,0.00150031,5,107.464,3.52309,8.24595,1662614668
TorchTrainer_40435014,TERMINATED,10.150.0.3:9212,6,py_satellite,0.000119926,5,34.906,9.8483,13.6248,1662614673
TorchTrainer_6e8ceeee,TERMINATED,10.150.0.3:10178,24,py_semantic,0.000181459,5,109.722,10.7243,11.7222,1662614800
TorchTrainer_8d6a8772,TERMINATED,10.150.0.3:10189,18,py_satellite,0.000375263,5,84.4454,4.75276,11.841,1662614776
TorchTrainer_8df51bd0,TERMINATED,10.150.0.3:11812,18,py_satellite,0.000148744,5,77.9675,10.573,12.2735,1662614870


[2m[36m(RayTrainWorker pid=4750)[0m 2022-09-08 05:18:57,940	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=4861)[0m 2022-09-08 05:19:04,326	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=4750)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=4750)[0m 2022-09-08 05:19:08,829	INFO train_loop_utils.py:300 -- Moving model to device: cpu
[2m[36m(RayTrainWorker pid=4861)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=4861)[0m 2022-09-08 05:19:11,548	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=4861)[0m {'loss': 11.340069770812988, 'avg_loss': 11.340069770812988}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_bb34233a_2_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-18-53/checkpoint_000000)... 

Result for TorchTrainer_bb34233a:
  _time_this_iter_s: 18.661038637161255
  _timestamp: 1662614363
  _training_iteration: 1
  avg_loss: 11.340069770812988
  date: 2022-09-08_05-19-24
  done: false
  experiment_id: 8d6f1b75cbd04508a01e49b23e01eec7
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 11.340069770812988
  node_ip: 10.150.0.3
  pid: 4751
  should_checkpoint: true
  time_since_restore: 25.291318893432617
  time_this_iter_s: 25.291318893432617
  time_total_s: 25.291318893432617
  timestamp: 1662614364
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: bb34233a
  warmup_time: 0.01678323745727539
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=4750)[0m {'loss': 15.483832359313965, 'avg_loss': 15.483832359313965}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_b92ccfec_1_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-18-50/checkpoint_000000)... 

Result for TorchTrainer_b92ccfec:
  _time_this_iter_s: 27.20443081855774
  _timestamp: 1662614365
  _training_iteration: 1
  avg_loss: 15.483832359313965
  date: 2022-09-08_05-19-26
  done: false
  experiment_id: f9a874a06c85451abb488b73768d9bf7
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 15.483832359313965
  node_ip: 10.150.0.3
  pid: 4679
  should_checkpoint: true
  time_since_restore: 32.57356834411621
  time_this_iter_s: 32.57356834411621
  time_total_s: 32.57356834411621
  timestamp: 1662614366
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: b92ccfec
  warmup_time: 0.004803895950317383
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=4861)[0m {'loss': 13.776616096496582, 'avg_loss': 12.558342933654785}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_bb34233a_2_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-18-53/checkpoint_000001)... 

Result for TorchTrainer_bb34233a:
  _time_this_iter_s: 9.029812335968018
  _timestamp: 1662614372
  _training_iteration: 2
  avg_loss: 12.558342933654785
  date: 2022-09-08_05-19-32
  done: false
  experiment_id: 8d6f1b75cbd04508a01e49b23e01eec7
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 13.776616096496582
  node_ip: 10.150.0.3
  pid: 4751
  should_checkpoint: true
  time_since_restore: 34.19834589958191
  time_this_iter_s: 8.907027006149292
  time_total_s: 34.19834589958191
  timestamp: 1662614372
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: bb34233a
  warmup_time: 0.01678323745727539
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=4750)[0m {'loss': 7.0995941162109375, 'avg_loss': 11.291713237762451}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_b92ccfec_1_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-18-50/checkpoint_000001)... 

Result for TorchTrainer_b92ccfec:
  _time_this_iter_s: 13.199923038482666
  _timestamp: 1662614378
  _training_iteration: 2
  avg_loss: 11.291713237762451
  date: 2022-09-08_05-19-39
  done: false
  experiment_id: f9a874a06c85451abb488b73768d9bf7
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 7.0995941162109375
  node_ip: 10.150.0.3
  pid: 4679
  should_checkpoint: true
  time_since_restore: 45.5499050617218
  time_this_iter_s: 12.97633671760559
  time_total_s: 45.5499050617218
  timestamp: 1662614379
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: b92ccfec
  warmup_time: 0.004803895950317383
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=4861)[0m {'loss': 14.39365291595459, 'avg_loss': 13.17011292775472}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_bb34233a_2_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-18-53/checkpoint_000002)... 

Result for TorchTrainer_bb34233a:
  _time_this_iter_s: 8.54618239402771
  _timestamp: 1662614381
  _training_iteration: 3
  avg_loss: 13.17011292775472
  date: 2022-09-08_05-19-41
  done: false
  experiment_id: 8d6f1b75cbd04508a01e49b23e01eec7
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 14.39365291595459
  node_ip: 10.150.0.3
  pid: 4751
  should_checkpoint: true
  time_since_restore: 42.7030348777771
  time_this_iter_s: 8.50468897819519
  time_total_s: 42.7030348777771
  timestamp: 1662614381
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: bb34233a
  warmup_time: 0.01678323745727539
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=4861)[0m {'loss': 12.058097839355469, 'avg_loss': 12.892109155654907}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_bb34233a_2_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-18-53/checkpoint_000003)... 

Result for TorchTrainer_bb34233a:
  _time_this_iter_s: 8.411531925201416
  _timestamp: 1662614389
  _training_iteration: 4
  avg_loss: 12.892109155654907
  date: 2022-09-08_05-19-49
  done: false
  experiment_id: 8d6f1b75cbd04508a01e49b23e01eec7
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 12.058097839355469
  node_ip: 10.150.0.3
  pid: 4751
  should_checkpoint: true
  time_since_restore: 51.14816331863403
  time_this_iter_s: 8.445128440856934
  time_total_s: 51.14816331863403
  timestamp: 1662614389
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: bb34233a
  warmup_time: 0.01678323745727539
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=4750)[0m {'loss': 9.189425468444824, 'avg_loss': 10.590950647989908}
Result for TorchTrainer_b92ccfec:
  _time_this_iter_s: 12.931106805801392
  _timestamp: 1662614391
  _training_iteration: 3
  avg_loss: 10.590950647989908
  date: 2022-09-08_05-19-52
  done: false
  experiment_id: f9a874a06c85451abb488b73768d9bf7
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 9.189425468444824
  node_ip: 10.150.0.3
  pid: 4679
  should_checkpoint: true
  time_since_restore: 58.71311402320862
  time_this_iter_s: 13.163208961486816
  time_total_s: 58.71311402320862
  timestamp: 1662614392
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: b92ccfec
  warmup_time: 0.004803895950317383
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_b92ccfec_1_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-18-50/checkpoint_000002)... Done. 1.5s


[2m[36m(RayTrainWorker pid=4861)[0m {'loss': 7.962991237640381, 'avg_loss': 11.906285572052003}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_bb34233a_2_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-18-53/checkpoint_000004)... 

Result for TorchTrainer_bb34233a:
  _time_this_iter_s: 8.996372938156128
  _timestamp: 1662614398
  _training_iteration: 5
  avg_loss: 11.906285572052003
  date: 2022-09-08_05-19-58
  done: false
  experiment_id: 8d6f1b75cbd04508a01e49b23e01eec7
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 7.962991237640381
  node_ip: 10.150.0.3
  pid: 4751
  should_checkpoint: true
  time_since_restore: 60.15587663650513
  time_this_iter_s: 9.007713317871094
  time_total_s: 60.15587663650513
  timestamp: 1662614398
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: bb34233a
  warmup_time: 0.01678323745727539
  


Done. 2.6s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_bb34233a_2_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-18-53/checkpoint_000004)... Done. 0.7s


[2m[36m(RayTrainWorker pid=4750)[0m {'loss': 10.754727363586426, 'avg_loss': 10.631894826889038}


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

Result for TorchTrainer_bb34233a:
  _time_this_iter_s: 8.996372938156128
  _timestamp: 1662614398
  _training_iteration: 5
  avg_loss: 11.906285572052003
  date: 2022-09-08_05-19-58
  done: true
  experiment_id: 8d6f1b75cbd04508a01e49b23e01eec7
  experiment_tag: 2_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0004,max_num_steps=5,num_workers=4

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_b92ccfec_1_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-18-50/checkpoint_000003)... 

0,1
avg_loss,▁▆█▇▃
iterations_since_restore,▁▃▅▆█
loss,▅▇█▅▁
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▁▁▁
time_total_s,▁▃▄▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,11.90629
iterations_since_restore,5.0
loss,7.96299
time_since_restore,60.15588
time_this_iter_s,9.00771
time_total_s,60.15588
timestamp,1662614398.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.01678


Result for TorchTrainer_b92ccfec:
  _time_this_iter_s: 13.459456205368042
  _timestamp: 1662614405
  _training_iteration: 4
  avg_loss: 10.631894826889038
  date: 2022-09-08_05-20-05
  done: false
  experiment_id: f9a874a06c85451abb488b73768d9bf7
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 10.754727363586426
  node_ip: 10.150.0.3
  pid: 4679
  should_checkpoint: true
  time_since_restore: 71.88764500617981
  time_this_iter_s: 13.174530982971191
  time_total_s: 71.88764500617981
  timestamp: 1662614405
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: b92ccfec
  warmup_time: 0.004803895950317383
  


Done. 1.0s
[2m[36m(RayTrainWorker pid=6399)[0m 2022-09-08 05:20:17,538	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]


[2m[36m(RayTrainWorker pid=4750)[0m {'loss': 5.882531642913818, 'avg_loss': 9.682022190093994}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_b92ccfec_1_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-18-50/checkpoint_000004)... 

Result for TorchTrainer_b92ccfec:
  _time_this_iter_s: 14.017906904220581
  _timestamp: 1662614419
  _training_iteration: 5
  avg_loss: 9.682022190093994
  date: 2022-09-08_05-20-19
  done: false
  experiment_id: f9a874a06c85451abb488b73768d9bf7
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 5.882531642913818
  node_ip: 10.150.0.3
  pid: 4679
  should_checkpoint: true
  time_since_restore: 85.99602723121643
  time_this_iter_s: 14.108382225036621
  time_total_s: 85.99602723121643
  timestamp: 1662614419
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: b92ccfec
  warmup_time: 0.004803895950317383
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_b92ccfec_1_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-18-50/checkpoint_000004)... Done. 0.3s


[2m[36m(RayTrainWorker pid=6399)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=6399)[0m 2022-09-08 05:20:24,073	INFO train_loop_utils.py:300 -- Moving model to device: cpu


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

Result for TorchTrainer_b92ccfec:
  _time_this_iter_s: 14.017906904220581
  _timestamp: 1662614419
  _training_iteration: 5
  avg_loss: 9.682022190093994
  date: 2022-09-08_05-20-19
  done: true
  experiment_id: f9a874a06c85451abb488b73768d9bf7
  experiment_tag: 1_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0006,max_num_steps=5,num_workers=

0,1
avg_loss,█▃▂▂▁
iterations_since_restore,▁▃▅▆█
loss,█▂▃▅▁
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▁▁▁
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,9.68202
iterations_since_restore,5.0
loss,5.88253
time_since_restore,85.99603
time_this_iter_s,14.10838
time_total_s,85.99603
timestamp,1662614419.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.0048


[2m[36m(RayTrainWorker pid=6693)[0m 2022-09-08 05:20:39,257	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]


[2m[36m(RayTrainWorker pid=6399)[0m {'loss': 17.71111297607422, 'avg_loss': 17.71111297607422}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_bb3d4924_3_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-20-10/checkpoint_000000)... 

Result for TorchTrainer_bb3d4924:
  _time_this_iter_s: 23.200047969818115
  _timestamp: 1662614441
  _training_iteration: 1
  avg_loss: 17.71111297607422
  date: 2022-09-08_05-20-41
  done: false
  experiment_id: 43fed67e00a5493999e7abe1128673fd
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 17.71111297607422
  node_ip: 10.150.0.3
  pid: 6324
  should_checkpoint: true
  time_since_restore: 27.35178303718567
  time_this_iter_s: 27.35178303718567
  time_total_s: 27.35178303718567
  timestamp: 1662614441
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: bb3d4924
  warmup_time: 0.006744384765625
  


Done. 1.0s
[2m[36m(RayTrainWorker pid=6693)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=6693)[0m 2022-09-08 05:20:45,384	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=6399)[0m {'loss': 11.539502143859863, 'avg_loss': 14.625307559967041}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_bb3d4924_3_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-20-10/checkpoint_000001)... 

Result for TorchTrainer_bb3d4924:
  _time_this_iter_s: 16.950780153274536
  _timestamp: 1662614458
  _training_iteration: 2
  avg_loss: 14.625307559967041
  date: 2022-09-08_05-20-58
  done: false
  experiment_id: 43fed67e00a5493999e7abe1128673fd
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 11.539502143859863
  node_ip: 10.150.0.3
  pid: 6324
  should_checkpoint: true
  time_since_restore: 44.2383828163147
  time_this_iter_s: 16.88659977912903
  time_total_s: 44.2383828163147
  timestamp: 1662614458
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: bb3d4924
  warmup_time: 0.006744384765625
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=6693)[0m {'loss': 10.288769721984863, 'avg_loss': 10.288769721984863}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_e91748fe_4_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-20-31/checkpoint_000000)... 

Result for TorchTrainer_e91748fe:
  _time_this_iter_s: 31.036097288131714
  _timestamp: 1662614470
  _training_iteration: 1
  avg_loss: 10.288769721984863
  date: 2022-09-08_05-21-11
  done: false
  experiment_id: 2e746e6826f24583aee627cc3973746e
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 10.288769721984863
  node_ip: 10.150.0.3
  pid: 6621
  should_checkpoint: true
  time_since_restore: 35.827818870544434
  time_this_iter_s: 35.827818870544434
  time_total_s: 35.827818870544434
  timestamp: 1662614471
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: e91748fe
  warmup_time: 0.007943391799926758
  
[2m[36m(RayTrainWorker pid=6399)[0m {'loss': 13.943074226379395, 'avg_loss': 14.397896448771158}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_bb3d4924_3_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-20-10/checkpoint_000002)... Done. 1.0s


Result for TorchTrainer_bb3d4924:
  _time_this_iter_s: 13.661121129989624
  _timestamp: 1662614471
  _training_iteration: 3
  avg_loss: 14.397896448771158
  date: 2022-09-08_05-21-12
  done: false
  experiment_id: 43fed67e00a5493999e7abe1128673fd
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 13.943074226379395
  node_ip: 10.150.0.3
  pid: 6324
  should_checkpoint: true
  time_since_restore: 57.82619524002075
  time_this_iter_s: 13.587812423706055
  time_total_s: 57.82619524002075
  timestamp: 1662614472
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: bb3d4924
  warmup_time: 0.006744384765625
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=6399)[0m {'loss': 10.032659530639648, 'avg_loss': 13.306587219238281}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_bb3d4924_3_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-20-10/checkpoint_000003)... 

Result for TorchTrainer_bb3d4924:
  _time_this_iter_s: 12.259599208831787
  _timestamp: 1662614484
  _training_iteration: 4
  avg_loss: 13.306587219238281
  date: 2022-09-08_05-21-24
  done: false
  experiment_id: 43fed67e00a5493999e7abe1128673fd
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 10.032659530639648
  node_ip: 10.150.0.3
  pid: 6324
  should_checkpoint: true
  time_since_restore: 70.17089200019836
  time_this_iter_s: 12.344696760177612
  time_total_s: 70.17089200019836
  timestamp: 1662614484
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: bb3d4924
  warmup_time: 0.006744384765625
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=6693)[0m {'loss': 12.019135475158691, 'avg_loss': 11.153952598571777}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_e91748fe_4_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-20-31/checkpoint_000001)... 

Result for TorchTrainer_e91748fe:
  _time_this_iter_s: 18.81213092803955
  _timestamp: 1662614489
  _training_iteration: 2
  avg_loss: 11.153952598571777
  date: 2022-09-08_05-21-30
  done: false
  experiment_id: 2e746e6826f24583aee627cc3973746e
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 12.019135475158691
  node_ip: 10.150.0.3
  pid: 6621
  should_checkpoint: true
  time_since_restore: 54.60904598236084
  time_this_iter_s: 18.781227111816406
  time_total_s: 54.60904598236084
  timestamp: 1662614490
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: e91748fe
  warmup_time: 0.007943391799926758
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=6399)[0m {'loss': 14.664839744567871, 'avg_loss': 13.578237724304199}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_bb3d4924_3_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-20-10/checkpoint_000004)... 

Result for TorchTrainer_bb3d4924:
  _time_this_iter_s: 12.323672771453857
  _timestamp: 1662614496
  _training_iteration: 5
  avg_loss: 13.578237724304199
  date: 2022-09-08_05-21-37
  done: false
  experiment_id: 43fed67e00a5493999e7abe1128673fd
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 14.664839744567871
  node_ip: 10.150.0.3
  pid: 6324
  should_checkpoint: true
  time_since_restore: 82.51154518127441
  time_this_iter_s: 12.34065318107605
  time_total_s: 82.51154518127441
  timestamp: 1662614497
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: bb3d4924
  warmup_time: 0.006744384765625
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_bb3d4924_3_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-20-10/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▃▃▁▁
iterations_since_restore,▁▃▅▆█
loss,█▂▅▁▅
time_since_restore,▁▃▅▆█
time_this_iter_s,█▃▂▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,13.57824
iterations_since_restore,5.0
loss,14.66484
time_since_restore,82.51155
time_this_iter_s,12.34065
time_total_s,82.51155
timestamp,1662614497.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00674


Result for TorchTrainer_bb3d4924:
  _time_this_iter_s: 12.323672771453857
  _timestamp: 1662614496
  _training_iteration: 5
  avg_loss: 13.578237724304199
  date: 2022-09-08_05-21-37
  done: true
  experiment_id: 43fed67e00a5493999e7abe1128673fd
  experiment_tag: 3_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0001,max_num_steps=5,num_workers=

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_e91748fe_4_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-20-31/checkpoint_000002)... 

Result for TorchTrainer_e91748fe:
  _time_this_iter_s: 16.99057388305664
  _timestamp: 1662614506
  _training_iteration: 3
  avg_loss: 9.561657110850016
  date: 2022-09-08_05-21-47
  done: false
  experiment_id: 2e746e6826f24583aee627cc3973746e
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 6.377066135406494
  node_ip: 10.150.0.3
  pid: 6621
  should_checkpoint: true
  time_since_restore: 71.54146409034729
  time_this_iter_s: 16.93241810798645
  time_total_s: 71.54146409034729
  timestamp: 1662614507
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: e91748fe
  warmup_time: 0.007943391799926758
  


Done. 1.1s
[2m[36m(RayTrainWorker pid=7586)[0m 2022-09-08 05:21:55,725	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=7586)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=7586)[0m 2022-09-08 05:22:02,306	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=6693)[0m {'loss': 6.878262042999268, 'avg_loss': 8.890808343887329}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_e91748fe_4_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-20-31/checkpoint_000003)... 

Result for TorchTrainer_e91748fe:
  _time_this_iter_s: 22.8879451751709
  _timestamp: 1662614529
  _training_iteration: 4
  avg_loss: 8.890808343887329
  date: 2022-09-08_05-22-10
  done: false
  experiment_id: 2e746e6826f24583aee627cc3973746e
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 6.878262042999268
  node_ip: 10.150.0.3
  pid: 6621
  should_checkpoint: true
  time_since_restore: 94.50905346870422
  time_this_iter_s: 22.967589378356934
  time_total_s: 94.50905346870422
  timestamp: 1662614530
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: e91748fe
  warmup_time: 0.007943391799926758
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=7586)[0m {'loss': 12.986995697021484, 'avg_loss': 12.986995697021484}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_f59446cc_5_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-21-47/checkpoint_000000)... 

Result for TorchTrainer_f59446cc:
  _time_this_iter_s: 28.678772687911987
  _timestamp: 1662614544
  _training_iteration: 1
  avg_loss: 12.986995697021484
  date: 2022-09-08_05-22-25
  done: false
  experiment_id: 2bf869b6ed5b45eabe99aac30a30982f
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 12.986995697021484
  node_ip: 10.150.0.3
  pid: 7512
  should_checkpoint: true
  time_since_restore: 32.979408740997314
  time_this_iter_s: 32.979408740997314
  time_total_s: 32.979408740997314
  timestamp: 1662614545
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: f59446cc
  warmup_time: 0.0056552886962890625
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=6693)[0m {'loss': 9.453082084655762, 'avg_loss': 9.003263092041015}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_e91748fe_4_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-20-31/checkpoint_000004)... 

Result for TorchTrainer_e91748fe:
  _time_this_iter_s: 16.986690282821655
  _timestamp: 1662614546
  _training_iteration: 5
  avg_loss: 9.003263092041015
  date: 2022-09-08_05-22-26
  done: false
  experiment_id: 2e746e6826f24583aee627cc3973746e
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 9.453082084655762
  node_ip: 10.150.0.3
  pid: 6621
  should_checkpoint: true
  time_since_restore: 111.41247320175171
  time_this_iter_s: 16.903419733047485
  time_total_s: 111.41247320175171
  timestamp: 1662614546
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: e91748fe
  warmup_time: 0.007943391799926758
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_e91748fe_4_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-20-31/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,▅█▃▁▁
iterations_since_restore,▁▃▅▆█
loss,▆█▁▂▅
time_since_restore,▁▃▄▆█
time_this_iter_s,█▂▁▃▁
time_total_s,▁▃▄▆█
timestamp,▁▃▄▇█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,9.00326
iterations_since_restore,5.0
loss,9.45308
time_since_restore,111.41247
time_this_iter_s,16.90342
time_total_s,111.41247
timestamp,1662614546.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00794


Result for TorchTrainer_e91748fe:
  _time_this_iter_s: 16.986690282821655
  _timestamp: 1662614546
  _training_iteration: 5
  avg_loss: 9.003263092041015
  date: 2022-09-08_05-22-26
  done: true
  experiment_id: 2e746e6826f24583aee627cc3973746e
  experiment_tag: 4_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0029,max_num_steps=5,num_workers=

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_f59446cc_5_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-21-47/checkpoint_000001)... 

Result for TorchTrainer_f59446cc:
  _time_this_iter_s: 19.19585609436035
  _timestamp: 1662614564
  _training_iteration: 2
  avg_loss: 17.847731590270996
  date: 2022-09-08_05-22-44
  done: false
  experiment_id: 2bf869b6ed5b45eabe99aac30a30982f
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 22.708467483520508
  node_ip: 10.150.0.3
  pid: 7512
  should_checkpoint: true
  time_since_restore: 52.24552845954895
  time_this_iter_s: 19.266119718551636
  time_total_s: 52.24552845954895
  timestamp: 1662614564
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: f59446cc
  warmup_time: 0.0056552886962890625
  


Done. 1.0s
[2m[36m(RayTrainWorker pid=8414)[0m 2022-09-08 05:22:45,864	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=8414)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=8414)[0m 2022-09-08 05:22:51,106	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=7586)[0m {'loss': 7.8026862144470215, 'avg_loss': 14.499383131663004}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_f59446cc_5_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-21-47/checkpoint_000002)... 

Result for TorchTrainer_f59446cc:
  _time_this_iter_s: 20.795378923416138
  _timestamp: 1662614584
  _training_iteration: 3
  avg_loss: 14.499383131663004
  date: 2022-09-08_05-23-05
  done: false
  experiment_id: 2bf869b6ed5b45eabe99aac30a30982f
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 7.8026862144470215
  node_ip: 10.150.0.3
  pid: 7512
  should_checkpoint: true
  time_since_restore: 72.86362195014954
  time_this_iter_s: 20.618093490600586
  time_total_s: 72.86362195014954
  timestamp: 1662614585
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: f59446cc
  warmup_time: 0.0056552886962890625
  


Done. 1.6s


[2m[36m(RayTrainWorker pid=8414)[0m {'loss': 13.975713729858398, 'avg_loss': 13.975713729858398}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_22aeee5a_6_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-22-37/checkpoint_000000)... 

Result for TorchTrainer_22aeee5a:
  _time_this_iter_s: 28.086265325546265
  _timestamp: 1662614594
  _training_iteration: 1
  avg_loss: 13.975713729858398
  date: 2022-09-08_05-23-15
  done: false
  experiment_id: 5f1a5465d86e423b8698462e8b5734a3
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 13.975713729858398
  node_ip: 10.150.0.3
  pid: 8344
  should_checkpoint: true
  time_since_restore: 33.321842193603516
  time_this_iter_s: 33.321842193603516
  time_total_s: 33.321842193603516
  timestamp: 1662614595
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 22aeee5a
  warmup_time: 0.007919549942016602
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=7586)[0m {'loss': 9.489253997802734, 'avg_loss': 13.246850848197937}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_f59446cc_5_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-21-47/checkpoint_000003)... 

Result for TorchTrainer_f59446cc:
  _time_this_iter_s: 18.872154235839844
  _timestamp: 1662614603
  _training_iteration: 4
  avg_loss: 13.246850848197937
  date: 2022-09-08_05-23-24
  done: false
  experiment_id: 2bf869b6ed5b45eabe99aac30a30982f
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 9.489253997802734
  node_ip: 10.150.0.3
  pid: 7512
  should_checkpoint: true
  time_since_restore: 91.76584243774414
  time_this_iter_s: 18.902220487594604
  time_total_s: 91.76584243774414
  timestamp: 1662614604
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: f59446cc
  warmup_time: 0.0056552886962890625
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=8414)[0m {'loss': 9.544771194458008, 'avg_loss': 11.760242462158203}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_22aeee5a_6_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-22-37/checkpoint_000001)... 

Result for TorchTrainer_22aeee5a:
  _time_this_iter_s: 18.829014539718628
  _timestamp: 1662614613
  _training_iteration: 2
  avg_loss: 11.760242462158203
  date: 2022-09-08_05-23-33
  done: false
  experiment_id: 5f1a5465d86e423b8698462e8b5734a3
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 9.544771194458008
  node_ip: 10.150.0.3
  pid: 8344
  should_checkpoint: true
  time_since_restore: 52.089160680770874
  time_this_iter_s: 18.76731848716736
  time_total_s: 52.089160680770874
  timestamp: 1662614613
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 22aeee5a
  warmup_time: 0.007919549942016602
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=7586)[0m {'loss': 8.34640121459961, 'avg_loss': 12.266760921478271}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_f59446cc_5_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-21-47/checkpoint_000004)... 

Result for TorchTrainer_f59446cc:
  _time_this_iter_s: 18.104395627975464
  _timestamp: 1662614621
  _training_iteration: 5
  avg_loss: 12.266760921478271
  date: 2022-09-08_05-23-42
  done: false
  experiment_id: 2bf869b6ed5b45eabe99aac30a30982f
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 8.34640121459961
  node_ip: 10.150.0.3
  pid: 7512
  should_checkpoint: true
  time_since_restore: 109.83228707313538
  time_this_iter_s: 18.066444635391235
  time_total_s: 109.83228707313538
  timestamp: 1662614622
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: f59446cc
  warmup_time: 0.0056552886962890625
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_f59446cc_5_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-21-47/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

[2m[36m(RayTrainWorker pid=8414)[0m {'loss': 9.02727222442627, 'avg_loss': 10.849252382914225}


0,1
avg_loss,▂█▄▂▁
iterations_since_restore,▁▃▅▆█
loss,▃█▁▂▁
time_since_restore,▁▃▅▆█
time_this_iter_s,█▂▂▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,12.26676
iterations_since_restore,5.0
loss,8.3464
time_since_restore,109.83229
time_this_iter_s,18.06644
time_total_s,109.83229
timestamp,1662614622.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00566


Result for TorchTrainer_f59446cc:
  _time_this_iter_s: 18.104395627975464
  _timestamp: 1662614621
  _training_iteration: 5
  avg_loss: 12.266760921478271
  date: 2022-09-08_05-23-42
  done: true
  experiment_id: 2bf869b6ed5b45eabe99aac30a30982f
  experiment_tag: 5_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0064,max_num_steps=5,num_workers=

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_22aeee5a_6_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-22-37/checkpoint_000002)... 

Result for TorchTrainer_22aeee5a:
  _time_this_iter_s: 17.170154571533203
  _timestamp: 1662614630
  _training_iteration: 3
  avg_loss: 10.849252382914225
  date: 2022-09-08_05-23-50
  done: false
  experiment_id: 5f1a5465d86e423b8698462e8b5734a3
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 9.02727222442627
  node_ip: 10.150.0.3
  pid: 8344
  should_checkpoint: true
  time_since_restore: 69.22324228286743
  time_this_iter_s: 17.134081602096558
  time_total_s: 69.22324228286743
  timestamp: 1662614630
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 22aeee5a
  warmup_time: 0.007919549942016602
  


Done. 1.0s
[2m[36m(RayTrainWorker pid=9281)[0m 2022-09-08 05:24:01,902	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=9281)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=9281)[0m 2022-09-08 05:24:09,469	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=8414)[0m {'loss': 5.15891170501709, 'avg_loss': 9.426667213439941}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_22aeee5a_6_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-22-37/checkpoint_000003)... 

Result for TorchTrainer_22aeee5a:
  _time_this_iter_s: 20.946260690689087
  _timestamp: 1662614651
  _training_iteration: 4
  avg_loss: 9.426667213439941
  date: 2022-09-08_05-24-11
  done: false
  experiment_id: 5f1a5465d86e423b8698462e8b5734a3
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 5.15891170501709
  node_ip: 10.150.0.3
  pid: 8344
  should_checkpoint: true
  time_since_restore: 90.14728784561157
  time_this_iter_s: 20.92404556274414
  time_total_s: 90.14728784561157
  timestamp: 1662614651
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 22aeee5a
  warmup_time: 0.007919549942016602
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=9281)[0m {'loss': 13.597375869750977, 'avg_loss': 13.597375869750977}
Result for TorchTrainer_40435014:
  _time_this_iter_s: 11.235937356948853
  _timestamp: 1662614654
  _training_iteration: 1
  avg_loss: 13.597375869750977
  date: 2022-09-08_05-24-15
  done: false
  experiment_id: c4571028d0c847e3a52f731dd62ec6f1
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 13.597375869750977
  node_ip: 10.150.0.3
  pid: 9212
  should_checkpoint: true
  time_since_restore: 16.150686025619507
  time_this_iter_s: 16.150686025619507
  time_total_s: 16.150686025619507
  timestamp: 1662614655
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: '40435014'
  warmup_time: 0.010715723037719727
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_40435014_7_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,r_2022-09-08_05-23-54/checkpoint_000000)... Done. 1.1s


[2m[36m(RayTrainWorker pid=9281)[0m {'loss': 12.981776237487793, 'avg_loss': 13.289576053619385}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_40435014_7_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,r_2022-09-08_05-23-54/checkpoint_000001)... Done. 1.0s


[2m[36m(RayTrainWorker pid=9281)[0m {'loss': 18.94203758239746, 'avg_loss': 15.17372989654541}
Result for TorchTrainer_40435014:
  _time_this_iter_s: 4.710414409637451
  _timestamp: 1662614664
  _training_iteration: 3
  avg_loss: 15.17372989654541
  date: 2022-09-08_05-24-24
  done: false
  experiment_id: c4571028d0c847e3a52f731dd62ec6f1
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 18.94203758239746
  node_ip: 10.150.0.3
  pid: 9212
  should_checkpoint: true
  time_since_restore: 25.491501092910767
  time_this_iter_s: 4.612233877182007
  time_total_s: 25.491501092910767
  timestamp: 1662614664
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: '40435014'
  warmup_time: 0.010715723037719727
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_40435014_7_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,r_2022-09-08_05-23-54/checkpoint_000002)... Done. 1.0s


[2m[36m(RayTrainWorker pid=8414)[0m {'loss': 3.5230886936187744, 'avg_loss': 8.245951509475708}
[2m[36m(RayTrainWorker pid=9281)[0m {'loss': 12.754528045654297, 'avg_loss': 14.568929433822632}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_22aeee5a_6_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-22-37/checkpoint_000004)... [34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_40435014_7_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,r_2022-09-08_05-23-54/checkpoint_000003)... 

Result for TorchTrainer_22aeee5a:
  _time_this_iter_s: 17.242355585098267
  _timestamp: 1662614668
  _training_iteration: 5
  avg_loss: 8.245951509475708
  date: 2022-09-08_05-24-29
  done: false
  experiment_id: 5f1a5465d86e423b8698462e8b5734a3
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 3.5230886936187744
  node_ip: 10.150.0.3
  pid: 8344
  should_checkpoint: true
  time_since_restore: 107.46415305137634
  time_this_iter_s: 17.31686520576477
  time_total_s: 107.46415305137634
  timestamp: 1662614669
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 22aeee5a
  warmup_time: 0.007919549942016602
  


Done. 1.6s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_22aeee5a_6_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-22-37/checkpoint_000004)... Done. 1.6s
Done. 0.3s


[2m[36m(RayTrainWorker pid=9281)[0m {'loss': 9.848295211791992, 'avg_loss': 13.624802589416504}


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▅▄▂▁
iterations_since_restore,▁▃▅▆█
loss,█▅▅▂▁
time_since_restore,▁▃▄▆█
time_this_iter_s,█▂▁▃▁
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,8.24595
iterations_since_restore,5.0
loss,3.52309
time_since_restore,107.46415
time_this_iter_s,17.31687
time_total_s,107.46415
timestamp,1662614669.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00792


Result for TorchTrainer_22aeee5a:
  _time_this_iter_s: 17.242355585098267
  _timestamp: 1662614668
  _training_iteration: 5
  avg_loss: 8.245951509475708
  date: 2022-09-08_05-24-29
  done: true
  experiment_id: 5f1a5465d86e423b8698462e8b5734a3
  experiment_tag: 6_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0015,max_num_steps=5,num_workers=

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_40435014_7_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,r_2022-09-08_05-23-54/checkpoint_000004)... 

Result for TorchTrainer_40435014:
  _time_this_iter_s: 4.803667306900024
  _timestamp: 1662614673
  _training_iteration: 5
  avg_loss: 13.624802589416504
  date: 2022-09-08_05-24-34
  done: false
  experiment_id: c4571028d0c847e3a52f731dd62ec6f1
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 9.848295211791992
  node_ip: 10.150.0.3
  pid: 9212
  should_checkpoint: true
  time_since_restore: 34.905959367752075
  time_this_iter_s: 4.655702590942383
  time_total_s: 34.905959367752075
  timestamp: 1662614674
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: '40435014'
  warmup_time: 0.010715723037719727
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_40435014_7_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,r_2022-09-08_05-23-54/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,▂▁█▆▂
iterations_since_restore,▁▃▅▆█
loss,▄▃█▃▁
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▁▁▁
time_total_s,▁▃▄▆█
timestamp,▁▂▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,13.6248
iterations_since_restore,5.0
loss,9.8483
time_since_restore,34.90596
time_this_iter_s,4.6557
time_total_s,34.90596
timestamp,1662614674.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.01072


Result for TorchTrainer_40435014:
  _time_this_iter_s: 4.803667306900024
  _timestamp: 1662614673
  _training_iteration: 5
  avg_loss: 13.624802589416504
  date: 2022-09-08_05-24-34
  done: true
  experiment_id: c4571028d0c847e3a52f731dd62ec6f1
  experiment_tag: 7_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0001,max_num_steps=5,num_workers=4

[2m[36m(RayTrainWorker pid=10310)[0m 2022-09-08 05:24:54,649	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=10319)[0m 2022-09-08 05:24:55,872	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=10319)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=10310)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=10319)[0m 2022-09-08 05:25:03,759	INFO train_loop_utils.py:300 -- Moving model to device: cpu
[2m[36m(RayTrainWorker pid=10310)[0m 2022-09-08 05:25:04,511	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=10319)[0m {'loss': 15.508536338806152, 'avg_loss': 15.508536338806152}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_8d6a8772_9_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-24-47/checkpoint_000000)... 

Result for TorchTrainer_8d6a8772:
  _time_this_iter_s: 24.101453065872192
  _timestamp: 1662614720
  _training_iteration: 1
  avg_loss: 15.508536338806152
  date: 2022-09-08_05-25-21
  done: false
  experiment_id: 694f591e6cce4ab29796235a5d7baac3
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 15.508536338806152
  node_ip: 10.150.0.3
  pid: 10189
  should_checkpoint: true
  time_since_restore: 29.076510429382324
  time_this_iter_s: 29.076510429382324
  time_total_s: 29.076510429382324
  timestamp: 1662614721
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 8d6a8772
  warmup_time: 0.0318598747253418
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=10310)[0m {'loss': 11.566911697387695, 'avg_loss': 11.566911697387695}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_6e8ceeee_8_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-24-46/checkpoint_000000)... 

Result for TorchTrainer_6e8ceeee:
  _time_this_iter_s: 32.00817155838013
  _timestamp: 1662614727
  _training_iteration: 1
  avg_loss: 11.566911697387695
  date: 2022-09-08_05-25-27
  done: false
  experiment_id: 4a299b52845641a5911d4bbb6023f3d0
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 11.566911697387695
  node_ip: 10.150.0.3
  pid: 10178
  should_checkpoint: true
  time_since_restore: 37.12333130836487
  time_this_iter_s: 37.12333130836487
  time_total_s: 37.12333130836487
  timestamp: 1662614727
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 6e8ceeee
  warmup_time: 0.008984804153442383
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=10319)[0m {'loss': 15.628698348999023, 'avg_loss': 15.568617343902588}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_8d6a8772_9_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-24-47/checkpoint_000001)... 

Result for TorchTrainer_8d6a8772:
  _time_this_iter_s: 13.710113525390625
  _timestamp: 1662614734
  _training_iteration: 2
  avg_loss: 15.568617343902588
  date: 2022-09-08_05-25-34
  done: false
  experiment_id: 694f591e6cce4ab29796235a5d7baac3
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 15.628698348999023
  node_ip: 10.150.0.3
  pid: 10189
  should_checkpoint: true
  time_since_restore: 42.74840307235718
  time_this_iter_s: 13.671892642974854
  time_total_s: 42.74840307235718
  timestamp: 1662614734
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 8d6a8772
  warmup_time: 0.0318598747253418
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=10310)[0m {'loss': 10.387297630310059, 'avg_loss': 10.977104663848877}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_6e8ceeee_8_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-24-46/checkpoint_000001)... 

Result for TorchTrainer_6e8ceeee:
  _time_this_iter_s: 17.536593914031982
  _timestamp: 1662614744
  _training_iteration: 2
  avg_loss: 10.977104663848877
  date: 2022-09-08_05-25-45
  done: false
  experiment_id: 4a299b52845641a5911d4bbb6023f3d0
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 10.387297630310059
  node_ip: 10.150.0.3
  pid: 10178
  should_checkpoint: true
  time_since_restore: 54.61062502861023
  time_this_iter_s: 17.48729372024536
  time_total_s: 54.61062502861023
  timestamp: 1662614745
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 6e8ceeee
  warmup_time: 0.008984804153442383
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=10319)[0m {'loss': 9.412477493286133, 'avg_loss': 13.516570727030436}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_8d6a8772_9_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-24-47/checkpoint_000002)... 

Result for TorchTrainer_8d6a8772:
  _time_this_iter_s: 13.549051523208618
  _timestamp: 1662614747
  _training_iteration: 3
  avg_loss: 13.516570727030436
  date: 2022-09-08_05-25-48
  done: false
  experiment_id: 694f591e6cce4ab29796235a5d7baac3
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 9.412477493286133
  node_ip: 10.150.0.3
  pid: 10189
  should_checkpoint: true
  time_since_restore: 56.316662311553955
  time_this_iter_s: 13.568259239196777
  time_total_s: 56.316662311553955
  timestamp: 1662614748
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 8d6a8772
  warmup_time: 0.0318598747253418
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=10319)[0m {'loss': 13.902549743652344, 'avg_loss': 13.613065481185913}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_8d6a8772_9_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-24-47/checkpoint_000003)... 

Result for TorchTrainer_8d6a8772:
  _time_this_iter_s: 13.357654809951782
  _timestamp: 1662614761
  _training_iteration: 4
  avg_loss: 13.613065481185913
  date: 2022-09-08_05-26-01
  done: false
  experiment_id: 694f591e6cce4ab29796235a5d7baac3
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 13.902549743652344
  node_ip: 10.150.0.3
  pid: 10189
  should_checkpoint: true
  time_since_restore: 69.77879166603088
  time_this_iter_s: 13.462129354476929
  time_total_s: 69.77879166603088
  timestamp: 1662614761
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 8d6a8772
  warmup_time: 0.0318598747253418
  
[2m[36m(RayTrainWorker pid=10310)[0m {'loss': 12.690081596374512, 'avg_loss': 11.548096974690756}


Done. 1.1s


Result for TorchTrainer_6e8ceeee:
  _time_this_iter_s: 18.531675338745117
  _timestamp: 1662614763
  _training_iteration: 3
  avg_loss: 11.548096974690756
  date: 2022-09-08_05-26-04
  done: false
  experiment_id: 4a299b52845641a5911d4bbb6023f3d0
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 12.690081596374512
  node_ip: 10.150.0.3
  pid: 10178
  should_checkpoint: true
  time_since_restore: 73.42307806015015
  time_this_iter_s: 18.812453031539917
  time_total_s: 73.42307806015015
  timestamp: 1662614764
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 6e8ceeee
  warmup_time: 0.008984804153442383
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_6e8ceeee_8_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-24-46/checkpoint_000002)... Done. 1.0s


[2m[36m(RayTrainWorker pid=10319)[0m {'loss': 4.752763271331787, 'avg_loss': 11.841005039215087}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_8d6a8772_9_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-24-47/checkpoint_000004)... 

Result for TorchTrainer_8d6a8772:
  _time_this_iter_s: 14.811361074447632
  _timestamp: 1662614776
  _training_iteration: 5
  avg_loss: 11.841005039215087
  date: 2022-09-08_05-26-16
  done: false
  experiment_id: 694f591e6cce4ab29796235a5d7baac3
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 4.752763271331787
  node_ip: 10.150.0.3
  pid: 10189
  should_checkpoint: true
  time_since_restore: 84.44539165496826
  time_this_iter_s: 14.666599988937378
  time_total_s: 84.44539165496826
  timestamp: 1662614776
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 8d6a8772
  warmup_time: 0.0318598747253418
  


Done. 1.3s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_8d6a8772_9_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-24-47/checkpoint_000004)... Done. 0.3s


[2m[36m(RayTrainWorker pid=10310)[0m {'loss': 13.24268913269043, 'avg_loss': 11.971745014190674}


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

Result for TorchTrainer_8d6a8772:
  _time_this_iter_s: 14.811361074447632
  _timestamp: 1662614776
  _training_iteration: 5
  avg_loss: 11.841005039215087
  date: 2022-09-08_05-26-16
  done: true
  experiment_id: 694f591e6cce4ab29796235a5d7baac3
  experiment_tag: 9_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0004,max_num_steps=5,num_workers

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_6e8ceeee_8_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-24-46/checkpoint_000003)... 

Result for TorchTrainer_6e8ceeee:
  _time_this_iter_s: 17.297489881515503
  _timestamp: 1662614780
  _training_iteration: 4
  avg_loss: 11.971745014190674
  date: 2022-09-08_05-26-21
  done: false
  experiment_id: 4a299b52845641a5911d4bbb6023f3d0
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 13.24268913269043
  node_ip: 10.150.0.3
  pid: 10178
  should_checkpoint: true
  time_since_restore: 90.36831092834473
  time_this_iter_s: 16.94523286819458
  time_total_s: 90.36831092834473
  timestamp: 1662614781
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 6e8ceeee
  warmup_time: 0.008984804153442383
  


Done. 1.0s


0,1
avg_loss,██▄▄▁
iterations_since_restore,▁▃▅▆█
loss,██▄▇▁
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▁▁▂
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,11.84101
iterations_since_restore,5.0
loss,4.75276
time_since_restore,84.44539
time_this_iter_s,14.6666
time_total_s,84.44539
timestamp,1662614776.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.03186


[2m[36m(RayTrainWorker pid=11894)[0m 2022-09-08 05:26:35,314	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]


[2m[36m(RayTrainWorker pid=10310)[0m {'loss': 10.724266052246094, 'avg_loss': 11.722249221801757}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_6e8ceeee_8_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-24-46/checkpoint_000004)... 

Result for TorchTrainer_6e8ceeee:
  _time_this_iter_s: 19.32441210746765
  _timestamp: 1662614800
  _training_iteration: 5
  avg_loss: 11.722249221801757
  date: 2022-09-08_05-26-40
  done: false
  experiment_id: 4a299b52845641a5911d4bbb6023f3d0
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 10.724266052246094
  node_ip: 10.150.0.3
  pid: 10178
  should_checkpoint: true
  time_since_restore: 109.7222318649292
  time_this_iter_s: 19.353920936584473
  time_total_s: 109.7222318649292
  timestamp: 1662614800
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 6e8ceeee
  warmup_time: 0.008984804153442383
  


[2m[36m(RayTrainWorker pid=11894)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=11894)[0m 2022-09-08 05:26:41,048	INFO train_loop_utils.py:300 -- Moving model to device: cpu
Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_6e8ceeee_8_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-24-46/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

Result for TorchTrainer_6e8ceeee:
  _time_this_iter_s: 19.32441210746765
  _timestamp: 1662614800
  _training_iteration: 5
  avg_loss: 11.722249221801757
  date: 2022-09-08_05-26-40
  done: true
  experiment_id: 4a299b52845641a5911d4bbb6023f3d0
  experiment_tag: 8_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0002,max_num_steps=5,num_workers=4

0,1
avg_loss,▅▁▅█▆
iterations_since_restore,▁▃▅▆█
loss,▄▁▇█▂
time_since_restore,▁▃▅▆█
time_this_iter_s,█▁▂▁▂
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,11.72225
iterations_since_restore,5.0
loss,10.72427
time_since_restore,109.72223
time_this_iter_s,19.35392
time_total_s,109.72223
timestamp,1662614800.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00898


[2m[36m(RayTrainWorker pid=11894)[0m {'loss': 12.353342056274414, 'avg_loss': 12.353342056274414}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_8df51bd0_10_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-26-28/checkpoint_000000)... 

Result for TorchTrainer_8df51bd0:
  _time_this_iter_s: 19.12893271446228
  _timestamp: 1662614815
  _training_iteration: 1
  avg_loss: 12.353342056274414
  date: 2022-09-08_05-26-55
  done: false
  experiment_id: 4bd38202dc264bbea5bc1345cb48b41b
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 12.353342056274414
  node_ip: 10.150.0.3
  pid: 11812
  should_checkpoint: true
  time_since_restore: 23.029694080352783
  time_this_iter_s: 23.029694080352783
  time_total_s: 23.029694080352783
  timestamp: 1662614815
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 8df51bd0
  warmup_time: 0.006277322769165039
  


Done. 1.0s
[2m[36m(RayTrainWorker pid=12278)[0m 2022-09-08 05:27:00,908	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=12278)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=12278)[0m 2022-09-08 05:27:08,257	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=11894)[0m {'loss': 14.074190139770508, 'avg_loss': 13.213766098022461}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_8df51bd0_10_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-26-28/checkpoint_000001)... 

Result for TorchTrainer_8df51bd0:
  _time_this_iter_s: 16.84495258331299
  _timestamp: 1662614831
  _training_iteration: 2
  avg_loss: 13.213766098022461
  date: 2022-09-08_05-27-12
  done: false
  experiment_id: 4bd38202dc264bbea5bc1345cb48b41b
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 14.074190139770508
  node_ip: 10.150.0.3
  pid: 11812
  should_checkpoint: true
  time_since_restore: 39.77811050415039
  time_this_iter_s: 16.748416423797607
  time_total_s: 39.77811050415039
  timestamp: 1662614832
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 8df51bd0
  warmup_time: 0.006277322769165039
  


Done. 1.3s


[2m[36m(RayTrainWorker pid=12278)[0m {'loss': 6.252495288848877, 'avg_loss': 6.252495288848877}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_ca0ed052_11_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-26-53/checkpoint_000000)... 

Result for TorchTrainer_ca0ed052:
  _time_this_iter_s: 16.63005757331848
  _timestamp: 1662614838
  _training_iteration: 1
  avg_loss: 6.252495288848877
  date: 2022-09-08_05-27-18
  done: false
  experiment_id: 8e8b7abf70084f2e821b485e7277fb30
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 6.252495288848877
  node_ip: 10.150.0.3
  pid: 12182
  should_checkpoint: true
  time_since_restore: 21.3973867893219
  time_this_iter_s: 21.3973867893219
  time_total_s: 21.3973867893219
  timestamp: 1662614838
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: ca0ed052
  warmup_time: 0.006010770797729492
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=11894)[0m {'loss': 11.39339542388916, 'avg_loss': 12.606975873311361}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_8df51bd0_10_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-26-28/checkpoint_000002)... 

Result for TorchTrainer_8df51bd0:
  _time_this_iter_s: 13.023996591567993
  _timestamp: 1662614844
  _training_iteration: 3
  avg_loss: 12.606975873311361
  date: 2022-09-08_05-27-25
  done: false
  experiment_id: 4bd38202dc264bbea5bc1345cb48b41b
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 11.39339542388916
  node_ip: 10.150.0.3
  pid: 11812
  should_checkpoint: true
  time_since_restore: 52.681349754333496
  time_this_iter_s: 12.903239250183105
  time_total_s: 52.681349754333496
  timestamp: 1662614845
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 8df51bd0
  warmup_time: 0.006277322769165039
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=12278)[0m {'loss': 20.589731216430664, 'avg_loss': 13.42111325263977}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_ca0ed052_11_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-26-53/checkpoint_000001)... 

Result for TorchTrainer_ca0ed052:
  _time_this_iter_s: 8.909281492233276
  _timestamp: 1662614847
  _training_iteration: 2
  avg_loss: 13.42111325263977
  date: 2022-09-08_05-27-27
  done: false
  experiment_id: 8e8b7abf70084f2e821b485e7277fb30
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 20.589731216430664
  node_ip: 10.150.0.3
  pid: 12182
  should_checkpoint: true
  time_since_restore: 30.307243585586548
  time_this_iter_s: 8.909856796264648
  time_total_s: 30.307243585586548
  timestamp: 1662614847
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: ca0ed052
  warmup_time: 0.006010770797729492
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=12278)[0m {'loss': 6.35532283782959, 'avg_loss': 11.065849781036377}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_ca0ed052_11_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-26-53/checkpoint_000002)... 

Result for TorchTrainer_ca0ed052:
  _time_this_iter_s: 8.420324802398682
  _timestamp: 1662614855
  _training_iteration: 3
  avg_loss: 11.065849781036377
  date: 2022-09-08_05-27-35
  done: false
  experiment_id: 8e8b7abf70084f2e821b485e7277fb30
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 6.35532283782959
  node_ip: 10.150.0.3
  pid: 12182
  should_checkpoint: true
  time_since_restore: 38.62424898147583
  time_this_iter_s: 8.317005395889282
  time_total_s: 38.62424898147583
  timestamp: 1662614855
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: ca0ed052
  warmup_time: 0.006010770797729492
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=11894)[0m {'loss': 12.973479270935059, 'avg_loss': 12.698601722717285}
Result for TorchTrainer_8df51bd0:
  _time_this_iter_s: 12.729971647262573
  _timestamp: 1662614857
  _training_iteration: 4
  avg_loss: 12.698601722717285
  date: 2022-09-08_05-27-38
  done: false
  experiment_id: 4bd38202dc264bbea5bc1345cb48b41b
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 12.973479270935059
  node_ip: 10.150.0.3
  pid: 11812
  should_checkpoint: true
  time_since_restore: 65.46689867973328
  time_this_iter_s: 12.78554892539978
  time_total_s: 65.46689867973328
  timestamp: 1662614858
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 8df51bd0
  warmup_time: 0.006277322769165039
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_8df51bd0_10_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-26-28/checkpoint_000003)... Done. 1.0s


[2m[36m(RayTrainWorker pid=12278)[0m {'loss': 7.460397720336914, 'avg_loss': 10.164486765861511}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_ca0ed052_11_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-26-53/checkpoint_000003)... 

Result for TorchTrainer_ca0ed052:
  _time_this_iter_s: 8.75465440750122
  _timestamp: 1662614864
  _training_iteration: 4
  avg_loss: 10.164486765861511
  date: 2022-09-08_05-27-44
  done: false
  experiment_id: 8e8b7abf70084f2e821b485e7277fb30
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 7.460397720336914
  node_ip: 10.150.0.3
  pid: 12182
  should_checkpoint: true
  time_since_restore: 47.450605630874634
  time_this_iter_s: 8.826356649398804
  time_total_s: 47.450605630874634
  timestamp: 1662614864
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: ca0ed052
  warmup_time: 0.006010770797729492
  


Done. 1.6s


[2m[36m(RayTrainWorker pid=11894)[0m {'loss': 10.573002815246582, 'avg_loss': 12.273481941223144}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_8df51bd0_10_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-26-28/checkpoint_000004)... 

Result for TorchTrainer_8df51bd0:
  _time_this_iter_s: 12.419198274612427
  _timestamp: 1662614870
  _training_iteration: 5
  avg_loss: 12.273481941223144
  date: 2022-09-08_05-27-50
  done: false
  experiment_id: 4bd38202dc264bbea5bc1345cb48b41b
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 10.573002815246582
  node_ip: 10.150.0.3
  pid: 11812
  should_checkpoint: true
  time_since_restore: 77.96748733520508
  time_this_iter_s: 12.500588655471802
  time_total_s: 77.96748733520508
  timestamp: 1662614870
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 8df51bd0
  warmup_time: 0.006277322769165039
  
[2m[36m(RayTrainWorker pid=12278)[0m {'loss': 7.069854259490967, 'avg_loss': 9.545560264587403}


Done. 2.8s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_8df51bd0_10_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-26-28/checkpoint_000004)... Done. 0.2s


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

Result for TorchTrainer_8df51bd0:
  _time_this_iter_s: 12.419198274612427
  _timestamp: 1662614870
  _training_iteration: 5
  avg_loss: 12.273481941223144
  date: 2022-09-08_05-27-50
  done: true
  experiment_id: 4bd38202dc264bbea5bc1345cb48b41b
  experiment_tag: 10_batch_size=18.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0001,max_num_steps=5,num_worker

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_ca0ed052_11_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-26-53/checkpoint_000004)... 

Result for TorchTrainer_ca0ed052:
  _time_this_iter_s: 8.316184282302856
  _timestamp: 1662614872
  _training_iteration: 5
  avg_loss: 9.545560264587403
  date: 2022-09-08_05-27-52
  done: false
  experiment_id: 8e8b7abf70084f2e821b485e7277fb30
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 7.069854259490967
  node_ip: 10.150.0.3
  pid: 12182
  should_checkpoint: true
  time_since_restore: 55.68057680130005
  time_this_iter_s: 8.229971170425415
  time_total_s: 55.68057680130005
  timestamp: 1662614872
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: ca0ed052
  warmup_time: 0.006010770797729492
  


0,1
avg_loss,▂█▃▄▁
iterations_since_restore,▁▃▅▆█
loss,▅█▃▆▁
time_since_restore,▁▃▅▆█
time_this_iter_s,█▄▁▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,12.27348
iterations_since_restore,5.0
loss,10.573
time_since_restore,77.96749
time_this_iter_s,12.50059
time_total_s,77.96749
timestamp,1662614870.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00628


Done. 1.2s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_ca0ed052_11_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-26-53/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

Result for TorchTrainer_ca0ed052:
  _time_this_iter_s: 8.316184282302856
  _timestamp: 1662614872
  _training_iteration: 5
  avg_loss: 9.545560264587403
  date: 2022-09-08_05-27-52
  done: true
  experiment_id: 8e8b7abf70084f2e821b485e7277fb30
  experiment_tag: 11_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0080,max_num_steps=5,num_workers=

0,1
avg_loss,▁█▆▅▄
iterations_since_restore,▁▃▅▆█
loss,▁█▁▂▁
time_since_restore,▁▃▅▆█
time_this_iter_s,█▁▁▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,9.54556
iterations_since_restore,5.0
loss,7.06985
time_since_restore,55.68058
time_this_iter_s,8.22997
time_total_s,55.68058
timestamp,1662614872.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00601


[2m[36m(RayTrainWorker pid=13743)[0m 2022-09-08 05:28:20,276	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=13749)[0m 2022-09-08 05:28:21,047	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=13743)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=13749)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=13743)[0m 2022-09-08 05:28:27,763	INFO train_loop_utils.py:300 -- Moving model to device: cpu
[2m[36m(RayTrainWorker pid=13749)[0m 2022-09-08 05:28:27,772	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=13749)[0m {'loss': 13.681168556213379, 'avg_loss': 13.681168556213379}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_07b631de_13_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-28-12/checkpoint_000000)... 

Result for TorchTrainer_07b631de:
  _time_this_iter_s: 28.521783113479614
  _timestamp: 1662614930
  _training_iteration: 1
  avg_loss: 13.681168556213379
  date: 2022-09-08_05-28-50
  done: false
  experiment_id: 225174d65120469a8c09709d8f1aedd3
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 13.681168556213379
  node_ip: 10.150.0.3
  pid: 13649
  should_checkpoint: true
  time_since_restore: 33.710369348526
  time_this_iter_s: 33.710369348526
  time_total_s: 33.710369348526
  timestamp: 1662614930
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 07b631de
  warmup_time: 0.007265567779541016
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=13743)[0m {'loss': 19.652040481567383, 'avg_loss': 19.652040481567383}
Result for TorchTrainer_d8e3e4d2:
  _time_this_iter_s: 31.33941912651062
  _timestamp: 1662614932
  _training_iteration: 1
  avg_loss: 19.652040481567383
  date: 2022-09-08_05-28-52
  done: false
  experiment_id: 679bd30db8804779a93c4e1be5e96304
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 19.652040481567383
  node_ip: 10.150.0.3
  pid: 13605
  should_checkpoint: true
  time_since_restore: 36.525465965270996
  time_this_iter_s: 36.525465965270996
  time_total_s: 36.525465965270996
  timestamp: 1662614932
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: d8e3e4d2
  warmup_time: 0.007446765899658203
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_d8e3e4d2_12_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-28-11/checkpoint_000000)... Done. 1.0s


[2m[36m(RayTrainWorker pid=13749)[0m {'loss': 7.809698581695557, 'avg_loss': 10.745433568954468}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_07b631de_13_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-28-12/checkpoint_000001)... 

Result for TorchTrainer_07b631de:
  _time_this_iter_s: 20.415300607681274
  _timestamp: 1662614950
  _training_iteration: 2
  avg_loss: 10.745433568954468
  date: 2022-09-08_05-29-11
  done: false
  experiment_id: 225174d65120469a8c09709d8f1aedd3
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 7.809698581695557
  node_ip: 10.150.0.3
  pid: 13649
  should_checkpoint: true
  time_since_restore: 54.10473704338074
  time_this_iter_s: 20.394367694854736
  time_total_s: 54.10473704338074
  timestamp: 1662614951
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 07b631de
  warmup_time: 0.007265567779541016
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=13743)[0m {'loss': 49.82984924316406, 'avg_loss': 34.74094486236572}
Result for TorchTrainer_d8e3e4d2:
  _time_this_iter_s: 20.60531258583069
  _timestamp: 1662614952
  _training_iteration: 2
  avg_loss: 34.74094486236572
  date: 2022-09-08_05-29-13
  done: false
  experiment_id: 679bd30db8804779a93c4e1be5e96304
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 49.82984924316406
  node_ip: 10.150.0.3
  pid: 13605
  should_checkpoint: true
  time_since_restore: 57.05427861213684
  time_this_iter_s: 20.528812646865845
  time_total_s: 57.05427861213684
  timestamp: 1662614953
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: d8e3e4d2
  warmup_time: 0.007446765899658203
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_d8e3e4d2_12_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-28-11/checkpoint_000001)... Done. 1.0s


[2m[36m(RayTrainWorker pid=13749)[0m {'loss': 5.055396556854248, 'avg_loss': 8.84875456492106}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_07b631de_13_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-28-12/checkpoint_000002)... 

Result for TorchTrainer_07b631de:
  _time_this_iter_s: 18.243769884109497
  _timestamp: 1662614968
  _training_iteration: 3
  avg_loss: 8.84875456492106
  date: 2022-09-08_05-29-29
  done: false
  experiment_id: 225174d65120469a8c09709d8f1aedd3
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 5.055396556854248
  node_ip: 10.150.0.3
  pid: 13649
  should_checkpoint: true
  time_since_restore: 72.34583806991577
  time_this_iter_s: 18.241101026535034
  time_total_s: 72.34583806991577
  timestamp: 1662614969
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 07b631de
  warmup_time: 0.007265567779541016
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=13743)[0m {'loss': 8.151599884033203, 'avg_loss': 25.877829869588215}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_d8e3e4d2_12_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-28-11/checkpoint_000002)... 

Result for TorchTrainer_d8e3e4d2:
  _time_this_iter_s: 18.004414558410645
  _timestamp: 1662614970
  _training_iteration: 3
  avg_loss: 25.877829869588215
  date: 2022-09-08_05-29-31
  done: false
  experiment_id: 679bd30db8804779a93c4e1be5e96304
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 8.151599884033203
  node_ip: 10.150.0.3
  pid: 13605
  should_checkpoint: true
  time_since_restore: 75.02615809440613
  time_this_iter_s: 17.971879482269287
  time_total_s: 75.02615809440613
  timestamp: 1662614971
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: d8e3e4d2
  warmup_time: 0.007446765899658203
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=13749)[0m {'loss': 7.703278541564941, 'avg_loss': 8.562385559082031}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_07b631de_13_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-28-12/checkpoint_000003)... 

Result for TorchTrainer_07b631de:
  _time_this_iter_s: 17.9385883808136
  _timestamp: 1662614986
  _training_iteration: 4
  avg_loss: 8.562385559082031
  date: 2022-09-08_05-29-47
  done: false
  experiment_id: 225174d65120469a8c09709d8f1aedd3
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 7.703278541564941
  node_ip: 10.150.0.3
  pid: 13649
  should_checkpoint: true
  time_since_restore: 90.26181650161743
  time_this_iter_s: 17.91597843170166
  time_total_s: 90.26181650161743
  timestamp: 1662614987
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 07b631de
  warmup_time: 0.007265567779541016
  
[2m[36m(RayTrainWorker pid=13743)[0m {'loss': 7.853010177612305, 'avg_loss': 21.37162494659424}


Done. 1.3s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_d8e3e4d2_12_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-28-11/checkpoint_000003)... 

Result for TorchTrainer_d8e3e4d2:
  _time_this_iter_s: 17.513569593429565
  _timestamp: 1662614988
  _training_iteration: 4
  avg_loss: 21.37162494659424
  date: 2022-09-08_05-29-48
  done: false
  experiment_id: 679bd30db8804779a93c4e1be5e96304
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 7.853010177612305
  node_ip: 10.150.0.3
  pid: 13605
  should_checkpoint: true
  time_since_restore: 92.43566799163818
  time_this_iter_s: 17.409509897232056
  time_total_s: 92.43566799163818
  timestamp: 1662614988
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: d8e3e4d2
  warmup_time: 0.007446765899658203
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=13749)[0m {'loss': 7.741596221923828, 'avg_loss': 8.39822769165039}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_07b631de_13_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-28-12/checkpoint_000004)... 

Result for TorchTrainer_07b631de:
  _time_this_iter_s: 19.433927059173584
  _timestamp: 1662615006
  _training_iteration: 5
  avg_loss: 8.39822769165039
  date: 2022-09-08_05-30-06
  done: false
  experiment_id: 225174d65120469a8c09709d8f1aedd3
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 7.741596221923828
  node_ip: 10.150.0.3
  pid: 13649
  should_checkpoint: true
  time_since_restore: 109.66347312927246
  time_this_iter_s: 19.40165662765503
  time_total_s: 109.66347312927246
  timestamp: 1662615006
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 07b631de
  warmup_time: 0.007265567779541016
  
[2m[36m(RayTrainWorker pid=13743)[0m {'loss': 14.314681053161621, 'avg_loss': 19.960236167907716}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_d8e3e4d2_12_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-28-11/checkpoint_000004)... Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_07b631de_13_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-28-12/checkpoint_000004)... 

Result for TorchTrainer_d8e3e4d2:
  _time_this_iter_s: 18.845186948776245
  _timestamp: 1662615007
  _training_iteration: 5
  avg_loss: 19.960236167907716
  date: 2022-09-08_05-30-07
  done: false
  experiment_id: 679bd30db8804779a93c4e1be5e96304
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 14.314681053161621
  node_ip: 10.150.0.3
  pid: 13605
  should_checkpoint: true
  time_since_restore: 111.26780319213867
  time_this_iter_s: 18.83213520050049
  time_total_s: 111.26780319213867
  timestamp: 1662615007
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: d8e3e4d2
  warmup_time: 0.007446765899658203
  


Done. 0.3s


Done. 1.0s


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

Result for TorchTrainer_07b631de:
  _time_this_iter_s: 19.433927059173584
  _timestamp: 1662615006
  _training_iteration: 5
  avg_loss: 8.39822769165039
  date: 2022-09-08_05-30-06
  done: true
  experiment_id: 225174d65120469a8c09709d8f1aedd3
  experiment_tag: 13_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0025,max_num_steps=5,num_workers=

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_d8e3e4d2_12_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-28-11/checkpoint_000004)... 

0,1
avg_loss,█▄▂▁▁
iterations_since_restore,▁▃▅▆█
loss,█▃▁▃▃
time_since_restore,▁▃▅▆█
time_this_iter_s,█▂▁▁▂
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,8.39823
iterations_since_restore,5.0
loss,7.7416
time_since_restore,109.66347
time_this_iter_s,19.40166
time_total_s,109.66347
timestamp,1662615006.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00727


Done. 0.3s


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,▁█▄▂▁
iterations_since_restore,▁▃▅▆█
loss,▃█▁▁▂
time_since_restore,▁▃▅▆█
time_this_iter_s,█▂▁▁▂
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,19.96024
iterations_since_restore,5.0
loss,14.31468
time_since_restore,111.2678
time_this_iter_s,18.83214
time_total_s,111.2678
timestamp,1662615007.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00745


Result for TorchTrainer_d8e3e4d2:
  _time_this_iter_s: 18.845186948776245
  _timestamp: 1662615007
  _training_iteration: 5
  avg_loss: 19.960236167907716
  date: 2022-09-08_05-30-07
  done: true
  experiment_id: 679bd30db8804779a93c4e1be5e96304
  experiment_tag: 12_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0099,max_num_steps=5,num_workers

[2m[36m(RayTrainWorker pid=15472)[0m 2022-09-08 05:30:32,655	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=15473)[0m 2022-09-08 05:30:33,078	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=15472)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=15473)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=15472)[0m 2022-09-08 05:30:39,697	INFO train_loop_utils.py:300 -- Moving model to device: cpu
[2m[36m(RayTrainWorker pid=15473)[0m 2022-09-08 05:30:39,890	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=15473)[0m {'loss': 11.495034217834473, 'avg_loss': 11.495034217834473}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_56478cc6_15_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-30-24/checkpoint_000000)... 

Result for TorchTrainer_56478cc6:
  _time_this_iter_s: 27.36151885986328
  _timestamp: 1662615061
  _training_iteration: 1
  avg_loss: 11.495034217834473
  date: 2022-09-08_05-31-01
  done: false
  experiment_id: 34fa016af84f4681bc07e53a68121a25
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 11.495034217834473
  node_ip: 10.150.0.3
  pid: 15357
  should_checkpoint: true
  time_since_restore: 32.6975953578949
  time_this_iter_s: 32.6975953578949
  time_total_s: 32.6975953578949
  timestamp: 1662615061
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 56478cc6
  warmup_time: 0.008785724639892578
  
[2m[36m(RayTrainWorker pid=15472)[0m {'loss': 14.023079872131348, 'avg_loss': 14.023079872131348}


Done. 1.1s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_082e214e_14_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-30-23/checkpoint_000000)... 

Result for TorchTrainer_082e214e:
  _time_this_iter_s: 29.673914432525635
  _timestamp: 1662615062
  _training_iteration: 1
  avg_loss: 14.023079872131348
  date: 2022-09-08_05-31-04
  done: false
  experiment_id: 8b72537aec484c24bbb5972454c76aea
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 14.023079872131348
  node_ip: 10.150.0.3
  pid: 15339
  should_checkpoint: true
  time_since_restore: 35.178555965423584
  time_this_iter_s: 35.178555965423584
  time_total_s: 35.178555965423584
  timestamp: 1662615064
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 082e214e
  warmup_time: 0.006707668304443359
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=15473)[0m {'loss': 10.696879386901855, 'avg_loss': 11.095956802368164}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_56478cc6_15_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-30-24/checkpoint_000001)... 

Result for TorchTrainer_56478cc6:
  _time_this_iter_s: 18.777771949768066
  _timestamp: 1662615079
  _training_iteration: 2
  avg_loss: 11.095956802368164
  date: 2022-09-08_05-31-20
  done: false
  experiment_id: 34fa016af84f4681bc07e53a68121a25
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 10.696879386901855
  node_ip: 10.150.0.3
  pid: 15357
  should_checkpoint: true
  time_since_restore: 51.34540772438049
  time_this_iter_s: 18.647812366485596
  time_total_s: 51.34540772438049
  timestamp: 1662615080
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 56478cc6
  warmup_time: 0.008785724639892578
  
[2m[36m(RayTrainWorker pid=15472)[0m {'loss': 11.625349998474121, 'avg_loss': 12.824214935302734}


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_082e214e_14_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-30-23/checkpoint_000001)... 

Result for TorchTrainer_082e214e:
  _time_this_iter_s: 18.324623823165894
  _timestamp: 1662615081
  _training_iteration: 2
  avg_loss: 12.824214935302734
  date: 2022-09-08_05-31-21
  done: false
  experiment_id: 8b72537aec484c24bbb5972454c76aea
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 11.625349998474121
  node_ip: 10.150.0.3
  pid: 15339
  should_checkpoint: true
  time_since_restore: 52.7729389667511
  time_this_iter_s: 17.594383001327515
  time_total_s: 52.7729389667511
  timestamp: 1662615081
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 082e214e
  warmup_time: 0.006707668304443359
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=15473)[0m {'loss': 7.277172565460205, 'avg_loss': 9.823028723398844}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_56478cc6_15_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-30-24/checkpoint_000002)... 

Result for TorchTrainer_56478cc6:
  _time_this_iter_s: 17.259112119674683
  _timestamp: 1662615097
  _training_iteration: 3
  avg_loss: 9.823028723398844
  date: 2022-09-08_05-31-37
  done: false
  experiment_id: 34fa016af84f4681bc07e53a68121a25
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 7.277172565460205
  node_ip: 10.150.0.3
  pid: 15357
  should_checkpoint: true
  time_since_restore: 68.57836103439331
  time_this_iter_s: 17.232953310012817
  time_total_s: 68.57836103439331
  timestamp: 1662615097
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 56478cc6
  warmup_time: 0.008785724639892578
  
[2m[36m(RayTrainWorker pid=15472)[0m {'loss': 5.121688365936279, 'avg_loss': 10.25670607884725}


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_082e214e_14_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-30-23/checkpoint_000002)... 

Result for TorchTrainer_082e214e:
  _time_this_iter_s: 17.302026510238647
  _timestamp: 1662615098
  _training_iteration: 3
  avg_loss: 10.25670607884725
  date: 2022-09-08_05-31-39
  done: false
  experiment_id: 8b72537aec484c24bbb5972454c76aea
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 5.121688365936279
  node_ip: 10.150.0.3
  pid: 15339
  should_checkpoint: true
  time_since_restore: 70.0679132938385
  time_this_iter_s: 17.294974327087402
  time_total_s: 70.0679132938385
  timestamp: 1662615099
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 082e214e
  warmup_time: 0.006707668304443359
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=15473)[0m {'loss': 6.673937797546387, 'avg_loss': 9.03575599193573}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_56478cc6_15_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-30-24/checkpoint_000003)... 

Result for TorchTrainer_56478cc6:
  _time_this_iter_s: 17.276448965072632
  _timestamp: 1662615114
  _training_iteration: 4
  avg_loss: 9.03575599193573
  date: 2022-09-08_05-31-54
  done: false
  experiment_id: 34fa016af84f4681bc07e53a68121a25
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 6.673937797546387
  node_ip: 10.150.0.3
  pid: 15357
  should_checkpoint: true
  time_since_restore: 85.85392427444458
  time_this_iter_s: 17.27556324005127
  time_total_s: 85.85392427444458
  timestamp: 1662615114
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 56478cc6
  warmup_time: 0.008785724639892578
  
[2m[36m(RayTrainWorker pid=15472)[0m {'loss': 4.49884557723999, 'avg_loss': 8.817240953445435}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_082e214e_14_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-30-23/checkpoint_000003)... 

Result for TorchTrainer_082e214e:
  _time_this_iter_s: 17.366901636123657
  _timestamp: 1662615115
  _training_iteration: 4
  avg_loss: 8.817240953445435
  date: 2022-09-08_05-31-56
  done: false
  experiment_id: 8b72537aec484c24bbb5972454c76aea
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 4.49884557723999
  node_ip: 10.150.0.3
  pid: 15339
  should_checkpoint: true
  time_since_restore: 87.40035915374756
  time_this_iter_s: 17.332445859909058
  time_total_s: 87.40035915374756
  timestamp: 1662615116
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 082e214e
  warmup_time: 0.006707668304443359
  


Done. 2.3s
Done. 1.3s


[2m[36m(RayTrainWorker pid=15473)[0m {'loss': 4.437581539154053, 'avg_loss': 8.116121101379395}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_56478cc6_15_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-30-24/checkpoint_000004)... 

Result for TorchTrainer_56478cc6:
  _time_this_iter_s: 18.685707330703735
  _timestamp: 1662615133
  _training_iteration: 5
  avg_loss: 8.116121101379395
  date: 2022-09-08_05-32-13
  done: false
  experiment_id: 34fa016af84f4681bc07e53a68121a25
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 4.437581539154053
  node_ip: 10.150.0.3
  pid: 15357
  should_checkpoint: true
  time_since_restore: 104.58564758300781
  time_this_iter_s: 18.731723308563232
  time_total_s: 104.58564758300781
  timestamp: 1662615133
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 56478cc6
  warmup_time: 0.008785724639892578
  
[2m[36m(RayTrainWorker pid=15472)[0m {'loss': 6.939452171325684, 'avg_loss': 8.441683197021485}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_082e214e_14_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-30-23/checkpoint_000004)... Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_56478cc6_15_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-30-24/checkpoint_000004)... 

Result for TorchTrainer_082e214e:
  _time_this_iter_s: 18.20399284362793
  _timestamp: 1662615134
  _training_iteration: 5
  avg_loss: 8.441683197021485
  date: 2022-09-08_05-32-14
  done: false
  experiment_id: 8b72537aec484c24bbb5972454c76aea
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 6.939452171325684
  node_ip: 10.150.0.3
  pid: 15339
  should_checkpoint: true
  time_since_restore: 105.57437372207642
  time_this_iter_s: 18.174014568328857
  time_total_s: 105.57437372207642
  timestamp: 1662615134
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 082e214e
  warmup_time: 0.006707668304443359
  


Done. 0.3s


Done. 1.0s


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

Result for TorchTrainer_56478cc6:
  _time_this_iter_s: 18.685707330703735
  _timestamp: 1662615133
  _training_iteration: 5
  avg_loss: 8.116121101379395
  date: 2022-09-08_05-32-13
  done: true
  experiment_id: 34fa016af84f4681bc07e53a68121a25
  experiment_tag: 15_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0017,max_num_steps=5,num_workers

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_082e214e_14_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-30-23/checkpoint_000004)... Done. 0.3s


0,1
avg_loss,█▇▅▃▁
iterations_since_restore,▁▃▅▆█
loss,█▇▄▃▁
time_since_restore,▁▃▄▆█
time_this_iter_s,█▂▁▁▂
time_total_s,▁▃▄▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,8.11612
iterations_since_restore,5.0
loss,4.43758
time_since_restore,104.58565
time_this_iter_s,18.73172
time_total_s,104.58565
timestamp,1662615133.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00879


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

Result for TorchTrainer_082e214e:
  _time_this_iter_s: 18.20399284362793
  _timestamp: 1662615134
  _training_iteration: 5
  avg_loss: 8.441683197021485
  date: 2022-09-08_05-32-14
  done: true
  experiment_id: 8b72537aec484c24bbb5972454c76aea
  experiment_tag: 14_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0025,max_num_steps=5,num_workers=

0,1
avg_loss,█▆▃▁▁
iterations_since_restore,▁▃▅▆█
loss,█▆▁▁▃
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▁▁▁
time_total_s,▁▃▄▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,8.44168
iterations_since_restore,5.0
loss,6.93945
time_since_restore,105.57437
time_this_iter_s,18.17401
time_total_s,105.57437
timestamp,1662615134.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00671


[2m[36m(RayTrainWorker pid=17183)[0m 2022-09-08 05:32:42,977	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=17191)[0m 2022-09-08 05:32:43,993	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=17183)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=17183)[0m 2022-09-08 05:32:50,526	INFO train_loop_utils.py:300 -- Moving model to device: cpu
[2m[36m(RayTrainWorker pid=17191)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=17191)[0m 2022-09-08 05:32:52,250	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=17191)[0m {'loss': 22.533632278442383, 'avg_loss': 22.533632278442383}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_a48893e4_17_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-32-35/checkpoint_000000)... 

Result for TorchTrainer_a48893e4:
  _time_this_iter_s: 18.643502950668335
  _timestamp: 1662615183
  _training_iteration: 1
  avg_loss: 22.533632278442383
  date: 2022-09-08_05-33-04
  done: false
  experiment_id: f40f725374c94adf83ceb1202c9a602e
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 22.533632278442383
  node_ip: 10.150.0.3
  pid: 17063
  should_checkpoint: true
  time_since_restore: 23.84572196006775
  time_this_iter_s: 23.84572196006775
  time_total_s: 23.84572196006775
  timestamp: 1662615184
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: a48893e4
  warmup_time: 0.008883953094482422
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=17191)[0m {'loss': 9.516277313232422, 'avg_loss': 16.024954795837402}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_a48893e4_17_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-32-35/checkpoint_000001)... 

Result for TorchTrainer_a48893e4:
  _time_this_iter_s: 8.689122676849365
  _timestamp: 1662615191
  _training_iteration: 2
  avg_loss: 16.024954795837402
  date: 2022-09-08_05-33-12
  done: false
  experiment_id: f40f725374c94adf83ceb1202c9a602e
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 9.516277313232422
  node_ip: 10.150.0.3
  pid: 17063
  should_checkpoint: true
  time_since_restore: 32.291722536087036
  time_this_iter_s: 8.446000576019287
  time_total_s: 32.291722536087036
  timestamp: 1662615192
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: a48893e4
  warmup_time: 0.008883953094482422
  
[2m[36m(RayTrainWorker pid=17183)[0m {'loss': 14.107507705688477, 'avg_loss': 14.107507705688477}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_56dc131e_16_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-32-34/checkpoint_000000)... 

Result for TorchTrainer_56dc131e:
  _time_this_iter_s: 28.869539737701416
  _timestamp: 1662615192
  _training_iteration: 1
  avg_loss: 14.107507705688477
  date: 2022-09-08_05-33-13
  done: false
  experiment_id: 523fd946f42a4d8aac0c091b7a58f026
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 14.107507705688477
  node_ip: 10.150.0.3
  pid: 17041
  should_checkpoint: true
  time_since_restore: 34.01579189300537
  time_this_iter_s: 34.01579189300537
  time_total_s: 34.01579189300537
  timestamp: 1662615193
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 56dc131e
  warmup_time: 0.011386871337890625
  


Done. 1.0s
Done. 1.0s


[2m[36m(RayTrainWorker pid=17191)[0m {'loss': 7.987810134887695, 'avg_loss': 13.345906575520834}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_a48893e4_17_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-32-35/checkpoint_000002)... 

Result for TorchTrainer_a48893e4:
  _time_this_iter_s: 8.840969562530518
  _timestamp: 1662615200
  _training_iteration: 3
  avg_loss: 13.345906575520834
  date: 2022-09-08_05-33-21
  done: false
  experiment_id: f40f725374c94adf83ceb1202c9a602e
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 7.987810134887695
  node_ip: 10.150.0.3
  pid: 17063
  should_checkpoint: true
  time_since_restore: 41.06833338737488
  time_this_iter_s: 8.776610851287842
  time_total_s: 41.06833338737488
  timestamp: 1662615201
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: a48893e4
  warmup_time: 0.008883953094482422
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=17191)[0m {'loss': 4.56952428817749, 'avg_loss': 11.151811003684998}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_a48893e4_17_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-32-35/checkpoint_000003)... 

Result for TorchTrainer_a48893e4:
  _time_this_iter_s: 8.07839035987854
  _timestamp: 1662615208
  _training_iteration: 4
  avg_loss: 11.151811003684998
  date: 2022-09-08_05-33-29
  done: false
  experiment_id: f40f725374c94adf83ceb1202c9a602e
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 4.56952428817749
  node_ip: 10.150.0.3
  pid: 17063
  should_checkpoint: true
  time_since_restore: 49.089208364486694
  time_this_iter_s: 8.020874977111816
  time_total_s: 49.089208364486694
  timestamp: 1662615209
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: a48893e4
  warmup_time: 0.008883953094482422
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=17183)[0m {'loss': 7.167483806610107, 'avg_loss': 10.637495756149292}
Result for TorchTrainer_56dc131e:
  _time_this_iter_s: 18.899908304214478
  _timestamp: 1662615211
  _training_iteration: 2
  avg_loss: 10.637495756149292
  date: 2022-09-08_05-33-32
  done: false
  experiment_id: 523fd946f42a4d8aac0c091b7a58f026
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 7.167483806610107
  node_ip: 10.150.0.3
  pid: 17041
  should_checkpoint: true
  time_since_restore: 52.811076164245605
  time_this_iter_s: 18.795284271240234
  time_total_s: 52.811076164245605
  timestamp: 1662615212
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 56dc131e
  warmup_time: 0.011386871337890625
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_56dc131e_16_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-32-34/checkpoint_000001)... Done. 1.0s


[2m[36m(RayTrainWorker pid=17191)[0m {'loss': 7.906360149383545, 'avg_loss': 10.502720832824707}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_a48893e4_17_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-32-35/checkpoint_000004)... 

Result for TorchTrainer_a48893e4:
  _time_this_iter_s: 8.704768419265747
  _timestamp: 1662615217
  _training_iteration: 5
  avg_loss: 10.502720832824707
  date: 2022-09-08_05-33-38
  done: false
  experiment_id: f40f725374c94adf83ceb1202c9a602e
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 7.906360149383545
  node_ip: 10.150.0.3
  pid: 17063
  should_checkpoint: true
  time_since_restore: 57.907556772232056
  time_this_iter_s: 8.818348407745361
  time_total_s: 57.907556772232056
  timestamp: 1662615218
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: a48893e4
  warmup_time: 0.008883953094482422
  


Done. 1.2s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_a48893e4_17_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-32-35/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

[2m[36m(RayTrainWorker pid=17183)[0m {'loss': 6.456693172454834, 'avg_loss': 9.243894894917807}
Result for TorchTrainer_a48893e4:
  _time_this_iter_s: 8.704768419265747
  _timestamp: 1662615217
  _training_iteration: 5
  avg_loss: 10.502720832824707
  date: 2022-09-08_05-33-38
  done: true
  experiment_id: f40f725374c94adf83ceb1202c9a602e
  experiment_tag: 17_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zar

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_56dc131e_16_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-32-34/checkpoint_000002)... 

0,1
avg_loss,█▄▃▁▁
iterations_since_restore,▁▃▅▆█
loss,█▃▂▁▂
time_since_restore,▁▃▅▆█
time_this_iter_s,█▁▁▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,10.50272
iterations_since_restore,5.0
loss,7.90636
time_since_restore,57.90756
time_this_iter_s,8.81835
time_total_s,57.90756
timestamp,1662615218.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00888


Result for TorchTrainer_56dc131e:
  _time_this_iter_s: 17.281771898269653
  _timestamp: 1662615228
  _training_iteration: 3
  avg_loss: 9.243894894917807
  date: 2022-09-08_05-33-49
  done: false
  experiment_id: 523fd946f42a4d8aac0c091b7a58f026
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 6.456693172454834
  node_ip: 10.150.0.3
  pid: 17041
  should_checkpoint: true
  time_since_restore: 70.02264308929443
  time_this_iter_s: 17.211566925048828
  time_total_s: 70.02264308929443
  timestamp: 1662615229
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 56dc131e
  warmup_time: 0.011386871337890625
  


Done. 1.1s
[2m[36m(RayTrainWorker pid=18268)[0m 2022-09-08 05:33:56,827	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=18268)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=18268)[0m 2022-09-08 05:34:02,907	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=18268)[0m {'loss': 8.184290885925293, 'avg_loss': 8.184290885925293}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_a4f3d74e_18_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-33-50/checkpoint_000000)... 

Result for TorchTrainer_a4f3d74e:
  _time_this_iter_s: 11.860567331314087
  _timestamp: 1662615249
  _training_iteration: 1
  avg_loss: 8.184290885925293
  date: 2022-09-08_05-34-09
  done: false
  experiment_id: 82d1e1eeaac14bba9dbb0a867f244494
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 8.184290885925293
  node_ip: 10.150.0.3
  pid: 18196
  should_checkpoint: true
  time_since_restore: 15.588792324066162
  time_this_iter_s: 15.588792324066162
  time_total_s: 15.588792324066162
  timestamp: 1662615249
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: a4f3d74e
  warmup_time: 0.006447553634643555
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=17183)[0m {'loss': 3.9591867923736572, 'avg_loss': 7.922717869281769}
Result for TorchTrainer_56dc131e:
  _time_this_iter_s: 23.01553702354431
  _timestamp: 1662615251
  _training_iteration: 4
  avg_loss: 7.922717869281769
  date: 2022-09-08_05-34-12
  done: false
  experiment_id: 523fd946f42a4d8aac0c091b7a58f026
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 3.9591867923736572
  node_ip: 10.150.0.3
  pid: 17041
  should_checkpoint: true
  time_since_restore: 93.07944941520691
  time_this_iter_s: 23.056806325912476
  time_total_s: 93.07944941520691
  timestamp: 1662615252
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 56dc131e
  warmup_time: 0.011386871337890625
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_56dc131e_16_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-32-34/checkpoint_000003)... Done. 1.0s


[2m[36m(RayTrainWorker pid=18268)[0m {'loss': 1.6875518560409546, 'avg_loss': 4.935921370983124}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_a4f3d74e_18_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-33-50/checkpoint_000001)... 

Result for TorchTrainer_a4f3d74e:
  _time_this_iter_s: 5.193772315979004
  _timestamp: 1662615254
  _training_iteration: 2
  avg_loss: 4.935921370983124
  date: 2022-09-08_05-34-14
  done: false
  experiment_id: 82d1e1eeaac14bba9dbb0a867f244494
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 1.6875518560409546
  node_ip: 10.150.0.3
  pid: 18196
  should_checkpoint: true
  time_since_restore: 20.771263599395752
  time_this_iter_s: 5.18247127532959
  time_total_s: 20.771263599395752
  timestamp: 1662615254
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: a4f3d74e
  warmup_time: 0.006447553634643555
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=18268)[0m {'loss': 2.1012165546417236, 'avg_loss': 3.9910197655359902}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_a4f3d74e_18_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-33-50/checkpoint_000002)... Done. 1.0s


[2m[36m(RayTrainWorker pid=18268)[0m {'loss': 13.670435905456543, 'avg_loss': 6.4108738005161285}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_a4f3d74e_18_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-33-50/checkpoint_000003)... 

Result for TorchTrainer_a4f3d74e:
  _time_this_iter_s: 4.929238557815552
  _timestamp: 1662615264
  _training_iteration: 4
  avg_loss: 6.4108738005161285
  date: 2022-09-08_05-34-24
  done: false
  experiment_id: 82d1e1eeaac14bba9dbb0a867f244494
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 13.670435905456543
  node_ip: 10.150.0.3
  pid: 18196
  should_checkpoint: true
  time_since_restore: 30.482129335403442
  time_this_iter_s: 4.942955732345581
  time_total_s: 30.482129335403442
  timestamp: 1662615264
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: a4f3d74e
  warmup_time: 0.006447553634643555
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=18268)[0m {'loss': 8.795048713684082, 'avg_loss': 6.887708783149719}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_a4f3d74e_18_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-33-50/checkpoint_000004)... Done. 1.0s


[2m[36m(RayTrainWorker pid=17183)[0m {'loss': 5.060309410095215, 'avg_loss': 7.350236177444458}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_a4f3d74e_18_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-33-50/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▃▁▅▆
iterations_since_restore,▁▃▅▆█
loss,▅▁▁█▅
time_since_restore,▁▃▅▆█
time_this_iter_s,█▁▁▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,6.88771
iterations_since_restore,5.0
loss,8.79505
time_since_restore,35.03598
time_this_iter_s,4.55385
time_total_s,35.03598
timestamp,1662615269.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00645


Result for TorchTrainer_a4f3d74e:
  _time_this_iter_s: 4.669480323791504
  _timestamp: 1662615268
  _training_iteration: 5
  avg_loss: 6.887708783149719
  date: 2022-09-08_05-34-29
  done: true
  experiment_id: 82d1e1eeaac14bba9dbb0a867f244494
  experiment_tag: 18_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0012,max_num_steps=5,num_workers=4

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_56dc131e_16_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-32-34/checkpoint_000004)... 

Result for TorchTrainer_56dc131e:
  _time_this_iter_s: 18.19042134284973
  _timestamp: 1662615270
  _training_iteration: 5
  avg_loss: 7.350236177444458
  date: 2022-09-08_05-34-30
  done: false
  experiment_id: 523fd946f42a4d8aac0c091b7a58f026
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 5.060309410095215
  node_ip: 10.150.0.3
  pid: 17041
  should_checkpoint: true
  time_since_restore: 111.32075810432434
  time_this_iter_s: 18.24130868911743
  time_total_s: 111.32075810432434
  timestamp: 1662615270
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 56dc131e
  warmup_time: 0.011386871337890625
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_56dc131e_16_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-32-34/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▄▃▂▁
iterations_since_restore,▁▃▅▆█
loss,█▃▃▁▂
time_since_restore,▁▃▄▆█
time_this_iter_s,█▂▁▃▁
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,7.35024
iterations_since_restore,5.0
loss,5.06031
time_since_restore,111.32076
time_this_iter_s,18.24131
time_total_s,111.32076
timestamp,1662615270.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.01139


Result for TorchTrainer_56dc131e:
  _time_this_iter_s: 18.19042134284973
  _timestamp: 1662615270
  _training_iteration: 5
  avg_loss: 7.350236177444458
  date: 2022-09-08_05-34-30
  done: true
  experiment_id: 523fd946f42a4d8aac0c091b7a58f026
  experiment_tag: 16_batch_size=24.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0016,max_num_steps=5,num_workers=

[2m[36m(RayTrainWorker pid=19317)[0m 2022-09-08 05:34:54,475	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=19323)[0m 2022-09-08 05:34:54,834	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=19317)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=19323)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=19317)[0m 2022-09-08 05:35:02,225	INFO train_loop_utils.py:300 -- Moving model to device: cpu
[2m[36m(RayTrainWorker pid=19323)[0m 2022-09-08 05:35:02,282	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=19317)[0m {'loss': 7.125338077545166, 'avg_loss': 7.125338077545166}
[2m[36m(RayTrainWorker pid=19323)[0m {'loss': 4.717043399810791, 'avg_loss': 4.717043399810791}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_d1981512_19_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-34-46/checkpoint_000000)... [34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_f2d73334_20_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-34-46/checkpoint_000000)... 

Result for TorchTrainer_d1981512:
  _time_this_iter_s: 13.620954036712646
  _timestamp: 1662615308
  _training_iteration: 1
  avg_loss: 7.125338077545166
  date: 2022-09-08_05-35-09
  done: false
  experiment_id: bc114db5cc0749d8a61edd3dbfe554b7
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 7.125338077545166
  node_ip: 10.150.0.3
  pid: 19185
  should_checkpoint: true
  time_since_restore: 18.817776918411255
  time_this_iter_s: 18.817776918411255
  time_total_s: 18.817776918411255
  timestamp: 1662615309
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: d1981512
  warmup_time: 0.00798654556274414
  
Result for TorchTrainer_f2d73334:
  _time_this_iter_s: 13.25819730758667
  _timestamp: 1662615308
  _training_iteration: 1
  avg_loss: 4.717043399810791
  date: 2022-09-08_05-35-09
  done: false
  experiment_id: 1329cf433fbe41878c3f3c893a1feeaf
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 4.717043399810791
  node_ip: 10.150.0.3
  pid: 19193


Done. 1.7s
Done. 1.8s


[2m[36m(RayTrainWorker pid=19317)[0m {'loss': 12.018231391906738, 'avg_loss': 9.571784734725952}
[2m[36m(RayTrainWorker pid=19323)[0m {'loss': 11.622198104858398, 'avg_loss': 8.169620752334595}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_d1981512_19_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-34-46/checkpoint_000001)... [34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_f2d73334_20_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-34-46/checkpoint_000001)... 

Result for TorchTrainer_d1981512:
  _time_this_iter_s: 5.459226608276367
  _timestamp: 1662615314
  _training_iteration: 2
  avg_loss: 9.571784734725952
  date: 2022-09-08_05-35-14
  done: false
  experiment_id: bc114db5cc0749d8a61edd3dbfe554b7
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 12.018231391906738
  node_ip: 10.150.0.3
  pid: 19185
  should_checkpoint: true
  time_since_restore: 24.159955739974976
  time_this_iter_s: 5.342178821563721
  time_total_s: 24.159955739974976
  timestamp: 1662615314
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: d1981512
  warmup_time: 0.00798654556274414
  
Result for TorchTrainer_f2d73334:
  _time_this_iter_s: 5.4405128955841064
  _timestamp: 1662615314
  _training_iteration: 2
  avg_loss: 8.169620752334595
  date: 2022-09-08_05-35-14
  done: false
  experiment_id: 1329cf433fbe41878c3f3c893a1feeaf
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 11.622198104858398
  node_ip: 10.150.0.3
  pid: 19193

Done. 1.7s
Done. 1.7s


[2m[36m(RayTrainWorker pid=19317)[0m {'loss': 7.049599647521973, 'avg_loss': 8.731056372324625}
[2m[36m(RayTrainWorker pid=19323)[0m {'loss': 9.869887351989746, 'avg_loss': 8.736376285552979}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_d1981512_19_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-34-46/checkpoint_000002)... [34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_f2d73334_20_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-34-46/checkpoint_000002)... 

Result for TorchTrainer_d1981512:
  _time_this_iter_s: 5.246995210647583
  _timestamp: 1662615319
  _training_iteration: 3
  avg_loss: 8.731056372324625
  date: 2022-09-08_05-35-19
  done: false
  experiment_id: bc114db5cc0749d8a61edd3dbfe554b7
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 7.049599647521973
  node_ip: 10.150.0.3
  pid: 19185
  should_checkpoint: true
  time_since_restore: 29.38698172569275
  time_this_iter_s: 5.227025985717773
  time_total_s: 29.38698172569275
  timestamp: 1662615319
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: d1981512
  warmup_time: 0.00798654556274414
  
Result for TorchTrainer_f2d73334:
  _time_this_iter_s: 5.241349458694458
  _timestamp: 1662615319
  _training_iteration: 3
  avg_loss: 8.736376285552979
  date: 2022-09-08_05-35-19
  done: false
  experiment_id: 1329cf433fbe41878c3f3c893a1feeaf
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 9.869887351989746
  node_ip: 10.150.0.3
  pid: 19193
  sh

Done. 1.8s
Done. 1.8s


[2m[36m(RayTrainWorker pid=19323)[0m {'loss': 7.742615222930908, 'avg_loss': 8.487936019897461}
[2m[36m(RayTrainWorker pid=19317)[0m {'loss': 7.455737113952637, 'avg_loss': 8.412226557731628}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_f2d73334_20_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-34-46/checkpoint_000003)... 

Result for TorchTrainer_f2d73334:
  _time_this_iter_s: 5.0470147132873535
  _timestamp: 1662615324
  _training_iteration: 4
  avg_loss: 8.487936019897461
  date: 2022-09-08_05-35-25
  done: false
  experiment_id: 1329cf433fbe41878c3f3c893a1feeaf
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 7.742615222930908
  node_ip: 10.150.0.3
  pid: 19193
  should_checkpoint: true
  time_since_restore: 34.014949321746826
  time_this_iter_s: 5.098156929016113
  time_total_s: 34.014949321746826
  timestamp: 1662615325
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: f2d73334
  warmup_time: 0.0074040889739990234
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_d1981512_19_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-34-46/checkpoint_000003)... 

Result for TorchTrainer_d1981512:
  _time_this_iter_s: 5.314985990524292
  _timestamp: 1662615324
  _training_iteration: 4
  avg_loss: 8.412226557731628
  date: 2022-09-08_05-35-25
  done: false
  experiment_id: bc114db5cc0749d8a61edd3dbfe554b7
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 7.455737113952637
  node_ip: 10.150.0.3
  pid: 19185
  should_checkpoint: true
  time_since_restore: 34.83897280693054
  time_this_iter_s: 5.451991081237793
  time_total_s: 34.83897280693054
  timestamp: 1662615325
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: d1981512
  warmup_time: 0.00798654556274414
  


Done. 1.3s
Done. 1.5s


[2m[36m(RayTrainWorker pid=19323)[0m {'loss': 3.3201563358306885, 'avg_loss': 7.454380083084106}
[2m[36m(RayTrainWorker pid=19317)[0m {'loss': 3.7349305152893066, 'avg_loss': 7.476767349243164}
Result for TorchTrainer_f2d73334:
  _time_this_iter_s: 5.178601264953613
  _timestamp: 1662615329
  _training_iteration: 5
  avg_loss: 7.454380083084106
  date: 2022-09-08_05-35-30
  done: false
  experiment_id: 1329cf433fbe41878c3f3c893a1feeaf
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 3.3201563358306885
  node_ip: 10.150.0.3
  pid: 19193
  should_checkpoint: true
  time_since_restore: 39.10718631744385
  time_this_iter_s: 5.0922369956970215
  time_total_s: 39.10718631744385
  timestamp: 1662615330
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: f2d73334
  warmup_time: 0.0074040889739990234
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_f2d73334_20_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-34-46/checkpoint_000004)... [34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_d1981512_19_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-34-46/checkpoint_000004)... 

Result for TorchTrainer_d1981512:
  _time_this_iter_s: 5.248953819274902
  _timestamp: 1662615330
  _training_iteration: 5
  avg_loss: 7.476767349243164
  date: 2022-09-08_05-35-30
  done: false
  experiment_id: bc114db5cc0749d8a61edd3dbfe554b7
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 3.7349305152893066
  node_ip: 10.150.0.3
  pid: 19185
  should_checkpoint: true
  time_since_restore: 39.97370100021362
  time_this_iter_s: 5.134728193283081
  time_total_s: 39.97370100021362
  timestamp: 1662615330
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: d1981512
  warmup_time: 0.00798654556274414
  


Done. 2.2s
Done. 1.9s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_f2d73334_20_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-34-46/checkpoint_000004)... Done. 0.2s


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

Result for TorchTrainer_f2d73334:
  _time_this_iter_s: 5.178601264953613
  _timestamp: 1662615329
  _training_iteration: 5
  avg_loss: 7.454380083084106
  date: 2022-09-08_05-35-30
  done: true
  experiment_id: 1329cf433fbe41878c3f3c893a1feeaf
  experiment_tag: 20_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0008,max_num_steps=5,num_workers=4

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_d1981512_19_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-34-46/checkpoint_000004)... 

Done. 0.3s


0,1
avg_loss,▁▇██▆
iterations_since_restore,▁▃▅▆█
loss,▂█▇▅▁
time_since_restore,▁▃▅▆█
time_this_iter_s,█▁▁▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,7.45438
iterations_since_restore,5.0
loss,3.32016
time_since_restore,39.10719
time_this_iter_s,5.09224
time_total_s,39.10719
timestamp,1662615330.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.0074


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

Result for TorchTrainer_d1981512:
  _time_this_iter_s: 5.248953819274902
  _timestamp: 1662615330
  _training_iteration: 5
  avg_loss: 7.476767349243164
  date: 2022-09-08_05-35-30
  done: true
  experiment_id: bc114db5cc0749d8a61edd3dbfe554b7
  experiment_tag: 19_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0012,max_num_steps=5,num_workers=4

0,1
avg_loss,▁█▆▅▂
iterations_since_restore,▁▃▅▆█
loss,▄█▄▄▁
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▁▁▁
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,7.47677
iterations_since_restore,5.0
loss,3.73493
time_since_restore,39.9737
time_this_iter_s,5.13473
time_total_s,39.9737
timestamp,1662615330.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00799


[2m[36m(RayTrainWorker pid=20490)[0m 2022-09-08 05:35:59,661	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=20497)[0m 2022-09-08 05:36:00,280	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=20490)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=20497)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=20490)[0m 2022-09-08 05:36:08,612	INFO train_loop_utils.py:300 -- Moving model to device: cpu
[2m[36m(RayTrainWorker pid=20497)[0m 2022-09-08 05:36:08,695	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=20490)[0m {'loss': 18.125497817993164, 'avg_loss': 18.125497817993164}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_f32107e8_21_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-35-51/checkpoint_000000)... 

Result for TorchTrainer_f32107e8:
  _time_this_iter_s: 14.184948444366455
  _timestamp: 1662615374
  _training_iteration: 1
  avg_loss: 18.125497817993164
  date: 2022-09-08_05-36-15
  done: false
  experiment_id: 82d61c0b22294d8a82cc5fe6cb792121
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 18.125497817993164
  node_ip: 10.150.0.3
  pid: 20344
  should_checkpoint: true
  time_since_restore: 19.430758476257324
  time_this_iter_s: 19.430758476257324
  time_total_s: 19.430758476257324
  timestamp: 1662615375
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: f32107e8
  warmup_time: 0.006804943084716797
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=20497)[0m {'loss': 12.634977340698242, 'avg_loss': 12.634977340698242}
[2m[36m(RayTrainWorker pid=20490)[0m {'loss': 5.910640239715576, 'avg_loss': 12.01806902885437}
Result for TorchTrainer_19844d6e:
  _time_this_iter_s: 18.183110237121582
  _timestamp: 1662615379
  _training_iteration: 1
  avg_loss: 12.634977340698242
  date: 2022-09-08_05-36-19
  done: false
  experiment_id: b61eae6f290a4823a80575937c7f845c
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 12.634977340698242
  node_ip: 10.150.0.3
  pid: 20378
  should_checkpoint: true
  time_since_restore: 23.139058351516724
  time_this_iter_s: 23.139058351516724
  time_total_s: 23.139058351516724
  timestamp: 1662615379
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 19844d6e
  warmup_time: 0.010516166687011719
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_19844d6e_22_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-35-51/checkpoint_000000)... [34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_f32107e8_21_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-35-51/checkpoint_000001)... 

Result for TorchTrainer_f32107e8:
  _time_this_iter_s: 4.902566194534302
  _timestamp: 1662615379
  _training_iteration: 2
  avg_loss: 12.01806902885437
  date: 2022-09-08_05-36-20
  done: false
  experiment_id: 82d61c0b22294d8a82cc5fe6cb792121
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 5.910640239715576
  node_ip: 10.150.0.3
  pid: 20344
  should_checkpoint: true
  time_since_restore: 24.44508719444275
  time_this_iter_s: 5.014328718185425
  time_total_s: 24.44508719444275
  timestamp: 1662615380
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: f32107e8
  warmup_time: 0.006804943084716797
  


Done. 1.4s
Done. 1.3s


[2m[36m(RayTrainWorker pid=20490)[0m {'loss': 10.40995979309082, 'avg_loss': 11.482032616933187}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_f32107e8_21_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-35-51/checkpoint_000002)... 

Result for TorchTrainer_f32107e8:
  _time_this_iter_s: 5.406080722808838
  _timestamp: 1662615384
  _training_iteration: 3
  avg_loss: 11.482032616933187
  date: 2022-09-08_05-36-25
  done: false
  experiment_id: 82d61c0b22294d8a82cc5fe6cb792121
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 10.40995979309082
  node_ip: 10.150.0.3
  pid: 20344
  should_checkpoint: true
  time_since_restore: 29.635840892791748
  time_this_iter_s: 5.190753698348999
  time_total_s: 29.635840892791748
  timestamp: 1662615385
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: f32107e8
  warmup_time: 0.006804943084716797
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=20497)[0m {'loss': 14.638758659362793, 'avg_loss': 13.636868000030518}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_19844d6e_22_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-35-51/checkpoint_000001)... 

Result for TorchTrainer_19844d6e:
  _time_this_iter_s: 9.33182978630066
  _timestamp: 1662615388
  _training_iteration: 2
  avg_loss: 13.636868000030518
  date: 2022-09-08_05-36-28
  done: false
  experiment_id: b61eae6f290a4823a80575937c7f845c
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 14.638758659362793
  node_ip: 10.150.0.3
  pid: 20378
  should_checkpoint: true
  time_since_restore: 32.410399436950684
  time_this_iter_s: 9.27134108543396
  time_total_s: 32.410399436950684
  timestamp: 1662615388
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 19844d6e
  warmup_time: 0.010516166687011719
  
[2m[36m(RayTrainWorker pid=20490)[0m {'loss': 3.0071396827697754, 'avg_loss': 9.363309383392334}


Done. 1.1s


Result for TorchTrainer_f32107e8:
  _time_this_iter_s: 5.041220664978027
  _timestamp: 1662615389
  _training_iteration: 4
  avg_loss: 9.363309383392334
  date: 2022-09-08_05-36-30
  done: false
  experiment_id: 82d61c0b22294d8a82cc5fe6cb792121
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 3.0071396827697754
  node_ip: 10.150.0.3
  pid: 20344
  should_checkpoint: true
  time_since_restore: 34.74055457115173
  time_this_iter_s: 5.104713678359985
  time_total_s: 34.74055457115173
  timestamp: 1662615390
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: f32107e8
  warmup_time: 0.006804943084716797
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_f32107e8_21_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-35-51/checkpoint_000003)... Done. 1.0s


[2m[36m(RayTrainWorker pid=20490)[0m {'loss': 3.944519519805908, 'avg_loss': 8.27955141067505}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_f32107e8_21_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-35-51/checkpoint_000004)... Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_f32107e8_21_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-35-51/checkpoint_000004)... Done. 0.3s


[2m[36m(RayTrainWorker pid=20497)[0m {'loss': 4.8252387046813965, 'avg_loss': 10.699658234914144}


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▄▃▂▁
iterations_since_restore,▁▃▅▆█
loss,█▂▄▁▁
time_since_restore,▁▃▅▆█
time_this_iter_s,█▁▁▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,8.27955
iterations_since_restore,5.0
loss,3.94452
time_since_restore,39.74283
time_this_iter_s,5.00227
time_total_s,39.74283
timestamp,1662615395.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.0068


Result for TorchTrainer_f32107e8:
  _time_this_iter_s: 4.963345766067505
  _timestamp: 1662615394
  _training_iteration: 5
  avg_loss: 8.27955141067505
  date: 2022-09-08_05-36-35
  done: true
  experiment_id: 82d61c0b22294d8a82cc5fe6cb792121
  experiment_tag: 21_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0007,max_num_steps=5,num_workers=4,

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_19844d6e_22_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-35-51/checkpoint_000002)... 

Result for TorchTrainer_19844d6e:
  _time_this_iter_s: 9.012226343154907
  _timestamp: 1662615397
  _training_iteration: 3
  avg_loss: 10.699658234914144
  date: 2022-09-08_05-36-37
  done: false
  experiment_id: b61eae6f290a4823a80575937c7f845c
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 4.8252387046813965
  node_ip: 10.150.0.3
  pid: 20378
  should_checkpoint: true
  time_since_restore: 41.37596249580383
  time_this_iter_s: 8.96556305885315
  time_total_s: 41.37596249580383
  timestamp: 1662615397
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 19844d6e
  warmup_time: 0.010516166687011719
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=20497)[0m {'loss': 8.904004096984863, 'avg_loss': 10.250744700431824}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_19844d6e_22_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-35-51/checkpoint_000003)... Done. 1.2s
[2m[36m(RayTrainWorker pid=21555)[0m 2022-09-08 05:36:52,512	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]


[2m[36m(RayTrainWorker pid=20497)[0m {'loss': 11.967046737670898, 'avg_loss': 10.594005107879639}
Result for TorchTrainer_19844d6e:
  _time_this_iter_s: 10.196188688278198
  _timestamp: 1662615415
  _training_iteration: 5
  avg_loss: 10.594005107879639
  date: 2022-09-08_05-36-56
  done: false
  experiment_id: b61eae6f290a4823a80575937c7f845c
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 11.967046737670898
  node_ip: 10.150.0.3
  pid: 20378
  should_checkpoint: true
  time_since_restore: 59.6180145740509
  time_this_iter_s: 10.068226099014282
  time_total_s: 59.6180145740509
  timestamp: 1662615416
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 19844d6e
  warmup_time: 0.010516166687011719
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_19844d6e_22_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-35-51/checkpoint_000004)... Done. 1.2s
[2m[36m(RayTrainWorker pid=21555)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
Done. 0.4s


[2m[36m(RayTrainWorker pid=21555)[0m 2022-09-08 05:36:58,041	INFO train_loop_utils.py:300 -- Moving model to device: cpu


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,▆█▂▁▂
iterations_since_restore,▁▃▅▆█
loss,▇█▁▄▆
time_since_restore,▁▃▄▆█
time_this_iter_s,█▂▁▁▂
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,10.59401
iterations_since_restore,5.0
loss,11.96705
time_since_restore,59.61801
time_this_iter_s,10.06823
time_total_s,59.61801
timestamp,1662615416.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.01052


Result for TorchTrainer_19844d6e:
  _time_this_iter_s: 10.196188688278198
  _timestamp: 1662615415
  _training_iteration: 5
  avg_loss: 10.594005107879639
  date: 2022-09-08_05-36-56
  done: true
  experiment_id: b61eae6f290a4823a80575937c7f845c
  experiment_tag: 22_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0049,max_num_steps=5,num_worker

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_19e3435a_23_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-36-44/checkpoint_000000)... 

Result for TorchTrainer_19e3435a:
  _time_this_iter_s: 15.565674304962158
  _timestamp: 1662615428
  _training_iteration: 1
  avg_loss: 12.127396583557129
  date: 2022-09-08_05-37-09
  done: false
  experiment_id: 2c8fbcda7aba4b7988d4da1d4f1b34c7
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 12.127396583557129
  node_ip: 10.150.0.3
  pid: 21458
  should_checkpoint: true
  time_since_restore: 19.993744134902954
  time_this_iter_s: 19.993744134902954
  time_total_s: 19.993744134902954
  timestamp: 1662615429
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 19e3435a
  warmup_time: 0.018710851669311523
  


Done. 2.3s
[2m[36m(RayTrainWorker pid=22357)[0m 2022-09-08 05:37:14,176	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]


[2m[36m(RayTrainWorker pid=21555)[0m {'loss': 16.253801345825195, 'avg_loss': 14.190598964691162}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_19e3435a_23_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-36-44/checkpoint_000001)... 

Result for TorchTrainer_19e3435a:
  _time_this_iter_s: 10.617517709732056
  _timestamp: 1662615439
  _training_iteration: 2
  avg_loss: 14.190598964691162
  date: 2022-09-08_05-37-19
  done: false
  experiment_id: 2c8fbcda7aba4b7988d4da1d4f1b34c7
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 16.253801345825195
  node_ip: 10.150.0.3
  pid: 21458
  should_checkpoint: true
  time_since_restore: 30.471387147903442
  time_this_iter_s: 10.477643013000488
  time_total_s: 30.471387147903442
  timestamp: 1662615439
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 19e3435a
  warmup_time: 0.018710851669311523
  


[2m[36m(RayTrainWorker pid=22357)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
Done. 1.0s
[2m[36m(RayTrainWorker pid=22357)[0m 2022-09-08 05:37:20,418	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=22357)[0m {'loss': 21.612876892089844, 'avg_loss': 21.612876892089844}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_39689a2c_24_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-37-06/checkpoint_000000)... 

Result for TorchTrainer_39689a2c:
  _time_this_iter_s: 10.851271152496338
  _timestamp: 1662615445
  _training_iteration: 1
  avg_loss: 21.612876892089844
  date: 2022-09-08_05-37-26
  done: false
  experiment_id: 4fc076d0b78a49c48627d9ca8dd2fb36
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 21.612876892089844
  node_ip: 10.150.0.3
  pid: 22267
  should_checkpoint: true
  time_since_restore: 15.322520971298218
  time_this_iter_s: 15.322520971298218
  time_total_s: 15.322520971298218
  timestamp: 1662615446
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 39689a2c
  warmup_time: 0.00509190559387207
  


Done. 1.2s


[2m[36m(RayTrainWorker pid=21555)[0m {'loss': 8.477602005004883, 'avg_loss': 12.286266644795736}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_19e3435a_23_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-36-44/checkpoint_000002)... 

Result for TorchTrainer_19e3435a:
  _time_this_iter_s: 9.171177864074707
  _timestamp: 1662615448
  _training_iteration: 3
  avg_loss: 12.286266644795736
  date: 2022-09-08_05-37-28
  done: false
  experiment_id: 2c8fbcda7aba4b7988d4da1d4f1b34c7
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 8.477602005004883
  node_ip: 10.150.0.3
  pid: 21458
  should_checkpoint: true
  time_since_restore: 39.649136543273926
  time_this_iter_s: 9.177749395370483
  time_total_s: 39.649136543273926
  timestamp: 1662615448
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 19e3435a
  warmup_time: 0.018710851669311523
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=22357)[0m {'loss': 7.317259788513184, 'avg_loss': 14.465068340301514}
Result for TorchTrainer_39689a2c:
  _time_this_iter_s: 5.445980072021484
  _timestamp: 1662615451
  _training_iteration: 2
  avg_loss: 14.465068340301514
  date: 2022-09-08_05-37-31
  done: false
  experiment_id: 4fc076d0b78a49c48627d9ca8dd2fb36
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 7.317259788513184
  node_ip: 10.150.0.3
  pid: 22267
  should_checkpoint: true
  time_since_restore: 20.720855951309204
  time_this_iter_s: 5.398334980010986
  time_total_s: 20.720855951309204
  timestamp: 1662615451
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 39689a2c
  warmup_time: 0.00509190559387207
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_39689a2c_24_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-37-06/checkpoint_000001)... Done. 1.0s


[2m[36m(RayTrainWorker pid=22357)[0m {'loss': 16.19473648071289, 'avg_loss': 15.041624387105307}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_39689a2c_24_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-37-06/checkpoint_000002)... 

[2m[36m(RayTrainWorker pid=21555)[0m {'loss': 4.94487190246582, 'avg_loss': 10.450917959213257}


Done. 1.0s


Result for TorchTrainer_19e3435a:
  _time_this_iter_s: 9.026799440383911
  _timestamp: 1662615457
  _training_iteration: 4
  avg_loss: 10.450917959213257
  date: 2022-09-08_05-37-37
  done: false
  experiment_id: 2c8fbcda7aba4b7988d4da1d4f1b34c7
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 4.94487190246582
  node_ip: 10.150.0.3
  pid: 21458
  should_checkpoint: true
  time_since_restore: 48.71517324447632
  time_this_iter_s: 9.066036701202393
  time_total_s: 48.71517324447632
  timestamp: 1662615457
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 19e3435a
  warmup_time: 0.018710851669311523
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_19e3435a_23_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-36-44/checkpoint_000003)... Done. 1.0s


[2m[36m(RayTrainWorker pid=22357)[0m {'loss': 8.221229553222656, 'avg_loss': 13.336525678634644}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_39689a2c_24_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-37-06/checkpoint_000003)... 

Result for TorchTrainer_39689a2c:
  _time_this_iter_s: 5.254847049713135
  _timestamp: 1662615461
  _training_iteration: 4
  avg_loss: 13.336525678634644
  date: 2022-09-08_05-37-41
  done: false
  experiment_id: 4fc076d0b78a49c48627d9ca8dd2fb36
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 8.221229553222656
  node_ip: 10.150.0.3
  pid: 22267
  should_checkpoint: true
  time_since_restore: 30.819172382354736
  time_this_iter_s: 5.245149850845337
  time_total_s: 30.819172382354736
  timestamp: 1662615461
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 39689a2c
  warmup_time: 0.00509190559387207
  


Done. 1.5s


[2m[36m(RayTrainWorker pid=22357)[0m {'loss': 2.078657865524292, 'avg_loss': 11.084952116012573}
[2m[36m(RayTrainWorker pid=21555)[0m {'loss': 5.814975261688232, 'avg_loss': 9.523729419708252}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_39689a2c_24_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-37-06/checkpoint_000004)... Done. 2.3s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_39689a2c_24_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-37-06/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

Result for TorchTrainer_39689a2c:
  _time_this_iter_s: 4.8184614181518555
  _timestamp: 1662615466
  _training_iteration: 5
  avg_loss: 11.084952116012573
  date: 2022-09-08_05-37-46
  done: true
  experiment_id: 4fc076d0b78a49c48627d9ca8dd2fb36
  experiment_tag: 24_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0007,max_num_steps=5,num_workers

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_19e3435a_23_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-36-44/checkpoint_000004)... 

0,1
avg_loss,█▃▄▂▁
iterations_since_restore,▁▃▅▆█
loss,█▃▆▃▁
time_since_restore,▁▃▅▆█
time_this_iter_s,█▁▁▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,11.08495
iterations_since_restore,5.0
loss,2.07866
time_since_restore,35.68733
time_this_iter_s,4.86816
time_total_s,35.68733
timestamp,1662615466.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00509


Result for TorchTrainer_19e3435a:
  _time_this_iter_s: 8.981808185577393
  _timestamp: 1662615466
  _training_iteration: 5
  avg_loss: 9.523729419708252
  date: 2022-09-08_05-37-46
  done: false
  experiment_id: 2c8fbcda7aba4b7988d4da1d4f1b34c7
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 5.814975261688232
  node_ip: 10.150.0.3
  pid: 21458
  should_checkpoint: true
  time_since_restore: 57.77728867530823
  time_this_iter_s: 9.06211543083191
  time_total_s: 57.77728867530823
  timestamp: 1662615466
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 19e3435a
  warmup_time: 0.018710851669311523
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_19e3435a_23_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50_2022-09-08_05-36-44/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,▅█▅▂▁
iterations_since_restore,▁▃▅▆█
loss,▅█▃▁▂
time_since_restore,▁▃▅▆█
time_this_iter_s,█▂▁▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,9.52373
iterations_since_restore,5.0
loss,5.81498
time_since_restore,57.77729
time_this_iter_s,9.06212
time_total_s,57.77729
timestamp,1662615466.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.01871


Result for TorchTrainer_19e3435a:
  _time_this_iter_s: 8.981808185577393
  _timestamp: 1662615466
  _training_iteration: 5
  avg_loss: 9.523729419708252
  date: 2022-09-08_05-37-46
  done: true
  experiment_id: 2c8fbcda7aba4b7988d4da1d4f1b34c7
  experiment_tag: 23_batch_size=12.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0045,max_num_steps=5,num_workers=

[2m[36m(RayTrainWorker pid=23341)[0m 2022-09-08 05:38:11,718	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=23341)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=23341)[0m 2022-09-08 05:38:16,609	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=23341)[0m {'loss': 16.544782638549805, 'avg_loss': 16.544782638549805}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_466c2d38_25_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-38-06/checkpoint_000000)... 

Result for TorchTrainer_466c2d38:
  _time_this_iter_s: 9.157495498657227
  _timestamp: 1662615501
  _training_iteration: 1
  avg_loss: 16.544782638549805
  date: 2022-09-08_05-38-21
  done: false
  experiment_id: 80b60343279b41babfb88933ef289972
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 16.544782638549805
  node_ip: 10.150.0.3
  pid: 23274
  should_checkpoint: true
  time_since_restore: 12.438738107681274
  time_this_iter_s: 12.438738107681274
  time_total_s: 12.438738107681274
  timestamp: 1662615501
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 466c2d38
  warmup_time: 0.0057277679443359375
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=23341)[0m {'loss': 12.255053520202637, 'avg_loss': 14.39991807937622}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_466c2d38_25_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-38-06/checkpoint_000001)... Done. 1.0s


[2m[36m(RayTrainWorker pid=23341)[0m {'loss': 11.214361190795898, 'avg_loss': 13.33806578318278}
Result for TorchTrainer_466c2d38:
  _time_this_iter_s: 4.415987730026245
  _timestamp: 1662615510
  _training_iteration: 3
  avg_loss: 13.33806578318278
  date: 2022-09-08_05-38-30
  done: false
  experiment_id: 80b60343279b41babfb88933ef289972
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 11.214361190795898
  node_ip: 10.150.0.3
  pid: 23274
  should_checkpoint: true
  time_since_restore: 21.211457014083862
  time_this_iter_s: 4.373098611831665
  time_total_s: 21.211457014083862
  timestamp: 1662615510
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 466c2d38
  warmup_time: 0.0057277679443359375
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_466c2d38_25_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-38-06/checkpoint_000002)... Done. 1.0s


[2m[36m(RayTrainWorker pid=23341)[0m {'loss': 8.85403823852539, 'avg_loss': 12.217058897018433}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_466c2d38_25_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-38-06/checkpoint_000003)... Done. 1.0s


[2m[36m(RayTrainWorker pid=23341)[0m {'loss': 6.03929328918457, 'avg_loss': 10.98150577545166}
Result for TorchTrainer_466c2d38:
  _time_this_iter_s: 4.296858787536621
  _timestamp: 1662615518
  _training_iteration: 5
  avg_loss: 10.98150577545166
  date: 2022-09-08_05-38-39
  done: false
  experiment_id: 80b60343279b41babfb88933ef289972
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 6.03929328918457
  node_ip: 10.150.0.3
  pid: 23274
  should_checkpoint: true
  time_since_restore: 29.81986403465271
  time_this_iter_s: 4.297126054763794
  time_total_s: 29.81986403465271
  timestamp: 1662615519
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 466c2d38
  warmup_time: 0.0057277679443359375
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_466c2d38_25_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-38-06/checkpoint_000004)... Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-08_05-18-50/TorchTrainer_466c2d38_25_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,_2022-09-08_05-38-06/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='452.176 MB of 452.176 MB uploaded (90.296 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▅▄▃▁
iterations_since_restore,▁▃▅▆█
loss,█▅▄▃▁
time_since_restore,▁▃▅▆█
time_this_iter_s,█▁▁▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,10.98151
iterations_since_restore,5.0
loss,6.03929
time_since_restore,29.81986
time_this_iter_s,4.29713
time_total_s,29.81986
timestamp,1662615519.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00573


Result for TorchTrainer_466c2d38:
  _time_this_iter_s: 4.296858787536621
  _timestamp: 1662615518
  _training_iteration: 5
  avg_loss: 10.98150577545166
  date: 2022-09-08_05-38-39
  done: true
  experiment_id: 80b60343279b41babfb88933ef289972
  experiment_tag: 25_batch_size=6.0000,format_version=4,future_num_frames=12,history_num_frames=0,model_architecture=resnet50,render_ego_history=True,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.5000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0008,max_num_steps=5,num_workers=4

2022-09-08 05:38:46,558	INFO tune.py:759 -- Total run time: 1196.20 seconds (1195.97 seconds for the tuning loop).


In [35]:
import time

In [36]:
# Fixed pause between the end of the tuning job and reading its results below.
# NOTE(review): presumably this gives background Ray/W&B logging time to flush — confirm
# whether the delay is still needed and whether 30 s is the right value.
settle_seconds = 30
time.sleep(settle_seconds)

In [37]:
# Gather the tuning results into one table. The rendered output further down shows
# metric columns (loss, avg_loss, ...) next to flattened `config/...` columns and a logdir.
# NOTE(review): `analysis` is created in an earlier cell that is not visible here —
# presumably the Ray Tune result object; confirm its type before relying on this call.
analysis_df = analysis.get_dataframe()

In [38]:
# Bare expression on the last line so the notebook renders the results table.
analysis_df

Unnamed: 0,loss,avg_loss,_timestamp,_time_this_iter_s,_training_iteration,time_this_iter_s,should_checkpoint,done,timesteps_total,episodes_total,...,config/train_loop_config/cfg/train_data_loader/perturb_probability,config/train_loop_config/cfg/train_data_loader/shuffle,config/train_loop_config/cfg/train_params/checkpoint_every_n_steps,config/train_loop_config/cfg/train_params/eval_every_n_steps,config/train_loop_config/cfg/train_params/max_num_steps,config/train_loop_config/cfg/val_data_loader/batch_size,config/train_loop_config/cfg/val_data_loader/key,config/train_loop_config/cfg/val_data_loader/num_workers,config/train_loop_config/cfg/val_data_loader/shuffle,logdir
0,5.882532,9.682022,1662614419,14.017907,5,14.108382,True,False,,,...,0.5,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
1,7.962991,11.906286,1662614398,8.996373,5,9.007713,True,False,,,...,0.5,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
2,14.66484,13.578238,1662614496,12.323673,5,12.340653,True,False,,,...,0.5,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
3,9.453082,9.003263,1662614546,16.98669,5,16.90342,True,False,,,...,0.5,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
4,8.346401,12.266761,1662614621,18.104396,5,18.066445,True,False,,,...,0.5,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
5,3.523089,8.245952,1662614668,17.242356,5,17.316865,True,False,,,...,0.5,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
6,9.848295,13.624803,1662614673,4.803667,5,4.655703,True,False,,,...,0.5,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
7,10.724266,11.722249,1662614800,19.324412,5,19.353921,True,False,,,...,0.5,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
8,4.752763,11.841005,1662614776,14.811361,5,14.6666,True,False,,,...,0.5,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
9,10.573003,12.273482,1662614870,12.419198,5,12.500589,True,False,,,...,0.5,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...


In [39]:
# Run information
# Identification and metadata for the W&B run that stores the tuning analysis.
# These names are consumed by the `wandb.init(...)` call in the next cell.
wandb_entity = "l5-demo"             # W&B entity that owns the project
project_name = "l5-planning"         # W&B project the run is filed under
run_name = "train-planning-model"    # display name of the run
run_type = "train"                   # passed to wandb.init as job_type
# Free-text notes attached to the run (typo "planing" corrected to "planning").
run_description = """
Train planning model
"""
tags = ["train", "planning"]         # labels for filtering runs

In [40]:
#🪄🐝
# Start the W&B run that will receive the tuning-analysis table.
# All run metadata comes from the "Run information" cell; `cfg` (loaded earlier in the
# notebook) is attached as the run's config.
wandb_init_kwargs = {
    "entity": wandb_entity,
    "project": project_name,
    "job_type": run_type,
    "name": run_name,
    "notes": run_description,
    "tags": tags,
    "config": cfg,
}
run = wandb.init(**wandb_init_kwargs)

In [41]:
#BUG: to force a connection on the lineage graph
#🪄🐝
# Declare the artifact as an input of this run. Per the note above, this is a
# workaround so that the run shows up connected in the lineage graph.
# Reference format: <entity>/<project>/<name>:<alias>
artifact_reference = f"{artifact_entity}/{artifact_project}/{artifact_name}:{artifact_alias}"
artifact = run.use_artifact(artifact_reference, type=artifact_type)

In [42]:
#🪄🐝
# Wrap the results DataFrame in a W&B Table so it can be logged to the run.
analysis_table = wandb.Table(dataframe=analysis_df)

In [43]:
#BUG: run gets lost after tune job due to change in cwd. Forced to make 2 runs
# Log the analysis table to this (second) run and close the run.
# The run is closed on every path: a failure (empty table, or an error while
# logging) finishes the run with a non-zero exit code so it is not left open,
# then re-raises the original exception.
try:
    # An empty table means the tuning results were not collected; refuse to log it.
    if len(analysis_table.data) == 0:
        raise ValueError("bad table for some reason")
    run.log({"analysis_table": analysis_table})
except Exception:
    run.finish(exit_code=1)
    raise
else:
    run.finish()

VBox(children=(Label(value='0.031 MB of 0.054 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.575293…