https://github.com/woven-planet/l5kit/blob/master/examples/simulation/train.ipynb

## Prepare Data path and load cfg

By setting the `L5KIT_DATA_FOLDER` variable, we can point the script to the folder where the data lies.

Then, we load our config file with relative paths and other configurations (rasteriser, training params...).

### Setup

In [1]:
from pathlib import Path
import os

In [2]:
# NOTE: do not use relative paths for the models provided by L5.
# Folder layout, rooted two levels above the current working directory:
#   Experiments/
#     data/
#     simulation/training/saved_outputs/
experiments_directory = Path(os.path.abspath("")).parent.parent / "Experiments"
data_directory = experiments_directory / "data"
prediction_directory = experiments_directory / "simulation"
prediction_training_directory = prediction_directory / "training"
save_directory = prediction_training_directory / "saved_outputs"

# Create every folder up front (parents first); existing folders are left untouched.
for _directory in (
    experiments_directory,
    data_directory,
    prediction_directory,
    prediction_training_directory,
    save_directory,
):
    _directory.mkdir(parents=True, exist_ok=True)
del _directory  # do not leak the loop variable into the notebook namespace

In [3]:
import os
# Make the training folder the working directory so that files written with
# relative paths from here on (e.g. requirements.txt below) end up inside it.
# NOTE: experiments_directory above is derived from the working directory, so
# re-running that cell after this one resolves to a different location.
os.chdir(prediction_training_directory)

In [4]:
%%writefile requirements.txt
l5kit
pyyaml
ray==2.0.0rc1
ray[air]
wandb
optuna

Overwriting requirements.txt


In [5]:
%%capture
# !pip install -r requirements.txt
!pip install l5kit pyyaml
!pip install wandb --upgrade
!pip install ray==2.0.0rc1
!pip install "ray[air]"
!pip install optuna

In [6]:
from typing import Dict

from tempfile import gettempdir
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision.models.resnet import resnet50
from tqdm import tqdm

from l5kit.configs import load_config_data
from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.rasterization import build_rasterizer
from l5kit.evaluation import write_pred_csv, compute_metrics_csv, read_gt_csv, create_chopped_dataset
from l5kit.evaluation.chop_dataset import MIN_FUTURE_STEPS
from l5kit.evaluation.metrics import neg_multi_log_likelihood, time_displace, rmse, prob_true_mode, average_displacement_error_oracle, average_displacement_error_mean, final_displacement_error_oracle, final_displacement_error_mean, detect_collision, distance_to_reference_trajectory
from l5kit.geometry import transform_points
from l5kit.visualization import PREDICTED_POINTS_COLOR, TARGET_POINTS_COLOR, draw_trajectory
from prettytable import PrettyTable
from pathlib import Path

import os

In [7]:
from tempfile import gettempdir
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from tqdm import tqdm

from l5kit.configs import load_config_data
from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset
from l5kit.rasterization import build_rasterizer
from l5kit.geometry import transform_points
from l5kit.visualization import TARGET_POINTS_COLOR, draw_trajectory
from l5kit.planning.rasterized.model import RasterizedPlanningModel

import os

### Get Data from Wandb

In [8]:
import wandb
# Authenticate this session with Weights & Biases before any run is created.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33ma-sh0ts[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [9]:
# Metadata for the W&B run that downloads the dataset.
wandb_entity, project_name = "l5-demo", "l5-simulation"
run_type = "download"
run_name = "download-l5-data"
tags = ["download", "data"]
# Free-text note stored with the run (leading/trailing newlines are intentional).
run_description = "\nDownload data for the task of training a prediction model\n"

In [10]:
#🪄🐝
# Open the W&B run that tracks the dataset download, using the metadata defined above.
run = wandb.init(
    name=run_name,
    job_type=run_type,
    entity=wandb_entity,
    project=project_name,
    tags=tags,
    notes=run_description,
)

[34m[1mwandb[0m: Currently logged in as: [33ma-sh0ts[0m ([33ml5-demo[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [11]:
# Coordinates of the W&B dataset artifact: <entity>/<project>/<name>:<alias>.
artifact_entity, artifact_project = "l5-demo", "l5-common"
artifact_name, artifact_alias = "l5-data", "latest"
artifact_type = "dataset"

In [12]:
#🪄🐝
# Declare the dataset artifact as an input of this run. It is addressed as
# "<entity>/<project>/<name>:<alias>", with the expected type passed as `type`.
artifact = run.use_artifact(f"{artifact_entity}/{artifact_project}/{artifact_name}:{artifact_alias}", type=artifact_type)

In [13]:
# Download the artifact's files into data_directory; the return value is not needed.
_ = artifact.download(data_directory)

[34m[1mwandb[0m: Downloading large artifact l5-data:latest, 2386.92MB. 517 files... Done. 0:0:0.1


In [14]:
# BUG: need to separate runs into download and training due to issues with routing runs after ray.tune
# Finish the download run here, before any Ray Tune work starts.
run.finish()

VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

In [15]:
# The dataset is expected to live in the folder named by the
# L5KIT_DATA_FOLDER environment variable.
l5_data_location = Path(data_directory) / "dataset"

# Read the simulation configuration stored alongside the downloaded data.
cfg = load_config_data(Path(data_directory) / "configurations" / "simulation_config.yaml")

# TODO: replace this hard-coded choice with logic that selects the dataset.
# Both the training and the validation loader read the small sample scenes.
for _loader_name in ("train_data_loader", "val_data_loader"):
    cfg[_loader_name]["key"] = "scenes/sample.zarr"
del _loader_name  # do not leak the loop variable into the notebook namespace
# run.config.update(cfg)

In [16]:
# cfg["zarr_dataset_location"] = l5_data_location
# Point the scripts at the dataset folder through the L5KIT_DATA_FOLDER environment
# variable; environment values must be strings, hence the str() around the Path.
os.environ["L5KIT_DATA_FOLDER"] = str(l5_data_location)

## Model

Our baseline is a simple `resnet50` pretrained on `imagenet`. We must replace the input and the final layer to address our requirements.

In [17]:
def build_model(cfg: Dict, num_input_channels, criterion):
    """Create the rasterized planning model described by ``cfg``.

    Args:
        cfg: Configuration dict; ``cfg["model_params"]`` supplies
            ``model_architecture`` and ``future_num_frames``.
        num_input_channels: Number of input channels the model should accept.
        criterion: Loss callable handed to the model.

    Returns:
        The constructed ``RasterizedPlanningModel``.
    """
    model_params = cfg["model_params"]
    # One (X, Y, Yaw) triple is predicted for every future state.
    values_per_state = 3
    return RasterizedPlanningModel(
        model_arch=model_params["model_architecture"],
        num_input_channels=num_input_channels,
        num_targets=values_per_state * model_params["future_num_frames"],
        weights_scaling=[1., 1., 1.],
        criterion=criterion,
    )

In [18]:
def forward(data, model, criterion):
    """Run ``model`` on one batch and pull the loss out of its output.

    Args:
        data: Batch passed unchanged to ``model``.
        model: Callable returning a mapping that contains a ``"loss"`` entry.
        criterion: Not used here (the loss is read from the model output); the
            parameter is kept so existing callers keep working.

    Returns:
        Tuple ``(loss, outputs)`` where ``outputs`` is the full model output.
    """
    model_outputs = model(data)
    return model_outputs["loss"], model_outputs

In [19]:
def train_prediction_model_epoch(data, model, criterion, optimizer):
    """Perform one optimisation step on a single batch.

    Despite the name, this processes exactly one batch per call, not a whole epoch.

    Args:
        data: Batch forwarded to ``model``.
        model: Model whose output contains the loss.
        criterion: Passed through to ``forward``.
        optimizer: Optimizer that updates the model parameters.

    Returns:
        Tuple ``(loss, outputs)`` as produced by ``forward``.
    """
    batch_loss, batch_outputs = forward(data, model, criterion)

    # Clear stale gradients, back-propagate the batch loss, then update the weights.
    optimizer.zero_grad()
    batch_loss.backward()
    optimizer.step()

    return batch_loss, batch_outputs

Our data pipeline maps a raw `.zarr` folder into a multi-processing instance ready for training by:
- loading the `zarr` into a `ChunkedDataset` object. This object has a reference to the different arrays in the zarr (e.g. agents and traffic lights);
- wrapping the `ChunkedDataset` into an `AgentDataset`, which inherits from torch `Dataset` class;
- passing the `AgentDataset` into a torch `DataLoader`

# Training

Note: if you're on macOS and using the `py_satellite` rasterizer, you may need to disable OpenCV multithreading by adding
`cv2.setNumThreads(0)` before the following cell. This seems to only affect running in a Python notebook, and it is caused by the `cv2.warpAffine` function.

In [20]:
import ray.train as train
from ray.air import session, Checkpoint

In [21]:
from ray import tune
from ray.tune.tuner import Tuner

In [22]:
def train_simulation_model(tuner_cfg: Dict):
    """Train the rasterized planning model inside one Ray Train worker.

    Metrics (``loss`` and the running ``avg_loss``) are reported to Ray through
    ``session.report`` after every step. A checkpoint is attached about
    ``num_checkpoints`` times per run and always on the final step.

    Args:
        tuner_cfg: Trial configuration with the keys ``shuffle``, ``batch_size``,
            ``num_workers``, ``lr``, ``max_num_steps``, ``dataset_key`` and ``cfg``
            (the l5kit configuration dict used for the rasterizer, dataset and model).
    """
    dm = LocalDataManager()

    # ==== Configurations
    shuffle = tuner_cfg["shuffle"]
    # The tuner may sample these as floats (e.g. via quniform), hence the int() casts.
    batch_size = int(tuner_cfg["batch_size"])
    num_workers = tuner_cfg["num_workers"]
    lr = tuner_cfg["lr"]
    max_num_steps = int(tuner_cfg["max_num_steps"])
    dataset_key = tuner_cfg["dataset_key"]
    cfg = tuner_cfg["cfg"]

    # ==== Loading Dataset
    rasterizer = build_rasterizer(cfg, dm)

    train_zarr = ChunkedDataset(dm.require(dataset_key)).open()
    train_dataset = AgentDataset(cfg, train_zarr, rasterizer)

    # Split the global batch across workers, never dropping below one sample per
    # worker (a batch size of 0 is rejected by DataLoader).
    batch_size_per_worker = max(1, batch_size // session.get_world_size())
    train_dataloader = DataLoader(
        train_dataset,
        shuffle=shuffle,
        batch_size=batch_size_per_worker,
        num_workers=num_workers,
    )
    train_dataloader = train.torch.prepare_data_loader(train_dataloader)

    # ==== Init model
    criterion = nn.MSELoss(reduction="none")
    num_input_channels = rasterizer.num_channels()

    model = build_model(cfg, num_input_channels, criterion)
    model = train.torch.prepare_model(model)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # ==== TRAIN LOOP
    num_checkpoints = 5
    # Clamp to 1 so that runs shorter than `num_checkpoints` steps do not hit a
    # modulo-by-zero below; such runs simply checkpoint on every step.
    steps_before_checkpointing = max(1, max_num_steps // num_checkpoints)
    losses_train = []

    # Nothing in the loop leaves training mode, so set it once up front.
    model.train()
    torch.set_grad_enabled(True)

    tr_it = iter(train_dataloader)
    for step in range(max_num_steps):
        try:
            data = next(tr_it)
        except StopIteration:
            # Dataset exhausted: start another pass over the data.
            tr_it = iter(train_dataloader)
            data = next(tr_it)

        loss, _ = train_prediction_model_epoch(data, model, criterion, optimizer)
        loss_value = loss.item()
        losses_train.append(loss_value)
        metrics = {
            "loss": loss_value,
            "avg_loss": np.mean(losses_train),
        }

        # Only the rank-0 worker prints, to avoid duplicated log lines.
        if train.world_rank() == 0:
            print(metrics)

        is_checkpoint_step = (step % steps_before_checkpointing == 0) or (step == max_num_steps - 1)
        if is_checkpoint_step:
            # NOTE(review): the whole (prepared) model object is stored in the
            # checkpoint; storing a state_dict may be preferable, but consumers of
            # the "model" entry would need to change — confirm before switching.
            session.report(
                metrics=metrics,
                checkpoint=Checkpoint.from_dict(dict(step=step, model=model)),
            )
        else:
            session.report(metrics=metrics)

### Distributed Training using Ray

We detect the hardware available to the current training session and split the CPUs between the training workers: one worker per GPU when GPUs are available, otherwise an even split of the CPUs across workers.

In [23]:
from ray.train.torch import TorchTrainer
from ray.air.config import RunConfig, ScalingConfig
from ray.air.callbacks.wandb import WandbLoggerCallback #🪄🐝

In [24]:
import multiprocessing

In [25]:
# Hardware visible to this process; used below to size the Ray workers.
USE_GPU = torch.cuda.is_available()
NUM_GPUS = torch.cuda.device_count()
NUM_CPUS = multiprocessing.cpu_count()

In [26]:
if not USE_GPU:
    # CPU only: cap each actor at 4 data-loading workers and start as many
    # actors as the CPU count allows, but never fewer than one.
    num_data_workers = min(4, NUM_CPUS)
    ideal_num_actors = NUM_CPUS // num_data_workers
    num_actors = max(1, ideal_num_actors)
else:
    # One training actor per GPU; the CPUs are divided evenly among the actors.
    num_actors = NUM_GPUS
    num_data_workers = NUM_CPUS // num_actors

To use Ray, all we need to do is wrap the training function above. The only addition needed in that function was the calls to `session.report` to log metrics during training.

In [27]:
# NOTE: To figure out if the scaling config intuition is correct: num_actors divides resources between
# each actor, and within the train func each actor can then utilize the shared resources.
# Wrap the per-worker training function in a TorchTrainer: one Ray worker per actor,
# with use_gpu following CUDA availability.
trainer = TorchTrainer(
    train_loop_per_worker=train_simulation_model,
    scaling_config=ScalingConfig(num_workers=num_actors, use_gpu=USE_GPU),
)

2022-09-07 21:06:04,412	INFO worker.py:1487 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m.


### Distributed Hyperparameter Tuning using Ray

Due to Ray's easy interface we can simply extend our normal trainer to Ray's tuner which will allow us to do efficient hyperparameter optimization. In our case we use `optuna`

In [28]:
# Build the search space on a derived copy of the configuration instead of writing
# the `tune.choice` object into the global `cfg`: `cfg` then stays a plain, valid
# l5kit configuration for every other cell, and this cell can be re-run safely.
tuning_cfg = {
    **cfg,
    "raster_params": {
        **cfg["raster_params"],
        "map_type": tune.choice(["py_semantic", "py_satellite"]),
    },
}

# Configuration handed to `train_simulation_model` for every trial.
tuner_train_config = {
    ## static
    "shuffle": cfg["train_data_loader"]["shuffle"],
    "num_workers": num_data_workers,
    "dataset_key": cfg["train_data_loader"]["key"],
    ## tunable
    "max_num_steps": 5,
    # "max_num_steps": tune.quniform(1000, 5000, 250),
    "lr": tune.loguniform(1e-4, 1e-2),
    "batch_size": tune.quniform(6, 24, 6),
    # l5kit config whose raster map type is sampled per trial.
    "cfg": tuning_cfg,
}

In [29]:
from ray.tune.logger import LoggerCallback
from typing import Dict, List

In [30]:
from ray.tune.stopper import ExperimentPlateauStopper
from ray.tune.search.optuna import OptunaSearch

In [31]:
# Number of hyperparameter configurations to sample (passed to TuneConfig as num_samples).
n_search_attempts = 25

In [32]:
# Optuna-backed search algorithm with default settings; handed to TuneConfig as search_alg.
# NOTE(review): no seed is passed, so sampled configurations presumably differ between runs — confirm.
optuna_search = OptunaSearch()

In [33]:
# Hyperparameter search around the trainer: Optuna proposes configurations,
# the experiment stops once `avg_loss` plateaus, and trials are logged to W&B.
tuner = Tuner(
    trainer,
    param_space={"train_loop_config": tuner_train_config},
    tune_config=tune.TuneConfig(
        search_alg=optuna_search,
        num_samples=n_search_attempts,
        metric="avg_loss",  # loss or avg_loss here?
        mode="min",
    ),
    run_config=RunConfig(
        stop=ExperimentPlateauStopper("avg_loss"),
        callbacks=[
            WandbLoggerCallback(project=f"{project_name}-trials", save_checkpoints=True),  #🪄🐝
        ],
    ),
)

  


### Aggregate and Report Metrics from All Trials

In [34]:
# Run the tuning experiment and keep what fit() returns for the reporting that follows.
analysis = tuner.fit()

[32m[I 2022-09-07 21:06:06,636][0m A new study created in memory with name: optuna[0m
2022-09-07 21:06:06,654	INFO wandb.py:119 -- Already logged into W&B.


Trial name,status,loc,train_loop_config...,train_loop_config....1,train_loop_config/lr,iter,total time (s),loss,avg_loss,_timestamp
TorchTrainer_e3c5f5d4,TERMINATED,10.150.0.3:25992,12,py_satellite,0.0072049,5,60.1556,74.5676,77.8439,1662584829
TorchTrainer_e5c1813c,TERMINATED,10.150.0.3:26064,18,py_semantic,0.000556754,5,87.8817,67.7284,90.8705,1662584862
TorchTrainer_e5c9abaa,TERMINATED,10.150.0.3:27061,12,py_semantic,0.000177311,5,59.6294,17.9698,35.8953,1662584903
TorchTrainer_0f7cd6a2,TERMINATED,10.150.0.3:27382,6,py_satellite,0.00160407,5,37.5158,5.84599,98.9697,1662584913
TorchTrainer_2328e218,TERMINATED,10.150.0.3:28334,6,py_semantic,0.000690849,5,40.6134,138.552,41.8565,1662584968
TorchTrainer_4160e4a6,TERMINATED,10.150.0.3:28343,18,py_satellite,0.000392097,5,84.8762,47.9169,59.9695,1662585013
TorchTrainer_41bc41c0,TERMINATED,10.150.0.3:29336,18,py_satellite,0.00249845,5,82.1401,14.7764,42.0436,1662585061
TorchTrainer_611d7426,TERMINATED,10.150.0.3:30153,18,py_semantic,0.00158679,5,81.7708,93.3388,68.7019,1662585114
TorchTrainer_7e39ebde,TERMINATED,10.150.0.3:30991,24,py_semantic,0.000487912,5,105.296,37.8674,52.5062,1662585183
TorchTrainer_9b8feb8e,TERMINATED,10.150.0.3:31815,12,py_satellite,0.000313175,5,55.4984,119.398,69.8637,1662585186


[2m[36m(RayTrainWorker pid=26063)[0m 2022-09-07 21:06:13,828	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=26176)[0m 2022-09-07 21:06:18,306	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=26063)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=26063)[0m 2022-09-07 21:06:21,019	INFO train_loop_utils.py:300 -- Moving model to device: cpu
[2m[36m(RayTrainWorker pid=26176)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=26176)[0m 2022-09-07 21:06:25,954	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=26063)[0m {'loss': 34.04572677612305, 'avg_loss': 34.04572677612305}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e3c5f5d4_1_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-06-06/checkpoint_000000)... 

Result for TorchTrainer_e3c5f5d4:
  _time_this_iter_s: 18.791529178619385
  _timestamp: 1662584793
  _training_iteration: 1
  avg_loss: 34.04572677612305
  date: 2022-09-07_21-06-33
  done: false
  experiment_id: cecbcbdf243740089cf922d5a64cba6b
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 34.04572677612305
  node_ip: 10.150.0.3
  pid: 25992
  should_checkpoint: true
  time_since_restore: 23.725836515426636
  time_this_iter_s: 23.725836515426636
  time_total_s: 23.725836515426636
  timestamp: 1662584793
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: e3c5f5d4
  warmup_time: 0.0050389766693115234
  


Done. 1.3s


[2m[36m(RayTrainWorker pid=26063)[0m {'loss': 48.830894470214844, 'avg_loss': 41.438310623168945}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e3c5f5d4_1_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-06-06/checkpoint_000001)... 

Result for TorchTrainer_e3c5f5d4:
  _time_this_iter_s: 8.529665470123291
  _timestamp: 1662584801
  _training_iteration: 2
  avg_loss: 41.438310623168945
  date: 2022-09-07_21-06-42
  done: false
  experiment_id: cecbcbdf243740089cf922d5a64cba6b
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 48.830894470214844
  node_ip: 10.150.0.3
  pid: 25992
  should_checkpoint: true
  time_since_restore: 32.18539762496948
  time_this_iter_s: 8.459561109542847
  time_total_s: 32.18539762496948
  timestamp: 1662584802
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: e3c5f5d4
  warmup_time: 0.0050389766693115234
  
[2m[36m(RayTrainWorker pid=26176)[0m {'loss': 150.79981994628906, 'avg_loss': 150.79981994628906}


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e5c1813c_2_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-06-10/checkpoint_000000)... 

Result for TorchTrainer_e5c1813c:
  _time_this_iter_s: 23.81967258453369
  _timestamp: 1662584802
  _training_iteration: 1
  avg_loss: 150.79981994628906
  date: 2022-09-07_21-06-43
  done: false
  experiment_id: 93bf4a54174d4a8f99d9bc7b268a5c25
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 150.79981994628906
  node_ip: 10.150.0.3
  pid: 26064
  should_checkpoint: true
  time_since_restore: 28.50425434112549
  time_this_iter_s: 28.50425434112549
  time_total_s: 28.50425434112549
  timestamp: 1662584803
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: e5c1813c
  warmup_time: 0.006105661392211914
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=26063)[0m {'loss': 148.806396484375, 'avg_loss': 77.2276725769043}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e3c5f5d4_1_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-06-06/checkpoint_000002)... 

Result for TorchTrainer_e3c5f5d4:
  _time_this_iter_s: 8.988826036453247
  _timestamp: 1662584810
  _training_iteration: 3
  avg_loss: 77.2276725769043
  date: 2022-09-07_21-06-51
  done: false
  experiment_id: cecbcbdf243740089cf922d5a64cba6b
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 148.806396484375
  node_ip: 10.150.0.3
  pid: 25992
  should_checkpoint: true
  time_since_restore: 41.153194427490234
  time_this_iter_s: 8.967796802520752
  time_total_s: 41.153194427490234
  timestamp: 1662584811
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: e3c5f5d4
  warmup_time: 0.0050389766693115234
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=26176)[0m {'loss': 111.55011749267578, 'avg_loss': 131.17496871948242}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e5c1813c_2_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-06-10/checkpoint_000001)... 

Result for TorchTrainer_e5c1813c:
  _time_this_iter_s: 13.597301959991455
  _timestamp: 1662584816
  _training_iteration: 2
  avg_loss: 131.17496871948242
  date: 2022-09-07_21-06-56
  done: false
  experiment_id: 93bf4a54174d4a8f99d9bc7b268a5c25
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 111.55011749267578
  node_ip: 10.150.0.3
  pid: 26064
  should_checkpoint: true
  time_since_restore: 42.05224847793579
  time_this_iter_s: 13.547994136810303
  time_total_s: 42.05224847793579
  timestamp: 1662584816
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: e5c1813c
  warmup_time: 0.006105661392211914
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=26063)[0m {'loss': 82.96881866455078, 'avg_loss': 78.66295909881592}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e3c5f5d4_1_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-06-06/checkpoint_000003)... 

Result for TorchTrainer_e3c5f5d4:
  _time_this_iter_s: 9.113450288772583
  _timestamp: 1662584819
  _training_iteration: 4
  avg_loss: 78.66295909881592
  date: 2022-09-07_21-07-00
  done: false
  experiment_id: cecbcbdf243740089cf922d5a64cba6b
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 82.96881866455078
  node_ip: 10.150.0.3
  pid: 25992
  should_checkpoint: true
  time_since_restore: 50.25953006744385
  time_this_iter_s: 9.106335639953613
  time_total_s: 50.25953006744385
  timestamp: 1662584820
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: e3c5f5d4
  warmup_time: 0.0050389766693115234
  


Done. 1.4s


[2m[36m(RayTrainWorker pid=26063)[0m {'loss': 74.56755065917969, 'avg_loss': 77.84387741088867}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e3c5f5d4_1_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-06-06/checkpoint_000004)... 

Result for TorchTrainer_e3c5f5d4:
  _time_this_iter_s: 9.905043601989746
  _timestamp: 1662584829
  _training_iteration: 5
  avg_loss: 77.84387741088867
  date: 2022-09-07_21-07-10
  done: false
  experiment_id: cecbcbdf243740089cf922d5a64cba6b
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 74.56755065917969
  node_ip: 10.150.0.3
  pid: 25992
  should_checkpoint: true
  time_since_restore: 60.15558481216431
  time_this_iter_s: 9.896054744720459
  time_total_s: 60.15558481216431
  timestamp: 1662584830
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: e3c5f5d4
  warmup_time: 0.0050389766693115234
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=26176)[0m {'loss': 115.19401550292969, 'avg_loss': 125.84798431396484}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e3c5f5d4_1_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-06-06/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,▁▂███
iterations_since_restore,▁▃▅▆█
loss,▁▂█▄▃
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▁▁▂
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,77.84388
iterations_since_restore,5.0
loss,74.56755
time_since_restore,60.15558
time_this_iter_s,9.89605
time_total_s,60.15558
timestamp,1662584830.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00504


Result for TorchTrainer_e3c5f5d4:
  _time_this_iter_s: 9.905043601989746
  _timestamp: 1662584829
  _training_iteration: 5
  avg_loss: 77.84387741088867
  date: 2022-09-07_21-07-10
  done: true
  experiment_id: cecbcbdf243740089cf922d5a64cba6b
  experiment_tag: 1_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0072,max_num_steps=5,num_workers=

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e5c1813c_2_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-06-10/checkpoint_000002)... 

Result for TorchTrainer_e5c1813c:
  _time_this_iter_s: 14.86629867553711
  _timestamp: 1662584831
  _training_iteration: 3
  avg_loss: 125.84798431396484
  date: 2022-09-07_21-07-11
  done: false
  experiment_id: 93bf4a54174d4a8f99d9bc7b268a5c25
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 115.19401550292969
  node_ip: 10.150.0.3
  pid: 26064
  should_checkpoint: true
  time_since_restore: 56.94431924819946
  time_this_iter_s: 14.892070770263672
  time_total_s: 56.94431924819946
  timestamp: 1662584831
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: e5c1813c
  warmup_time: 0.006105661392211914
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=26176)[0m {'loss': 9.0802001953125, 'avg_loss': 96.65603828430176}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e5c1813c_2_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-06-10/checkpoint_000003)... 

Result for TorchTrainer_e5c1813c:
  _time_this_iter_s: 14.794326543807983
  _timestamp: 1662584846
  _training_iteration: 4
  avg_loss: 96.65603828430176
  date: 2022-09-07_21-07-26
  done: false
  experiment_id: 93bf4a54174d4a8f99d9bc7b268a5c25
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 9.0802001953125
  node_ip: 10.150.0.3
  pid: 26064
  should_checkpoint: true
  time_since_restore: 71.72718787193298
  time_this_iter_s: 14.78286862373352
  time_total_s: 71.72718787193298
  timestamp: 1662584846
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: e5c1813c
  warmup_time: 0.006105661392211914
  


Done. 1.0s
[2m[36m(RayTrainWorker pid=27131)[0m 2022-09-07 21:07:27,485	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=27131)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=27131)[0m 2022-09-07 21:07:33,349	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=26176)[0m {'loss': 67.72843170166016, 'avg_loss': 90.87051696777344}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e5c1813c_2_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-06-10/checkpoint_000004)... 

Result for TorchTrainer_e5c1813c:
  _time_this_iter_s: 16.22751212120056
  _timestamp: 1662584862
  _training_iteration: 5
  avg_loss: 90.87051696777344
  date: 2022-09-07_21-07-42
  done: false
  experiment_id: 93bf4a54174d4a8f99d9bc7b268a5c25
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 67.72843170166016
  node_ip: 10.150.0.3
  pid: 26064
  should_checkpoint: true
  time_since_restore: 87.8817241191864
  time_this_iter_s: 16.154536247253418
  time_total_s: 87.8817241191864
  timestamp: 1662584862
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: e5c1813c
  warmup_time: 0.006105661392211914
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e5c1813c_2_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-06-10/checkpoint_000004)... Done. 0.3s


[2m[36m(RayTrainWorker pid=27131)[0m {'loss': 21.008337020874023, 'avg_loss': 21.008337020874023}


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▆▅▂▁
iterations_since_restore,▁▃▅▆█
loss,█▆▆▁▄
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▂▂▂
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,90.87052
iterations_since_restore,5.0
loss,67.72843
time_since_restore,87.88172
time_this_iter_s,16.15454
time_total_s,87.88172
timestamp,1662584862.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00611


Result for TorchTrainer_e5c1813c:
  _time_this_iter_s: 16.22751212120056
  _timestamp: 1662584862
  _training_iteration: 5
  avg_loss: 90.87051696777344
  date: 2022-09-07_21-07-42
  done: true
  experiment_id: 93bf4a54174d4a8f99d9bc7b268a5c25
  experiment_tag: 2_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0006,max_num_steps=5,num_workers=4

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e5c9abaa_3_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-07-19/checkpoint_000000)... 

Result for TorchTrainer_e5c9abaa:
  _time_this_iter_s: 16.391650676727295
  _timestamp: 1662584864
  _training_iteration: 1
  avg_loss: 21.008337020874023
  date: 2022-09-07_21-07-44
  done: false
  experiment_id: 5dac44765c4746559210b4cfdc863abd
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 21.008337020874023
  node_ip: 10.150.0.3
  pid: 27061
  should_checkpoint: true
  time_since_restore: 20.639700412750244
  time_this_iter_s: 20.639700412750244
  time_total_s: 20.639700412750244
  timestamp: 1662584864
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: e5c9abaa
  warmup_time: 0.006452322006225586
  
[2m[36m(RayTrainWorker pid=27131)[0m {'loss': 50.585506439208984, 'avg_loss': 35.796921730041504}


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e5c9abaa_3_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-07-19/checkpoint_000001)... Done. 1.0s
[2m[36m(RayTrainWorker pid=27474)[0m 2022-09-07 21:07:59,915	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]


[2m[36m(RayTrainWorker pid=27131)[0m {'loss': 55.19760513305664, 'avg_loss': 42.26381619771322}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e5c9abaa_3_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-07-19/checkpoint_000002)... 

Result for TorchTrainer_e5c9abaa:
  _time_this_iter_s: 12.033479452133179
  _timestamp: 1662584884
  _training_iteration: 3
  avg_loss: 42.26381619771322
  date: 2022-09-07_21-08-04
  done: false
  experiment_id: 5dac44765c4746559210b4cfdc863abd
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 55.19760513305664
  node_ip: 10.150.0.3
  pid: 27061
  should_checkpoint: true
  time_since_restore: 40.78971290588379
  time_this_iter_s: 12.100828647613525
  time_total_s: 40.78971290588379
  timestamp: 1662584884
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: e5c9abaa
  warmup_time: 0.006452322006225586
  


Done. 1.0s
[2m[36m(RayTrainWorker pid=27474)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=27474)[0m 2022-09-07 21:08:08,084	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=27474)[0m {'loss': 198.983642578125, 'avg_loss': 198.983642578125}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_0f7cd6a2_4_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_21-07-52/checkpoint_000000)... 

Result for TorchTrainer_0f7cd6a2:
  _time_this_iter_s: 12.727861881256104
  _timestamp: 1662584893
  _training_iteration: 1
  avg_loss: 198.983642578125
  date: 2022-09-07_21-08-13
  done: false
  experiment_id: f9ca05c00cdf4a049934ae1761ca4c0c
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 198.983642578125
  node_ip: 10.150.0.3
  pid: 27382
  should_checkpoint: true
  time_since_restore: 17.007647275924683
  time_this_iter_s: 17.007647275924683
  time_total_s: 17.007647275924683
  timestamp: 1662584893
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 0f7cd6a2
  warmup_time: 0.007363080978393555
  
[2m[36m(RayTrainWorker pid=27131)[0m {'loss': 34.7153205871582, 'avg_loss': 40.37669229507446}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e5c9abaa_3_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-07-19/checkpoint_000003)... Done. 1.0s


Result for TorchTrainer_e5c9abaa:
  _time_this_iter_s: 9.700194120407104
  _timestamp: 1662584894
  _training_iteration: 4
  avg_loss: 40.37669229507446
  date: 2022-09-07_21-08-14
  done: false
  experiment_id: 5dac44765c4746559210b4cfdc863abd
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 34.7153205871582
  node_ip: 10.150.0.3
  pid: 27061
  should_checkpoint: true
  time_since_restore: 50.376463651657104
  time_this_iter_s: 9.586750745773315
  time_total_s: 50.376463651657104
  timestamp: 1662584894
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: e5c9abaa
  warmup_time: 0.006452322006225586
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=27474)[0m {'loss': 75.64552307128906, 'avg_loss': 137.31458282470703}
Result for TorchTrainer_0f7cd6a2:
  _time_this_iter_s: 5.254258394241333
  _timestamp: 1662584898
  _training_iteration: 2
  avg_loss: 137.31458282470703
  date: 2022-09-07_21-08-18
  done: false
  experiment_id: f9ca05c00cdf4a049934ae1761ca4c0c
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 75.64552307128906
  node_ip: 10.150.0.3
  pid: 27382
  should_checkpoint: true
  time_since_restore: 22.22438669204712
  time_this_iter_s: 5.2167394161224365
  time_total_s: 22.22438669204712
  timestamp: 1662584898
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 0f7cd6a2
  warmup_time: 0.007363080978393555
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_0f7cd6a2_4_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_21-07-52/checkpoint_000001)... Done. 1.0s


[2m[36m(RayTrainWorker pid=27474)[0m {'loss': 140.78103637695312, 'avg_loss': 138.4700673421224}
[2m[36m(RayTrainWorker pid=27131)[0m {'loss': 17.969776153564453, 'avg_loss': 35.89530906677246}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_0f7cd6a2_4_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_21-07-52/checkpoint_000002)... 

Result for TorchTrainer_e5c9abaa:
  _time_this_iter_s: 9.094890832901001
  _timestamp: 1662584903
  _training_iteration: 5
  avg_loss: 35.89530906677246
  date: 2022-09-07_21-08-23
  done: false
  experiment_id: 5dac44765c4746559210b4cfdc863abd
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 17.969776153564453
  node_ip: 10.150.0.3
  pid: 27061
  should_checkpoint: true
  time_since_restore: 59.62936210632324
  time_this_iter_s: 9.252898454666138
  time_total_s: 59.62936210632324
  timestamp: 1662584903
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: e5c9abaa
  warmup_time: 0.006452322006225586
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e5c9abaa_3_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-07-19/checkpoint_000004)... Done. 2.3s
Done. 2.6s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e5c9abaa_3_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-07-19/checkpoint_000004)... Done. 0.3s


[2m[36m(RayTrainWorker pid=27474)[0m {'loss': 73.59223937988281, 'avg_loss': 122.2506103515625}


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

[2m[36m(RayTrainWorker pid=27474)[0m {'loss': 5.845992088317871, 'avg_loss': 98.96968669891358}


0,1
avg_loss,▁▆█▇▆
iterations_since_restore,▁▃▅▆█
loss,▂▇█▄▁
time_since_restore,▁▂▅▆█
time_this_iter_s,█▁▃▂▂
time_total_s,▁▂▅▆█
timestamp,▁▂▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,35.89531
iterations_since_restore,5.0
loss,17.96978
time_since_restore,59.62936
time_this_iter_s,9.2529
time_total_s,59.62936
timestamp,1662584903.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00645


Result for TorchTrainer_e5c9abaa:
  _time_this_iter_s: 9.094890832901001
  _timestamp: 1662584903
  _training_iteration: 5
  avg_loss: 35.89530906677246
  date: 2022-09-07_21-08-23
  done: true
  experiment_id: 5dac44765c4746559210b4cfdc863abd
  experiment_tag: 3_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0002,max_num_steps=5,num_workers=4

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_0f7cd6a2_4_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_21-07-52/checkpoint_000003)... 

Result for TorchTrainer_0f7cd6a2:
  _time_this_iter_s: 5.0469419956207275
  _timestamp: 1662584908
  _training_iteration: 4
  avg_loss: 122.2506103515625
  date: 2022-09-07_21-08-28
  done: false
  experiment_id: f9ca05c00cdf4a049934ae1761ca4c0c
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 73.59223937988281
  node_ip: 10.150.0.3
  pid: 27382
  should_checkpoint: true
  time_since_restore: 31.945348978042603
  time_this_iter_s: 4.948726415634155
  time_total_s: 31.945348978042603
  timestamp: 1662584908
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 0f7cd6a2
  warmup_time: 0.007363080978393555
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_0f7cd6a2_4_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_21-07-52/checkpoint_000004)... Done. 1.7s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_0f7cd6a2_4_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_21-07-52/checkpoint_000004)... Done. 0.2s


VBox(children=(Label(value='456.835 MB of 456.835 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0,…

0,1
avg_loss,█▄▄▃▁
iterations_since_restore,▁▃▅▆█
loss,█▄▆▃▁
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▁▁▁
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,98.96969
iterations_since_restore,5.0
loss,5.84599
time_since_restore,37.51575
time_this_iter_s,5.5704
time_total_s,37.51575
timestamp,1662584914.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00736


Result for TorchTrainer_0f7cd6a2:
  _time_this_iter_s: 5.566701412200928
  _timestamp: 1662584913
  _training_iteration: 5
  avg_loss: 98.96968669891358
  date: 2022-09-07_21-08-34
  done: true
  experiment_id: f9ca05c00cdf4a049934ae1761ca4c0c
  experiment_tag: 4_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0016,max_num_steps=5,num_workers=4

[2m[36m(RayTrainWorker pid=28466)[0m 2022-09-07 21:08:51,681	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=28474)[0m 2022-09-07 21:08:52,651	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=28466)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=28474)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=28466)[0m 2022-09-07 21:08:59,509	INFO train_loop_utils.py:300 -- Moving model to device: cpu
[2m[36m(RayTrainWorker pid=28474)[0m 2022-09-07 21:08:59,635	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=28466)[0m {'loss': 47.281429290771484, 'avg_loss': 47.281429290771484}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_2328e218_5_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_21-08-43/checkpoint_000000)... 

Result for TorchTrainer_2328e218:
  _time_this_iter_s: 16.315255880355835
  _timestamp: 1662584948
  _training_iteration: 1
  avg_loss: 47.281429290771484
  date: 2022-09-07_21-09-09
  done: false
  experiment_id: 624913ce79f046198982a0e158b0492d
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 47.281429290771484
  node_ip: 10.150.0.3
  pid: 28334
  should_checkpoint: true
  time_since_restore: 21.20513129234314
  time_this_iter_s: 21.20513129234314
  time_total_s: 21.20513129234314
  timestamp: 1662584949
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 2328e218
  warmup_time: 0.007533073425292969
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=28466)[0m {'loss': 11.595184326171875, 'avg_loss': 29.43830680847168}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_2328e218_5_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_21-08-43/checkpoint_000001)... Done. 1.0s


[2m[36m(RayTrainWorker pid=28474)[0m {'loss': 84.72283935546875, 'avg_loss': 84.72283935546875}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_4160e4a6_6_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-08-44/checkpoint_000000)... 

Result for TorchTrainer_4160e4a6:
  _time_this_iter_s: 24.299423217773438
  _timestamp: 1662584957
  _training_iteration: 1
  avg_loss: 84.72283935546875
  date: 2022-09-07_21-09-18
  done: false
  experiment_id: 4617519f82cf4bac893ed1bd4c503b09
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 84.72283935546875
  node_ip: 10.150.0.3
  pid: 28343
  should_checkpoint: true
  time_since_restore: 29.341326236724854
  time_this_iter_s: 29.341326236724854
  time_total_s: 29.341326236724854
  timestamp: 1662584958
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 4160e4a6
  warmup_time: 0.0077135562896728516
  
[2m[36m(RayTrainWorker pid=28466)[0m {'loss': 9.606078147888184, 'avg_loss': 22.827563921610516}
Result for TorchTrainer_2328e218:
  _time_this_iter_s: 4.988373041152954
  _timestamp: 1662584958
  _training_iteration: 3
  avg_loss: 22.827563921610516
  date: 2022-09-07_21-09-19
  done: false
  experiment_id: 624913ce79f046198982a0e158b0492d
  hostname: a

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_2328e218_5_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_21-08-43/checkpoint_000002)... Done. 1.0s
Done. 1.1s


[2m[36m(RayTrainWorker pid=28466)[0m {'loss': 2.2478649616241455, 'avg_loss': 17.682639181613922}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_2328e218_5_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_21-08-43/checkpoint_000003)... Done. 1.0s


[2m[36m(RayTrainWorker pid=28466)[0m {'loss': 138.5517120361328, 'avg_loss': 41.8564537525177}
Result for TorchTrainer_2328e218:
  _time_this_iter_s: 4.6088340282440186
  _timestamp: 1662584968
  _training_iteration: 5
  avg_loss: 41.8564537525177
  date: 2022-09-07_21-09-28
  done: false
  experiment_id: 624913ce79f046198982a0e158b0492d
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 138.5517120361328
  node_ip: 10.150.0.3
  pid: 28334
  should_checkpoint: true
  time_since_restore: 40.613444566726685
  time_this_iter_s: 4.618595600128174
  time_total_s: 40.613444566726685
  timestamp: 1662584968
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 2328e218
  warmup_time: 0.007533073425292969
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_2328e218_5_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_21-08-43/checkpoint_000004)... Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_2328e218_5_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_21-08-43/checkpoint_000004)... Done. 0.3s


[2m[36m(RayTrainWorker pid=28474)[0m {'loss': 79.79784393310547, 'avg_loss': 82.26034164428711}


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▄▂▁▇
iterations_since_restore,▁▃▅▆█
loss,▃▁▁▁█
time_since_restore,▁▃▅▆█
time_this_iter_s,█▁▁▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,41.85645
iterations_since_restore,5.0
loss,138.55171
time_since_restore,40.61344
time_this_iter_s,4.6186
time_total_s,40.61344
timestamp,1662584968.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00753


Result for TorchTrainer_2328e218:
  _time_this_iter_s: 4.6088340282440186
  _timestamp: 1662584968
  _training_iteration: 5
  avg_loss: 41.8564537525177
  date: 2022-09-07_21-09-28
  done: true
  experiment_id: 624913ce79f046198982a0e158b0492d
  experiment_tag: 5_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0007,max_num_steps=5,num_workers=4,

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_4160e4a6_6_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-08-44/checkpoint_000001)... 

Result for TorchTrainer_4160e4a6:
  _time_this_iter_s: 13.404391765594482
  _timestamp: 1662584971
  _training_iteration: 2
  avg_loss: 82.26034164428711
  date: 2022-09-07_21-09-31
  done: false
  experiment_id: 4617519f82cf4bac893ed1bd4c503b09
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 79.79784393310547
  node_ip: 10.150.0.3
  pid: 28343
  should_checkpoint: true
  time_since_restore: 42.66159677505493
  time_this_iter_s: 13.320270538330078
  time_total_s: 42.66159677505493
  timestamp: 1662584971
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 4160e4a6
  warmup_time: 0.0077135562896728516
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=28474)[0m {'loss': 77.8492660522461, 'avg_loss': 80.78998311360677}


[2m[36m(RayTrainWorker pid=29416)[0m 2022-09-07 21:09:44,199	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_4160e4a6_6_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-08-44/checkpoint_000002)... 

Result for TorchTrainer_4160e4a6:
  _time_this_iter_s: 13.039492130279541
  _timestamp: 1662584984
  _training_iteration: 3
  avg_loss: 80.78998311360677
  date: 2022-09-07_21-09-44
  done: false
  experiment_id: 4617519f82cf4bac893ed1bd4c503b09
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 77.8492660522461
  node_ip: 10.150.0.3
  pid: 28343
  should_checkpoint: true
  time_since_restore: 55.756284952163696
  time_this_iter_s: 13.094688177108765
  time_total_s: 55.756284952163696
  timestamp: 1662584984
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 4160e4a6
  warmup_time: 0.0077135562896728516
  


Done. 1.0s
[2m[36m(RayTrainWorker pid=29416)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=29416)[0m 2022-09-07 21:09:50,219	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=28474)[0m {'loss': 9.560626029968262, 'avg_loss': 62.982643842697144}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_4160e4a6_6_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-08-44/checkpoint_000003)... 

Result for TorchTrainer_4160e4a6:
  _time_this_iter_s: 15.198510885238647
  _timestamp: 1662584999
  _training_iteration: 4
  avg_loss: 62.982643842697144
  date: 2022-09-07_21-09-59
  done: false
  experiment_id: 4617519f82cf4bac893ed1bd4c503b09
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 9.560626029968262
  node_ip: 10.150.0.3
  pid: 28343
  should_checkpoint: true
  time_since_restore: 70.9718325138092
  time_this_iter_s: 15.215547561645508
  time_total_s: 70.9718325138092
  timestamp: 1662584999
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 4160e4a6
  warmup_time: 0.0077135562896728516
  


Done. 1.6s


[2m[36m(RayTrainWorker pid=29416)[0m {'loss': 85.187744140625, 'avg_loss': 85.187744140625}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_41bc41c0_7_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-09-36/checkpoint_000000)... 

Result for TorchTrainer_41bc41c0:
  _time_this_iter_s: 23.092734575271606
  _timestamp: 1662585007
  _training_iteration: 1
  avg_loss: 85.187744140625
  date: 2022-09-07_21-10-08
  done: false
  experiment_id: bc756455f26c4e5f9a223ed5058324d4
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 85.187744140625
  node_ip: 10.150.0.3
  pid: 29336
  should_checkpoint: true
  time_since_restore: 27.652225255966187
  time_this_iter_s: 27.652225255966187
  time_total_s: 27.652225255966187
  timestamp: 1662585008
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 41bc41c0
  warmup_time: 0.00790262222290039
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=28474)[0m {'loss': 47.91688537597656, 'avg_loss': 59.96949214935303}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_4160e4a6_6_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-08-44/checkpoint_000004)... 

Result for TorchTrainer_4160e4a6:
  _time_this_iter_s: 14.004685401916504
  _timestamp: 1662585013
  _training_iteration: 5
  avg_loss: 59.96949214935303
  date: 2022-09-07_21-10-13
  done: false
  experiment_id: 4617519f82cf4bac893ed1bd4c503b09
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 47.91688537597656
  node_ip: 10.150.0.3
  pid: 28343
  should_checkpoint: true
  time_since_restore: 84.87619829177856
  time_this_iter_s: 13.90436577796936
  time_total_s: 84.87619829177856
  timestamp: 1662585013
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 4160e4a6
  warmup_time: 0.0077135562896728516
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_4160e4a6_6_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-08-44/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

[2m[36m(RayTrainWorker pid=29416)[0m {'loss': 51.79338455200195, 'avg_loss': 68.49056434631348}


0,1
avg_loss,█▇▇▂▁
iterations_since_restore,▁▃▅▆█
loss,██▇▁▅
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▁▂▁
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,59.96949
iterations_since_restore,5.0
loss,47.91689
time_since_restore,84.8762
time_this_iter_s,13.90437
time_total_s,84.8762
timestamp,1662585013.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00771


Result for TorchTrainer_4160e4a6:
  _time_this_iter_s: 14.004685401916504
  _timestamp: 1662585013
  _training_iteration: 5
  avg_loss: 59.96949214935303
  date: 2022-09-07_21-10-13
  done: true
  experiment_id: 4617519f82cf4bac893ed1bd4c503b09
  experiment_tag: 6_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0004,max_num_steps=5,num_workers

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_41bc41c0_7_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-09-36/checkpoint_000001)... 

Result for TorchTrainer_41bc41c0:
  _time_this_iter_s: 13.217745304107666
  _timestamp: 1662585021
  _training_iteration: 2
  avg_loss: 68.49056434631348
  date: 2022-09-07_21-10-21
  done: false
  experiment_id: bc756455f26c4e5f9a223ed5058324d4
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 51.79338455200195
  node_ip: 10.150.0.3
  pid: 29336
  should_checkpoint: true
  time_since_restore: 40.78637480735779
  time_this_iter_s: 13.134149551391602
  time_total_s: 40.78637480735779
  timestamp: 1662585021
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 41bc41c0
  warmup_time: 0.00790262222290039
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=29416)[0m {'loss': 16.159589767456055, 'avg_loss': 51.046906153361}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_41bc41c0_7_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-09-36/checkpoint_000002)... 

Result for TorchTrainer_41bc41c0:
  _time_this_iter_s: 12.995812177658081
  _timestamp: 1662585034
  _training_iteration: 3
  avg_loss: 51.046906153361
  date: 2022-09-07_21-10-34
  done: false
  experiment_id: bc756455f26c4e5f9a223ed5058324d4
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 16.159589767456055
  node_ip: 10.150.0.3
  pid: 29336
  should_checkpoint: true
  time_since_restore: 53.84964561462402
  time_this_iter_s: 13.063270807266235
  time_total_s: 53.84964561462402
  timestamp: 1662585034
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 41bc41c0
  warmup_time: 0.00790262222290039
  


Done. 1.0s
[2m[36m(RayTrainWorker pid=30223)[0m 2022-09-07 21:10:36,524	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=30223)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=30223)[0m 2022-09-07 21:10:41,921	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=29416)[0m {'loss': 42.300750732421875, 'avg_loss': 48.86036729812622}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_41bc41c0_7_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-09-36/checkpoint_000003)... 

Result for TorchTrainer_41bc41c0:
  _time_this_iter_s: 15.192680358886719
  _timestamp: 1662585049
  _training_iteration: 4
  avg_loss: 48.86036729812622
  date: 2022-09-07_21-10-49
  done: false
  experiment_id: bc756455f26c4e5f9a223ed5058324d4
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 42.300750732421875
  node_ip: 10.150.0.3
  pid: 29336
  should_checkpoint: true
  time_since_restore: 68.9288637638092
  time_this_iter_s: 15.07921814918518
  time_total_s: 68.9288637638092
  timestamp: 1662585049
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 41bc41c0
  warmup_time: 0.00790262222290039
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=30223)[0m {'loss': 82.30538940429688, 'avg_loss': 82.30538940429688}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_611d7426_8_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-10-25/checkpoint_000000)... 

Result for TorchTrainer_611d7426:
  _time_this_iter_s: 20.962560415267944
  _timestamp: 1662585057
  _training_iteration: 1
  avg_loss: 82.30538940429688
  date: 2022-09-07_21-10-58
  done: false
  experiment_id: bce9580011204ae892f11fb8c739ac2e
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 82.30538940429688
  node_ip: 10.150.0.3
  pid: 30153
  should_checkpoint: true
  time_since_restore: 25.224627017974854
  time_this_iter_s: 25.224627017974854
  time_total_s: 25.224627017974854
  timestamp: 1662585058
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 611d7426
  warmup_time: 0.009753704071044922
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=29416)[0m {'loss': 14.77640438079834, 'avg_loss': 42.04357471466064}
Result for TorchTrainer_41bc41c0:
  _time_this_iter_s: 12.540035486221313
  _timestamp: 1662585061
  _training_iteration: 5
  avg_loss: 42.04357471466064
  date: 2022-09-07_21-11-02
  done: false
  experiment_id: bc756455f26c4e5f9a223ed5058324d4
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 14.77640438079834
  node_ip: 10.150.0.3
  pid: 29336
  should_checkpoint: true
  time_since_restore: 82.14011311531067
  time_this_iter_s: 13.211249351501465
  time_total_s: 82.14011311531067
  timestamp: 1662585062
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 41bc41c0
  warmup_time: 0.00790262222290039
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_41bc41c0_7_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-09-36/checkpoint_000004)... Done. 1.2s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_41bc41c0_7_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-09-36/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

[2m[36m(RayTrainWorker pid=30223)[0m {'loss': 37.45103073120117, 'avg_loss': 59.87821006774902}


Result for TorchTrainer_41bc41c0:
  _time_this_iter_s: 12.540035486221313
  _timestamp: 1662585061
  _training_iteration: 5
  avg_loss: 42.04357471466064
  date: 2022-09-07_21-11-02
  done: true
  experiment_id: bc756455f26c4e5f9a223ed5058324d4
  experiment_tag: 7_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0025,max_num_steps=5,num_workers

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_611d7426_8_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-10-25/checkpoint_000001)... 

Result for TorchTrainer_611d7426:
  _time_this_iter_s: 13.83748483657837
  _timestamp: 1662585071
  _training_iteration: 2
  avg_loss: 59.87821006774902
  date: 2022-09-07_21-11-12
  done: false
  experiment_id: bce9580011204ae892f11fb8c739ac2e
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 37.45103073120117
  node_ip: 10.150.0.3
  pid: 30153
  should_checkpoint: true
  time_since_restore: 38.992254972457886
  time_this_iter_s: 13.767627954483032
  time_total_s: 38.992254972457886
  timestamp: 1662585072
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 611d7426
  warmup_time: 0.009753704071044922
  


0,1
avg_loss,█▅▂▂▁
iterations_since_restore,▁▃▅▆█
loss,█▅▁▄▁
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▁▂▁
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,42.04357
iterations_since_restore,5.0
loss,14.7764
time_since_restore,82.14011
time_this_iter_s,13.21125
time_total_s,82.14011
timestamp,1662585062.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.0079


Done. 1.0s
[2m[36m(RayTrainWorker pid=31066)[0m 2022-09-07 21:11:21,492	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]


[2m[36m(RayTrainWorker pid=30223)[0m {'loss': 113.56055450439453, 'avg_loss': 77.77232487996419}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_611d7426_8_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-10-25/checkpoint_000002)... 

Result for TorchTrainer_611d7426:
  _time_this_iter_s: 14.799139976501465
  _timestamp: 1662585086
  _training_iteration: 3
  avg_loss: 77.77232487996419
  date: 2022-09-07_21-11-27
  done: false
  experiment_id: bce9580011204ae892f11fb8c739ac2e
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 113.56055450439453
  node_ip: 10.150.0.3
  pid: 30153
  should_checkpoint: true
  time_since_restore: 53.89546465873718
  time_this_iter_s: 14.903209686279297
  time_total_s: 53.89546465873718
  timestamp: 1662585087
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 611d7426
  warmup_time: 0.009753704071044922
  


Done. 1.1s
[2m[36m(RayTrainWorker pid=31066)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=31066)[0m 2022-09-07 21:11:28,748	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=30223)[0m {'loss': 16.853878021240234, 'avg_loss': 62.5427131652832}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_611d7426_8_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-10-25/checkpoint_000003)... 

Result for TorchTrainer_611d7426:
  _time_this_iter_s: 15.22741413116455
  _timestamp: 1662585101
  _training_iteration: 4
  avg_loss: 62.5427131652832
  date: 2022-09-07_21-11-42
  done: false
  experiment_id: bce9580011204ae892f11fb8c739ac2e
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 16.853878021240234
  node_ip: 10.150.0.3
  pid: 30153
  should_checkpoint: true
  time_since_restore: 69.06386518478394
  time_this_iter_s: 15.168400526046753
  time_total_s: 69.06386518478394
  timestamp: 1662585102
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 611d7426
  warmup_time: 0.009753704071044922
  


Done. 1.5s


[2m[36m(RayTrainWorker pid=31066)[0m {'loss': 57.298194885253906, 'avg_loss': 57.298194885253906}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_7e39ebde_9_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-11-14/checkpoint_000000)... 

Result for TorchTrainer_7e39ebde:
  _time_this_iter_s: 28.106632471084595
  _timestamp: 1662585110
  _training_iteration: 1
  avg_loss: 57.298194885253906
  date: 2022-09-07_21-11-50
  done: false
  experiment_id: 32ca8538878a4c0c8911eda81c682137
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 57.298194885253906
  node_ip: 10.150.0.3
  pid: 30991
  should_checkpoint: true
  time_since_restore: 32.18951964378357
  time_this_iter_s: 32.18951964378357
  time_total_s: 32.18951964378357
  timestamp: 1662585110
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 7e39ebde
  warmup_time: 0.005149126052856445
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=30223)[0m {'loss': 93.33875274658203, 'avg_loss': 68.70192108154296}
Result for TorchTrainer_611d7426:
  _time_this_iter_s: 12.791445255279541
  _timestamp: 1662585114
  _training_iteration: 5
  avg_loss: 68.70192108154296
  date: 2022-09-07_21-11-54
  done: false
  experiment_id: bce9580011204ae892f11fb8c739ac2e
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 93.33875274658203
  node_ip: 10.150.0.3
  pid: 30153
  should_checkpoint: true
  time_since_restore: 81.7708113193512
  time_this_iter_s: 12.70694613456726
  time_total_s: 81.7708113193512
  timestamp: 1662585114
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 611d7426
  warmup_time: 0.009753704071044922
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_611d7426_8_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-10-25/checkpoint_000004)... Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_611d7426_8_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-10-25/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

Result for TorchTrainer_611d7426:
  _time_this_iter_s: 12.791445255279541
  _timestamp: 1662585114
  _training_iteration: 5
  avg_loss: 68.70192108154296
  date: 2022-09-07_21-11-54
  done: true
  experiment_id: bce9580011204ae892f11fb8c739ac2e
  experiment_tag: 8_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0016,max_num_steps=5,num_workers=

0,1
avg_loss,█▁▇▂▄
iterations_since_restore,▁▃▅▆█
loss,▆▂█▁▇
time_since_restore,▁▃▅▆█
time_this_iter_s,█▂▂▂▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▇█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,68.70192
iterations_since_restore,5.0
loss,93.33875
time_since_restore,81.77081
time_this_iter_s,12.70695
time_total_s,81.77081
timestamp,1662585114.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00975


[2m[36m(RayTrainWorker pid=31066)[0m {'loss': 42.44829177856445, 'avg_loss': 49.87324333190918}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_7e39ebde_9_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-11-14/checkpoint_000001)... 

Result for TorchTrainer_7e39ebde:
  _time_this_iter_s: 18.113369464874268
  _timestamp: 1662585128
  _training_iteration: 2
  avg_loss: 49.87324333190918
  date: 2022-09-07_21-12-08
  done: false
  experiment_id: 32ca8538878a4c0c8911eda81c682137
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 42.44829177856445
  node_ip: 10.150.0.3
  pid: 30991
  should_checkpoint: true
  time_since_restore: 50.37285923957825
  time_this_iter_s: 18.183339595794678
  time_total_s: 50.37285923957825
  timestamp: 1662585128
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 7e39ebde
  warmup_time: 0.005149126052856445
  


Done. 1.0s
[2m[36m(RayTrainWorker pid=31908)[0m 2022-09-07 21:12:15,080	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=31908)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=31908)[0m 2022-09-07 21:12:21,112	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=31066)[0m {'loss': 44.227928161621094, 'avg_loss': 47.99147160847982}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_7e39ebde_9_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-11-14/checkpoint_000002)... 

Result for TorchTrainer_7e39ebde:
  _time_this_iter_s: 19.18120527267456
  _timestamp: 1662585147
  _training_iteration: 3
  avg_loss: 47.99147160847982
  date: 2022-09-07_21-12-27
  done: false
  experiment_id: 32ca8538878a4c0c8911eda81c682137
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 44.227928161621094
  node_ip: 10.150.0.3
  pid: 30991
  should_checkpoint: true
  time_since_restore: 69.42990136146545
  time_this_iter_s: 19.057042121887207
  time_total_s: 69.42990136146545
  timestamp: 1662585147
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 7e39ebde
  warmup_time: 0.005149126052856445
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=31908)[0m {'loss': 33.433773040771484, 'avg_loss': 33.433773040771484}
Result for TorchTrainer_9b8feb8e:
  _time_this_iter_s: 15.778479814529419
  _timestamp: 1662585151
  _training_iteration: 1
  avg_loss: 33.433773040771484
  date: 2022-09-07_21-12-31
  done: false
  experiment_id: 8b88f15dfc9445518d5f122c8d9f65da
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 33.433773040771484
  node_ip: 10.150.0.3
  pid: 31815
  should_checkpoint: true
  time_since_restore: 20.216514825820923
  time_this_iter_s: 20.216514825820923
  time_total_s: 20.216514825820923
  timestamp: 1662585151
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 9b8feb8e
  warmup_time: 0.00574803352355957
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_9b8feb8e_10_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-12-06/checkpoint_000000)... Done. 1.0s


[2m[36m(RayTrainWorker pid=31908)[0m {'loss': 45.98919677734375, 'avg_loss': 39.71148490905762}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_9b8feb8e_10_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-12-06/checkpoint_000001)... 

Result for TorchTrainer_9b8feb8e:
  _time_this_iter_s: 8.413378953933716
  _timestamp: 1662585159
  _training_iteration: 2
  avg_loss: 39.71148490905762
  date: 2022-09-07_21-12-40
  done: false
  experiment_id: 8b88f15dfc9445518d5f122c8d9f65da
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 45.98919677734375
  node_ip: 10.150.0.3
  pid: 31815
  should_checkpoint: true
  time_since_restore: 28.579983234405518
  time_this_iter_s: 8.363468408584595
  time_total_s: 28.579983234405518
  timestamp: 1662585160
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 9b8feb8e
  warmup_time: 0.00574803352355957
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=31066)[0m {'loss': 80.68941497802734, 'avg_loss': 56.1659574508667}
Result for TorchTrainer_7e39ebde:
  _time_this_iter_s: 17.21562671661377
  _timestamp: 1662585164
  _training_iteration: 4
  avg_loss: 56.1659574508667
  date: 2022-09-07_21-12-45
  done: false
  experiment_id: 32ca8538878a4c0c8911eda81c682137
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 80.68941497802734
  node_ip: 10.150.0.3
  pid: 30991
  should_checkpoint: true
  time_since_restore: 86.61032295227051
  time_this_iter_s: 17.180421590805054
  time_total_s: 86.61032295227051
  timestamp: 1662585165
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 7e39ebde
  warmup_time: 0.005149126052856445
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_7e39ebde_9_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-11-14/checkpoint_000003)... Done. 1.0s


[2m[36m(RayTrainWorker pid=31908)[0m {'loss': 85.8507308959961, 'avg_loss': 55.091233571370445}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_9b8feb8e_10_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-12-06/checkpoint_000002)... 

Result for TorchTrainer_9b8feb8e:
  _time_this_iter_s: 9.07651686668396
  _timestamp: 1662585168
  _training_iteration: 3
  avg_loss: 55.091233571370445
  date: 2022-09-07_21-12-49
  done: false
  experiment_id: 8b88f15dfc9445518d5f122c8d9f65da
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 85.8507308959961
  node_ip: 10.150.0.3
  pid: 31815
  should_checkpoint: true
  time_since_restore: 37.622604846954346
  time_this_iter_s: 9.042621612548828
  time_total_s: 37.622604846954346
  timestamp: 1662585169
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 9b8feb8e
  warmup_time: 0.00574803352355957
  


Done. 1.5s


[2m[36m(RayTrainWorker pid=31908)[0m {'loss': 64.64684295654297, 'avg_loss': 57.480135917663574}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_9b8feb8e_10_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-12-06/checkpoint_000003)... 

Result for TorchTrainer_9b8feb8e:
  _time_this_iter_s: 8.164771556854248
  _timestamp: 1662585176
  _training_iteration: 4
  avg_loss: 57.480135917663574
  date: 2022-09-07_21-12-57
  done: false
  experiment_id: 8b88f15dfc9445518d5f122c8d9f65da
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 64.64684295654297
  node_ip: 10.150.0.3
  pid: 31815
  should_checkpoint: true
  time_since_restore: 45.783631563186646
  time_this_iter_s: 8.1610267162323
  time_total_s: 45.783631563186646
  timestamp: 1662585177
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 9b8feb8e
  warmup_time: 0.00574803352355957
  


Done. 1.4s


[2m[36m(RayTrainWorker pid=31066)[0m {'loss': 37.86739730834961, 'avg_loss': 52.50624542236328}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_7e39ebde_9_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-11-14/checkpoint_000004)... 

Result for TorchTrainer_7e39ebde:
  _time_this_iter_s: 18.638954877853394
  _timestamp: 1662585183
  _training_iteration: 5
  avg_loss: 52.50624542236328
  date: 2022-09-07_21-13-03
  done: false
  experiment_id: 32ca8538878a4c0c8911eda81c682137
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 37.86739730834961
  node_ip: 10.150.0.3
  pid: 30991
  should_checkpoint: true
  time_since_restore: 105.29592990875244
  time_this_iter_s: 18.685606956481934
  time_total_s: 105.29592990875244
  timestamp: 1662585183
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 7e39ebde
  warmup_time: 0.005149126052856445
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_7e39ebde_9_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-11-14/checkpoint_000004)... Done. 0.3s


[2m[36m(RayTrainWorker pid=31908)[0m {'loss': 119.39801025390625, 'avg_loss': 69.86371078491212}


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▂▁▇▄
iterations_since_restore,▁▃▅▆█
loss,▄▂▂█▁
time_since_restore,▁▃▅▆█
time_this_iter_s,█▁▂▁▂
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,52.50625
iterations_since_restore,5.0
loss,37.8674
time_since_restore,105.29593
time_this_iter_s,18.68561
time_total_s,105.29593
timestamp,1662585183.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00515


Result for TorchTrainer_7e39ebde:
  _time_this_iter_s: 18.638954877853394
  _timestamp: 1662585183
  _training_iteration: 5
  avg_loss: 52.50624542236328
  date: 2022-09-07_21-13-03
  done: true
  experiment_id: 32ca8538878a4c0c8911eda81c682137
  experiment_tag: 9_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0005,max_num_steps=5,num_workers=

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_9b8feb8e_10_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-12-06/checkpoint_000004)... 

Result for TorchTrainer_9b8feb8e:
  _time_this_iter_s: 9.733388423919678
  _timestamp: 1662585186
  _training_iteration: 5
  avg_loss: 69.86371078491212
  date: 2022-09-07_21-13-07
  done: false
  experiment_id: 8b88f15dfc9445518d5f122c8d9f65da
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 119.39801025390625
  node_ip: 10.150.0.3
  pid: 31815
  should_checkpoint: true
  time_since_restore: 55.49842095375061
  time_this_iter_s: 9.714789390563965
  time_total_s: 55.49842095375061
  timestamp: 1662585187
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 9b8feb8e
  warmup_time: 0.00574803352355957
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_9b8feb8e_10_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-12-06/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,▁▂▅▆█
iterations_since_restore,▁▃▅▆█
loss,▁▂▅▄█
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▂▁▂
time_total_s,▁▃▄▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,69.86371
iterations_since_restore,5.0
loss,119.39801
time_since_restore,55.49842
time_this_iter_s,9.71479
time_total_s,55.49842
timestamp,1662585187.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00575


Result for TorchTrainer_9b8feb8e:
  _time_this_iter_s: 9.733388423919678
  _timestamp: 1662585186
  _training_iteration: 5
  avg_loss: 69.86371078491212
  date: 2022-09-07_21-13-07
  done: true
  experiment_id: 8b88f15dfc9445518d5f122c8d9f65da
  experiment_tag: 10_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0003,max_num_steps=5,num_workers

[2m[36m(RayTrainWorker pid=413)[0m 2022-09-07 21:13:28,312	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=420)[0m 2022-09-07 21:13:29,215	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=413)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=420)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=413)[0m 2022-09-07 21:13:36,419	INFO train_loop_utils.py:300 -- Moving model to device: cpu
[2m[36m(RayTrainWorker pid=420)[0m 2022-09-07 21:13:36,959	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=413)[0m {'loss': 48.339210510253906, 'avg_loss': 48.339210510253906}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_ba8cfb08_11_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-13-20/checkpoint_000000)... 

Result for TorchTrainer_ba8cfb08:
  _time_this_iter_s: 21.050806283950806
  _timestamp: 1662585229
  _training_iteration: 1
  avg_loss: 48.339210510253906
  date: 2022-09-07_21-13-50
  done: false
  experiment_id: 12da93f9422e461983d356970d37ba76
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 48.339210510253906
  node_ip: 10.150.0.3
  pid: 32745
  should_checkpoint: true
  time_since_restore: 25.78248929977417
  time_this_iter_s: 25.78248929977417
  time_total_s: 25.78248929977417
  timestamp: 1662585230
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: ba8cfb08
  warmup_time: 0.010708808898925781
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=413)[0m {'loss': 41.07661819458008, 'avg_loss': 44.70791435241699}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_ba8cfb08_11_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-13-20/checkpoint_000001)... 

Result for TorchTrainer_ba8cfb08:
  _time_this_iter_s: 8.480549097061157
  _timestamp: 1662585238
  _training_iteration: 2
  avg_loss: 44.70791435241699
  date: 2022-09-07_21-13-58
  done: false
  experiment_id: 12da93f9422e461983d356970d37ba76
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 41.07661819458008
  node_ip: 10.150.0.3
  pid: 32745
  should_checkpoint: true
  time_since_restore: 34.20060634613037
  time_this_iter_s: 8.418117046356201
  time_total_s: 34.20060634613037
  timestamp: 1662585238
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: ba8cfb08
  warmup_time: 0.010708808898925781
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=420)[0m {'loss': 26.357561111450195, 'avg_loss': 26.357561111450195}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e6749e2e_12_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-13-21/checkpoint_000000)... 

Result for TorchTrainer_e6749e2e:
  _time_this_iter_s: 30.133382320404053
  _timestamp: 1662585239
  _training_iteration: 1
  avg_loss: 26.357561111450195
  date: 2022-09-07_21-14-00
  done: false
  experiment_id: 80e86f1ff34c49d98349a5e7a5766181
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 26.357561111450195
  node_ip: 10.150.0.3
  pid: 32756
  should_checkpoint: true
  time_since_restore: 34.88279747962952
  time_this_iter_s: 34.88279747962952
  time_total_s: 34.88279747962952
  timestamp: 1662585240
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: e6749e2e
  warmup_time: 0.006491899490356445
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=413)[0m {'loss': 74.394775390625, 'avg_loss': 54.60353469848633}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_ba8cfb08_11_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-13-20/checkpoint_000002)... 

Result for TorchTrainer_ba8cfb08:
  _time_this_iter_s: 10.222918033599854
  _timestamp: 1662585248
  _training_iteration: 3
  avg_loss: 54.60353469848633
  date: 2022-09-07_21-14-09
  done: false
  experiment_id: 12da93f9422e461983d356970d37ba76
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 74.394775390625
  node_ip: 10.150.0.3
  pid: 32745
  should_checkpoint: true
  time_since_restore: 44.38354134559631
  time_this_iter_s: 10.182934999465942
  time_total_s: 44.38354134559631
  timestamp: 1662585249
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: ba8cfb08
  warmup_time: 0.010708808898925781
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=413)[0m {'loss': 106.49340057373047, 'avg_loss': 67.57600116729736}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_ba8cfb08_11_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-13-20/checkpoint_000003)... 

Result for TorchTrainer_ba8cfb08:
  _time_this_iter_s: 8.190218210220337
  _timestamp: 1662585256
  _training_iteration: 4
  avg_loss: 67.57600116729736
  date: 2022-09-07_21-14-17
  done: false
  experiment_id: 12da93f9422e461983d356970d37ba76
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 106.49340057373047
  node_ip: 10.150.0.3
  pid: 32745
  should_checkpoint: true
  time_since_restore: 52.58155703544617
  time_this_iter_s: 8.198015689849854
  time_total_s: 52.58155703544617
  timestamp: 1662585257
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: ba8cfb08
  warmup_time: 0.010708808898925781
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=420)[0m {'loss': 148.92935180664062, 'avg_loss': 87.64345645904541}
Result for TorchTrainer_e6749e2e:
  _time_this_iter_s: 18.96597719192505
  _timestamp: 1662585258
  _training_iteration: 2
  avg_loss: 87.64345645904541
  date: 2022-09-07_21-14-19
  done: false
  experiment_id: 80e86f1ff34c49d98349a5e7a5766181
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 148.92935180664062
  node_ip: 10.150.0.3
  pid: 32756
  should_checkpoint: true
  time_since_restore: 53.80530524253845
  time_this_iter_s: 18.922507762908936
  time_total_s: 53.80530524253845
  timestamp: 1662585259
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: e6749e2e
  warmup_time: 0.006491899490356445
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e6749e2e_12_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-13-21/checkpoint_000001)... Done. 1.1s


[2m[36m(RayTrainWorker pid=413)[0m {'loss': 30.556293487548828, 'avg_loss': 60.17205963134766}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_ba8cfb08_11_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-13-20/checkpoint_000004)... 

Result for TorchTrainer_ba8cfb08:
  _time_this_iter_s: 8.76252555847168
  _timestamp: 1662585265
  _training_iteration: 5
  avg_loss: 60.17205963134766
  date: 2022-09-07_21-14-26
  done: false
  experiment_id: 12da93f9422e461983d356970d37ba76
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 30.556293487548828
  node_ip: 10.150.0.3
  pid: 32745
  should_checkpoint: true
  time_since_restore: 61.448429107666016
  time_this_iter_s: 8.866872072219849
  time_total_s: 61.448429107666016
  timestamp: 1662585266
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: ba8cfb08
  warmup_time: 0.010708808898925781
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_ba8cfb08_11_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-13-20/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,▂▁▄█▆
iterations_since_restore,▁▃▅▆█
loss,▃▂▅█▁
time_since_restore,▁▃▅▆█
time_this_iter_s,█▁▂▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,60.17206
iterations_since_restore,5.0
loss,30.55629
time_since_restore,61.44843
time_this_iter_s,8.86687
time_total_s,61.44843
timestamp,1662585266.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.01071


Result for TorchTrainer_ba8cfb08:
  _time_this_iter_s: 8.76252555847168
  _timestamp: 1662585265
  _training_iteration: 5
  avg_loss: 60.17205963134766
  date: 2022-09-07_21-14-26
  done: true
  experiment_id: 12da93f9422e461983d356970d37ba76
  experiment_tag: 11_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0007,max_num_steps=5,num_workers=4

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e6749e2e_12_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-13-21/checkpoint_000002)... 

Result for TorchTrainer_e6749e2e:
  _time_this_iter_s: 16.9034206867218
  _timestamp: 1662585275
  _training_iteration: 3
  avg_loss: 74.10995038350423
  date: 2022-09-07_21-14-36
  done: false
  experiment_id: 80e86f1ff34c49d98349a5e7a5766181
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 47.042938232421875
  node_ip: 10.150.0.3
  pid: 32756
  should_checkpoint: true
  time_since_restore: 70.74636316299438
  time_this_iter_s: 16.941057920455933
  time_total_s: 70.74636316299438
  timestamp: 1662585276
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: e6749e2e
  warmup_time: 0.006491899490356445
  


Done. 1.0s
[2m[36m(RayTrainWorker pid=1519)[0m 2022-09-07 21:14:43,057	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=1519)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=1519)[0m 2022-09-07 21:14:49,234	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=420)[0m {'loss': 72.06648254394531, 'avg_loss': 73.5990834236145}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e6749e2e_12_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-13-21/checkpoint_000003)... 

Result for TorchTrainer_e6749e2e:
  _time_this_iter_s: 22.400439500808716
  _timestamp: 1662585298
  _training_iteration: 4
  avg_loss: 73.5990834236145
  date: 2022-09-07_21-14-58
  done: false
  experiment_id: 80e86f1ff34c49d98349a5e7a5766181
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 72.06648254394531
  node_ip: 10.150.0.3
  pid: 32756
  should_checkpoint: true
  time_since_restore: 93.0526020526886
  time_this_iter_s: 22.306238889694214
  time_total_s: 93.0526020526886
  timestamp: 1662585298
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: e6749e2e
  warmup_time: 0.006491899490356445
  


Done. 2.2s


[2m[36m(RayTrainWorker pid=1519)[0m {'loss': 90.96295166015625, 'avg_loss': 90.96295166015625}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e6d9de10_13_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-14-36/checkpoint_000000)... 

Result for TorchTrainer_e6d9de10:
  _time_this_iter_s: 28.593892335891724
  _timestamp: 1662585312
  _training_iteration: 1
  avg_loss: 90.96295166015625
  date: 2022-09-07_21-15-12
  done: false
  experiment_id: 9c9009d6bae9450889471fef27aee77c
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 90.96295166015625
  node_ip: 10.150.0.3
  pid: 1424
  should_checkpoint: true
  time_since_restore: 32.41099667549133
  time_this_iter_s: 32.41099667549133
  time_total_s: 32.41099667549133
  timestamp: 1662585312
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: e6d9de10
  warmup_time: 0.01052093505859375
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=420)[0m {'loss': 83.03189086914062, 'avg_loss': 75.48564491271972}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e6749e2e_12_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-13-21/checkpoint_000004)... 

Result for TorchTrainer_e6749e2e:
  _time_this_iter_s: 19.31663990020752
  _timestamp: 1662585317
  _training_iteration: 5
  avg_loss: 75.48564491271972
  date: 2022-09-07_21-15-18
  done: false
  experiment_id: 80e86f1ff34c49d98349a5e7a5766181
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 83.03189086914062
  node_ip: 10.150.0.3
  pid: 32756
  should_checkpoint: true
  time_since_restore: 112.49649214744568
  time_this_iter_s: 19.44389009475708
  time_total_s: 112.49649214744568
  timestamp: 1662585318
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: e6749e2e
  warmup_time: 0.006491899490356445
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e6749e2e_12_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-13-21/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,▁█▆▆▇
iterations_since_restore,▁▃▅▆█
loss,▁█▂▄▄
time_since_restore,▁▃▄▆█
time_this_iter_s,█▂▁▃▂
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,75.48564
iterations_since_restore,5.0
loss,83.03189
time_since_restore,112.49649
time_this_iter_s,19.44389
time_total_s,112.49649
timestamp,1662585318.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00649


Result for TorchTrainer_e6749e2e:
  _time_this_iter_s: 19.31663990020752
  _timestamp: 1662585317
  _training_iteration: 5
  avg_loss: 75.48564491271972
  date: 2022-09-07_21-15-18
  done: true
  experiment_id: 80e86f1ff34c49d98349a5e7a5766181
  experiment_tag: 12_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0002,max_num_steps=5,num_workers=

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e6d9de10_13_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-14-36/checkpoint_000001)... 

Result for TorchTrainer_e6d9de10:
  _time_this_iter_s: 17.699648141860962
  _timestamp: 1662585329
  _training_iteration: 2
  avg_loss: 89.69064712524414
  date: 2022-09-07_21-15-30
  done: false
  experiment_id: 9c9009d6bae9450889471fef27aee77c
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 88.41834259033203
  node_ip: 10.150.0.3
  pid: 1424
  should_checkpoint: true
  time_since_restore: 50.21741509437561
  time_this_iter_s: 17.806418418884277
  time_total_s: 50.21741509437561
  timestamp: 1662585330
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: e6d9de10
  warmup_time: 0.01052093505859375
  


Done. 1.1s
[2m[36m(RayTrainWorker pid=2316)[0m 2022-09-07 21:15:35,799	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=2316)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=2316)[0m 2022-09-07 21:15:41,531	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=2316)[0m {'loss': 3.762737512588501, 'avg_loss': 3.762737512588501}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_1362af8e_14_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-15-28/checkpoint_000000)... 

Result for TorchTrainer_1362af8e:
  _time_this_iter_s: 11.131107568740845
  _timestamp: 1662585347
  _training_iteration: 1
  avg_loss: 3.762737512588501
  date: 2022-09-07_21-15-48
  done: false
  experiment_id: c7c39989bcdd4eeeb221ae390a920e7f
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 3.762737512588501
  node_ip: 10.150.0.3
  pid: 2227
  should_checkpoint: true
  time_since_restore: 15.691909551620483
  time_this_iter_s: 15.691909551620483
  time_total_s: 15.691909551620483
  timestamp: 1662585348
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 1362af8e
  warmup_time: 0.006009578704833984
  


Done. 1.5s


[2m[36m(RayTrainWorker pid=1519)[0m {'loss': 100.92546844482422, 'avg_loss': 93.43558756510417}
Result for TorchTrainer_e6d9de10:
  _time_this_iter_s: 20.32417130470276
  _timestamp: 1662585350
  _training_iteration: 3
  avg_loss: 93.43558756510417
  date: 2022-09-07_21-15-50
  done: false
  experiment_id: 9c9009d6bae9450889471fef27aee77c
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 100.92546844482422
  node_ip: 10.150.0.3
  pid: 1424
  should_checkpoint: true
  time_since_restore: 70.42025876045227
  time_this_iter_s: 20.20284366607666
  time_total_s: 70.42025876045227
  timestamp: 1662585350
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: e6d9de10
  warmup_time: 0.01052093505859375
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e6d9de10_13_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-14-36/checkpoint_000002)... Done. 1.1s


[2m[36m(RayTrainWorker pid=2316)[0m {'loss': 95.09058380126953, 'avg_loss': 49.426660656929016}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_1362af8e_14_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-15-28/checkpoint_000001)... 

Result for TorchTrainer_1362af8e:
  _time_this_iter_s: 5.6053526401519775
  _timestamp: 1662585353
  _training_iteration: 2
  avg_loss: 49.426660656929016
  date: 2022-09-07_21-15-53
  done: false
  experiment_id: c7c39989bcdd4eeeb221ae390a920e7f
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 95.09058380126953
  node_ip: 10.150.0.3
  pid: 2227
  should_checkpoint: true
  time_since_restore: 21.15885639190674
  time_this_iter_s: 5.466946840286255
  time_total_s: 21.15885639190674
  timestamp: 1662585353
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 1362af8e
  warmup_time: 0.006009578704833984
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=2316)[0m {'loss': 144.66831970214844, 'avg_loss': 81.17388033866882}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_1362af8e_14_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-15-28/checkpoint_000002)... Done. 1.0s


[2m[36m(RayTrainWorker pid=2316)[0m {'loss': 2.683864116668701, 'avg_loss': 61.55137628316879}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_1362af8e_14_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-15-28/checkpoint_000003)... 

Result for TorchTrainer_1362af8e:
  _time_this_iter_s: 5.900113105773926
  _timestamp: 1662585363
  _training_iteration: 4
  avg_loss: 61.55137628316879
  date: 2022-09-07_21-16-04
  done: false
  experiment_id: c7c39989bcdd4eeeb221ae390a920e7f
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 2.683864116668701
  node_ip: 10.150.0.3
  pid: 2227
  should_checkpoint: true
  time_since_restore: 31.774733543395996
  time_this_iter_s: 5.9329633712768555
  time_total_s: 31.774733543395996
  timestamp: 1662585364
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 1362af8e
  warmup_time: 0.006009578704833984
  


Done. 1.5s


[2m[36m(RayTrainWorker pid=2316)[0m {'loss': 99.00631713867188, 'avg_loss': 69.0423644542694}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_1362af8e_14_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-15-28/checkpoint_000004)... 

[2m[36m(RayTrainWorker pid=1519)[0m {'loss': 162.64517211914062, 'avg_loss': 110.73798370361328}


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_1362af8e_14_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-15-28/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,▁▅█▆▇
iterations_since_restore,▁▃▅▆█
loss,▁▆█▁▆
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▁▂▁
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,69.04236
iterations_since_restore,5.0
loss,99.00632
time_since_restore,36.72284
time_this_iter_s,4.94811
time_total_s,36.72284
timestamp,1662585369.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00601


Result for TorchTrainer_1362af8e:
  _time_this_iter_s: 4.992352485656738
  _timestamp: 1662585368
  _training_iteration: 5
  avg_loss: 69.0423644542694
  date: 2022-09-07_21-16-09
  done: true
  experiment_id: c7c39989bcdd4eeeb221ae390a920e7f
  experiment_tag: 14_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0001,max_num_steps=5,num_workers=4,

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e6d9de10_13_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-14-36/checkpoint_000003)... 

Result for TorchTrainer_e6d9de10:
  _time_this_iter_s: 19.205628871917725
  _timestamp: 1662585369
  _training_iteration: 4
  avg_loss: 110.73798370361328
  date: 2022-09-07_21-16-09
  done: false
  experiment_id: 9c9009d6bae9450889471fef27aee77c
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 162.64517211914062
  node_ip: 10.150.0.3
  pid: 1424
  should_checkpoint: true
  time_since_restore: 89.56204152107239
  time_this_iter_s: 19.141782760620117
  time_total_s: 89.56204152107239
  timestamp: 1662585369
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: e6d9de10
  warmup_time: 0.01052093505859375
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=1519)[0m {'loss': 80.13710021972656, 'avg_loss': 104.61780700683593}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e6d9de10_13_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-14-36/checkpoint_000004)... [2m[36m(RayTrainWorker pid=3181)[0m 2022-09-07 21:16:27,837	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]


Result for TorchTrainer_e6d9de10:
  _time_this_iter_s: 18.033401012420654
  _timestamp: 1662585387
  _training_iteration: 5
  avg_loss: 104.61780700683593
  date: 2022-09-07_21-16-27
  done: false
  experiment_id: 9c9009d6bae9450889471fef27aee77c
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 80.13710021972656
  node_ip: 10.150.0.3
  pid: 1424
  should_checkpoint: true
  time_since_restore: 107.66465735435486
  time_this_iter_s: 18.10261583328247
  time_total_s: 107.66465735435486
  timestamp: 1662585387
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: e6d9de10
  warmup_time: 0.01052093505859375
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_e6d9de10_13_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-14-36/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

[2m[36m(RayTrainWorker pid=3181)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=3181)[0m 2022-09-07 21:16:33,662	INFO train_loop_utils.py:300 -- Moving model to device: cpu


0,1
avg_loss,▁▁▂█▆
iterations_since_restore,▁▃▅▆█
loss,▂▂▃█▁
time_since_restore,▁▃▅▆█
time_this_iter_s,█▁▂▂▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,104.61781
iterations_since_restore,5.0
loss,80.1371
time_since_restore,107.66466
time_this_iter_s,18.10262
time_total_s,107.66466
timestamp,1662585387.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.01052


Result for TorchTrainer_e6d9de10:
  _time_this_iter_s: 18.033401012420654
  _timestamp: 1662585387
  _training_iteration: 5
  avg_loss: 104.61780700683593
  date: 2022-09-07_21-16-27
  done: true
  experiment_id: 9c9009d6bae9450889471fef27aee77c
  experiment_tag: 13_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0001,max_num_steps=5,num_worker

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_329b280e_15_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-16-20/checkpoint_000000)... 

Result for TorchTrainer_329b280e:
  _time_this_iter_s: 11.497009038925171
  _timestamp: 1662585399
  _training_iteration: 1
  avg_loss: 63.57952880859375
  date: 2022-09-07_21-16-40
  done: false
  experiment_id: 9588e05e33f5479b86355b2977269e0e
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 63.57952880859375
  node_ip: 10.150.0.3
  pid: 3108
  should_checkpoint: true
  time_since_restore: 16.195173978805542
  time_this_iter_s: 16.195173978805542
  time_total_s: 16.195173978805542
  timestamp: 1662585400
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 329b280e
  warmup_time: 0.0068933963775634766
  


Done. 1.4s
[2m[36m(RayTrainWorker pid=3484)[0m 2022-09-07 21:16:44,243	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]


[2m[36m(RayTrainWorker pid=3181)[0m {'loss': 81.21192932128906, 'avg_loss': 72.3957290649414}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_329b280e_15_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-16-20/checkpoint_000001)... 

Result for TorchTrainer_329b280e:
  _time_this_iter_s: 6.189112663269043
  _timestamp: 1662585406
  _training_iteration: 2
  avg_loss: 72.3957290649414
  date: 2022-09-07_21-16-46
  done: false
  experiment_id: 9588e05e33f5479b86355b2977269e0e
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 81.21192932128906
  node_ip: 10.150.0.3
  pid: 3108
  should_checkpoint: true
  time_since_restore: 22.306111335754395
  time_this_iter_s: 6.1109373569488525
  time_total_s: 22.306111335754395
  timestamp: 1662585406
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 329b280e
  warmup_time: 0.0068933963775634766
  


Done. 1.1s
[2m[36m(RayTrainWorker pid=3484)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=3484)[0m 2022-09-07 21:16:51,181	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=3181)[0m {'loss': 13.053741455078125, 'avg_loss': 52.61506652832031}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_329b280e_15_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-16-20/checkpoint_000002)... 

Result for TorchTrainer_329b280e:
  _time_this_iter_s: 6.125319957733154
  _timestamp: 1662585412
  _training_iteration: 3
  avg_loss: 52.61506652832031
  date: 2022-09-07_21-16-52
  done: false
  experiment_id: 9588e05e33f5479b86355b2977269e0e
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 13.053741455078125
  node_ip: 10.150.0.3
  pid: 3108
  should_checkpoint: true
  time_since_restore: 28.514044046401978
  time_this_iter_s: 6.207932710647583
  time_total_s: 28.514044046401978
  timestamp: 1662585412
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 329b280e
  warmup_time: 0.0068933963775634766
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=3181)[0m {'loss': 4.139969825744629, 'avg_loss': 40.49629235267639}
[2m[36m(RayTrainWorker pid=3484)[0m {'loss': 2.285607099533081, 'avg_loss': 2.285607099533081}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_329b280e_15_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-16-20/checkpoint_000003)... [34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_5162d994_16_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-16-37/checkpoint_000000)... 

Result for TorchTrainer_5162d994:
  _time_this_iter_s: 12.746010780334473
  _timestamp: 1662585417
  _training_iteration: 1
  avg_loss: 2.285607099533081
  date: 2022-09-07_21-16-58
  done: false
  experiment_id: b323b27af3d740359bf5c74199a95bb4
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 2.285607099533081
  node_ip: 10.150.0.3
  pid: 3392
  should_checkpoint: true
  time_since_restore: 17.05582308769226
  time_this_iter_s: 17.05582308769226
  time_total_s: 17.05582308769226
  timestamp: 1662585418
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 5162d994
  warmup_time: 0.006201505661010742
  


Done. 1.3s
Done. 1.5s


[2m[36m(RayTrainWorker pid=3181)[0m {'loss': 102.97291564941406, 'avg_loss': 52.99161701202392}
[2m[36m(RayTrainWorker pid=3484)[0m {'loss': 58.07220458984375, 'avg_loss': 30.178905844688416}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_329b280e_15_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-16-20/checkpoint_000004)... 

Result for TorchTrainer_329b280e:
  _time_this_iter_s: 6.303596258163452
  _timestamp: 1662585423
  _training_iteration: 5
  avg_loss: 52.99161701202392
  date: 2022-09-07_21-17-03
  done: false
  experiment_id: 9588e05e33f5479b86355b2977269e0e
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 102.97291564941406
  node_ip: 10.150.0.3
  pid: 3108
  should_checkpoint: true
  time_since_restore: 39.67205619812012
  time_this_iter_s: 6.223296642303467
  time_total_s: 39.67205619812012
  timestamp: 1662585423
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 329b280e
  warmup_time: 0.0068933963775634766
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_5162d994_16_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-16-37/checkpoint_000001)... 

Result for TorchTrainer_5162d994:
  _time_this_iter_s: 6.354784250259399
  _timestamp: 1662585423
  _training_iteration: 2
  avg_loss: 30.178905844688416
  date: 2022-09-07_21-17-04
  done: false
  experiment_id: b323b27af3d740359bf5c74199a95bb4
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 58.07220458984375
  node_ip: 10.150.0.3
  pid: 3392
  should_checkpoint: true
  time_since_restore: 23.22834873199463
  time_this_iter_s: 6.172525644302368
  time_total_s: 23.22834873199463
  timestamp: 1662585424
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 5162d994
  warmup_time: 0.006201505661010742
  


Done. 1.2s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_329b280e_15_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-16-20/checkpoint_000004)... Done. 1.4s
Done. 0.5s


[2m[36m(RayTrainWorker pid=3484)[0m {'loss': 142.11419677734375, 'avg_loss': 67.49066948890686}


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

[2m[36m(RayTrainWorker pid=3484)[0m {'loss': 0.11065903306007385, 'avg_loss': 50.645666874945164}


0,1
avg_loss,▆█▄▁▄
iterations_since_restore,▁▃▅▆█
loss,▅▆▂▁█
time_since_restore,▁▃▅▆█
time_this_iter_s,█▂▂▁▂
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,52.99162
iterations_since_restore,5.0
loss,102.97292
time_since_restore,39.67206
time_this_iter_s,6.2233
time_total_s,39.67206
timestamp,1662585423.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00689


Result for TorchTrainer_329b280e:
  _time_this_iter_s: 6.303596258163452
  _timestamp: 1662585423
  _training_iteration: 5
  avg_loss: 52.99161701202392
  date: 2022-09-07_21-17-03
  done: true
  experiment_id: 9588e05e33f5479b86355b2977269e0e
  experiment_tag: 15_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0002,max_num_steps=5,num_workers=4

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_5162d994_16_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-16-37/checkpoint_000002)... 

Result for TorchTrainer_5162d994:
  _time_this_iter_s: 4.552388429641724
  _timestamp: 1662585428
  _training_iteration: 3
  avg_loss: 67.49066948890686
  date: 2022-09-07_21-17-08
  done: false
  experiment_id: b323b27af3d740359bf5c74199a95bb4
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 142.11419677734375
  node_ip: 10.150.0.3
  pid: 3392
  should_checkpoint: true
  time_since_restore: 27.7666916847229
  time_this_iter_s: 4.5383429527282715
  time_total_s: 27.7666916847229
  timestamp: 1662585428
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 5162d994
  warmup_time: 0.006201505661010742
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_5162d994_16_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-16-37/checkpoint_000003)... Done. 1.1s


[2m[36m(RayTrainWorker pid=3484)[0m {'loss': 20.474498748779297, 'avg_loss': 44.61143324971199}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_5162d994_16_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-16-37/checkpoint_000004)... 

Result for TorchTrainer_5162d994:
  _time_this_iter_s: 6.072742462158203
  _timestamp: 1662585440
  _training_iteration: 5
  avg_loss: 44.61143324971199
  date: 2022-09-07_21-17-20
  done: false
  experiment_id: b323b27af3d740359bf5c74199a95bb4
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 20.474498748779297
  node_ip: 10.150.0.3
  pid: 3392
  should_checkpoint: true
  time_since_restore: 39.65791988372803
  time_this_iter_s: 6.116276979446411
  time_total_s: 39.65791988372803
  timestamp: 1662585440
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 5162d994
  warmup_time: 0.006201505661010742
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_5162d994_16_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-16-37/checkpoint_000004)... Done. 0.3s


[2m[36m(RayTrainWorker pid=4329)[0m 2022-09-07 21:17:22,043	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

[2m[36m(RayTrainWorker pid=4329)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=4329)[0m 2022-09-07 21:17:27,492	INFO train_loop_utils.py:300 -- Moving model to device: cpu


Result for TorchTrainer_5162d994:
  _time_this_iter_s: 6.072742462158203
  _timestamp: 1662585440
  _training_iteration: 5
  avg_loss: 44.61143324971199
  date: 2022-09-07_21-17-20
  done: true
  experiment_id: b323b27af3d740359bf5c74199a95bb4
  experiment_tag: 16_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0001,max_num_steps=5,num_workers=4

0,1
avg_loss,▁▄█▆▆
iterations_since_restore,▁▃▅▆█
loss,▁▄█▁▂
time_since_restore,▁▃▄▆█
time_this_iter_s,█▂▁▂▂
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,44.61143
iterations_since_restore,5.0
loss,20.4745
time_since_restore,39.65792
time_this_iter_s,6.11628
time_total_s,39.65792
timestamp,1662585440.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.0062


[2m[36m(RayTrainWorker pid=4329)[0m {'loss': 118.15971374511719, 'avg_loss': 118.15971374511719}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_5b830cfa_17_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-17-15/checkpoint_000000)... 

Result for TorchTrainer_5b830cfa:
  _time_this_iter_s: 10.60528564453125
  _timestamp: 1662585453
  _training_iteration: 1
  avg_loss: 118.15971374511719
  date: 2022-09-07_21-17-33
  done: false
  experiment_id: 9c6c49d3d41a4874969e77dd30e3ec8c
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 118.15971374511719
  node_ip: 10.150.0.3
  pid: 4238
  should_checkpoint: true
  time_since_restore: 14.306106805801392
  time_this_iter_s: 14.306106805801392
  time_total_s: 14.306106805801392
  timestamp: 1662585453
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 5b830cfa
  warmup_time: 0.005975961685180664
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=4329)[0m {'loss': 72.99945068359375, 'avg_loss': 95.57958221435547}


[2m[36m(RayTrainWorker pid=4670)[0m 2022-09-07 21:17:39,518	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_5b830cfa_17_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-17-15/checkpoint_000001)... 

Result for TorchTrainer_5b830cfa:
  _time_this_iter_s: 6.342345952987671
  _timestamp: 1662585459
  _training_iteration: 2
  avg_loss: 95.57958221435547
  date: 2022-09-07_21-17-39
  done: false
  experiment_id: 9c6c49d3d41a4874969e77dd30e3ec8c
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 72.99945068359375
  node_ip: 10.150.0.3
  pid: 4238
  should_checkpoint: true
  time_since_restore: 20.594079732894897
  time_this_iter_s: 6.287972927093506
  time_total_s: 20.594079732894897
  timestamp: 1662585459
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 5b830cfa
  warmup_time: 0.005975961685180664
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=4329)[0m {'loss': 1.0052332878112793, 'avg_loss': 64.05479923884074}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_5b830cfa_17_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-17-15/checkpoint_000002)... 

Result for TorchTrainer_5b830cfa:
  _time_this_iter_s: 5.741736173629761
  _timestamp: 1662585465
  _training_iteration: 3
  avg_loss: 64.05479923884074
  date: 2022-09-07_21-17-45
  done: false
  experiment_id: 9c6c49d3d41a4874969e77dd30e3ec8c
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 1.0052332878112793
  node_ip: 10.150.0.3
  pid: 4238
  should_checkpoint: true
  time_since_restore: 26.180867910385132
  time_this_iter_s: 5.586788177490234
  time_total_s: 26.180867910385132
  timestamp: 1662585465
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 5b830cfa
  warmup_time: 0.005975961685180664
  


[2m[36m(RayTrainWorker pid=4670)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
Done. 1.0s
[2m[36m(RayTrainWorker pid=4670)[0m 2022-09-07 21:17:46,548	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=4329)[0m {'loss': 71.40657806396484, 'avg_loss': 65.89274394512177}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_5b830cfa_17_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-17-15/checkpoint_000003)... 

Result for TorchTrainer_5b830cfa:
  _time_this_iter_s: 5.707468032836914
  _timestamp: 1662585470
  _training_iteration: 4
  avg_loss: 65.89274394512177
  date: 2022-09-07_21-17-51
  done: false
  experiment_id: 9c6c49d3d41a4874969e77dd30e3ec8c
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 71.40657806396484
  node_ip: 10.150.0.3
  pid: 4238
  should_checkpoint: true
  time_since_restore: 31.878676176071167
  time_this_iter_s: 5.697808265686035
  time_total_s: 31.878676176071167
  timestamp: 1662585471
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 5b830cfa
  warmup_time: 0.005975961685180664
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=4670)[0m {'loss': 43.847599029541016, 'avg_loss': 43.847599029541016}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_7246099c_18_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-17-31/checkpoint_000000)... 

Result for TorchTrainer_7246099c:
  _time_this_iter_s: 12.829390525817871
  _timestamp: 1662585472
  _training_iteration: 1
  avg_loss: 43.847599029541016
  date: 2022-09-07_21-17-53
  done: false
  experiment_id: 2d163536a5af4b44adbbb7a41f8eb8a8
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 43.847599029541016
  node_ip: 10.150.0.3
  pid: 4568
  should_checkpoint: true
  time_since_restore: 17.13966655731201
  time_this_iter_s: 17.13966655731201
  time_total_s: 17.13966655731201
  timestamp: 1662585473
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 7246099c
  warmup_time: 0.008389949798583984
  


Done. 1.7s


[2m[36m(RayTrainWorker pid=4329)[0m {'loss': 69.95179748535156, 'avg_loss': 66.70455465316772}
Result for TorchTrainer_5b830cfa:
  _time_this_iter_s: 5.180276870727539
  _timestamp: 1662585476
  _training_iteration: 5
  avg_loss: 66.70455465316772
  date: 2022-09-07_21-17-56
  done: false
  experiment_id: 9c6c49d3d41a4874969e77dd30e3ec8c
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 69.95179748535156
  node_ip: 10.150.0.3
  pid: 4238
  should_checkpoint: true
  time_since_restore: 37.13556528091431
  time_this_iter_s: 5.25688910484314
  time_total_s: 37.13556528091431
  timestamp: 1662585476
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 5b830cfa
  warmup_time: 0.005975961685180664
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_5b830cfa_17_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-17-15/checkpoint_000004)... Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_5b830cfa_17_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-17-15/checkpoint_000004)... Done. 0.2s


[2m[36m(RayTrainWorker pid=4670)[0m {'loss': 24.788902282714844, 'avg_loss': 34.31825065612793}


[2m[36m(RayTrainWorker pid=4670)[0m {'loss': 14.166524887084961, 'avg_loss': 27.601008733113606}


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

Result for TorchTrainer_5b830cfa:
  _time_this_iter_s: 5.180276870727539
  _timestamp: 1662585476
  _training_iteration: 5
  avg_loss: 66.70455465316772
  date: 2022-09-07_21-17-56
  done: true
  experiment_id: 9c6c49d3d41a4874969e77dd30e3ec8c
  experiment_tag: 17_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0002,max_num_steps=5,num_workers=4

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_7246099c_18_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-17-31/checkpoint_000001)... 

Result for TorchTrainer_7246099c:
  _time_this_iter_s: 5.140620708465576
  _timestamp: 1662585477
  _training_iteration: 2
  avg_loss: 34.31825065612793
  date: 2022-09-07_21-17-58
  done: false
  experiment_id: 2d163536a5af4b44adbbb7a41f8eb8a8
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 24.788902282714844
  node_ip: 10.150.0.3
  pid: 4568
  should_checkpoint: true
  time_since_restore: 22.211164712905884
  time_this_iter_s: 5.071498155593872
  time_total_s: 22.211164712905884
  timestamp: 1662585478
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 7246099c
  warmup_time: 0.008389949798583984
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_7246099c_18_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-17-31/checkpoint_000002)... Done. 1.0s


[2m[36m(RayTrainWorker pid=4670)[0m {'loss': 4.776675701141357, 'avg_loss': 21.894925475120544}
Result for TorchTrainer_7246099c:
  _time_this_iter_s: 5.71928071975708
  _timestamp: 1662585492
  _training_iteration: 4
  avg_loss: 21.894925475120544
  date: 2022-09-07_21-18-13
  done: false
  experiment_id: 2d163536a5af4b44adbbb7a41f8eb8a8
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 4.776675701141357
  node_ip: 10.150.0.3
  pid: 4568
  should_checkpoint: true
  time_since_restore: 36.898244857788086
  time_this_iter_s: 5.794817686080933
  time_total_s: 36.898244857788086
  timestamp: 1662585493
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 7246099c
  warmup_time: 0.008389949798583984
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_7246099c_18_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-17-31/checkpoint_000003)... Done. 1.0s
[2m[36m(RayTrainWorker pid=5550)[0m 2022-09-07 21:18:15,497	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]


0,1
avg_loss,█▅▁▁▁
iterations_since_restore,▁▃▅▆█
loss,█▅▁▅▅
time_since_restore,▁▃▅▆█
time_this_iter_s,█▂▁▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,66.70455
iterations_since_restore,5.0
loss,69.9518
time_since_restore,37.13557
time_this_iter_s,5.25689
time_total_s,37.13557
timestamp,1662585476.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00598


[2m[36m(RayTrainWorker pid=4670)[0m {'loss': 59.811641693115234, 'avg_loss': 29.478268718719484}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_7246099c_18_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-17-31/checkpoint_000004)... 

Result for TorchTrainer_7246099c:
  _time_this_iter_s: 7.015082836151123
  _timestamp: 1662585499
  _training_iteration: 5
  avg_loss: 29.478268718719484
  date: 2022-09-07_21-18-20
  done: false
  experiment_id: 2d163536a5af4b44adbbb7a41f8eb8a8
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 59.811641693115234
  node_ip: 10.150.0.3
  pid: 4568
  should_checkpoint: true
  time_since_restore: 43.881277084350586
  time_this_iter_s: 6.9830322265625
  time_total_s: 43.881277084350586
  timestamp: 1662585500
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 7246099c
  warmup_time: 0.008389949798583984
  


Done. 1.0s
[2m[36m(RayTrainWorker pid=5550)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_7246099c_18_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-17-31/checkpoint_000004)... Done. 0.3s
[2m[36m(RayTrainWorker pid=5550)[0m 2022-09-07 21:18:21,466	INFO train_loop_utils.py:300 -- Moving model to device: cpu


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▅▃▁▃
iterations_since_restore,▁▃▅▆█
loss,▆▄▂▁█
time_since_restore,▁▂▅▆█
time_this_iter_s,█▁▃▁▂
time_total_s,▁▂▅▆█
timestamp,▁▂▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,29.47827
iterations_since_restore,5.0
loss,59.81164
time_since_restore,43.88128
time_this_iter_s,6.98303
time_total_s,43.88128
timestamp,1662585500.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00839


Result for TorchTrainer_7246099c:
  _time_this_iter_s: 7.015082836151123
  _timestamp: 1662585499
  _training_iteration: 5
  avg_loss: 29.478268718719484
  date: 2022-09-07_21-18-20
  done: true
  experiment_id: 2d163536a5af4b44adbbb7a41f8eb8a8
  experiment_tag: 18_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0044,max_num_steps=5,num_workers=

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_7c45b76c_19_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-18-08/checkpoint_000000)... 

Result for TorchTrainer_7c45b76c:
  _time_this_iter_s: 16.642154932022095
  _timestamp: 1662585512
  _training_iteration: 1
  avg_loss: 209.20376586914062
  date: 2022-09-07_21-18-33
  done: false
  experiment_id: c2763f685e0c4eaeb236d4ecfcaecc19
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 209.20376586914062
  node_ip: 10.150.0.3
  pid: 5470
  should_checkpoint: true
  time_since_restore: 20.804489612579346
  time_this_iter_s: 20.804489612579346
  time_total_s: 20.804489612579346
  timestamp: 1662585513
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 7c45b76c
  warmup_time: 0.005153656005859375
  


Done. 1.0s
[2m[36m(RayTrainWorker pid=5893)[0m 2022-09-07 21:18:37,593	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]


[2m[36m(RayTrainWorker pid=5550)[0m {'loss': 0.4377988576889038, 'avg_loss': 104.82078236341476}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_7c45b76c_19_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-18-08/checkpoint_000001)... 

Result for TorchTrainer_7c45b76c:
  _time_this_iter_s: 10.279728174209595
  _timestamp: 1662585522
  _training_iteration: 2
  avg_loss: 104.82078236341476
  date: 2022-09-07_21-18-43
  done: false
  experiment_id: c2763f685e0c4eaeb236d4ecfcaecc19
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 0.4377988576889038
  node_ip: 10.150.0.3
  pid: 5470
  should_checkpoint: true
  time_since_restore: 30.971221446990967
  time_this_iter_s: 10.166731834411621
  time_total_s: 30.971221446990967
  timestamp: 1662585523
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 7c45b76c
  warmup_time: 0.005153656005859375
  


[2m[36m(RayTrainWorker pid=5893)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
Done. 1.0s
[2m[36m(RayTrainWorker pid=5893)[0m 2022-09-07 21:18:44,236	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=5550)[0m {'loss': 119.99163055419922, 'avg_loss': 109.87773176034291}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_7c45b76c_19_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-18-08/checkpoint_000002)... 

Result for TorchTrainer_7c45b76c:
  _time_this_iter_s: 10.41622519493103
  _timestamp: 1662585533
  _training_iteration: 3
  avg_loss: 109.87773176034291
  date: 2022-09-07_21-18-53
  done: false
  experiment_id: c2763f685e0c4eaeb236d4ecfcaecc19
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 119.99163055419922
  node_ip: 10.150.0.3
  pid: 5470
  should_checkpoint: true
  time_since_restore: 41.43177318572998
  time_this_iter_s: 10.460551738739014
  time_total_s: 41.43177318572998
  timestamp: 1662585533
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 7c45b76c
  warmup_time: 0.005153656005859375
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=5893)[0m {'loss': 60.40569305419922, 'avg_loss': 60.40569305419922}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_91ccf37a_20_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-18-30/checkpoint_000000)... 

Result for TorchTrainer_91ccf37a:
  _time_this_iter_s: 17.775947332382202
  _timestamp: 1662585535
  _training_iteration: 1
  avg_loss: 60.40569305419922
  date: 2022-09-07_21-18-56
  done: false
  experiment_id: 76cfbc6801e34d6d8e2d7516a9fe56fd
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 60.40569305419922
  node_ip: 10.150.0.3
  pid: 5803
  should_checkpoint: true
  time_since_restore: 22.24928903579712
  time_this_iter_s: 22.24928903579712
  time_total_s: 22.24928903579712
  timestamp: 1662585536
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 91ccf37a
  warmup_time: 0.005927324295043945
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=5550)[0m {'loss': 9.539534568786621, 'avg_loss': 84.79318246245384}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_7c45b76c_19_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-18-08/checkpoint_000003)... 

Result for TorchTrainer_7c45b76c:
  _time_this_iter_s: 10.408117532730103
  _timestamp: 1662585543
  _training_iteration: 4
  avg_loss: 84.79318246245384
  date: 2022-09-07_21-19-04
  done: false
  experiment_id: c2763f685e0c4eaeb236d4ecfcaecc19
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 9.539534568786621
  node_ip: 10.150.0.3
  pid: 5470
  should_checkpoint: true
  time_since_restore: 51.78055000305176
  time_this_iter_s: 10.348776817321777
  time_total_s: 51.78055000305176
  timestamp: 1662585544
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 7c45b76c
  warmup_time: 0.005153656005859375
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=5893)[0m {'loss': 69.12847900390625, 'avg_loss': 64.76708602905273}
Result for TorchTrainer_91ccf37a:
  _time_this_iter_s: 10.68947958946228
  _timestamp: 1662585546
  _training_iteration: 2
  avg_loss: 64.76708602905273
  date: 2022-09-07_21-19-07
  done: false
  experiment_id: 76cfbc6801e34d6d8e2d7516a9fe56fd
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 69.12847900390625
  node_ip: 10.150.0.3
  pid: 5803
  should_checkpoint: true
  time_since_restore: 32.89037585258484
  time_this_iter_s: 10.64108681678772
  time_total_s: 32.89037585258484
  timestamp: 1662585547
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 91ccf37a
  warmup_time: 0.005927324295043945
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_91ccf37a_20_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-18-30/checkpoint_000001)... Done. 1.1s


[2m[36m(RayTrainWorker pid=5550)[0m {'loss': 35.24058532714844, 'avg_loss': 74.88266303539277}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_7c45b76c_19_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-18-08/checkpoint_000004)... 

Result for TorchTrainer_7c45b76c:
  _time_this_iter_s: 8.762531280517578
  _timestamp: 1662585552
  _training_iteration: 5
  avg_loss: 74.88266303539277
  date: 2022-09-07_21-19-12
  done: false
  experiment_id: c2763f685e0c4eaeb236d4ecfcaecc19
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 35.24058532714844
  node_ip: 10.150.0.3
  pid: 5470
  should_checkpoint: true
  time_since_restore: 60.61064624786377
  time_this_iter_s: 8.830096244812012
  time_total_s: 60.61064624786377
  timestamp: 1662585552
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 7c45b76c
  warmup_time: 0.005153656005859375
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_7c45b76c_19_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-18-08/checkpoint_000004)... Done. 0.3s


[2m[36m(RayTrainWorker pid=5893)[0m {'loss': 171.53176879882812, 'avg_loss': 100.35531361897786}


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▃▃▂▁
iterations_since_restore,▁▃▅▆█
loss,█▁▅▁▂
time_since_restore,▁▃▅▆█
time_this_iter_s,█▂▂▂▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▇█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,74.88266
iterations_since_restore,5.0
loss,35.24059
time_since_restore,60.61065
time_this_iter_s,8.8301
time_total_s,60.61065
timestamp,1662585552.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00515


Result for TorchTrainer_7c45b76c:
  _time_this_iter_s: 8.762531280517578
  _timestamp: 1662585552
  _training_iteration: 5
  avg_loss: 74.88266303539277
  date: 2022-09-07_21-19-12
  done: true
  experiment_id: c2763f685e0c4eaeb236d4ecfcaecc19
  experiment_tag: 19_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0010,max_num_steps=5,num_workers=

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_91ccf37a_20_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-18-30/checkpoint_000002)... 

Result for TorchTrainer_91ccf37a:
  _time_this_iter_s: 9.088846921920776
  _timestamp: 1662585555
  _training_iteration: 3
  avg_loss: 100.35531361897786
  date: 2022-09-07_21-19-16
  done: false
  experiment_id: 76cfbc6801e34d6d8e2d7516a9fe56fd
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 171.53176879882812
  node_ip: 10.150.0.3
  pid: 5803
  should_checkpoint: true
  time_since_restore: 41.933146715164185
  time_this_iter_s: 9.042770862579346
  time_total_s: 41.933146715164185
  timestamp: 1662585556
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 91ccf37a
  warmup_time: 0.005927324295043945
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=5893)[0m {'loss': 130.70851135253906, 'avg_loss': 107.94361305236816}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_91ccf37a_20_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-18-30/checkpoint_000003)... Done. 1.1s
[2m[36m(RayTrainWorker pid=6768)[0m 2022-09-07 21:19:31,024	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]


[2m[36m(RayTrainWorker pid=5893)[0m {'loss': 16.46671485900879, 'avg_loss': 89.64823341369629}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_91ccf37a_20_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-18-30/checkpoint_000004)... 

Result for TorchTrainer_91ccf37a:
  _time_this_iter_s: 11.547380208969116
  _timestamp: 1662585575
  _training_iteration: 5
  avg_loss: 89.64823341369629
  date: 2022-09-07_21-19-35
  done: false
  experiment_id: 76cfbc6801e34d6d8e2d7516a9fe56fd
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 16.46671485900879
  node_ip: 10.150.0.3
  pid: 5803
  should_checkpoint: true
  time_since_restore: 61.53008270263672
  time_this_iter_s: 11.523568391799927
  time_total_s: 61.53008270263672
  timestamp: 1662585575
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 91ccf37a
  warmup_time: 0.005927324295043945
  


[2m[36m(RayTrainWorker pid=6768)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_91ccf37a_20_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-18-30/checkpoint_000004)... Done. 0.3s
[2m[36m(RayTrainWorker pid=6768)[0m 2022-09-07 21:19:37,004	INFO train_loop_utils.py:300 -- Moving model to device: cpu


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,▁▂▇█▅
iterations_since_restore,▁▃▅▆█
loss,▃▃█▆▁
time_since_restore,▁▃▅▆█
time_this_iter_s,█▂▁▁▃
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,89.64823
iterations_since_restore,5.0
loss,16.46671
time_since_restore,61.53008
time_this_iter_s,11.52357
time_total_s,61.53008
timestamp,1662585575.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00593


Result for TorchTrainer_91ccf37a:
  _time_this_iter_s: 11.547380208969116
  _timestamp: 1662585575
  _training_iteration: 5
  avg_loss: 89.64823341369629
  date: 2022-09-07_21-19-35
  done: true
  experiment_id: 76cfbc6801e34d6d8e2d7516a9fe56fd
  experiment_tag: 20_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0009,max_num_steps=5,num_workers

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_9edc449e_21_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-19-23/checkpoint_000000)... 

Result for TorchTrainer_9edc449e:
  _time_this_iter_s: 16.107561111450195
  _timestamp: 1662585587
  _training_iteration: 1
  avg_loss: 61.80339431762695
  date: 2022-09-07_21-19-48
  done: false
  experiment_id: 307b5563f68b457298656e96d42effb0
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 61.80339431762695
  node_ip: 10.150.0.3
  pid: 6677
  should_checkpoint: true
  time_since_restore: 19.961857080459595
  time_this_iter_s: 19.961857080459595
  time_total_s: 19.961857080459595
  timestamp: 1662585588
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 9edc449e
  warmup_time: 0.005873918533325195
  


Done. 1.0s
[2m[36m(RayTrainWorker pid=7115)[0m 2022-09-07 21:19:52,314	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=7115)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=7115)[0m 2022-09-07 21:19:58,232	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=6768)[0m {'loss': 80.93175506591797, 'avg_loss': 71.36757469177246}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_9edc449e_21_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-19-23/checkpoint_000001)... 

Result for TorchTrainer_9edc449e:
  _time_this_iter_s: 12.199548482894897
  _timestamp: 1662585599
  _training_iteration: 2
  avg_loss: 71.36757469177246
  date: 2022-09-07_21-20-00
  done: false
  experiment_id: 307b5563f68b457298656e96d42effb0
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 80.93175506591797
  node_ip: 10.150.0.3
  pid: 6677
  should_checkpoint: true
  time_since_restore: 32.635648012161255
  time_this_iter_s: 12.67379093170166
  time_total_s: 32.635648012161255
  timestamp: 1662585600
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 9edc449e
  warmup_time: 0.005873918533325195
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=6768)[0m {'loss': 97.49231719970703, 'avg_loss': 80.07582219441731}
[2m[36m(RayTrainWorker pid=7115)[0m {'loss': 102.28921508789062, 'avg_loss': 102.28921508789062}
Result for TorchTrainer_9edc449e:
  _time_this_iter_s: 10.745925903320312
  _timestamp: 1662585610
  _training_iteration: 3
  avg_loss: 80.07582219441731
  date: 2022-09-07_21-20-10
  done: false
  experiment_id: 307b5563f68b457298656e96d42effb0
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 97.49231719970703
  node_ip: 10.150.0.3
  pid: 6677
  should_checkpoint: true
  time_since_restore: 42.849268436431885
  time_this_iter_s: 10.21362042427063
  time_total_s: 42.849268436431885
  timestamp: 1662585610
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 9edc449e
  warmup_time: 0.005873918533325195
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_9edc449e_21_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-19-23/checkpoint_000002)... [34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_bed468f8_22_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-19-45/checkpoint_000000)... 

Result for TorchTrainer_bed468f8:
  _time_this_iter_s: 18.00953483581543
  _timestamp: 1662585610
  _training_iteration: 1
  avg_loss: 102.28921508789062
  date: 2022-09-07_21-20-11
  done: false
  experiment_id: 240a025336f5446a9ec393f1847b7757
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 102.28921508789062
  node_ip: 10.150.0.3
  pid: 7026
  should_checkpoint: true
  time_since_restore: 22.57522702217102
  time_this_iter_s: 22.57522702217102
  time_total_s: 22.57522702217102
  timestamp: 1662585611
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: bed468f8
  warmup_time: 0.006448030471801758
  


Done. 1.1s
Done. 1.3s


[2m[36m(RayTrainWorker pid=6768)[0m {'loss': 43.3187370300293, 'avg_loss': 70.88655090332031}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_9edc449e_21_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-19-23/checkpoint_000003)... 

Result for TorchTrainer_9edc449e:
  _time_this_iter_s: 8.944217443466187
  _timestamp: 1662585619
  _training_iteration: 4
  avg_loss: 70.88655090332031
  date: 2022-09-07_21-20-19
  done: false
  experiment_id: 307b5563f68b457298656e96d42effb0
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 43.3187370300293
  node_ip: 10.150.0.3
  pid: 6677
  should_checkpoint: true
  time_since_restore: 51.742502212524414
  time_this_iter_s: 8.89323377609253
  time_total_s: 51.742502212524414
  timestamp: 1662585619
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 9edc449e
  warmup_time: 0.005873918533325195
  
[2m[36m(RayTrainWorker pid=7115)[0m {'loss': 85.46986389160156, 'avg_loss': 93.8795394897461}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_bed468f8_22_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-19-45/checkpoint_000001)... 

Result for TorchTrainer_bed468f8:
  _time_this_iter_s: 9.215688228607178
  _timestamp: 1662585620
  _training_iteration: 2
  avg_loss: 93.8795394897461
  date: 2022-09-07_21-20-20
  done: false
  experiment_id: 240a025336f5446a9ec393f1847b7757
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 85.46986389160156
  node_ip: 10.150.0.3
  pid: 7026
  should_checkpoint: true
  time_since_restore: 31.669150352478027
  time_this_iter_s: 9.093923330307007
  time_total_s: 31.669150352478027
  timestamp: 1662585620
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: bed468f8
  warmup_time: 0.006448030471801758
  


Done. 1.1s
Done. 1.1s


[2m[36m(RayTrainWorker pid=6768)[0m {'loss': 77.21394348144531, 'avg_loss': 72.15202941894532}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_9edc449e_21_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-19-23/checkpoint_000004)... 

Result for TorchTrainer_9edc449e:
  _time_this_iter_s: 8.776428461074829
  _timestamp: 1662585628
  _training_iteration: 5
  avg_loss: 72.15202941894532
  date: 2022-09-07_21-20-28
  done: false
  experiment_id: 307b5563f68b457298656e96d42effb0
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 77.21394348144531
  node_ip: 10.150.0.3
  pid: 6677
  should_checkpoint: true
  time_since_restore: 60.52331757545471
  time_this_iter_s: 8.780815362930298
  time_total_s: 60.52331757545471
  timestamp: 1662585628
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 9edc449e
  warmup_time: 0.005873918533325195
  
[2m[36m(RayTrainWorker pid=7115)[0m {'loss': 147.8870849609375, 'avg_loss': 111.88205464680989}


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_9edc449e_21_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-19-23/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

[2m[36m(RayTrainWorker pid=7115)[0m {'loss': 28.337833404541016, 'avg_loss': 90.99599933624268}


Result for TorchTrainer_9edc449e:
  _time_this_iter_s: 8.776428461074829
  _timestamp: 1662585628
  _training_iteration: 5
  avg_loss: 72.15202941894532
  date: 2022-09-07_21-20-28
  done: true
  experiment_id: 307b5563f68b457298656e96d42effb0
  experiment_tag: 21_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0092,max_num_steps=5,num_workers=

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_bed468f8_22_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-19-45/checkpoint_000002)... 

Result for TorchTrainer_bed468f8:
  _time_this_iter_s: 8.883598804473877
  _timestamp: 1662585629
  _training_iteration: 3
  avg_loss: 111.88205464680989
  date: 2022-09-07_21-20-29
  done: false
  experiment_id: 240a025336f5446a9ec393f1847b7757
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 147.8870849609375
  node_ip: 10.150.0.3
  pid: 7026
  should_checkpoint: true
  time_since_restore: 40.4627583026886
  time_this_iter_s: 8.793607950210571
  time_total_s: 40.4627583026886
  timestamp: 1662585629
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: bed468f8
  warmup_time: 0.006448030471801758
  


0,1
avg_loss,▁▅█▄▅
iterations_since_restore,▁▃▅▆█
loss,▃▆█▁▅
time_since_restore,▁▃▅▆█
time_this_iter_s,█▃▂▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,72.15203
iterations_since_restore,5.0
loss,77.21394
time_since_restore,60.52332
time_this_iter_s,8.78082
time_total_s,60.52332
timestamp,1662585628.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00587


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_bed468f8_22_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-19-45/checkpoint_000003)... Done. 1.0s
[2m[36m(RayTrainWorker pid=7966)[0m 2022-09-07 21:20:47,377	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]


[2m[36m(RayTrainWorker pid=7115)[0m {'loss': 120.21112060546875, 'avg_loss': 96.83902359008789}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_bed468f8_22_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-19-45/checkpoint_000004)... 

Result for TorchTrainer_bed468f8:
  _time_this_iter_s: 11.878909826278687
  _timestamp: 1662585650
  _training_iteration: 5
  avg_loss: 96.83902359008789
  date: 2022-09-07_21-20-51
  done: false
  experiment_id: 240a025336f5446a9ec393f1847b7757
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 120.21112060546875
  node_ip: 10.150.0.3
  pid: 7026
  should_checkpoint: true
  time_since_restore: 62.374101877212524
  time_this_iter_s: 11.823947191238403
  time_total_s: 62.374101877212524
  timestamp: 1662585651
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: bed468f8
  warmup_time: 0.006448030471801758
  


Done. 1.4s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_bed468f8_22_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-19-45/checkpoint_000004)... Done. 0.3s


[2m[36m(RayTrainWorker pid=7966)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=7966)[0m 2022-09-07 21:20:54,215	INFO train_loop_utils.py:300 -- Moving model to device: cpu


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,▅▂█▁▃
iterations_since_restore,▁▃▅▆█
loss,▅▄█▁▆
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▁▂▃
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,96.83902
iterations_since_restore,5.0
loss,120.21112
time_since_restore,62.3741
time_this_iter_s,11.82395
time_total_s,62.3741
timestamp,1662585651.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00645


Result for TorchTrainer_bed468f8:
  _time_this_iter_s: 11.878909826278687
  _timestamp: 1662585650
  _training_iteration: 5
  avg_loss: 96.83902359008789
  date: 2022-09-07_21-20-51
  done: true
  experiment_id: 240a025336f5446a9ec393f1847b7757
  experiment_tag: 22_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0077,max_num_steps=5,num_workers

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_cb92d304_23_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-20-40/checkpoint_000000)... 

Result for TorchTrainer_cb92d304:
  _time_this_iter_s: 19.155983448028564
  _timestamp: 1662585667
  _training_iteration: 1
  avg_loss: 60.34614562988281
  date: 2022-09-07_21-21-07
  done: false
  experiment_id: a1577ee73b4a4fefa8dc36d6f96347b1
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 60.34614562988281
  node_ip: 10.150.0.3
  pid: 7888
  should_checkpoint: true
  time_since_restore: 23.03936266899109
  time_this_iter_s: 23.03936266899109
  time_total_s: 23.03936266899109
  timestamp: 1662585667
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: cb92d304
  warmup_time: 0.006450653076171875
  


Done. 1.0s
[2m[36m(RayTrainWorker pid=8634)[0m 2022-09-07 21:21:09,220	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=8634)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=8634)[0m 2022-09-07 21:21:15,731	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=7966)[0m {'loss': 101.25701904296875, 'avg_loss': 80.80158233642578}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_cb92d304_23_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-20-40/checkpoint_000001)... 

Result for TorchTrainer_cb92d304:
  _time_this_iter_s: 11.202421426773071
  _timestamp: 1662585678
  _training_iteration: 2
  avg_loss: 80.80158233642578
  date: 2022-09-07_21-21-18
  done: false
  experiment_id: a1577ee73b4a4fefa8dc36d6f96347b1
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 101.25701904296875
  node_ip: 10.150.0.3
  pid: 7888
  should_checkpoint: true
  time_since_restore: 34.114229679107666
  time_this_iter_s: 11.074867010116577
  time_total_s: 34.114229679107666
  timestamp: 1662585678
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: cb92d304
  warmup_time: 0.006450653076171875
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=8634)[0m {'loss': 73.57400512695312, 'avg_loss': 73.57400512695312}
Result for TorchTrainer_ec979508:
  _time_this_iter_s: 12.628728866577148
  _timestamp: 1662585682
  _training_iteration: 1
  avg_loss: 73.57400512695312
  date: 2022-09-07_21-21-22
  done: false
  experiment_id: 96b380f0227e49ed8fe58005ea4b716d
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 73.57400512695312
  node_ip: 10.150.0.3
  pid: 8158
  should_checkpoint: true
  time_since_restore: 17.177831411361694
  time_this_iter_s: 17.177831411361694
  time_total_s: 17.177831411361694
  timestamp: 1662585682
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: ec979508
  warmup_time: 0.005913257598876953
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_ec979508_24_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-21-01/checkpoint_000000)... Done. 1.0s


[2m[36m(RayTrainWorker pid=7966)[0m {'loss': 98.23750305175781, 'avg_loss': 86.61355590820312}
[2m[36m(RayTrainWorker pid=8634)[0m {'loss': 77.22203063964844, 'avg_loss': 75.39801788330078}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_cb92d304_23_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-20-40/checkpoint_000002)... 

Result for TorchTrainer_cb92d304:
  _time_this_iter_s: 8.896963119506836
  _timestamp: 1662585687
  _training_iteration: 3
  avg_loss: 86.61355590820312
  date: 2022-09-07_21-21-27
  done: false
  experiment_id: a1577ee73b4a4fefa8dc36d6f96347b1
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 98.23750305175781
  node_ip: 10.150.0.3
  pid: 7888
  should_checkpoint: true
  time_since_restore: 43.04955792427063
  time_this_iter_s: 8.935328245162964
  time_total_s: 43.04955792427063
  timestamp: 1662585687
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: cb92d304
  warmup_time: 0.006450653076171875
  
Result for TorchTrainer_ec979508:
  _time_this_iter_s: 5.240785121917725
  _timestamp: 1662585687
  _training_iteration: 2
  avg_loss: 75.39801788330078
  date: 2022-09-07_21-21-28
  done: false
  experiment_id: 96b380f0227e49ed8fe58005ea4b716d
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 77.22203063964844
  node_ip: 10.150.0.3
  pid: 8158
  sho

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_ec979508_24_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-21-01/checkpoint_000001)... Done. 1.0s
Done. 1.2s


[2m[36m(RayTrainWorker pid=8634)[0m {'loss': 113.08607482910156, 'avg_loss': 87.96070353190105}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_ec979508_24_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-21-01/checkpoint_000002)... 

Result for TorchTrainer_ec979508:
  _time_this_iter_s: 5.290882587432861
  _timestamp: 1662585692
  _training_iteration: 3
  avg_loss: 87.96070353190105
  date: 2022-09-07_21-21-33
  done: false
  experiment_id: 96b380f0227e49ed8fe58005ea4b716d
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 113.08607482910156
  node_ip: 10.150.0.3
  pid: 8158
  should_checkpoint: true
  time_since_restore: 27.666754961013794
  time_this_iter_s: 5.17017936706543
  time_total_s: 27.666754961013794
  timestamp: 1662585693
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: ec979508
  warmup_time: 0.005913257598876953
  


Done. 1.6s


[2m[36m(RayTrainWorker pid=7966)[0m {'loss': 62.27341842651367, 'avg_loss': 80.52852153778076}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_cb92d304_23_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-20-40/checkpoint_000003)... 

Result for TorchTrainer_cb92d304:
  _time_this_iter_s: 9.349447250366211
  _timestamp: 1662585696
  _training_iteration: 4
  avg_loss: 80.52852153778076
  date: 2022-09-07_21-21-36
  done: false
  experiment_id: a1577ee73b4a4fefa8dc36d6f96347b1
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 62.27341842651367
  node_ip: 10.150.0.3
  pid: 7888
  should_checkpoint: true
  time_since_restore: 52.36154627799988
  time_this_iter_s: 9.311988353729248
  time_total_s: 52.36154627799988
  timestamp: 1662585696
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: cb92d304
  warmup_time: 0.006450653076171875
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=8634)[0m {'loss': 105.1417465209961, 'avg_loss': 92.2559642791748}
Result for TorchTrainer_ec979508:
  _time_this_iter_s: 5.094644069671631
  _timestamp: 1662585698
  _training_iteration: 4
  avg_loss: 92.2559642791748
  date: 2022-09-07_21-21-38
  done: false
  experiment_id: 96b380f0227e49ed8fe58005ea4b716d
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 105.1417465209961
  node_ip: 10.150.0.3
  pid: 8158
  should_checkpoint: true
  time_since_restore: 32.716594219207764
  time_this_iter_s: 5.04983925819397
  time_total_s: 32.716594219207764
  timestamp: 1662585698
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: ec979508
  warmup_time: 0.005913257598876953
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_ec979508_24_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-21-01/checkpoint_000003)... Done. 2.6s


[2m[36m(RayTrainWorker pid=8634)[0m {'loss': 64.12551879882812, 'avg_loss': 86.62987518310547}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_ec979508_24_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-21-01/checkpoint_000004)... Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_ec979508_24_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-21-01/checkpoint_000004)... Done. 0.3s


[2m[36m(RayTrainWorker pid=7966)[0m {'loss': 49.465858459472656, 'avg_loss': 74.31598892211915}


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,▁▂▆█▆
iterations_since_restore,▁▃▅▆█
loss,▂▃█▇▁
time_since_restore,▁▃▅▆█
time_this_iter_s,█▁▁▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,86.62988
iterations_since_restore,5.0
loss,64.12552
time_since_restore,37.5333
time_this_iter_s,4.81671
time_total_s,37.5333
timestamp,1662585703.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00591


Result for TorchTrainer_ec979508:
  _time_this_iter_s: 4.865289211273193
  _timestamp: 1662585702
  _training_iteration: 5
  avg_loss: 86.62987518310547
  date: 2022-09-07_21-21-43
  done: true
  experiment_id: 96b380f0227e49ed8fe58005ea4b716d
  experiment_tag: 24_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0042,max_num_steps=5,num_workers=4

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_cb92d304_23_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-20-40/checkpoint_000004)... 

Result for TorchTrainer_cb92d304:
  _time_this_iter_s: 8.97896695137024
  _timestamp: 1662585705
  _training_iteration: 5
  avg_loss: 74.31598892211915
  date: 2022-09-07_21-21-45
  done: false
  experiment_id: a1577ee73b4a4fefa8dc36d6f96347b1
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 49.465858459472656
  node_ip: 10.150.0.3
  pid: 7888
  should_checkpoint: true
  time_since_restore: 61.29706907272339
  time_this_iter_s: 8.93552279472351
  time_total_s: 61.29706907272339
  timestamp: 1662585705
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: cb92d304
  warmup_time: 0.006450653076171875
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_cb92d304_23_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_21-20-40/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,▁▆█▆▅
iterations_since_restore,▁▃▅▆█
loss,▂██▃▁
time_since_restore,▁▃▅▆█
time_this_iter_s,█▂▁▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,74.31599
iterations_since_restore,5.0
loss,49.46586
time_since_restore,61.29707
time_this_iter_s,8.93552
time_total_s,61.29707
timestamp,1662585705.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00645


Result for TorchTrainer_cb92d304:
  _time_this_iter_s: 8.97896695137024
  _timestamp: 1662585705
  _training_iteration: 5
  avg_loss: 74.31598892211915
  date: 2022-09-07_21-21-45
  done: true
  experiment_id: a1577ee73b4a4fefa8dc36d6f96347b1
  experiment_tag: 23_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0084,max_num_steps=5,num_workers=4

[2m[36m(RayTrainWorker pid=9571)[0m 2022-09-07 21:22:06,687	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=9571)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=9571)[0m 2022-09-07 21:22:11,908	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=9571)[0m {'loss': 116.76582336425781, 'avg_loss': 116.76582336425781}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_f8df3500_25_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-22-00/checkpoint_000000)... 

Result for TorchTrainer_f8df3500:
  _time_this_iter_s: 10.15064525604248
  _timestamp: 1662585737
  _training_iteration: 1
  avg_loss: 116.76582336425781
  date: 2022-09-07_21-22-17
  done: false
  experiment_id: 0454cfca86fa4aebb8278f28870bee72
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 116.76582336425781
  node_ip: 10.150.0.3
  pid: 9097
  should_checkpoint: true
  time_since_restore: 13.410129308700562
  time_this_iter_s: 13.410129308700562
  time_total_s: 13.410129308700562
  timestamp: 1662585737
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: f8df3500
  warmup_time: 0.004829883575439453
  


Done. 1.5s


[2m[36m(RayTrainWorker pid=9571)[0m {'loss': 64.10145568847656, 'avg_loss': 90.43363952636719}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_f8df3500_25_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-22-00/checkpoint_000001)... Done. 1.0s


[2m[36m(RayTrainWorker pid=9571)[0m {'loss': 30.046037673950195, 'avg_loss': 70.30443890889485}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_f8df3500_25_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-22-00/checkpoint_000002)... 

Result for TorchTrainer_f8df3500:
  _time_this_iter_s: 4.538066625595093
  _timestamp: 1662585746
  _training_iteration: 3
  avg_loss: 70.30443890889485
  date: 2022-09-07_21-22-26
  done: false
  experiment_id: 0454cfca86fa4aebb8278f28870bee72
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 30.046037673950195
  node_ip: 10.150.0.3
  pid: 9097
  should_checkpoint: true
  time_since_restore: 22.39370584487915
  time_this_iter_s: 4.518744707107544
  time_total_s: 22.39370584487915
  timestamp: 1662585746
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: f8df3500
  warmup_time: 0.004829883575439453
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=9571)[0m {'loss': 62.759769439697266, 'avg_loss': 68.41827154159546}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_f8df3500_25_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-22-00/checkpoint_000003)... Done. 1.0s


[2m[36m(RayTrainWorker pid=9571)[0m {'loss': 121.65872955322266, 'avg_loss': 79.0663631439209}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_f8df3500_25_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-22-00/checkpoint_000004)... 

Result for TorchTrainer_f8df3500:
  _time_this_iter_s: 4.393399715423584
  _timestamp: 1662585755
  _training_iteration: 5
  avg_loss: 79.0663631439209
  date: 2022-09-07_21-22-35
  done: false
  experiment_id: 0454cfca86fa4aebb8278f28870bee72
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 121.65872955322266
  node_ip: 10.150.0.3
  pid: 9097
  should_checkpoint: true
  time_since_restore: 31.116479635238647
  time_this_iter_s: 4.407530307769775
  time_total_s: 31.116479635238647
  timestamp: 1662585755
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: f8df3500
  warmup_time: 0.004829883575439453
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_21-06-06/TorchTrainer_f8df3500_25_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_21-22-00/checkpoint_000004)... Done. 0.2s


VBox(children=(Label(value='456.836 MB of 456.836 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▄▁▁▃
iterations_since_restore,▁▃▅▆█
loss,█▄▁▃█
time_since_restore,▁▃▅▆█
time_this_iter_s,█▁▁▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,79.06636
iterations_since_restore,5.0
loss,121.65873
time_since_restore,31.11648
time_this_iter_s,4.40753
time_total_s,31.11648
timestamp,1662585755.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00483


Result for TorchTrainer_f8df3500:
  _time_this_iter_s: 4.393399715423584
  _timestamp: 1662585755
  _training_iteration: 5
  avg_loss: 79.0663631439209
  date: 2022-09-07_21-22-35
  done: true
  experiment_id: 0454cfca86fa4aebb8278f28870bee72
  experiment_tag: 25_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0051,max_num_steps=5,num_workers=4,

2022-09-07 21:22:44,584	INFO tune.py:759 -- Total run time: 997.96 seconds (997.79 seconds for the tuning loop).


In [35]:
import time

In [36]:
# Fixed 30-second pause before the tuning results are read back in the next cell.
# NOTE(review): presumably gives Ray Tune / W&B background work time to settle
# after the tuning run — confirm it is still needed.
time.sleep(30)

In [37]:
# Collect the tuning results into a DataFrame. In the rendered output each row
# appears to be one trial: its reported metrics (loss, avg_loss, timings), the
# flattened `config/...` values, and the trial's `logdir`.
# NOTE(review): `analysis` is created in an earlier cell not shown here —
# presumably the Ray Tune result object; verify.
analysis_df = analysis.get_dataframe()

In [38]:
# Bare expression as the last line of the cell: renders the results DataFrame
# as the cell output for a visual sanity check.
analysis_df

Unnamed: 0,loss,avg_loss,_timestamp,_time_this_iter_s,_training_iteration,time_this_iter_s,should_checkpoint,done,timesteps_total,episodes_total,...,config/train_loop_config/cfg/train_data_loader/perturb_probability,config/train_loop_config/cfg/train_data_loader/shuffle,config/train_loop_config/cfg/train_params/checkpoint_every_n_steps,config/train_loop_config/cfg/train_params/eval_every_n_steps,config/train_loop_config/cfg/train_params/max_num_steps,config/train_loop_config/cfg/val_data_loader/batch_size,config/train_loop_config/cfg/val_data_loader/key,config/train_loop_config/cfg/val_data_loader/num_workers,config/train_loop_config/cfg/val_data_loader/shuffle,logdir
0,74.567551,77.843877,1662584829,9.905044,5,9.896055,True,False,,,...,0.0,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
1,67.728432,90.870517,1662584862,16.227512,5,16.154536,True,False,,,...,0.0,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
2,17.969776,35.895309,1662584903,9.094891,5,9.252898,True,False,,,...,0.0,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
3,5.845992,98.969687,1662584913,5.566701,5,5.570402,True,False,,,...,0.0,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
4,138.551712,41.856454,1662584968,4.608834,5,4.618596,True,False,,,...,0.0,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
5,47.916885,59.969492,1662585013,14.004685,5,13.904366,True,False,,,...,0.0,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
6,14.776404,42.043575,1662585061,12.540035,5,13.211249,True,False,,,...,0.0,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
7,93.338753,68.701921,1662585114,12.791445,5,12.706946,True,False,,,...,0.0,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
8,37.867397,52.506245,1662585183,18.638955,5,18.685607,True,False,,,...,0.0,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
9,119.39801,69.863711,1662585186,9.733388,5,9.714789,True,False,,,...,0.0,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...


In [39]:
# Run information
# Metadata for the W&B run created in the next cell (wandb.init).
wandb_entity = "l5-demo"            # passed as `entity`
project_name = "l5-simulation"      # passed as `project`
run_type = "train"                  # passed as `job_type`
run_name = "train-simulation-model"
tags = ["train", "simulation"]
# Free-text notes attached to the run; leading/trailing newlines are kept as-is.
run_description = "\nTrain simulation model\n"

In [40]:
#🪄🐝
# Open the W&B run that will receive the tuning summary table.
# Identity fields come from the "Run information" cell; `cfg` is stored as the
# run's config.
run = wandb.init(
    config=cfg,
    entity=wandb_entity,
    project=project_name,
    name=run_name,
    job_type=run_type,
    tags=tags,
    notes=run_description,
)

In [41]:
#BUG: to force a connection on the lineage graph
#🪄🐝
# Declare the artifact "<entity>/<project>/<name>:<alias>" as an input of this
# run. The returned handle is kept in `artifact`; the call is made for the
# lineage link rather than for the artifact's contents.
# NOTE(review): the `artifact_*` names are defined in earlier cells not shown here.
artifact = run.use_artifact(
    f"{artifact_entity}/{artifact_project}/{artifact_name}:{artifact_alias}",
    type=artifact_type,
)

In [42]:
#🪄🐝
# Wrap the results DataFrame in a W&B Table so it can be logged to the run
# in the following cell.
analysis_table = wandb.Table(dataframe=analysis_df)

In [43]:
#BUG: run gets lost after tune job due to change in cwd. Forced to make 2 runs
# Log the tuning summary table to the W&B run and close the run.
# An empty table is treated as an error. The run is still closed first (with a
# non-zero exit code, which marks it as failed) so that it is not left open in
# the kernel when the exception propagates.
if len(analysis_table.data) == 0:
    run.finish(exit_code=1)
    raise ValueError("bad table for some reason")

run.log({"analysis_table": analysis_table})
run.finish()

VBox(children=(Label(value='0.030 MB of 0.053 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.574678…