https://github.com/woven-planet/l5kit/blob/master/examples/simulation/train.ipynb

## Prepare Data path and load cfg

By setting the `L5KIT_DATA_FOLDER` variable, we can point the script to the folder where the data lies.

Then, we load our config file with relative paths and other configurations (rasteriser, training params...).

### Setup

In [1]:
from pathlib import Path
import os

In [2]:
# NOTE: do not use relative paths for the models provided by L5; every
# directory below is derived from the absolute working directory.
experiments_directory = Path(os.path.abspath('')).parent.parent / "Experiments"
data_directory = experiments_directory / "data"
prediction_directory = experiments_directory / "simulation"
prediction_training_directory = prediction_directory / "training"
save_directory = prediction_training_directory / "saved_outputs"

# Create the whole tree; each call is a no-op when the folder already exists.
for _directory in (
    experiments_directory,
    data_directory,
    prediction_directory,
    prediction_training_directory,
    save_directory,
):
    _directory.mkdir(parents=True, exist_ok=True)

In [3]:
import os
# Make the training folder the working directory so that files written with
# relative paths by later cells (e.g. requirements.txt) land inside it.
# NOTE(review): the directory-setup cell derives its paths from the working
# directory, so re-running that cell after this one yields a different tree.
os.chdir(prediction_training_directory)

In [4]:
%%writefile requirements.txt
l5kit
pyyaml
ray==2.0.0rc1
ray[air]
wandb
optuna

Overwriting requirements.txt


In [5]:
%%capture
# !pip install -r requirements.txt
!pip install l5kit pyyaml
!pip install wandb --upgrade
!pip install ray==2.0.0rc1
!pip install "ray[air]"
!pip install optuna

In [6]:
from typing import Dict

from tempfile import gettempdir
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision.models.resnet import resnet50
from tqdm import tqdm

from l5kit.configs import load_config_data
from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset, EgoDataset
from l5kit.rasterization import build_rasterizer
from l5kit.evaluation import write_pred_csv, compute_metrics_csv, read_gt_csv, create_chopped_dataset
from l5kit.evaluation.chop_dataset import MIN_FUTURE_STEPS
from l5kit.evaluation.metrics import neg_multi_log_likelihood, time_displace, rmse, prob_true_mode, average_displacement_error_oracle, average_displacement_error_mean, final_displacement_error_oracle, final_displacement_error_mean, detect_collision, distance_to_reference_trajectory
from l5kit.geometry import transform_points
from l5kit.visualization import PREDICTED_POINTS_COLOR, TARGET_POINTS_COLOR, draw_trajectory
from prettytable import PrettyTable
from pathlib import Path

import os

In [7]:
from tempfile import gettempdir
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from tqdm import tqdm

from l5kit.configs import load_config_data
from l5kit.data import LocalDataManager, ChunkedDataset
from l5kit.dataset import AgentDataset
from l5kit.rasterization import build_rasterizer
from l5kit.geometry import transform_points
from l5kit.visualization import TARGET_POINTS_COLOR, draw_trajectory
from l5kit.planning.rasterized.model import RasterizedPlanningModel

import os

### Get Data from Wandb

In [8]:
import wandb
# Authenticate with Weights & Biases; the call's return value is shown as the
# cell output.
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33ma-sh0ts[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [9]:
# Run information
# Identity and metadata of the W&B run that downloads the dataset; all of
# these values are passed to wandb.init in the next cell.
wandb_entity = "l5-demo"
project_name = "l5-simulation"  # also the prefix of the W&B project used for the Ray Tune trials
run_name = "download-l5-data"
run_type = "download"  # passed as job_type
# Free-text notes attached to the run.
run_description = """
Download data for the task of training a prediction model
"""
tags = ["download", "data"]

In [10]:
#🪄🐝
# Open the W&B run used to fetch the dataset, described by the run-information
# variables defined in the previous cell.
run = wandb.init(
    name=run_name,
    job_type=run_type,
    entity=wandb_entity,
    project=project_name,
    tags=tags,
    notes=run_description,
)

[34m[1mwandb[0m: Currently logged in as: [33ma-sh0ts[0m ([33ml5-demo[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [11]:
# Coordinates of the W&B dataset artifact; the next cell joins them into
# "<entity>/<project>/<name>:<alias>".
artifact_entity = "l5-demo"
artifact_project = "l5-common"
artifact_name = "l5-data"
artifact_alias = "latest"
artifact_type = "dataset"

In [12]:
#🪄🐝
# Request the dataset artifact through the active run; the returned handle is
# what the next cell downloads.
artifact = run.use_artifact(f"{artifact_entity}/{artifact_project}/{artifact_name}:{artifact_alias}", type=artifact_type)

In [13]:
# Download the artifact's files into data_directory; the return value is
# assigned to `_` because it is not used afterwards.
_ = artifact.download(data_directory)

[34m[1mwandb[0m: Downloading large artifact l5-data:latest, 2386.92MB. 517 files... Done. 0:1:5.5


In [14]:
# BUG: need to separate runs into download and training due to issues with routing runs after ray.tune
# The download run is therefore closed here, before any Ray Tune cell executes.
run.finish()

VBox(children=(Label(value='1.046 MB of 1.046 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

In [15]:
# The dataset is expected in the folder named by the L5KIT_DATA_FOLDER
# environment variable, which is set in the next cell.

# Load the simulation configuration from the downloaded data directory.
cfg = load_config_data(data_directory / "configurations" / "simulation_config.yaml")

# TODO: replace this hard-coded override with logic that selects the dataset.
# For now both the training and the validation loader read the sample scenes.
for _loader_name in ("train_data_loader", "val_data_loader"):
    cfg[_loader_name]["key"] = "scenes/sample.zarr"

# Folder that holds the dataset itself.
l5_data_location = data_directory / "dataset"

In [16]:
# cfg["zarr_dataset_location"] = l5_data_location
# Point L5KIT_DATA_FOLDER at the dataset folder. Environment values must be
# strings, hence the str() conversion of the Path.
os.environ["L5KIT_DATA_FOLDER"] = str(l5_data_location)

## Model

Our baseline is a simple `resnet50` pretrained on `imagenet`. We must replace the input and the final layer to address our requirements.

In [17]:
def build_model(cfg: Dict, num_input_channels, criterion):
    """Instantiate the rasterized planning model described by ``cfg``.

    Args:
        cfg: Configuration dictionary; ``cfg["model_params"]`` must provide
            ``model_architecture`` and ``future_num_frames``.
        num_input_channels: Number of input channels handed to the model.
        criterion: Loss object handed to the model.

    Returns:
        A ``RasterizedPlanningModel`` with three targets (X, Y, yaw) per future
        frame and a scaling weight of 1 for each of them.
    """
    model_params = cfg["model_params"]
    # One (X, Y, yaw) triple is predicted for every future state.
    targets_per_frame = 3
    return RasterizedPlanningModel(
        model_arch=model_params["model_architecture"],
        num_input_channels=num_input_channels,
        num_targets=targets_per_frame * model_params["future_num_frames"],
        weights_scaling=[1.0, 1.0, 1.0],
        criterion=criterion,
    )

In [18]:
def forward(data, model, criterion):
    """Run ``model`` on ``data`` and pull the loss out of its output.

    Args:
        data: Batch passed unchanged to ``model``.
        model: Callable returning a mapping that contains a ``"loss"`` entry.
        criterion: Not used by this function; accepted so the call signature
            matches its callers.

    Returns:
        A ``(loss, outputs)`` tuple, where ``outputs`` is the complete mapping
        returned by ``model``.
    """
    model_outputs = model(data)
    return model_outputs["loss"], model_outputs

In [19]:
def train_simulation_model_epoch(data, model, criterion, optimizer):
    """Perform one optimisation step on a single batch.

    Despite its name, each call processes exactly one batch, not a whole epoch.

    Args:
        data: Batch forwarded to ``forward``.
        model: Model being trained.
        criterion: Forwarded to ``forward``.
        optimizer: Optimizer whose gradients are reset and which is stepped.

    Returns:
        The ``(loss, outputs)`` pair produced by ``forward`` for this batch.
    """
    step_loss, step_outputs = forward(data, model, criterion)
    # Backward pass: clear stale gradients, back-propagate, update the weights.
    optimizer.zero_grad()
    step_loss.backward()
    optimizer.step()
    return step_loss, step_outputs

Our data pipeline maps a raw `.zarr` folder into a multi-processing instance ready for training by:
- loading the `zarr` into a `ChunkedDataset` object. This object has a reference to the different arrays in the zarr (e.g. agents and traffic lights);
- wrapping the `ChunkedDataset` into an `AgentDataset`, which inherits from torch `Dataset` class;
- passing the `AgentDataset` into a torch `DataLoader`

# Training

Note: if you're on macOS and using the `py_satellite` rasterizer, you may need to disable OpenCV multiprocessing by adding
`cv2.setNumThreads(0)` before the following cell. This seems to affect only runs inside a Python notebook, and it is caused by the `cv2.warpAffine` function.

In [20]:
import ray.train as train
from ray.air import session, Checkpoint

In [21]:
from ray import tune
from ray.tune.tuner import Tuner

In [22]:
def train_simulation_model(tuner_cfg: Dict):
    """Per-worker training loop run by the Ray trainer for one trial.

    Args:
        tuner_cfg: Trial configuration with the keys ``shuffle``,
            ``batch_size``, ``num_workers``, ``lr``, ``max_num_steps``,
            ``dataset_key`` and ``cfg`` (the l5kit configuration dictionary).

    Every step reports ``loss`` (current batch) and ``avg_loss`` (mean over all
    steps so far) through ``session.report``. A checkpoint holding the step
    index and the model is attached every
    ``max(1, max_num_steps // num_checkpoints)`` steps and on the final step.
    """
    dm = LocalDataManager()

    # ==== Configurations
    shuffle = tuner_cfg["shuffle"]
    # int() because the search space may deliver these as floats (e.g. 12.0).
    batch_size = int(tuner_cfg["batch_size"])
    num_workers = tuner_cfg["num_workers"]
    lr = tuner_cfg["lr"]
    max_num_steps = int(tuner_cfg["max_num_steps"])
    dataset_key = tuner_cfg["dataset_key"]
    cfg = tuner_cfg["cfg"]

    # ==== Loading Dataset
    rasterizer = build_rasterizer(cfg, dm)

    train_zarr = ChunkedDataset(dm.require(dataset_key)).open()
    train_dataset = AgentDataset(cfg, train_zarr, rasterizer)

    # Split the global batch across the workers. Clamp to 1 so that a batch
    # smaller than the world size does not produce batch_size=0, which
    # DataLoader rejects.
    batch_size_per_worker = max(1, batch_size // session.get_world_size())
    train_dataloader = DataLoader(
        train_dataset,
        shuffle=shuffle,
        batch_size=batch_size_per_worker,
        num_workers=num_workers,
    )
    train_dataloader = train.torch.prepare_data_loader(train_dataloader)

    # ==== Init model
    criterion = nn.MSELoss(reduction="none")
    num_input_channels = rasterizer.num_channels()

    model = build_model(cfg, num_input_channels, criterion)
    model = train.torch.prepare_model(model)
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # ==== TRAIN LOOP
    tr_it = iter(train_dataloader)
    num_checkpoints = 5
    # Clamp to 1: with fewer steps than checkpoints the floor division gives 0
    # and the modulo below would raise ZeroDivisionError.
    steps_before_checkpointing = max(1, max_num_steps // num_checkpoints)
    losses_train = []

    # Training mode and gradient tracking never change inside the loop, so
    # they are enabled once up front.
    model.train()
    torch.set_grad_enabled(True)

    for step in range(max_num_steps):
        try:
            data = next(tr_it)
        except StopIteration:
            # Loader exhausted: start another pass over the dataset.
            tr_it = iter(train_dataloader)
            data = next(tr_it)

        loss, _ = train_simulation_model_epoch(data, model, criterion, optimizer)
        loss_value = loss.item()
        losses_train.append(loss_value)
        metrics = {
            "loss": loss_value,
            "avg_loss": np.mean(losses_train)
        }

        # Only the rank-0 worker prints, to avoid duplicated log lines.
        if train.world_rank() == 0:
            print(metrics)

        is_last_step = step == max_num_steps - 1
        if step % steps_before_checkpointing == 0 or is_last_step:
            session.report(
                metrics=metrics,
                checkpoint=Checkpoint.from_dict(dict(step=step, model=model)))
        else:
            session.report(
                metrics=metrics
            )

### Distributed Training using Ray

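We detect the available hardware and derive from it how many Ray training workers to start and how many data-loading processes each one gets: one worker per GPU with the CPUs divided evenly between them, or, on a CPU-only machine, up to four data-loading processes per worker.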

In [23]:
from ray.train.torch import TorchTrainer
from ray.air.config import RunConfig, ScalingConfig
from ray.air.callbacks.wandb import WandbLoggerCallback #🪄🐝

In [24]:
import multiprocessing

In [25]:
# Hardware visible to this process; the next cell uses these values to size
# the Ray training workers and their data-loading processes.
USE_GPU = torch.cuda.is_available()
NUM_GPUS = torch.cuda.device_count()
NUM_CPUS = multiprocessing.cpu_count()

In [26]:
if not USE_GPU:
    # CPU-only machine: at most 4 data-loading workers per actor, and as many
    # actors as that allows (always at least one).
    num_data_workers = min(4, NUM_CPUS)
    ideal_num_actors = NUM_CPUS // num_data_workers
    num_actors = max(ideal_num_actors, 1)
else:
    # One actor per GPU; the CPUs are divided evenly between the actors.
    num_actors = NUM_GPUS
    num_data_workers = NUM_CPUS // num_actors

To use Ray, all we need to do is wrap the training function above. The only addition it needed was the calls to `session.report`, which log metrics (and checkpoints) during training.

In [27]:
# NOTE: To figure out if the scaling config intuition is correct: num_actors divides resources between each actor, and within the train func each actor can then utilize the shared resources
# The trainer runs train_simulation_model once per worker, with num_actors
# workers and GPU usage enabled only when CUDA is available.
trainer = TorchTrainer(
    train_loop_per_worker=train_simulation_model,
    scaling_config=ScalingConfig(num_workers=num_actors, use_gpu=USE_GPU),
)

2022-09-07 22:21:35,421	INFO worker.py:1487 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265[39m[22m.


### Distributed Hyperparameter Tuning using Ray

Thanks to Ray's simple interface, we can hand our regular trainer to Ray's `Tuner`, which lets us run efficient hyperparameter optimization. In our case we use `optuna` as the search algorithm.

In [28]:
import copy

# Configuration handed to every trial as `train_loop_config`.
tuner_train_config = {}
##static
tuner_train_config["shuffle"] = cfg["train_data_loader"]["shuffle"]
tuner_train_config["num_workers"] = num_data_workers
tuner_train_config["dataset_key"] = cfg["train_data_loader"]["key"]

##tunable
tuner_train_config["max_num_steps"] = 5
# tuner_train_config["max_num_steps"] = tune.quniform(1000, 5000, 250)
tuner_train_config["lr"] = tune.loguniform(1e-4, 1e-2)
tuner_train_config["batch_size"] = tune.quniform(6, 24, 6)

# Put the search-space entry into a private copy of the configuration.
# Writing tune.choice(...) into the shared `cfg` would replace its concrete
# map_type with a search-space object, leaving `cfg` unusable for any cell
# that needs a real rasterizer configuration.
trial_cfg = copy.deepcopy(cfg)
trial_cfg["raster_params"]["map_type"] = tune.choice(["py_semantic", "py_satellite"])

tuner_train_config["cfg"] = trial_cfg

In [29]:
from ray.tune.logger import LoggerCallback
from typing import Dict, List

In [30]:
from ray.tune.stopper import ExperimentPlateauStopper
from ray.tune.search.optuna import OptunaSearch

In [31]:
# Number of hyperparameter samples requested from the search (passed to
# TuneConfig as num_samples).
n_search_attempts = 25

In [32]:
# Optuna-backed search algorithm. No metric or mode is given here; both are
# specified on the TuneConfig in the Tuner cell.
optuna_search = OptunaSearch()

In [33]:
# Wrap the trainer in a Tuner: Optuna proposes values for the tunable entries
# of tuner_train_config, trials are compared on the minimum "avg_loss", and
# results are logged to the "<project_name>-trials" W&B project.
tuner = Tuner(
    trainer,
    param_space={"train_loop_config": tuner_train_config},
    tune_config=tune.TuneConfig(
        search_alg=optuna_search,
        num_samples=n_search_attempts,
        metric="avg_loss",  # loss or avg_loss here?
        mode="min",
    ),
    run_config=RunConfig(
        stop=ExperimentPlateauStopper("avg_loss"),
        callbacks=[
            WandbLoggerCallback(project=f"{project_name}-trials", save_checkpoints=True),  #🪄🐝
        ],
    ),
)

  


### Aggregate and Report Metrics from All Trials

In [34]:
# Launch the hyperparameter search; whatever fit() returns is kept in
# `analysis` for later inspection of the trials.
analysis = tuner.fit()

[32m[I 2022-09-07 22:21:37,915][0m A new study created in memory with name: optuna[0m
2022-09-07 22:21:37,964	INFO wandb.py:119 -- Already logged into W&B.


Trial name,status,loc,train_loop_config...,train_loop_config....1,train_loop_config/lr,iter,total time (s),loss,avg_loss,_timestamp
TorchTrainer_70a52fb0,TERMINATED,10.150.0.3:23101,12,py_semantic,0.000679685,5,64.812,33.7803,65.7031,1662589365
TorchTrainer_727aa5ae,TERMINATED,10.150.0.3:23173,24,py_semantic,0.00211888,5,112.337,67.1353,68.1636,1662589417
TorchTrainer_7283a4c4,TERMINATED,10.150.0.3:24265,6,py_satellite,0.000357663,5,36.441,0.366061,80.4814,1662589416
TorchTrainer_9fab1d06,TERMINATED,10.150.0.3:25250,18,py_satellite,0.00178972,5,82.5818,44.4912,57.2109,1662589519
TorchTrainer_c12d4d82,TERMINATED,10.150.0.3:25260,18,py_satellite,0.000146682,5,82.3744,66.3325,96.6547,1662589520
TorchTrainer_c19f6e8a,TERMINATED,10.150.0.3:26884,18,py_satellite,0.00281803,5,84.7158,16.0632,50.1754,1662589627
TorchTrainer_ffd13a76,TERMINATED,10.150.0.3:26919,24,py_semantic,0.000192362,5,109.775,14.2937,34.844,1662589652
TorchTrainer_0038a5bc,TERMINATED,10.150.0.3:28508,6,py_satellite,0.000658554,5,41.7107,107.925,107.537,1662589683
TorchTrainer_3b7a5fee,TERMINATED,10.150.0.3:28875,12,py_semantic,0.007989,5,61.7474,78.5341,81.7076,1662589728
TorchTrainer_4a587ab4,TERMINATED,10.150.0.3:29736,6,py_semantic,0.000282046,5,36.7738,19.8298,46.7594,1662589734


[2m[36m(RayTrainWorker pid=23172)[0m 2022-09-07 22:21:44,644	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=23285)[0m 2022-09-07 22:21:49,173	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=23172)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=23172)[0m Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /home/jupyter/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
  0%|          | 0.00/97.8M [00:00<?, ?B/s]
 15%|█▌        | 14.8M/97.8M [00:00<00:00, 155MB/s]
 31%|███       | 30.0M/97.8M [00:00<00:00, 158MB/s]
 46%|████▋     | 45.3M/97.8M [00:00<00:00, 159MB/s]
 62%|██████▏   | 60.5M/97.8M [00:00<00:00, 159MB/s]
 77%|███████▋  | 75.6M/97.8M [00:00<00:00, 155MB/s]
100%|██████████| 97.8M/97.8M [00:00<00:00, 164MB/s]
[2m[36m(RayTrainWorker pid=23172)[0

[2m[36m(RayTrainWorker pid=23172)[0m {'loss': 73.51165771484375, 'avg_loss': 73.51165771484375}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_70a52fb0_1_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-21-38/checkpoint_000000)... 

Result for TorchTrainer_70a52fb0:
  _time_this_iter_s: 24.811136960983276
  _timestamp: 1662589330
  _training_iteration: 1
  avg_loss: 73.51165771484375
  date: 2022-09-07_22-22-10
  done: false
  experiment_id: c534acd986054ddfbec95d73abb8a304
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 73.51165771484375
  node_ip: 10.150.0.3
  pid: 23101
  should_checkpoint: true
  time_since_restore: 29.494240283966064
  time_this_iter_s: 29.494240283966064
  time_total_s: 29.494240283966064
  timestamp: 1662589330
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 70a52fb0
  warmup_time: 0.005673408508300781
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=23172)[0m {'loss': 155.3971710205078, 'avg_loss': 114.45441436767578}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_70a52fb0_1_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-21-38/checkpoint_000001)... 

Result for TorchTrainer_70a52fb0:
  _time_this_iter_s: 8.58382511138916
  _timestamp: 1662589338
  _training_iteration: 2
  avg_loss: 114.45441436767578
  date: 2022-09-07_22-22-19
  done: false
  experiment_id: c534acd986054ddfbec95d73abb8a304
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 155.3971710205078
  node_ip: 10.150.0.3
  pid: 23101
  should_checkpoint: true
  time_since_restore: 37.98921012878418
  time_this_iter_s: 8.494969844818115
  time_total_s: 37.98921012878418
  timestamp: 1662589339
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 70a52fb0
  warmup_time: 0.005673408508300781
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=23285)[0m {'loss': 41.961753845214844, 'avg_loss': 41.961753845214844}
Result for TorchTrainer_727aa5ae:
  _time_this_iter_s: 31.26285195350647
  _timestamp: 1662589341
  _training_iteration: 1
  avg_loss: 41.961753845214844
  date: 2022-09-07_22-22-21
  done: false
  experiment_id: 19a5bc1b960f4c81bdca0a8a91598102
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 41.961753845214844
  node_ip: 10.150.0.3
  pid: 23173
  should_checkpoint: true
  time_since_restore: 36.07328009605408
  time_this_iter_s: 36.07328009605408
  time_total_s: 36.07328009605408
  timestamp: 1662589341
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 727aa5ae
  warmup_time: 0.007851123809814453
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_727aa5ae_2_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-21-41/checkpoint_000000)... Done. 1.1s


[2m[36m(RayTrainWorker pid=23172)[0m {'loss': 55.52874755859375, 'avg_loss': 94.81252543131511}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_70a52fb0_1_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-21-38/checkpoint_000002)... 

Result for TorchTrainer_70a52fb0:
  _time_this_iter_s: 9.30670952796936
  _timestamp: 1662589347
  _training_iteration: 3
  avg_loss: 94.81252543131511
  date: 2022-09-07_22-22-28
  done: false
  experiment_id: c534acd986054ddfbec95d73abb8a304
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 55.52874755859375
  node_ip: 10.150.0.3
  pid: 23101
  should_checkpoint: true
  time_since_restore: 47.39532113075256
  time_this_iter_s: 9.406111001968384
  time_total_s: 47.39532113075256
  timestamp: 1662589348
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 70a52fb0
  warmup_time: 0.005673408508300781
  


Done. 1.4s


[2m[36m(RayTrainWorker pid=23172)[0m {'loss': 10.297449111938477, 'avg_loss': 73.68375635147095}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_70a52fb0_1_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-21-38/checkpoint_000003)... 

Result for TorchTrainer_70a52fb0:
  _time_this_iter_s: 8.558222532272339
  _timestamp: 1662589356
  _training_iteration: 4
  avg_loss: 73.68375635147095
  date: 2022-09-07_22-22-36
  done: false
  experiment_id: c534acd986054ddfbec95d73abb8a304
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 10.297449111938477
  node_ip: 10.150.0.3
  pid: 23101
  should_checkpoint: true
  time_since_restore: 55.842246294021606
  time_this_iter_s: 8.446925163269043
  time_total_s: 55.842246294021606
  timestamp: 1662589356
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 70a52fb0
  warmup_time: 0.005673408508300781
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=23285)[0m {'loss': 98.05196380615234, 'avg_loss': 70.0068588256836}
Result for TorchTrainer_727aa5ae:
  _time_this_iter_s: 18.65444588661194
  _timestamp: 1662589359
  _training_iteration: 2
  avg_loss: 70.0068588256836
  date: 2022-09-07_22-22-40
  done: false
  experiment_id: 19a5bc1b960f4c81bdca0a8a91598102
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 98.05196380615234
  node_ip: 10.150.0.3
  pid: 23173
  should_checkpoint: true
  time_since_restore: 54.62339115142822
  time_this_iter_s: 18.550111055374146
  time_total_s: 54.62339115142822
  timestamp: 1662589360
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 727aa5ae
  warmup_time: 0.007851123809814453
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_727aa5ae_2_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-21-41/checkpoint_000001)... Done. 1.1s


[2m[36m(RayTrainWorker pid=23172)[0m {'loss': 33.78026580810547, 'avg_loss': 65.70305824279785}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_70a52fb0_1_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-21-38/checkpoint_000004)... 

Result for TorchTrainer_70a52fb0:
  _time_this_iter_s: 8.806889533996582
  _timestamp: 1662589365
  _training_iteration: 5
  avg_loss: 65.70305824279785
  date: 2022-09-07_22-22-45
  done: false
  experiment_id: c534acd986054ddfbec95d73abb8a304
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 33.78026580810547
  node_ip: 10.150.0.3
  pid: 23101
  should_checkpoint: true
  time_since_restore: 64.81202864646912
  time_this_iter_s: 8.96978235244751
  time_total_s: 64.81202864646912
  timestamp: 1662589365
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 70a52fb0
  warmup_time: 0.005673408508300781
  


Done. 1.5s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_70a52fb0_1_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-21-38/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='456.987 MB of 456.987 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,▂█▅▂▁
iterations_since_restore,▁▃▅▆█
loss,▄█▃▁▂
time_since_restore,▁▃▅▆█
time_this_iter_s,█▁▁▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,65.70306
iterations_since_restore,5.0
loss,33.78027
time_since_restore,64.81203
time_this_iter_s,8.96978
time_total_s,64.81203
timestamp,1662589365.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00567


Result for TorchTrainer_70a52fb0:
  _time_this_iter_s: 8.806889533996582
  _timestamp: 1662589365
  _training_iteration: 5
  avg_loss: 65.70305824279785
  date: 2022-09-07_22-22-45
  done: true
  experiment_id: c534acd986054ddfbec95d73abb8a304
  experiment_tag: 1_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0007,max_num_steps=5,num_workers=4

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_727aa5ae_2_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-21-41/checkpoint_000002)... 

Result for TorchTrainer_727aa5ae:
  _time_this_iter_s: 16.95954155921936
  _timestamp: 1662589376
  _training_iteration: 3
  avg_loss: 66.6151746114095
  date: 2022-09-07_22-22-57
  done: false
  experiment_id: 19a5bc1b960f4c81bdca0a8a91598102
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 59.83180618286133
  node_ip: 10.150.0.3
  pid: 23173
  should_checkpoint: true
  time_since_restore: 71.66144132614136
  time_this_iter_s: 17.038050174713135
  time_total_s: 71.66144132614136
  timestamp: 1662589377
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 727aa5ae
  warmup_time: 0.007851123809814453
  


Done. 1.1s
[2m[36m(RayTrainWorker pid=24353)[0m 2022-09-07 22:23:04,736	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=24353)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=24353)[0m 2022-09-07 22:23:11,284	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=24353)[0m {'loss': 130.0070037841797, 'avg_loss': 130.0070037841797}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_7283a4c4_3_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_22-22-56/checkpoint_000000)... 

Result for TorchTrainer_7283a4c4:
  _time_this_iter_s: 11.082481622695923
  _timestamp: 1662589396
  _training_iteration: 1
  avg_loss: 130.0070037841797
  date: 2022-09-07_22-23-16
  done: false
  experiment_id: 5110b6e737f745b488f55283b9111532
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 130.0070037841797
  node_ip: 10.150.0.3
  pid: 24265
  should_checkpoint: true
  time_since_restore: 16.175740957260132
  time_this_iter_s: 16.175740957260132
  time_total_s: 16.175740957260132
  timestamp: 1662589396
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 7283a4c4
  warmup_time: 0.005540609359741211
  
[2m[36m(RayTrainWorker pid=23285)[0m {'loss': 73.83734893798828, 'avg_loss': 68.4207181930542}


Done. 1.9s


Result for TorchTrainer_727aa5ae:
  _time_this_iter_s: 22.026614665985107
  _timestamp: 1662589398
  _training_iteration: 4
  avg_loss: 68.4207181930542
  date: 2022-09-07_22-23-19
  done: false
  experiment_id: 19a5bc1b960f4c81bdca0a8a91598102
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 73.83734893798828
  node_ip: 10.150.0.3
  pid: 23173
  should_checkpoint: true
  time_since_restore: 93.5714635848999
  time_this_iter_s: 21.910022258758545
  time_total_s: 93.5714635848999
  timestamp: 1662589399
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 727aa5ae
  warmup_time: 0.007851123809814453
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_727aa5ae_2_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-21-41/checkpoint_000003)... Done. 1.1s


[2m[36m(RayTrainWorker pid=24353)[0m {'loss': 43.312255859375, 'avg_loss': 86.65962982177734}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_7283a4c4_3_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_22-22-56/checkpoint_000001)... 

Result for TorchTrainer_7283a4c4:
  _time_this_iter_s: 5.826077461242676
  _timestamp: 1662589402
  _training_iteration: 2
  avg_loss: 86.65962982177734
  date: 2022-09-07_22-23-22
  done: false
  experiment_id: 5110b6e737f745b488f55283b9111532
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 43.312255859375
  node_ip: 10.150.0.3
  pid: 24265
  should_checkpoint: true
  time_since_restore: 21.946674585342407
  time_this_iter_s: 5.770933628082275
  time_total_s: 21.946674585342407
  timestamp: 1662589402
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 7283a4c4
  warmup_time: 0.005540609359741211
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=24353)[0m {'loss': 73.71448516845703, 'avg_loss': 82.3445816040039}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_7283a4c4_3_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_22-22-56/checkpoint_000002)... Done. 1.0s


[2m[36m(RayTrainWorker pid=24353)[0m {'loss': 155.00701904296875, 'avg_loss': 100.51019096374512}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_7283a4c4_3_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_22-22-56/checkpoint_000003)... 

Result for TorchTrainer_7283a4c4:
  _time_this_iter_s: 4.853615999221802
  _timestamp: 1662589412
  _training_iteration: 4
  avg_loss: 100.51019096374512
  date: 2022-09-07_22-23-32
  done: false
  experiment_id: 5110b6e737f745b488f55283b9111532
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 155.00701904296875
  node_ip: 10.150.0.3
  pid: 24265
  should_checkpoint: true
  time_since_restore: 31.648338794708252
  time_this_iter_s: 4.881451606750488
  time_total_s: 31.648338794708252
  timestamp: 1662589412
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 7283a4c4
  warmup_time: 0.005540609359741211
  


Done. 2.4s


[2m[36m(RayTrainWorker pid=24353)[0m {'loss': 0.3660605847835541, 'avg_loss': 80.48136488795281}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_7283a4c4_3_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_22-22-56/checkpoint_000004)... 

[2m[36m(RayTrainWorker pid=23285)[0m {'loss': 67.13532257080078, 'avg_loss': 68.16363906860352}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_727aa5ae_2_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-21-41/checkpoint_000004)... 

Result for TorchTrainer_727aa5ae:
  _time_this_iter_s: 18.771693468093872
  _timestamp: 1662589417
  _training_iteration: 5
  avg_loss: 68.16363906860352
  date: 2022-09-07_22-23-37
  done: false
  experiment_id: 19a5bc1b960f4c81bdca0a8a91598102
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 67.13532257080078
  node_ip: 10.150.0.3
  pid: 23173
  should_checkpoint: true
  time_since_restore: 112.33720850944519
  time_this_iter_s: 18.765744924545288
  time_total_s: 112.33720850944519
  timestamp: 1662589417
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 727aa5ae
  warmup_time: 0.007851123809814453
  


Done. 1.5s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_7283a4c4_3_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_22-22-56/checkpoint_000004)... Done. 1.3s
Done. 0.4s


VBox(children=(Label(value='456.987 MB of 456.987 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▂▁▄▁
iterations_since_restore,▁▃▅▆█
loss,▇▃▄█▁
time_since_restore,▁▃▅▆█
time_this_iter_s,█▂▁▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,80.48136
iterations_since_restore,5.0
loss,0.36606
time_since_restore,36.44101
time_this_iter_s,4.79267
time_total_s,36.44101
timestamp,1662589417.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00554


Result for TorchTrainer_7283a4c4:
  _time_this_iter_s: 4.788918972015381
  _timestamp: 1662589416
  _training_iteration: 5
  avg_loss: 80.48136488795281
  date: 2022-09-07_22-23-37
  done: true
  experiment_id: 5110b6e737f745b488f55283b9111532
  experiment_tag: 3_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0004,max_num_steps=5,num_workers=4

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_727aa5ae_2_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-21-41/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='456.987 MB of 456.987 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,▁█▇██
iterations_since_restore,▁▃▅▆█
loss,▁█▃▅▄
time_since_restore,▁▃▄▆█
time_this_iter_s,█▂▁▃▂
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,68.16364
iterations_since_restore,5.0
loss,67.13532
time_since_restore,112.33721
time_this_iter_s,18.76574
time_total_s,112.33721
timestamp,1662589417.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00785


Result for TorchTrainer_727aa5ae:
  _time_this_iter_s: 18.771693468093872
  _timestamp: 1662589417
  _training_iteration: 5
  avg_loss: 68.16363906860352
  date: 2022-09-07_22-23-37
  done: true
  experiment_id: 19a5bc1b960f4c81bdca0a8a91598102
  experiment_tag: 2_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0021,max_num_steps=5,num_workers=

[2m[36m(RayTrainWorker pid=25381)[0m 2022-09-07 22:24:00,815	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=25390)[0m 2022-09-07 22:24:02,894	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=25381)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=25381)[0m 2022-09-07 22:24:09,869	INFO train_loop_utils.py:300 -- Moving model to device: cpu
[2m[36m(RayTrainWorker pid=25390)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=25390)[0m 2022-09-07 22:24:11,393	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=25381)[0m {'loss': 27.5812931060791, 'avg_loss': 27.5812931060791}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_9fab1d06_4_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-23-53/checkpoint_000000)... 

[2m[36m(RayTrainWorker pid=25390)[0m {'loss': 42.19852828979492, 'avg_loss': 42.19852828979492}
Result for TorchTrainer_9fab1d06:
  _time_this_iter_s: 24.810033321380615
  _timestamp: 1662589466
  _training_iteration: 1
  avg_loss: 27.5812931060791
  date: 2022-09-07_22-24-26
  done: false
  experiment_id: 883146272b3045babd0d33d5e2460cb6
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 27.5812931060791
  node_ip: 10.150.0.3
  pid: 25250
  should_checkpoint: true
  time_since_restore: 29.60651159286499
  time_this_iter_s: 29.60651159286499
  time_total_s: 29.60651159286499
  timestamp: 1662589466
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 9fab1d06
  warmup_time: 0.007066011428833008
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_c12d4d82_5_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-23-53/checkpoint_000000)... 

Result for TorchTrainer_c12d4d82:
  _time_this_iter_s: 22.261414527893066
  _timestamp: 1662589466
  _training_iteration: 1
  avg_loss: 42.19852828979492
  date: 2022-09-07_22-24-27
  done: false
  experiment_id: 78f22cb608834136a975e70a0a69143b
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 42.19852828979492
  node_ip: 10.150.0.3
  pid: 25260
  should_checkpoint: true
  time_since_restore: 29.320902109146118
  time_this_iter_s: 29.320902109146118
  time_total_s: 29.320902109146118
  timestamp: 1662589467
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: c12d4d82
  warmup_time: 0.006730318069458008
  


Done. 1.0s
Done. 1.0s


[2m[36m(RayTrainWorker pid=25381)[0m {'loss': 99.4334945678711, 'avg_loss': 63.5073938369751}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_9fab1d06_4_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-23-53/checkpoint_000001)... 

Result for TorchTrainer_9fab1d06:
  _time_this_iter_s: 13.243748664855957
  _timestamp: 1662589479
  _training_iteration: 2
  avg_loss: 63.5073938369751
  date: 2022-09-07_22-24-39
  done: false
  experiment_id: 883146272b3045babd0d33d5e2460cb6
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 99.4334945678711
  node_ip: 10.150.0.3
  pid: 25250
  should_checkpoint: true
  time_since_restore: 42.78880548477173
  time_this_iter_s: 13.182293891906738
  time_total_s: 42.78880548477173
  timestamp: 1662589479
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 9fab1d06
  warmup_time: 0.007066011428833008
  
[2m[36m(RayTrainWorker pid=25390)[0m {'loss': 97.84817504882812, 'avg_loss': 70.02335166931152}


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_c12d4d82_5_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-23-53/checkpoint_000001)... 

Result for TorchTrainer_c12d4d82:
  _time_this_iter_s: 13.701167345046997
  _timestamp: 1662589480
  _training_iteration: 2
  avg_loss: 70.02335166931152
  date: 2022-09-07_22-24-40
  done: false
  experiment_id: 78f22cb608834136a975e70a0a69143b
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 97.84817504882812
  node_ip: 10.150.0.3
  pid: 25260
  should_checkpoint: true
  time_since_restore: 42.755988359451294
  time_this_iter_s: 13.435086250305176
  time_total_s: 42.755988359451294
  timestamp: 1662589480
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: c12d4d82
  warmup_time: 0.006730318069458008
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=25381)[0m {'loss': 88.00411224365234, 'avg_loss': 71.67296663920085}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_9fab1d06_4_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-23-53/checkpoint_000002)... 

Result for TorchTrainer_9fab1d06:
  _time_this_iter_s: 12.897667407989502
  _timestamp: 1662589492
  _training_iteration: 3
  avg_loss: 71.67296663920085
  date: 2022-09-07_22-24-52
  done: false
  experiment_id: 883146272b3045babd0d33d5e2460cb6
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 88.00411224365234
  node_ip: 10.150.0.3
  pid: 25250
  should_checkpoint: true
  time_since_restore: 55.65586042404175
  time_this_iter_s: 12.86705493927002
  time_total_s: 55.65586042404175
  timestamp: 1662589492
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 9fab1d06
  warmup_time: 0.007066011428833008
  
[2m[36m(RayTrainWorker pid=25390)[0m {'loss': 88.46379852294922, 'avg_loss': 76.17016728719075}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_c12d4d82_5_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-23-53/checkpoint_000002)... 

Result for TorchTrainer_c12d4d82:
  _time_this_iter_s: 12.500419855117798
  _timestamp: 1662589493
  _training_iteration: 3
  avg_loss: 76.17016728719075
  date: 2022-09-07_22-24-53
  done: false
  experiment_id: 78f22cb608834136a975e70a0a69143b
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 88.46379852294922
  node_ip: 10.150.0.3
  pid: 25260
  should_checkpoint: true
  time_since_restore: 55.47234106063843
  time_this_iter_s: 12.716352701187134
  time_total_s: 55.47234106063843
  timestamp: 1662589493
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: c12d4d82
  warmup_time: 0.006730318069458008
  


Done. 1.5s
Done. 1.3s


[2m[36m(RayTrainWorker pid=25381)[0m {'loss': 26.54417610168457, 'avg_loss': 60.39076900482178}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_9fab1d06_4_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-23-53/checkpoint_000003)... 

Result for TorchTrainer_9fab1d06:
  _time_this_iter_s: 14.1835196018219
  _timestamp: 1662589506
  _training_iteration: 4
  avg_loss: 60.39076900482178
  date: 2022-09-07_22-25-06
  done: false
  experiment_id: 883146272b3045babd0d33d5e2460cb6
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 26.54417610168457
  node_ip: 10.150.0.3
  pid: 25250
  should_checkpoint: true
  time_since_restore: 69.84996008872986
  time_this_iter_s: 14.19409966468811
  time_total_s: 69.84996008872986
  timestamp: 1662589506
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 9fab1d06
  warmup_time: 0.007066011428833008
  
[2m[36m(RayTrainWorker pid=25390)[0m {'loss': 188.43038940429688, 'avg_loss': 104.23522281646729}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_c12d4d82_5_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-23-53/checkpoint_000003)... 

Result for TorchTrainer_c12d4d82:
  _time_this_iter_s: 14.600107192993164
  _timestamp: 1662589507
  _training_iteration: 4
  avg_loss: 104.23522281646729
  date: 2022-09-07_22-25-08
  done: false
  experiment_id: 78f22cb608834136a975e70a0a69143b
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 188.43038940429688
  node_ip: 10.150.0.3
  pid: 25260
  should_checkpoint: true
  time_since_restore: 69.8012969493866
  time_this_iter_s: 14.328955888748169
  time_total_s: 69.8012969493866
  timestamp: 1662589508
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: c12d4d82
  warmup_time: 0.006730318069458008
  


Done. 1.7s
Done. 2.8s


[2m[36m(RayTrainWorker pid=25381)[0m {'loss': 44.491188049316406, 'avg_loss': 57.210852813720706}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_9fab1d06_4_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-23-53/checkpoint_000004)... 

Result for TorchTrainer_9fab1d06:
  _time_this_iter_s: 12.728408813476562
  _timestamp: 1662589519
  _training_iteration: 5
  avg_loss: 57.210852813720706
  date: 2022-09-07_22-25-19
  done: false
  experiment_id: 883146272b3045babd0d33d5e2460cb6
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 44.491188049316406
  node_ip: 10.150.0.3
  pid: 25250
  should_checkpoint: true
  time_since_restore: 82.58176970481873
  time_this_iter_s: 12.731809616088867
  time_total_s: 82.58176970481873
  timestamp: 1662589519
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 9fab1d06
  warmup_time: 0.007066011428833008
  
[2m[36m(RayTrainWorker pid=25390)[0m {'loss': 66.33246612548828, 'avg_loss': 96.65467147827148}


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_9fab1d06_4_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-23-53/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='457.001 MB of 457.001 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,▁▇█▆▆
iterations_since_restore,▁▃▅▆█
loss,▁█▇▁▃
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▁▂▁
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,57.21085
iterations_since_restore,5.0
loss,44.49119
time_since_restore,82.58177
time_this_iter_s,12.73181
time_total_s,82.58177
timestamp,1662589519.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00707


Result for TorchTrainer_9fab1d06:
  _time_this_iter_s: 12.728408813476562
  _timestamp: 1662589519
  _training_iteration: 5
  avg_loss: 57.210852813720706
  date: 2022-09-07_22-25-19
  done: true
  experiment_id: 883146272b3045babd0d33d5e2460cb6
  experiment_tag: 4_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0018,max_num_steps=5,num_worker

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_c12d4d82_5_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-23-53/checkpoint_000004)... 

Result for TorchTrainer_c12d4d82:
  _time_this_iter_s: 12.58676791191101
  _timestamp: 1662589520
  _training_iteration: 5
  avg_loss: 96.65467147827148
  date: 2022-09-07_22-25-20
  done: false
  experiment_id: 78f22cb608834136a975e70a0a69143b
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 66.33246612548828
  node_ip: 10.150.0.3
  pid: 25260
  should_checkpoint: true
  time_since_restore: 82.37436127662659
  time_this_iter_s: 12.57306432723999
  time_total_s: 82.37436127662659
  timestamp: 1662589520
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: c12d4d82
  warmup_time: 0.006730318069458008
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_c12d4d82_5_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-23-53/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='457.001 MB of 457.001 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

Result for TorchTrainer_c12d4d82:
  _time_this_iter_s: 12.58676791191101
  _timestamp: 1662589520
  _training_iteration: 5
  avg_loss: 96.65467147827148
  date: 2022-09-07_22-25-20
  done: true
  experiment_id: 78f22cb608834136a975e70a0a69143b
  experiment_tag: 5_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0001,max_num_steps=5,num_workers=

0,1
avg_loss,▁▄▅█▇
iterations_since_restore,▁▃▅▆█
loss,▁▄▃█▂
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▁▂▁
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,96.65467
iterations_since_restore,5.0
loss,66.33247
time_since_restore,82.37436
time_this_iter_s,12.57306
time_total_s,82.37436
timestamp,1662589520.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00673


[2m[36m(RayTrainWorker pid=27044)[0m 2022-09-07 22:25:46,623	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=27050)[0m 2022-09-07 22:25:47,056	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=27044)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=27044)[0m 2022-09-07 22:25:53,873	INFO train_loop_utils.py:300 -- Moving model to device: cpu
[2m[36m(RayTrainWorker pid=27050)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=27050)[0m 2022-09-07 22:25:54,972	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=27044)[0m {'loss': 160.96267700195312, 'avg_loss': 160.96267700195312}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_c19f6e8a_6_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-25-38/checkpoint_000000)... 

Result for TorchTrainer_c19f6e8a:
  _time_this_iter_s: 25.660257816314697
  _timestamp: 1662589572
  _training_iteration: 1
  avg_loss: 160.96267700195312
  date: 2022-09-07_22-26-13
  done: false
  experiment_id: ef2bde5641d44c6382a490b28a83bb7b
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 160.96267700195312
  node_ip: 10.150.0.3
  pid: 26884
  should_checkpoint: true
  time_since_restore: 30.671546697616577
  time_this_iter_s: 30.671546697616577
  time_total_s: 30.671546697616577
  timestamp: 1662589573
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: c19f6e8a
  warmup_time: 0.0072231292724609375
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=27050)[0m {'loss': 56.14323806762695, 'avg_loss': 56.14323806762695}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_ffd13a76_7_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-25-38/checkpoint_000000)... 

Result for TorchTrainer_ffd13a76:
  _time_this_iter_s: 31.4076189994812
  _timestamp: 1662589579
  _training_iteration: 1
  avg_loss: 56.14323806762695
  date: 2022-09-07_22-26-19
  done: false
  experiment_id: 2e0930cd5a3c4e19bfa59e1e512db84c
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 56.14323806762695
  node_ip: 10.150.0.3
  pid: 26919
  should_checkpoint: true
  time_since_restore: 36.00232005119324
  time_this_iter_s: 36.00232005119324
  time_total_s: 36.00232005119324
  timestamp: 1662589579
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: ffd13a76
  warmup_time: 0.0063266754150390625
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=27044)[0m {'loss': 28.569807052612305, 'avg_loss': 94.76624202728271}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_c19f6e8a_6_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-25-38/checkpoint_000001)... 

Result for TorchTrainer_c19f6e8a:
  _time_this_iter_s: 13.757345199584961
  _timestamp: 1662589586
  _training_iteration: 2
  avg_loss: 94.76624202728271
  date: 2022-09-07_22-26-27
  done: false
  experiment_id: ef2bde5641d44c6382a490b28a83bb7b
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 28.569807052612305
  node_ip: 10.150.0.3
  pid: 26884
  should_checkpoint: true
  time_since_restore: 44.49286222457886
  time_this_iter_s: 13.82131552696228
  time_total_s: 44.49286222457886
  timestamp: 1662589587
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: c19f6e8a
  warmup_time: 0.0072231292724609375
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=27050)[0m {'loss': 32.62532424926758, 'avg_loss': 44.384281158447266}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_ffd13a76_7_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-25-38/checkpoint_000001)... 

Result for TorchTrainer_ffd13a76:
  _time_this_iter_s: 18.92997646331787
  _timestamp: 1662589598
  _training_iteration: 2
  avg_loss: 44.384281158447266
  date: 2022-09-07_22-26-38
  done: false
  experiment_id: 2e0930cd5a3c4e19bfa59e1e512db84c
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 32.62532424926758
  node_ip: 10.150.0.3
  pid: 26919
  should_checkpoint: true
  time_since_restore: 54.898523569107056
  time_this_iter_s: 18.89620351791382
  time_total_s: 54.898523569107056
  timestamp: 1662589598
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: ffd13a76
  warmup_time: 0.0063266754150390625
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=27044)[0m {'loss': 31.340723037719727, 'avg_loss': 73.62440236409505}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_c19f6e8a_6_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-25-38/checkpoint_000002)... 

Result for TorchTrainer_c19f6e8a:
  _time_this_iter_s: 13.568504571914673
  _timestamp: 1662589600
  _training_iteration: 3
  avg_loss: 73.62440236409505
  date: 2022-09-07_22-26-40
  done: false
  experiment_id: ef2bde5641d44c6382a490b28a83bb7b
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 31.340723037719727
  node_ip: 10.150.0.3
  pid: 26884
  should_checkpoint: true
  time_since_restore: 57.873053312301636
  time_this_iter_s: 13.380191087722778
  time_total_s: 57.873053312301636
  timestamp: 1662589600
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: c19f6e8a
  warmup_time: 0.0072231292724609375
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=27044)[0m {'loss': 13.940718650817871, 'avg_loss': 58.70348143577576}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_c19f6e8a_6_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-25-38/checkpoint_000003)... 

Result for TorchTrainer_c19f6e8a:
  _time_this_iter_s: 12.754664659500122
  _timestamp: 1662589613
  _training_iteration: 4
  avg_loss: 58.70348143577576
  date: 2022-09-07_22-26-53
  done: false
  experiment_id: ef2bde5641d44c6382a490b28a83bb7b
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 13.940718650817871
  node_ip: 10.150.0.3
  pid: 26884
  should_checkpoint: true
  time_since_restore: 70.63062572479248
  time_this_iter_s: 12.757572412490845
  time_total_s: 70.63062572479248
  timestamp: 1662589613
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: c19f6e8a
  warmup_time: 0.0072231292724609375
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=27050)[0m {'loss': 41.63853454589844, 'avg_loss': 43.469032287597656}
Result for TorchTrainer_ffd13a76:
  _time_this_iter_s: 17.641614198684692
  _timestamp: 1662589615
  _training_iteration: 3
  avg_loss: 43.469032287597656
  date: 2022-09-07_22-26-56
  done: false
  experiment_id: 2e0930cd5a3c4e19bfa59e1e512db84c
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 41.63853454589844
  node_ip: 10.150.0.3
  pid: 26919
  should_checkpoint: true
  time_since_restore: 72.51348996162415
  time_this_iter_s: 17.61496639251709
  time_total_s: 72.51348996162415
  timestamp: 1662589616
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: ffd13a76
  warmup_time: 0.0063266754150390625
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_ffd13a76_7_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-25-38/checkpoint_000002)... Done. 1.5s


[2m[36m(RayTrainWorker pid=27044)[0m {'loss': 16.063161849975586, 'avg_loss': 50.175417518615724}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_c19f6e8a_6_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-25-38/checkpoint_000004)... 

Result for TorchTrainer_c19f6e8a:
  _time_this_iter_s: 14.00586199760437
  _timestamp: 1662589627
  _training_iteration: 5
  avg_loss: 50.175417518615724
  date: 2022-09-07_22-27-07
  done: false
  experiment_id: ef2bde5641d44c6382a490b28a83bb7b
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 16.063161849975586
  node_ip: 10.150.0.3
  pid: 26884
  should_checkpoint: true
  time_since_restore: 84.71576285362244
  time_this_iter_s: 14.085137128829956
  time_total_s: 84.71576285362244
  timestamp: 1662589627
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: c19f6e8a
  warmup_time: 0.0072231292724609375
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_c19f6e8a_6_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-25-38/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='457.025 MB of 457.025 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

[2m[36m(RayTrainWorker pid=27050)[0m {'loss': 29.51937484741211, 'avg_loss': 39.98161792755127}


0,1
avg_loss,█▄▂▂▁
iterations_since_restore,▁▃▅▆█
loss,█▂▂▁▁
time_since_restore,▁▃▅▆█
time_this_iter_s,█▁▁▁▂
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,50.17542
iterations_since_restore,5.0
loss,16.06316
time_since_restore,84.71576
time_this_iter_s,14.08514
time_total_s,84.71576
timestamp,1662589627.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00722


Result for TorchTrainer_c19f6e8a:
  _time_this_iter_s: 14.00586199760437
  _timestamp: 1662589627
  _training_iteration: 5
  avg_loss: 50.175417518615724
  date: 2022-09-07_22-27-07
  done: true
  experiment_id: ef2bde5641d44c6382a490b28a83bb7b
  experiment_tag: 6_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0028,max_num_steps=5,num_workers

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_ffd13a76_7_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-25-38/checkpoint_000003)... 

Result for TorchTrainer_ffd13a76:
  _time_this_iter_s: 18.53560757637024
  _timestamp: 1662589634
  _training_iteration: 4
  avg_loss: 39.98161792755127
  date: 2022-09-07_22-27-14
  done: false
  experiment_id: 2e0930cd5a3c4e19bfa59e1e512db84c
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 29.51937484741211
  node_ip: 10.150.0.3
  pid: 26919
  should_checkpoint: true
  time_since_restore: 91.05100083351135
  time_this_iter_s: 18.537510871887207
  time_total_s: 91.05100083351135
  timestamp: 1662589634
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: ffd13a76
  warmup_time: 0.0063266754150390625
  


Done. 1.0s
[2m[36m(RayTrainWorker pid=28576)[0m 2022-09-07 22:27:25,572	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=28576)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=28576)[0m 2022-09-07 22:27:31,892	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=27050)[0m {'loss': 14.293675422668457, 'avg_loss': 34.84402942657471}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_ffd13a76_7_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-25-38/checkpoint_000004)... 

Result for TorchTrainer_ffd13a76:
  _time_this_iter_s: 18.74686312675476
  _timestamp: 1662589652
  _training_iteration: 5
  avg_loss: 34.84402942657471
  date: 2022-09-07_22-27-33
  done: false
  experiment_id: 2e0930cd5a3c4e19bfa59e1e512db84c
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 14.293675422668457
  node_ip: 10.150.0.3
  pid: 26919
  should_checkpoint: true
  time_since_restore: 109.77454113960266
  time_this_iter_s: 18.72354030609131
  time_total_s: 109.77454113960266
  timestamp: 1662589653
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: ffd13a76
  warmup_time: 0.0063266754150390625
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_ffd13a76_7_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-25-38/checkpoint_000004)... Done. 0.3s


[2m[36m(RayTrainWorker pid=28576)[0m {'loss': 177.0812225341797, 'avg_loss': 177.0812225341797}


VBox(children=(Label(value='457.025 MB of 457.025 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▄▄▃▁
iterations_since_restore,▁▃▅▆█
loss,█▄▆▄▁
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▁▁▁
time_total_s,▁▃▄▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,34.84403
iterations_since_restore,5.0
loss,14.29368
time_since_restore,109.77454
time_this_iter_s,18.72354
time_total_s,109.77454
timestamp,1662589653.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00633


[2m[36m(RayTrainWorker pid=28576)[0m {'loss': 162.8499298095703, 'avg_loss': 169.965576171875}
Result for TorchTrainer_ffd13a76:
  _time_this_iter_s: 18.74686312675476
  _timestamp: 1662589652
  _training_iteration: 5
  avg_loss: 34.84402942657471
  date: 2022-09-07_22-27-33
  done: true
  experiment_id: 2e0930cd5a3c4e19bfa59e1e512db84c
  experiment_tag: 7_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,n

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_0038a5bc_8_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_22-27-18/checkpoint_000000)... 

Result for TorchTrainer_0038a5bc:
  _time_this_iter_s: 11.201198101043701
  _timestamp: 1662589657
  _training_iteration: 1
  avg_loss: 177.0812225341797
  date: 2022-09-07_22-27-37
  done: false
  experiment_id: e4729039e6bb4d94bd7917c7013d3101
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 177.0812225341797
  node_ip: 10.150.0.3
  pid: 28508
  should_checkpoint: true
  time_since_restore: 15.515589475631714
  time_this_iter_s: 15.515589475631714
  time_total_s: 15.515589475631714
  timestamp: 1662589657
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 0038a5bc
  warmup_time: 0.006408214569091797
  


Done. 1.3s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_0038a5bc_8_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_22-27-18/checkpoint_000001)... Done. 1.1s


[2m[36m(RayTrainWorker pid=28576)[0m {'loss': 71.62319946289062, 'avg_loss': 137.18478393554688}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_0038a5bc_8_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_22-27-18/checkpoint_000002)... 

Result for TorchTrainer_0038a5bc:
  _time_this_iter_s: 6.671166896820068
  _timestamp: 1662589668
  _training_iteration: 3
  avg_loss: 137.18478393554688
  date: 2022-09-07_22-27-49
  done: false
  experiment_id: e4729039e6bb4d94bd7917c7013d3101
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 71.62319946289062
  node_ip: 10.150.0.3
  pid: 28508
  should_checkpoint: true
  time_since_restore: 27.05624747276306
  time_this_iter_s: 6.438979148864746
  time_total_s: 27.05624747276306
  timestamp: 1662589669
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 0038a5bc
  warmup_time: 0.006408214569091797
  


[2m[36m(RayTrainWorker pid=28970)[0m 2022-09-07 22:27:50,189	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
Done. 1.0s


[2m[36m(RayTrainWorker pid=28576)[0m {'loss': 18.204444885253906, 'avg_loss': 107.43969917297363}


[2m[36m(RayTrainWorker pid=28970)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_0038a5bc_8_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_22-27-18/checkpoint_000003)... 

Result for TorchTrainer_0038a5bc:
  _time_this_iter_s: 6.832183122634888
  _timestamp: 1662589675
  _training_iteration: 4
  avg_loss: 107.43969917297363
  date: 2022-09-07_22-27-56
  done: false
  experiment_id: e4729039e6bb4d94bd7917c7013d3101
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 18.204444885253906
  node_ip: 10.150.0.3
  pid: 28508
  should_checkpoint: true
  time_since_restore: 33.77626872062683
  time_this_iter_s: 6.7200212478637695
  time_total_s: 33.77626872062683
  timestamp: 1662589676
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 0038a5bc
  warmup_time: 0.006408214569091797
  


[2m[36m(RayTrainWorker pid=28970)[0m 2022-09-07 22:27:56,567	INFO train_loop_utils.py:300 -- Moving model to device: cpu
Done. 1.0s


[2m[36m(RayTrainWorker pid=28576)[0m {'loss': 107.92539978027344, 'avg_loss': 107.53683929443359}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_0038a5bc_8_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_22-27-18/checkpoint_000004)... 

Result for TorchTrainer_0038a5bc:
  _time_this_iter_s: 7.768827199935913
  _timestamp: 1662589683
  _training_iteration: 5
  avg_loss: 107.53683929443359
  date: 2022-09-07_22-28-04
  done: false
  experiment_id: e4729039e6bb4d94bd7917c7013d3101
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 107.92539978027344
  node_ip: 10.150.0.3
  pid: 28508
  should_checkpoint: true
  time_since_restore: 41.71065926551819
  time_this_iter_s: 7.934390544891357
  time_total_s: 41.71065926551819
  timestamp: 1662589684
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 0038a5bc
  warmup_time: 0.006408214569091797
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_0038a5bc_8_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,r_2022-09-07_22-27-18/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='457.048 MB of 457.048 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

[2m[36m(RayTrainWorker pid=28970)[0m {'loss': 73.40284729003906, 'avg_loss': 73.40284729003906}


0,1
avg_loss,█▇▄▁▁
iterations_since_restore,▁▃▅▆█
loss,█▇▃▁▅
time_since_restore,▁▂▄▆█
time_this_iter_s,█▁▂▂▃
time_total_s,▁▂▄▆█
timestamp,▁▂▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,107.53684
iterations_since_restore,5.0
loss,107.9254
time_since_restore,41.71066
time_this_iter_s,7.93439
time_total_s,41.71066
timestamp,1662589684.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00641


Result for TorchTrainer_0038a5bc:
  _time_this_iter_s: 7.768827199935913
  _timestamp: 1662589683
  _training_iteration: 5
  avg_loss: 107.53683929443359
  date: 2022-09-07_22-28-04
  done: true
  experiment_id: e4729039e6bb4d94bd7917c7013d3101
  experiment_tag: 8_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0007,max_num_steps=5,num_workers=

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_3b7a5fee_9_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-27-43/checkpoint_000000)... 

Result for TorchTrainer_3b7a5fee:
  _time_this_iter_s: 18.552804470062256
  _timestamp: 1662589689
  _training_iteration: 1
  avg_loss: 73.40284729003906
  date: 2022-09-07_22-28-09
  done: false
  experiment_id: 3797028b45d04ae39e74971751330000
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 73.40284729003906
  node_ip: 10.150.0.3
  pid: 28875
  should_checkpoint: true
  time_since_restore: 22.950035572052002
  time_this_iter_s: 22.950035572052002
  time_total_s: 22.950035572052002
  timestamp: 1662589689
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 3b7a5fee
  warmup_time: 0.0074100494384765625
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=28970)[0m {'loss': 69.19451141357422, 'avg_loss': 71.29867935180664}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_3b7a5fee_9_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-27-43/checkpoint_000001)... 

Result for TorchTrainer_3b7a5fee:
  _time_this_iter_s: 10.056334972381592
  _timestamp: 1662589699
  _training_iteration: 2
  avg_loss: 71.29867935180664
  date: 2022-09-07_22-28-19
  done: false
  experiment_id: 3797028b45d04ae39e74971751330000
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 69.19451141357422
  node_ip: 10.150.0.3
  pid: 28875
  should_checkpoint: true
  time_since_restore: 33.0240364074707
  time_this_iter_s: 10.074000835418701
  time_total_s: 33.0240364074707
  timestamp: 1662589699
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 3b7a5fee
  warmup_time: 0.0074100494384765625
  


Done. 1.1s
[2m[36m(RayTrainWorker pid=29806)[0m 2022-09-07 22:28:21,455	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=29806)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=29806)[0m 2022-09-07 22:28:27,870	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=28970)[0m {'loss': 22.056886672973633, 'avg_loss': 54.884748458862305}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_3b7a5fee_9_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-27-43/checkpoint_000002)... 

Result for TorchTrainer_3b7a5fee:
  _time_this_iter_s: 10.462164402008057
  _timestamp: 1662589709
  _training_iteration: 3
  avg_loss: 54.884748458862305
  date: 2022-09-07_22-28-30
  done: false
  experiment_id: 3797028b45d04ae39e74971751330000
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 22.056886672973633
  node_ip: 10.150.0.3
  pid: 28875
  should_checkpoint: true
  time_since_restore: 43.431997776031494
  time_this_iter_s: 10.407961368560791
  time_total_s: 43.431997776031494
  timestamp: 1662589710
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 3b7a5fee
  warmup_time: 0.0074100494384765625
  


Done. 1.4s


[2m[36m(RayTrainWorker pid=29806)[0m {'loss': 156.32322692871094, 'avg_loss': 156.32322692871094}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_4a587ab4_10_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-28-13/checkpoint_000000)... 

Result for TorchTrainer_4a587ab4:
  _time_this_iter_s: 12.26977801322937
  _timestamp: 1662589714
  _training_iteration: 1
  avg_loss: 156.32322692871094
  date: 2022-09-07_22-28-34
  done: false
  experiment_id: 9957c869ac65425ca95e1dd2c9154d97
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 156.32322692871094
  node_ip: 10.150.0.3
  pid: 29736
  should_checkpoint: true
  time_since_restore: 17.073624849319458
  time_this_iter_s: 17.073624849319458
  time_total_s: 17.073624849319458
  timestamp: 1662589714
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 4a587ab4
  warmup_time: 0.008248329162597656
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=28970)[0m {'loss': 165.34959411621094, 'avg_loss': 82.50095987319946}
[2m[36m(RayTrainWorker pid=29806)[0m {'loss': 0.32480350136756897, 'avg_loss': 78.32401521503925}
Result for TorchTrainer_3b7a5fee:
  _time_this_iter_s: 9.204148292541504
  _timestamp: 1662589719
  _training_iteration: 4
  avg_loss: 82.50095987319946
  date: 2022-09-07_22-28-39
  done: false
  experiment_id: 3797028b45d04ae39e74971751330000
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 165.34959411621094
  node_ip: 10.150.0.3
  pid: 28875
  should_checkpoint: true
  time_since_restore: 52.582987785339355
  time_this_iter_s: 9.150990009307861
  time_total_s: 52.582987785339355
  timestamp: 1662589719
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 3b7a5fee
  warmup_time: 0.0074100494384765625
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_3b7a5fee_9_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-27-43/checkpoint_000003)... [34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_4a587ab4_10_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-28-13/checkpoint_000001)... 

Result for TorchTrainer_4a587ab4:
  _time_this_iter_s: 4.950843572616577
  _timestamp: 1662589719
  _training_iteration: 2
  avg_loss: 78.32401521503925
  date: 2022-09-07_22-28-40
  done: false
  experiment_id: 9957c869ac65425ca95e1dd2c9154d97
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 0.32480350136756897
  node_ip: 10.150.0.3
  pid: 29736
  should_checkpoint: true
  time_since_restore: 22.21434187889099
  time_this_iter_s: 5.140717029571533
  time_total_s: 22.21434187889099
  timestamp: 1662589720
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 4a587ab4
  warmup_time: 0.008248329162597656
  


Done. 1.3s
Done. 1.3s


[2m[36m(RayTrainWorker pid=29806)[0m {'loss': 1.416263461112976, 'avg_loss': 52.68809796373049}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_4a587ab4_10_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-28-13/checkpoint_000002)... Done. 1.0s


[2m[36m(RayTrainWorker pid=28970)[0m {'loss': 78.53407287597656, 'avg_loss': 81.70758247375488}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_3b7a5fee_9_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-27-43/checkpoint_000004)... 

Result for TorchTrainer_3b7a5fee:
  _time_this_iter_s: 9.151776552200317
  _timestamp: 1662589728
  _training_iteration: 5
  avg_loss: 81.70758247375488
  date: 2022-09-07_22-28-48
  done: false
  experiment_id: 3797028b45d04ae39e74971751330000
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 78.53407287597656
  node_ip: 10.150.0.3
  pid: 28875
  should_checkpoint: true
  time_since_restore: 61.747360706329346
  time_this_iter_s: 9.16437292098999
  time_total_s: 61.747360706329346
  timestamp: 1662589728
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 3b7a5fee
  warmup_time: 0.0074100494384765625
  
[2m[36m(RayTrainWorker pid=29806)[0m {'loss': 55.90283966064453, 'avg_loss': 53.491783387959}


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_4a587ab4_10_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-28-13/checkpoint_000003)... 

Result for TorchTrainer_4a587ab4:
  _time_this_iter_s: 4.843145847320557
  _timestamp: 1662589729
  _training_iteration: 4
  avg_loss: 53.491783387959
  date: 2022-09-07_22-28-49
  done: false
  experiment_id: 9957c869ac65425ca95e1dd2c9154d97
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 55.90283966064453
  node_ip: 10.150.0.3
  pid: 29736
  should_checkpoint: true
  time_since_restore: 31.983159065246582
  time_this_iter_s: 4.83160400390625
  time_total_s: 31.983159065246582
  timestamp: 1662589729
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 4a587ab4
  warmup_time: 0.008248329162597656
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_3b7a5fee_9_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-27-43/checkpoint_000004)... Done. 0.3s


Done. 1.0s


VBox(children=(Label(value='457.048 MB of 457.048 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

[2m[36m(RayTrainWorker pid=29806)[0m {'loss': 19.829809188842773, 'avg_loss': 46.75938854813576}


0,1
avg_loss,▆▅▁██
iterations_since_restore,▁▃▅▆█
loss,▄▃▁█▄
time_since_restore,▁▃▅▆█
time_this_iter_s,█▁▂▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,81.70758
iterations_since_restore,5.0
loss,78.53407
time_since_restore,61.74736
time_this_iter_s,9.16437
time_total_s,61.74736
timestamp,1662589728.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00741


Result for TorchTrainer_3b7a5fee:
  _time_this_iter_s: 9.151776552200317
  _timestamp: 1662589728
  _training_iteration: 5
  avg_loss: 81.70758247375488
  date: 2022-09-07_22-28-48
  done: true
  experiment_id: 3797028b45d04ae39e74971751330000
  experiment_tag: 9_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0080,max_num_steps=5,num_workers=4

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_4a587ab4_10_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-28-13/checkpoint_000004)... 

Result for TorchTrainer_4a587ab4:
  _time_this_iter_s: 4.773626804351807
  _timestamp: 1662589734
  _training_iteration: 5
  avg_loss: 46.75938854813576
  date: 2022-09-07_22-28-54
  done: false
  experiment_id: 9957c869ac65425ca95e1dd2c9154d97
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 19.829809188842773
  node_ip: 10.150.0.3
  pid: 29736
  should_checkpoint: true
  time_since_restore: 36.77384090423584
  time_this_iter_s: 4.790681838989258
  time_total_s: 36.77384090423584
  timestamp: 1662589734
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 4a587ab4
  warmup_time: 0.008248329162597656
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_4a587ab4_10_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-28-13/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='457.048 MB of 457.048 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▃▁▁▁
iterations_since_restore,▁▃▅▆█
loss,█▁▁▃▂
time_since_restore,▁▃▅▆█
time_this_iter_s,█▁▁▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,46.75939
iterations_since_restore,5.0
loss,19.82981
time_since_restore,36.77384
time_this_iter_s,4.79068
time_total_s,36.77384
timestamp,1662589734.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00825


Result for TorchTrainer_4a587ab4:
  _time_this_iter_s: 4.773626804351807
  _timestamp: 1662589734
  _training_iteration: 5
  avg_loss: 46.75938854813576
  date: 2022-09-07_22-28-54
  done: true
  experiment_id: 9957c869ac65425ca95e1dd2c9154d97
  experiment_tag: 10_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0003,max_num_steps=5,num_workers=4

[2m[36m(RayTrainWorker pid=30871)[0m 2022-09-07 22:29:15,511	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=30877)[0m 2022-09-07 22:29:16,196	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=30871)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=30871)[0m 2022-09-07 22:29:22,587	INFO train_loop_utils.py:300 -- Moving model to device: cpu
[2m[36m(RayTrainWorker pid=30877)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=30877)[0m 2022-09-07 22:29:24,723	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=30871)[0m {'loss': 97.75194549560547, 'avg_loss': 97.75194549560547}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_5c8963a6_11_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-29-07/checkpoint_000000)... 

Result for TorchTrainer_5c8963a6:
  _time_this_iter_s: 18.86416459083557
  _timestamp: 1662589774
  _training_iteration: 1
  avg_loss: 97.75194549560547
  date: 2022-09-07_22-29-35
  done: false
  experiment_id: d94910aa352943519d623bd44149187c
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 97.75194549560547
  node_ip: 10.150.0.3
  pid: 30739
  should_checkpoint: true
  time_since_restore: 24.011290311813354
  time_this_iter_s: 24.011290311813354
  time_total_s: 24.011290311813354
  timestamp: 1662589775
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 5c8963a6
  warmup_time: 0.006880998611450195
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=30871)[0m {'loss': 49.547279357910156, 'avg_loss': 73.64961242675781}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_5c8963a6_11_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-29-07/checkpoint_000001)... 

Result for TorchTrainer_5c8963a6:
  _time_this_iter_s: 8.972011089324951
  _timestamp: 1662589783
  _training_iteration: 2
  avg_loss: 73.64961242675781
  date: 2022-09-07_22-29-44
  done: false
  experiment_id: d94910aa352943519d623bd44149187c
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 49.547279357910156
  node_ip: 10.150.0.3
  pid: 30739
  should_checkpoint: true
  time_since_restore: 32.78300929069519
  time_this_iter_s: 8.771718978881836
  time_total_s: 32.78300929069519
  timestamp: 1662589784
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 5c8963a6
  warmup_time: 0.006880998611450195
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=30877)[0m {'loss': 115.76343536376953, 'avg_loss': 115.76343536376953}
Result for TorchTrainer_7c8db18e:
  _time_this_iter_s: 30.498860836029053
  _timestamp: 1662589787
  _training_iteration: 1
  avg_loss: 115.76343536376953
  date: 2022-09-07_22-29-47
  done: false
  experiment_id: 852e126daf544161a052659a010eff6b
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 115.76343536376953
  node_ip: 10.150.0.3
  pid: 30748
  should_checkpoint: true
  time_since_restore: 35.489298820495605
  time_this_iter_s: 35.489298820495605
  time_total_s: 35.489298820495605
  timestamp: 1662589787
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 7c8db18e
  warmup_time: 0.0066182613372802734
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_7c8db18e_12_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-29-08/checkpoint_000000)... Done. 1.1s


[2m[36m(RayTrainWorker pid=30871)[0m {'loss': 26.206785202026367, 'avg_loss': 57.835336685180664}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_5c8963a6_11_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-29-07/checkpoint_000002)... 

Result for TorchTrainer_5c8963a6:
  _time_this_iter_s: 8.99614429473877
  _timestamp: 1662589792
  _training_iteration: 3
  avg_loss: 57.835336685180664
  date: 2022-09-07_22-29-53
  done: false
  experiment_id: d94910aa352943519d623bd44149187c
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 26.206785202026367
  node_ip: 10.150.0.3
  pid: 30739
  should_checkpoint: true
  time_since_restore: 41.86423873901367
  time_this_iter_s: 9.081229448318481
  time_total_s: 41.86423873901367
  timestamp: 1662589793
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 5c8963a6
  warmup_time: 0.006880998611450195
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=30871)[0m {'loss': 21.997163772583008, 'avg_loss': 48.87579345703125}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_5c8963a6_11_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-29-07/checkpoint_000003)... 

Result for TorchTrainer_5c8963a6:
  _time_this_iter_s: 8.49676775932312
  _timestamp: 1662589801
  _training_iteration: 4
  avg_loss: 48.87579345703125
  date: 2022-09-07_22-30-02
  done: false
  experiment_id: d94910aa352943519d623bd44149187c
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 21.997163772583008
  node_ip: 10.150.0.3
  pid: 30739
  should_checkpoint: true
  time_since_restore: 51.07124304771423
  time_this_iter_s: 9.207004308700562
  time_total_s: 51.07124304771423
  timestamp: 1662589802
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 5c8963a6
  warmup_time: 0.006880998611450195
  


Done. 1.7s


[2m[36m(RayTrainWorker pid=30877)[0m {'loss': 80.73584747314453, 'avg_loss': 98.24964141845703}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_7c8db18e_12_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-29-08/checkpoint_000001)... 

Result for TorchTrainer_7c8db18e:
  _time_this_iter_s: 20.060969591140747
  _timestamp: 1662589807
  _training_iteration: 2
  avg_loss: 98.24964141845703
  date: 2022-09-07_22-30-07
  done: false
  experiment_id: 852e126daf544161a052659a010eff6b
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 80.73584747314453
  node_ip: 10.150.0.3
  pid: 30748
  should_checkpoint: true
  time_since_restore: 55.51757502555847
  time_this_iter_s: 20.028276205062866
  time_total_s: 55.51757502555847
  timestamp: 1662589807
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 7c8db18e
  warmup_time: 0.0066182613372802734
  


Done. 2.5s


[2m[36m(RayTrainWorker pid=30871)[0m {'loss': 103.54435729980469, 'avg_loss': 59.809506225585935}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_5c8963a6_11_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-29-07/checkpoint_000004)... 

Result for TorchTrainer_5c8963a6:
  _time_this_iter_s: 10.332557439804077
  _timestamp: 1662589811
  _training_iteration: 5
  avg_loss: 59.809506225585935
  date: 2022-09-07_22-30-12
  done: false
  experiment_id: d94910aa352943519d623bd44149187c
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 103.54435729980469
  node_ip: 10.150.0.3
  pid: 30739
  should_checkpoint: true
  time_since_restore: 60.67612075805664
  time_this_iter_s: 9.604877710342407
  time_total_s: 60.67612075805664
  timestamp: 1662589812
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 5c8963a6
  warmup_time: 0.006880998611450195
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_5c8963a6_11_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-29-07/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='457.090 MB of 457.090 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▅▂▁▃
iterations_since_restore,▁▃▅▆█
loss,█▃▁▁█
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▁▁▁
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,59.80951
iterations_since_restore,5.0
loss,103.54436
time_since_restore,60.67612
time_this_iter_s,9.60488
time_total_s,60.67612
timestamp,1662589812.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00688


Result for TorchTrainer_5c8963a6:
  _time_this_iter_s: 10.332557439804077
  _timestamp: 1662589811
  _training_iteration: 5
  avg_loss: 59.809506225585935
  date: 2022-09-07_22-30-12
  done: true
  experiment_id: d94910aa352943519d623bd44149187c
  experiment_tag: 11_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_satellite,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0006,max_num_steps=5,num_worke

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_7c8db18e_12_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-29-08/checkpoint_000002)... 

Result for TorchTrainer_7c8db18e:
  _time_this_iter_s: 17.56546401977539
  _timestamp: 1662589824
  _training_iteration: 3
  avg_loss: 90.25574493408203
  date: 2022-09-07_22-30-25
  done: false
  experiment_id: 852e126daf544161a052659a010eff6b
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 74.26795196533203
  node_ip: 10.150.0.3
  pid: 30748
  should_checkpoint: true
  time_since_restore: 73.0201358795166
  time_this_iter_s: 17.50256085395813
  time_total_s: 73.0201358795166
  timestamp: 1662589825
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 7c8db18e
  warmup_time: 0.0066182613372802734
  


Done. 1.1s
[2m[36m(RayTrainWorker pid=31969)[0m 2022-09-07 22:30:33,052	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=31969)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=31969)[0m 2022-09-07 22:30:39,070	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=30877)[0m {'loss': 22.43189239501953, 'avg_loss': 73.2997817993164}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_7c8db18e_12_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-29-08/checkpoint_000003)... 

Result for TorchTrainer_7c8db18e:
  _time_this_iter_s: 20.520598888397217
  _timestamp: 1662589845
  _training_iteration: 4
  avg_loss: 73.2997817993164
  date: 2022-09-07_22-30-45
  done: false
  experiment_id: 852e126daf544161a052659a010eff6b
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 22.43189239501953
  node_ip: 10.150.0.3
  pid: 30748
  should_checkpoint: true
  time_since_restore: 93.5247757434845
  time_this_iter_s: 20.504639863967896
  time_total_s: 93.5247757434845
  timestamp: 1662589845
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 7c8db18e
  warmup_time: 0.0066182613372802734
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=31969)[0m {'loss': 57.797576904296875, 'avg_loss': 57.797576904296875}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_7ce876fa_13_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-30-22/checkpoint_000000)... 

Result for TorchTrainer_7ce876fa:
  _time_this_iter_s: 26.334730863571167
  _timestamp: 1662589859
  _training_iteration: 1
  avg_loss: 57.797576904296875
  date: 2022-09-07_22-31-00
  done: false
  experiment_id: 583d325ebb214e7b8e8385e2747de52a
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 57.797576904296875
  node_ip: 10.150.0.3
  pid: 31872
  should_checkpoint: true
  time_since_restore: 30.620442867279053
  time_this_iter_s: 30.620442867279053
  time_total_s: 30.620442867279053
  timestamp: 1662589860
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 7ce876fa
  warmup_time: 0.006645917892456055
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=30877)[0m {'loss': 69.86618041992188, 'avg_loss': 72.6130615234375}
Result for TorchTrainer_7c8db18e:
  _time_this_iter_s: 19.15438151359558
  _timestamp: 1662589864
  _training_iteration: 5
  avg_loss: 72.6130615234375
  date: 2022-09-07_22-31-05
  done: false
  experiment_id: 852e126daf544161a052659a010eff6b
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 69.86618041992188
  node_ip: 10.150.0.3
  pid: 30748
  should_checkpoint: true
  time_since_restore: 112.70063400268555
  time_this_iter_s: 19.17585825920105
  time_total_s: 112.70063400268555
  timestamp: 1662589865
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 7c8db18e
  warmup_time: 0.0066182613372802734
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_7c8db18e_12_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-29-08/checkpoint_000004)... Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_7c8db18e_12_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-29-08/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='457.090 MB of 457.090 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▅▄▁▁
iterations_since_restore,▁▃▅▆█
loss,█▅▅▁▅
time_since_restore,▁▃▄▆█
time_this_iter_s,█▂▁▂▂
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,72.61306
iterations_since_restore,5.0
loss,69.86618
time_since_restore,112.70063
time_this_iter_s,19.17586
time_total_s,112.70063
timestamp,1662589865.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00662


Result for TorchTrainer_7c8db18e:
  _time_this_iter_s: 19.15438151359558
  _timestamp: 1662589864
  _training_iteration: 5
  avg_loss: 72.6130615234375
  date: 2022-09-07_22-31-05
  done: true
  experiment_id: 852e126daf544161a052659a010eff6b
  experiment_tag: 12_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0001,max_num_steps=5,num_workers=4

[2m[36m(RayTrainWorker pid=328)[0m 2022-09-07 22:31:19,983	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]


[2m[36m(RayTrainWorker pid=31969)[0m {'loss': 38.288490295410156, 'avg_loss': 48.043033599853516}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_7ce876fa_13_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-30-22/checkpoint_000001)... 

Result for TorchTrainer_7ce876fa:
  _time_this_iter_s: 20.465931177139282
  _timestamp: 1662589880
  _training_iteration: 2
  avg_loss: 48.043033599853516
  date: 2022-09-07_22-31-20
  done: false
  experiment_id: 583d325ebb214e7b8e8385e2747de52a
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 38.288490295410156
  node_ip: 10.150.0.3
  pid: 31872
  should_checkpoint: true
  time_since_restore: 51.056318521499634
  time_this_iter_s: 20.43587565422058
  time_total_s: 51.056318521499634
  timestamp: 1662589880
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 7ce876fa
  warmup_time: 0.006645917892456055
  


Done. 1.0s
[2m[36m(RayTrainWorker pid=328)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=328)[0m 2022-09-07 22:31:27,943	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=31969)[0m {'loss': 120.09806060791016, 'avg_loss': 72.06137593587239}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_7ce876fa_13_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-30-22/checkpoint_000002)... 

Result for TorchTrainer_7ce876fa:
  _time_this_iter_s: 21.585253715515137
  _timestamp: 1662589902
  _training_iteration: 3
  avg_loss: 72.06137593587239
  date: 2022-09-07_22-31-42
  done: false
  experiment_id: 583d325ebb214e7b8e8385e2747de52a
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 120.09806060791016
  node_ip: 10.150.0.3
  pid: 31872
  should_checkpoint: true
  time_since_restore: 72.59062266349792
  time_this_iter_s: 21.53430414199829
  time_total_s: 72.59062266349792
  timestamp: 1662589902
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 7ce876fa
  warmup_time: 0.006645917892456055
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=328)[0m {'loss': 89.77851867675781, 'avg_loss': 89.77851867675781}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_a9126326_14_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-31-13/checkpoint_000000)... 

Result for TorchTrainer_a9126326:
  _time_this_iter_s: 30.344859838485718
  _timestamp: 1662589910
  _training_iteration: 1
  avg_loss: 89.77851867675781
  date: 2022-09-07_22-31-51
  done: false
  experiment_id: 3a895807c2574315830a1518af33816e
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 89.77851867675781
  node_ip: 10.150.0.3
  pid: 32730
  should_checkpoint: true
  time_since_restore: 33.79382133483887
  time_this_iter_s: 33.79382133483887
  time_total_s: 33.79382133483887
  timestamp: 1662589911
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: a9126326
  warmup_time: 0.005923032760620117
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=31969)[0m {'loss': 101.74803924560547, 'avg_loss': 79.48304176330566}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_7ce876fa_13_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-30-22/checkpoint_000003)... 

Result for TorchTrainer_7ce876fa:
  _time_this_iter_s: 17.701676607131958
  _timestamp: 1662589919
  _training_iteration: 4
  avg_loss: 79.48304176330566
  date: 2022-09-07_22-32-00
  done: false
  experiment_id: 583d325ebb214e7b8e8385e2747de52a
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 101.74803924560547
  node_ip: 10.150.0.3
  pid: 31872
  should_checkpoint: true
  time_since_restore: 90.27450060844421
  time_this_iter_s: 17.68387794494629
  time_total_s: 90.27450060844421
  timestamp: 1662589920
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 7ce876fa
  warmup_time: 0.006645917892456055
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=328)[0m {'loss': 46.26633071899414, 'avg_loss': 68.02242469787598}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_a9126326_14_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-31-13/checkpoint_000001)... 

Result for TorchTrainer_a9126326:
  _time_this_iter_s: 19.811874389648438
  _timestamp: 1662589930
  _training_iteration: 2
  avg_loss: 68.02242469787598
  date: 2022-09-07_22-32-11
  done: false
  experiment_id: 3a895807c2574315830a1518af33816e
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 46.26633071899414
  node_ip: 10.150.0.3
  pid: 32730
  should_checkpoint: true
  time_since_restore: 53.5701949596405
  time_this_iter_s: 19.776373624801636
  time_total_s: 53.5701949596405
  timestamp: 1662589931
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: a9126326
  warmup_time: 0.005923032760620117
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=31969)[0m {'loss': 55.97064208984375, 'avg_loss': 74.78056182861329}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_7ce876fa_13_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-30-22/checkpoint_000004)... 

Result for TorchTrainer_7ce876fa:
  _time_this_iter_s: 19.055661916732788
  _timestamp: 1662589938
  _training_iteration: 5
  avg_loss: 74.78056182861329
  date: 2022-09-07_22-32-19
  done: false
  experiment_id: 583d325ebb214e7b8e8385e2747de52a
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 55.97064208984375
  node_ip: 10.150.0.3
  pid: 31872
  should_checkpoint: true
  time_since_restore: 109.34258794784546
  time_this_iter_s: 19.068087339401245
  time_total_s: 109.34258794784546
  timestamp: 1662589939
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 7ce876fa
  warmup_time: 0.006645917892456055
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_7ce876fa_13_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-30-22/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='457.090 MB of 457.090 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

[2m[36m(RayTrainWorker pid=328)[0m {'loss': 76.20713806152344, 'avg_loss': 70.75066248575847}


0,1
avg_loss,▃▁▆█▇
iterations_since_restore,▁▃▅▆█
loss,▃▁█▆▃
time_since_restore,▁▃▅▆█
time_this_iter_s,█▂▃▁▂
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,74.78056
iterations_since_restore,5.0
loss,55.97064
time_since_restore,109.34259
time_this_iter_s,19.06809
time_total_s,109.34259
timestamp,1662589939.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00665


Result for TorchTrainer_7ce876fa:
  _time_this_iter_s: 19.055661916732788
  _timestamp: 1662589938
  _training_iteration: 5
  avg_loss: 74.78056182861329
  date: 2022-09-07_22-32-19
  done: true
  experiment_id: 583d325ebb214e7b8e8385e2747de52a
  experiment_tag: 13_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0001,max_num_steps=5,num_workers

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_a9126326_14_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-31-13/checkpoint_000002)... 

Result for TorchTrainer_a9126326:
  _time_this_iter_s: 17.328635931015015
  _timestamp: 1662589947
  _training_iteration: 3
  avg_loss: 70.75066248575847
  date: 2022-09-07_22-32-28
  done: false
  experiment_id: 3a895807c2574315830a1518af33816e
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 76.20713806152344
  node_ip: 10.150.0.3
  pid: 32730
  should_checkpoint: true
  time_since_restore: 70.862961769104
  time_this_iter_s: 17.2927668094635
  time_total_s: 70.862961769104
  timestamp: 1662589948
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: a9126326
  warmup_time: 0.005923032760620117
  


Done. 1.0s
[2m[36m(RayTrainWorker pid=1320)[0m 2022-09-07 22:32:38,017	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=1320)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=1320)[0m 2022-09-07 22:32:44,048	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=328)[0m {'loss': 105.7643814086914, 'avg_loss': 79.5040922164917}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_a9126326_14_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-31-13/checkpoint_000003)... 

Result for TorchTrainer_a9126326:
  _time_this_iter_s: 20.202338695526123
  _timestamp: 1662589968
  _training_iteration: 4
  avg_loss: 79.5040922164917
  date: 2022-09-07_22-32-48
  done: false
  experiment_id: 3a895807c2574315830a1518af33816e
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 105.7643814086914
  node_ip: 10.150.0.3
  pid: 32730
  should_checkpoint: true
  time_since_restore: 91.03654646873474
  time_this_iter_s: 20.173584699630737
  time_total_s: 91.03654646873474
  timestamp: 1662589968
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: a9126326
  warmup_time: 0.005923032760620117
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=1320)[0m {'loss': 160.4739990234375, 'avg_loss': 160.4739990234375}
Result for TorchTrainer_c7d92f7e:
  _time_this_iter_s: 12.042727708816528
  _timestamp: 1662589970
  _training_iteration: 1
  avg_loss: 160.4739990234375
  date: 2022-09-07_22-32-51
  done: false
  experiment_id: 15898c60be314f2a9a578ac6564432fd
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 160.4739990234375
  node_ip: 10.150.0.3
  pid: 1240
  should_checkpoint: true
  time_since_restore: 16.28026556968689
  time_this_iter_s: 16.28026556968689
  time_total_s: 16.28026556968689
  timestamp: 1662589971
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: c7d92f7e
  warmup_time: 0.00631403923034668
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_c7d92f7e_15_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-32-30/checkpoint_000000)... Done. 1.1s


[2m[36m(RayTrainWorker pid=1320)[0m {'loss': 83.2232894897461, 'avg_loss': 121.8486442565918}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_c7d92f7e_15_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-32-30/checkpoint_000001)... Done. 1.0s


[2m[36m(RayTrainWorker pid=1320)[0m {'loss': 0.08007790148258209, 'avg_loss': 81.25912213822205}
Result for TorchTrainer_c7d92f7e:
  _time_this_iter_s: 4.783747673034668
  _timestamp: 1662589980
  _training_iteration: 3
  avg_loss: 81.25912213822205
  date: 2022-09-07_22-33-00
  done: false
  experiment_id: 15898c60be314f2a9a578ac6564432fd
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 0.08007790148258209
  node_ip: 10.150.0.3
  pid: 1240
  should_checkpoint: true
  time_since_restore: 25.798041582107544
  time_this_iter_s: 4.759238958358765
  time_total_s: 25.798041582107544
  timestamp: 1662589980
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: c7d92f7e
  warmup_time: 0.00631403923034668
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_c7d92f7e_15_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-32-30/checkpoint_000002)... Done. 1.0s


[2m[36m(RayTrainWorker pid=1320)[0m {'loss': 105.5950698852539, 'avg_loss': 87.34310907498002}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_c7d92f7e_15_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-32-30/checkpoint_000003)... 

Result for TorchTrainer_c7d92f7e:
  _time_this_iter_s: 5.9904725551605225
  _timestamp: 1662589986
  _training_iteration: 4
  avg_loss: 87.34310907498002
  date: 2022-09-07_22-33-06
  done: false
  experiment_id: 15898c60be314f2a9a578ac6564432fd
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 105.5950698852539
  node_ip: 10.150.0.3
  pid: 1240
  should_checkpoint: true
  time_since_restore: 31.85336995124817
  time_this_iter_s: 6.055328369140625
  time_total_s: 31.85336995124817
  timestamp: 1662589986
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: c7d92f7e
  warmup_time: 0.00631403923034668
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=328)[0m {'loss': 49.49141311645508, 'avg_loss': 73.50155639648438}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_a9126326_14_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-31-13/checkpoint_000004)... 

Result for TorchTrainer_a9126326:
  _time_this_iter_s: 19.444671392440796
  _timestamp: 1662589987
  _training_iteration: 5
  avg_loss: 73.50155639648438
  date: 2022-09-07_22-33-08
  done: false
  experiment_id: 3a895807c2574315830a1518af33816e
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 49.49141311645508
  node_ip: 10.150.0.3
  pid: 32730
  should_checkpoint: true
  time_since_restore: 110.51356482505798
  time_this_iter_s: 19.477018356323242
  time_total_s: 110.51356482505798
  timestamp: 1662589988
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: a9126326
  warmup_time: 0.005923032760620117
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_a9126326_14_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-31-13/checkpoint_000004)... Done. 0.3s


[2m[36m(RayTrainWorker pid=1320)[0m {'loss': 46.45763397216797, 'avg_loss': 79.16601405441762}


VBox(children=(Label(value='457.112 MB of 457.112 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▁▂▅▃
iterations_since_restore,▁▃▅▆█
loss,▆▁▅█▁
time_since_restore,▁▃▄▆█
time_this_iter_s,█▂▁▂▂
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,73.50156
iterations_since_restore,5.0
loss,49.49141
time_since_restore,110.51356
time_this_iter_s,19.47702
time_total_s,110.51356
timestamp,1662589988.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00592


Result for TorchTrainer_a9126326:
  _time_this_iter_s: 19.444671392440796
  _timestamp: 1662589987
  _training_iteration: 5
  avg_loss: 73.50155639648438
  date: 2022-09-07_22-33-08
  done: true
  experiment_id: 3a895807c2574315830a1518af33816e
  experiment_tag: 14_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0001,max_num_steps=5,num_workers

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_c7d92f7e_15_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-32-30/checkpoint_000004)... 

Result for TorchTrainer_c7d92f7e:
  _time_this_iter_s: 4.924676418304443
  _timestamp: 1662589991
  _training_iteration: 5
  avg_loss: 79.16601405441762
  date: 2022-09-07_22-33-11
  done: false
  experiment_id: 15898c60be314f2a9a578ac6564432fd
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 46.45763397216797
  node_ip: 10.150.0.3
  pid: 1240
  should_checkpoint: true
  time_since_restore: 36.70714235305786
  time_this_iter_s: 4.853772401809692
  time_total_s: 36.70714235305786
  timestamp: 1662589991
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: c7d92f7e
  warmup_time: 0.00631403923034668
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_c7d92f7e_15_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-32-30/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='457.112 MB of 457.112 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▅▁▂▁
iterations_since_restore,▁▃▅▆█
loss,█▅▁▆▃
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▁▂▁
time_total_s,▁▃▄▆█
timestamp,▁▂▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,79.16601
iterations_since_restore,5.0
loss,46.45763
time_since_restore,36.70714
time_this_iter_s,4.85377
time_total_s,36.70714
timestamp,1662589991.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00631


Result for TorchTrainer_c7d92f7e:
  _time_this_iter_s: 4.924676418304443
  _timestamp: 1662589991
  _training_iteration: 5
  avg_loss: 79.16601405441762
  date: 2022-09-07_22-33-11
  done: true
  experiment_id: 15898c60be314f2a9a578ac6564432fd
  experiment_tag: 15_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0002,max_num_steps=5,num_workers=4

[2m[36m(RayTrainWorker pid=2315)[0m 2022-09-07 22:33:31,222	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=2324)[0m 2022-09-07 22:33:32,387	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=2315)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=2315)[0m 2022-09-07 22:33:39,301	INFO train_loop_utils.py:300 -- Moving model to device: cpu
[2m[36m(RayTrainWorker pid=2324)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=2324)[0m 2022-09-07 22:33:40,853	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=2315)[0m {'loss': 145.06973266601562, 'avg_loss': 145.06973266601562}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_f5d3e478_16_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-33-23/checkpoint_000000)... 

Result for TorchTrainer_f5d3e478:
  _time_this_iter_s: 15.404920101165771
  _timestamp: 1662590027
  _training_iteration: 1
  avg_loss: 145.06973266601562
  date: 2022-09-07_22-33-47
  done: false
  experiment_id: 3efe267366704a4789979162689c2175
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 145.06973266601562
  node_ip: 10.150.0.3
  pid: 2178
  should_checkpoint: true
  time_since_restore: 20.26424241065979
  time_this_iter_s: 20.26424241065979
  time_total_s: 20.26424241065979
  timestamp: 1662590027
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: f5d3e478
  warmup_time: 0.008968591690063477
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=2315)[0m {'loss': 0.11786077171564102, 'avg_loss': 72.59379671886563}
[2m[36m(RayTrainWorker pid=2324)[0m {'loss': 0.3762845993041992, 'avg_loss': 0.3762845993041992}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_f5d3e478_16_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-33-23/checkpoint_000001)... [34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_15265464_17_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-33-24/checkpoint_000000)... 

Result for TorchTrainer_15265464:
  _time_this_iter_s: 19.399818181991577
  _timestamp: 1662590032
  _training_iteration: 1
  avg_loss: 0.3762845993041992
  date: 2022-09-07_22-33-52
  done: false
  experiment_id: 817bf91e82a34130b95a1f3761627e91
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 0.3762845993041992
  node_ip: 10.150.0.3
  pid: 2189
  should_checkpoint: true
  time_since_restore: 24.44707727432251
  time_this_iter_s: 24.44707727432251
  time_total_s: 24.44707727432251
  timestamp: 1662590032
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: '15265464'
  warmup_time: 0.0057942867279052734
  


Done. 1.0s
Done. 1.3s


[2m[36m(RayTrainWorker pid=2315)[0m {'loss': 200.1158447265625, 'avg_loss': 115.10114605476458}
Result for TorchTrainer_f5d3e478:
  _time_this_iter_s: 5.256723165512085
  _timestamp: 1662590037
  _training_iteration: 3
  avg_loss: 115.10114605476458
  date: 2022-09-07_22-33-57
  done: false
  experiment_id: 3efe267366704a4789979162689c2175
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 200.1158447265625
  node_ip: 10.150.0.3
  pid: 2178
  should_checkpoint: true
  time_since_restore: 30.167164087295532
  time_this_iter_s: 5.180816650390625
  time_total_s: 30.167164087295532
  timestamp: 1662590037
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: f5d3e478
  warmup_time: 0.008968591690063477
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_f5d3e478_16_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-33-23/checkpoint_000002)... Done. 1.0s


[2m[36m(RayTrainWorker pid=2324)[0m {'loss': 11.997621536254883, 'avg_loss': 6.186953067779541}
[2m[36m(RayTrainWorker pid=2315)[0m {'loss': 67.13414001464844, 'avg_loss': 103.10939454473555}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_15265464_17_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-33-24/checkpoint_000001)... 

Result for TorchTrainer_15265464:
  _time_this_iter_s: 9.398245811462402
  _timestamp: 1662590041
  _training_iteration: 2
  avg_loss: 6.186953067779541
  date: 2022-09-07_22-34-02
  done: false
  experiment_id: 817bf91e82a34130b95a1f3761627e91
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 11.997621536254883
  node_ip: 10.150.0.3
  pid: 2189
  should_checkpoint: true
  time_since_restore: 34.076884508132935
  time_this_iter_s: 9.629807233810425
  time_total_s: 34.076884508132935
  timestamp: 1662590042
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: '15265464'
  warmup_time: 0.0057942867279052734
  
Result for TorchTrainer_f5d3e478:
  _time_this_iter_s: 4.636677026748657
  _timestamp: 1662590041
  _training_iteration: 4
  avg_loss: 103.10939454473555
  date: 2022-09-07_22-34-02
  done: false
  experiment_id: 3efe267366704a4789979162689c2175
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 67.13414001464844
  node_ip: 10.150.0.3
  pid: 217

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_f5d3e478_16_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-33-23/checkpoint_000003)... Done. 1.5s
Done. 1.7s


[2m[36m(RayTrainWorker pid=2315)[0m {'loss': 37.48554611206055, 'avg_loss': 89.98462485820055}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_f5d3e478_16_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-33-23/checkpoint_000004)... 

Result for TorchTrainer_f5d3e478:
  _time_this_iter_s: 6.303240776062012
  _timestamp: 1662590048
  _training_iteration: 5
  avg_loss: 89.98462485820055
  date: 2022-09-07_22-34-08
  done: false
  experiment_id: 3efe267366704a4789979162689c2175
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 37.48554611206055
  node_ip: 10.150.0.3
  pid: 2178
  should_checkpoint: true
  time_since_restore: 41.13374900817871
  time_this_iter_s: 5.692748069763184
  time_total_s: 41.13374900817871
  timestamp: 1662590048
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: f5d3e478
  warmup_time: 0.008968591690063477
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_f5d3e478_16_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,_2022-09-07_22-33-23/checkpoint_000004)... Done. 0.3s


[2m[36m(RayTrainWorker pid=2324)[0m {'loss': 156.16061401367188, 'avg_loss': 56.17817338307699}


VBox(children=(Label(value='457.135 MB of 457.135 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▁▅▄▃
iterations_since_restore,▁▃▅▆█
loss,▆▁█▃▂
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▁▁▁
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,89.98462
iterations_since_restore,5.0
loss,37.48555
time_since_restore,41.13375
time_this_iter_s,5.69275
time_total_s,41.13375
timestamp,1662590048.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00897


Result for TorchTrainer_f5d3e478:
  _time_this_iter_s: 6.303240776062012
  _timestamp: 1662590048
  _training_iteration: 5
  avg_loss: 89.98462485820055
  date: 2022-09-07_22-34-08
  done: true
  experiment_id: 3efe267366704a4789979162689c2175
  experiment_tag: 16_batch_size=6.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0003,max_num_steps=5,num_workers=4

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_15265464_17_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-33-24/checkpoint_000002)... 

Result for TorchTrainer_15265464:
  _time_this_iter_s: 10.981924772262573
  _timestamp: 1662590052
  _training_iteration: 3
  avg_loss: 56.17817338307699
  date: 2022-09-07_22-34-13
  done: false
  experiment_id: 817bf91e82a34130b95a1f3761627e91
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 156.16061401367188
  node_ip: 10.150.0.3
  pid: 2189
  should_checkpoint: true
  time_since_restore: 44.60515093803406
  time_this_iter_s: 10.528266429901123
  time_total_s: 44.60515093803406
  timestamp: 1662590053
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: '15265464'
  warmup_time: 0.0057942867279052734
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=2324)[0m {'loss': 44.489566802978516, 'avg_loss': 53.25602173805237}
Result for TorchTrainer_15265464:
  _time_this_iter_s: 9.347875833511353
  _timestamp: 1662590062
  _training_iteration: 4
  avg_loss: 53.25602173805237
  date: 2022-09-07_22-34-22
  done: false
  experiment_id: 817bf91e82a34130b95a1f3761627e91
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 44.489566802978516
  node_ip: 10.150.0.3
  pid: 2189
  should_checkpoint: true
  time_since_restore: 54.025665283203125
  time_this_iter_s: 9.420514345169067
  time_total_s: 54.025665283203125
  timestamp: 1662590062
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: '15265464'
  warmup_time: 0.0057942867279052734
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_15265464_17_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-33-24/checkpoint_000003)... Done. 1.1s
[2m[36m(RayTrainWorker pid=3359)[0m 2022-09-07 22:34:25,476	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=3359)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=3359)[0m 2022-09-07 22:34:31,787	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=2324)[0m {'loss': 137.02883911132812, 'avg_loss': 70.01058521270753}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_15265464_17_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-33-24/checkpoint_000004)... 

Result for TorchTrainer_15265464:
  _time_this_iter_s: 11.592853546142578
  _timestamp: 1662590073
  _training_iteration: 5
  avg_loss: 70.01058521270753
  date: 2022-09-07_22-34-34
  done: false
  experiment_id: 817bf91e82a34130b95a1f3761627e91
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 137.02883911132812
  node_ip: 10.150.0.3
  pid: 2189
  should_checkpoint: true
  time_since_restore: 65.83392667770386
  time_this_iter_s: 11.808261394500732
  time_total_s: 65.83392667770386
  timestamp: 1662590074
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: '15265464'
  warmup_time: 0.0057942867279052734
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_15265464_17_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-33-24/checkpoint_000004)... Done. 0.2s


VBox(children=(Label(value='457.135 MB of 457.135 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,▁▂▇▆█
iterations_since_restore,▁▃▅▆█
loss,▁▂█▃▇
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▂▁▂
time_total_s,▁▃▄▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,70.01059
iterations_since_restore,5.0
loss,137.02884
time_since_restore,65.83393
time_this_iter_s,11.80826
time_total_s,65.83393
timestamp,1662590074.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00579


Result for TorchTrainer_15265464:
  _time_this_iter_s: 11.592853546142578
  _timestamp: 1662590073
  _training_iteration: 5
  avg_loss: 70.01058521270753
  date: 2022-09-07_22-34-34
  done: true
  experiment_id: 817bf91e82a34130b95a1f3761627e91
  experiment_tag: 17_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0003,max_num_steps=5,num_workers

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_159ff6e8_18_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-34-18/checkpoint_000000)... 

Result for TorchTrainer_159ff6e8:
  _time_this_iter_s: 16.67856788635254
  _timestamp: 1662590082
  _training_iteration: 1
  avg_loss: 15.795732498168945
  date: 2022-09-07_22-34-43
  done: false
  experiment_id: bbe776729fbd4182ad958f66eaea87b1
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 15.795732498168945
  node_ip: 10.150.0.3
  pid: 3288
  should_checkpoint: true
  time_since_restore: 21.183704376220703
  time_this_iter_s: 21.183704376220703
  time_total_s: 21.183704376220703
  timestamp: 1662590083
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 159ff6e8
  warmup_time: 0.009635448455810547
  


Done. 1.1s
[2m[36m(RayTrainWorker pid=3728)[0m 2022-09-07 22:34:50,562	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]


[2m[36m(RayTrainWorker pid=3359)[0m {'loss': 43.568843841552734, 'avg_loss': 29.68228816986084}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_159ff6e8_18_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-34-18/checkpoint_000001)... 

Result for TorchTrainer_159ff6e8:
  _time_this_iter_s: 11.980476379394531
  _timestamp: 1662590094
  _training_iteration: 2
  avg_loss: 29.68228816986084
  date: 2022-09-07_22-34-55
  done: false
  experiment_id: bbe776729fbd4182ad958f66eaea87b1
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 43.568843841552734
  node_ip: 10.150.0.3
  pid: 3288
  should_checkpoint: true
  time_since_restore: 33.21362566947937
  time_this_iter_s: 12.029921293258667
  time_total_s: 33.21362566947937
  timestamp: 1662590095
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 159ff6e8
  warmup_time: 0.009635448455810547
  


[2m[36m(RayTrainWorker pid=3728)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=3728)[0m 2022-09-07 22:34:56,715	INFO train_loop_utils.py:300 -- Moving model to device: cpu
Done. 1.7s


[2m[36m(RayTrainWorker pid=3359)[0m {'loss': 5.805562973022461, 'avg_loss': 21.723379770914715}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_159ff6e8_18_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-34-18/checkpoint_000002)... 

Result for TorchTrainer_159ff6e8:
  _time_this_iter_s: 11.766469717025757
  _timestamp: 1662590106
  _training_iteration: 3
  avg_loss: 21.723379770914715
  date: 2022-09-07_22-35-07
  done: false
  experiment_id: bbe776729fbd4182ad958f66eaea87b1
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 5.805562973022461
  node_ip: 10.150.0.3
  pid: 3288
  should_checkpoint: true
  time_since_restore: 44.988534450531006
  time_this_iter_s: 11.774908781051636
  time_total_s: 44.988534450531006
  timestamp: 1662590107
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 159ff6e8
  warmup_time: 0.009635448455810547
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=3728)[0m {'loss': 5.4092206954956055, 'avg_loss': 5.4092206954956055}
Result for TorchTrainer_35c84b5a:
  _time_this_iter_s: 19.037570476531982
  _timestamp: 1662590110
  _training_iteration: 1
  avg_loss: 5.4092206954956055
  date: 2022-09-07_22-35-10
  done: false
  experiment_id: 668f4e8699cd4ec4aeecd0f6ba224cbf
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 5.4092206954956055
  node_ip: 10.150.0.3
  pid: 3654
  should_checkpoint: true
  time_since_restore: 23.22676968574524
  time_this_iter_s: 23.22676968574524
  time_total_s: 23.22676968574524
  timestamp: 1662590110
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 35c84b5a
  warmup_time: 0.0054776668548583984
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_35c84b5a_19_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-34-43/checkpoint_000000)... Done. 1.0s


[2m[36m(RayTrainWorker pid=3359)[0m {'loss': 27.10579490661621, 'avg_loss': 23.068983554840088}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_159ff6e8_18_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-34-18/checkpoint_000003)... 

Result for TorchTrainer_159ff6e8:
  _time_this_iter_s: 9.239076375961304
  _timestamp: 1662590115
  _training_iteration: 4
  avg_loss: 23.068983554840088
  date: 2022-09-07_22-35-16
  done: false
  experiment_id: bbe776729fbd4182ad958f66eaea87b1
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 27.10579490661621
  node_ip: 10.150.0.3
  pid: 3288
  should_checkpoint: true
  time_since_restore: 54.121458768844604
  time_this_iter_s: 9.132924318313599
  time_total_s: 54.121458768844604
  timestamp: 1662590116
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 159ff6e8
  warmup_time: 0.009635448455810547
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=3728)[0m {'loss': 78.5600357055664, 'avg_loss': 41.984628200531006}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_35c84b5a_19_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-34-43/checkpoint_000001)... 

Result for TorchTrainer_35c84b5a:
  _time_this_iter_s: 9.531437158584595
  _timestamp: 1662590119
  _training_iteration: 2
  avg_loss: 41.984628200531006
  date: 2022-09-07_22-35-20
  done: false
  experiment_id: 668f4e8699cd4ec4aeecd0f6ba224cbf
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 78.5600357055664
  node_ip: 10.150.0.3
  pid: 3654
  should_checkpoint: true
  time_since_restore: 32.67402482032776
  time_this_iter_s: 9.44725513458252
  time_total_s: 32.67402482032776
  timestamp: 1662590120
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 35c84b5a
  warmup_time: 0.0054776668548583984
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=3359)[0m {'loss': 148.98915100097656, 'avg_loss': 48.253017044067384}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_159ff6e8_18_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-34-18/checkpoint_000004)... 

Result for TorchTrainer_159ff6e8:
  _time_this_iter_s: 8.941566467285156
  _timestamp: 1662590124
  _training_iteration: 5
  avg_loss: 48.253017044067384
  date: 2022-09-07_22-35-25
  done: false
  experiment_id: bbe776729fbd4182ad958f66eaea87b1
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 148.98915100097656
  node_ip: 10.150.0.3
  pid: 3288
  should_checkpoint: true
  time_since_restore: 63.080604553222656
  time_this_iter_s: 8.959145784378052
  time_total_s: 63.080604553222656
  timestamp: 1662590125
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 159ff6e8
  warmup_time: 0.009635448455810547
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_159ff6e8_18_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-34-18/checkpoint_000004)... Done. 0.3s


[2m[36m(RayTrainWorker pid=3728)[0m {'loss': 74.47563934326172, 'avg_loss': 52.81496524810791}


VBox(children=(Label(value='457.135 MB of 457.135 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,▁▄▂▃█
iterations_since_restore,▁▃▅▆█
loss,▁▃▁▂█
time_since_restore,▁▃▅▇█
time_this_iter_s,█▃▃▁▁
time_total_s,▁▃▅▇█
timestamp,▁▃▅▇█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,48.25302
iterations_since_restore,5.0
loss,148.98915
time_since_restore,63.0806
time_this_iter_s,8.95915
time_total_s,63.0806
timestamp,1662590125.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00964


Result for TorchTrainer_159ff6e8:
  _time_this_iter_s: 8.941566467285156
  _timestamp: 1662590124
  _training_iteration: 5
  avg_loss: 48.253017044067384
  date: 2022-09-07_22-35-25
  done: true
  experiment_id: bbe776729fbd4182ad958f66eaea87b1
  experiment_tag: 18_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0003,max_num_steps=5,num_workers

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_35c84b5a_19_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-34-43/checkpoint_000002)... 

Result for TorchTrainer_35c84b5a:
  _time_this_iter_s: 8.982773303985596
  _timestamp: 1662590128
  _training_iteration: 3
  avg_loss: 52.81496524810791
  date: 2022-09-07_22-35-29
  done: false
  experiment_id: 668f4e8699cd4ec4aeecd0f6ba224cbf
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 74.47563934326172
  node_ip: 10.150.0.3
  pid: 3654
  should_checkpoint: true
  time_since_restore: 41.619145154953
  time_this_iter_s: 8.945120334625244
  time_total_s: 41.619145154953
  timestamp: 1662590129
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 35c84b5a
  warmup_time: 0.0054776668548583984
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=3728)[0m {'loss': 44.893157958984375, 'avg_loss': 50.834513425827026}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_35c84b5a_19_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-34-43/checkpoint_000003)... Done. 1.0s
[2m[36m(RayTrainWorker pid=4628)[0m 2022-09-07 22:35:43,523	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]


[2m[36m(RayTrainWorker pid=3728)[0m {'loss': 40.69707107543945, 'avg_loss': 48.80702495574951}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_35c84b5a_19_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-34-43/checkpoint_000004)... 

Result for TorchTrainer_35c84b5a:
  _time_this_iter_s: 10.709978103637695
  _timestamp: 1662590147
  _training_iteration: 5
  avg_loss: 48.80702495574951
  date: 2022-09-07_22-35-48
  done: false
  experiment_id: 668f4e8699cd4ec4aeecd0f6ba224cbf
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 40.69707107543945
  node_ip: 10.150.0.3
  pid: 3654
  should_checkpoint: true
  time_since_restore: 60.876083850860596
  time_this_iter_s: 10.632569551467896
  time_total_s: 60.876083850860596
  timestamp: 1662590148
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 35c84b5a
  warmup_time: 0.0054776668548583984
  


[2m[36m(RayTrainWorker pid=4628)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_35c84b5a_19_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-34-43/checkpoint_000004)... Done. 0.3s
[2m[36m(RayTrainWorker pid=4628)[0m 2022-09-07 22:35:49,537	INFO train_loop_utils.py:300 -- Moving model to device: cpu


VBox(children=(Label(value='457.135 MB of 457.135 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,▁▆██▇
iterations_since_restore,▁▃▅▆█
loss,▁██▅▄
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▁▁▂
time_total_s,▁▃▄▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,48.80702
iterations_since_restore,5.0
loss,40.69707
time_since_restore,60.87608
time_this_iter_s,10.63257
time_total_s,60.87608
timestamp,1662590148.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00548


Result for TorchTrainer_35c84b5a:
  _time_this_iter_s: 10.709978103637695
  _timestamp: 1662590147
  _training_iteration: 5
  avg_loss: 48.80702495574951
  date: 2022-09-07_22-35-48
  done: true
  experiment_id: 668f4e8699cd4ec4aeecd0f6ba224cbf
  experiment_tag: 19_batch_size=12.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0002,max_num_steps=5,num_workers

[2m[36m(RayTrainWorker pid=5326)[0m 2022-09-07 22:36:06,733	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]


[2m[36m(RayTrainWorker pid=4628)[0m {'loss': 58.441532135009766, 'avg_loss': 58.441532135009766}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_44d11b86_20_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-35-35/checkpoint_000000)... 

Result for TorchTrainer_44d11b86:
  _time_this_iter_s: 25.10494565963745
  _timestamp: 1662590169
  _training_iteration: 1
  avg_loss: 58.441532135009766
  date: 2022-09-07_22-36-09
  done: false
  experiment_id: 9f3f3051004e4b289b41f982734c46b0
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 58.441532135009766
  node_ip: 10.150.0.3
  pid: 4539
  should_checkpoint: true
  time_since_restore: 29.421180963516235
  time_this_iter_s: 29.421180963516235
  time_total_s: 29.421180963516235
  timestamp: 1662590169
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 44d11b86
  warmup_time: 0.008868932723999023
  


Done. 1.0s
[2m[36m(RayTrainWorker pid=5326)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=5326)[0m 2022-09-07 22:36:13,935	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=4628)[0m {'loss': 14.283161163330078, 'avg_loss': 36.36234664916992}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_44d11b86_20_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-35-35/checkpoint_000001)... 

Result for TorchTrainer_44d11b86:
  _time_this_iter_s: 15.268676280975342
  _timestamp: 1662590184
  _training_iteration: 2
  avg_loss: 36.36234664916992
  date: 2022-09-07_22-36-24
  done: false
  experiment_id: 9f3f3051004e4b289b41f982734c46b0
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 14.283161163330078
  node_ip: 10.150.0.3
  pid: 4539
  should_checkpoint: true
  time_since_restore: 44.613630294799805
  time_this_iter_s: 15.19244933128357
  time_total_s: 44.613630294799805
  timestamp: 1662590184
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 44d11b86
  warmup_time: 0.008868932723999023
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=5326)[0m {'loss': 35.3824577331543, 'avg_loss': 35.3824577331543}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_63f4c242_21_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-35-58/checkpoint_000000)... 

Result for TorchTrainer_63f4c242:
  _time_this_iter_s: 24.128557682037354
  _timestamp: 1662590191
  _training_iteration: 1
  avg_loss: 35.3824577331543
  date: 2022-09-07_22-36-31
  done: false
  experiment_id: 00c2c9ca69e7494c9efc0894351f0cf8
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 35.3824577331543
  node_ip: 10.150.0.3
  pid: 4822
  should_checkpoint: true
  time_since_restore: 27.8669490814209
  time_this_iter_s: 27.8669490814209
  time_total_s: 27.8669490814209
  timestamp: 1662590191
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: 63f4c242
  warmup_time: 0.014725685119628906
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=4628)[0m {'loss': 1.2859759330749512, 'avg_loss': 24.670223077138264}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_44d11b86_20_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-35-35/checkpoint_000002)... 

Result for TorchTrainer_44d11b86:
  _time_this_iter_s: 14.53411602973938
  _timestamp: 1662590198
  _training_iteration: 3
  avg_loss: 24.670223077138264
  date: 2022-09-07_22-36-39
  done: false
  experiment_id: 9f3f3051004e4b289b41f982734c46b0
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 1.2859759330749512
  node_ip: 10.150.0.3
  pid: 4539
  should_checkpoint: true
  time_since_restore: 59.11141610145569
  time_this_iter_s: 14.497785806655884
  time_total_s: 59.11141610145569
  timestamp: 1662590199
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 44d11b86
  warmup_time: 0.008868932723999023
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=5326)[0m {'loss': 152.71226501464844, 'avg_loss': 94.04736137390137}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_63f4c242_21_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-35-58/checkpoint_000001)... 

Result for TorchTrainer_63f4c242:
  _time_this_iter_s: 14.006881713867188
  _timestamp: 1662590205
  _training_iteration: 2
  avg_loss: 94.04736137390137
  date: 2022-09-07_22-36-45
  done: false
  experiment_id: 00c2c9ca69e7494c9efc0894351f0cf8
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 152.71226501464844
  node_ip: 10.150.0.3
  pid: 4822
  should_checkpoint: true
  time_since_restore: 41.82793712615967
  time_this_iter_s: 13.96098804473877
  time_total_s: 41.82793712615967
  timestamp: 1662590205
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: 63f4c242
  warmup_time: 0.014725685119628906
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=4628)[0m {'loss': 86.61248779296875, 'avg_loss': 40.155789256095886}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_44d11b86_20_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-35-35/checkpoint_000003)... 

Result for TorchTrainer_44d11b86:
  _time_this_iter_s: 14.15595531463623
  _timestamp: 1662590213
  _training_iteration: 4
  avg_loss: 40.155789256095886
  date: 2022-09-07_22-36-53
  done: false
  experiment_id: 9f3f3051004e4b289b41f982734c46b0
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 86.61248779296875
  node_ip: 10.150.0.3
  pid: 4539
  should_checkpoint: true
  time_since_restore: 73.25057172775269
  time_this_iter_s: 14.139155626296997
  time_total_s: 73.25057172775269
  timestamp: 1662590213
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 44d11b86
  warmup_time: 0.008868932723999023
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=5326)[0m {'loss': 115.54216766357422, 'avg_loss': 101.21229680379231}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_63f4c242_21_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-35-58/checkpoint_000002)... 

Result for TorchTrainer_63f4c242:
  _time_this_iter_s: 13.285151958465576
  _timestamp: 1662590218
  _training_iteration: 3
  avg_loss: 101.21229680379231
  date: 2022-09-07_22-36-59
  done: false
  experiment_id: 00c2c9ca69e7494c9efc0894351f0cf8
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 115.54216766357422
  node_ip: 10.150.0.3
  pid: 4822
  should_checkpoint: true
  time_since_restore: 55.21828055381775
  time_this_iter_s: 13.390343427658081
  time_total_s: 55.21828055381775
  timestamp: 1662590219
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: 63f4c242
  warmup_time: 0.014725685119628906
  


Done. 1.5s


[2m[36m(RayTrainWorker pid=4628)[0m {'loss': 25.575414657592773, 'avg_loss': 37.23971433639527}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_44d11b86_20_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-35-35/checkpoint_000004)... 

Result for TorchTrainer_44d11b86:
  _time_this_iter_s: 15.40588665008545
  _timestamp: 1662590228
  _training_iteration: 5
  avg_loss: 37.23971433639527
  date: 2022-09-07_22-37-08
  done: false
  experiment_id: 9f3f3051004e4b289b41f982734c46b0
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 25.575414657592773
  node_ip: 10.150.0.3
  pid: 4539
  should_checkpoint: true
  time_since_restore: 88.67522358894348
  time_this_iter_s: 15.424651861190796
  time_total_s: 88.67522358894348
  timestamp: 1662590228
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 44d11b86
  warmup_time: 0.008868932723999023
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_44d11b86_20_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-35-35/checkpoint_000004)... Done. 0.3s


[2m[36m(RayTrainWorker pid=5326)[0m {'loss': 33.9464111328125, 'avg_loss': 84.39582538604736}


VBox(children=(Label(value='457.170 MB of 457.170 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▃▁▄▄
iterations_since_restore,▁▃▅▆█
loss,▆▂▁█▃
time_since_restore,▁▃▅▆█
time_this_iter_s,█▁▁▁▂
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,37.23971
iterations_since_restore,5.0
loss,25.57541
time_since_restore,88.67522
time_this_iter_s,15.42465
time_total_s,88.67522
timestamp,1662590228.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00887


Result for TorchTrainer_44d11b86:
  _time_this_iter_s: 15.40588665008545
  _timestamp: 1662590228
  _training_iteration: 5
  avg_loss: 37.23971433639527
  date: 2022-09-07_22-37-08
  done: true
  experiment_id: 9f3f3051004e4b289b41f982734c46b0
  experiment_tag: 20_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0011,max_num_steps=5,num_workers=

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_63f4c242_21_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-35-58/checkpoint_000003)... 

Result for TorchTrainer_63f4c242:
  _time_this_iter_s: 14.080241441726685
  _timestamp: 1662590232
  _training_iteration: 4
  avg_loss: 84.39582538604736
  date: 2022-09-07_22-37-13
  done: false
  experiment_id: 00c2c9ca69e7494c9efc0894351f0cf8
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 33.9464111328125
  node_ip: 10.150.0.3
  pid: 4822
  should_checkpoint: true
  time_since_restore: 69.13944864273071
  time_this_iter_s: 13.921168088912964
  time_total_s: 69.13944864273071
  timestamp: 1662590233
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: 63f4c242
  warmup_time: 0.014725685119628906
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=5326)[0m {'loss': 18.0721435546875, 'avg_loss': 71.13108901977539}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_63f4c242_21_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-35-58/checkpoint_000004)... 

Result for TorchTrainer_63f4c242:
  _time_this_iter_s: 13.150818109512329
  _timestamp: 1662590246
  _training_iteration: 5
  avg_loss: 71.13108901977539
  date: 2022-09-07_22-37-26
  done: false
  experiment_id: 00c2c9ca69e7494c9efc0894351f0cf8
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 18.0721435546875
  node_ip: 10.150.0.3
  pid: 4822
  should_checkpoint: true
  time_since_restore: 82.37583494186401
  time_this_iter_s: 13.2363862991333
  time_total_s: 82.37583494186401
  timestamp: 1662590246
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: 63f4c242
  warmup_time: 0.014725685119628906
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_63f4c242_21_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-35-58/checkpoint_000004)... Done. 0.3s


[2m[36m(RayTrainWorker pid=6270)[0m 2022-09-07 22:37:28,530	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]


VBox(children=(Label(value='457.170 MB of 457.170 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

[2m[36m(RayTrainWorker pid=6270)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=6270)[0m 2022-09-07 22:37:33,926	INFO train_loop_utils.py:300 -- Moving model to device: cpu


0,1
avg_loss,▁▇█▆▅
iterations_since_restore,▁▃▅▆█
loss,▂█▆▂▁
time_since_restore,▁▃▅▆█
time_this_iter_s,█▁▁▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,71.13109
iterations_since_restore,5.0
loss,18.07214
time_since_restore,82.37583
time_this_iter_s,13.23639
time_total_s,82.37583
timestamp,1662590246.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.01473


Result for TorchTrainer_63f4c242:
  _time_this_iter_s: 13.150818109512329
  _timestamp: 1662590246
  _training_iteration: 5
  avg_loss: 71.13108901977539
  date: 2022-09-07_22-37-26
  done: true
  experiment_id: 00c2c9ca69e7494c9efc0894351f0cf8
  experiment_tag: 21_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0002,max_num_steps=5,num_workers

[2m[36m(RayTrainWorker pid=6576)[0m 2022-09-07 22:37:43,775	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=6576)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=6576)[0m 2022-09-07 22:37:50,268	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=6270)[0m {'loss': 101.63909912109375, 'avg_loss': 101.63909912109375}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_71783246_22_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-37-20/checkpoint_000000)... 

Result for TorchTrainer_71783246:
  _time_this_iter_s: 26.07393193244934
  _timestamp: 1662590275
  _training_iteration: 1
  avg_loss: 101.63909912109375
  date: 2022-09-07_22-37-55
  done: false
  experiment_id: e5f1a76af3f748fcb9ec38ba9a7aaaab
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 101.63909912109375
  node_ip: 10.150.0.3
  pid: 6200
  should_checkpoint: true
  time_since_restore: 30.02992081642151
  time_this_iter_s: 30.02992081642151
  time_total_s: 30.02992081642151
  timestamp: 1662590275
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: '71783246'
  warmup_time: 0.008259296417236328
  


Done. 1.5s


[2m[36m(RayTrainWorker pid=6576)[0m {'loss': 142.06919860839844, 'avg_loss': 142.06919860839844}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_a253209c_23_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-37-35/checkpoint_000000)... 

Result for TorchTrainer_a253209c:
  _time_this_iter_s: 23.85733938217163
  _timestamp: 1662590288
  _training_iteration: 1
  avg_loss: 142.06919860839844
  date: 2022-09-07_22-38-08
  done: false
  experiment_id: 08ebcd9d7bd34e428f36d9460f08ae84
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 142.06919860839844
  node_ip: 10.150.0.3
  pid: 6499
  should_checkpoint: true
  time_since_restore: 27.864238500595093
  time_this_iter_s: 27.864238500595093
  time_total_s: 27.864238500595093
  timestamp: 1662590288
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: a253209c
  warmup_time: 0.005983114242553711
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=6270)[0m {'loss': 69.43778228759766, 'avg_loss': 85.5384407043457}
Result for TorchTrainer_71783246:
  _time_this_iter_s: 14.979305744171143
  _timestamp: 1662590290
  _training_iteration: 2
  avg_loss: 85.5384407043457
  date: 2022-09-07_22-38-10
  done: false
  experiment_id: e5f1a76af3f748fcb9ec38ba9a7aaaab
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 69.43778228759766
  node_ip: 10.150.0.3
  pid: 6200
  should_checkpoint: true
  time_since_restore: 45.05448508262634
  time_this_iter_s: 15.024564266204834
  time_total_s: 45.05448508262634
  timestamp: 1662590290
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: '71783246'
  warmup_time: 0.008259296417236328
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_71783246_22_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-37-20/checkpoint_000001)... Done. 1.0s


[2m[36m(RayTrainWorker pid=6576)[0m {'loss': 110.77450561523438, 'avg_loss': 126.4218521118164}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_a253209c_23_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-37-35/checkpoint_000001)... 

Result for TorchTrainer_a253209c:
  _time_this_iter_s: 13.499944686889648
  _timestamp: 1662590301
  _training_iteration: 2
  avg_loss: 126.4218521118164
  date: 2022-09-07_22-38-22
  done: false
  experiment_id: 08ebcd9d7bd34e428f36d9460f08ae84
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 110.77450561523438
  node_ip: 10.150.0.3
  pid: 6499
  should_checkpoint: true
  time_since_restore: 41.327545404434204
  time_this_iter_s: 13.463306903839111
  time_total_s: 41.327545404434204
  timestamp: 1662590302
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: a253209c
  warmup_time: 0.005983114242553711
  
[2m[36m(RayTrainWorker pid=6270)[0m {'loss': 74.55601501464844, 'avg_loss': 81.87763214111328}


Done. 1.1s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_71783246_22_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-37-20/checkpoint_000002)... 

Result for TorchTrainer_71783246:
  _time_this_iter_s: 13.05223274230957
  _timestamp: 1662590303
  _training_iteration: 3
  avg_loss: 81.87763214111328
  date: 2022-09-07_22-38-23
  done: false
  experiment_id: e5f1a76af3f748fcb9ec38ba9a7aaaab
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 74.55601501464844
  node_ip: 10.150.0.3
  pid: 6200
  should_checkpoint: true
  time_since_restore: 58.06894397735596
  time_this_iter_s: 13.014458894729614
  time_total_s: 58.06894397735596
  timestamp: 1662590303
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: '71783246'
  warmup_time: 0.008259296417236328
  


Done. 1.5s


[2m[36m(RayTrainWorker pid=6576)[0m {'loss': 67.6620864868164, 'avg_loss': 106.83526357014973}
[2m[36m(RayTrainWorker pid=6270)[0m {'loss': 98.93031311035156, 'avg_loss': 86.14080238342285}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_a253209c_23_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-37-35/checkpoint_000002)... 

Result for TorchTrainer_a253209c:
  _time_this_iter_s: 13.71084713935852
  _timestamp: 1662590315
  _training_iteration: 3
  avg_loss: 106.83526357014973
  date: 2022-09-07_22-38-35
  done: false
  experiment_id: 08ebcd9d7bd34e428f36d9460f08ae84
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 67.6620864868164
  node_ip: 10.150.0.3
  pid: 6499
  should_checkpoint: true
  time_since_restore: 55.03093767166138
  time_this_iter_s: 13.703392267227173
  time_total_s: 55.03093767166138
  timestamp: 1662590315
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: a253209c
  warmup_time: 0.005983114242553711
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_71783246_22_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-37-20/checkpoint_000003)... 

Result for TorchTrainer_71783246:
  _time_this_iter_s: 12.608906984329224
  _timestamp: 1662590315
  _training_iteration: 4
  avg_loss: 86.14080238342285
  date: 2022-09-07_22-38-36
  done: false
  experiment_id: e5f1a76af3f748fcb9ec38ba9a7aaaab
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 98.93031311035156
  node_ip: 10.150.0.3
  pid: 6200
  should_checkpoint: true
  time_since_restore: 70.73315143585205
  time_this_iter_s: 12.664207458496094
  time_total_s: 70.73315143585205
  timestamp: 1662590316
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: '71783246'
  warmup_time: 0.008259296417236328
  


Done. 1.8s
Done. 1.6s


[2m[36m(RayTrainWorker pid=6270)[0m {'loss': 26.73236656188965, 'avg_loss': 74.25911521911621}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_71783246_22_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-37-20/checkpoint_000004)... 

Result for TorchTrainer_71783246:
  _time_this_iter_s: 12.956475019454956
  _timestamp: 1662590328
  _training_iteration: 5
  avg_loss: 74.25911521911621
  date: 2022-09-07_22-38-49
  done: false
  experiment_id: e5f1a76af3f748fcb9ec38ba9a7aaaab
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 26.73236656188965
  node_ip: 10.150.0.3
  pid: 6200
  should_checkpoint: true
  time_since_restore: 83.52563118934631
  time_this_iter_s: 12.792479753494263
  time_total_s: 83.52563118934631
  timestamp: 1662590329
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: '71783246'
  warmup_time: 0.008259296417236328
  
[2m[36m(RayTrainWorker pid=6576)[0m {'loss': 52.419891357421875, 'avg_loss': 93.23142051696777}


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_71783246_22_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-37-20/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='457.195 MB of 457.195 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▄▃▄▁
iterations_since_restore,▁▃▅▆█
loss,█▅▅█▁
time_since_restore,▁▃▅▆█
time_this_iter_s,█▂▁▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,74.25912
iterations_since_restore,5.0
loss,26.73237
time_since_restore,83.52563
time_this_iter_s,12.79248
time_total_s,83.52563
timestamp,1662590329.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00826


Result for TorchTrainer_71783246:
  _time_this_iter_s: 12.956475019454956
  _timestamp: 1662590328
  _training_iteration: 5
  avg_loss: 74.25911521911621
  date: 2022-09-07_22-38-49
  done: true
  experiment_id: e5f1a76af3f748fcb9ec38ba9a7aaaab
  experiment_tag: 22_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0012,max_num_steps=5,num_workers

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_a253209c_23_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-37-35/checkpoint_000003)... 

Result for TorchTrainer_a253209c:
  _time_this_iter_s: 13.85779595375061
  _timestamp: 1662590329
  _training_iteration: 4
  avg_loss: 93.23142051696777
  date: 2022-09-07_22-38-49
  done: false
  experiment_id: 08ebcd9d7bd34e428f36d9460f08ae84
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 52.419891357421875
  node_ip: 10.150.0.3
  pid: 6499
  should_checkpoint: true
  time_since_restore: 68.83096432685852
  time_this_iter_s: 13.800026655197144
  time_total_s: 68.83096432685852
  timestamp: 1662590329
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: a253209c
  warmup_time: 0.005983114242553711
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=6576)[0m {'loss': 24.017383575439453, 'avg_loss': 79.38861312866212}
Result for TorchTrainer_a253209c:
  _time_this_iter_s: 14.647578001022339
  _timestamp: 1662590343
  _training_iteration: 5
  avg_loss: 79.38861312866212
  date: 2022-09-07_22-39-04
  done: false
  experiment_id: 08ebcd9d7bd34e428f36d9460f08ae84
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 24.017383575439453
  node_ip: 10.150.0.3
  pid: 6499
  should_checkpoint: true
  time_since_restore: 83.59553718566895
  time_this_iter_s: 14.764572858810425
  time_total_s: 83.59553718566895
  timestamp: 1662590344
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: a253209c
  warmup_time: 0.005983114242553711
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_a253209c_23_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-37-35/checkpoint_000004)... Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_a253209c_23_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-37-35/checkpoint_000004)... Done. 0.3s


[2m[36m(RayTrainWorker pid=7906)[0m 2022-09-07 22:39:07,694	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]


VBox(children=(Label(value='457.195 MB of 457.195 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

[2m[36m(RayTrainWorker pid=7906)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "


0,1
avg_loss,█▆▄▃▁
iterations_since_restore,▁▃▅▆█
loss,█▆▄▃▁
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▁▁▂
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,79.38861
iterations_since_restore,5.0
loss,24.01738
time_since_restore,83.59554
time_this_iter_s,14.76457
time_total_s,83.59554
timestamp,1662590344.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00598


[2m[36m(RayTrainWorker pid=7906)[0m 2022-09-07 22:39:13,093	INFO train_loop_utils.py:300 -- Moving model to device: cpu


Result for TorchTrainer_a253209c:
  _time_this_iter_s: 14.647578001022339
  _timestamp: 1662590343
  _training_iteration: 5
  avg_loss: 79.38861312866212
  date: 2022-09-07_22-39-04
  done: true
  experiment_id: 08ebcd9d7bd34e428f36d9460f08ae84
  experiment_tag: 23_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0012,max_num_steps=5,num_workers

[2m[36m(RayTrainWorker pid=8213)[0m 2022-09-07 22:39:23,315	INFO config.py:72 -- Setting up process group for: env:// [rank=0, world_size=1]
[2m[36m(RayTrainWorker pid=8213)[0m   f"The parameter '{pretrained_param}' is deprecated since 0.13 and will be removed in 0.15, "
[2m[36m(RayTrainWorker pid=8213)[0m 2022-09-07 22:39:29,305	INFO train_loop_utils.py:300 -- Moving model to device: cpu


[2m[36m(RayTrainWorker pid=7906)[0m {'loss': 68.1643295288086, 'avg_loss': 68.1643295288086}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_ab7915dc_24_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-38-59/checkpoint_000000)... 

Result for TorchTrainer_ab7915dc:
  _time_this_iter_s: 26.411190271377563
  _timestamp: 1662590374
  _training_iteration: 1
  avg_loss: 68.1643295288086
  date: 2022-09-07_22-39-35
  done: false
  experiment_id: fcf8a41a286b4176b99dc2f9f52083c2
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 68.1643295288086
  node_ip: 10.150.0.3
  pid: 7399
  should_checkpoint: true
  time_since_restore: 30.061359167099
  time_this_iter_s: 30.061359167099
  time_total_s: 30.061359167099
  timestamp: 1662590375
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: ab7915dc
  warmup_time: 0.00520634651184082
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=7906)[0m {'loss': 4.367187023162842, 'avg_loss': 36.26575827598572}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_ab7915dc_24_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-38-59/checkpoint_000001)... 

Result for TorchTrainer_ab7915dc:
  _time_this_iter_s: 13.095037698745728
  _timestamp: 1662590387
  _training_iteration: 2
  avg_loss: 36.26575827598572
  date: 2022-09-07_22-39-48
  done: false
  experiment_id: fcf8a41a286b4176b99dc2f9f52083c2
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 4.367187023162842
  node_ip: 10.150.0.3
  pid: 7399
  should_checkpoint: true
  time_since_restore: 43.031766176223755
  time_this_iter_s: 12.970407009124756
  time_total_s: 43.031766176223755
  timestamp: 1662590388
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: ab7915dc
  warmup_time: 0.00520634651184082
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=8213)[0m {'loss': 101.05824279785156, 'avg_loss': 101.05824279785156}
Result for TorchTrainer_dd768114:
  _time_this_iter_s: 27.585978031158447
  _timestamp: 1662590391
  _training_iteration: 1
  avg_loss: 101.05824279785156
  date: 2022-09-07_22-39-52
  done: false
  experiment_id: 6145bc2e07bb4ce1a9e955e6540d88c2
  hostname: anish-l5-kit
  iterations_since_restore: 1
  loss: 101.05824279785156
  node_ip: 10.150.0.3
  pid: 8134
  should_checkpoint: true
  time_since_restore: 32.32519006729126
  time_this_iter_s: 32.32519006729126
  time_total_s: 32.32519006729126
  timestamp: 1662590392
  timesteps_since_restore: 0
  training_iteration: 1
  trial_id: dd768114
  warmup_time: 0.013957977294921875
  


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_dd768114_25_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-39-13/checkpoint_000000)... Done. 1.1s


[2m[36m(RayTrainWorker pid=7906)[0m {'loss': 105.1120376586914, 'avg_loss': 59.21451807022095}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_ab7915dc_24_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-38-59/checkpoint_000002)... 

Result for TorchTrainer_ab7915dc:
  _time_this_iter_s: 12.883364200592041
  _timestamp: 1662590400
  _training_iteration: 3
  avg_loss: 59.21451807022095
  date: 2022-09-07_22-40-00
  done: false
  experiment_id: fcf8a41a286b4176b99dc2f9f52083c2
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 105.1120376586914
  node_ip: 10.150.0.3
  pid: 7399
  should_checkpoint: true
  time_since_restore: 55.973867654800415
  time_this_iter_s: 12.94210147857666
  time_total_s: 55.973867654800415
  timestamp: 1662590400
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: ab7915dc
  warmup_time: 0.00520634651184082
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=8213)[0m {'loss': 18.714263916015625, 'avg_loss': 59.886253356933594}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_dd768114_25_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-39-13/checkpoint_000001)... 

Result for TorchTrainer_dd768114:
  _time_this_iter_s: 20.36624836921692
  _timestamp: 1662590411
  _training_iteration: 2
  avg_loss: 59.886253356933594
  date: 2022-09-07_22-40-12
  done: false
  experiment_id: 6145bc2e07bb4ce1a9e955e6540d88c2
  hostname: anish-l5-kit
  iterations_since_restore: 2
  loss: 18.714263916015625
  node_ip: 10.150.0.3
  pid: 8134
  should_checkpoint: true
  time_since_restore: 52.591161489486694
  time_this_iter_s: 20.265971422195435
  time_total_s: 52.591161489486694
  timestamp: 1662590412
  timesteps_since_restore: 0
  training_iteration: 2
  trial_id: dd768114
  warmup_time: 0.013957977294921875
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=7906)[0m {'loss': 79.37877655029297, 'avg_loss': 64.25558269023895}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_ab7915dc_24_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-38-59/checkpoint_000003)... 

Result for TorchTrainer_ab7915dc:
  _time_this_iter_s: 13.928529024124146
  _timestamp: 1662590414
  _training_iteration: 4
  avg_loss: 64.25558269023895
  date: 2022-09-07_22-40-14
  done: false
  experiment_id: fcf8a41a286b4176b99dc2f9f52083c2
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 79.37877655029297
  node_ip: 10.150.0.3
  pid: 7399
  should_checkpoint: true
  time_since_restore: 69.86056876182556
  time_this_iter_s: 13.886701107025146
  time_total_s: 69.86056876182556
  timestamp: 1662590414
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: ab7915dc
  warmup_time: 0.00520634651184082
  


Done. 1.1s


[2m[36m(RayTrainWorker pid=7906)[0m {'loss': 2.939028024673462, 'avg_loss': 51.992271757125856}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_ab7915dc_24_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-38-59/checkpoint_000004)... 

Result for TorchTrainer_ab7915dc:
  _time_this_iter_s: 12.20077657699585
  _timestamp: 1662590426
  _training_iteration: 5
  avg_loss: 51.992271757125856
  date: 2022-09-07_22-40-27
  done: false
  experiment_id: fcf8a41a286b4176b99dc2f9f52083c2
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 2.939028024673462
  node_ip: 10.150.0.3
  pid: 7399
  should_checkpoint: true
  time_since_restore: 82.06967091560364
  time_this_iter_s: 12.209102153778076
  time_total_s: 82.06967091560364
  timestamp: 1662590427
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: ab7915dc
  warmup_time: 0.00520634651184082
  


Done. 1.1s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_ab7915dc_24_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-38-59/checkpoint_000004)... Done. 0.3s


[2m[36m(RayTrainWorker pid=8213)[0m {'loss': 28.8111629486084, 'avg_loss': 49.52788988749186}


VBox(children=(Label(value='457.195 MB of 457.195 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▁▆▇▄
iterations_since_restore,▁▃▅▆█
loss,▅▁█▆▁
time_since_restore,▁▃▄▆█
time_this_iter_s,█▁▁▂▁
time_total_s,▁▃▄▆█
timestamp,▁▃▄▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,51.99227
iterations_since_restore,5.0
loss,2.93903
time_since_restore,82.06967
time_this_iter_s,12.2091
time_total_s,82.06967
timestamp,1662590427.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.00521


Result for TorchTrainer_ab7915dc:
  _time_this_iter_s: 12.20077657699585
  _timestamp: 1662590426
  _training_iteration: 5
  avg_loss: 51.992271757125856
  date: 2022-09-07_22-40-27
  done: true
  experiment_id: fcf8a41a286b4176b99dc2f9f52083c2
  experiment_tag: 24_batch_size=18.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0013,max_num_steps=5,num_workers

[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_dd768114_25_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-39-13/checkpoint_000002)... 

Result for TorchTrainer_dd768114:
  _time_this_iter_s: 18.238826274871826
  _timestamp: 1662590430
  _training_iteration: 3
  avg_loss: 49.52788988749186
  date: 2022-09-07_22-40-30
  done: false
  experiment_id: 6145bc2e07bb4ce1a9e955e6540d88c2
  hostname: anish-l5-kit
  iterations_since_restore: 3
  loss: 28.8111629486084
  node_ip: 10.150.0.3
  pid: 8134
  should_checkpoint: true
  time_since_restore: 70.78211641311646
  time_this_iter_s: 18.19095492362976
  time_total_s: 70.78211641311646
  timestamp: 1662590430
  timesteps_since_restore: 0
  training_iteration: 3
  trial_id: dd768114
  warmup_time: 0.013957977294921875
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=8213)[0m {'loss': 39.65055465698242, 'avg_loss': 47.0585560798645}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_dd768114_25_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-39-13/checkpoint_000003)... 

Result for TorchTrainer_dd768114:
  _time_this_iter_s: 15.942399501800537
  _timestamp: 1662590446
  _training_iteration: 4
  avg_loss: 47.0585560798645
  date: 2022-09-07_22-40-46
  done: false
  experiment_id: 6145bc2e07bb4ce1a9e955e6540d88c2
  hostname: anish-l5-kit
  iterations_since_restore: 4
  loss: 39.65055465698242
  node_ip: 10.150.0.3
  pid: 8134
  should_checkpoint: true
  time_since_restore: 86.70124626159668
  time_this_iter_s: 15.919129848480225
  time_total_s: 86.70124626159668
  timestamp: 1662590446
  timesteps_since_restore: 0
  training_iteration: 4
  trial_id: dd768114
  warmup_time: 0.013957977294921875
  


Done. 1.0s


[2m[36m(RayTrainWorker pid=8213)[0m {'loss': 78.80968475341797, 'avg_loss': 53.40878181457519}


[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_dd768114_25_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-39-13/checkpoint_000004)... 

Result for TorchTrainer_dd768114:
  _time_this_iter_s: 16.700042724609375
  _timestamp: 1662590462
  _training_iteration: 5
  avg_loss: 53.40878181457519
  date: 2022-09-07_22-41-03
  done: false
  experiment_id: 6145bc2e07bb4ce1a9e955e6540d88c2
  hostname: anish-l5-kit
  iterations_since_restore: 5
  loss: 78.80968475341797
  node_ip: 10.150.0.3
  pid: 8134
  should_checkpoint: true
  time_since_restore: 103.64607071876526
  time_this_iter_s: 16.94482445716858
  time_total_s: 103.64607071876526
  timestamp: 1662590463
  timesteps_since_restore: 0
  training_iteration: 5
  trial_id: dd768114
  warmup_time: 0.013957977294921875
  


Done. 1.0s
[34m[1mwandb[0m: Adding directory to artifact (/home/jupyter/ray_results/TorchTrainer_2022-09-07_22-21-37/TorchTrainer_dd768114_25_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50_2022-09-07_22-39-13/checkpoint_000004)... Done. 0.3s


VBox(children=(Label(value='457.225 MB of 457.225 MB uploaded (91.307 MB deduped)\r'), FloatProgress(value=1.0…

0,1
avg_loss,█▃▁▁▂
iterations_since_restore,▁▃▅▆█
loss,█▁▂▃▆
time_since_restore,▁▃▅▆█
time_this_iter_s,█▃▂▁▁
time_total_s,▁▃▅▆█
timestamp,▁▃▅▆█
timesteps_since_restore,▁▁▁▁▁
training_iteration,▁▃▅▆█
warmup_time,▁▁▁▁▁

0,1
avg_loss,53.40878
iterations_since_restore,5.0
loss,78.80968
time_since_restore,103.64607
time_this_iter_s,16.94482
time_total_s,103.64607
timestamp,1662590463.0
timesteps_since_restore,0.0
training_iteration,5.0
warmup_time,0.01396


Result for TorchTrainer_dd768114:
  _time_this_iter_s: 16.700042724609375
  _timestamp: 1662590462
  _training_iteration: 5
  avg_loss: 53.40878181457519
  date: 2022-09-07_22-41-03
  done: true
  experiment_id: 6145bc2e07bb4ce1a9e955e6540d88c2
  experiment_tag: 25_batch_size=24.0000,format_version=4,future_num_frames=50,history_num_frames=5,model_architecture=resnet50,render_ego_history=False,step_time=0.1000,dataset_meta_key=meta_json,disable_traffic_light_faces=False,ego_center=0_25_0_5,filter_agents_threshold=0.5000,map_type=py_semantic,pixel_size=0_5_0_5,raster_size=224_224,satellite_map_key=aerial_map_aerial_map_png,semantic_map_key=semantic_map_semantic_map_pb,set_origin_to_bottom=True,batch_size=12,key=scenes_sample_zarr,num_workers=16,perturb_probability=0.0000,shuffle=True,checkpoint_every_n_steps=10000,eval_every_n_steps=10000,max_num_steps=5,batch_size=12,key=scenes_sample_zarr,num_workers=16,shuffle=False,dataset_key=scenes_sample_zarr,lr=0.0004,max_num_steps=5,num_workers

2022-09-07 22:41:10,804	INFO tune.py:759 -- Total run time: 1172.90 seconds (1172.69 seconds for the tuning loop).


In [35]:
import time

In [36]:
# Pause for 30 seconds before reading the tuning results.
# NOTE(review): presumably this gives the Tune/W&B background uploads time to
# settle after the run above -- confirm it is still required.
time.sleep(30)

In [37]:
# Collect the results held by `analysis` into a DataFrame.
# NOTE(review): `analysis` is defined in an earlier cell, presumably the object
# returned by the tuning run -- confirm.
analysis_df = analysis.get_dataframe()

In [38]:
# Bare last expression: render the results frame as this cell's output.
analysis_df

Unnamed: 0,loss,avg_loss,_timestamp,_time_this_iter_s,_training_iteration,time_this_iter_s,should_checkpoint,done,timesteps_total,episodes_total,...,config/train_loop_config/cfg/train_data_loader/perturb_probability,config/train_loop_config/cfg/train_data_loader/shuffle,config/train_loop_config/cfg/train_params/checkpoint_every_n_steps,config/train_loop_config/cfg/train_params/eval_every_n_steps,config/train_loop_config/cfg/train_params/max_num_steps,config/train_loop_config/cfg/val_data_loader/batch_size,config/train_loop_config/cfg/val_data_loader/key,config/train_loop_config/cfg/val_data_loader/num_workers,config/train_loop_config/cfg/val_data_loader/shuffle,logdir
0,33.780266,65.703058,1662589365,8.80689,5,8.969782,True,False,,,...,0.0,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
1,67.135323,68.163639,1662589417,18.771693,5,18.765745,True,False,,,...,0.0,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
2,0.366061,80.481365,1662589416,4.788919,5,4.792669,True,False,,,...,0.0,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
3,44.491188,57.210853,1662589519,12.728409,5,12.73181,True,False,,,...,0.0,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
4,66.332466,96.654671,1662589520,12.586768,5,12.573064,True,False,,,...,0.0,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
5,16.063162,50.175418,1662589627,14.005862,5,14.085137,True,False,,,...,0.0,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
6,14.293675,34.844029,1662589652,18.746863,5,18.72354,True,False,,,...,0.0,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
7,107.9254,107.536839,1662589683,7.768827,5,7.934391,True,False,,,...,0.0,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
8,78.534073,81.707582,1662589728,9.151777,5,9.164373,True,False,,,...,0.0,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...
9,19.829809,46.759389,1662589734,4.773627,5,4.790682,True,False,,,...,0.0,True,10000,10000,5,12,scenes/sample.zarr,16,False,/home/jupyter/ray_results/TorchTrainer_2022-09...


In [39]:
# Settings for the W&B run opened in the next cell.

# Where the run lives: owning entity and project.
wandb_entity = "l5-demo"
project_name = "l5-simulation"

# How the run is labelled: job type, display name and tags.
run_type = "train"
run_name = "train-simulation-model"
tags = ["train", "simulation"]

# Free-text notes attached to the run (the surrounding newlines are part of the value).
run_description = """
Train simulation model
"""

In [40]:
#🪄🐝
# Open a W&B run from the run-information settings, with `cfg` stored as the
# run's config.
run = wandb.init(
    entity=wandb_entity, project=project_name,
    name=run_name, job_type=run_type,
    tags=tags, notes=run_description,
    config=cfg,
)

In [41]:
#BUG: to force a connection on the lineage graph
#🪄🐝
# Declare the artifact as an input of this run; it is addressed as
# "<entity>/<project>/<name>:<alias>".
artifact = run.use_artifact(
    f"{artifact_entity}/{artifact_project}/{artifact_name}:{artifact_alias}",
    type=artifact_type,
)

In [42]:
#🪄🐝
# Wrap the results DataFrame in a W&B Table so it can be logged to the run.
analysis_table = wandb.Table(dataframe=analysis_df)

In [43]:
#BUG: run gets lost after tune job due to change in cwd. Forced to make 2 runs
# Refuse to log an empty table. Close the run with a non-zero exit code before
# raising so it is recorded as failed rather than left open when the
# exception aborts the cell.
if len(analysis_table.data) == 0:
    run.finish(exit_code=1)
    raise ValueError("bad table for some reason")

# Log the table under the "analysis_table" key, then close the run.
run.log({"analysis_table": analysis_table})
run.finish()

VBox(children=(Label(value='0.030 MB of 0.053 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.574617…