In [1]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os

# A bare `os.chdir("..")` is relative to the *current* working directory, so
# re-running this cell would climb one more level each time and break every
# relative path used later in the notebook. Remember the directory the kernel
# started in (once per kernel) and always resolve the parent from there, which
# makes the cell idempotent.
if "NOTEBOOK_START_DIR" not in globals():
    NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_START_DIR, os.pardir))

In [21]:
import math
from collections import OrderedDict

import numpy as np
import os
import ipdb
import random
import torch
import torch.optim as optim
from os import path
import tqdm

import wandb

import planning
import utils
from dataloader import DataLoader

In [11]:
# Parse the options (model directory overridden for this notebook) and derive
# the file name the policy network will be saved under.
opt = utils.parse_command_line(args=["-model_dir", "/scratch/izar/erciyes/models"])
opt.model_file = path.join(opt.model_dir, "policy_networks", "MPUR-" + opt.policy)
utils.build_model_file_name(opt)

# Create the output directory directly instead of building a shell command
# string; this also works when the path contains spaces or shell metacharacters.
os.makedirs(path.join(opt.model_dir, "policy_networks"), exist_ok=True)

# Seed every random number generator used below.
random.seed(opt.seed)
np.random.seed(opt.seed)
torch.manual_seed(opt.seed)

# Define default device
opt.device = torch.device(
    "cuda" if torch.cuda.is_available() and not opt.no_cuda else "cpu"
)
if torch.cuda.is_available() and opt.no_cuda:
    print(
        "WARNING: You have a CUDA device, so you should probably run without -no_cuda"
    )

# Load the model: look inside model_dir first, then treat opt.mfile as a path
# on its own.
model_path = path.join(opt.model_dir, opt.mfile)
if path.exists(model_path):
    model = torch.load(model_path)
elif path.exists(opt.mfile):
    model = torch.load(opt.mfile)
else:
    raise RuntimeError(f"couldn't find file {opt.mfile}")
print("Loaded model")

# Checkpoints may be stored as a dict wrapping the model. Unwrap it BEFORE
# touching any model attribute; a dict has no `.encoder`.
if isinstance(model, dict):
    model = model["model"]
# Fill in the attribute when the loaded encoder does not define it.
if not hasattr(model.encoder, "n_channels"):
    model.encoder.n_channels = 3

model.opt.lambda_l = opt.lambda_l  # used by planning.py/compute_uncertainty_batch
model.opt.lambda_o = opt.lambda_o  # used by planning.py/compute_uncertainty_batch
if opt.value_model != "":
    value_function = torch.load(
        path.join(opt.model_dir, "value_functions", opt.value_model)
    ).to(opt.device)
    model.value_function = value_function

# Create policy
model.create_policy_net(opt)
optimizer = optim.Adam(model.policy_net.parameters(), opt.lrt)  # POLICY optimiser ONLY!
print("Policy created")

# Load normalisation stats
stats = torch.load("traffic-data/state-action-cost/data_i80_v0/data_stats.pth")
model.stats = stats  # used by planning.py/compute_uncertainty_batch
print("Normalization loaded")
if "ten" in opt.mfile:
    # Build the path with path.join, like the ".cost.model" file below; plain
    # string concatenation dropped the separator whenever model_dir had no
    # trailing slash.
    p_z_file = path.join(opt.model_dir, opt.mfile + ".pz")
    p_z = torch.load(p_z_file)
    model.p_z = p_z

# Send to GPU if possible
model.to(opt.device)
# Give the policy network its own copy of the tensor-valued stats on the
# selected device.
model.policy_net.stats_d = {
    k: v.to(opt.device) for k, v in stats.items() if isinstance(v, torch.Tensor)
}
if opt.learned_cost:
    print("[loading cost regressor]")
    model.cost = torch.load(path.join(opt.model_dir, opt.mfile + ".cost.model"))[
        "model"
    ]
print("Model setup completed")

# Train mode is switched on while the uncertainty statistics are estimated and
# switched back to eval mode afterwards.
# NOTE(review): presumably so dropout is active during the estimate; confirm
# against planning.estimate_uncertainty_stats.
model.train()
model.opt.u_hinge = opt.u_hinge
dataloader = DataLoader(None, opt, opt.dataset)
print("Data loaded")
planning.estimate_uncertainty_stats(model, dataloader, n_batches=50, npred=opt.npred)
print("Uncertainty stats estimated")
model.eval()
print("done")

[will save as: /scratch/izar/erciyes/models/policy_networks/MPUR-policy-deterministic-model=vae-zdropout=0.5-nfeature=256-bsize=6-npred=30-ureg=0.05-lambdal=0.2-lambdaa=0.0-gamma=0.99-lrtz=0.0-updatez=0-inferz=False-learnedcost=False-seed=1-novalue]




Loaded model
Policy created
Normalization loaded
Model setup completed
[loading data shard: traffic-data/state-action-cost/data_i80_v0/trajectories-0400-0415/all_data.pth]
[loading data shard: traffic-data/state-action-cost/data_i80_v0/trajectories-0500-0515/all_data.pth]
[loading data shard: traffic-data/state-action-cost/data_i80_v0/trajectories-0515-0530/all_data.pth]
Number of episodes: 4829
[loading data splits: traffic-data/state-action-cost/data_i80_v0/splits.pth]
[loading data stats: traffic-data/state-action-cost/data_i80_v0/data_stats_with_diff.pth]
[loading car sizes: traffic-data/state-action-cost/data_i80_v0/car_sizes.pth]
Data loaded
[estimating normal uncertainty ranges: 100.0%]
Uncertainty stats estimated
done


### Train step

In [13]:
# Pin the prediction horizon for this notebook.
opt.npred = 30
# A goal rollout length of -1 is treated as "not set": fall back to the
# prediction horizon; any other value is kept as is.
opt.goal_rollout_len = (
    opt.npred if opt.goal_rollout_len == -1 else opt.goal_rollout_len
)

In [14]:
# inputs, actions, targets, ids, car_sizes = dataloader.get_batch_fm("train", opt.npred)

inputs[0] - input images - B x Cond x C x H x W

inputs[1] - states - B x Cond x 4

inputs[2] - ego image - B x C x H x W

actions - B x Pred x 2

targets[0] - target (future) images - B x Pred x C x H x W

targets[1] - states - B x Pred x 4

targets[2] - actions - B x Pred x 2

len(ids) - B - car pickles

car_sizes - B x 2 - one size pair per sample in the batch

In [30]:
def step(what, nbatches, npred):
    """Run `nbatches` batches of the policy objective on the `what` split.

    For every batch the individual cost terms returned by
    `planning.train_policy_net_mpur` are combined into a single weighted
    "policy" loss. When `what == "train"` that loss is back-propagated and the
    policy network is updated; for any other split the loss is only evaluated.
    Batches whose policy loss is NaN are reported and excluded from both the
    update and the statistics.

    Args:
        what: name of the data split passed to `dataloader.get_batch_fm`;
            the optimiser is stepped only when it equals "train".
        nbatches: number of batches to process.
        npred: number of prediction steps requested from the dataloader.

    Returns:
        dict mapping each loss name (proximity, uncertainty, lane, offroad,
        action, policy, goal) to its average over the non-NaN batches. When
        training, the key "grad_norm" holds the average pre-clipping gradient
        norm. Every value is NaN if no batch produced a finite loss.
    """
    train = what == "train"
    # Train mode is enabled for every split, exactly as before.
    # NOTE(review): presumably dropout must stay active for the uncertainty
    # term; confirm before switching to eval() for validation.
    model.train()
    model.policy_net.train()
    n_updates, grad_norm = 0, 0
    total_losses = dict(
        proximity=0,
        uncertainty=0,
        lane=0,
        offroad=0,
        action=0,
        policy=0,
        goal=0,
    )
    proximity_losses = []
    lane_losses = []
    goal_losses = []
    for j in tqdm.tqdm(range(nbatches)):
        inputs, _, targets, _, car_sizes = dataloader.get_batch_fm(what, npred)
        pred, _ = planning.train_policy_net_mpur(
            model,
            inputs,
            targets,
            car_sizes,
            goal_distance=opt.goal_distance,
            n_models=10,
            lrt_z=opt.lrt_z,
            n_updates_z=opt.z_updates,
            infer_z=opt.infer_z,
        )
        pred["policy"] = (
            pred["proximity"]
            + opt.u_reg * pred["uncertainty"]
            + opt.lambda_l * pred["lane"]
            + opt.lambda_a * pred["action"]
            + opt.lambda_o * pred["offroad"]
            + opt.lambda_g * pred["goal"]  # add goal cost here
        )

        if math.isnan(pred["policy"].item()):
            # Skip the batch entirely: no optimiser step, and nothing is added
            # to the statistics, so one bad batch cannot turn the reported
            # means and standard deviations into NaN. (No debugger breakpoint
            # here; it would stall an unattended run.)
            print("warning, NaN")
            continue

        if train:
            optimizer.zero_grad()
            pred["policy"].backward()  # back-propagation through time!
            # Record the norm before clipping.
            grad_norm += utils.grad_norm(model.policy_net).item()
            torch.nn.utils.clip_grad_norm_(
                model.policy_net.parameters(), opt.grad_clip
            )
            optimizer.step()
        for loss in total_losses:
            total_losses[loss] += pred[loss].item()
        proximity_losses.append(pred["proximity"].item())
        lane_losses.append(pred["lane"].item())
        goal_losses.append(pred["goal"].item())
        n_updates += 1

    if n_updates == 0:
        # Nothing finite to average (nbatches == 0 or every batch was NaN).
        print("warning, no valid batches")
        averages = {name: float("nan") for name in total_losses}
        if train:
            averages["grad_norm"] = float("nan")
        return averages

    proximity_losses = np.asarray(proximity_losses)
    lane_losses = np.asarray(lane_losses)
    goal_losses = np.asarray(goal_losses)
    print("Means == Proximity:", proximity_losses.mean(), "Lane:", lane_losses.mean() , "Goal:", goal_losses.mean())
    print("Std == Proximity:", proximity_losses.std(), "Lane:", lane_losses.std() , "Goal:", goal_losses.std())

    averages = {name: total / n_updates for name, total in total_losses.items()}
    if train:
        averages["grad_norm"] = grad_norm / n_updates
    return averages
# Run `opt.epoch_size` batches on the "train" split with horizon `opt.npred`.
step("train", opt.epoch_size, opt.npred)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 500/500 [07:12<00:00,  1.15it/s]

Means == Proximity: 0.10893657 Lane: 0.21554056 Goal: 0.042373955
Std == Proximity: 0.045732547 Lane: 0.054166045 Goal: 0.009344624





In [20]:

# Scratch check of what `.std()` reports for two values: the result below
# (0.65) is the population standard deviation (ddof=0), not the sample one.
np.asarray([1,2.3]).std()

0.6499999999999999