In [1]:
import argparse
import datetime
import os
import shutil
import sys
import time

import matplotlib
import numpy as np
import torch
import wandb
import yaml

from torch.utils.tensorboard import SummaryWriter

from policy import config
from policy.checkpoints import CheckpointIO
from policy.dataset.ms2dataset import get_MS_loaders
from policy.skill.training import Trainer

# Allow TF32 matmuls on CUDA and select the non-interactive "Agg" matplotlib
# backend so figures can be rendered without a display.
torch.backends.cuda.matmul.allow_tf32 = True
matplotlib.use("Agg")

# NOTE(review): machine-specific absolute path; this notebook only runs as-is
# on the machine that owns this directory.
out_dir = "/home/mrl/Documents/Projects/tskill/out/PegInsertion/test2"
default_cfg_path = os.path.join(out_dir, "config.yaml")

# The experiment configuration is read from the run's own output directory.
cfg = config.load_config(default_cfg_path)

# Use the GPU when one is available, otherwise fall back to the CPU.
is_cuda = torch.cuda.is_available()
if is_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Optimizer hyper-parameters fall back to defaults when absent from the config;
# the remaining training settings are required keys.
lr = cfg["training"].get("lr", 1e-3)
weight_decay = cfg["training"].get("weight_decay", 1e-4)
backup_every = cfg["training"]["backup_every"]
max_it = cfg["training"]["max_it"]
model_selection_metric = cfg["training"]["model_selection_metric"]

# Translate the selection mode into a sign: +1 when a larger metric is better,
# -1 when a smaller one is. Any other mode is rejected up front.
selection_mode = cfg["training"]["model_selection_mode"]
if selection_mode not in ("maximize", "minimize"):
    raise ValueError("model_selection_mode must be either maximize or minimize.")
model_selection_sign = 1 if selection_mode == "maximize" else -1

# Output-directory setup is intentionally disabled: the config is loaded from
# out_dir itself, so the copy below would have source and destination identical.
# os.makedirs(out_dir, exist_ok=True)
# shutil.copyfile(default_cfg_path, os.path.join(out_dir, "config.yaml"))

# Force single-sample batches for both the training and validation loaders.
for batch_key in ("batch_size", "val_batch_size"):
    cfg["training"][batch_key] = 1

# Build the model described by the config and show its module tree.
model = config.get_model(cfg, device=device)
print(model)

# Initialize training: split the trainable parameters into the state-encoder
# ("stt_encoder") parameters and everything else, preserving model order.
base_params = []
stt_encoder_params = []
for name, param in model.named_parameters():
    if not param.requires_grad:
        continue
    if "stt_encoder" in name:
        stt_encoder_params.append(param)
    else:
        base_params.append(param)

# The state encoder only gets its own parameter group (with its own learning
# rate) when a positive lr_state_encoder is configured.
param_dicts = [{"params": base_params}]
if cfg["training"]["lr_state_encoder"] > 0:
    param_dicts.append({
        "params": stt_encoder_params,
        "lr": cfg["training"]["lr_state_encoder"],
    })

# Number of parameter elements actually handed to the optimizer.
n_p = sum(p.numel() for group in param_dicts for p in group["params"])

optimizer = torch.optim.AdamW(param_dicts, lr=lr, weight_decay=weight_decay)

# Optional exponential learning-rate decay. The default factor of 1 means
# "no decay", in which case no scheduler is created.
lr_decay = cfg["training"].get("lr_decay", 1)
# Always bind `scheduler`: leaving it undefined when decay is disabled makes
# any later reference raise NameError, and in a notebook a scheduler left over
# from an earlier run (tied to an old optimizer) would be silently reused.
scheduler = (
    torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=lr_decay)
    if lr_decay < 1
    else None
)

# Trainer and checkpoint helper both operate on the model/optimizer built above;
# checkpoints are read from and written to out_dir.
trainer: Trainer = config.get_trainer(model, optimizer, cfg, device=device)
checkpoint_io = CheckpointIO(out_dir, model=model, optimizer=optimizer)

# Resume from the best checkpoint when there is one; otherwise start fresh.
# NOTE(review): CheckpointIO.load is not visible here. FileExistsError is kept
# because the original code relies on it; FileNotFoundError is the conventional
# "file is missing" error and is accepted as well so a missing checkpoint never
# aborts the notebook.
try:
    load_dict = checkpoint_io.load("model_best.pt")
except (FileExistsError, FileNotFoundError):
    load_dict = dict()

# Training progress counters restored from the checkpoint (0 on a fresh start).
epoch_it = load_dict.get("epoch_it", 0)
it = load_dict.get("it", 0)

# Worst possible value for the chosen selection direction is the default, so
# the first validation result always counts as an improvement.
metric_val_best = load_dict.get("loss_val_best", -model_selection_sign * np.inf)

# A stored infinite value may carry the sign of a different selection mode;
# normalise it to the worst value for the current mode.
if metric_val_best == np.inf or metric_val_best == -np.inf:
    metric_val_best = -model_selection_sign * np.inf
print(
    "Current best validation metric (%s): %.8f"
    % (model_selection_metric, metric_val_best)
)

# Logging / checkpointing / validation / visualisation intervals (required keys).
print_every = cfg["training"]["print_every"]
checkpoint_every = cfg["training"]["checkpoint_every"]
validate_every = cfg["training"]["validate_every"]
visualize_every = cfg["training"]["visualize_every"]

# Count total and trainable parameter elements in a single pass over the model.
nparameters = 0
n_trainable_parameters = 0
for param in model.parameters():
    n_elements = param.numel()
    nparameters += n_elements
    if param.requires_grad:
        n_trainable_parameters += n_elements

# Every trainable parameter must have been placed in an optimizer group.
assert n_trainable_parameters == n_p, "Number of trainable params does not match param dicts"

print(f"Number of trainable parameters: {n_trainable_parameters / 1e6:.2f}M")
print(f"Number of total parameters: {nparameters / 1e6:.2f}M")




TSkillCVAE(
  (decoder): Transformer(
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0-5): 6 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=256, out_features=256, bias=True)
          )
          (linear1): Linear(in_features=256, out_features=1024, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=1024, out_features=256, bias=True)
          (norm1): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inplace=False)
          (dropout2): Dropout(p=0.1, inplace=False)
        )
      )
      (norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
    )
    (decoder): TransformerDecoder(
      (layers): ModuleList(
        (0-7): 8 x TransformerDecoderLayer(
          (self_attn): MultiheadAttention(
     

In [3]:
from tqdm import tqdm

# Sweep over data-scaling modes and log action/state histograms of the training
# set to TensorBoard. Entry k of `act_scaling` is used together with entry k of
# `state_scaling`.
act_scaling = (1, "robust_scaler", "normal", "uniform")
state_scaling = (1, "robust_scaler", "normal", "uniform")
summary_dir = "out/PegInsertion/action_scaling_test"
os.makedirs(summary_dir, exist_ok=True)
writer = SummaryWriter(summary_dir)
t0 = time.time()
# try/finally guarantees the event file is flushed and closed even when the
# sweep is interrupted (e.g. a KeyboardInterrupt from stopping the cell).
try:
    for k, (s, s2) in enumerate(zip(act_scaling, state_scaling)):
        cfg["data"]["action_scaling"] = s
        cfg["data"]["state_scaling"] = s2
        cfg["data"]["gripper_scaling"] = False
        cfg["training"]["out_dir"] = summary_dir

        # Dataset. Only the first sweep builds the loaders without
        # indices="file"; later sweeps request the indices from file --
        # presumably the split written by the first sweep, so every scaling
        # mode sees the same episodes (TODO confirm in get_MS_loaders).
        if k > 0:
            train_loader, val_loader = get_MS_loaders(cfg, indices="file", shuffle=False)
        else:
            train_loader, val_loader = get_MS_loaders(cfg, shuffle=False)

        print("Iterating through training set...")
        # `ep` is the 1-based position of the batch within this sweep.
        for ep, batch in enumerate(tqdm(train_loader), start=1):
            # NOTE(review): advances the global iteration counter restored from
            # the checkpoint even though no training step is run in this loop.
            it += 1
            acts = batch["actions"]
            qpos = batch["state"]
            _, seq, act_dim = acts.shape
            _, _, q_dim = qpos.shape

            # Only batch element 0 is inspected; the setup cell forces the
            # batch size to 1.
            for i in range(seq):
                # The last action dimension is excluded here -- presumably the
                # gripper command; TODO confirm.
                acts_i = acts[0, i, :-1]
                qpos_i = qpos[0, i, :]
                # Skip timesteps whose (truncated) action vector is all zeros.
                if torch.nonzero(acts_i).shape[0] > 0:
                    writer.add_histogram(f'ep_{ep}_all_acts_{k}', acts_i, i)
                    writer.add_histogram(f'ep_{ep}_all_qpos_{k}', qpos_i, i)

            # Per-dimension histograms over the whole sequence; the sweep index
            # is the global step, so scaling modes appear as successive steps
            # under the same tag.
            for a in range(act_dim):
                acts_a = acts[0, :, a]
                writer.add_histogram(f'ep_{ep}_act_{a}', acts_a, k)
            for q in range(q_dim):
                qpos_q = qpos[0, :, q]
                writer.add_histogram(f'ep_{ep}_qpos_{q}', qpos_q, k)
finally:
    writer.close()

Computing linear scaling
Replacing existing action scaling file
Replacing existing train/val index file
Shuffling: False
Iterating through training set...


100%|██████████| 45/45 [00:26<00:00,  1.71it/s]


Collecting all actions...


100%|██████████| 50/50 [00:04<00:00, 11.94it/s]


computing seperate gripper scaling
Computing action norm
Replacing existing action scaling file
Loading indices from file: out/PegInsertion/action_scaling_test/train_val_indices.pickle
Shuffling: False
Iterating through training set...


 56%|█████▌    | 25/45 [00:14<00:11,  1.71it/s]


KeyboardInterrupt: 