In [1]:
import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "6"
import json
import pprint as pp
from options import get_options

import torch
import torch.optim as optim
from tensorboard_logger import Logger as TbLogger

from critic_network import CriticNetwork
from options import get_options
from train import train_epoch, get_inner_model   ##########  no need for validate now
from baselines import CriticBaseline
from attention_model import AttentionModel
from utils import torch_load_cpu, load_model, maybe_cuda_model, load_problem

In [2]:
# Execute options.py via IPython's %run magic. The next cell reads an `opts`
# object that is not defined anywhere else in this notebook, so it presumably
# comes from this script — TODO confirm against options.py.
%run options

In [None]:
# Echo the complete run configuration so it is captured in the cell output.
pp.pprint(vars(opts))

# Set the random seed
torch.manual_seed(opts.seed)

# Optionally configure tensorboard; tb_logger stays None when logging is disabled.
tb_logger = None
if not opts.no_tensorboard:
    tb_logger = TbLogger(os.path.join(opts.log_dir, "{}_{}".format(opts.problem, opts.graph_size), opts.run_name))

# exist_ok=True keeps this cell re-runnable: without it, executing the cell a
# second time with the same save_dir raises FileExistsError before training starts.
os.makedirs(opts.save_dir, exist_ok=True)

# Persist the options next to the checkpoints so the run can be reproduced.
with open(os.path.join(opts.save_dir, "args.json"), 'w') as f:
    json.dump(vars(opts), f, indent=True)

# Resolve the problem definition named in the options.
problem = load_problem(opts.problem)

# Checkpoint contents; stays an empty dict when no checkpoint path was given.
load_data = {}
assert opts.load_path is None or opts.resume is None, "Only one of load path and resume can be given"

# opts.load_path takes precedence; opts.resume is the fallback.
load_path = opts.resume if opts.load_path is None else opts.load_path
if load_path is not None:
    print('  [*] Loading data from {}'.format(load_path))
    load_data = torch_load_cpu(load_path)

# Build the policy network first, then pass it through maybe_cuda_model
# together with the use_cuda flag.
model_class = AttentionModel
attention_net = model_class(
    opts.embedding_dim,
    opts.hidden_dim,
    problem,
    n_encode_layers=opts.n_encode_layers,
    mask_inner=True,
    mask_logits=True,
    normalization=opts.normalization,
    tanh_clipping=opts.tanh_clipping
)
model = maybe_cuda_model(attention_net, opts.use_cuda)

# Overlay any checkpointed weights on top of the freshly initialised ones:
# keys missing from the checkpoint keep their current values.
model_ = get_inner_model(model)
merged_state = dict(model_.state_dict())
merged_state.update(load_data.get('model', {}))
model_.load_state_dict(merged_state)

# Build the critic network, pass it through maybe_cuda_model, and wrap it
# in the CriticBaseline used during training.
critic_net = CriticNetwork(
    2,
    opts.embedding_dim,
    opts.hidden_dim,
    opts.n_encode_layers,
    opts.normalization
)
baseline = CriticBaseline(maybe_cuda_model(critic_net, opts.use_cuda))

# Restore the baseline from the checkpoint when one is present; the checkpoint
# must have been produced with the same type of baseline.
if 'baseline' in load_data:
    baseline.load_state_dict(load_data['baseline'])

# Initialize optimizer: a single Adam instance with one parameter group for the
# model and, when the baseline has learnable parameters, a second group with
# its own learning rate.
param_groups = [{'params': model.parameters(), 'lr': opts.lr_model}]
baseline_params = baseline.get_learnable_parameters()
if len(baseline_params) > 0:
    param_groups.append({'params': baseline_params, 'lr': opts.lr_critic})
optimizer = optim.Adam(param_groups)

# Load optimizer state
if 'optimizer' in load_data:
    optimizer.load_state_dict(load_data['optimizer'])
    # Only move the restored state tensors to the GPU when CUDA is actually in
    # use; calling .cuda() unconditionally breaks resuming a run on CPU.
    if opts.use_cuda:
        for state in optimizer.state.values():
            for k, v in state.items():
                if torch.is_tensor(v):
                    state[k] = v.cuda()

# Learning-rate schedule: each group's base lr is scaled by lr_decay ** epoch.
def _lr_factor(epoch):
    return opts.lr_decay ** epoch


lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer, _lr_factor)

# Validation data is read from a pre-saved file rather than generated here.
# The previous generation path is kept below for reference:
# val_dataset = problem.make_dataset(size=opts.graph_size, num_samples=opts.val_size, filename=opts.val_dataset)
# torch.save(val_dataset,'myval_100.pt')

# Keep only the first 200 entries of the saved set.
val_dataset = torch.load('test50.pt')[0:200]

if opts.resume:
    # The epoch number is the second "-"-separated field of the checkpoint
    # file name, once the directory and extension are stripped.
    checkpoint_stem = os.path.splitext(os.path.basename(opts.resume))[0]
    epoch_resume = int(checkpoint_stem.split("-")[1])

    # Set the random states
    torch.set_rng_state(load_data['rng_state'])
    if opts.use_cuda:
        torch.cuda.set_rng_state_all(load_data['cuda_rng_state'])

    # Dumping of state was done before epoch callback, so do that now (model is loaded)
    baseline.epoch_callback(model, epoch_resume)
    print("Resuming after {}".format(epoch_resume))

    # Continue with the epoch after the one stored in the checkpoint.
    opts.epoch_start = epoch_resume + 1

if opts.eval_only:
    # `validate` is not among the notebook's top-level imports, so calling it
    # here used to raise NameError. Import it lazily from the module that also
    # provides train_epoch.
    # NOTE(review): assumes train.py still exports validate(model, dataset, opts) — confirm.
    from train import validate

    validate(model, val_dataset, opts)
else:
    # Run n_epochs training epochs, starting at epoch_start (which the resume
    # logic may have advanced past the checkpointed epoch).
    for epoch in range(opts.epoch_start, opts.epoch_start + opts.n_epochs):
        train_epoch(
            model,
            optimizer,
            baseline,
            lr_scheduler,
            epoch,
            val_dataset,
            problem,
            tb_logger,
            opts
        )

{'baseline': 'critic',
 'batch_size': 10,
 'checkpoint_epochs': 1,
 'embedding_dim': 128,
 'epoch_size': 100,
 'epoch_start': 0,
 'eval_batch_size': 200,
 'eval_only': False,
 'graph_size': 50,
 'hidden_dim': 128,
 'lambda': 0.8,
 'load_path': None,
 'log_dir': 'logs',
 'log_step': 50,
 'lr_critic': 0.0001,
 'lr_decay': 0.99,
 'lr_model': 0.0001,
 'max_grad_norm': 1.0,
 'n_encode_layers': 3,
 'n_epochs': 100,
 'no_cuda': False,
 'no_progress_bar': False,
 'no_tensorboard': False,
 'normalization': 'batch',
 'output_dir': 'outputs',
 'problem': 'tsp',
 'resume': None,
 'run_name': 'run',
 'save_dir': 'outputs\\tsp_50\\run',
 'seed': 1234,
 'steps': 10,
 'tanh_clipping': 10.0,
 'use_cuda': True,
 'val_dataset': None,
 'val_size': 2000}
Start train epoch 0, lr=0.0001 for run run
Start train epoch 0, lr=0.0001 for run run


  for group in param_groups
100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [06:09<00:00, 36.73s/it]


Finished epoch 0, took 00:06:09 s
Saving model and state...
Validating...
Test, took 00:01:59 s
Improving: 4.934609889984131 +- 0.15752926468849182
Best Improving: 8.985796928405762 +- 0.1288522183895111
Best solutions: 17.050809860229492 +- 0.09012404829263687
Start train epoch 1, lr=0.0001 for run run
Start train epoch 1, lr=9.900000000000001e-05 for run run


 20%|████████████████▌                                                                  | 2/10 [01:13<04:53, 36.74s/it]