In [None]:
import numpy as np
import os
import random 
import torch
from torch.autograd import Variable

%matplotlib inline

In [None]:
# Use the GPU only when one is actually available, so the notebook also runs
# on CPU-only machines. This flag is forwarded to Model(use_cuda=...) and
# decides whether the model is moved with .cuda(). Set it to False by hand to
# force CPU execution.
USE_CUDA = torch.cuda.is_available()

# hyperparameters
num_episodes = 100  # first argument of Train.train_and_validation
max_episode_steps = 1000  # second argument of Train.train_and_validation
max_time = 2 * 3600  # passed to Train(max_time=...); presumably seconds (2 h) -- TODO confirm
num_O = 50  # selects instance files containing "-050-" and sizes the precedence map
input_dim = 2  # Model(input_dim=...); each location row has 2 values
embedding_dim = 128  # Model(embedding_dim=...)
batch_size = 1  # passed to both Model and Train
hidden_dim = 128  # Model(hidden_dim=...)
process_iters = 3  # NOTE(review): not used in the visible cells -- confirm it is still needed
# Sequence length handed to Model(seq_len=...): two leading nodes (dummy, taxi)
# plus the 'O' and 'D' rows, assuming each instance holds num_O of each.
tsp_num = num_O * 2 + 2
train_size = 1 #1000000
validation_size = 1
lr = 1e-4  # Train(lr=...)
beta = 0.9  # NOTE(review): not used in the visible cells -- confirm it is still needed
num_glimpse = 1  # NOTE(review): not used in the visible cells -- confirm it is still needed
punishment = 1e5  # Model(punishment=...); presumably a penalty for infeasible tours -- TODO confirm
save_to_dir = '../tmp/pointer'  # directory the per-instance result .txt files are written to
resample_k = 5  # number of matching instances drawn with random.sample

In [None]:
from src.utils import read_instance_data 

# Pick the instance files to train on: every '.tsp' file in instance_dir whose
# name contains num_O zero-padded to three digits between dashes (e.g. "-050-").
instance_dir = os.path.join('data', 'tsppdlib', 'instances', 'random-uniform')
# os.listdir returns entries in arbitrary order; sort so the candidate list is
# stable between runs and machines.
instances = sorted(name for name in os.listdir(instance_dir) if name.endswith('.tsp'))
size_tag = f'-{num_O:03}-'
sub_instances = [name for name in instances if size_tag in name]
# random.sample raises ValueError when k is larger than the population, so cap
# k at the number of matching files instead of crashing on small data sets.
sub_instances = random.sample(sub_instances, k=min(resample_k, len(sub_instances)))


def generate_tsp_data(instance):
    """
    Load one instance file and pack its locations into a single tensor.

    The rows are stacked in a fixed order: the 'dummy' location, the 'taxi'
    location, all 'O' locations, then all 'D' locations. A missing 'dummy'
    entry falls back to [0, 0]; a missing 'taxi' entry falls back to one
    uniformly random point of shape (1, 2).

    Args:
    instance: path handed unchanged to read_instance_data.

    Return:
    tsp_data (tensor): float tensor of shape (1, n_rows, 2); n_rows is
        expected to equal tsp_num when the instance holds num_O 'O' and
        num_O 'D' locations.
    """
    locations = read_instance_data(instance)

    if 'dummy' in locations:
        dummy_row = locations['dummy']
    else:
        dummy_row = [0, 0]

    # Only draw a random taxi position when the instance does not provide one,
    # so the numpy RNG state is untouched otherwise.
    if 'taxi' in locations:
        taxi_row = locations['taxi']
    else:
        taxi_row = np.random.uniform(size=(1, 2))

    stacked = np.vstack([dummy_row, taxi_row, locations['O'], locations['D']])
    # Add the leading batch dimension of size 1.
    return torch.FloatTensor(stacked).unsqueeze(0)

In [None]:
from src.rl.pointernet.nn import Model
from src.rl.pointernet.train import Train
from src.problem import MultiODProblem
from src.solution import MultiODSolution

# Train one model per sampled instance and write the best tour found, its
# feasibility and its cost to <save_to_dir>/<instance name>.txt.

# Create the output directory up front: otherwise open() below raises
# FileNotFoundError only after a complete training run has already finished.
os.makedirs(save_to_dir, exist_ok=True)

# `i`, `instance` and `train` are deliberately kept as plain loop-level names:
# the following cell reads them after the loop has finished.
for i, instance_name in enumerate(sub_instances):
    instance = os.path.join(instance_dir, instance_name)
    train_dataset = [generate_tsp_data(instance) for _ in range(train_size)]
    validation_dataset = [generate_tsp_data(instance) for _ in range(validation_size)]
    print("train dataset size: ", len(train_dataset))
    print("validation dataset size: ", len(validation_dataset))

    # Map node index o in [2, 2 + num_O) to node index o + num_O. With the row
    # order produced by generate_tsp_data (dummy, taxi, O..., D...) this pairs
    # each 'O' row with the 'D' row at the same offset, assuming the instance
    # holds num_O 'O' rows.
    precedence = {o: num_O + o for o in range(2, 2 + num_O)}

    # moving average reward
    moving_average_model = Model(
        input_dim=input_dim,
        embedding_dim=embedding_dim,
        hidden_dim=hidden_dim,
        seq_len=tsp_num,
        precedence=precedence,
        punishment=punishment,
        batch_size=batch_size,
        use_cuda=USE_CUDA,
    )
    if USE_CUDA:
        moving_average_model = moving_average_model.cuda()
    train = Train(moving_average_model, train_dataset, validation_dataset, batch_size, lr=lr, max_time=max_time)
    # use_critic=False: presumably selects the moving-average baseline named
    # above instead of a critic network -- confirm in Train.
    train.train_and_validation(num_episodes, max_episode_steps, use_critic=False)

    # save result:
    print("index list: ", train.best_tour)
    locations = read_instance_data(instance)
    problem = MultiODProblem(locations=locations, ignore_to_dummy_cost=False, ignore_from_dummy_cost=False)
    problem.convert_distance_matrix_to_int()
    ptr_sol = MultiODSolution([train.best_tour], problem)
    is_feasible = problem.is_feasible(ptr_sol)
    # The cost is only computed for a feasible tour; otherwise None is recorded.
    cost = problem.calc_cost(ptr_sol) if is_feasible else None
    lines = [f'is_feasible: {is_feasible}', f'tour: {train.best_tour}', f'cost: {cost}']
    instance_save_header = instance_name.replace(".tsp", "")
    with open(os.path.join(save_to_dir, f'{instance_save_header}.txt'), 'w') as f:
        f.writelines(line + '\n' for line in lines)

In [None]:
# Re-evaluate and re-save the result of the LAST trained instance.
# NOTE: this cell depends on `train`, `instance` and `i` left behind by the
# final iteration of the training loop in the previous cell; it fails with a
# NameError on a fresh kernel or when no instance was trained.

# Make sure the output directory exists so open() cannot fail with
# FileNotFoundError.
os.makedirs(save_to_dir, exist_ok=True)

print("index list: ", train.best_tour)
locations = read_instance_data(instance)
problem = MultiODProblem(locations=locations, ignore_to_dummy_cost=False, ignore_from_dummy_cost=False)
problem.convert_distance_matrix_to_int()
ptr_sol = MultiODSolution([train.best_tour], problem)
is_feasible = problem.is_feasible(ptr_sol)
# The cost is only computed for a feasible tour; otherwise None is recorded.
cost = problem.calc_cost(ptr_sol) if is_feasible else None
lines = [f'is_feasible: {is_feasible}', f'tour: {train.best_tour}', f'cost: {cost}']
instance_save_header = sub_instances[i].replace(".tsp", "")
with open(os.path.join(save_to_dir, f'{instance_save_header}.txt'), 'w') as f:
    f.writelines(line + '\n' for line in lines)