In [2]:
%load_ext autoreload
%autoreload 2

# Make MiniZinc work in Jupyter Notebooks
import nest_asyncio
nest_asyncio.apply()

import os.path
from copy import deepcopy
from typing import Optional

import torch
print(f"PyTorch version: {torch.__version__}")

import torch_geometric as pyg
from torch_geometric.data import Data

from problems.tsp.tsp_env_multibinary import TSPEnvironmentMultiBinary

# Project root: the parent of the directory the notebook kernel was started in.
BASE_PATH = os.path.abspath(os.path.join(os.getcwd(), os.pardir))

# Directory holding the generated TSP problem instances (*.json files).
TSP_DATA_DIR = os.path.join(BASE_PATH, "problems", "tsp", "data", "generated")

# MiniZinc models passed to the environment as init / repair model paths.
TSP_SOLVERS_DIR = os.path.join(BASE_PATH, "problems", "tsp", "minizinc")
TSP_INIT_SOLVER_PATH = os.path.join(TSP_SOLVERS_DIR, "tsp_init.mzn")
TSP_REPAIR_SOLVER_PATH = os.path.join(TSP_SOLVERS_DIR, "tsp_repair.mzn")

# os.listdir() returns entries in arbitrary order, so the result is sorted to
# make positional access (e.g. problem_instances_paths[0]) select the same
# instance on every run and on every machine.
problem_instances_paths = sorted(
    os.path.join(TSP_DATA_DIR, file_name)
    for file_name in os.listdir(TSP_DATA_DIR)
    if file_name.endswith(".json")
)
print(len(problem_instances_paths))

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
PyTorch version: 2.3.0
100


In [3]:
# Display the resolved project root as a sanity check of the working directory.
BASE_PATH

'D:\\Coding\\University\\S7\\engineering-thesis'

In [3]:
# Environment settings: the first discovered problem instance, the two
# MiniZinc model files, and the solver / episode parameters.
env_settings = {
    "problem_instance_path": problem_instances_paths[0],
    "init_model_path": TSP_INIT_SOLVER_PATH,
    "repair_model_path": TSP_REPAIR_SOLVER_PATH,
    "solver_name": "gecode",
    "max_episode_length": 50,
    "action_bounds": 0.2,
}
env = TSPEnvironmentMultiBinary(**env_settings)

# reset() returns a pair; only its first element (the observation) is used.
obs, _ = env.reset()
print(obs)

{'problem': {'node_positions': [{'x': 74, 'y': 528}, {'x': 658, 'y': 280}, {'x': 314, 'y': 534}, {'x': 160, 'y': 915}, {'x': 756, 'y': 153}, {'x': 841, 'y': 843}, {'x': 748, 'y': 954}, {'x': 995, 'y': 922}, {'x': 75, 'y': 1}, {'x': 139, 'y': 470}, {'x': 338, 'y': 176}, {'x': 973, 'y': 586}, {'x': 296, 'y': 844}, {'x': 820, 'y': 770}, {'x': 438, 'y': 229}, {'x': 742, 'y': 866}, {'x': 244, 'y': 638}, {'x': 962, 'y': 942}, {'x': 149, 'y': 403}, {'x': 412, 'y': 11}]}, 'solution': {'route': [1, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2]}}


## GAT

In [5]:
# Turn the raw observation into the graph representation consumed by the
# feature extractor, then show its summary as the cell output.
graph_data = TSPEnvironmentMultiBinary.preprocess(obs)
graph_data

Data(x=[20, 2], edge_index=[2, 40], edge_attr=[40, 1], pos=[20, 2])

In [7]:
from general.ml.features_extractor import GraphFeaturesExtractor, SequentialGraphFeaturesExtractor

# Smoke-test the graph feature extractor on the preprocessed observation:
# print the layer summary, then the shape of the forward-pass output.
net = GraphFeaturesExtractor(in_channels=2, num_heads=8, edge_dim=1)
print(pyg.nn.summary(net, graph_data.x, graph_data.edge_index, graph_data.edge_attr))
print(net(graph_data.x, graph_data.edge_index, graph_data.edge_attr).shape)

# Second input with one extra row of node features, used to check that the
# extractor accepts a different number of nodes. Only `x` is extended;
# edge_index, edge_attr and pos are left exactly as copied.
graph_data1 = deepcopy(graph_data)
# new_tensor() creates the extra row with the same dtype and device as the
# existing features, instead of relying on torch.cat's implicit type
# promotion of an int64 CPU tensor.
graph_data1.x = torch.cat([graph_data1.x, graph_data1.x.new_tensor([[1, 2]])], dim=0)
print(graph_data1)

print(pyg.nn.summary(net, graph_data1.x, graph_data1.edge_index, graph_data1.edge_attr))
print(net(graph_data1.x, graph_data1.edge_index, graph_data1.edge_attr).shape)

+------------------------+-----------------------------+----------------+----------+
| Layer                  | Input Shape                 | Output Shape   | #Param   |
|------------------------+-----------------------------+----------------+----------|
| GraphFeaturesExtractor | [20, 2], [2, 40], [40, 1]   | [20, 512]      | 201,792  |
| ├─(convs)ModuleList    | --                          | --             | 168,512  |
| │    └─(0)GATConv      | [20, 2], [2, 40], [40, 1]   | [20, 256]      | 1,792    |
| │    └─(1)GATConv      | [20, 256], [2, 40], [40, 1] | [20, 512]      | 133,632  |
| │    └─(2)GATConv      | [20, 512], [2, 40], [40, 1] | [20, 64]       | 33,088   |
| ├─(fc)Linear           | [20, 64]                    | [20, 512]      | 33,280   |
+------------------------+-----------------------------+----------------+----------+
torch.Size([20, 512])
Data(x=[21, 2], edge_index=[2, 40], edge_attr=[40, 1], pos=[20, 2])
+------------------------+-----------------------------+----

## REINFORCE

In [8]:
from general.ml.reinforce import REINFORCE

# Keyword arguments forwarded to the REINFORCE constructor and to
# configure_optimizers() respectively.
reinforce_hyperparameters = {"gamma": 1.0}
optimizer_hyperparameters = {"lr": 1e-3}

policy = REINFORCE(
    **reinforce_hyperparameters,
    graph_features_extractor_kwargs={"in_channels": 2, "num_heads": 8, "edge_dim": 1},
)
policy.configure_optimizers("adam", **optimizer_hyperparameters)

# Query the policy once per graph (original first, then the variant with an
# extra node) and print the action's shape, the action itself, and exp() of
# the returned log_prob (presumably per-node probabilities - verify against
# REINFORCE.get_action).
for graph_sample in (graph_data, graph_data1):
    action, log_prob = policy.get_action(graph_sample)
    print(action.shape)
    print(action)
    print(torch.exp(log_prob))

torch.Size([20])
tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1.])
tensor([0.9703, 0.9976, 0.9900, 0.9900, 0.9966, 0.9966, 0.9989, 0.9988, 0.9988,
        0.9280, 0.9281, 0.9281, 0.9998, 0.9999, 0.9982, 0.9980, 0.9980, 0.9977,
        0.9988, 0.9337], grad_fn=<ExpBackward0>)
torch.Size([21])
tensor([0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1.])
tensor([0.0297, 0.9976, 0.9900, 0.9900, 0.9966, 0.9966, 0.9989, 0.9988, 0.9988,
        0.9280, 0.9281, 0.9281, 0.9998, 0.9999, 0.9982, 0.9980, 0.9980, 0.9977,
        0.9988, 0.9337, 0.4885], grad_fn=<ExpBackward0>)


### Training

In [11]:
# Run a single training epoch on the environment, capping each episode at the
# environment's own max_episode_length, with progress output enabled.
policy.learn(
    env,
    n_epochs=1,
    max_t=env.max_episode_length,
    verbose=True,
)

Epoch 1	Loss: 0.0 Average Score: 0.00
{'objective_value': 12420}
