In [59]:
from pathlib import Path
import shutil
import tarfile
import gzip

import tsplib95
import networkx as nx
import torch
from torch_geometric.data import Data

In [60]:
def extract_tsp_archive(tar_path: Path, extract_path: Path):
    """
    Extract a TSPLIB .tar archive and decompress any .gz files inside it.

    The destination directory is deleted and recreated on every call, so
    after the call it contains only what came out of the archive.

    Args:
        tar_path: Path of the .tar archive to unpack.
        extract_path: Destination directory. Existing content is removed.

    Returns:
        Sorted list of the .tsp file paths located directly in extract_path.

    Raises:
        FileNotFoundError: If tar_path does not exist.
    """

    if not tar_path.exists():
        raise FileNotFoundError(f"Archive not found: {tar_path}")

    # Start from an empty directory so files from a previous run cannot leak in
    if extract_path.exists():
        shutil.rmtree(extract_path)
    extract_path.mkdir(parents=True, exist_ok=True)

    with tarfile.open(tar_path, "r") as tar:
        if hasattr(tarfile, "data_filter"):
            # Extraction filters reject members that would land outside
            # extract_path (absolute paths, "..", unsafe links).
            tar.extractall(path=extract_path, filter="data")
        else:
            # Older interpreters without extraction-filter support
            tar.extractall(path=extract_path)

    # Snapshot the listing first: the loop creates and deletes files in the
    # very directory being globbed.
    for gz_file in sorted(extract_path.glob("*.gz")):
        # "name.tsp.gz" -> "name.tsp"
        output_file = extract_path / gz_file.stem
        with gzip.open(gz_file, "rb") as f_in, open(output_file, "wb") as f_out:
            shutil.copyfileobj(f_in, f_out)
        gz_file.unlink()

    print("Archive extracted and .gz files decompressed.")

    return sorted(extract_path.glob("*.tsp"))

In [61]:
def prepare_graph(G): # different from script in .py (here coords are kept)
    """
    Normalise a graph obtained from tsplib95 for the rest of the pipeline.

    Steps performed on a new nx.Graph built from G:
    - the graph is made undirected
    - self-loops are dropped
    - every node keeps only "coord"/"display" (when present and not None)
      and receives "initial", "current" and "target"
    - every edge keeps only "weight" (None when the edge had no weight)

    The node with the smallest id is flagged as both initial and current;
    "target" is 0 everywhere.

    Returns the new graph.
    """

    # Building an nx.Graph from G gives the undirected structure
    graph = nx.Graph(G)

    # Drop edges that connect a node to itself
    graph.remove_edges_from(list(nx.selfloop_edges(graph)))

    start = min(graph.nodes)

    for node, attrs in graph.nodes(data=True):
        # Layout information is the only thing carried over
        kept = {
            key: attrs[key]
            for key in ("coord", "display")
            if attrs.get(key) is not None
        }
        is_start = int(node == start)

        # Rebuild the attribute dict in place
        attrs.clear()
        attrs.update(kept)
        attrs.update(initial=is_start, current=is_start, target=0)

    # Strip every edge attribute except the weight
    for _, _, edge_attrs in graph.edges(data=True):
        weight = edge_attrs.get("weight")
        edge_attrs.clear()
        edge_attrs["weight"] = weight

    return graph


In [62]:
def nx_to_pyg(G):
    """
    Convert a prepared NetworkX TSP graph into a PyTorch Geometric Data object.

    Nodes are indexed by their position in the sorted list of node ids.

    Keeps:
      - x: float tensor [num_nodes, 2] holding [initial, current]
      - edge_index: long tensor [2, 2 * num_edges] (each edge in both directions)
      - edge_attr: float tensor [2 * num_edges, 1] holding the weight
      - node_id: original node ids, in index order
      - y: 0-based index of the first node whose "target" is 1, or -1 when
        no node is marked as target

    The tensors keep their documented rank even when the graph has no
    nodes or no edges.
    """

    # Sorted node list for consistent indexing
    nodes = sorted(G.nodes())
    mapping = {node: i for i, node in enumerate(nodes)}

    # Node features; reshape pins the [N, 2] layout when there are no nodes
    x = torch.tensor(
        [
            [G.nodes[node]["initial"], G.nodes[node]["current"]]
            for node in nodes
        ],
        dtype=torch.float,
    ).reshape(-1, 2)

    # Original TSPLIB node IDs
    node_id = torch.tensor(nodes, dtype=torch.long)

    # Target node (converted to PyTorch index)
    target_node = next((node for node in nodes if G.nodes[node]["target"] == 1), None)
    y = torch.tensor(
        mapping[target_node] if target_node is not None else -1,
        dtype=torch.long,
    )

    # Edges (bidirectional): emit (i, j) and (j, i) with the same weight
    edge_index_list = []
    edge_attr_list = []

    for u, v, attrs in G.edges(data=True):
        i, j = mapping[u], mapping[v]
        w = attrs["weight"]

        edge_index_list.extend(([i, j], [j, i]))
        edge_attr_list.extend(([w], [w]))

    # Without the reshape an empty edge list would give 1-D tensors of shape
    # (0,) instead of (2, 0) / (0, 1).
    edge_index = (
        torch.tensor(edge_index_list, dtype=torch.long)
        .reshape(-1, 2)
        .t()
        .contiguous()
    )
    edge_attr = torch.tensor(edge_attr_list, dtype=torch.float).reshape(-1, 1)

    # Build Data object
    return Data(
        x=x,
        edge_index=edge_index,
        edge_attr=edge_attr,
        node_id=node_id,
        y=y,
    )

In [63]:
# Unpack the TSPLIB archive. extract_tsp_archive deletes and recreates
# extract_path, so running this cell always starts from a fresh directory.
tar_path = Path("Datasets/ALL_tsp.tar")
extract_path = Path("Datasets/ALL_tsp")
# Sorted list of the extracted .tsp files; later cells index into it.
tsp_files = extract_tsp_archive(tar_path, extract_path)

Archive extracted and .gz files decompressed.


In [69]:
# Smoke test on one instance: load -> prepare -> convert to PyG.
problem = tsplib95.load(tsp_files[4])
G = problem.get_graph()
# Uncomment to inspect the raw graph returned by tsplib95:
#print(G.graph)
#print(G.__dict__)
#print(G.nodes(data=True))
#print(G.edges(data=True))

# Rebinds G to the cleaned graph (undirected, no self-loops, reduced attributes)
G = prepare_graph(G)
# Uncomment to inspect the prepared graph:
#print(G.graph)
#print(G.__dict__)
#print(G.nodes(data=True))
#print(G.nodes()[1])
#print(G.edges(data=True))
#print(G.edges()[1, 2])

data = nx_to_pyg(G)
# x holds [initial, current] per node; node_id holds the original node ids
#print(data)
print(data.x)
#print(data.edge_index)
#print(data.edge_attr)
print(data.node_id)

tensor([[1., 1.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]])
tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
        19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])


In [65]:
def generate_training_graphs(G, tour):
    """
    Build one training graph per decision along a tour.

    At step s the current node is tour[s] and the label is tour[s + 1].
    The graph for that step is G without the nodes in tour[:s], except that
    the initial node tour[0] is always kept. Steps run over
    range(len(tour) - 2).

    Args:
        G: Prepared graph. It is never modified; every result is a copy.
        tour: Sequence of node ids in visiting order; tour[0] is the
            initial node.

    Returns:
        List of graphs, one per step, whose nodes carry "initial", "current"
        and "target" flags (0/1). Empty when the tour has fewer than three
        nodes.
    """

    graphs = []

    # No decision to learn from; also avoids indexing into an empty tour
    if len(tour) < 3:
        return graphs

    initial = tour[0]

    # One working copy that shrinks as the tour advances. Removing each
    # visited node once is equivalent to re-removing tour[:step] from a
    # fresh copy of G at every step, but copies less and less data.
    working = G.copy()

    for step in range(len(tour) - 2):
        current = tour[step]
        target = tour[step + 1]

        # The node the walker has just left becomes visited (the initial
        # node is kept in every graph).
        if step > 0:
            left_behind = tour[step - 1]
            if left_behind != initial and left_behind in working:
                working.remove_node(left_behind)

        # Independent snapshot for this step
        H = working.copy()

        # Reset attributes
        for node in H.nodes:
            H.nodes[node]["initial"] = int(node == initial)
            H.nodes[node]["current"] = int(node == current)
            H.nodes[node]["target"] = int(node == target)

        graphs.append(H)

    return graphs

In [None]:
def load_opt_tour(tour_path: Path):
    """
    Load a TSPLIB .opt.tour file and return the tour as a list of node IDs.

    Everything before the TOUR_SECTION line is ignored. Reading stops at the
    first -1 or EOF token, or at the end of the file.

    Handles:
      - one node per line
      - multiple nodes per line
      - a -1 / EOF terminator on its own line or at the end of a line of nodes

    Args:
        tour_path: Path of the .opt.tour file.

    Returns:
        List of node ids in tour order. If the file repeats the first node
        at the end (closed tour), that trailing duplicate is dropped.

    Raises:
        ValueError: If a token inside the tour section is not an integer.
    """
    terminators = ("-1", "EOF")
    tour = []
    reading = False
    finished = False

    with open(tour_path, "r") as f:
        for line in f:
            line = line.strip()

            if line == "TOUR_SECTION":
                reading = True
                continue

            if not reading:
                continue

            # Check the terminator per token, so that a line such as
            # "4 5 -1" ends the tour instead of adding -1 to it.
            for token in line.split():
                if token in terminators:
                    finished = True
                    break
                tour.append(int(token))

            if finished:
                break

    # Remove possible duplicated last node
    if len(tour) > 1 and tour[0] == tour[-1]:
        tour = tour[:-1]

    return tour


In [67]:
import matplotlib.pyplot as plt

def draw_graph(G, title=""):
    """
    Plot G with nodes coloured by role and show the figure.

    Node positions come from the "display" attribute when every node has
    one, otherwise from "coord" when every node has one, otherwise from a
    seeded spring layout.

    Colours: initial -> green, current -> blue, target -> red, any other
    node -> lightgray. The first matching role wins, in that order.
    """

    def role_colour(attrs):
        # Roles are checked in priority order
        if attrs["initial"]:
            return "green"
        if attrs["current"]:
            return "blue"
        if attrs["target"]:
            return "red"
        return "lightgray"

    # Priority: display → coord → spring_layout
    for key in ("display", "coord"):
        if all(key in G.nodes[n] for n in G.nodes):
            pos = {n: G.nodes[n][key] for n in G.nodes}
            break
    else:
        pos = nx.spring_layout(G, seed=42)

    colors = [role_colour(G.nodes[n]) for n in G.nodes]

    nx.draw(G, pos, with_labels=True, node_color=colors, node_size=600)
    plt.title(title)
    # Flip the Y axis (the notebook treats TSPLIB coordinates as having an inverted Y)
    plt.gca().invert_yaxis()
    plt.show()


In [72]:
# Walk through one instance end to end: load the problem and its optimal
# tour, build one graph per decision, and convert each graph to PyG.
tsp_file = tsp_files[4]
problem = tsplib95.load(tsp_file)
G = prepare_graph(problem.get_graph())
print(problem.name)

# The optimal tour sits next to the instance as "<name>.opt.tour"
name = tsp_file.stem
tour_path = extract_path / f"{name}.opt.tour"
tour = load_opt_tour(tour_path)
print(tour)

graphs = generate_training_graphs(G, tour)
pyg_graphs = []

for i, g in enumerate(graphs):
    #draw_graph(g, title=f"Graph {i}")
    pyg_graph = nx_to_pyg(g)
    # Keep the converted graph; the list was previously left empty
    pyg_graphs.append(pyg_graph)
    print(pyg_graph.node_id)
    print(pyg_graph.y)


bayg29
[1, 28, 6, 12, 9, 26, 3, 29, 5, 21, 2, 20, 10, 4, 15, 18, 14, 17, 22, 11, 19, 25, 7, 23, 8, 27, 16, 13, 24]
tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
        19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])
tensor(27)
tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
        19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])
tensor(5)
tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
        19, 20, 21, 22, 23, 24, 25, 26, 27, 29])
tensor(11)
tensor([ 1,  2,  3,  4,  5,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
        20, 21, 22, 23, 24, 25, 26, 27, 29])
tensor(7)
tensor([ 1,  2,  3,  4,  5,  7,  8,  9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20,
        21, 22, 23, 24, 25, 26, 27, 29])
tensor(23)
tensor([ 1,  2,  3,  4,  5,  7,  8, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21,
        22, 23, 24, 25, 26, 27, 29])
tensor(2)
tensor([ 1,  2,  3,  4,  5,  7,  8, 10, 11, 13, 14, 15, 16, 17, 

# Main

In [None]:
# Full pass over the archive: extract it, then load, clean and convert every
# instance that passes the filters below.
# Note: extract_tsp_archive deletes and recreates extract_path, so this cell
# re-extracts the archive each time it runs.
tar_path = Path("Datasets/ALL_tsp.tar")
extract_path = Path("Datasets/ALL_tsp")
tsp_files = extract_tsp_archive(tar_path, extract_path)

# NOTE(review): the traceback recorded under this cell complains about a
# 'Graph' object's `.type`, while the code here only reads `problem.type`;
# the saved output may be stale. Confirm by re-running.
for i, tsp_file in enumerate(tsp_files):
    print(f"\n---Graf {i}---")
    problem = tsplib95.load(tsp_file)

    # Process only symmetric TSP instances
    if problem.type != "TSP":
        print(f"⚠️ Skipped (TYPE: {problem.type})")
        continue
    
    # Skip large instances
    if problem.dimension > 1000:
        print(f"⚠️ Skipped (DIMENSION: {problem.dimension})")
        continue

    # Load graph to NetworkX
    print("Loading graph...")
    G = problem.get_graph()

    # Clean graph
    print("Cleaning graph...")
    G = prepare_graph(G)

    # Convert to PyTorch
    # NOTE(review): `data` is overwritten on every iteration, so only the
    # last converted instance is available after the loop. Confirm whether
    # the results should be collected.
    data = nx_to_pyg(G)

Archive extracted and .gz files decompressed.


AttributeError: 'Graph' object has no attribute 'type'