## Install torch geometric libraries

In [1]:
!pip install torch torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.11.0+cu102.html

Defaulting to user installation because normal site-packages is not writeable
Looking in links: https://data.pyg.org/whl/torch-1.11.0+cu102.html


In [2]:
import torch_geometric
import torch
from torch_geometric.data import Data
from torch_geometric.data import Dataset
from torch_geometric.loader import DataLoader
from env import Env_tsp
from config import Config
import itertools

In [3]:


cfg = Config()
env = Env_tsp(cfg)
#get 20000 node combinations to train the autoencoder on
batch_feat =  env.get_batch_nodes(20000)

data_list = []
for el in batch_feat:
    feat = el.double() #array of coordinates
    
    #to insert information about edge weight (distance between nodes),
    #i defined the graph instance as a fully connected one
    #data.edge_index: Graph connectivity in COO format with shape [2, num_edges] and type torch.long
    #edge index 
    perm = itertools.permutations(range(5), 2)
    l1 = []
    l2 = []

    for x in perm:
        l1.append(x[0])
        l2.append(x[1])
    
    a1 = torch.LongTensor(l1)
    a2 = torch.LongTensor(l2)

    res = [a1,a2]
    res = torch.stack(res)

    #add to array of graphs, to save dataset
    data = Data(x=feat, edge_index=res, pos=feat )
    data_list.append(data)

         

#save dataset
torch.save(data_list, "./datasets/train_pos_20000.pt")

#loader in case batch norm needed later
#loader = DataLoader(data_list, batch_size=32)



In [4]:
import torch
from torch_geometric.data import InMemoryDataset, download_url
import torch_geometric.transforms as T

class MyOwnDataset(InMemoryDataset):
    def __init__(self, root="./datasets", transform=None, pre_transform=None, pre_filter=None):
        super().__init__(root, transform, pre_transform, pre_filter)
        self.data = torch.load("./datasets/train_pos_20000.pt")

    @property
    def raw_file_names(self):
        return ['some_file_1', 'some_file_2', ...]

    @property
    def processed_file_names(self):
        return ['data.pt']

    def process(self):
        # Read data into huge `Data` list.
        data_list = [...]

        if self.pre_filter is not None:
            data_list = [data for data in data_list if self.pre_filter(data)]

        if self.pre_transform is not None:
            data_list = [self.pre_transform(data) for data in data_list]

        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])

transform = T.Cartesian(cat=False)
dataset = MyOwnDataset(pre_transform=transform)

  f"The `pre_transform` argument differs from the one used in "


In [5]:
#verify dataset length
print(len(dataset.data))
#verify graph structure
print((dataset.data)[0])

20000
Data(x=[10, 2], edge_index=[2, 20], pos=[10, 2])


In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

## Define autoencoder

In [7]:
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv
from torch_geometric.utils import train_test_split_edges
from torch_geometric.transforms import RandomLinkSplit

#following torch_geometric autoencoder guide
data = dataset.data

for _ in range(len(data)):
    data[_] = transform(data[_])
    data[_].train_mask = data[_].val_mask = data[_].test_mask = None
    data[_] = data[_].to(device)
    data[_] = RandomLinkSplit(data[_])
    


### Graph Encoder module

- I contacted the pytorch_geometric team to ask about the optimal layer to use for my use case (fully connected graph whose only features are node coordinates and distance between nodes) and they kindly advised me to use SplineConv <a href="https://github.com/pyg-team/pytorch_geometric/discussions/4535">(here the discussion)</a> 



In [8]:
from torch_geometric.nn import SplineConv

class GraphEncoder(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(GraphEncoder, self).__init__()
        self.conv1 = SplineConv(in_channels, 2 * out_channels, dim=2, kernel_size=3 ) # cached only for transductive learning
        self.conv2 = SplineConv(2 * out_channels, out_channels,dim=2, kernel_size=3) # cached only for transductive learning

    def forward(self, x, edge_index, edge_attr):
        x = self.conv1(x, edge_index, edge_attr).relu()
        return self.conv2(x, edge_index, edge_attr)
    

In [9]:
from torch_geometric.nn import GAE
out_channels = 16
num_features = 2
epochs = 100

# model
model = GAE(GraphEncoder(num_features, out_channels))
model = model.to(device)

# inizialize the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.004)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 20000, 0.9999)

print(data[0].num_val)



Data(x=[10, 2], edge_index=[2, 20], pos=[10, 2], edge_attr=[20, 2])


In [10]:


def train(cnt):
    model.train()
    optimizer.zero_grad()
    obj = data[cnt].num_val
    #data[cnt][0].x = (data[cnt][0].x).float().to(device)
    obj.x = (obj.x).float().to(device)
    obj.edge_attr = (obj.edge_attr).float()
    #print(dataset[0][cnt])
    z = model.encode(obj.x, obj.edge_index, obj.edge_attr)
    loss = model.recon_loss(z,  torch.LongTensor(obj.edge_index))
    loss.backward()
    optimizer.step()
    scheduler.step()
    return float(loss)


def test(pos_edge_index, neg_edge_index, cnt):
    model.eval()
    with torch.no_grad():
        z = model.encode(data[cnt].x, data[cnt].train_pos_edge_index, data[cnt].train_pos_edge_attr)
    return model.test(z, pos_edge_index, neg_edge_index)

Autoencoder in this instance takes 10 nodes with 2 dimensions and outputs a 16-dim embedding of each node.

In [11]:
epoch=1

while epoch < 20000:
    
    loss = train(epoch)
    print(f"loss {loss} //epoch : {epoch}")
    
    if loss < 0.1:
        torch.save(model.state_dict(), "./models/model_SplineConv_20000.pt")
        epoch=200000

    epoch+=1
    
torch.save(model.state_dict(), "./models/third_model_SplineConv_20000.pt")



  'We do not recommend using the non-optimized CPU version of '


loss 1.377434253692627 //epoch : 1
loss 1.37760591506958 //epoch : 2
loss 1.3952767848968506 //epoch : 3
loss 1.3526811599731445 //epoch : 4
loss 1.3440523147583008 //epoch : 5
loss 1.3370628356933594 //epoch : 6
loss 1.3393216133117676 //epoch : 7
loss 1.2591071128845215 //epoch : 8
loss 1.13352370262146 //epoch : 9
loss 1.1536471843719482 //epoch : 10
loss 1.0855989456176758 //epoch : 11
loss 1.0122826099395752 //epoch : 12
loss 1.1327348947525024 //epoch : 13
loss 1.1130850315093994 //epoch : 14
loss 0.9643628597259521 //epoch : 15
loss 1.1634279489517212 //epoch : 16
loss 0.8533313274383545 //epoch : 17
loss 0.8329411149024963 //epoch : 18
loss 0.8164599537849426 //epoch : 19
loss 0.7944999933242798 //epoch : 20
loss 0.8417636156082153 //epoch : 21
loss 0.8002870082855225 //epoch : 22
loss 0.6501341462135315 //epoch : 23
loss 0.6741078495979309 //epoch : 24
loss 0.3577166795730591 //epoch : 25
loss 0.4867723286151886 //epoch : 26
loss 0.4179753363132477 //epoch : 27
loss 0.41046193

In [None]:
obj1 = data[0].num_val
obj2 = data[1].num_val

Z = model.encode(obj1.x.float(), obj1.edge_index, (obj1.edge_attr).float())
Y = model.encode(obj2.x.float(), obj2.edge_index, (obj2.edge_attr).float())
print(obj1.x)
print(Z)
print(obj2.x)
print(Y)

tensor([[0.7576, 0.2793],
        [0.4031, 0.7347],
        [0.0293, 0.7999],
        [0.3971, 0.7544],
        [0.5695, 0.4388],
        [0.6387, 0.5247],
        [0.6826, 0.3051],
        [0.4635, 0.4550],
        [0.5725, 0.4980],
        [0.9371, 0.6556]], dtype=torch.float64)
tensor([[-1.9853e+01, -3.4666e+01, -1.4277e+01, -1.8772e+01, -2.4773e+01,
          2.9172e+01,  2.8252e+01,  2.0750e+01,  2.5156e+01, -2.0352e+01,
          1.5138e+01, -8.6671e+00,  2.3499e+01,  2.7759e+01,  1.4722e+01,
         -3.1795e+01],
        [-1.9904e+01, -3.4736e+01, -1.4322e+01, -1.9078e+01, -2.4218e+01,
          2.9297e+01,  2.8982e+01,  2.0300e+01,  2.3721e+01, -1.9743e+01,
          1.5421e+01, -8.5059e+00,  2.3534e+01,  2.8493e+01,  1.4687e+01,
         -3.2552e+01],
        [-1.8990e+01, -3.3741e+01, -1.3973e+01, -1.7311e+01, -2.2639e+01,
          2.7276e+01,  2.6998e+01,  1.8430e+01,  2.2091e+01, -1.7889e+01,
          1.4890e+01, -8.6177e+00,  2.1825e+01,  2.7288e+01,  1.3111e+01,
      

  'We do not recommend using the non-optimized CPU version of '
