In [1]:
import networkx as nx
import numpy as np
import torch_geometric
import torch

import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset 

import matplotlib.pyplot as plt
from tqdm import tqdm

from torch_geometric.data import Dataset, Data
from torch_geometric.loader.dataloader import DataLoader

from GraphDataset import RandomGraphDataset
from GraphToSequence import graphToSequence, sequenceToGraph

from SimpleTransformer import SimpleTransformer, Transformer
from lang import Lang

In [2]:
# Storage locations of the training and test graph datasets.
data_folder_training = "/eos/user/c/czeh/graphsequencer/fixed_edge_graph_training"
data_folder_test = "/eos/user/c/czeh/graphsequencer/fixed_edge_graph_test"

# Both splits are built with the same graph settings (5 nodes, 3 edges);
# only the folder and the number of samples differ.
graph_shape = dict(nodes=5, edges=3)

dataset_training = RandomGraphDataset(
    data_folder_training,
    data_count=100,
    **graph_shape,
)
dataset_test = RandomGraphDataset(
    data_folder_test,
    data_count=5,
    **graph_shape,
)

In [3]:
# One graph per batch is required by the training and evaluation cells: they
# read the sequence via `sample[0].y` and flatten `sample.edge_index` into a
# single row of width `max_edge_count`. Spell the batch size out instead of
# relying on the loader default.
train_dl = DataLoader(dataset_training, batch_size=1, shuffle=True)

# Evaluation does not benefit from shuffling; a fixed order keeps the
# per-sample printouts comparable between runs.
test_dl = DataLoader(dataset_test, batch_size=1, shuffle=False)

In [19]:
# Number of graphs per optimisation step.
batch_size = 1

# Width of the token window: the training cell builds input/target tensors
# with `input_length` columns.
input_length = 10

# Size limits handed to the Transformer constructor.
max_edge_count = 6   # also the column count of the edge tensor in the training cell
max_seq_length = 10

In [20]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
# (To force CPU execution, replace the assignment with: device = "cpu")
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [21]:
# NOTE(review): `num_samples` is not read by any other visible cell.
num_samples = 1000

# Build the token converter from the node count of the first training graph;
# its `n_words` attribute is used as the vocabulary size of the model.
first_training_graph = dataset_training.get(0)
converter = Lang(first_training_graph.num_nodes)
vocab_size = converter.n_words

In [22]:
# Training length.
epochs = 100

# Transformer hyper-parameters.
d_model = 512
num_heads = 8
num_layers = 6
d_ff = 2048
dropout = 0.1

# Model: arguments are passed positionally, in the order the constructor is
# called with in this notebook.
model = Transformer(
    dataset_training.get(0).num_nodes,
    vocab_size,
    d_model,
    num_heads,
    num_layers,
    d_ff,
    max_edge_count,
    max_seq_length,
    dropout,
)
model = model.to(device)

# Loss: target entries equal to 0 are excluded from the loss. The training
# cell zero-initialises its target tensor, so unfilled slots do not contribute
# (presumably 0 is the padding index of `Lang` -- confirm).
criterion = nn.CrossEntropyLoss(ignore_index=0)

# Optimizer. Optionally introduce weight decay.
optimizer = optim.Adam(
    model.parameters(),
    lr=0.0001,
    betas=(0.9, 0.98),
    eps=1e-9,
)

In [None]:
# Mean training loss of every finished epoch (plotted in the next cell).
losses_per_epoch = []

# Training loop: each graph contributes one optimisation step. For a graph
# with sequence length n, a mini-batch of n + 1 rows is built, one row per
# window returned by `converter.subseq2arr`, with the target being the input
# shifted by one token.
for epoch in range(epochs):
    model.train()
    total_loss = 0
    for sample in tqdm(train_dl, desc="training"):
        sample_seq = sample[0].y
        n_rows = len(sample_seq) + 1

        # Allocate directly on the target device instead of CPU + copy.
        input_tensor = torch.zeros((n_rows, input_length), dtype=torch.long, device=device)
        target_tensor = torch.zeros((n_rows, input_length), dtype=torch.long, device=device)
        edge_tensor = torch.zeros((n_rows, max_edge_count), dtype=torch.long, device=device)

        # Every row conditions on the same graph, so the flattened edge list
        # is computed once and broadcast over all rows (it was previously
        # recomputed and reassigned per row).
        flat_edges = torch.flatten(sample.edge_index.T).to(device)
        edge_tensor[:] = flat_edges

        for i in range(n_rows):
            # NOTE(review): window bounds are kept exactly as before; the end
            # argument is capped at input_length + 1 while the start argument
            # keeps growing with i -- confirm against `Lang.subseq2arr` that
            # this is intended for sequences longer than the window.
            input_seq = converter.subseq2arr(
                sample_seq,
                input_length + 1,
                max(i - input_length - 1, 0),
                min(i + 1, input_length + 1),
            )
            # Teacher forcing: the input drops the last token, the target drops the first.
            input_tensor[i, :] = torch.tensor(input_seq[:-1], dtype=torch.long)
            target_tensor[i, :] = torch.tensor(input_seq[1:], dtype=torch.long)

        optimizer.zero_grad()
        output = model(edge_tensor, input_tensor)
        loss = criterion(output.contiguous().view(-1, vocab_size), target_tensor.contiguous().view(-1))
        loss.backward()
        # Optionally introduce gradient clipping; it only has an effect here,
        # between backward() and step():
        # torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        total_loss += loss.item()

    # Average over the number of batches (= graphs, since each batch holds one graph).
    epoch_loss = total_loss / len(train_dl)
    print(f"Epoch {epoch+1}, Loss: {epoch_loss:.6f}")
    losses_per_epoch.append(epoch_loss)

training: 100%|██████████| 100/100 [00:06<00:00, 15.62it/s]


Epoch 1, Loss: 1.117625


training: 100%|██████████| 100/100 [00:06<00:00, 15.82it/s]


Epoch 2, Loss: 0.738416


training: 100%|██████████| 100/100 [00:05<00:00, 16.70it/s]


Epoch 3, Loss: 0.660407


training: 100%|██████████| 100/100 [00:05<00:00, 18.02it/s]


Epoch 4, Loss: 0.572286


training: 100%|██████████| 100/100 [00:05<00:00, 17.74it/s]


Epoch 5, Loss: 0.499651


training: 100%|██████████| 100/100 [00:06<00:00, 16.59it/s]


Epoch 6, Loss: 0.417463


training: 100%|██████████| 100/100 [00:06<00:00, 16.26it/s]


Epoch 7, Loss: 0.368894


training: 100%|██████████| 100/100 [00:05<00:00, 17.67it/s]


Epoch 8, Loss: 0.306604


training: 100%|██████████| 100/100 [00:05<00:00, 17.83it/s]


Epoch 9, Loss: 0.294084


training: 100%|██████████| 100/100 [00:05<00:00, 17.42it/s]


Epoch 10, Loss: 0.257906


training: 100%|██████████| 100/100 [00:06<00:00, 16.43it/s]


Epoch 11, Loss: 0.246845


training: 100%|██████████| 100/100 [00:06<00:00, 16.52it/s]


Epoch 12, Loss: 0.267315


training:  48%|████▊     | 48/100 [00:02<00:03, 16.68it/s]

In [None]:
# Loss curve: one point per completed training epoch.
fig, ax = plt.subplots()
ax.plot(losses_per_epoch)
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.set_title("Training Loss")

In [None]:
# Greedy autoregressive evaluation: for every test graph the model is fed its
# own predictions until it emits the stop token (or the step cap is reached),
# and the decoded sequence is compared with the reference sequence.
correct = 0

# Decoding stops once this vocabulary index is predicted
# (presumably the end-of-sequence id of `Lang` -- confirm).
stop_token = 2
# Upper bound on decoding iterations so a model that never predicts the stop
# token still terminates.
max_decode_steps = 15

model.eval()

# torch.no_grad() only disables autograd when used as a context manager (or
# decorator); calling it as a bare statement has no effect.
with torch.no_grad():
    for sample in tqdm(test_dl, desc="test"):
        # Same accessor as in the training cell and in the comparison below.
        target_seq = sample[0].y

        input_tensor = torch.tensor(converter.subseq2arr(target_seq, input_length, 0, 0)).unsqueeze(0).to(device)
        edge_tensor = torch.flatten(sample.edge_index.T).unsqueeze(0).to(device)

        # The prediction at the last position of the window is taken as the next token.
        predicted_number = model(edge_tensor, input_tensor).argmax(-1)[0, -1].item()

        step = 0
        while predicted_number != stop_token and step < max_decode_steps:
            # Shift the window left by one and append the newest token at the end.
            input_tensor = torch.roll(input_tensor, -1, dims=1)
            input_tensor[0, -1] = predicted_number

            predicted_number = model(edge_tensor, input_tensor).argmax(-1)[0, -1].item()
            step += 1

        # Append the final prediction (the stop token, or the last token when
        # the step cap was hit) so it is part of the compared sequence.
        input_tensor = torch.roll(input_tensor, -1, dims=1)
        input_tensor[0, -1] = predicted_number

        predicted_seq = converter.arr2seq(input_tensor[0])
        expected_seq = converter.arr2seq(converter.subseq2arr(target_seq, input_length, 0, input_length))
        check = predicted_seq == expected_seq

        print(f"Input Sequence: {target_seq}")
        print(f"Predicted Sequence: {predicted_seq}")
        print(f"Correct: {check}")

        correct += check

# The printed value is the fraction of exactly matching sequences in [0, 1].
print(f"Percentage of correct guesses: {correct / (len(test_dl))}")