In [1]:
import os
import datetime
import time
from IPython.display import clear_output

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch_geometric.loader.dataloader import DataLoader
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR

from torch_geometric.data import Data
import torch_geometric
import networkx as nx

from sklearn.model_selection import train_test_split

from ClusterDataset import ClusterDataset as GNNDataset
from ClusterDatasetTransformer import ClusterDataset
from train_transformer import *
from data_statistics import *

from IPython.display import display

from Transformer import Transformer
from lang import Lang
from LossFunction import Loss

In [2]:
# Device selection: run on the GPU when CUDA is usable, otherwise on the CPU.
# To force CPU execution, bind `device = torch.device("cpu")` instead.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device("cpu")

print(f"Using device: {device}")

Using device: cuda


In [3]:
# Size constants shared by the cells below.
max_nodes = 66        # handed to Lang, ClusterDataset and Transformer
batch_size = 64       # handed to both DataLoaders
input_length = 60     # handed to ClusterDataset (and to EventGrouping as seq_length)
max_seq_length = 60   # handed to Transformer

In [4]:
# Token converter constructed with `max_nodes`; its `n_words` attribute is used
# as the vocabulary size handed to the Transformer.
# NOTE(review): the vocabulary contents are defined in lang.Lang, which is not
# visible from this notebook.
converter = Lang(max_nodes)
vocab_size = converter.n_words

In [6]:
# Load the datasets.
#
# Two dataset flavours are built in this cell:
#   * GNNDataset (graph form), created from the graph folders and the
#     histogram folder, and
#   * ClusterDataset (transformer form), which is pointed at
#     `<graph folder>/processed` and at a separate `*_trans` store folder.
# NOTE(review): presumably constructing GNNDataset is what fills the
# `processed` sub-folders that ClusterDataset reads — confirm in the dataset
# modules.
#
# The graph datasets are kept under their own names instead of being
# overwritten by the transformer datasets, so both stay reachable after this
# cell has run. `del gnn_dataset_training, gnn_dataset_test` releases them if
# they are not needed. All names bound by the original cell keep the same
# final values.
hist_folder = "/eos/user/c/czeh/histo_10pion0PU/"
graph_folder_training = "/eos/user/c/czeh/graph_data"
graph_folder_test = "/eos/user/c/czeh/graph_data_test"

gnn_dataset_training = GNNDataset(graph_folder_training, hist_folder)
gnn_dataset_test = GNNDataset(graph_folder_test, hist_folder, test=True)

# Inputs and outputs of the transformer-form datasets.
data_folder_training = os.path.join(graph_folder_training, "processed")
store_folder_training = "/eos/user/c/czeh/graph_data_trans"
data_folder_test = os.path.join(graph_folder_test, "processed")
store_folder_test = "/eos/user/c/czeh/graph_data_trans_test"

# Folder later handed to save_model.
model_folder = "/eos/user/c/czeh/model"

dataset_training = ClusterDataset(
    converter,
    store_folder_training,
    data_folder_training,
    max_nodes=max_nodes,
    input_length=input_length,
)
dataset_test = ClusterDataset(
    converter,
    store_folder_test,
    data_folder_test,
    max_nodes=max_nodes,
    input_length=input_length,
)

[[{inner: [], outer: []}, {...}, ..., {inner: [247, ...], outer: []}], ...]
[[{inner: [], outer: []}, {...}, ..., {inner: [236, ...], outer: []}], ...]
[[{inner: [], outer: []}, {inner: [], ...}, ..., {inner: [], outer: []}], ...]
[[{inner: [], outer: []}, {inner: [], ...}, ..., {inner: [], outer: []}], ...]
[[{inner: [], outer: []}, {...}, ..., {inner: [278, ...], outer: []}], ...]
[[{inner: [], outer: []}, {...}, ..., {inner: [166, ...], outer: []}], ...]
[[{inner: [], outer: []}, {...}, ..., {inner: [198, ...], outer: []}], ...]
[[{inner: [], outer: []}, {...}, ..., {inner: [178, ...], outer: []}], ...]
[[{inner: [], outer: []}, {...}, ..., {inner: [268, ...], outer: [276]}], ...]
[[{inner: [], outer: []}, {...}, ..., {inner: [280, ...], outer: []}], ...]
[[{inner: [], outer: []}, {...}, ..., {inner: [199, ...], outer: []}], ...]
[[{inner: [], outer: []}, {...}, ..., {inner: [260, ...], outer: []}], ...]
[[{inner: [], outer: []}, {...}, ..., {inner: [209, ...], outer: []}], ...]
[[{

KeyboardInterrupt: 

In [None]:
# Batched loaders over the training and test datasets. Both are created with
# the same settings: shuffling on, `batch_size` samples per batch, pinned
# memory requested and four worker processes.
train_dl, test_dl = (
    DataLoader(dataset, shuffle=True, batch_size=batch_size, pin_memory=True, num_workers=4)
    for dataset in (dataset_training, dataset_test)
)

In [None]:
# Training length and Transformer hyper-parameters.
epochs = 100
d_model, num_heads, num_layers = 128, 4, 6
d_ff = 256
dropout = 0.2

# Index of the "<PAD>" token in the converter's vocabulary.
padding = converter.word2index["<PAD>"]
# Number of entries in the test dataset's `model_feature_keys`.
feature_num = len(dataset_test.model_feature_keys)

# Model and loss
model = Transformer(
    vocab_size,
    d_model,
    num_heads,
    num_layers,
    d_ff,
    feature_num,
    max_nodes,
    max_seq_length,
    dropout,
)
model = model.to(device)
criterion = Loss(converter)

In [None]:
# Adam at a learning rate of 1e-4. StepLR multiplies the learning rate by 0.5
# after every 10 calls to scheduler.step().
# Alternative configuration kept for reference:
# optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.98), eps=1e-9)
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)
scheduler = StepLR(optimizer=optimizer, step_size=10, gamma=0.5)

In [None]:
# Load weights if needed (uncomment the lines below to restore model and optimizer state from a checkpoint)
# weights = torch.load("/eos/user/c/czeh/tranformer_2.pt", weights_only=True)
# model.load_state_dict(weights["model_state_dict"])
# optimizer.load_state_dict(weights["optimizer_state_dict"])
# start_epoch = weights["epoch"]

In [None]:
# Per-epoch loss histories; the training loop appends one value to each per epoch.
train_loss_hist, val_loss_hist = [], []

In [None]:
# Training loop with a live-updating loss plot.
#
# NOTE(review): this cell used to call
#     torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.25)
# once, before the loop. clip_grad_norm_ only rescales the gradients that exist
# at the moment it is called, and optimizer.zero_grad() below cleared them
# again, so the call had no effect on training. To really clip gradients it has
# to run inside train(), after the backward pass and before optimizer.step().
# Background: https://stats.stackexchange.com/questions/352036/what-should-i-do-when-my-neural-network-doesnt-learn

fig_loss, ax_loss = plt.subplots(1, 1, figsize=(6, 3))

# Placeholder output; it is replaced by the loss figure after every epoch.
display_loss = display(1, display_id=True)

optimizer.zero_grad()
# `epochs` is the value set alongside the model hyper-parameters. Re-run that
# cell first if a later cell has rebound the name.
for epoch in range(1, epochs + 1):
    print(f'Epoch: {epoch}')

    loss = train(model, converter, optimizer, train_dl, epoch, loss_obj=criterion, device=device)
    print(f"Training loss: {loss}")
    train_loss_hist.append(loss)

    val_loss = test(model, converter, test_dl, epoch, loss_obj=criterion, device=device)
    val_loss_hist.append(val_loss)
    print(f"Validation loss: {val_loss}")

    # Redraw both loss curves in the existing output area.
    ax_loss.clear()
    plot_loss(train_loss_hist, val_loss_hist, ax=ax_loss, n=1)
    display_loss.update(fig_loss)
    time.sleep(1)

    scheduler.step()
    # After scheduler.step() the reported rate is the one the next epoch will use.
    print(f"Epoch {epoch+1}, LR: {scheduler.get_last_lr()[0]}")

In [None]:
# Smoothed training and validation loss over all recorded epochs.
# The number of recorded epochs gets its own name so that the `epochs`
# hyper-parameter defined with the model is not overwritten by this cell.
# NOTE(review): this assumes moving_average(..., 8) returns one value per
# recorded epoch. If it returns fewer, ax.plot raises on the length mismatch —
# confirm in data_statistics.
fig, ax = plt.subplots(1, 1, figsize=(10, 6))
n_recorded = len(train_loss_hist)
epoch_axis = range(1, n_recorded + 1)
ax.plot(epoch_axis, moving_average(train_loss_hist, 8), label='train', linewidth=2)
ax.plot(epoch_axis, moving_average(val_loss_hist, 8), label='val', linewidth=2)
ax.set_ylabel("Loss", fontsize=14)
ax.set_xlabel("Epochs", fontsize=14)
ax.set_title("Training and Validation Loss", fontsize=14)
ax.legend();  # trailing semicolon suppresses the Legend repr in the cell output

In [None]:
# Hand the model, last epoch number, optimizer, both loss histories and the
# target folder to save_model, using a file name that carries today's date.
#
# `datetime` is imported at the top of the notebook as a module, so
# `datetime.now()` raises AttributeError unless one of the later star imports
# happens to rebind the name to the class. time.strftime produces the same
# local-time YYYY-MM-DD string without depending on which object `datetime`
# refers to.
date = time.strftime("%Y-%m-%d")
save_model(model, epoch, optimizer, train_loss_hist, val_loss_hist, model_folder, f"tranformer_date_{date}.pt")

## Test Full Event

In [None]:
from EventGrouping import EventGrouping

In [None]:
# Rebuild the architecture and restore trained weights for inference.
model2 = Transformer(vocab_size, d_model, num_heads, num_layers, d_ff, feature_num, max_nodes, max_seq_length, dropout).to(device)
# Evaluation mode: switches off dropout so repeated runs on the same event agree.
model2.eval()
# map_location lets a checkpoint written on a GPU be loaded when `device` is the CPU.
weights = torch.load("/eos/user/c/czeh/tranformer_4.pt", map_location=device, weights_only=True)
model2.load_state_dict(weights["model_state_dict"])

In [None]:
# Wrap the reloaded model in an EventGrouping runner and apply it to the
# object returned by dataset_test.get(0).
# NOTE(review): the meaning of `neighborhood=1` is defined in EventGrouping,
# which is not visible here.
runner = EventGrouping(converter, model2, neighborhood=1, seq_length=input_length)
runner(dataset_test.get(0))

In [None]:
# Column indices selected from the first element of a training sample.
model_feature_keys = np.array([
    0, 2, 3, 4, 6, 7,
    10, 14, 15, 16, 17, 18,
    22, 24, 25, 26, 28, 29,
])
# Show the last of the selected columns for the first training sample.
dataset_training[0][0][:, model_feature_keys][:, -1]

In [None]:
# Show the `cluster` attribute of the first test item.
dataset_test.get(0).cluster

In [None]:
# Look up entry 16 of the training dataset's `node_feature_keys`.
dataset_training.node_feature_keys[16]

In [None]:
# Convert the first test item into an undirected networkx graph.
G = torch_geometric.utils.to_networkx(dataset_test.get(0), to_undirected=True)

In [None]:
# Show column 16 of the `x` attribute of the first training item.
dataset_training.get(0).x[:, 16]

In [None]:
# Draw the graph built above, labelling every node.
fig, ax = plt.subplots()
nx.draw(G, ax=ax, with_labels=True)

## Random Tests

In [None]:
# Third element of the first training sample, plus a boolean mask that is
# False wherever the value equals -4.
# NOTE(review): -4 looks like a padding/ignore marker — confirm against
# ClusterDataset.
targets = dataset_training[0][2]
mask = targets.ne(-4)
# Display the mask with an extra trailing axis.
mask[..., None]

In [None]:
# Show the rows of `targets` whose last column is not -4.
targets[targets[:, -1] != -4, :]

In [None]:
# Number of unmasked entries divided by 3 (true division, so the result is a float).
targets[mask].shape[0]/3

In [None]:
# Second element of the first training sample, rotated one position towards
# the front along axis 0, with the last position then overwritten by 5.
opts = dataset_training[0][1].roll(-1, dims=0)
opts[-1] = 5
opts

In [None]:
# Count the entries of `opts` that differ from -4.
out_mask = opts != -4
opts[out_mask].shape[0]

In [None]:
# Keep only the unmasked entries and regroup them into rows of three.
# This rebinds `targets`, so `mask` no longer matches it afterwards. Re-create
# both before running this cell a second time.
targets = targets[mask].reshape(int(mask.sum()) // 3, 3)

In [None]:
# Show the first row of the regrouped targets.
targets[0, :]