In [1]:
# Import required libraries
import math
import logging
import time
import torch
import numpy as np
import pickle
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

from evaluation.evaluation import eval_edge_prediction
from model.tgn import TGN
from utils.utils import EarlyStopMonitor, RandEdgeSampler, get_neighbor_finder
from utils.data_processing import get_data, compute_time_statistics

In [8]:
# Random Seeds for Reproducability
torch.manual_seed(0)
np.random.seed(0)
torch.autograd.set_detect_anomaly(True)

<torch.autograd.anomaly_mode.set_detect_anomaly at 0x1038b84c0>

In [9]:

args = {
    "data": "wikipedia",
    "bs": 200,
    "prefix": "",
    "n_degree": 10,
    "n_head": 2,
    "n_epoch": 50,
    "n_layer": 1,
    "lr": 0.0001,
    "patience": 5,
    "n_runs": 1,
    "drop_out": 0.1,
    "gpu": 0,
    "node_dim": 100,
    "time_dim": 100,
    "backprop_every": 1,
    "use_memory": True,
    "embedding_module": "graph_attention",
    "message_function": "identity",
    "memory_updater": "gru",
    "aggregator": "last",
    "memory_update_at_end": False,
    "message_dim": 100,
    "memory_dim": 172,
    "different_new_nodes": False,
    "uniform": False,
    "randomize_features": False,
    "use_destination_embedding_in_message": False,
    "use_source_embedding_in_message": False,
    "dyrep": False,
}

# Extract key parameters for easier use
BATCH_SIZE = args["bs"]
NUM_NEIGHBORS = args["n_degree"]
NUM_EPOCH = args["n_epoch"]
NUM_HEADS = args["n_head"]
DROP_OUT = args["drop_out"]
GPU = args["gpu"]
DATA = args["data"]
NUM_LAYER = args["n_layer"]
LEARNING_RATE = args["lr"]
NODE_DIM = args["node_dim"]
TIME_DIM = args["time_dim"]
USE_MEMORY = args["use_memory"]
MESSAGE_DIM = args["message_dim"]
MEMORY_DIM = args["memory_dim"]

# Configure device
device = torch.device(f"cuda:{GPU}" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Using device: cpu


In [10]:
# Load data
node_features, edge_features, full_data, train_data, val_data, test_data, new_node_val_data, new_node_test_data = get_data(
    DATA,
    different_new_nodes_between_val_and_test=args["different_new_nodes"],
    randomize_features=args["randomize_features"],
)

# Initialize neighbor finders
train_ngh_finder = get_neighbor_finder(train_data, args["uniform"])
full_ngh_finder = get_neighbor_finder(full_data, args["uniform"])

The dataset has 157474 interactions, involving 9227 different nodes
The training dataset has 81029 interactions, involving 6141 different nodes
The validation dataset has 23621 interactions, involving 3256 different nodes
The test dataset has 23621 interactions, involving 3564 different nodes
The new node validation dataset has 12016 interactions, involving 2120 different nodes
The new node test dataset has 11715 interactions, involving 2437 different nodes
922 nodes were used for the inductive testing, i.e. are never seen during training


In [11]:
# Initialize the TGN model
tgn = TGN(
    neighbor_finder=train_ngh_finder,
    node_features=node_features,
    edge_features=edge_features,
    device=device,
    n_layers=NUM_LAYER,
    n_heads=NUM_HEADS,
    dropout=DROP_OUT,
    use_memory=USE_MEMORY,
    message_dimension=MESSAGE_DIM,
    memory_dimension=MEMORY_DIM,
    memory_update_at_start=not args["memory_update_at_end"],
    embedding_module_type=args["embedding_module"],
    message_function=args["message_function"],
    aggregator_type=args["aggregator"],
    memory_updater_type=args["memory_updater"],
    n_neighbors=NUM_NEIGHBORS,
)

# Set up loss and optimizer
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(tgn.parameters(), lr=LEARNING_RATE)

# Move model to device
tgn = tgn.to(device)

In [12]:
# Training loop
for epoch in range(NUM_EPOCH):
    start_time = time.time()
    print(f"Starting epoch {epoch}")
    
    # Initialize memory
    if USE_MEMORY:
        tgn.memory.__init_memory__()
    
    tgn.set_neighbor_finder(train_ngh_finder)

    epoch_loss = []
    for batch_idx in range(0, len(train_data.sources), BATCH_SIZE):
        optimizer.zero_grad()
        sources = train_data.sources[batch_idx: batch_idx + BATCH_SIZE]
        destinations = train_data.destinations[batch_idx: batch_idx + BATCH_SIZE]
        timestamps = train_data.timestamps[batch_idx: batch_idx + BATCH_SIZE]
        edge_idxs = train_data.edge_idxs[batch_idx: batch_idx + BATCH_SIZE]
        
        size = len(sources)
        _, negatives = RandEdgeSampler(sources, destinations).sample(size)

        pos_prob, neg_prob = tgn.compute_edge_probabilities(
            sources, destinations, negatives, timestamps, edge_idxs, NUM_NEIGHBORS
        )

        pos_label = torch.ones(size, dtype=torch.float, device=device)
        neg_label = torch.zeros(size, dtype=torch.float, device=device)

        loss = criterion(pos_prob.squeeze(), pos_label) + criterion(neg_prob.squeeze(), neg_label)
        loss.backward()
        optimizer.step()
        epoch_loss.append(loss.item())
    
    print(f"Epoch {epoch} completed. Loss: {np.mean(epoch_loss):.4f}")

Starting epoch 0


  File "/Users/jonathansneh/.pyenv/versions/3.9.7/lib/python3.9/runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/Users/jonathansneh/.pyenv/versions/3.9.7/lib/python3.9/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/Users/jonathansneh/.local/lib/python3.9/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/jonathansneh/.local/lib/python3.9/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/Users/jonathansneh/.local/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 739, in start
    self.io_loop.start()
  File "/Users/jonathansneh/.local/lib/python3.9/site-packages/tornado/platform/asyncio.py", line 205, in start
    self.asyncio_loop.run_forever()
  File "/Users/jonathansneh/.pyenv/versions/3.9.7/lib/python3.9/asyncio/base_events.py", line 596, in run_forever
    self._run_once()
  File "/Users/jonathans

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [688, 516]], which is output 0 of AsStridedBackward0, is at version 2; expected version 1 instead. Hint: the backtrace further above shows the operation that failed to compute its gradient. The variable in question was changed in there or anywhere later. Good luck!

In [14]:
print(dir(tgn))

['T_destination', '__annotations__', '__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setstate__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', '_apply', '_backward_hooks', '_backward_pre_hooks', '_buffers', '_call_impl', '_compiled_call_impl', '_forward_hooks', '_forward_hooks_always_called', '_forward_hooks_with_kwargs', '_forward_pre_hooks', '_forward_pre_hooks_with_kwargs', '_get_backward_hooks', '_get_backward_pre_hooks', '_get_name', '_is_full_backward_hook', '_load_from_state_dict', '_load_state_dict_post_hooks', '_load_state_dict_pre_hooks', '_maybe_warn_non_full_backward_hook', '_modules', '_named_members', '_non_persistent_buffers_set', '_parameters', '_register_load_state_dict_pre_hoo