In [1]:
import yaml
import torch
import time
import numpy as np

from models import GAT, GCN, SWEGNN
from data import TemporalGraphDataset

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
np.random.seed(42)
torch.manual_seed(42)

<torch._C.Generator at 0x1e2dad36fd0>

In [60]:
with open('config.yaml') as f:
    config = yaml.safe_load(f)

dataset, info = TemporalGraphDataset(node_features=config['node_features'],
                    edge_features=config['edge_features'],
                    **config['dataset_parameters']).load()

In [61]:
print(dataset[0])
print(type(dataset[0].x), dataset[0].x.shape)
print(type(dataset[0].edge_index), dataset[0].edge_index.shape)
print(type(dataset[0].edge_attr), dataset[0].edge_attr.shape)
print(type(dataset[0].y), dataset[0].y.shape)
print(info)

Data(x=[1268, 6], edge_index=[2, 2612], edge_attr=[2612, 8], y=[1268, 1], pos=[2, 1268])
<class 'torch.Tensor'> torch.Size([1268, 6])
<class 'torch.Tensor'> torch.Size([2, 2612])
<class 'torch.Tensor'> torch.Size([2612, 8])
<class 'torch.Tensor'> torch.Size([1268, 1])
{'num_static_node_features': 3, 'num_dynamic_node_features': 1, 'num_static_edge_features': 5, 'num_dynamic_edge_features': 1, 'previous_timesteps': 2}


In [62]:
num_train = int(len(dataset) * 0.8) # 80% train, 20% test

train_dataset = dataset[:num_train]
# train_loader = DataLoader(train_dataset) # batch_size=32, shuffle=True

test_dataset = dataset[num_train:]
# test_loader = DataLoader(test_dataset) # batch_size=32, shuffle=True

In [63]:
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
base_model_params = {
    'static_node_features': info['num_static_node_features'],
    'dynamic_node_features': info['num_dynamic_node_features'],
    'static_edge_features': info['num_static_edge_features'],
    'dynamic_edge_features': info['num_dynamic_edge_features'],
    'previous_timesteps': info['previous_timesteps'],
    'device': device,
}
lr_info = config['training_parameters']
model_info = config['model_parameters']

In [64]:
def train(model, loss_func, optimizer):
    start_time = time.time()
    num_epochs = 10
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for graph in train_dataset:
            graph = graph.to(device)
            labels = graph.y

            optimizer.zero_grad()

            outputs = model(graph)

            loss = loss_func(outputs, labels)
            running_loss += loss.item()

            loss.backward()
            optimizer.step()

        epoch_loss = running_loss / num_train
        print(f'Epoch [{epoch + 1}/{num_epochs}], Training Loss: {epoch_loss:.4f}')
    end_time = time.time()
    print(f'Total training time: {end_time - start_time} seconds')


def test(model, loss_func):
    start_time = time.time()
    model.eval()
    running_loss = 0.0
    with torch.no_grad():
        for graph in test_dataset:
            graph = graph.to(device)
            labels = graph.y

            outputs = model(graph)

            loss = loss_func(outputs, labels)
            running_loss += loss.item()
    end_time = time.time()

    # Print validation statistics
    print(f'Validation Loss: {running_loss:.4f}')
    print(f'Inference time: {end_time - start_time} seconds')

In [9]:
gcn_params = model_info['GCN']
model = GCN(**gcn_params, **base_model_params)
optimizer = torch.optim.Adam(model.parameters(), lr=lr_info['learning_rate'], weight_decay=lr_info['weight_decay'])
loss_func = torch.nn.L1Loss()

train(model, loss_func, optimizer)

Epoch [1/10], Training Loss: 5.9578
Epoch [2/10], Training Loss: 0.0264
Epoch [3/10], Training Loss: 0.0226
Epoch [4/10], Training Loss: 0.0208
Epoch [5/10], Training Loss: 0.0206
Epoch [6/10], Training Loss: 0.0206
Epoch [7/10], Training Loss: 0.0206
Epoch [8/10], Training Loss: 0.0206
Epoch [9/10], Training Loss: 0.0206
Epoch [10/10], Training Loss: 81.5901
Total training time: 15.083606243133545 seconds


In [10]:
test(model, loss_func)

Validation Loss: 27.0272
Inference time: 0.17727231979370117 seconds


In [11]:
gat_params = model_info['GAT']
model = GAT(**gat_params, **base_model_params)
optimizer = torch.optim.Adam(model.parameters(), lr=lr_info['learning_rate'], weight_decay=lr_info['weight_decay'])
loss_func = torch.nn.L1Loss()

train(model, loss_func, optimizer)

Epoch [1/10], Training Loss: 5.4834
Epoch [2/10], Training Loss: 0.0218
Epoch [3/10], Training Loss: 0.0214
Epoch [4/10], Training Loss: 0.0220
Epoch [5/10], Training Loss: 0.0609
Epoch [6/10], Training Loss: 0.0226
Epoch [7/10], Training Loss: 0.0207
Epoch [8/10], Training Loss: 17.3843
Epoch [9/10], Training Loss: 0.2914
Epoch [10/10], Training Loss: 0.0240
Total training time: 21.568446159362793 seconds


In [12]:
test(model, loss_func)

Validation Loss: 0.7756
Inference time: 0.2318413257598877 seconds


In [13]:
swe_gnn_params = model_info['SWEGNN']
model = SWEGNN(**swe_gnn_params, **base_model_params)
optimizer = torch.optim.Adam(model.parameters(), lr=lr_info['learning_rate'], weight_decay=lr_info['weight_decay'])
loss_func = torch.nn.L1Loss()

train(model, loss_func, optimizer)

Epoch [1/10], Training Loss: 0.0733
Epoch [2/10], Training Loss: 0.0218
Epoch [3/10], Training Loss: 0.0950
Epoch [4/10], Training Loss: 0.0206
Epoch [5/10], Training Loss: 0.0220
Epoch [6/10], Training Loss: 0.0206
Epoch [7/10], Training Loss: 0.0206
Epoch [8/10], Training Loss: 0.0206
Epoch [9/10], Training Loss: 0.0208
Epoch [10/10], Training Loss: 0.0207
Total training time: 173.00833797454834 seconds


In [14]:
test(model, loss_func)

Validation Loss: 0.1828
Inference time: 1.2645950317382812 seconds


In [15]:
# No encoder decoder
swe_gnn_params = model_info['SWEGNN']
swe_gnn_params['encoder_layers'] = 0
swe_gnn_params['encoder_activation'] = None
swe_gnn_params['decoder_layers'] = 0
swe_gnn_params['decoder_activation'] = None

model = SWEGNN(**swe_gnn_params, **base_model_params)
optimizer = torch.optim.Adam(model.parameters(), lr=lr_info['learning_rate'], weight_decay=lr_info['weight_decay'])
loss_func = torch.nn.L1Loss()

train(model, loss_func, optimizer)

Epoch [1/10], Training Loss: 0.6731
Epoch [2/10], Training Loss: 0.1494
Epoch [3/10], Training Loss: 0.1198
Epoch [4/10], Training Loss: 0.1002
Epoch [5/10], Training Loss: 0.0810
Epoch [6/10], Training Loss: 0.0594
Epoch [7/10], Training Loss: 0.0258
Epoch [8/10], Training Loss: 0.0306
Epoch [9/10], Training Loss: 0.0224
Epoch [10/10], Training Loss: 0.0214
Total training time: 132.33159112930298 seconds


In [16]:
test(model, loss_func)

Validation Loss: 0.1883
Inference time: 1.1571977138519287 seconds


## Self-Supervised Learning Methods

In [75]:
from tqdm import tqdm
from models.graph_mae2 import GraphMAE2
from utils.graph_mae2_utils import LinearRegression

In [None]:
def pretrain(model, dataset, optimizer):
    start_time = time.time()

    max_epoch = 20
    epoch_iter = tqdm(range(max_epoch))
    model.to(device)
    for epoch in epoch_iter:
        model.train()
        running_loss = 0.0

        for graph in dataset:
            graph = graph.to(device)
            x = graph.x # Target
            target_nodes = torch.arange(x.shape[0], device=device, dtype=torch.long)

            optimizer.zero_grad()

            loss = model(graph, x, targets=target_nodes)
            running_loss += loss.item()

            loss.backward()
            optimizer.step()

        epoch_loss = running_loss / num_train
        epoch_iter.set_description(f"# Epoch {epoch}: train_loss: {epoch_loss:.4f}")

    end_time = time.time()
    print(f'Total pre-training time: {end_time - start_time} seconds')
    return model

def linear_probing(model, dataset, in_dim, out_dim, lr_f, weight_decay_f):
    start_time = time.time()

    # Should we freeze model parameters or fine-tune them?
    encoder = LinearRegression(in_dim, out_dim).to(device)

    num_finetune_params = [p.numel() for p in encoder.parameters() if  p.requires_grad]
    print(f"num parameters for finetuning: {sum(num_finetune_params)}")

    loss_f = torch.nn.MSELoss()
    optimizer_f = torch.optim.Adam(encoder.parameters(), lr=lr_f, weight_decay=weight_decay_f)

    best_model = None
    max_epoch_f = 20
    epoch_iter_f = tqdm(range(max_epoch_f))
    model.eval()
    encoder.train()
    for epoch in epoch_iter_f:
        running_loss = 0.0

        for graph in dataset:
            optimizer_f.zero_grad()

            graph = graph.to(device)
            with torch.no_grad():
                x = model.embed(graph)
                x = x.to(device)
            label = graph.y

            out = encoder(x)
            loss = loss_f(out, label)

            loss.backward()
            # torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=3)
            optimizer_f.step()
            running_loss += loss.item()

        epoch_loss = running_loss / num_train
        epoch_iter_f.set_description(f"# Epoch {epoch}: train_loss: {epoch_loss:.4f}")

    end_time = time.time()
    print('Final loss: ', epoch_loss)
    print(f'Total fine-tuning time: {end_time - start_time} seconds')

    return encoder

In [78]:
graphmae2_params = model_info['GRAPHMAE2']
in_dim = dataset[0].x.shape[1]
model = GraphMAE2(in_dim=in_dim, **graphmae2_params)
optimizer = torch.optim.Adam(model.parameters(), lr=lr_info['learning_rate'], weight_decay=lr_info['weight_decay'])

trained_model = pretrain(model, train_dataset, optimizer)

out_dim = dataset[0].y.shape[1]
hidden_dim = graphmae2_params['num_hidden'] // graphmae2_params['nhead']
linear_probing(trained_model, train_dataset, hidden_dim, out_dim, lr_info['learning_rate'], lr_info['weight_decay'])

=== Use sce_loss and alpha_l=3 ===


# Epoch 19: train_loss: 0.0903: 100%|██████████| 20/20 [02:10<00:00,  6.51s/it]


Total pre-training time: 130.16157507896423 seconds
num parameters for finetuning: 33


# Epoch 19: train_loss: 29337.2740: 100%|██████████| 20/20 [00:27<00:00,  1.37s/it]

Final loss:  29337.27402439402
Total fine-tuning time: 27.372410535812378 seconds



