In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os

# Climb one directory at a time until the working-directory path no longer
# contains the text 'notebooks' (substring match on the whole path).
# Presumably this lands in the project root so that `src.*` imports and the
# relative `data/...` paths used below resolve - confirm against repo layout.
while os.getcwd().find('notebooks') != -1:
    os.chdir(os.pardir)

import pandas as pd
import torch
import torch.nn.functional as F
from ogb.nodeproppred import DglNodePropPredDataset, Evaluator
from sklearn.metrics import roc_auc_score
import logging
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.offline as pyo
import numpy as np

from src.dgl_models import GraphSAGE
from src.data.node_classifier.arxiv import load_dataset, get_symmetric_graph
from src.train.node_classifier import WeigthedGraphSageNodeClassifier

In [3]:
# Root logger setup for the notebook: INFO level, records rendered as
# "<timestamp> - <LEVEL> : <message>" with a second-resolution timestamp.
_log_settings = {
    'level': logging.INFO,
    'format': '%(asctime)s - %(levelname)s : %(message)s',
    'datefmt': '%Y-%m-%d %H:%M:%S',
}
logging.basicConfig(**_log_settings)

In [4]:
torch.cuda.is_available()

True

In [5]:
# Use the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
device

device(type='cuda', index=0)

## Data Loading

In [63]:
# Load the node-classification dataset through the project helper. Later cells
# read the graph from `dataset[0][0]` and use `dataset.labels`,
# `dataset.num_classes` and `dataset.get_idx_split()`.
# NOTE(review): presumably this wraps OGB's ogbn-arxiv dataset (helper lives in
# `src.data.node_classifier.arxiv`) - confirm.
dataset = load_dataset()

In [64]:
# Edge list produced by an earlier rewiring step, one row per edge with the
# columns `source`, `target` and `weight`. The path is relative to the working
# directory set at the top of the notebook.
rewired_edges_path = 'data/graph_modifications/01-1-rewired_edges_same_degrees.csv'
rewired_edges_df = pd.read_csv(rewired_edges_path)
print(rewired_edges_df.shape)
rewired_edges_df.head()

(2315598, 3)


Unnamed: 0,source,target,weight
0,0.0,161310.0,0.790365
1,0.0,113080.0,0.790083
2,0.0,139581.0,0.790068
3,0.0,21760.0,0.789761
4,0.0,10839.0,0.789665


In [65]:
# The first element of the first dataset item is the DGL graph; its node data
# carries 'year' and 'feat'.
# NOTE(review): `graph` is presumably the very object held by `dataset`, not a
# copy - the edge removal/addition cells below modify it in place, so
# re-running them without re-running this cell is not idempotent.
graph = dataset[0][0]
graph

Graph(num_nodes=169343, num_edges=1166243,
      ndata_schemes={'year': Scheme(shape=(1,), dtype=torch.int64), 'feat': Scheme(shape=(128,), dtype=torch.float32)}
      edata_schemes={})

### Remove all old edges

In [67]:
# Drop every existing edge so the graph keeps only its nodes and node features.
# The edge IDs are requested from the graph directly instead of being looked up
# per (src, dst) pair with `edge_ids`: a pair lookup resolves to a single edge,
# so on a graph with parallel edges it would yield duplicate IDs and leave some
# edges in place.
graph.remove_edges(graph.edges(form='eid'))
graph

Graph(num_nodes=169343, num_edges=0,
      ndata_schemes={'year': Scheme(shape=(1,), dtype=torch.int64), 'feat': Scheme(shape=(128,), dtype=torch.float32)}
      edata_schemes={})

### Add rewired edges

In [68]:
# Insert the rewired edges. The endpoint columns are read from CSV as floats,
# so cast them to int64 node IDs first (the cast raises on NaN/inf values).
# Conversion goes through NumPy: handing a pandas Series straight to
# `torch.tensor` makes PyTorch walk it element by element via `series[i]`,
# which is slow for millions of rows and can fail when the frame's index is
# not the default 0..n-1 range.
# NOTE: the `weight` column is not attached to the graph here.
rewired_src = torch.tensor(
    rewired_edges_df['source'].astype('int64').to_numpy(), dtype=torch.int64)
rewired_dst = torch.tensor(
    rewired_edges_df['target'].astype('int64').to_numpy(), dtype=torch.int64)
graph.add_edges(rewired_src, rewired_dst)

In [69]:
graph

Graph(num_nodes=169343, num_edges=2315598,
      ndata_schemes={'year': Scheme(shape=(1,), dtype=torch.int64), 'feat': Scheme(shape=(128,), dtype=torch.float32)}
      edata_schemes={})

In [71]:
graph.number_of_edges()

2315598

In [72]:
torch.mean(graph.out_degrees() * 1.)

tensor(13.6740)

In [73]:
torch.mean(graph.in_degrees() * 1.)

tensor(13.6740)

In [74]:
# Train/valid/test split. Each entry is a tensor of node indices (not a boolean
# mask over all nodes).
split_idx = dataset.get_idx_split()

In [75]:
split_idx

{'train': tensor([     0,      1,      2,  ..., 169145, 169148, 169251]),
 'valid': tensor([   349,    357,    366,  ..., 169185, 169261, 169296]),
 'test': tensor([   346,    398,    451,  ..., 169340, 169341, 169342])}

In [76]:
# Place the model inputs on the device selected earlier. Using `.to(device)`
# instead of a hard-coded `.cuda()` keeps the notebook runnable on CPU-only
# machines, matching the CPU fallback in the device-selection cell.
# NOTE: despite their names, the `*_mask` tensors hold node indices taken from
# `split_idx`, not boolean masks.
features = graph.ndata['feat'].to(device)
labels = dataset.labels.to(device)
train_mask = split_idx['train'].to(device)
val_mask = split_idx['valid'].to(device)
test_mask = split_idx['test'].to(device)

In [77]:
# Node IDs of each split. The `*_mask` tensors already contain node indices
# (they come straight from `split_idx`), so they are used as-is. Calling
# `.nonzero()` on an index tensor would instead return the positions of its
# non-zero entries - i.e. 0..len-1 minus the slot holding node 0 - which are
# not node IDs.
train_nid = train_mask
val_nid = val_mask
test_nid = test_mask

In [78]:
n_edges = graph.number_of_edges()

In [79]:
graph

Graph(num_nodes=169343, num_edges=2315598,
      ndata_schemes={'year': Scheme(shape=(1,), dtype=torch.int64), 'feat': Scheme(shape=(128,), dtype=torch.float32)}
      edata_schemes={})

In [81]:
graph = graph.to(device)

In [82]:
# --- Optimisation schedule ---
epochs = 5000          # number of full-batch training steps
log_steps = 100        # metrics are printed every `log_steps` epochs
lr_rate = 0.001        # learning rate handed to the Adam optimizer
n_iters = 3000         # NOTE(review): not referenced in the cells visible here

# --- Network shape ---
n_layers = 3
dropout = 0.5
input_dim = features.shape[1]      # size of each node feature vector
hidden_channels = 2 * input_dim    # hidden width is twice the input width
output_dim = dataset.num_classes

In [83]:
# Instantiate the GraphSAGE network from the hyperparameters defined above and
# place it on the selected device.
sage_config = dict(
    n_layers=n_layers,
    in_channels=input_dim,
    hidden_channels=hidden_channels,
    out_channels=output_dim,
    dropout=dropout,
)
model = GraphSAGE(**sage_config).to(device)

In [84]:
def train(model, graph, features, train_mask, optimizer, edge_weight=None, labels=None):
    """Run one full-batch optimisation step and return the training loss.

    Args:
        model: Callable as ``model(graph, features, edge_weight=...)``. Its
            output is fed to ``F.nll_loss``, so it is presumably expected to
            be per-node log-probabilities - confirm against the model class.
        graph: Graph passed through to the model unchanged.
        features: Node feature tensor passed through to the model unchanged.
        train_mask: Index (or mask) selecting the training nodes; applied to
            both the model output and the labels.
        optimizer: Optimizer whose gradients are zeroed before the backward
            pass and which is stepped afterwards.
        edge_weight: Optional edge weights forwarded to the model.
        labels: Label tensor with a singleton second dimension (it is squeezed
            along dim 1). When omitted, the notebook-level ``labels`` variable
            is used, which preserves the behaviour of existing call sites.

    Returns:
        The loss of this step as a Python float.
    """
    if labels is None:
        # Backward compatibility: earlier callers relied on the global tensor.
        labels = globals()['labels']

    model.train()

    optimizer.zero_grad()
    out = model(graph, features, edge_weight=edge_weight)[train_mask]
    loss = F.nll_loss(out, labels.squeeze(1)[train_mask])
    loss.backward()
    optimizer.step()

    return loss.item()

In [85]:
@torch.no_grad()
def test(model, graph, features, labels, train_mask, val_mask, test_mask, evaluator, edge_weight=None):
    model.eval()

    out = model(graph, features, edge_weight=edge_weight)
    y_pred = out.argmax(dim=-1, keepdim=True)

    train_acc = evaluator.eval({
        'y_true': labels[train_mask],
        'y_pred': y_pred[train_mask],
    })['acc']
    valid_acc = evaluator.eval({
        'y_true': labels[val_mask],
        'y_pred': y_pred[val_mask],
    })['acc']
    test_acc = evaluator.eval({
        'y_true': labels[test_mask],
        'y_pred': y_pred[test_mask],
    })['acc']

    return train_acc, valid_acc, test_acc

In [86]:
evaluator = Evaluator(name='ogbn-arxiv')

In [87]:
edge_weights = None

In [88]:
# Full-batch training: one optimizer step per epoch over the whole graph.
optimizer = torch.optim.Adam(model.parameters(), lr=lr_rate)
for epoch in range(1, 1 + epochs):
    loss = train(model, graph, features, train_mask, optimizer, edge_weight=edge_weights)

    is_log_epoch = epoch % log_steps == 0
    # A full-graph evaluation pass is only needed when its numbers are
    # reported, so skip it on the other epochs. The final epoch is always
    # evaluated so that `result` holds the end-of-training metrics.
    if is_log_epoch or epoch == epochs:
        result = test(model, graph, features, labels, train_mask, val_mask, test_mask, evaluator, edge_weight=edge_weights)

    if is_log_epoch:
        train_acc, valid_acc, test_acc = result
        print(f'Epoch: {epoch:02d}, '
              f'Loss: {loss:.4f}, '
              f'Train: {100 * train_acc:.2f}%, '
              f'Valid: {100 * valid_acc:.2f}% '
              f'Test: {100 * test_acc:.2f}%')

Epoch: 100, Loss: 1.4433, Train: 63.47%, Valid: 63.96% Test: 64.17%
Epoch: 200, Loss: 1.2534, Train: 67.27%, Valid: 66.60% Test: 66.33%
Epoch: 300, Loss: 1.1701, Train: 68.94%, Valid: 67.35% Test: 66.85%
Epoch: 400, Loss: 1.1165, Train: 70.13%, Valid: 67.82% Test: 67.28%
Epoch: 500, Loss: 1.0787, Train: 71.15%, Valid: 68.28% Test: 67.51%
Epoch: 600, Loss: 1.0452, Train: 72.03%, Valid: 68.60% Test: 67.89%
Epoch: 700, Loss: 1.0119, Train: 72.76%, Valid: 68.75% Test: 67.97%
Epoch: 800, Loss: 0.9902, Train: 73.50%, Valid: 69.00% Test: 68.21%
Epoch: 900, Loss: 0.9701, Train: 74.22%, Valid: 69.19% Test: 68.48%
Epoch: 1000, Loss: 0.9509, Train: 74.89%, Valid: 69.22% Test: 68.54%
Epoch: 1100, Loss: 0.9302, Train: 75.39%, Valid: 69.29% Test: 68.55%
Epoch: 1200, Loss: 0.9131, Train: 75.89%, Valid: 69.42% Test: 68.63%
Epoch: 1300, Loss: 0.9004, Train: 76.35%, Valid: 69.45% Test: 68.85%
Epoch: 1400, Loss: 0.8885, Train: 76.82%, Valid: 69.34% Test: 68.57%
Epoch: 1500, Loss: 0.8781, Train: 77.20%, V