In [1]:
import sys
sys.path.append('../../../../')
from aiagents4pharma.talk2knowledgegraphs.models.nbfnet import tasks, util, models, datasets

import hydra
import os
import time
import torch

from torch_geometric.data import Data
from torch_geometric.datasets import RelLinkPredDataset, WordNet18RR

  from .autonotebook import tqdm as notebook_tqdm
INFO:aiagents4pharma.talk2scholars.tools.pdf.question_and_answer:Loaded Question and Answer tool configuration.


In [2]:
# Location of the Hydra YAML config for the transductive WN18RR experiment.
# NOTE(review): `args` is not referenced by any later cell shown in this notebook;
# the next cell passes the config directory and config name to Hydra directly.
args = {
    "config": "../../../../aiagents4pharma/talk2knowledgegraphs/models/config/transductive/wn18rr.yaml"
}

In [3]:
# Compose the experiment configuration with Hydra from the transductive config folder.
config_dir = "../../../../aiagents4pharma/talk2knowledgegraphs/models/config/transductive"
with hydra.initialize(version_base=None, config_path=config_dir):
    cfg = hydra.compose(config_name="wn18rr")

# Notebook-local overrides applied on top of the YAML values.
# (A dataset version override, `cfg.dataset.version = "v1"`, is left disabled.)
cfg.dataset.root = "../../../../../data/datasets/knowledge_graphs"
cfg.train.gpus = [0]
cfg


{'output_dir': '~/experiments/', 'dataset': {'class': 'WN18RR', 'root': '../../../../../data/datasets/knowledge_graphs'}, 'model': {'class': 'NBFNet', 'input_dim': 32, 'hidden_dims': [32, 32, 32, 32, 32, 32], 'message_func': 'distmult', 'aggregate_func': 'pna', 'short_cut': True, 'layer_norm': True, 'dependent': False}, 'task': {'num_negative': 32, 'strict_negative': True, 'adversarial_temperature': 1, 'metric': ['mr', 'mrr', 'hits@1', 'hits@3', 'hits@10']}, 'optimizer': {'class': 'Adam', 'lr': 0.005}, 'train': {'gpus': [0], 'batch_size': 16, 'num_epoch': 20, 'log_interval': 100}}

In [4]:
# Build a timestamped output path: <output_dir>/<model class>/<dataset class>/<timestamp>.
# Only the path string is computed here; nothing is created on disk in this cell.
output_root = os.path.expanduser(cfg.output_dir)
run_stamp = time.strftime("%Y-%m-%d-%H-%M-%S")
working_dir = os.path.join(output_root, cfg.model["class"], cfg.dataset["class"], run_stamp)
working_dir

'C:\\Users\\mulyadi/experiments/NBFNet\\WN18RR\\2025-04-30-17-58-53'

In [5]:
# Seed PyTorch's RNG with the base seed offset by the value of util.get_rank().
default_seed = 1024
rank_seed = default_seed + util.get_rank()
torch.manual_seed(rank_seed)

<torch._C.Generator at 0x1a1e68bcc10>

In [6]:
# The run is treated as inductive when the dataset class name begins with "Ind";
# any other name means the transductive setting.
dataset_class = cfg.dataset["class"]
is_inductive = dataset_class.startswith("Ind")
is_inductive


False

In [7]:
# Build dataset
cfg.dataset["class"]

'WN18RR'

In [8]:
cfg.dataset

{'class': 'WN18RR', 'root': '../../../../../data/datasets/knowledge_graphs'}

In [9]:
cls = cfg.dataset["class"]
cls

'WN18RR'

In [10]:
from omegaconf import OmegaConf

# Work on a detached copy of the config: resolve it to plain containers, then wrap it again.
plain_cfg = OmegaConf.to_container(cfg, resolve=True)
cfg_ = OmegaConf.create(plain_cfg)
# Drop the "class" entry so the remaining dataset keys can be unpacked as keyword arguments.
del cfg_["dataset"]["class"]
cfg_

{'output_dir': '~/experiments/', 'dataset': {'root': '../../../../../data/datasets/knowledge_graphs'}, 'model': {'class': 'NBFNet', 'input_dim': 32, 'hidden_dims': [32, 32, 32, 32, 32, 32], 'message_func': 'distmult', 'aggregate_func': 'pna', 'short_cut': True, 'layer_norm': True, 'dependent': False}, 'task': {'num_negative': 32, 'strict_negative': True, 'adversarial_temperature': 1, 'metric': ['mr', 'mrr', 'hits@1', 'hits@3', 'hits@10']}, 'optimizer': {'class': 'Adam', 'lr': 0.005}, 'train': {'gpus': [0], 'batch_size': 16, 'num_epoch': 20, 'log_interval': 100}}

In [11]:
cfg_.dataset

{'root': '../../../../../data/datasets/knowledge_graphs'}

### WN18RR

In [19]:
# Instantiate the WordNet18RR dataset from torch_geometric, unpacking the remaining
# dataset config keys (after "class" was removed) as keyword arguments.
dataset = WordNet18RR(**cfg_.dataset)
# The following cells convert WN18RR into the same format as FB15k-237.

In [20]:
# Take the Data object stored on the dataset and display its fields.
data = dataset.data
data



Data(edge_index=[2, 93003], edge_type=[93003], train_mask=[93003], val_mask=[93003], test_mask=[93003], num_nodes=40943)

In [21]:
data.edge_index.max()

tensor(40942)

In [22]:
# Relation ids of the training edges only, selected with train_mask.
edge_type = data.edge_type[data.train_mask]
edge_type

tensor([ 3,  9, 10,  ...,  3,  3,  3])

In [23]:
# Endpoint pairs (2 rows) of the training edges only, selected column-wise with train_mask.
edge_index = data.edge_index[:, data.train_mask]
edge_index

tensor([[    0,     0,     1,  ..., 39607, 39608, 39609],
        [10211, 25525,  3891,  ...,   227,  2995,  7084]])

In [24]:
# Transductive WN18RR: turn the single masked graph into train/valid/test Data objects.
data = dataset.data
num_nodes = 1 + int(data.edge_index.max())
num_relations = 1 + int(data.edge_type.max())

# The graph used by every split is built from the training edges only; each edge is
# also added in the flipped direction with its relation id shifted by num_relations.
train_edges = data.edge_index[:, data.train_mask]
train_types = data.edge_type[data.train_mask]
edge_index = torch.cat([train_edges, train_edges.flip(0)], dim=-1)
edge_type = torch.cat([train_types, train_types + num_relations])

# Fields shared by all three splits; only the target edges differ per split.
shared_graph = dict(edge_index=edge_index, edge_type=edge_type, num_nodes=num_nodes)
train_data = Data(**shared_graph,
                  target_edge_index=data.edge_index[:, data.train_mask],
                  target_edge_type=data.edge_type[data.train_mask])
valid_data = Data(**shared_graph,
                  target_edge_index=data.edge_index[:, data.val_mask],
                  target_edge_type=data.edge_type[data.val_mask])
test_data = Data(**shared_graph,
                 target_edge_index=data.edge_index[:, data.test_mask],
                 target_edge_type=data.edge_type[data.test_mask])

# Replace the dataset's storage with the three collated splits and record the
# doubled relation count (original relations plus their shifted counterparts).
dataset.data, dataset.slices = dataset.collate([train_data, valid_data, test_data])
dataset.num_relations = 2 * num_relations

In [27]:
num_relations

11

In [28]:
dataset.num_relations

22

In [26]:
train_data.target_edge_index, train_data.target_edge_type

(tensor([[    0,     0,     1,  ..., 39607, 39608, 39609],
         [10211, 25525,  3891,  ...,   227,  2995,  7084]]),
 tensor([ 3,  9, 10,  ...,  3,  3,  3]))

In [None]:
# Relation ids of the training target edges. `target_edge_type` is a tensor
# attribute (it is displayed as a tensor elsewhere in this notebook), not a method,
# so it must be read without call parentheses.
train_data.target_edge_type

In [25]:
train_data, valid_data, test_data

(Data(edge_index=[2, 173670], edge_type=[173670], num_nodes=40943, target_edge_index=[2, 86835], target_edge_type=[86835]),
 Data(edge_index=[2, 173670], edge_type=[173670], num_nodes=40943, target_edge_index=[2, 3034], target_edge_type=[3034]),
 Data(edge_index=[2, 173670], edge_type=[173670], num_nodes=40943, target_edge_index=[2, 3134], target_edge_type=[3134]))

In [77]:
dataset.collate([train_data, valid_data, test_data])

(Data(edge_index=[2, 521010], edge_type=[521010], num_nodes=122829, target_edge_index=[2, 93003], target_edge_type=[93003]),
 {'edge_index': tensor([     0, 173670, 347340, 521010]),
  'edge_type': tensor([     0, 173670, 347340, 521010]),
  'target_edge_index': tensor([    0, 86835, 89869, 93003]),
  'target_edge_type': tensor([    0, 86835, 89869, 93003])})

In [81]:
dataset, dataset.data, dataset.slices



(WordNet18RR(3),
 Data(edge_index=[2, 521010], edge_type=[521010], num_nodes=122829, target_edge_index=[2, 93003], target_edge_type=[93003]),
 {'edge_index': tensor([     0, 173670, 347340, 521010]),
  'edge_type': tensor([     0, 173670, 347340, 521010]),
  'target_edge_index': tensor([    0, 86835, 89869, 93003]),
  'target_edge_type': tensor([    0, 86835, 89869, 93003])})

In [83]:
dataset.slices['edge_index']

tensor([     0, 173670, 347340, 521010])

### FB15K-237

In [12]:
cfg.dataset

{'class': 'WN18RR', 'root': '../../../../../data/datasets/knowledge_graphs'}

In [13]:
# Transductive FB15K-237, loaded through torch_geometric's RelLinkPredDataset.
dataset = RelLinkPredDataset(root='../../../../../data/datasets/knowledge_graphs', name='FB15k-237')
dataset

FB15k-237()

In [14]:
# Take the Data object stored on the FB15k-237 dataset and display its fields.
data = dataset.data
data



Data(edge_index=[2, 544230], num_nodes=14541, edge_type=[544230], train_edge_index=[2, 272115], train_edge_type=[272115], valid_edge_index=[2, 17535], valid_edge_type=[17535], test_edge_index=[2, 20466], test_edge_type=[20466])

In [15]:
# Wrap FB15k-237 into three Data objects that share one graph and differ only in
# which edges are the prediction targets (train / valid / test).
data = dataset.data
shared_graph = dict(edge_index=data.edge_index, edge_type=data.edge_type, num_nodes=data.num_nodes)
train_data = Data(**shared_graph,
                  target_edge_index=data.train_edge_index,
                  target_edge_type=data.train_edge_type)
valid_data = Data(**shared_graph,
                  target_edge_index=data.valid_edge_index,
                  target_edge_type=data.valid_edge_type)
test_data = Data(**shared_graph,
                 target_edge_index=data.test_edge_index,
                 target_edge_type=data.test_edge_type)

# Replace the dataset's storage with the three collated splits.
dataset.data, dataset.slices = dataset.collate([train_data, valid_data, test_data])

In [18]:
train_data.target_edge_index, train_data.target_edge_type

(tensor([[3364, 8077, 3776,  ..., 9124, 1318, 7664],
         [8619, 6142, 7385,  ..., 7803, 3679, 6680]]),
 tensor([183, 124, 166,  ..., 189, 155,   9]))

In [16]:
train_data, valid_data, test_data

(Data(edge_index=[2, 544230], edge_type=[544230], num_nodes=14541, target_edge_index=[2, 272115], target_edge_type=[272115]),
 Data(edge_index=[2, 544230], edge_type=[544230], num_nodes=14541, target_edge_index=[2, 17535], target_edge_type=[17535]),
 Data(edge_index=[2, 544230], edge_type=[544230], num_nodes=14541, target_edge_index=[2, 20466], target_edge_type=[20466]))

In [17]:
dataset.data, dataset.slices

(Data(edge_index=[2, 1632690], edge_type=[1632690], num_nodes=43623, target_edge_index=[2, 310116], target_edge_type=[310116]),
 {'edge_index': tensor([      0,  544230, 1088460, 1632690]),
  'edge_type': tensor([      0,  544230, 1088460, 1632690]),
  'target_edge_index': tensor([     0, 272115, 289650, 310116]),
  'target_edge_type': tensor([     0, 272115, 289650, 310116])})

In [94]:
dataset

FB15k-237()

In [97]:
# Report how many target edges each split holds (dataset index 0/1/2 = train/valid/test).
n_train = dataset[0].target_edge_index.shape[1]
n_valid = dataset[1].target_edge_index.shape[1]
n_test = dataset[2].target_edge_index.shape[1]
print("#train: %d, #valid: %d, #test: %d" % (n_train, n_valid, n_test))

#train: 272115, #valid: 17535, #test: 20466


In [121]:
dataset[0]

Data(edge_index=[2, 544230], edge_type=[544230], target_edge_index=[2, 272115], target_edge_type=[272115], num_nodes=14541)

In [120]:
dataset[0].edge_index

tensor([[3364, 8077, 3776,  ..., 7803, 3679, 6680],
        [8619, 6142, 7385,  ..., 9124, 1318, 7664]])

In [122]:
dataset[0].edge_type

tensor([183, 124, 166,  ..., 426, 392, 246])

In [126]:
dataset[0].edge_index.shape[1], dataset[0].edge_type.shape[0]

(544230, 544230)

In [131]:
dataset[0].target_edge_index

tensor([[3364, 8077, 3776,  ..., 9124, 1318, 7664],
        [8619, 6142, 7385,  ..., 7803, 3679, 6680]])

In [132]:
dataset[0].target_edge_type

tensor([183, 124, 166,  ..., 189, 155,   9])

In [143]:
# Relation id of the edge at position 272115, i.e. the first edge after the leading
# 272115 entries (272115 is also the number of training target edges).
# NOTE(review): the displayed value 420 equals 183 + 237, where 183 is the first
# target relation id; this suggests the second half of edge_type stores inverse
# relations offset by 237 — TODO confirm.
dataset[0].edge_type[272115]

tensor(420)

In [146]:
dataset[1]

Data(edge_index=[2, 544230], edge_type=[544230], target_edge_index=[2, 17535], target_edge_type=[17535], num_nodes=14541)

In [None]:
dataset[0].target_edge_index.shape[1], dataset[0].target_edge_type.shape[0]

(272115, 272115)

In [116]:
# Share of target edges per split. The three counts are the #train/#valid/#test
# values reported for FB15k-237 earlier in this notebook.
n_train, n_valid, n_test = 272115, 17535, 20466
total_a = n_train + n_valid + n_test
percent_train = n_train / total_a
percent_valid = n_valid / total_a
percent_test = n_test / total_a
summary = "train: %.2f, valid: %.2f, test: %.2f" % (percent_train, percent_valid, percent_test)
print(summary)

train: 0.88, valid: 0.06, test: 0.07


In [100]:
# Number of relation types reported by the currently loaded dataset.
# NOTE(review): the displayed 474 equals 2 x 237, which would be consistent with
# inverse relations being counted for FB15k-237 — TODO confirm.
cfg_num_relation = dataset.num_relations
cfg_num_relation

474

### Build Model

In [101]:
cfg.model

{'class': 'NBFNet', 'input_dim': 32, 'hidden_dims': [32, 32, 32, 32, 32, 32], 'message_func': 'distmult', 'aggregate_func': 'pna', 'short_cut': True, 'layer_norm': True, 'dependent': False}

In [107]:
from omegaconf import OmegaConf

# Rebuild a detached copy of the config, strip the model's "class" entry, and add the
# dataset's relation count so the remaining model keys can be unpacked as keyword
# arguments for the model constructor.
cfg_ = OmegaConf.create(OmegaConf.to_container(cfg, resolve=True))
del cfg_["model"]["class"]
cfg_.model["num_relation"] = dataset.num_relations

In [105]:
cfg_.model

{'input_dim': 32, 'hidden_dims': [32, 32, 32, 32, 32, 32], 'message_func': 'distmult', 'aggregate_func': 'pna', 'short_cut': True, 'layer_norm': True, 'dependent': False}

In [108]:
# Instantiate NBFNet by unpacking the model section of cfg_ as keyword arguments,
# then display the module structure.
model = models.NBFNet(**cfg_.model)
model

NBFNet(
  (layers): ModuleList(
    (0-5): 6 x GeneralizedRelationalConv()
  )
  (query): Embedding(474, 32)
  (mlp): Sequential(
    (0): Linear(in_features=64, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=1, bias=True)
  )
)

In [None]:
# Checkpoint: restore model weights only when the config actually names one.
# The composed config displayed earlier in this notebook has no `checkpoint` key,
# so reading `cfg.checkpoint` unconditionally makes this cell error instead of
# simply skipping the restore.
if "checkpoint" in cfg:
    # NOTE: torch.load unpickles the file; only load checkpoints from trusted sources.
    state = torch.load(cfg.checkpoint, map_location="cpu")
    model.load_state_dict(state["model"])

In [None]:
def get_device(cfg):
    """Pick the torch device for this process from the training config.

    Args:
        cfg: Config object exposing ``cfg.train.gpus``, a (possibly empty)
            sequence of GPU device identifiers.

    Returns:
        torch.device: the entry of ``cfg.train.gpus`` at index
        ``util.get_rank()`` when GPUs are configured and CUDA is available;
        otherwise the CPU device.
    """
    # Fall back to CPU when no GPUs are configured or CUDA is not available on
    # this machine, so a GPU-enabled config still runs on a CPU-only host.
    if cfg.train.gpus and torch.cuda.is_available():
        # `get_rank` is only reachable through the imported `util` module; the
        # bare name is not defined in this notebook.
        device = torch.device(cfg.train.gpus[util.get_rank()])
    else:
        device = torch.device("cpu")
    return device

### Device and Data Preparation

In [112]:
# Use the first CUDA device when one is available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
device

device(type='cpu')

In [113]:
# Pull the three collated splits back out of the dataset (0 = train, 1 = valid,
# 2 = test) and move each one to the selected device.
train_data = dataset[0].to(device)
valid_data = dataset[1].to(device)
test_data = dataset[2].to(device)

In [114]:
# For the transductive setting, filtered ranking uses the whole graph: the target
# edges of the collated dataset (all splits together), placed on the same device.
filtered_data = Data(
    edge_index=dataset.data.target_edge_index,
    edge_type=dataset.data.target_edge_type,
).to(device)



### Training

In [None]:
# Run training with the train and validation splits, passing filtered_data for filtered ranking.
# NOTE(review): `train_and_validate` is not defined or imported in any cell shown in
# this notebook — TODO confirm where it should come from before running.
train_and_validate(cfg, model, train_data, valid_data, filtered_data=filtered_data)

### Validation

In [None]:
# Evaluate the model on the validation split, passing filtered_data for filtered ranking.
# NOTE(review): `test` is not defined or imported in any cell shown in this
# notebook — TODO confirm where it should come from before running.
test(cfg, model, valid_data, filtered_data=filtered_data)

### Testing

In [None]:
# Evaluate the model on the held-out test split, passing filtered_data for filtered ranking.
# NOTE(review): `test` is not defined or imported in any cell shown in this
# notebook — TODO confirm where it should come from before running.
test(cfg, model, test_data, filtered_data=filtered_data)