In [1]:
import pandas as pd
import numpy as np
import operator
import os
import gzip
import networkx as nx
import time
import random
random.seed(10)

import torch
import torch.nn as nn
from torch.nn import Linear
import torch.nn.functional as F

import torch_geometric
from torch_geometric.data import HeteroData
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, SAGEConv, GATConv, Linear, to_hetero

from sklearn.metrics import precision_score, recall_score, f1_score

from src.utils import *
from src.gnn import *

In [2]:
# Record the installed PyTorch version next to the results for reproducibility.
print(torch.__version__)

2.0.1+cu118


In [3]:
# Record the installed PyTorch Geometric version for reproducibility.
print(torch_geometric.__version__)

2.4.0


In [4]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# `device` is passed to every `GNN._train` call in the cells below.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


# OWL2Bench

In [5]:
# One entry per OWL2Bench dataset:
#   path       - directory that holds the dataset files
#   train_file - name of the training split passed to the loader
#   test_file  - name of the test split passed to the loader
#   file       - dataset tag used when naming the saved models
OWL2Bench_dbs = [
    {
        'path': f'./datasets/OWL2Bench/{name}/',
        'train_file': f'_train_{name}',
        'test_file': f'_test_{name}',
        'file': name,
    }
    for name in ('OWL2Bench1', 'OWL2Bench2')
]

In [6]:
# Train and save three GNN variants for every OWL2Bench dataset.
#
# For each dataset the graphs are loaded once, then each variant is trained
# from a fresh `GNN()` and pickled to `Models/<file>_<suffix>`.
# The reported run time covers training plus saving the model.

# torch.save does not create missing parent directories; make sure the output
# folder exists up front so a finished training run is never lost on save.
os.makedirs('Models', exist_ok=True)

# (printed label, architecture name given to GNN._train, file suffix,
#  whether the filtered subclass/assertion graphs are passed as well)
gnn_variants = [
    ('GAT', 'GAT', 'GAT', False),
    ('2-Hop GAT', '2-Hop GAT', '2HopGAT', False),
    ('Filtered 2-Hop GAT', '2-Hop GAT', 'GATReasoner', True),
]

for db_ in OWL2Bench_dbs:
    path = db_['path']
    train_file = db_['train_file']
    test_file = db_['test_file']
    file = db_['file']

    # The test graphs are loaded alongside the training graphs but are not
    # used in this cell.
    (g_train, g_train_filter_subclass, g_train_filter_assertion,
     g_test, g_test_filter_subclass, g_test_filter_assertion) = \
        load_ore_graphs(path, train_file, test_file)

    for label, architecture, suffix, use_filters in gnn_variants:
        # Only the filtered variant receives the two extra filter graphs.
        if use_filters:
            filter_graphs = (g_train_filter_subclass, g_train_filter_assertion)
        else:
            filter_graphs = ()

        print(label)
        st = time.time()
        model = GNN()
        model._train(device, architecture, g_train, *filter_graphs)
        torch.save(model, f'Models/{file}_{suffix}')
        et = time.time()
        elapsed_time = et - st
        print(f'Run time: {elapsed_time:.0f} seconds, {elapsed_time/60:.0f} minutes')
        print()

Running... _train_OWL2Bench1 _test_OWL2Bench1
# Train - Triplets: 8096, # Nodes: 3683, # Edges: 8096
# Test - Triplets: 2315, # Nodes: 1865, # Edges: 2315

GAT
Epoch: 0, Loss: 0.3522
Epoch: 400, Loss: 0.0575
Epoch: 800, Loss: 0.0292
Run time: 9 seconds, 0 minutes

2-Hop GAT
Epoch: 0, Loss: 0.3104
Epoch: 400, Loss: 0.1639
Epoch: 800, Loss: 0.1600
Run time: 18 seconds, 0 minutes

GAT Reasoner
Epoch: 0, Loss: 0.3620
Epoch: 400, Loss: 0.2858
Epoch: 800, Loss: 0.2405
Run time: 9 seconds, 0 minutes

Running... _train_OWL2Bench2 _test_OWL2Bench2
# Train - Triplets: 15633, # Nodes: 7132, # Edges: 15633
# Test - Triplets: 4469, # Nodes: 3641, # Edges: 4469

GAT
Epoch: 0, Loss: 0.3593
Epoch: 400, Loss: 0.0498
Epoch: 800, Loss: 0.0282
Run time: 14 seconds, 0 minutes

2-Hop GAT
Epoch: 0, Loss: 0.3284
Epoch: 400, Loss: 0.1200
Epoch: 800, Loss: 0.1149
Run time: 48 seconds, 1 minutes

GAT Reasoner
Epoch: 0, Loss: 0.2927
Epoch: 400, Loss: 0.1455
Epoch: 800, Loss: 0.1387
Run time: 24 seconds, 0 minutes

# ORE

In [None]:
# One entry per ORE dataset:
#   path       - directory that holds the dataset files
#   train_file - name of the training split passed to the loader
#   test_file  - name of the test split passed to the loader
#   file       - dataset tag used when naming the saved models
ORE_dbs = [
    {
        'path': f'./datasets/ORE/{name}/',
        'train_file': f'_train_{name}',
        'test_file': f'_test_{name}',
        'file': name,
    }
    for name in ('ORE1', 'ORE2', 'ORE3')
]

In [None]:
# Train and save three GNN variants for every ORE dataset.
#
# For each dataset the graphs are loaded once, then each variant is trained
# from a fresh `GNN()` and pickled to `Models/<file>_<suffix>`.
# The reported run time covers training plus saving the model.

# torch.save does not create missing parent directories; make sure the output
# folder exists up front so a finished training run is never lost on save.
os.makedirs('Models', exist_ok=True)

# (printed label, architecture name given to GNN._train, file suffix,
#  whether the filtered subclass/assertion graphs are passed as well)
gnn_variants = [
    ('GAT', 'GAT', 'GAT', False),
    ('2-Hop GAT', '2-Hop GAT', '2HopGAT', False),
    ('Filtered 2-Hop GAT', '2-Hop GAT', 'GATReasoner', True),
]

for db_ in ORE_dbs:
    path = db_['path']
    train_file = db_['train_file']
    test_file = db_['test_file']
    file = db_['file']

    # The test graphs are loaded alongside the training graphs but are not
    # used in this cell.
    (g_train, g_train_filter_subclass, g_train_filter_assertion,
     g_test, g_test_filter_subclass, g_test_filter_assertion) = \
        load_ore_graphs(path, train_file, test_file)

    for label, architecture, suffix, use_filters in gnn_variants:
        # Only the filtered variant receives the two extra filter graphs.
        if use_filters:
            filter_graphs = (g_train_filter_subclass, g_train_filter_assertion)
        else:
            filter_graphs = ()

        print(label)
        st = time.time()
        model = GNN()
        model._train(device, architecture, g_train, *filter_graphs)
        torch.save(model, f'Models/{file}_{suffix}')
        et = time.time()
        elapsed_time = et - st
        print(f'Run time: {elapsed_time:.0f} seconds, {elapsed_time/60:.0f} minutes')
        print()

# CaLiGraph

In [None]:
# One entry per CaLiGraph sample:
#   path       - directory that holds the dataset files
#   train_file - N-Triples file with the training split
#   test_file  - N-Triples file with the test split
#   file       - dataset tag used when naming the saved models
CLG_dbs = [
    {
        'path': f'./datasets/clg/{name}/',
        'train_file': f'{name}-train.nt',
        'test_file': f'{name}-test.nt',
        'file': name,
    }
    for name in ('clg_10e4', 'clg_10e5')
]

In [None]:
# Train and save three GNN variants for every CaLiGraph sample.
#
# For each dataset the graphs are loaded once, then each variant is trained
# from a fresh `GNN()` and pickled to `Models/<file>_<suffix>`.
# The reported run time covers training plus saving the model.

# torch.save does not create missing parent directories; make sure the output
# folder exists up front so a finished training run is never lost on save.
os.makedirs('Models', exist_ok=True)

# (printed label, architecture name given to GNN._train, file suffix,
#  whether the filtered subclass/assertion graphs are passed as well)
gnn_variants = [
    ('GAT', 'GAT', 'GAT', False),
    ('2-Hop GAT', '2-Hop GAT', '2HopGAT', False),
    ('Filtered 2-Hop GAT', '2-Hop GAT', 'GATReasoner', True),
]

for db_ in CLG_dbs:
    path = db_['path']
    train_file = db_['train_file']
    test_file = db_['test_file']
    file = db_['file']

    # The test graphs are loaded alongside the training graphs but are not
    # used in this cell.
    (g_train, g_train_filter_subclass, g_train_filter_assertion,
     g_test, g_test_filter_subclass, g_test_filter_assertion) = \
        load_clg_graphs(path, train_file, test_file)

    for label, architecture, suffix, use_filters in gnn_variants:
        # Only the filtered variant receives the two extra filter graphs.
        if use_filters:
            filter_graphs = (g_train_filter_subclass, g_train_filter_assertion)
        else:
            filter_graphs = ()

        print(label)
        st = time.time()
        model = GNN()
        model._train(device, architecture, g_train, *filter_graphs)
        torch.save(model, f'Models/{file}_{suffix}')
        et = time.time()
        elapsed_time = et - st
        print(f'Run time: {elapsed_time:.0f} seconds, {elapsed_time/60:.0f} minutes')
        print()