In [1]:
import pandas as pd
import numpy as np
import operator
import gzip
import networkx as nx
import time
import random
# Seed Python's built-in RNG so any `random`-based choices repeat across runs.
# NOTE(review): only `random` is seeded; no numpy or torch seed call is visible
# in this notebook — confirm whether the project code seeds those itself.
random.seed(10)

import torch
import torch.nn as nn
from torch.nn import Linear
import torch.nn.functional as F

import torch_geometric
from torch_geometric.data import HeteroData
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, SAGEConv, GATConv, Linear, to_hetero

from sklearn.metrics import precision_score, recall_score, f1_score

from src.utils import *
from src.gnn import *

In [2]:
# Record the library versions this notebook was executed with.
for library in (torch, torch_geometric):
    print(library.__version__)

2.0.1+cu118
2.4.0


In [3]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


**Read df_train, df_test**

In [7]:
# Load the three gzip-compressed training splits (subsumption, relation,
# assertion) and stack them into one training frame.
df_subsumption_train, df_relation_train, df_assertion_train = (
    pd.read_csv(split_path, compression='gzip')
    for split_path in (
        'datasets/clg/clg_full/subsumption_train.pd.gz',
        'datasets/clg/clg_full/relation_train.pd.gz',
        'datasets/clg/clg_full/assertion_train.pd.gz',
    )
)
df_train = pd.concat([df_subsumption_train, df_relation_train, df_assertion_train])

In [4]:
# Load the gzip-compressed test triples (a single file, unlike the three training splits).
df_test = pd.read_csv('datasets/clg/clg_full/clg_full-test.pd.gz', compression='gzip')

**Get g_train, g_test**

In [None]:
# Shorten the triple column names to s / p / o; the predicate filters further
# down index the test frame by 'p'.
triple_column_aliases = {'sub': 's', 'rel': 'p', 'obj': 'o'}
df_train = df_train.rename(columns=triple_column_aliases)
df_test = df_test.rename(columns=triple_column_aliases)

In [None]:
# Build the graph over all training triples. `create_graph` comes from the
# project star-imports; presumably it returns a networkx graph, since later
# cells call number_of_nodes()/number_of_edges() on the result — confirm.
g_train = create_graph(df_train)

In [None]:
# Build the graph over all test triples, using the same project helper as for the training graph.
g_test = create_graph(df_test)

In [None]:
# Apply the same s / p / o column names to the per-relation training frames
# that get their own graphs below. df_relation_train is not renamed here; no
# later cell in this notebook uses it on its own.
triple_column_aliases = {'sub': 's', 'rel': 'p', 'obj': 'o'}
df_subsumption_train = df_subsumption_train.rename(columns=triple_column_aliases)
df_assertion_train = df_assertion_train.rename(columns=triple_column_aliases)

In [None]:
# Training graph restricted to the subsumption split (built from df_subsumption_train only).
g_train_filter_subclass = create_graph(df_subsumption_train)

In [None]:
# Training graph restricted to the assertion split (built from df_assertion_train only).
g_train_filter_assertion = create_graph(df_assertion_train)

In [None]:
# The test set ships as one file, so the per-relation test graphs are obtained
# by selecting rows on the predicate column before building each graph.
subclass_predicate = '<http://www.w3.org/2000/01/rdf-schema#subClassOf>'
type_predicate = '<http://www.w3.org/1999/02/22-rdf-syntax-ns#type>'

is_subclass_row = df_test['p'] == subclass_predicate
df_test_filter_subclass = df_test[is_subclass_row]
g_test_filter_subclass = create_graph(df_test_filter_subclass)

is_assertion_row = df_test['p'] == type_predicate
df_test_filter_assertion = df_test[is_assertion_row]
g_test_filter_assertion = create_graph(df_test_filter_assertion)

**Stats**

In [None]:
# Summarise dataset sizes: overall triple/node/edge counts first, then the
# edge counts of the subclass-only and assertion-only graphs.
stats_lines = [
    f'# Train - Triplets: {len(df_train)}, # Nodes: {g_train.number_of_nodes()}, # Edges: {g_train.number_of_edges()}',
    f'# Test - Triplets: {len(df_test)}, # Nodes: {g_test.number_of_nodes()}, # Edges: {g_test.number_of_edges()}',
    f'# Train - Subclass Edges: {g_train_filter_subclass.number_of_edges()}',
    f'# Test - Subclass Edges: {g_test_filter_subclass.number_of_edges()}',
    f'# Train - Assertion Edges: {g_train_filter_assertion.number_of_edges()}',
    f'# Test - Assertion Edges: {g_test_filter_assertion.number_of_edges()}',
]
for stats_line in stats_lines:
    print(stats_line)

**Train**

In [None]:
# Train the three model variants and save each one under Models/.
#
# `file` is the prefix of every saved model name, but no visible cell assigns
# it, so on a fresh kernel the first torch.save would raise NameError after the
# model has already been trained. Fall back to the dataset name used in the
# data paths unless an earlier cell or star-import already provides a value.
# NOTE(review): 'clg_full' is inferred from the dataset paths — confirm.
if 'file' not in globals():
    file = 'clg_full'

# One row per variant:
#   (printed label, saved-file suffix, architecture name given to GNN._train,
#    extra graphs passed after g_train)
# The "Filtered" variant uses the 2-Hop GAT architecture and additionally
# receives the subclass-only and assertion-only training graphs.
training_runs = [
    ('GAT', 'GAT', 'GAT', ()),
    ('2-Hop GAT', '2HopGAT', '2-Hop GAT', ()),
    ('Filtered 2-Hop GAT', 'Filtered2HopGAT', '2-Hop GAT',
     (g_train_filter_subclass, g_train_filter_assertion)),
]

for run_label, file_suffix, architecture, filter_graphs in training_runs:
    print(run_label)
    st = time.time()
    model = GNN()
    # An empty `filter_graphs` tuple unpacks to no extra arguments, so the
    # unfiltered runs call _train(device, architecture, g_train) as before.
    model._train(device, architecture, g_train, *filter_graphs)
    torch.save(model, f'Models/{file}_{file_suffix}')
    et = time.time()
    elapsed_time = et - st
    print(f'Run time: {elapsed_time:.0f} seconds, {elapsed_time/60:.0f} minutes')
    print()

**Eval**

In [None]:
# Evaluate each saved model on the subclass-only, assertion-only and full test graphs.
#
# `file` is the prefix of every saved model name, but no visible cell assigns
# it, so on a fresh kernel the first torch.load would raise NameError. Fall
# back to the dataset name used in the data paths unless an earlier cell or
# star-import already provides a value.
# NOTE(review): 'clg_full' is inferred from the dataset paths — confirm.
if 'file' not in globals():
    file = 'clg_full'

# Test graphs, in the order they are reported for every model.
evaluation_graphs = [
    ('SubClass Relations', g_test_filter_subclass),
    ('Assertion Relations', g_test_filter_assertion),
    ('All Relations', g_test),
]

# One row per variant:
#   (printed label, saved-file suffix, architecture name given to GNN._eval,
#    extra graphs passed after the evaluated graph)
# The "Filtered" variant additionally receives the subclass-only and
# assertion-only test graphs on every call.
evaluation_runs = [
    ('GAT:', 'GAT', 'GAT', ()),
    ('2-Hop GAT:', '2HopGAT', '2-Hop GAT', ()),
    ('Filtered 2-Hop GAT:', 'Filtered2HopGAT', '2-Hop GAT',
     (g_test_filter_subclass, g_test_filter_assertion)),
]

for run_label, file_suffix, architecture, filter_graphs in evaluation_runs:
    print(run_label)
    # torch.load unpickles the whole model object; only load files produced by
    # the training cell of this notebook (or otherwise trusted).
    model = torch.load(f'Models/{file}_{file_suffix}')
    for relation_label, graph in evaluation_graphs:
        print(relation_label)
        # The first argument is the graph's node count capped at 100; its
        # meaning is defined by GNN._eval (presumably a top-k / candidate
        # count — TODO confirm).
        model._eval(min(graph.number_of_nodes(), 100), architecture, graph, *filter_graphs)
    print()