In [1]:
import os.path as osp
import os
import torch
import torch.nn.functional as F
%matplotlib inline
import matplotlib.pyplot as plt
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, RGCNConv#GNNExplainer
from torch.nn import Sequential, Linear
import tensorflow as tf
import numpy as np
from torch_geometric.data import Data
import random as rn
import utils

In [2]:
# Seed every random number generator used in this notebook.
SEED = 123
# NOTE(review): PYTHONHASHSEED is normally read when the interpreter starts, so
# assigning it here presumably only affects child processes, not this kernel —
# confirm whether hash-order determinism is actually needed.
os.environ['PYTHONHASHSEED'] = str(SEED)
os.environ['TF_DETERMINISTIC_OPS'] = '1'
np.random.seed(SEED)
rn.seed(SEED)
tf.random.set_seed(SEED)
# Kept last on purpose: torch.manual_seed returns the torch Generator, which is
# what the cell displays as its output.
torch.manual_seed(SEED)

<torch._C.Generator at 0x108afbe70>

In [3]:
import os.path as osp
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, RGCNConv,GNNExplainer

In [4]:
# dataset = 'Cora'
# #path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'Planetoid')
# dataset = Planetoid('/Users/nhalliwe/Desktop/pytorch_geometric-master/data/Planetoid',
#                     dataset, transform=T.NormalizeFeatures())
# data = dataset[0]

In [5]:


class Net(torch.nn.Module):
    """One relational GCN layer followed by a DistMult scorer.

    The encoder output width (50) must equal ``DistMult.embedding_dim``, and
    both layers are built for two relation types.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = RGCNConv(in_channels=2, out_channels=50, num_relations=2)
        # A second RGCNConv layer was sketched here originally but is disabled.
        self.DistMult = DistMult(embedding_dim=50, num_relations=2)

    def forward(self, triple, edge_index, rel_idx):
        """Encode the nodes and squash the DistMult score with a sigmoid.

        Args:
            triple: Node feature input handed to ``conv1`` as ``x``.
                Because ``DistMult.forward`` unpacks the encoder output into
                exactly two rows (head, tail), this must describe two nodes.
            edge_index: Edge indices passed to ``conv1``.
            rel_idx: Relation index per edge; used both as the edge type of
                ``conv1`` and to select the relation embedding in DistMult.

        Returns:
            The sigmoid of the DistMult score.
        """
        x = F.relu(self.conv1(triple, edge_index, rel_idx))
        # torch.sigmoid replaces the deprecated torch.nn.functional.sigmoid.
        return torch.sigmoid(self.DistMult(x, rel_idx))

class DistMult(torch.nn.Module):
    """DistMult-style scorer with one learnable embedding row per relation.

    Args:
        embedding_dim: Width of the entity / relation embeddings.
        num_relations: Number of rows in the relation embedding table.
    """

    def __init__(self, embedding_dim, num_relations):
        super().__init__()
        self.embedding_dim = embedding_dim
        self.num_relations = num_relations

        # Allocate the (num_relations, embedding_dim) table, then fill it with
        # samples from a standard normal distribution.
        relation_table = torch.empty(self.num_relations, self.embedding_dim)
        self.weights = torch.nn.Parameter(relation_table)
        torch.nn.init.normal_(self.weights, mean=0.0, std=1)

    def forward(self, x, rel_idx):
        """Return the summed element-wise product head * relation * tail.

        ``x`` is unpacked along its first dimension into exactly two items
        (head embedding, tail embedding). The relation embedding is element 0
        of ``self.weights[rel_idx]``, i.e. only the first selected entry is
        used. The result is reduced with a full sum.
        """
        head_e, tail_e = x
        relation_e = self.weights[rel_idx][0]
        return (head_e * relation_e * tail_e).sum()

In [26]:
import os
import os.path as osp
from collections import Counter

import gzip
import pandas as pd
import numpy as np
import torch

from torch_geometric.data import (InMemoryDataset, Data, download_url,
                                  extract_tar)


class Entities(InMemoryDataset):
    r"""The relational entities networks "AIFB", "MUTAG", "BGS" and "AM" from
    the `"Modeling Relational Data with Graph Convolutional Networks"
    <https://arxiv.org/abs/1703.06103>`_ paper.
    Training and test splits are given by node indices.
    Args:
        root (string): Root directory where the dataset should be saved.
        name (string): The name of the dataset (:obj:`"AIFB"`,
            :obj:`"MUTAG"`, :obj:`"BGS"`, :obj:`"AM"`).
        transform (callable, optional): A function/transform that takes in an
            :obj:`torch_geometric.data.Data` object and returns a transformed
            version. The data object will be transformed before every access.
            (default: :obj:`None`)
        pre_transform (callable, optional): A function/transform that takes in
            an :obj:`torch_geometric.data.Data` object and returns a
            transformed version. The data object will be transformed before
            being saved to disk. (default: :obj:`None`)
    """

    url = 'https://data.dgl.ai/dataset/{}.tgz'

    # (label column, node column) of the task/train/test TSV files, keyed by
    # the lower-cased dataset name.
    # NOTE(review): 'label_cateogory' is reproduced verbatim; it presumably
    # mirrors the header spelling in the AM files — verify against the raw TSV
    # before "correcting" it.
    _HEADERS = {
        'am': ('label_cateogory', 'proxy'),
        'aifb': ('label_affiliation', 'person'),
        'mutag': ('label_mutagenic', 'bond'),
        'bgs': ('label_lithogenesis', 'rock'),
    }

    def __init__(self, root, name, transform=None, pre_transform=None):
        assert name in ['AIFB', 'AM', 'MUTAG', 'BGS']
        # The lower-cased name is used for the download URL, the directory
        # layout and the raw file names.
        self.name = name.lower()
        super(Entities, self).__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_dir(self):
        return osp.join(self.root, self.name, 'raw')

    @property
    def processed_dir(self):
        return osp.join(self.root, self.name, 'processed')

    @property
    def num_relations(self):
        # Edge types are zero-based, so the count is the largest id plus one.
        return self.data.edge_type.max().item() + 1

    @property
    def num_classes(self):
        # Derived from the training labels only.
        return self.data.train_y.max().item() + 1

    @property
    def raw_file_names(self):
        return [
            '{}_stripped.nt.gz'.format(self.name),
            'completeDataset.tsv',
            'trainingSet.tsv',
            'testSet.tsv',
        ]

    @property
    def processed_file_names(self):
        return 'data.pt'

    def download(self):
        """Fetch the dataset archive, unpack it into ``raw_dir`` and delete it."""
        path = download_url(self.url.format(self.name), self.root)
        extract_tar(path, self.raw_dir)
        os.unlink(path)

    def triples(self, graph, relation=None):
        """Yield ``(subject, predicate, object)`` triples of ``graph``.

        When ``relation`` is given only triples with that predicate are
        yielded; ``None`` matches every predicate.
        """
        for s, p, o in graph.triples((None, relation, None)):
            yield s, p, o

    @staticmethod
    def _load_split(path, nodes_header, label_header, nodes_dict,
                    labels_dict):
        """Read one split TSV and return ``(node_index, label)`` long tensors."""
        split_df = pd.read_csv(path, sep='\t')
        indices = [nodes_dict[nod] for nod in split_df[nodes_header].values]
        labels = [labels_dict[lab] for lab in split_df[label_header].values]
        return (torch.tensor(indices, dtype=torch.long),
                torch.tensor(labels, dtype=torch.long))

    def process(self):
        """Parse the raw RDF graph and split files into one saved ``Data`` object."""
        import rdflib as rdf

        graph_file, task_file, train_file, test_file = self.raw_paths

        g = rdf.Graph()
        with gzip.open(graph_file, 'rb') as f:
            g.parse(file=f, format='nt')

        # Relations are numbered by descending frequency. A Counter already
        # returns 0 for unseen keys, so no membership test is needed.
        freq = Counter(g.predicates())
        relations = sorted(set(g.predicates()), key=lambda rel: -freq[rel])
        subjects = set(g.subjects())
        objects = set(g.objects())
        # NOTE(review): node ids (and the label ids below) follow set
        # iteration order, so they are not guaranteed to be stable between
        # interpreter runs.
        nodes = list(subjects.union(objects))

        relations_dict = {rel: i for i, rel in enumerate(list(relations))}
        nodes_dict = {node: i for i, node in enumerate(nodes)}

        # Every triple contributes a forward edge (type 2*rel) and an inverse
        # edge (type 2*rel + 1).
        edge_list = []
        for s, p, o in g.triples((None, None, None)):
            src, dst, rel = nodes_dict[s], nodes_dict[o], relations_dict[p]
            edge_list.append([src, dst, 2 * rel])
            edge_list.append([dst, src, 2 * rel + 1])

        edge_list = sorted(edge_list, key=lambda x: (x[0], x[1], x[2]))
        edge = torch.tensor(edge_list, dtype=torch.long).t().contiguous()
        edge_index, edge_type = edge[:2], edge[2]

        label_header, nodes_header = self._HEADERS[self.name]

        labels_df = pd.read_csv(task_file, sep='\t')
        labels_set = set(labels_df[label_header].values.tolist())
        labels_dict = {lab: i for i, lab in enumerate(list(labels_set))}
        # Re-key by plain strings so the TSV node names can be looked up.
        # `np.unicode` was only an alias of the builtin `str` and has been
        # removed from NumPy, so `str` is used directly.
        nodes_dict = {str(key): val for key, val in nodes_dict.items()}

        train_idx, train_y = self._load_split(
            train_file, nodes_header, label_header, nodes_dict, labels_dict)
        test_idx, test_y = self._load_split(
            test_file, nodes_header, label_header, nodes_dict, labels_dict)

        data = Data(edge_index=edge_index)
        data.edge_type = edge_type
        data.train_idx = train_idx
        data.train_y = train_y
        data.test_idx = test_idx
        data.test_y = test_y
        data.num_nodes = edge_index.max().item() + 1

        data, slices = self.collate([data])
        torch.save((data, slices), self.processed_paths[0])

    def __repr__(self):
        return '{}{}()'.format(self.name.upper(), self.__class__.__name__)

In [27]:
# Download (on first use), process and load the MUTAG entity graph.
# NOTE(review): the root is a hard-coded, user-specific absolute path.
# 'Downlaods' looks like a typo for 'Downloads', and the '.tar.gz' suffix names
# a *directory* here (the archive is extracted beneath it, see the log output).
# Confirm and move this to a configurable data directory.
dataset = Entities('/Users/nhalliwe/Downlaods/MUTAG.tar.gz', 'MUTAG')
# process() collates a single Data object, so index 0 is the whole graph.
data = dataset[0]

Downloading https://data.dgl.ai/dataset/mutag.tgz
Extracting /Users/nhalliwe/Downlaods/MUTAG.tar.gz/mutag.tgz
Processing...
Done!


In [29]:
from torch_geometric.utils import k_hop_subgraph

In [30]:
# Shrink the graph to the 2-hop neighbourhood of every labelled node.
# Seeds are the training nodes followed by the test nodes.
node_idx = torch.cat([data.train_idx, data.test_idx], dim=0)
# With relabel_nodes=True the returned edge_index uses new node ids; `mapping`
# presumably holds the new ids of the seed nodes in the order they were passed
# (train first, then test), which is what the slicing below relies on.
node_idx, edge_index, mapping, edge_mask = k_hop_subgraph(
    node_idx, 2, data.edge_index, relabel_nodes=True)

# NOTE(review): `data` is overwritten in place, so this cell is not idempotent.
# Running it a second time would operate on the already-reduced graph.
data.num_nodes = node_idx.size(0)
data.edge_index = edge_index
# Keep only the edge types of the edges that survived the subgraph extraction.
data.edge_type = data.edge_type[edge_mask]
data.train_idx = mapping[:data.train_idx.size(0)]
# train_idx was just replaced, but it has the same length as before, so this
# offset is still the number of training nodes.
data.test_idx = mapping[data.train_idx.size(0):]

In [17]:

# node_idx, edge_index, mapping, edge_mask = k_hop_subgraph(
#     2, num_hops=2, edge_index=edge_index, relabel_nodes=True)

In [31]:
# Smoke-test one featureless RGCNConv layer on the 2-hop subgraph built above.
# The original call referenced an undefined `edge_type`, omitted the required
# `x` argument and declared a single relation. There are no node features in
# this dataset, so `x=None` is passed and `in_channels` is the node count (the
# same pattern as the commented-out reference model further down), and the
# number of relations is read from the edge types actually present.
RGCNConv(
    in_channels=data.num_nodes,
    out_channels=50,
    num_relations=int(data.edge_type.max()) + 1,
)(None, data.edge_index, data.edge_type)

In [None]:
# Load the "royalty" knowledge-graph archive and index its triples.
# NOTE(review): hard-coded, user-specific absolute path — move to a
# configurable data directory.
# NOTE(review): this rebinds `data`, which until here was the MUTAG graph
# object; later cells that expect graph attributes on `data` will see the
# .npz archive instead.
data = np.load(os.path.join('/Users/nhalliwe/Desktop/Explain-KG','data','royalty.npz'))
# Rule name used as the key prefix inside the archive.
RULE = 'aunt'

triples, traces = data[RULE + '_triples'], data[RULE + '_traces']
entities = data[RULE + '_entities'].tolist()
relations = data[RULE + '_relations'].tolist()  

NUM_ENTITIES = len(entities)
NUM_RELATIONS = len(relations)
# NOTE(review): the four hyper-parameters below are not read by any live cell
# in this notebook.
EMBEDDING_DIM = 50
OUTPUT_DIM = 50
LEARNING_RATE = 1e-3
NUM_EPOCHS = 2000

# Map each entity / relation to its position in the lists above.
ent2idx = dict(zip(entities, range(NUM_ENTITIES)))
rel2idx = dict(zip(relations, range(NUM_RELATIONS)))

# utils.array2idx is a project helper; presumably it replaces the names in the
# arrays with the integer ids from the two dictionaries — confirm in utils.
triples2idx = utils.array2idx(triples,ent2idx,rel2idx)
traces2idx = utils.array2idx(traces,ent2idx,rel2idx)

In [None]:

# print(triple.shape)
# print(edge_index.shape)
# print(rel_idx.shape)

In [None]:
from copy import copy
from math import sqrt
from typing import Optional

import torch
from tqdm import tqdm
import matplotlib.pyplot as plt
import networkx as nx
from torch_geometric.nn import MessagePassing
from torch_geometric.data import Data
from torch_geometric.utils import k_hop_subgraph, to_networkx

# Small constant added inside torch.log(...) in GNNExplainer.__loss__ so the
# entropy terms never evaluate log(0).
EPS = 1e-15


class GNNExplainer(torch.nn.Module):
    r"""The GNN-Explainer model from the `"GNNExplainer: Generating
    Explanations for Graph Neural Networks"
    <https://arxiv.org/abs/1903.03894>`_ paper for identifying compact subgraph
    structures and small subsets node features that play a crucial role in a
    GNN’s node-predictions.

    .. note::

        For an example of using GNN-Explainer, see `examples/gnn_explainer.py
        <https://github.com/rusty1s/pytorch_geometric/blob/master/examples/
        gnn_explainer.py>`_.

    Args:
        model (torch.nn.Module): The GNN module to explain.
        epochs (int, optional): The number of epochs to train.
            (default: :obj:`100`)
        lr (float, optional): The learning rate to apply.
            (default: :obj:`0.01`)
        num_hops (int, optional): The number of hops the :obj:`model` is
            aggregating information from.
            If set to :obj:`None`, will automatically try to detect this
            information based on the number of
            :class:`~torch_geometric.nn.conv.message_passing.MessagePassing`
            layers inside :obj:`model`. (default: :obj:`None`)
        log (bool, optional): If set to :obj:`False`, will not log any learning
            progress. (default: :obj:`True`)
    """

    # Weights and reductions of the regularisation terms used in __loss__.
    coeffs = {
        'edge_size': 0.005,
        'edge_reduction': 'sum',
        'node_feat_size': 1.0,
        'node_feat_reduction': 'mean',
        'edge_ent': 1.0,
        'node_feat_ent': 0.1,
    }

    def __init__(self, model, epochs: int = 100, lr: float = 0.01,
                 num_hops: Optional[int] = None, log: bool = True):
        super(GNNExplainer, self).__init__()
        self.model = model
        self.epochs = epochs
        self.lr = lr
        self.__num_hops__ = num_hops
        self.log = log

    def __set_masks__(self, x, edge_index, init="normal"):
        """Create fresh learnable masks and attach the edge mask to the model.

        ``init`` is accepted for interface compatibility but is not consulted.
        """
        (N, num_feats), E = x.size(), edge_index.size(1)

        std = 0.1
        self.node_feat_mask = torch.nn.Parameter(torch.randn(num_feats) * std)

        std = torch.nn.init.calculate_gain('relu') * sqrt(2.0 / (2 * N))
        self.edge_mask = torch.nn.Parameter(torch.randn(E) * std)

        # Every message-passing layer reads the shared edge mask while the
        # explain flag is set.
        for module in self.model.modules():
            if isinstance(module, MessagePassing):
                module.__explain__ = True
                module.__edge_mask__ = self.edge_mask

    def __clear_masks__(self):
        """Detach the masks from the model and drop them from the explainer."""
        for module in self.model.modules():
            if isinstance(module, MessagePassing):
                module.__explain__ = False
                module.__edge_mask__ = None
        # Previously this assigned to a misspelled `node_feat_masks`, which
        # left the real node-feature mask parameter registered.
        self.node_feat_mask = None
        self.edge_mask = None

    @property
    def num_hops(self):
        """Explicit hop count, or the number of message-passing layers."""
        if self.__num_hops__ is not None:
            return self.__num_hops__

        k = 0
        for module in self.model.modules():
            if isinstance(module, MessagePassing):
                k += 1
        return k

    def __flow__(self):
        """Flow direction of the first message-passing layer found."""
        for module in self.model.modules():
            if isinstance(module, MessagePassing):
                return module.flow
        return 'source_to_target'

    def __subgraph__(self, node_idx, x, edge_index, **kwargs):
        """Restrict ``x``, ``edge_index`` and tensor kwargs to the k-hop subgraph."""
        num_nodes, num_edges = x.size(0), edge_index.size(1)

        subset, edge_index, mapping, edge_mask = k_hop_subgraph(
            node_idx, self.num_hops, edge_index, relabel_nodes=True,
            num_nodes=num_nodes, flow=self.__flow__())

        x = x[subset]
        # Tensors sized like the node set or the edge set are filtered along
        # with the graph; everything else is forwarded untouched.
        for key, item in kwargs.items():
            if torch.is_tensor(item) and item.size(0) == num_nodes:
                item = item[subset]
            elif torch.is_tensor(item) and item.size(0) == num_edges:
                item = item[edge_mask]
            kwargs[key] = item

        return x, edge_index, mapping, edge_mask, kwargs

    def __loss__(self, node_idx, log_logits, pred_label):
        """Prediction loss plus size and entropy penalties on both masks."""
        loss = -log_logits[node_idx, pred_label[node_idx]]

        m = self.edge_mask.sigmoid()
        edge_reduce = getattr(torch, self.coeffs['edge_reduction'])
        loss = loss + self.coeffs['edge_size'] * edge_reduce(m)
        ent = -m * torch.log(m + EPS) - (1 - m) * torch.log(1 - m + EPS)
        loss = loss + self.coeffs['edge_ent'] * ent.mean()

        m = self.node_feat_mask.sigmoid()
        node_feat_reduce = getattr(torch, self.coeffs['node_feat_reduction'])
        loss = loss + self.coeffs['node_feat_size'] * node_feat_reduce(m)
        ent = -m * torch.log(m + EPS) - (1 - m) * torch.log(1 - m + EPS)
        loss = loss + self.coeffs['node_feat_ent'] * ent.mean()

        return loss

    def explain_node(self, node_idx, x, edge_index, **kwargs):
        r"""Learns and returns a node feature mask and an edge mask that play a
        crucial role to explain the prediction made by the GNN for node
        :attr:`node_idx`.

        Args:
            node_idx (int): The node to explain.
            x (Tensor): The node feature matrix.
            edge_index (LongTensor): The edge indices.
            **kwargs (optional): Additional arguments passed to the GNN module.

        :rtype: (:class:`Tensor`, :class:`Tensor`)
        """

        self.model.eval()
        self.__clear_masks__()

        num_edges = edge_index.size(1)

        # Only operate on a k-hop subgraph around `node_idx`.
        x, edge_index, mapping, hard_edge_mask, kwargs = self.__subgraph__(
            node_idx, x, edge_index, **kwargs)

        # Get the initial prediction.
        with torch.no_grad():
            log_logits = self.model(x=x, edge_index=edge_index, **kwargs)
            pred_label = log_logits.argmax(dim=-1)

        self.__set_masks__(x, edge_index)
        self.to(x.device)

        optimizer = torch.optim.Adam([self.node_feat_mask, self.edge_mask],
                                     lr=self.lr)

        if self.log:  # pragma: no cover
            pbar = tqdm(total=self.epochs)
            pbar.set_description(f'Explain node {node_idx}')

        for epoch in range(1, self.epochs + 1):
            optimizer.zero_grad()
            h = x * self.node_feat_mask.view(1, -1).sigmoid()
            log_logits = self.model(x=h, edge_index=edge_index, **kwargs)
            # `mapping` is the position of `node_idx` inside the subgraph.
            loss = self.__loss__(mapping, log_logits, pred_label)
            loss.backward()
            optimizer.step()

            if self.log:  # pragma: no cover
                pbar.update(1)

        if self.log:  # pragma: no cover
            pbar.close()

        node_feat_mask = self.node_feat_mask.detach().sigmoid()
        # Scatter the learned subgraph mask back onto the full edge set; edges
        # outside the subgraph keep a weight of zero.
        edge_mask = self.edge_mask.new_zeros(num_edges)
        edge_mask[hard_edge_mask] = self.edge_mask.detach().sigmoid()

        self.__clear_masks__()

        return node_feat_mask, edge_mask

    def visualize_subgraph(self, node_idx, edge_index, edge_mask, y=None,
                           threshold=None, **kwargs):
        r"""Visualizes the subgraph around :attr:`node_idx` given an edge mask
        :attr:`edge_mask`.

        Args:
            node_idx (int): The node id to explain.
            edge_index (LongTensor): The edge indices.
            edge_mask (Tensor): The edge mask.
            y (Tensor, optional): The ground-truth node-prediction labels used
                as node colorings. (default: :obj:`None`)
            threshold (float, optional): Sets a threshold for visualizing
                important edges. If set to :obj:`None`, will visualize all
                edges with transparency indicating the importance of edges.
                (default: :obj:`None`)
            **kwargs (optional): Additional arguments passed to
                :func:`nx.draw`.

        :rtype: :class:`matplotlib.axes.Axes`, :class:`networkx.DiGraph`
        """

        assert edge_mask.size(0) == edge_index.size(1)

        # Only operate on a k-hop subgraph around `node_idx`.
        subset, edge_index, _, hard_edge_mask = k_hop_subgraph(
            node_idx, self.num_hops, edge_index, relabel_nodes=True,
            num_nodes=None, flow=self.__flow__())

        edge_mask = edge_mask[hard_edge_mask]

        if threshold is not None:
            edge_mask = (edge_mask >= threshold).to(torch.float)

        if y is None:
            y = torch.zeros(edge_index.max().item() + 1,
                            device=edge_index.device)
        else:
            y = y[subset].to(torch.float) / y.max().item()

        data = Data(edge_index=edge_index, att=edge_mask, y=y,
                    num_nodes=y.size(0)).to('cpu')
        G = to_networkx(data, node_attrs=['y'], edge_attrs=['att'])
        # Relabel the subgraph's local node ids back to the original node ids.
        mapping = dict(enumerate(subset.tolist()))
        G = nx.relabel_nodes(G, mapping)

        node_kwargs = copy(kwargs)
        node_kwargs['node_size'] = kwargs.get('node_size') or 800
        node_kwargs['cmap'] = kwargs.get('cmap') or 'cool'

        label_kwargs = copy(kwargs)
        label_kwargs['font_size'] = kwargs.get('font_size') or 10

        pos = nx.spring_layout(G)
        ax = plt.gca()
        for source, target, edge_attrs in G.edges(data=True):
            ax.annotate(
                '', xy=pos[target], xycoords='data', xytext=pos[source],
                textcoords='data', arrowprops=dict(
                    arrowstyle="->",
                    # Keep even unimportant edges faintly visible.
                    alpha=max(edge_attrs['att'], 0.1),
                    shrinkA=sqrt(node_kwargs['node_size']) / 2.0,
                    shrinkB=sqrt(node_kwargs['node_size']) / 2.0,
                    connectionstyle="arc3,rad=0.1",
                ))
        nx.draw_networkx_nodes(G, pos, node_color=y.tolist(), **node_kwargs)
        nx.draw_networkx_labels(G, pos, **label_kwargs)

        return ax, G

    def __repr__(self):
        return f'{self.__class__.__name__}()'

In [None]:
# Explain the prediction for one node and draw the resulting subgraph.
# NOTE(review): this cell cannot run from a fresh kernel as written. No live
# cell defines `model` or `x`, and `data` was rebound to the royalty .npz
# archive above, so `data.y` is not a label tensor.
# Also note that explain_node calls the model as model(x=..., edge_index=...),
# which does not match Net.forward(triple, edge_index, rel_idx).
explainer = GNNExplainer(model, epochs=200)
node_idx = 10
node_feat_mask, edge_mask = explainer.explain_node(node_idx, x, edge_index)
ax, G = explainer.visualize_subgraph(node_idx, edge_index, edge_mask, y=data.y)
plt.show()

In [None]:
# BGS and AM graphs are too big to process them in a full-batch fashion.
# Since our model does only make use of a rather small receptive field, we
# filter the graph to only contain the nodes that are at most 2-hop neighbors
# away from any training/test node.

# path = '/Users/nhalliwe/Desktop/pytorch_geometric-master/data/Entities'#osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', 'Entities')
# dataset = Entities(path, 'AM')
# data = dataset[0]

# node_idx = torch.cat([data.train_idx, data.test_idx], dim=0)
# node_idx, edge_index, mapping, edge_mask = k_hop_subgraph(
#     node_idx, 2, data.edge_index, relabel_nodes=True)

# data.num_nodes = node_idx.size(0)
# data.edge_index = edge_index
# data.edge_type = data.edge_type[edge_mask]
# data.train_idx = mapping[:data.train_idx.size(0)]
# data.test_idx = mapping[data.train_idx.size(0):]

# import pickle as pkl
# def load_data_pkl(pkl_path):
#     with open(pkl_path,"rb") as f:
#         data=pkl.load(f)
#     A=data["A"]
#     y=data["y"]
#     train_idx=data["train_idx"]
#     test_idx=data["test_idx"]

#     return A,y,train_idx,test_idx

In [None]:
# pkl_path="/Users/nhalliwe/Downloads/relation-gcn-pytorch-master/aifb/aifb.pickle"
# A,y,train_idx,test_idx=load_data_pkl(pkl_path)
# class Net(torch.nn.Module):
#     def __init__(self):
#         super(Net, self).__init__()
#         self.conv1 = RGCNConv(data.num_nodes, 16, dataset.num_relations,
#                               num_bases=30)
#         self.conv2 = RGCNConv(16, dataset.num_classes, dataset.num_relations,
#                               num_bases=30)

#     def forward(self, edge_index, edge_type):
#         x = F.relu(self.conv1(None, edge_index, edge_type))
#         x = self.conv2(x, edge_index, edge_type)
#         return F.log_softmax(x, dim=1)


# def train():
#     model.train()
#     optimizer.zero_grad()
#     out = model(data.edge_index, data.edge_type)
#     loss = F.nll_loss(out[data.train_idx], data.train_y)
#     loss.backward()
#     optimizer.step()
#     return loss.item()


# @torch.no_grad()
# def test():
#     model.eval()
#     pred = model(data.edge_index, data.edge_type).argmax(dim=-1)
#     train_acc = pred[data.train_idx].eq(data.train_y).to(torch.float).mean()
#     test_acc = pred[data.test_idx].eq(data.test_y).to(torch.float).mean()
#     return train_acc.item(), test_acc.item()


# for epoch in range(1, 51):
#     loss = train()
#     train_acc, test_acc = test()
#     print(f'Epoch: {epoch:02d}, Loss: {loss:.4f}, Train: {train_acc:.4f} '
#           f'Test: {test_acc:.4f}')

# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model = Net().to(device)
# cora_data = cora_data.to(device)
# optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
# x, edge_index = cora_data.x, cora_data.edge_index

# for epoch in range(1, 201):
#     model.train()
#     optimizer.zero_grad()
#     log_logits = model(x, edge_index)
#     loss = F.nll_loss(log_logits[cora_data.train_mask], cora_data.y[cora_data.train_mask])
#     loss.backward()
#     optimizer.step()

# explainer = GNNExplainer(model, epochs=200)
# node_idx = 10
# node_feat_mask, edge_mask = explainer.explain_node(node_idx, x, edge_index)
# ax, G = explainer.visualize_subgraph(node_idx, edge_index, edge_mask, y=cora_data.y)
# plt.show()

In [None]:
# import utils

# npdata = np.load(os.path.join('.','data','royalty_spouse.npz'))

# train = npdata['X_train']
# test = npdata['X_test']

# train_exp = npdata['train_exp']
# test_exp = npdata['test_exp']

# full_train = np.concatenate((train,train_exp.reshape(-1,3)), axis=0)

# entities = npdata['entities'].tolist()
# relations = npdata['relations'].tolist()

# NUM_ENTITIES = len(entities)
# NUM_RELATIONS = len(relations)
# NUM_FEATURES = 50

# ent2idx = dict(zip(entities, range(NUM_ENTITIES)))
# rel2idx = dict(zip(relations, range(NUM_RELATIONS)))

# train2idx = utils.array2idx(full_train,ent2idx,rel2idx)
# test2idx = utils.array2idx(test,ent2idx,rel2idx)

# testexp2idx = utils.array2idx(test_exp,ent2idx,rel2idx)

# # #entity_embeddings = np.load(os.path.join('.','data','transE_embeddings.npz'))['entity_embeddings']
# X = np.random.randn(NUM_ENTITIES,NUM_FEATURES)

# train2idx_ = np.concatenate([train2idx[:,0].reshape(-1,1),train2idx[:,2].reshape(-1,1)], axis=1)
# test2idx_ = np.concatenate([test2idx[:,0].reshape(-1,1),test2idx[:,2].reshape(-1,1)], axis=1)

In [None]:
#np.concatenate([train2idx[,test2idx], axis=0).shape

# Stack the train and test triples and build matching boolean split masks.
# NOTE(review): `train2idx` and `test2idx` are only assigned in a commented-out
# cell above, so this fails on a fresh kernel; the live loading cell produces
# `triples2idx` / `traces2idx` instead.
all_data = np.concatenate([train2idx,test2idx], axis=0)
# Column 1 is used as the label — presumably the relation id of each
# (head, relation, tail) row; confirm against utils.array2idx.
all_labels = np.concatenate([train2idx[:,1],test2idx[:,1]], axis=0)

# Masks over the rows of `all_data`: train rows come first, then test rows.
train_mask = np.concatenate([np.ones(train2idx.shape[0], dtype=bool), np.zeros(test2idx.shape[0],dtype=bool)])
test_mask = np.concatenate([np.zeros(train2idx.shape[0], dtype=bool), np.ones(test2idx.shape[0],dtype=bool)])

In [None]:
# X = []
# for i,_,j in all_data:
    
#     X.append([entity_embeddings[i],entity_embeddings[j]])

# X = np.array(X)
# Wrap features, labels and edges in a torch_geometric Data object.
# NOTE(review): `X`, `train2idx_` and `test2idx_` are only assigned in
# commented-out code, so this cell depends on hidden kernel state.
X = torch.tensor(X)
y = torch.tensor(all_labels)

# Turn the two index columns into a 2 x N layout (row 0 from column 0, row 1
# from column 1) — presumably head and tail ids; confirm.
train_horizontal = np.stack([train2idx_[:,0],train2idx_[:,1]],axis=0)
test_horizontal = np.stack([test2idx_[:,0],test2idx_[:,1]],axis=0)

# Train edges first, then test edges, matching the order of `all_labels`.
edge_index = torch.tensor(np.concatenate([train_horizontal, test_horizontal],axis=1), dtype=torch.long)
# NOTE(review): `y` has one entry per triple while `x` is documented by the
# commented-out code as one row per entity — verify the intended alignment.
# This also rebinds `data` once more.
data = Data(x=X, y=y, edge_index=edge_index,num_classes=NUM_RELATIONS)

In [None]:
# X_test = torch.tensor([entity_embeddings[ent2idx[h]] for h,_,_ in test])
# y_test = torch.tensor([(rel2idx[r]) for _,r,_ in test])
# test_ents = np.array([(ent2idx[h],ent2idx[t]) for h,_,t in test]).T
# test_ents_flipped = np.stack((test_ents[1,:], test_ents[0,:]))
# test_edge_index = torch.tensor(np.concatenate((test_ents,test_ents_flipped), axis=1), dtype=torch.long)
# test_data = Data(x_test=X_test,y_test=y_test,test_edge_index=test_edge_index,num_classes=num_relations)

In [None]:
# class Net(torch.nn.Module):
#     def __init__(self):
#         super(Net, self).__init__()
#         self.lin = Sequential(Linear(10,10))
#         self.conv1 = GCNConv(data.num_features, 16)
#         self.conv2 = GCNConv(16, data.num_classes)

#     def forward(self, x, edge_index):
#         x = F.relu(self.conv1(x, edge_index))
#         x = F.dropout(x, training=self.training)
#         x = self.conv2(x, edge_index)
#         return F.log_softmax(x, dim=1)

# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model = Net().to(device)
# data = data.to(device)
# optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
# x, edge_index = data.x, data.edge_index

# for epoch in range(1, 201):
#     model.train()
#     optimizer.zero_grad()
#     log_logits = model(x, edge_index)
#     loss = F.nll_loss(log_logits, data.y)
#     loss.backward()
#     optimizer.step()

# explainer = GNNExplainer(model, epochs=200)

# x_test,test_y, test_edge_index = test_data.x_test,test_data.y_test,test_data.test_edge_index

In [None]:
# def get_explanations(i,x,y,edge_index, explainer):

#     node_feat_mask, edge_mask = explainer.explain_node(i, x, edge_index)
    
#     _, G = explainer.visualize_subgraph(i, edge_index, edge_mask, y=y)

#     temp = []
#     exp = list(G.edges)
#     for tup in exp:
#         sorted_tup = tuple(sorted(tup))
#         temp.append(sorted_tup)

#     return list(set(temp))

In [None]:
#node_feat_mask, edge_mask = explainer.explain_node(0, x, edge_index)

In [None]:
#explainer.visualize_subgraph(2278, edge_index, edge_mask, y=y)

In [None]:
# A = np.zeros(shape=(num_entities,num_entities))

# for h,r,t in d:
    
#     h_idx = entities.index(h)
#     #r_idx = relations.index(r)
#     t_idx = entities.index(t)
    
#     A[h_idx, t_idx] = 1
    
# A = torch.tensor(A, dtype=torch.float)
#A = torch.randn(5,10)
# X = torch.randn(2708,1433)
# y = torch.randint(3, (2708,))

# X = torch.randn(10,1433)
# y = torch.randint(3, (10,))

In [None]:
# class Net(torch.nn.Module):
#     def __init__(self):
#         super(Net, self).__init__()
#         self.lin = Sequential(Linear(10,10))
#         self.conv1 = GCNConv(data.num_features, 16)
#         self.conv2 = GCNConv(16, data.num_classes)

#     def forward(self, x, edge_index):
#         x = F.relu(self.conv1(x, edge_index))
#         x = F.dropout(x, training=self.training)
#         x = self.conv2(x, edge_index)
#         return F.log_softmax(x, dim=1)

# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# model = Net().to(device)
# data = data.to(device)
# optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
# x, edge_index = data.x, data.edge_index

# for epoch in range(1, 201):
#     model.train()
#     optimizer.zero_grad()
#     log_logits = model(x, edge_index)
#     loss = F.nll_loss(log_logits, data.y)
#     loss.backward()
#     optimizer.step()

# explainer = GNNExplainer(model, epochs=200)

In [None]:
# node_idx = 3
# node_feat_mask, edge_mask = explainer.explain_node(node_idx, x, edge_index)
# ax, G = explainer.visualize_subgraph(node_idx, edge_index, edge_mask, y=data.y)
# plt.show()

In [None]:
# explanations = []

# for i in range(2):
    
#     node_feat_mask, edge_mask = explainer.explain_node(i, x, edge_index)
#     _, G = explainer.visualize_subgraph(i, edge_index, edge_mask, y=data.y)
    
#     explanations.append(list(G.edges))

In [None]:
# unique_explanations = []
# for i in explanations:
#     temp = []
#     for tup in i:
#         sorted_tup = tuple(sorted(tup))
#         temp.append(sorted_tup)
#     unique_explanations.append(list(set(temp)))

In [None]:
#unique_explanations