In [1]:
######## IMPORT EXTERNAL FILES ###########
import torch
import torch.nn.functional as F
import torch.nn.utils.parametrize as parametrize
import torch.nn as nn

import torch_geometric
from torch_geometric.loader import DataLoader
from torch_geometric.transforms import RandomLinkSplit
from torch_geometric.utils import train_test_split_edges, negative_sampling

import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.callbacks import Callback
from pytorch_lightning.loggers import WandbLogger

######### IMPORT INTERNAL FILES ###########
import sys
sys.path.append("../../src")
from GRAFF import *
from config import *

  from .autonotebook import tqdm as notebook_tqdm


Link prediction features initialized.....


In [2]:
import random
from torch_geometric.utils import to_networkx
import networkx as nx
import matplotlib.pyplot as plt

def convert_to_networkx(graph, n_sample=None):

    g = to_networkx(graph, node_attrs=["x"])
    y = graph.y.numpy()

    if n_sample is not None:
        sampled_nodes = random.sample(g.nodes, n_sample)
        g = g.subgraph(sampled_nodes)
        y = y[sampled_nodes]

    return g, y


def plot_graph(g, y):

    plt.figure(figsize=(9, 7))
    nx.draw_spring(g, node_size=30, arrows=False, node_color=y)
    plt.show() 

In [3]:
# g, y = convert_to_networkx(dataset[0])
# plot_graph(g, y)

In [4]:

# final_dataset = train_test_split_edges(dataset[0], val_ratio = 0.1, test_ratio= 0.1)

In [11]:
# def indices(dataset, split_idx):
#     ''' According to the dataset, and the specified splitting (e.g. in Geom-GCN there are 10 splits) 
#         We identify the indices. 

#         args:
#           - dataset: torch-geometric data type,
#           - split_idx: in the Geom-GCN implementations the available splittings are from 0-9    
        
#         output:
#           - (train_indices, val_indices, test_indices):
#                  indices that corrensponds to the whole graph. 
    
#     '''

#     train_idx = dataset.train_mask[:, split_idx]
#     val_idx = dataset.val_mask[:, split_idx]
#     test_idx = dataset.test_mask[:, split_idx]

#     train_indices = torch.nonzero(train_idx)
#     val_indices = torch.nonzero(val_idx)
#     test_indices = torch.nonzero(test_idx)

#     return train_indices.squeeze(1), val_indices.squeeze(1), test_indices.squeeze(1)


 

# final = train_test_split_edges(dataset[0])


In [12]:
class DataModuleLP(pl.LightningDataModule):

    def __init__(self,  train_set, val_set, test_set, mode, batch_size):

        self.mode = mode  # "hp" or "test"
        self.batch_size = batch_size
        self.train_set, self.val_set, self.test_set = train_set, val_set, test_set

    def setup(self, stage=None):
        if stage == 'fit':

            # edge_index are the message passing edges,
            # edge_label_index are the supervision edges.
            if self.train_set.pos_edge_label_index.shape[1] < self.train_set.edge_index.shape[1]:
                pos_mask_edge = self.train_set.pos_edge_label_index.shape[1]

                self.train_set.edge_index = self.train_set.edge_index[:, pos_mask_edge:]
            else:
                self.train_set.pos_edge_label_index = self.train_set.edge_index[:, :self.train_set.edge_index.shape[1] // 2]
                self.train_set.neg_edge_label_index = self.train_set.neg_edge_label_index[:, :self.train_set.edge_index.shape[1] // 2]

                self.train_set.edge_index = self.train_set.edge_index[:, self.train_set.edge_index.shape[1] // 2:]

                

    def train_dataloader(self, *args, **kwargs):
        return DataLoader([self.train_set], batch_size=batch_size, shuffle=False)

    def val_dataloader(self, *args, **kwargs):
        if self.mode == 'hp':
            return DataLoader([self.val_set], batch_size=batch_size, shuffle=False)
        elif self.mode == 'test':
            return DataLoader([self.test_set], batch_size=batch_size, shuffle=False)

In [13]:
mode = 'hp'
save = True
if save:
    transform = RandomLinkSplit(num_val = 0.1, num_test = 0.1, is_undirected=True if dataset_name != 'Texas' else False, split_labels= True, neg_sampling_ratio=200)

    # Edges are divided into three sets
    train_data, val_data, test_data = transform(dataset[0])

    # Negative edges are extracted
    torch.save(train_data, dataset_name + "/train_data.pt")
    torch.save(val_data, dataset_name + "/val_data.pt")
    torch.save(test_data, dataset_name + "/test_data.pt")
load = True
if load:
    train_data = torch.load(dataset_name + "/train_data.pt")
    val_data = torch.load(dataset_name + "/val_data.pt")
    test_data = torch.load(dataset_name + "/test_data.pt")

In [14]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [15]:
DM = DataModuleLP(train_data.clone(), val_data.clone(), test_data.clone(), mode = 'hp', batch_size = batch_size)
DM.setup('fit')
DM.setup('test')


In [16]:
print(train_data)
print(val_data)
print(test_data)


Data(x=[2708, 1433], edge_index=[2, 8448], y=[2708], train_mask=[2708, 10], val_mask=[2708, 10], test_mask=[2708, 10], pos_edge_label=[4224], pos_edge_label_index=[2, 4224], neg_edge_label=[844800], neg_edge_label_index=[2, 844800])
Data(x=[2708, 1433], edge_index=[2, 8448], y=[2708], train_mask=[2708, 10], val_mask=[2708, 10], test_mask=[2708, 10], pos_edge_label=[527], pos_edge_label_index=[2, 527], neg_edge_label=[105400], neg_edge_label_index=[2, 105400])
Data(x=[2708, 1433], edge_index=[2, 9502], y=[2708], train_mask=[2708, 10], val_mask=[2708, 10], test_mask=[2708, 10], pos_edge_label=[527], pos_edge_label_index=[2, 527], neg_edge_label=[105400], neg_edge_label_index=[2, 105400])


In [17]:
for i in DM.train_dataloader():
    print(i)
for i in DM.val_dataloader():
    print(i)


DataBatch(x=[2708, 1433], edge_index=[2, 4224], y=[2708], train_mask=[2708, 10], val_mask=[2708, 10], test_mask=[2708, 10], pos_edge_label=[4224], pos_edge_label_index=[2, 4224], neg_edge_label=[844800], neg_edge_label_index=[2, 844800], batch=[2708], ptr=[2])
DataBatch(x=[2708, 1433], edge_index=[2, 8448], y=[2708], train_mask=[2708, 10], val_mask=[2708, 10], test_mask=[2708, 10], pos_edge_label=[527], pos_edge_label_index=[2, 527], neg_edge_label=[105400], neg_edge_label_index=[2, 105400], batch=[2708], ptr=[2])


In [17]:
class LinkPredictor(nn.Module):
    def __init__(self, input_dim, output_dim, num_layers = 0, bias = False, dropout= 0):
        super().__init__()
        
        self.num_layers = num_layers
        if self.num_layers != 0:
            layers = []
            layers.append(nn.Linear(input_dim, output_dim, bias = bias))
            for layer in range(self.num_layers):
                layers.append(nn.Linear(output_dim, output_dim, bias = bias))
            layers.append(nn.Linear(output_dim, 1, bias = bias))
        
            self.layers = nn.Sequential(*layers)
            self.dropout = dropout
             
    def reset_parameters(self):
        for layer in self.layers:
            layer.reset_parameters()
        
    def forward(self, x_i, x_j, training = False):
        
        out = x_i * x_j 
        if self.num_layers != 0:
            for layer in self.layers:
                out = layer(out)
                out = F.relu(out)
                out = F.dropout(out, p = self.dropout, training = training)
        out = out.sum(dim = -1)
        print(out)
        return torch.sigmoid(out)
    

In [18]:

class PhysicsGNN_LP(nn.Module):
    def __init__(self, dataset, hidden_dim, output_dim, num_layers, num_layers_mlp, link_bias, dropout, step=0.1, symmetry_type='1', self_loops=False, device='cpu'):
        super().__init__()

        self.enc = torch.nn.Linear(
            dataset.num_features, hidden_dim, bias=False)

        self.external_w = External_W(hidden_dim, device=device)
        self.source_b = Source_b(device=device)
        self.pairwise_w = PairwiseInteraction_w(
            hidden_dim, symmetry_type=symmetry_type, device=device)

        self.layers = [GRAFFConv(self.external_w, self.source_b, self.pairwise_w,
                                 self_loops=self_loops) for i in range(num_layers)]

        self.step = step
        self.link_pred = LinkPredictor(
            hidden_dim, output_dim, num_layers_mlp, link_bias, dropout)
        self.reset_parameters()
        self.to(device)

    def reset_parameters(self):
        self.enc.reset_parameters()
        self.external_w.reset_parameters()
        self.source_b.reset_parameters()
        self.pairwise_w.reset_parameters()
        self.link_pred.reset_parameters()

    def forward(self, data, train=True):

        if train:
            x, edge_index = data.x.clone(), data.pos_forward_pass.clone()
        else:
            x, edge_index = data.x.clone(), data.edge_index.clone()

        x = enc_out = self.enc(x)

        x0 = enc_out.clone()
        for layer in self.layers:

            x = x + self.step*F.relu(layer(x, edge_index, x0))

        if train:
            pos_edge = data.pos_masked_edges.clone()
        else:
            pos_edge = data.edge_label_index.clone()

        neg_edge = data.neg_edges.clone()
        pos_pred = self.link_pred(
            x[pos_edge[0]], x[pos_edge[1]], training=train)
        neg_pred = self.link_pred(
            x[neg_edge[0]], x[neg_edge[1]], training=train)

        return pos_pred, neg_pred

In [19]:
output_dim = 64
mlp_layer = 2
link_bias = False
dropout = 0.1

In [20]:
PG = PhysicsGNN_LP(dataset, hidden_dim, output_dim, num_layers, mlp_layer, link_bias, dropout)

In [21]:
out = PG(train_data.clone())

tensor([7.3387e-06, 7.6268e-04, 2.8153e-04, 0.0000e+00, 2.1349e-04, 8.6843e-04,
        1.0140e-03, 5.3435e-04, 6.4876e-04, 0.0000e+00, 1.3798e-03, 1.1496e-04,
        0.0000e+00, 2.6795e-04, 6.6629e-04, 0.0000e+00, 6.1495e-04, 0.0000e+00,
        5.2026e-04, 2.1459e-04, 4.6524e-04, 1.2086e-03, 1.2082e-03, 1.3040e-03,
        3.1008e-04, 1.2190e-04, 0.0000e+00, 9.2872e-04, 8.1915e-04, 5.3745e-04,
        1.9134e-03, 1.7363e-04, 3.2125e-04, 0.0000e+00, 0.0000e+00, 9.0923e-05,
        0.0000e+00, 3.2903e-03, 1.7395e-03, 0.0000e+00, 0.0000e+00, 9.6303e-04,
        4.3483e-04, 0.0000e+00, 3.8775e-04, 5.3147e-04, 0.0000e+00, 1.5167e-03,
        9.7916e-04, 6.8913e-05, 0.0000e+00, 9.5810e-04, 1.4172e-03, 0.0000e+00,
        8.2258e-04, 1.6605e-03, 4.6283e-04, 0.0000e+00, 1.0299e-03, 6.2316e-04,
        2.0211e-05, 0.0000e+00, 8.4758e-04, 1.4357e-03, 7.6352e-04, 5.0854e-04,
        5.2834e-04, 5.9502e-04, 0.0000e+00], grad_fn=<SumBackward1>)
tensor([2.1905e-04, 0.0000e+00, 1.8350e-03, 0.0000e

In [None]:
torch.topk(pred_positive, 100 if pred_positive.shape[0] >= 100 else pred_positive.shape[0])

In [None]:
top_k = 100 if pred_positive.shape[0] >= 100 else pred_positive.shape[0]
hit_count = 0
positive_indices = list(range(pred_positive.shape[0]))
top_k_predictions = torch.topk(pred_positive, top_k).indices
for i in range(len(positive_indices)):
    if positive_indices[i] in top_k_predictions:
        hit_count += 1

hit_ratio = hit_count / len(positive_indices)
hit_ratio 

In [None]:
# How to repeat the experiments? 
# What are the splittings? If i do my own splittings should i repeat the experiments? 
# Message passing questions..........