# Imports

In [None]:
import torch
import numpy as np
from torch import nn
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Model

In [15]:
class Model(nn.Module):
    def __init__(self, in_features, out_features):
        super().__init__()
        self.embedder = nn.Linear(in_features, out_features)
    def forward(self, x):
        x = self.embedder(x)
        return x / (x ** 2).sum() ** 0.5

# Test sample from dataset

In [16]:
def generate_negatives(positive):
    # chance of match ~ 0.0002%
    return torch.randint(0, len(nodes), positive.shape).to('cuda')

In [25]:
nodes = torch.tensor(np.load('author_embeddings.npy')).to('cuda').float()
positive = torch.tensor(np.load('edges.npy')).to('cuda').T

positive = positive[torch.randperm(positive.size(0))][:100_000]
negative = generate_negatives(positive)

In [26]:
model = Model(nodes.shape[-1], 128).to('cuda')
model.load_state_dict(torch.load('model_weights.pth'))

# Results

In [27]:
inp = torch.cat((positive, negative))
out = torch.cat((torch.ones(positive.shape[0]), torch.zeros(positive.shape[0]))).to('cuda')

idx = torch.randperm(inp.size(0))
inp = inp[idx]
out = out[idx]

a, b = nodes[inp[:, 0]], nodes[inp[:, 1]]
a, b = model(a), model(b)

res = ((a * b).sum(axis=-1) + 1) / 2
lbl = (res > 0.5).float()

out = out.cpu().detach().numpy()
res = res.cpu().detach().numpy()
lbl = lbl.cpu().detach().numpy()

accuracy = accuracy_score(out, lbl)
precision = precision_score(out, lbl)
recall = recall_score(out, lbl)
f1 = f1_score(out, lbl)
roc_auc = roc_auc_score(out, res)

print(f"Accuracy:  {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1-Score:  {f1:.4f}")
print(f"ROC-AUC:   {roc_auc:.4f}")

Accuracy:  0.5510
Precision: 0.5423
Recall:    0.6538
F1-Score:  0.5929
ROC-AUC:   0.5681
