# Useful libraries

In [None]:
! pip install dgl -f https://data.dgl.ai/wheels/cu118/repo.html
! pip install dglgo -f https://data.dgl.ai/wheels-test/repo.htm
import dgl
import warnings
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import scipy.sparse as sp
import sklearn.metrics as m
warnings.filterwarnings("ignore")

# Models

In [None]:
class GNCN(nn.Module):
  """Graph convolution layer with normalized node projections.

  Computes ``activation(s * A.mm(normalize(W(dropout(X)))))`` where ``W`` is a
  bias-free linear layer and ``normalize`` is ``F.normalize`` with its default
  arguments.
  """

  def __init__(self, in_features, out_features, s=1.8, activation=lambda x: x, dropout=0):
    super().__init__()
    self.drop = nn.Dropout(dropout)
    self.weights = nn.Linear(in_features, out_features, bias=False)
    self.s = s
    self.act = activation

  def reset_parameters(self):
    """Re-initialize the linear projection."""
    self.weights.reset_parameters()

  def forward(self, A, X):
    """Project ``X``, normalize it, propagate it through ``A`` and scale by ``s``."""
    projected = self.weights(self.drop(X))
    normalized = F.normalize(projected)
    propagated = A.mm(normalized)
    return self.act(self.s * propagated)

class InputEncoder(nn.Module):
  """Linear input embedding with optional Laplacian positional encodings."""

  def __init__(self, feat_dim, emb_dim, pe_enc=False, device='cpu'):
    super().__init__()
    self.linear = nn.Linear(feat_dim, emb_dim)
    self.pe_enc = pe_enc
    self.device = device
    # `init` flags that the positional encodings still have to be computed;
    # this happens lazily on the next forward pass.
    self.init = bool(self.pe_enc)

  def reset_parameters(self):
    """Reset the child layers and mark the positional encodings as stale."""
    self.init = bool(self.pe_enc)
    resettable = (child for child in self.children() if hasattr(child, 'reset_parameters'))
    for child in resettable:
      child.reset_parameters()

  def forward(self, A, X):
    """Embed ``X``; when ``pe_enc`` is set, add the cached positional encodings."""
    if self.init:
      print("Computing laplacian positional embeddings...")
      self.pe = laplacian_pe(A, self.linear.out_features, self.device)
      self.init = False
    embedded = self.linear(X)
    if not self.pe_enc:
      return embedded
    return embedded + self.pe

class GTAE(nn.Module):
  """Graph transformer auto-encoder.

  Node features are embedded by ``InputEncoder``, passed through a stack of
  transformer encoder layers (the adjacency matrix is forwarded as the mask
  argument of the stack) and projected to ``latent_size``. The decoder is the
  sigmoid of the inner products between the latent node embeddings.
  """

  def __init__(self, feat_dim, emb_dim, latent_size, n_heads, n_layers=2, pe_enc=False, device='cpu'):
    super().__init__()
    self.input = InputEncoder(feat_dim, emb_dim, pe_enc, device)
    t_enc = nn.TransformerEncoderLayer(emb_dim, n_heads, emb_dim, activation=F.gelu, dropout=0.1)
    self.t_enc_stack = nn.TransformerEncoder(t_enc, n_layers)
    self.linear = nn.Linear(emb_dim, latent_size)

  def reset_parameters(self):
    """Re-initialize every learnable layer, including the transformer stack.

    ``nn.TransformerEncoder`` exposes no ``reset_parameters`` of its own, so
    checking only the direct children would silently keep its weights from the
    previous fold. Containers without a reset method are therefore walked
    recursively.
    """
    for child in self.children():
      if hasattr(child, 'reset_parameters'):
        child.reset_parameters()
        continue
      # Visit descendants before their parents so that a parent initializer
      # (e.g. the private `_reset_parameters` of multi-head attention) has the
      # final word over its sub-layers, as it does at construction time.
      for module in reversed(list(child.modules())):
        if hasattr(module, 'reset_parameters'):
          module.reset_parameters()
        elif hasattr(module, '_reset_parameters'):
          module._reset_parameters()

  def forward(self, A, X):
    """Return edge probabilities for all node pairs; latent codes are stored in ``self.enc``."""
    self.enc = self.linear(self.t_enc_stack(self.input(A, X), A))
    return torch.sigmoid(self.enc.mm(self.enc.T))

class GCAE(nn.Module):
  """Graph convolutional auto-encoder built on two DGL ``GraphConv`` layers.

  The decoder is the sigmoid of the inner products between latent node
  embeddings. ``proj`` is an MLP head applied to the embeddings when
  ``need_proj`` is requested.
  """

  def __init__(self, in_feat, hid_size, latent_size):
    super().__init__()
    self.conv1 = dgl.nn.GraphConv(in_feat, hid_size, activation=F.relu)
    self.conv2 = dgl.nn.GraphConv(hid_size, latent_size)
    self.proj = nn.Sequential(
        nn.Linear(latent_size, latent_size * 2, bias=False),
        nn.ReLU(),
        nn.Linear(latent_size * 2, latent_size, bias=False)
    )

  def reset_parameters(self):
    """Re-initialize the convolutions and the projection head."""
    for layer in self.children():
      if hasattr(layer, 'reset_parameters'):
        layer.reset_parameters()
    # nn.Sequential has no reset_parameters, so the projection head has to be
    # reset layer by layer; otherwise its weights survive from fold to fold.
    for layer in self.proj:
      if hasattr(layer, 'reset_parameters'):
        layer.reset_parameters()

  def forward(self, A, X, need_proj=False):
    """Encode the graph and decode edge probabilities.

    Returns the reconstruction alone, or ``(reconstruction, projection)`` when
    ``need_proj`` is true. The latent embeddings are stored in ``self.enc``.
    """
    self.enc = self.conv2(A, self.conv1(A, X))
    reconstruction = torch.sigmoid(self.enc.mm(self.enc.T))
    if need_proj:
      return reconstruction, self.proj(self.enc)
    return reconstruction

class GATAE(nn.Module):
  """Graph attention auto-encoder built on two DGL ``GATConv`` layers.

  Raises:
    ValueError: if ``hid_size`` is not divisible by ``n_heads1``.
  """

  def __init__(self, in_feat, hid_size, latent_size, n_heads1, n_heads2):
    super().__init__()
    if hid_size % n_heads1 != 0:
      # The original code *returned* the exception object, which never
      # reported this message to the caller.
      raise ValueError(f"hid_size ({hid_size}) must be divisible by n_heads1 ({n_heads1})")
    self.conv1 = dgl.nn.GATConv(in_feat, hid_size // n_heads1, n_heads1, activation=F.elu)
    self.conv2 = dgl.nn.GATConv(hid_size, latent_size, n_heads2)
    self.proj = nn.Sequential(
        nn.Linear(latent_size, latent_size * 2, bias=False),
        nn.ReLU(),
        nn.Linear(latent_size * 2, latent_size, bias=False)
    )

  def reset_parameters(self):
    """Re-initialize the attention layers and the projection head."""
    for layer in self.children():
      if hasattr(layer, 'reset_parameters'):
        layer.reset_parameters()
    # nn.Sequential has no reset_parameters, so the projection head has to be
    # reset layer by layer; otherwise its weights survive from fold to fold.
    for layer in self.proj:
      if hasattr(layer, 'reset_parameters'):
        layer.reset_parameters()

  def forward(self, A, X, need_proj=False):
    """Encode the graph and decode edge probabilities.

    Returns ``(reconstruction, projection)`` when ``need_proj`` is true and
    ``(reconstruction, attention)`` otherwise, where ``attention`` comes from
    the first layer. NOTE: unlike the other auto-encoders in this file, a tuple
    is returned in both cases. The latent embeddings are stored in ``self.enc``.
    """
    # In the first attention layer the heads' outputs are concatenated, in the
    # second they're averaged
    out, att = self.conv1(A, X, get_attention=True)
    self.enc = self.conv2(A, out.flatten(1)).mean(1)
    reconstruction = torch.sigmoid(self.enc.mm(self.enc.T))
    if need_proj:
      return reconstruction, self.proj(self.enc)
    return reconstruction, att

class GNCAE(nn.Module):
  """Auto-encoder built on two ``GNCN`` layers with an inner-product decoder.

  ``forward`` expects a dense adjacency tensor; self loops and symmetric
  degree normalization are applied internally on every call.
  """

  def __init__(self, in_feat, hid_size, latent_size, s=1.8):
    super().__init__()
    self.conv1 = GNCN(in_feat, hid_size, s=s, activation=F.relu)
    self.conv2 = GNCN(hid_size, latent_size, s=s)
    self.proj = nn.Sequential(
        nn.Linear(latent_size, latent_size * 2, bias=False),
        nn.ReLU(),
        nn.Linear(latent_size * 2, latent_size, bias=False)
    )

  def reset_parameters(self):
    """Re-initialize the convolutions and the projection head."""
    for layer in self.children():
      if hasattr(layer, 'reset_parameters'):
        layer.reset_parameters()
    # nn.Sequential has no reset_parameters, so the projection head has to be
    # reset layer by layer; otherwise its weights survive from fold to fold.
    for layer in self.proj:
      if hasattr(layer, 'reset_parameters'):
        layer.reset_parameters()

  def normalize(self, A):
    """Return ``D^-1/2 (A + I) D^-1/2`` as a sparse tensor.

    The input is left untouched. An in-place ``A += I`` would add one more
    self loop on every forward pass and would also corrupt the caller's
    matrix, which the training loop reuses as the reconstruction target.
    """
    # Add self loops (out of place)
    with_loops = A + torch.eye(A.shape[0], dtype=A.dtype, device=A.device)
    # Inverse square root of the node degrees
    inv_sqrt_deg = with_loops.sum(dim=1).pow(-0.5)
    return (inv_sqrt_deg[:, None] * with_loops * inv_sqrt_deg[None, :]).to_sparse()

  def forward(self, A, X, need_proj=False):
    """Encode the graph and decode edge probabilities.

    Returns the reconstruction alone, or ``(reconstruction, projection)`` when
    ``need_proj`` is true. The latent embeddings are stored in ``self.enc``.
    """
    A = self.normalize(A)
    self.enc = self.conv2(A, self.conv1(A, X))
    reconstruction = torch.sigmoid(self.enc.mm(self.enc.T))
    if need_proj:
      return reconstruction, self.proj(self.enc)
    return reconstruction


# Data preprocessing

In [None]:
def sparse_to_tuple(sparse_mx):
    """Return the (row, col) coordinates of the stored entries as an (nnz, 2) array."""
    coo = sparse_mx if sp.isspmatrix_coo(sparse_mx) else sparse_mx.tocoo()
    return np.column_stack((coo.row, coo.col))

def ismember(a, b):
    """Tell whether the pair ``a`` equals any row of the 2-column array ``b``."""
    difference = np.subtract(a, b)
    row_matches = (difference == 0).all(axis=-1)
    return row_matches.any()

# Splits the edges into k folds and yields one train/test/validation split per fold
def kfold(adj, k=10, valid=0.05):
    """Generate k-fold link-prediction splits of a sparse adjacency matrix.

    Positive edges are taken from the upper triangle of ``adj``. For each fold
    the generator yields ``(adj_train, train_edges, test_edges,
    test_neg_edges, val_edges, val_neg_edges)``, where ``adj_train`` is the
    symmetric adjacency built from the training edges only and the negative
    sets are node pairs that are not edges of ``adj``.

    Args:
        adj: SciPy sparse adjacency matrix.
        k: number of folds.
        valid: fraction of the edges held out for validation in every fold.

    Raises:
        ValueError: if the graph has too few non-edges to draw the negatives.
    """
    all_edges = sparse_to_tuple(adj)
    edges = sparse_to_tuple(sp.triu(adj))
    n_edges = edges.shape[0]
    n_nodes = adj.shape[0]
    valid_size = int(n_edges * valid)

    edges_idx = np.arange(n_edges)
    np.random.shuffle(edges_idx)
    k_train_folds = np.array_split(edges_idx, k)

    # Build negative samples for testing and validation. Sets give O(1)
    # membership checks instead of rescanning the edge arrays on every draw.
    known = set(map(tuple, all_edges.tolist()))
    n_neg = n_edges + valid_size * k
    # A pair can be drawn unless both of its orientations are existing edges;
    # checking this up front avoids an endless rejection loop on dense graphs.
    mutual = sum(1 for u, v in known if u < v and (v, u) in known)
    if n_neg > n_nodes * (n_nodes - 1) // 2 - mutual:
        raise ValueError('not enough non-edges to sample the requested negative edges')
    sampled = []
    taken = set()
    while len(sampled) < n_neg:
        i = np.random.randint(0, n_nodes)
        j = np.random.randint(0, n_nodes)
        # A self loop is never a meaningful negative edge
        if i == j or (i, j) in known:
            continue
        if (i, j) in taken or (j, i) in taken:
            continue
        taken.add((i, j))
        sampled.append((i, j))
    neg_edges = np.array(sampled, dtype=int).reshape(-1, 2)
    test_neg_edges = np.array_split(neg_edges[:n_edges], k)
    val_neg_edges = np.array_split(neg_edges[n_edges:], k)

    # positive samples for train, test and validation
    for i, test_edges_idx in enumerate(k_train_folds):
        # Folds may differ in length, so they are concatenated directly;
        # np.delete on the ragged list is rejected by recent NumPy versions.
        train_edges_idx = np.hstack([fold for j, fold in enumerate(k_train_folds) if j != i])
        np.random.shuffle(train_edges_idx)

        test_edges = edges[test_edges_idx]
        val_edges = edges[train_edges_idx[:valid_size]]
        train_edges = edges[train_edges_idx[valid_size:]]

        # build the train adjacency matrix
        adj_train = sp.csr_matrix((np.ones(len(train_edges)), (train_edges[:,0], train_edges[:,1])), adj.shape, dtype=adj.dtype)
        adj_train = adj_train + adj_train.T
        yield adj_train, train_edges, test_edges, test_neg_edges[i], val_edges, val_neg_edges[i]

# Split data in train, test, validation
def split_data(adj, test=0.1, valid=0.05):
    """Split the edges of a sparse adjacency matrix for link prediction.

    Positive edges are taken from the upper triangle of ``adj``. Returns
    ``(adj_train, train_edges, test_edges, test_edges_false, val_edges,
    val_edges_false)``, where ``adj_train`` is the symmetric adjacency built
    from the training edges only and the ``*_false`` arrays hold as many
    sampled non-edges as there are test and validation edges.

    Args:
        adj: SciPy sparse adjacency matrix.
        test: fraction of the edges held out for testing.
        valid: fraction of the edges held out for validation.

    Raises:
        ValueError: if the graph has too few non-edges to draw the negatives.
    """
    all_edges = sparse_to_tuple(adj)
    edges = sparse_to_tuple(sp.triu(adj))
    n_nodes = adj.shape[0]
    n_test = int(edges.shape[0] * test)
    n_val = int(edges.shape[0] * valid)

    edges_idx = np.arange(edges.shape[0])
    np.random.shuffle(edges_idx)
    val_edges = edges[edges_idx[:n_val]]
    test_edges = edges[edges_idx[n_val:(n_val + n_test)]]
    train_edges = edges[edges_idx[(n_val + n_test):]]

    # Sets give O(1) membership checks instead of rescanning the edge arrays
    # on every draw.
    known = set(map(tuple, all_edges.tolist()))
    n_neg = len(test_edges) + len(val_edges)
    # A pair can be drawn unless both of its orientations are existing edges;
    # checking this up front avoids an endless rejection loop on dense graphs.
    mutual = sum(1 for u, v in known if u < v and (v, u) in known)
    if n_neg > n_nodes * (n_nodes - 1) // 2 - mutual:
        raise ValueError('not enough non-edges to sample the requested negative edges')
    sampled = []
    taken = set()
    while len(sampled) < n_neg:
        i = np.random.randint(0, n_nodes)
        j = np.random.randint(0, n_nodes)
        # A self loop is never a meaningful negative edge
        if i == j or (i, j) in known:
            continue
        if (i, j) in taken or (j, i) in taken:
            continue
        taken.add((i, j))
        sampled.append((i, j))
    edges_false = np.array(sampled, dtype=int).reshape(-1, 2)
    test_edges_false, val_edges_false = edges_false[:len(test_edges)], edges_false[len(test_edges):]

    adj_train = sp.csr_matrix((np.ones(len(train_edges)), (train_edges[:,0], train_edges[:,1])), adj.shape, dtype=adj.dtype)
    adj_train = adj_train + adj_train.T

    return adj_train, train_edges, test_edges, test_edges_false, val_edges, val_edges_false

# Reconstruction based learning
This section implements:
- Metrics calculation (AUC, AP, MRR, MR and Hits)
- k-fold training

In [None]:
def compute_ranking_metrics(true, pred, edges, neg_sampling=100):
  """Rank every positive edge against sampled corrupted edges.

  For each edge one endpoint (chosen at random) is kept and the other one is
  replaced by nodes that are not neighbours of the kept endpoint in ``true``.
  The positive edge is then ranked among these corrupted edges by its score
  in ``pred``.

  Args:
    true: square adjacency matrix (tensor or array) used to exclude real edges.
    pred: matrix of predicted edge scores, indexed as ``pred[u, v]``.
    edges: iterable of ``(u, v)`` positive edges to rank.
    neg_sampling: number of corrupted edges drawn per positive edge; fewer are
      used when the kept endpoint has fewer non-neighbours.

  Returns:
    dict with the mean reciprocal rank (``mrr``), the mean rank (``mr``) and
    the fractions ``hits1``, ``hits3``, ``hits10``.
  """
  true = true.detach().cpu().numpy() if torch.is_tensor(true) else np.asarray(true)
  pred = pred.detach().cpu().numpy() if torch.is_tensor(pred) else np.asarray(pred)
  n = true.shape[0]
  # add self loops so that a node is never sampled as its own neighbour
  occupied = (true + np.eye(n)) != 0
  nodes = np.arange(n)

  hits1 = 0
  hits3 = 0
  hits10 = 0
  mr = 0
  mrr = 0
  for pos in edges:
    pos = np.asarray(pos)
    scores = [pred[tuple(pos)]]
    keep = np.random.choice([0, 1])
    # Candidates must be non-neighbours of the endpoint that is kept. The
    # adjacency row has to be looked up through pos[keep]; using `keep`
    # itself would always read the row of node 0 or node 1.
    candidates = nodes[~occupied[pos[keep]]]
    n_neg = min(neg_sampling, len(candidates))
    for j in np.random.choice(candidates, n_neg, False):
      neg = pos.copy()
      neg[1 - keep] = j
      scores.append(pred[tuple(neg)])
    # position of the positive edge (index 0) in the descending score order
    rank = float(np.argsort(scores)[::-1].argmin() + 1)
    hits10 += 1 if rank <= 10 else 0
    hits3 += 1 if rank <= 3 else 0
    hits1 += 1 if rank == 1 else 0
    mr += rank
    mrr += 1 / rank

  metrics = {'mrr': mrr / len(edges),
             'mr': mr / len(edges),
             'hits1': hits1 / len(edges),
             'hits3': hits3 / len(edges),
             'hits10': hits10 / len(edges)}
  return metrics

def compute_metrics(pos_true, neg_true, preds):
  """Score predictions on positive and negative edges.

  The first ``len(pos_true)`` rows of ``neg_true`` are paired with the
  positive edges. Returns a dict with the log loss (``loss``), the average
  precision (``ap``) and the ROC AUC (``auc``).
  """
  n = pos_true.shape[0]
  pos_pred = [preds[tuple(pos_true[idx])] for idx in range(n)]
  neg_pred = [preds[tuple(neg_true[idx])] for idx in range(n)]
  scores = np.concatenate((np.asarray(pos_pred), np.asarray(neg_pred)))
  labels = np.concatenate((np.ones(n), np.zeros(n)))

  return {
      'loss': float(m.log_loss(labels, scores)),
      'ap': float(m.average_precision_score(labels, scores)),
      'auc': float(m.roc_auc_score(labels, scores)),
  }

def train_kfold(
    model,            # Model to train
    A,                # Graph adjacency matrix
    X,                # Node's features
    k=10,             # K fold
    lr=1e-2,          # Learning rate
    wd=1e-5,          # Weight decay
    valid_size=0.05,  # Validation set size
    epochs_max=300,   # Max number of epochs
    patience_max=30,  # Max number of epochs to wait before early stopping (if no improvements)
    dgl_model=True,   # Must be True if the model is implemented via DGL library, False otherwise
    device='cpu'):
  """
  Train a specific model using k-fold cross validation, returns a list of metrics
  computed on each fold.

  For every fold the model is reset, trained with a class-weighted binary
  cross entropy on the training adjacency, and early-stopped on the
  validation loss (patience only starts counting after epoch 50). The weights
  with the best validation loss are restored before the test metrics are
  computed. Each returned dict also carries the train/validation loss curves
  sampled every 10 epochs.
  """
  import copy  # local import: used to snapshot the best weights

  def _reconstruction(output):
    # Some models (e.g. GATAE) return a tuple whose first item is the
    # reconstructed adjacency.
    return output[0] if isinstance(output, tuple) else output

  fold = 1
  metrics = []
  for A_train, train_e, test_e, test_ef, val_e, val_ef in kfold(A, k, valid_size):
    # reset weights
    model.reset_parameters()
    if dgl_model:
      # The graph must live on the same device as the model and features
      A_train_dgl = dgl.from_scipy(A_train).to(device)
      A_train_dgl = A_train_dgl.add_self_loop()

    # scipy sparse matrix -> torch dense tensor
    n = A_train.sum()
    A_train = torch.from_numpy(A_train.todense()).to(device)
    n_pairs = A_train.shape[0] * A_train.shape[0]
    # Positive entries are up-weighted to balance them against the non-edges
    pos_w = (n_pairs - n) / n
    norm = n_pairs / ((n_pairs - n) * 2)
    A_train_w = A_train.clone().flatten()
    A_train_w[A_train_w == 1] = pos_w
    A_train_w[A_train_w == 0] = 1

    model.train()
    losses = {'train_loss': [], 'valid_loss': []}
    opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
    loss = nn.BCELoss(weight=A_train_w)
    patience_act = 0
    min_loss = np.inf
    # state_dict() returns references to the live parameters, so a deep copy
    # is required to really keep the best weights.
    model_state = copy.deepcopy(model.state_dict())
    for epoch in range(epochs_max):
      if patience_act >= patience_max:
        print(f'[fold {fold}] max patience reached after {patience_max} epochs, training stopped')
        break
      opt.zero_grad()
      out = _reconstruction(model(A_train_dgl if dgl_model else A_train, X))
      l = norm * loss(out.flatten(), A_train.flatten())
      l.backward()
      val_metrics = compute_metrics(val_e, val_ef, out.detach().cpu())
      if val_metrics['loss'] < min_loss:
        min_loss = val_metrics['loss']
        # Snapshot before the optimizer step: these are the weights that
        # produced the validation loss just measured.
        model_state = copy.deepcopy(model.state_dict())
        patience_act = 0
      elif epoch > 50:
        patience_act += 1
      opt.step()
      if epoch % 10 == 0:
        losses['train_loss'].append(np.float16(l.item()))
        losses['valid_loss'].append(np.float16(val_metrics['loss']))
      print(f'[fold {fold}] epoch {epoch}: train loss = {l:.4f}, valid loss = {val_metrics["loss"]:.4f}, valid AUC = {val_metrics["auc"]:.4f}')

    # Load best model based on validation set metrics
    model.eval()
    model.load_state_dict(model_state)
    with torch.no_grad():
      out = _reconstruction(model(A_train_dgl if dgl_model else A_train, X)).cpu()
      test_metrics = compute_metrics(test_e, test_ef, out)
      test_ranking = compute_ranking_metrics(torch.Tensor(A.todense()), out, test_e)
    print(f'[fold {fold}] test metrics: {test_metrics}')
    print(f'[fold {fold}] test mrr: {test_ranking}')
    test_metrics.update(test_ranking)
    test_metrics.update(losses)
    metrics.append(test_metrics)
    fold += 1
  return metrics

## Run experiments

In [None]:
import pickle
import json
# Train the selected models with k-fold cross validation and store the
# per-fold metrics in a pickle file named after the dataset and the model.
dev = 'cuda' if torch.cuda.is_available() else 'cpu'

DATASET = 'facebook'
if DATASET == 'facebook':
  with open('facebook.json') as f:
    data = json.load(f)
  # features are moved to the training device, like in the DGL branch below
  X = torch.Tensor(data['feat']).to(dev)
  edges = np.asarray(data['edges'])
  A = sp.csr_matrix((np.ones(len(edges)), (edges[:,0], edges[:,1])), (X.shape[0], X.shape[0]), dtype=np.float32)
else:
  if DATASET == 'cora':
    g = dgl.data.CoraGraphDataset()[0]
  elif DATASET == 'citeseer':
    g = dgl.data.CiteseerGraphDataset()[0]
  elif DATASET == 'pubmed':
    g = dgl.data.PubmedGraphDataset()[0]
  else:
    raise ValueError(f'unknown dataset: {DATASET}')
  # adjacency matrix
  A = g.adj_external(scipy_fmt='csr').astype(np.float32)
  # feature vectors
  X = g.ndata['feat'].to(dev)

m1 = GCAE(X.shape[1], 128, 64)
m2 = GNCAE(X.shape[1], 128, 64, s=1.8)
m3 = GATAE(X.shape[1], 128, 64, 4, 1)
m4 = GTAE(X.shape[1], 64, 32, 4, n_layers=1, pe_enc=True, device=dev)
for model in [m1]:
  model = model.to(dev)
  # the keyword of train_kfold is `device`; `dev=` raised a TypeError
  metrics = train_kfold(model, A, X, epochs_max=500, dgl_model=True, device=dev)
  with open(f'{DATASET}_{model._get_name()}_metrics.pkl', 'wb') as f:
    pickle.dump(metrics, f)

# Reconstruction + contrastive learning

In [None]:
from itertools import combinations
from math import comb

def contrastive_loss(x_base, x_aug, temp=1.0, device='cpu'):
  """Contrastive loss between two sets of row-aligned projections.

  Row ``i`` of ``x_base`` and row ``i`` of ``x_aug`` form the positive pair.
  Cosine similarities are divided by ``temp`` and fed to a log-softmax from
  which the similarity of every row with itself is masked out.
  """
  n_rows = x_base.shape[0]
  # -inf on the two off-diagonals that hold the self-similarities
  neg_inf = torch.full((n_rows,), -np.inf)
  self_mask = torch.diag(neg_inf, n_rows) + torch.diag(neg_inf, -n_rows)
  self_mask = self_mask.to(device)

  queries = F.normalize(torch.vstack((x_aug, x_base)))
  keys = F.normalize(torch.vstack((x_base, x_aug)))
  logits = queries.mm(keys.T) / temp
  log_prob = F.log_softmax(logits + self_mask, dim=1)
  return -log_prob.diag().mean()

def train_contrastive_kfold(
    model,                  # Model to train
    A,                      # Graph adjacency matrix
    X,                      # Node's features
    part_type='clust_conj', # Type of partitioning to use
    n_part=2,               # Number of partitions
    contrastive_w=0.5,      # Weight coefficient given to the contrastive loss
    contrastive_t=0.5,      # Temperature used in contrastive loss softmax
    proj_size=64,           # Dimension of the node's projections, used only if part_type == 'clust_dis'
    k=10,                   # K fold
    lr=1e-2,                # Learning rate
    wd=1e-5,                # Weight decay
    valid_size=0.05,        # Validation set size
    epochs_max=300,         # Max number of epochs
    patience_max=30,        # Max number of epochs to wait before early stopping (if no improvements are made)
    dgl_model=True,         # Must be True if the model is implemented via DGL library, False otherwise
    device='cpu'):
  """
  Train a model with k-fold cross validation on a reconstruction loss plus a
  contrastive loss between graph views, returns a list of metrics computed on
  each fold.

  The training graph of every fold is split into ``n_part`` views. The
  reconstruction loss is averaged over the views; after epoch 50 the
  contrastive loss between the views' projections is added with weight
  ``contrastive_w``. Early stopping follows the validation loss measured on
  the embeddings averaged over the views.

  Raises:
    ValueError: if ``part_type`` is not 'res', 'clust_dis' or 'clust_conj'.
  """
  import copy  # local import: used to snapshot the best weights

  if part_type not in ['res', 'clust_dis', 'clust_conj']:
    # The original code returned the exception object instead of raising it
    raise ValueError('invalid partitioning type')

  fold = 1
  metrics = []
  for A_train, train_e, test_e, test_ef, val_e, val_ef in kfold(A, k, valid_size):
    # reset weights
    model.reset_parameters()

    # static partitioning
    if part_type == 'clust_dis':
      subA, subA_nodes = partition(A_train, n_part)
      # Nodes outside each partition. Partitions may differ in size, so they
      # are concatenated directly rather than through np.delete.
      subA_others = [
          np.hstack([nodes for j, nodes in enumerate(subA_nodes) if j != i])
          for i in range(len(subA_nodes))
      ]
    elif part_type == 'clust_conj':
      subA, subA_nodes = partition(A_train, n_part, remove_edges=False)
    else:
      subA = random_edge_splitting(A_train, n_part)

    subA_dgl = []
    if dgl_model:
      for i in subA:
        tmp = dgl.from_scipy(i).to(device)
        tmp = tmp.add_self_loop()
        subA_dgl.append(tmp)

    norm = []
    loss = []
    for i in range(len(subA)):
      subA[i] = torch.from_numpy(subA[i].todense()).to(device)
      n = subA[i].sum()
      n_pairs = subA[i].shape[0] * subA[i].shape[0]
      # Positive entries are up-weighted to balance them against the non-edges
      pos_w = (n_pairs - n) / n
      norm.append(n_pairs / ((n_pairs - n) * 2))
      tmp = subA[i].clone().flatten()
      tmp[tmp == 1] = pos_w
      tmp[tmp == 0] = 1
      loss.append(nn.BCELoss(weight=tmp))

    model.train()
    opt = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
    patience_act = 0
    min_loss = np.inf
    contrastive_active = False
    # state_dict() returns references to the live parameters, so a deep copy
    # is required to really keep the best weights.
    model_state = copy.deepcopy(model.state_dict())
    for epoch in range(epochs_max):
      if patience_act >= patience_max:
        print(f'[fold {fold}] max patience reached after {patience_max} epochs, training stopped')
        break
      if epoch > 50:
        contrastive_active = True
      opt.zero_grad()
      embeddings = []
      l = 0
      if part_type == 'clust_dis':
        # allocated on the training device so the projections can be copied in
        proj_base = torch.empty((A.shape[0], proj_size), device=device)
        proj_aug = torch.empty((A.shape[0], proj_size), device=device)
      else:
        projs = []

      for i in range(len(subA)):
        out, proj = model(subA_dgl[i] if dgl_model else subA[i], X, need_proj=True)
        embeddings.append(model.enc)
        l += norm[i] * loss[i](out.flatten(), subA[i].flatten())

        if part_type == 'clust_dis':
          subA_not = subA_others[i]
          ## Augmented projections
          proj_aug[subA_nodes[i],:] = proj[subA_nodes[i],:]
          ## Base projections: each outside node gets its own projection
          ## (indexing with subA_not[i] copied one node's row to all of them)
          proj_base[subA_not,:] = proj[subA_not,:]
        else:
          projs.append(proj)
      l /= n_part

      # contrastive loss
      if contrastive_active:
        if part_type == 'clust_dis':
          lc = contrastive_loss(proj_base, proj_aug, contrastive_t, device=device)
        else:
          lc = 0
          n_view_pairs = comb(len(projs), 2)
          for c in combinations(projs, 2):
            lc += contrastive_loss(*c, contrastive_t, device=device)
          # with a single view there is nothing to contrast
          if n_view_pairs > 0:
            lc /= n_view_pairs
        l += contrastive_w * lc

      l.backward()

      embeddings = torch.stack(embeddings).mean(dim=0)
      out = torch.sigmoid(embeddings.mm(embeddings.T))
      val_metrics = compute_metrics(val_e, val_ef, out.detach().cpu())
      if val_metrics['loss'] < min_loss:
        min_loss = val_metrics['loss']
        # Snapshot before the optimizer step: these are the weights that
        # produced the validation loss just measured.
        model_state = copy.deepcopy(model.state_dict())
        patience_act = 0
      elif epoch > 50:
        patience_act += 1
      opt.step()
      print(f'[fold {fold}] epoch {epoch}: train loss = {l:.4f}, valid loss = {val_metrics["loss"]:.4f}, valid AUC = {val_metrics["auc"]:.4f}')

    # Load best model based on validation set metrics
    model.eval()
    model.load_state_dict(model_state)
    with torch.no_grad():
      embeddings = []
      for i in range(len(subA)):
        # only the side effect on model.enc is needed here
        model(subA_dgl[i] if dgl_model else subA[i], X)
        embeddings.append(model.enc)
      embeddings = torch.stack(embeddings).mean(dim=0)
      out = torch.sigmoid(embeddings.mm(embeddings.T))
      test_metrics = compute_metrics(test_e, test_ef, out.cpu())
      test_ranking = compute_ranking_metrics(torch.Tensor(A.todense()), out.cpu(), test_e)
    print(f'[fold {fold}] test metrics: {test_metrics}')
    print(f'[fold {fold}] test mrr: {test_ranking}')
    test_metrics.update(test_ranking)
    metrics.append(test_metrics)
    fold += 1
  return metrics

### Partitioning
- Using Fiedler eigenvector
- Random edge splitting

In [None]:
def laplacian_eig(adj, norm=False, device='cpu'):
  """Eigen-decomposition of the graph Laplacian of ``adj``.

  Args:
    adj: adjacency matrix, either a dense torch tensor or an object with a
      ``todense()`` method (e.g. a SciPy sparse matrix).
    norm: if true use the symmetric normalized Laplacian
      ``I - D^-1/2 A D^-1/2`` (degrees clipped to at least 1), otherwise the
      combinatorial Laplacian ``D - A``.
    device: kept for backward compatibility; the decomposition always runs on
      the CPU with NumPy, so the value is not used.

  Returns:
    ``(eig, eiv)`` as float64 NumPy arrays: the eigenvalues (values below
    1e-10 are set to exactly 0) and the matrix whose columns are the
    corresponding eigenvectors.
  """
  if torch.is_tensor(adj):
    dense = adj.detach().cpu().numpy()
  elif hasattr(adj, 'todense'):
    dense = adj.todense()
  else:
    dense = adj
  # float64 keeps the rounding error of the null eigenvalues well below the
  # 1e-10 threshold used further down (float32 noise is around 1e-7).
  dense = np.asarray(dense, dtype=np.float64)

  degree = dense.sum(axis=1)
  if norm:
    inv_sqrt_deg = np.clip(degree, 1, None) ** -0.5
    L = np.eye(dense.shape[0]) - inv_sqrt_deg[:, None] * dense * inv_sqrt_deg[None, :]
  else:
    L = np.diag(degree) - dense

  if np.allclose(L, L.T):
    # Symmetric case: eigh guarantees a real spectrum
    eig, eiv = np.linalg.eigh(L)
  else:
    eig, eiv = np.linalg.eig(L)
    # same outcome as the former astype(float): imaginary parts are dropped
    eig, eiv = np.real(eig).astype(float), np.real(eiv).astype(float)
  eig[eig < 1e-10] = 0
  return eig, np.asarray(eiv)

def laplacian_pe(adj, k, device='cpu'):
  """Laplacian positional encodings of width ``k``.

  Uses the eigenvectors of the normalized Laplacian associated with the ``k``
  smallest strictly positive eigenvalues, each multiplied by a random sign.
  When fewer than ``k`` such eigenvectors exist the missing columns are
  filled with zeros, so the result always has ``k`` columns.

  Returns:
    a float tensor with one row per node and ``k`` columns, placed on ``device``.
  """
  eig, eiv = laplacian_eig(adj, norm=True, device=device)
  nontrivial = eig > 0
  eiv = eiv[:, nontrivial]
  eig = eig[nontrivial]
  order = np.argsort(eig)[:k]
  sign = 2 * (np.random.rand(k) > 0.5) - 1
  pe = sign[:len(order)] * np.real(eiv[:, order])
  missing = k - pe.shape[1]
  if missing > 0:
    # Without padding the sign vector would not broadcast and callers would
    # receive fewer columns than they asked for.
    pe = np.hstack((pe, np.zeros((pe.shape[0], missing))))
  return torch.Tensor(pe).to(device)

def partition(adj, k, remove_edges=True, device='cpu'):
  """Split a graph into ``k`` node groups ordered by the Fiedler vector.

  Nodes are sorted by their entry in the eigenvector of the smallest strictly
  positive Laplacian eigenvalue and cut into ``k`` contiguous chunks.

  Args:
    adj: SciPy sparse adjacency matrix.
    k: number of partitions.
    remove_edges: if true every view keeps only the edges with both endpoints
      inside the partition; otherwise it keeps every edge touching at least
      one node of the partition.
    device: forwarded to ``laplacian_eig``.

  Returns:
    ``(adj_list, p)``: one sparse adjacency per partition and the list of node
    index arrays of the partitions.

  Raises:
    ValueError: if the Laplacian has no strictly positive eigenvalue.
  """
  eig, eiv = laplacian_eig(adj, device=device)
  positive = np.flatnonzero(eig > 0)
  if positive.size == 0:
    raise ValueError('the Laplacian has no positive eigenvalue, cannot partition the graph')
  # argmin runs on the filtered eigenvalues, so its result has to be mapped
  # back to a column of the unfiltered eigenvector matrix.
  fiedler_col = positive[eig[positive].argmin()]
  nodes = eiv[:, fiedler_col].flatten()

  adj_list = []
  p = np.array_split(np.argsort(nodes), k)
  for i, part in enumerate(p):
    if remove_edges:
      A = adj.copy()
      # All nodes not present in the current partition. Partitions may differ
      # in size, so they are concatenated directly rather than via np.delete.
      others = [other for j, other in enumerate(p) if j != i]
      if others:
        not_p = np.hstack(others)
        A[not_p,:] = 0
        A[:,not_p] = 0
      A.eliminate_zeros()
    else:
      A = sp.dok_matrix(adj.shape, dtype=np.float32).tocsr()
      A[part,:] = adj[part,:]
      A[:,part] = adj[:,part]
      A.eliminate_zeros()
    adj_list.append(A)
  return adj_list, p

def random_edge_splitting(adj, k):
  """Randomly distribute the undirected edges of ``adj`` over ``k`` graphs.

  Edges are read from the upper triangle of ``adj`` and shuffled. Every
  returned matrix is a symmetric float32 CSR adjacency with unit weights.
  A self loop of ``adj`` ends up with weight 2, because both of its mirrored
  entries fall on the same cell.
  """
  upper = sp.triu(adj)
  if not sp.isspmatrix_coo(upper):
    upper = upper.tocoo()
  upper.eliminate_zeros()
  # The number of edges is the number of stored entries; the sum of the
  # weights only matches it when every weight is exactly 1.
  edges_idx = np.arange(upper.nnz)
  np.random.shuffle(edges_idx)
  adj_list = []
  for part in np.array_split(edges_idx, k):
    rows = np.hstack((upper.row[part], upper.col[part]))
    cols = np.hstack((upper.col[part], upper.row[part]))
    A = sp.csr_matrix((np.ones(len(part) * 2), (rows, cols)), upper.shape, dtype=np.float32)
    adj_list.append(A)
  return adj_list


## Run experiments

In [None]:
import pickle
import json
# Train the selected models with the reconstruction + contrastive objective
# and store the per-fold metrics in a pickle file named after the dataset and
# the model.
dev = 'cuda' if torch.cuda.is_available() else 'cpu'

DATASET = 'pubmed'
if DATASET == 'facebook':
  with open('facebook.json') as f:
    data = json.load(f)
  # features are moved to the training device, like in the DGL branch below
  X = torch.Tensor(data['feat']).to(dev)
  edges = np.asarray(data['edges'])
  A = sp.csr_matrix((np.ones(len(edges)), (edges[:,0], edges[:,1])), (X.shape[0], X.shape[0]), dtype=np.float32)
else:
  if DATASET == 'cora':
    g = dgl.data.CoraGraphDataset()[0]
  elif DATASET == 'citeseer':
    g = dgl.data.CiteseerGraphDataset()[0]
  elif DATASET == 'pubmed':
    g = dgl.data.PubmedGraphDataset()[0]
  else:
    raise ValueError(f'unknown dataset: {DATASET}')
  # adjacency matrix
  A = g.adj_external(scipy_fmt='csr').astype(np.float32)
  # feature vectors
  X = g.ndata['feat'].to(dev)

m1 = GCAE(X.shape[1], 128, 64)
m2 = GNCAE(X.shape[1], 128, 64, s=1.8)
m3 = GATAE(X.shape[1], 128, 64, 4, 1)
for model in [m1]:
  model = model.to(dev)
  # the keyword of train_contrastive_kfold is `device`; `dev=` raised a TypeError
  metrics = train_contrastive_kfold(model, A, X, dgl_model=True, part_type='clust_conj', patience_max=50, device=dev)
  with open(f'{DATASET}_{model._get_name()}_metrics_contrastive.pkl', 'wb') as f:
    pickle.dump(metrics, f)

# Print saved results in tabular format

In [None]:
import os
DATASET = 'cora'
# Metrics reported for every result file, in output column order
METRIC_KEYS = ('auc', 'ap', 'mrr', 'mr', 'hits1', 'hits3', 'hits10')
# sorted() makes the listing order reproducible across runs and platforms
for fname in sorted(os.listdir(DATASET)):
  path = os.path.join(DATASET, fname)
  with open(path, 'rb') as fh:
    # NOTE: pickle.load can execute arbitrary code; only read result files
    # produced by this notebook.
    mtr = pickle.load(fh)
  print(path)
  columns = []
  for key in METRIC_KEYS:
    values = np.array([x[key] for x in mtr])
    # mean and standard deviation over the folds
    columns.append(f'{values.mean():.3f} {values.std():.2f}')
  print('\t'.join(columns))