In [None]:
import os
from sklearn.manifold import TSNE
from torch_geometric.utils import negative_sampling
from fairn2v import Node2Vec
from os.path import join, dirname, realpath
import numpy as np
import torch
import matplotlib.pyplot as plt
from sklearn import preprocessing
from utils import (
    encode_classes,
    emb_fairness,
    train_rn2v_adaptive,
    emblink_fairness,
)
# Use the GPU when torch can see one; otherwise everything runs on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Seed both RNGs this notebook draws from so reruns are as repeatable as possible.
# NumPy drives the train/test edge split; torch drives the per-epoch random mask
# (and, presumably, the embedding initialisation inside Node2Vec).
SEED = 332
np.random.seed(SEED)
torch.manual_seed(SEED)

In [None]:
# Hyper-parameters for the random walks and the optimizer, gathered in one place.
# `delta` is added to 0.5 further down to form the threshold of the per-epoch random mask.
config = {
    "learning_rate": 0.01,
    "walk_length": 30,
    "walks_per_node": 10,
    "p": 0.50,
    "q": 0.75,
    "delta": 0.3,
}

# "__file__" is a plain string here (a notebook has no __file__ variable), so this
# resolves relative to the current working directory: <cwd>/data/dblp.
notebook_dir = dirname(realpath("__file__"))
dataset_path = join(notebook_dir, "data", "dblp")

# Edge list, parsed as comma-separated integers.
edges_csv = join(dataset_path, "author-author.csv")
with open(edges_csv, mode="r", encoding="ISO-8859-1") as edge_file:
    edges = np.genfromtxt(edge_file, delimiter=",", dtype=int)

# Per-node attribute table, parsed as comma-separated strings.
countries_csv = join(dataset_path, "countries.csv")
with open(countries_csv, mode="r", encoding="ISO-8859-1") as attr_file:
    attributes = np.genfromtxt(attr_file, delimiter=",", dtype=str)

# Column 1 of the attribute table is what gets encoded as the sensitive attribute.
sensitive = encode_classes(attributes[:, 1])
num_classes = np.unique(sensitive).size
# N is later passed as `num_nodes` to negative_sampling and to the training helper.
N = sensitive.shape[0]

In [None]:
# Hold out 20% of the edges for evaluation: draw 80% of the row indices without
# replacement and turn them into a boolean training mask over the rows of `edges`.
m = np.random.choice(len(edges), int(len(edges) * 0.8), replace=False)
tr_mask = np.zeros(len(edges), dtype=bool)
tr_mask[m] = True
pos_edges_tr = edges[tr_mask]
pos_edges_te = edges[~tr_mask]

# negative_sampling only avoids the edges present in the `edge_index` it is given.
# Test negatives are therefore drawn against the FULL edge list: drawing them against
# the held-out edges alone could return real training edges labelled as negatives.
all_edges = torch.LongTensor(edges.T).to(device)
pos_edges_te = torch.LongTensor(pos_edges_te.T).to(device)
neg_edges_te = negative_sampling(
    edge_index=all_edges, num_nodes=N, num_neg_samples=pos_edges_te.size(1)
).to(device)

# Training negatives are drawn against the training edges only, so no information
# about the held-out edges is used on the training side.
pos_edges_tr = torch.LongTensor(pos_edges_tr.T).to(device)
neg_edges_tr = negative_sampling(
    edge_index=pos_edges_tr, num_nodes=N, num_neg_samples=pos_edges_tr.size(1)
).to(device)

In [None]:
# The training loop iterates over range(1, epochs), i.e. epochs - 1 passes.
epochs = 51

# Walk hyper-parameters come from `config`; the remaining values are fixed here.
walk_settings = {
    "embedding_dim": 128,
    "walk_length": config["walk_length"],
    "context_size": 10,
    "walks_per_node": config["walks_per_node"],
    "p": config["p"],
    "q": config["q"],
    "num_negative_samples": 1,
    "sparse": True,
}
model = Node2Vec(pos_edges_tr, **walk_settings)
model = model.to(device)

loader = model.loader(batch_size=64, shuffle=True, num_workers=8)

# SparseAdam is paired with sparse=True above (presumably the embedding table then
# produces sparse gradients, which is what SparseAdam is designed for).
trainable = list(model.parameters())
optimizer = torch.optim.SparseAdam(trainable, lr=config["learning_rate"])

In [None]:
# Sensitive-attribute label of every node, as a tensor on the compute device.
Y = torch.LongTensor(sensitive).to(device)

# For each training edge: True when its two endpoints carry different labels.
src_label = Y[pos_edges_tr[0, :]]
dst_label = Y[pos_edges_tr[1, :]]
Y_aux = (src_label != dst_label).to(device)

# One boolean row per epoch, one column per training edge; each entry is True when a
# uniform [0, 1) draw falls below 0.5 + delta. Drawn on the CPU, then moved to the device.
threshold = 0.5 + config["delta"]
uniform_draws = torch.rand(epochs, Y_aux.size(0))
randomization = (uniform_draws < threshold).to(device)

In [None]:
# One call to the training helper per epoch. Epochs are numbered from 1, so row 0 of
# `randomization` is never consumed.
for epoch in range(1, epochs):
    # Random boolean mask over the training edges for this epoch.
    epoch_mask = randomization[epoch]
    loss = train_rn2v_adaptive(
        model, loader, optimizer, device, pos_edges_tr, Y_aux, epoch_mask, N
    )

In [None]:
# Switch the model to evaluation mode before reading out the learned embeddings.
model.eval()

# Calling the model with no arguments returns the embedding tensor; detach it from the
# autograd graph and bring it to the CPU so scikit-learn can consume it.
embeddings = model().detach().cpu()

# Standardise each embedding dimension (zero mean, unit variance).
scaler = preprocessing.StandardScaler()
XB = scaler.fit_transform(embeddings)

# Labels used by the fairness metrics: the encoded sensitive attribute.
YB = sensitive

In [None]:
# Node-level fairness score computed from the standardised embeddings (XB) and the
# sensitive labels (YB).
# NOTE(review): emb_fairness lives in utils and is not visible here; presumably it
# measures how well YB can be recovered from XB — confirm against utils.
node_rb = emb_fairness(XB, YB)
print(node_rb)

In [None]:
# Link-level fairness score. The edge tensors are moved back to the CPU first because
# XB and YB are CPU-side arrays.
# NOTE(review): emblink_fairness lives in utils; its exact metric is not visible here.
train_edges_cpu = pos_edges_tr.cpu()
test_edges_cpu = pos_edges_te.cpu()
link_rb = emblink_fairness(XB, YB, train_edges_cpu, test_edges_cpu)
print(link_rb)