In [1]:
import networkx as nx
import numpy as np
import time
# from sklearn.decomposition import PCA
from umap import UMAP

import sys
sys.path.append("..")
import utilities

In [2]:
# id, pos_x, pos_y, inFoRM, proj_x, proj_y

In [3]:
# networkx positions 
def get_node_pos(G, node_features):
    G = nx.read_edgelist(edgelist_file)
    nodes = list(G.nodes())

    start = time.time()
    nodePos = nx.spring_layout(G, seed=42)
    end = time.time()

    for node in nodes:
        if node not in node_features:
            node_features[node] = {"id": node}
        node_features[node]["pos_x"] = nodePos[node][0]
        node_features[node]["pos_y"] = nodePos[node][1]
    print("Spring Layout Elapsed Time: {}".format(int(end - start)))

In [4]:
# def get_pca_proj(G, embeddings, node_features):
#     start = time.time()
#     embeddings_pca = PCA(n_components=2).fit_transform(embeddings)
#     end = time.time()
#     nodes = list(G.nodes())
#     for i in range(len(nodes)):
#         node_features[nodes[i]]["proj_x"] = embeddings_pca[i][0]
#         node_features[nodes[i]]["proj_y"] = embeddings_pca[i][1]

#     print("PCA Elapsed Time: {}".format(int(end - start)))

def get_umap_proj(G, embeddings, node_features):
    start = time.time()
    umap_2d = UMAP(n_components=2, init='random', random_state=0)

    embeddings_umap = umap_2d.fit_transform(embeddings)
    end = time.time()
    nodes = list(G.nodes())
    for i in range(len(nodes)):
        node_features[nodes[i]]["proj_x"] = embeddings_umap[i][0]
        node_features[nodes[i]]["proj_y"] = embeddings_umap[i][1]

    print("UMAP Elapsed Time: {}".format(int(end - start)))

In [5]:
def get_inFoRM(G, embeddings, node_features):
    adj_matrix = nx.to_numpy_array(G)
    assert(adj_matrix.max() == 1)
    start = time.time()
    inFoRM_scores = utilities.unfairness_scores_normalized(embeddings, adj_matrix, G)
    end = time.time()
    nodes = list(G.nodes())
    for i in range(len(inFoRM_scores)):
        node_features[nodes[i]]["InFoRM"] = inFoRM_scores[i]
    print("InFoRM Elapsed Time: {}".format(int(end - start)))

In [6]:
# def get_inFoRM_hops(G, embeddings, node_features, nr_hops):
#     # adj_matrix = nx.to_numpy_array(G)
#     # assert(adj_matrix.max() == 1)
#     start = time.time()
#     inFoRM_hops_scores = utilities.uk_hop_InFoRM_scores_normalized(embeddings, G, nr_hops)
#     end = time.time()
#     nodes = list(G.nodes())
#     for i in range(len(inFoRM_hops_scores)):
#         node_features[nodes[i]]["InFoRM_hops"] = inFoRM_hops_scores[i]
#     print("InFoRM hops Elapsed Time: {}".format(int(end - start)))

In [19]:
graph_metadata = {"Facebook": {"edgelist": "../edgelists/facebook_combined.edgelist"},
                 "LastFM": {"edgelist": "../edgelists/lastfm_asia_edges.edgelist"},
                 "wikipedia": {"edgelist": "../edgelists/wikipedia.edgelist"},
                  "protein-protein": {"edgelist": "../edgelists/ppi.edgelist"},
                  "ca-HepTh": {"edgelist": "../edgelists/ca-HepTh.edgelist"},
                  "AutonomousSystems": {"edgelist": "../edgelists/AS.edgelist"},
                 }
# graph_metadata = {
#                  #"wikipedia": {"edgelist": "../edgelists/wikipedia.edgelist"},
#                   #protein-protein": {"edgelist": "../edgelists/ppi.edgelist"},
#                   #"ca-HepTh": {"edgelist": "../edgelists/ca-HepTh.edgelist"},
#                   "AutonomousSystems": {"edgelist": "../edgelists/AS.edgelist"},
#                  }
embedding_algs = ["Node2Vec", "HOPE", "HGCN", "LaplacianEigenmap", "SDNE", "SVD"]

for graph_name in graph_metadata:
    print("\n\n" + graph_name)
    node_features = {}

    edgelist_file = graph_metadata[graph_name]["edgelist"]
    G = nx.read_edgelist(edgelist_file)

    get_node_pos(G, node_features)

    # ids from the nodes are not sorted
    # for i,n in node_features.items():
    #     print(i,n)
    
    for embedding_alg in embedding_algs:
        print("\n" + embedding_alg)
        embedding_file = "../embeddings/{}/{}/{}_{}_64_embedding.npy".format(graph_name, 
                                                                             embedding_alg, 
                                                                             graph_name, 
                                                                             embedding_alg)
        embeddings = np.load(embedding_file)
        
        node_features_copy = node_features.copy()
        get_umap_proj(G, embeddings, node_features_copy)
        
        output_file = "../embeddings/{}/{}/{}_{}_64_embedding_node_features_InFoRM_scores_umap.csv".format(graph_name, 
                                                                                         embedding_alg, 
                                                                                         graph_name, 
                                                                                         embedding_alg)
        adj_matrix = nx.to_numpy_array(G) # only needed for consistency check
        inFoRM_1_score = utilities.unfairness_scores_normalized(embeddings, adj_matrix, G) # only needed for consistency check
        with open(output_file, "w") as outputCSV:
            outputCSV.write("id,pos_x,pos_y,proj_x,proj_y,nr_hops,InFoRM_hops\n")
            for nr_hops in range(1,3):
                inFoRM_hops_score = utilities.k_hop_InFoRM_scores_normalized(embeddings, G, nr_hops)
                
                for node_id in node_features:
                    if nr_hops == 1 and round(inFoRM_hops_score[int(node_id)],8) != round(inFoRM_1_score[int(node_id)],8):
                        # check consistency with previous function for nr_hops = 1
                        print('node_id',node_id)
                        print('inFoRM_hops_score[int(node_id)]',inFoRM_hops_score[int(node_id)])
                        print('inFoRM_1_score[int(node_id)]',inFoRM_1_score[int(node_id)])
                        break
                    outputCSV.write("{},{},{},{},{},{},{}\n".format(node_features[node_id]["id"],
                                                                    node_features[node_id]["pos_x"],
                                                                    node_features[node_id]["pos_y"],
                                                                    node_features[node_id]["proj_x"],
                                                                    node_features[node_id]["proj_y"],
                                                                    nr_hops,
                                                                    inFoRM_hops_score[int(node_id)]))
            



ca-HepTh
Spring Layout Elapsed Time: 208

Node2Vec
PCA Elapsed Time: 0


IndexError: index 24325 is out of bounds for axis 0 with size 9877