In [1]:
import networkx as nx
import numpy as np
import time
from sklearn.decomposition import PCA

import sys
sys.path.append("..")
import utilities
from utilities import group_unfairness_score, group_unfairness_scores
import csv

In [2]:
# Per-node feature columns collected below: id, pos_x, pos_y, InFoRM, proj_x, proj_y

In [3]:
# networkx positions 
def get_node_pos(G, node_features):
    """Compute a spring layout for ``G`` and store it in ``node_features``.

    Parameters
    ----------
    G : graph
        Graph handed to ``nx.spring_layout``; it is used as given and is not
        re-read from disk.
    node_features : dict
        Maps a node to its feature dict. Updated in place: every node of ``G``
        gets ``"pos_x"`` and ``"pos_y"``; nodes without an entry are first
        given ``{"id": node}``.

    Returns
    -------
    None
        The elapsed layout time (whole seconds) is printed.
    """
    start = time.time()
    # A fixed seed keeps the layout identical between runs.
    node_pos = nx.spring_layout(G, seed=42)
    end = time.time()

    for node in G.nodes():
        entry = node_features.setdefault(node, {"id": node})
        entry["pos_x"] = node_pos[node][0]
        entry["pos_y"] = node_pos[node][1]
    print("Spring Layout Elapsed Time: {}".format(int(end - start)))

In [4]:
def get_pca_proj(G, embeddings, node_features):
    """Project ``embeddings`` to 2-D with PCA and store the result per node.

    Parameters
    ----------
    G : graph
        Only ``G.nodes()`` is used, to pair projected rows with nodes.
    embeddings : array-like
        Passed unchanged to ``PCA(n_components=2).fit_transform``.
        Row ``i`` is paired with the ``i``-th node of ``G.nodes()`` --
        presumably the embedding files are stored in that order; verify
        against whatever produced them.
    node_features : dict
        Maps a node to its feature dict. Updated in place with ``"proj_x"``
        and ``"proj_y"``; nodes without an entry are first given
        ``{"id": node}`` (same convention as ``get_node_pos``).

    Returns
    -------
    None
        The elapsed PCA time (whole seconds) is printed.
    """
    start = time.time()
    embeddings_pca = PCA(n_components=2).fit_transform(embeddings)
    end = time.time()

    for i, node in enumerate(G.nodes()):
        # setdefault avoids a KeyError when this is called before get_node_pos.
        entry = node_features.setdefault(node, {"id": node})
        entry["proj_x"] = embeddings_pca[i][0]
        entry["proj_y"] = embeddings_pca[i][1]

    print("PCA Elapsed Time: {}".format(int(end - start)))

In [5]:
def get_inFoRM(G, embeddings, node_features):
    """Compute InFoRM scores for ``embeddings`` and store them per node.

    Parameters
    ----------
    G : graph
        Converted to an adjacency matrix with ``nx.to_numpy_array`` and also
        passed on to ``utilities.unfairness_scores_normalized``.
    embeddings : array-like
        Passed unchanged to ``utilities.unfairness_scores_normalized``.
    node_features : dict
        Maps a node to its feature dict. Updated in place with ``"InFoRM"``;
        nodes without an entry are first given ``{"id": node}`` (same
        convention as ``get_node_pos``).

    Returns
    -------
    None
        The elapsed scoring time (whole seconds) is printed.

    Raises
    ------
    AssertionError
        If the largest adjacency-matrix entry is not 1.
    """
    adj_matrix = nx.to_numpy_array(G)
    assert adj_matrix.max() == 1, "expected the largest adjacency matrix entry to be 1"

    start = time.time()
    inFoRM_scores = utilities.unfairness_scores_normalized(embeddings, adj_matrix, G)
    end = time.time()

    nodes = list(G.nodes())
    # Score i is paired with the i-th node of G.nodes().
    for i, score in enumerate(inFoRM_scores):
        # setdefault avoids a KeyError when this is called before get_node_pos.
        node_features.setdefault(nodes[i], {"id": nodes[i]})["InFoRM"] = score
    print("InFoRM Elapsed Time: {}".format(int(end - start)))

In [6]:
# Datasets to process, keyed by graph name. Each entry lists the input files:
#   "edgelist" - path of the graph's edge list
#   "features" - path of the node feature file
#
# Edge lists that are currently disabled (kept for reference):
#   "LastFM":            "../edgelists/lastfm_asia_edges.edgelist"
#   "wikipedia":         "../edgelists/wikipedia.edgelist"
#   "protein-protein":   "../edgelists/ppi.edgelist"
#   "ca-HepTh":          "../edgelists/ca-HepTh.edgelist"
#   "AutonomousSystems": "../edgelists/AS.edgelist"
#   "Ex1":               same two files as "Facebook"
graph_metadata = {
    "Facebook": dict(
        edgelist="../edgelists/facebook_combined.edgelist",
        features="../edgelists/facebook/node_genders.txt",
    ),
}

# Embedding algorithms to process for every graph.
embedding_algs = [
    "Node2Vec",
    "HOPE",
    "HGCN",
    "LaplacianEigenmap",
    "SDNE",
    "SVD",
]

In [7]:
# Generate, per graph and embedding, a CSV file with node positions, PCA projections and the InFoRM fairness score (cell currently disabled)
# for graph_name in graph_metadata:
#     print("\n\n" + graph_name)
#     node_features = {}

#     edgelist_file = graph_metadata[graph_name]["edgelist"]
#     G = nx.read_edgelist(edgelist_file)
#     get_node_pos(G, node_features)
    
#     for embedding_alg in embedding_algs:
#         print("\n" + embedding_alg)
#         embedding_file = "../embeddings/{}/{}/{}_{}_64_embedding.npy".format(graph_name, 
#                                                                              embedding_alg, 
#                                                                              graph_name, 
#                                                                              embedding_alg)
#         embeddings = np.load(embedding_file)
        
#         node_features_copy = node_features.copy()
#         get_inFoRM(G, embeddings, node_features_copy)
#         get_pca_proj(G, embeddings, node_features_copy)
        
#         output_file = "../embeddings/{}/{}/{}_{}_64_embedding_node_features.csv".format(graph_name, 
#                                                                                          embedding_alg, 
#                                                                                          graph_name, 
#                                                                                          embedding_alg)
#         with open(output_file, "w") as outputCSV:
#             outputCSV.write("id, pos_x, pos_y, proj_x, proj_y, InFoRM\n")
#             for node_id in node_features:
#                 outputCSV.write("{}, {}, {}, {}, {}, {}\n".format(node_features[node_id]["id"],
#                                                                   node_features[node_id]["pos_x"],
#                                                                   node_features[node_id]["pos_y"],
#                                                                   node_features[node_id]["proj_x"],
#                                                                   node_features[node_id]["proj_y"],
#                                                                   node_features[node_id]["InFoRM"]))
            

In [8]:
# Generate one CSV file per (graph, embedding) pair with the group fairness
# parameters and the resulting score.
# Columns: node id, sensitive attribute (S), attribute value (z), k, score.
MAX_K = 4  # scores are computed for k = 1, ..., MAX_K

for graph_name in graph_metadata:
    print("\n\n" + graph_name)

    edgelist_file = graph_metadata[graph_name]["edgelist"]
    G = nx.read_edgelist(edgelist_file)
    W = nx.to_numpy_array(G)

    # Everything up to the embedding loop depends only on the graph, so it is
    # computed once here rather than once per embedding / node / attribute.
    node_features_file = graph_metadata[graph_name]["features"]

    # The header row names the sensitive attributes; its first field is
    # skipped (the data rows carry the node id in that column).
    with open(node_features_file, newline='') as f:
        header = next(csv.reader(f), None)
    if header is None:
        raise ValueError("Node features file is empty: {}".format(node_features_file))
    sensitive_attrs = header[1:]

    # Passing the path lets numpy open and close the file itself (no leaked
    # handle); ndmin=2 keeps the array 2-D even with a single data row.
    node_features = np.loadtxt(node_features_file, delimiter=",", skiprows=1, ndmin=2).astype(int)

    # Ids found in the first column of the feature file.
    featured_node_ids = set(node_features[:, 0].tolist())

    # Node id -> position of the node in G.nodes() (used as its row in W).
    dict_node_id2idx = {node_id: idx for idx, node_id in enumerate(G.nodes())}

    # One (name, column in node_features, distinct values) triple per attribute.
    attr_specs = [
        (attribute, attr_pos, np.unique(node_features[:, attr_pos]))
        for attr_pos, attribute in enumerate(sensitive_attrs, start=1)
    ]

    for embedding_alg in embedding_algs:
        print("\n" + embedding_alg)
        embedding_file = "../embeddings/{}/{}/{}_{}_64_embedding.npy".format(graph_name,
                                                                             embedding_alg,
                                                                             graph_name,
                                                                             embedding_alg)
        embedding = np.load(embedding_file)

        # select output file
        output_file_gf = "../embeddings/{}/{}/{}_{}_64_embedding_group_fairness_scores.csv".format(graph_name,
                                                                                         embedding_alg,
                                                                                         graph_name,
                                                                                         embedding_alg)

        # There are 4038 nodes in the embedding and 4035 of them have a gender.
        # The loop order was changed so that all scores can be computed; it can
        # be readjusted once the missing nodes are resolved.

        with open(output_file_gf, "w") as outputCSV:
            outputCSV.write("id,attribute,value,k,group_fairness_score_smaller_ks\n")

            for node_id, node_idx in dict_node_id2idx.items():  # node ids start from 0
                # NOTE(review): this tests the *row index* against the node
                # ids of the feature file, exactly as the original code did.
                # Confirm whether group_unfairness_score expects the row index
                # or the node id here, and whether the check should use
                # node_id instead.
                has_features = node_idx in featured_node_ids

                for attribute, attr_pos, sensitive_attrs_vals in attr_specs:
                    for value in sensitive_attrs_vals:
                        for k in range(1, MAX_K + 1):
                            # Nodes without features get an empty score field.
                            if has_features:
                                score = group_unfairness_score(G, embedding, W, node_idx, node_features, attr_pos, value, k)
                            else:
                                score = ""

                            outputCSV.write("{},{},{},{},{}\n".format(node_id,
                                                                      attribute,
                                                                      value,
                                                                      k,
                                                                      score))



Facebook

Node2Vec

HOPE


KeyboardInterrupt: 