In [1]:
import glob
import itertools
import os
import random
import time

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import seaborn as sns


In [2]:
# Root folder of the raw datasets; loader cells build file names with "...".format(path).
path = "Datasets/"


In [3]:
def getProtectedAttributesTwitch(G, df):
    """Attach the Twitch "mature" flag to the nodes of ``G``.

    Args:
        G: networkx graph whose node ids are looked up in ``df["numeric_id"]``.
        df: DataFrame providing the ``numeric_id`` and ``mature`` columns.

    Returns:
        The same graph object ``G``; the attribute is written in place.
    """
    protected_attributes = dict(zip(df["numeric_id"], df["mature"]))
    # nx.set_node_attributes mutates G and returns None, so return G itself
    # instead of the (always None) result of that call.
    nx.set_node_attributes(G, protected_attributes, "mature")
    return G


def getProtectedAttributesDeezer(G, df):
    """Attach the Deezer ``target`` column to the nodes of ``G`` as "gender".

    Args:
        G: networkx graph whose node ids are looked up in ``df["id"]``.
        df: DataFrame providing the ``id`` and ``target`` columns.

    Returns:
        The same graph object ``G``; the attribute is written in place.
    """
    protected_attributes = dict(zip(df["id"], df["target"]))
    # nx.set_node_attributes mutates G and returns None, so return G itself
    # instead of the (always None) result of that call.
    nx.set_node_attributes(G, protected_attributes, "gender")
    return G


def getProtectedAttributesPokec(G, df):
    """Attach column 3 of the Pokec profile table to the nodes of ``G`` as "gender".

    Args:
        G: networkx graph whose node ids are looked up in ``df[0]``.
        df: DataFrame with integer column labels; column 0 is used as the node
            id and column 3 as the attribute value.

    Returns:
        The same graph object ``G``; the attribute is written in place.
    """
    protected_attributes = dict(zip(df[0], df[3]))
    # nx.set_node_attributes mutates G and returns None, so return G itself
    # instead of the (always None) result of that call.
    nx.set_node_attributes(G, protected_attributes, "gender")
    return G


def getprotectedAttributesDict_Facebook(featuresDF, featureNameDF, egoFeatDF):
    """Build a ``{node id: gender value}`` mapping for one Facebook ego network.

    The gender column is located through the first row of ``featureNameDF``
    whose column 1 equals "gender;anonymized"; its index label plus one is the
    column read from both feature tables (column 0 is used as the node id).

    Args:
        featuresDF: feature table of the ego network's members.
        featureNameDF: feature-name table used to find the gender row.
        egoFeatDF: feature table of the ego itself.
            NOTE(review): assumes this table also carries the node id in
            column 0, like ``featuresDF`` - confirm against the file layout.

    Returns:
        dict mapping node id to gender value; entries from ``egoFeatDF``
        override entries from ``featuresDF``.

    Raises:
        IndexError: if no row is named "gender;anonymized".
    """
    genderRows = featureNameDF.index[featureNameDF[1] == "gender;anonymized"]
    genderColumn = genderRows.to_list()[0] + 1

    genderByNode = {}
    # Members first, ego last, so the ego's entry wins on a duplicate id.
    for frame in (featuresDF, egoFeatDF):
        genderByNode.update(zip(frame[0], frame[genderColumn]))
    return genderByNode


def getprotectedAttributesDict_GPlus(featuresDF, gender_index, egoFeatDF):
    """Build a ``{node id: gender value}`` mapping for one Google+ ego network.

    Args:
        featuresDF: feature table of the ego network's members; column 0 is
            used as the node id.
        gender_index: zero-based position of the gender feature; the value is
            read from column ``gender_index + 1`` of both tables.
        egoFeatDF: feature table of the ego itself, read the same way.

    Returns:
        dict mapping node id to gender value; entries from ``egoFeatDF``
        override entries from ``featuresDF``.
    """
    genderColumn = gender_index + 1
    genderByNode = dict(zip(featuresDF[0], featuresDF[genderColumn]))
    genderByNode.update(zip(egoFeatDF[0], egoFeatDF[genderColumn]))
    return genderByNode


def getEgoFeats(path):
    """List the ego-network feature files found directly inside ``path``.

    Args:
        path: directory to scan (not recursive).

    Returns:
        Tuple ``(featFiles, featNameFiles, egoFeatFiles)`` with the bare file
        names ending in ".feat", ".featnames" and ".egofeat" respectively.
        Each list is ordered by file stem, so that position ``i`` of all three
        lists refers to the same ego whenever every ego has all three files.
    """
    featFiles = []
    featNameFiles = []
    egoFeatFiles = []
    # os.listdir gives no ordering guarantee, while callers pair the three
    # lists by index; sorting by stem makes that pairing deterministic.
    for file in sorted(os.listdir(path), key=lambda name: os.path.splitext(name)[0]):
        if file.endswith(".feat"):
            featFiles.append(file)
        elif file.endswith(".featnames"):
            featNameFiles.append(file)
        elif file.endswith(".egofeat"):
            egoFeatFiles.append(file)
    return featFiles, featNameFiles, egoFeatFiles


def getProtectedAttributesFacebook(G, localpath="Datasets/facebook/facebook/"):
    """Annotate ``G`` with the "gender" attribute from the Facebook ego files.

    For every ".feat" file found by ``getEgoFeats`` the matching ".featnames"
    and ".egofeat" files (same list position) are read and turned into a
    ``{node id: gender}`` mapping; all mappings are merged, later files
    overriding earlier ones, and written onto the graph's nodes.

    Args:
        G: networkx graph to annotate in place.
        localpath: directory holding the ego-network files.

    Returns:
        The same graph object ``G``.
    """
    node_gender_dict = {}
    fbFeatFiles, fbFeatNameFiles, fbEgoFeatFiles = getEgoFeats(localpath)
    for index in range(len(fbFeatFiles)):
        localFeaturesDF = pd.read_csv(
            os.path.join(localpath, fbFeatFiles[index]), sep=" ", header=None)
        localFeatureNamesDf = pd.read_csv(
            os.path.join(localpath, fbFeatNameFiles[index]), sep=" ", header=None)
        localEgoFeatDf = pd.read_csv(
            os.path.join(localpath, fbEgoFeatFiles[index]), sep=" ", header=None)
        node_gender_dict.update(getprotectedAttributesDict_Facebook(
            localFeaturesDF, localFeatureNamesDf, localEgoFeatDf))

    # nx.set_node_attributes mutates G and returns None, so return G itself.
    nx.set_node_attributes(G, node_gender_dict, "gender")
    return G


def getProtectedAttributesGPlus(G, localpath="Datasets/gplus/"):
    """Annotate ``G`` with the "gender" attribute from the Google+ ego files.

    Each ".feat" file found by ``getEgoFeats`` is paired by list position with
    an ".egofeat" file; the gender is read from feature position 0. Processing
    is best effort: when a pair cannot be processed, the ".egofeat" file name
    is printed and the pair is skipped.

    Args:
        G: networkx graph to annotate in place.
        localpath: directory holding the ego-network files.

    Returns:
        The same graph object ``G``.
    """
    node_gender_dict = {}
    gplusFeatFiles, _, gplusEgoFeatFiles = getEgoFeats(localpath)
    for index in range(len(gplusFeatFiles)):
        try:
            localFeaturesDF = pd.read_csv(
                os.path.join(localpath, gplusFeatFiles[index]), sep=" ", header=None)
            localEgoFeatDf = pd.read_csv(
                os.path.join(localpath, gplusEgoFeatFiles[index]), sep=" ", header=None)
            node_gender_dict.update(getprotectedAttributesDict_GPlus(
                localFeaturesDF, 0, localEgoFeatDf))
        except Exception:
            # Deliberately best effort, but no longer a bare ``except:`` so that
            # KeyboardInterrupt / SystemExit still stop a long-running load.
            print(gplusEgoFeatFiles[index])

    # nx.set_node_attributes mutates G and returns None, so return G itself.
    nx.set_node_attributes(G, node_gender_dict, "gender")
    return G


In [4]:
def initialize_community_attribute_Counter(communitiesList):
    """Create a zeroed counter per community.

    Args:
        communitiesList: sized collection of communities; only its length is used.

    Returns:
        dict mapping "Community_<i>" (i = 0 .. len-1) to a fresh ``{0: 0, 1: 0}``.
    """
    return {
        "Community_{}".format(position): {0: 0, 1: 0}
        for position in range(len(communitiesList))
    }


def count_protected_attributes_frequency(G, communitiesList, protectedAttributeCountDict, attribute):
    """Tally, per community, how many nodes have ``attribute`` equal to 0 versus anything else.

    Args:
        G: graph; ``G.nodes()[node][attribute]`` is read for every member.
        communitiesList: iterable of communities (iterables of node ids).
        protectedAttributeCountDict: counters keyed "Community_<i>", each with
            keys 0 and 1; updated in place.
        attribute: name of the node attribute to read.

    Returns:
        The same ``protectedAttributeCountDict`` object. A value equal to 0
        increments counter 0, any other value increments counter 1; a KeyError
        raised while looking up the node, attribute or counter skips that node.
    """
    for position, members in enumerate(communitiesList):
        label = "Community_{}".format(position)
        for member in members:
            try:
                bucket = 0 if G.nodes()[member][attribute] == 0 else 1
                protectedAttributeCountDict[label][bucket] += 1
            except KeyError:
                continue
    return protectedAttributeCountDict


def calculate_community_balance(protectedAttributeCountDict):
    """Add a "balance" entry (smaller count / larger count) to every community.

    Args:
        protectedAttributeCountDict: dict of per-community counters with keys
            0 and 1; updated in place.

    Returns:
        The same dictionary, each community now also holding "balance" in
        [0, 1]. A community with no counted node at all gets 0.0 instead of
        raising ZeroDivisionError.
    """
    for counts in protectedAttributeCountDict.values():
        red = counts[0]
        blue = counts[1]
        larger = max(red, blue)
        # larger == 0 means both groups are empty: there is nothing to balance.
        counts["balance"] = min(red, blue) / larger if larger else 0.0
    return protectedAttributeCountDict


def calculate_Fairness(G, communitiesList, attribute):
    """Compute per-community group counts and balance for a partition.

    Chains the three helpers: create zeroed counters, tally ``attribute`` over
    the members of every community, then add the "balance" ratio.

    Args:
        G: graph carrying ``attribute`` on its nodes.
        communitiesList: sized collection of communities (iterables of node ids).
        attribute: name of the node attribute to tally.

    Returns:
        dict mapping "Community_<i>" to ``{0: count, 1: count, "balance": ratio}``.
    """
    emptyCounters = initialize_community_attribute_Counter(communitiesList)
    filledCounters = count_protected_attributes_frequency(
        G, communitiesList, emptyCounters, attribute)
    return calculate_community_balance(filledCounters)


In [5]:
def countInCommunityDegree(G, communities):
    """Compute every node's degree inside the subgraph induced by its community.

    Args:
        G: graph offering ``subgraph(nodes)``.
        communities: iterable of communities (iterables of node ids).

    Returns:
        dict mapping "Community_<i>" to ``{node: degree within that community}``.
    """
    communitiesDegree = {}
    for position, members in enumerate(communities):
        induced = G.subgraph(members)
        communitiesDegree["Community_{}".format(position)] = {
            member: induced.degree(member) for member in induced.nodes()
        }
    return communitiesDegree


def getLowInCommunityDegreeNodes(communitiesNodeDegrees, percentage=30):
    """Select, per community, the nodes that are weakly attached to it.

    A node qualifies when its in-community degree is strictly below
    ``percentage`` percent of the community's node count.

    Args:
        communitiesNodeDegrees: dict mapping community name to
            ``{node: in-community degree}``.
        percentage: cut-off as a percentage of the community size; the default
            of 30 reproduces the previously hard-coded threshold.

    Returns:
        dict mapping community name to ``{node: degree}`` holding only the
        qualifying nodes, ordered by ascending degree.
    """
    lowInCommunityDegreeNodes = {}
    for communityName, community in communitiesNodeDegrees.items():
        threshold = len(community) * percentage / 100
        lowDegreeNodes = {
            node: degree for node, degree in community.items() if degree < threshold
        }
        lowInCommunityDegreeNodes[communityName] = dict(
            sorted(lowDegreeNodes.items(), key=lambda item: item[1]))
    return lowInCommunityDegreeNodes


def getLowInCommunityDegreeNodesProtectedAttribute(graph, lowInCommunityDegreeNodes, protectedAttributeLabel, attribute="gender"):
    """Keep the low-degree nodes whose protected attribute equals a given label.

    Args:
        graph: graph; ``graph.nodes()[node][attribute]`` is read for each node.
        lowInCommunityDegreeNodes: ``{node: degree}`` of one community; only the
            keys are used.
        protectedAttributeLabel: attribute value to select (e.g. 0 or 1).
        attribute: name of the node attribute to compare. Defaults to "gender",
            the previously hard-coded name, so graphs annotated with another
            attribute (e.g. "mature") can be handled as well.

    Returns:
        list of matching nodes in the iteration order of the input. Nodes that
        are unknown to the graph or lack the attribute are skipped, matching
        how count_protected_attributes_frequency treats them.
    """
    matchingNodes = []
    for node in lowInCommunityDegreeNodes:
        try:
            value = graph.nodes()[node][attribute]
        except KeyError:
            continue
        if value == protectedAttributeLabel:
            matchingNodes.append(node)
    return matchingNodes


# def calculate_global_fairness(fairnessDictionary):
#     globalFairness = 0
#     for communityInfo in fairnessDictionary.values():
#         globalFairness += communityInfo["balance"]
#     return globalFairness / len(fairnessDictionary.items())

def calculate_global_fairness(fairnessDictionary):
    """Compute the balance of the two groups over all communities together.

    Args:
        fairnessDictionary: dict of per-community entries holding the counts
            under keys 0 and 1.

    Returns:
        smaller total / larger total, in [0, 1]. Returns 0.0 when either group
        has no counted node (including an empty dictionary) instead of raising
        ZeroDivisionError.
    """
    zeroCounts = sum(communityInfo[0] for communityInfo in fairnessDictionary.values())
    oneCounts = sum(communityInfo[1] for communityInfo in fairnessDictionary.values())
    larger = max(zeroCounts, oneCounts)
    if larger == 0:
        return 0.0
    # Equal to min(zero/one, one/zero) whenever both totals are positive.
    return min(zeroCounts, oneCounts) / larger


def getCommunitiesWithLowFairness(fairnessDictionary, threshold):
    """Return the names of the communities whose "balance" is strictly below ``threshold``.

    Args:
        fairnessDictionary: dict mapping community name to an entry with a
            "balance" value.
        threshold: exclusive upper bound on the balance.

    Returns:
        list of community names, in the dictionary's iteration order.
    """
    return [
        communityName
        for communityName, communityInfo in fairnessDictionary.items()
        if communityInfo["balance"] < threshold
    ]


def seperateLowFairnessCommunities(lowFairnesCommunities, fairnessDictionary):
    """Split communities by which group is in the majority.

    Args:
        lowFairnesCommunities: iterable of community names to classify.
        fairnessDictionary: dict mapping community name to counts under keys 0 and 1.

    Returns:
        Tuple ``(zeroCommunities, oneCommunities)``: names whose count for
        group 0 is strictly larger go to the first list; all others, ties
        included, go to the second.
    """
    zeroCommunities, oneCommunities = [], []
    for name in lowFairnesCommunities:
        counts = fairnessDictionary[name]
        target = zeroCommunities if counts[0] > counts[1] else oneCommunities
        target.append(name)
    return zeroCommunities, oneCommunities


def _balanceOfCounts(communityInfo):
    """Return smaller/larger of the counts under keys 0 and 1, or 0.0 if a group is empty."""
    smaller = min(communityInfo[0], communityInfo[1])
    if smaller <= 0:
        return 0.0
    return smaller / max(communityInfo[0], communityInfo[1])


def updateFairnesDictionary(fairnessDictionary, startCommunity, destinationCommunity, startCommunityType):
    """Record the move of one node between two communities and refresh their balance.

    Args:
        fairnessDictionary: dict of per-community counts and "balance";
            updated in place.
        startCommunity: name of the community the node leaves.
        destinationCommunity: name of the community the node joins.
        startCommunityType: group (0 or 1) of the moved node.

    Returns:
        Tuple ``(fairnessDictionary, newStartFairness, newDestinationFairness)``.
        A community left with an empty group gets a balance of 0.0; previously
        that case raised ZeroDivisionError.
    """
    fairnessDictionary[startCommunity][startCommunityType] -= 1
    fairnessDictionary[destinationCommunity][startCommunityType] += 1

    newStartFairness = _balanceOfCounts(fairnessDictionary[startCommunity])
    newDestinationFairness = _balanceOfCounts(fairnessDictionary[destinationCommunity])
    fairnessDictionary[startCommunity]["balance"] = newStartFairness
    fairnessDictionary[destinationCommunity]["balance"] = newDestinationFairness

    print("New fairness for {}: {}".format(startCommunity, newStartFairness))
    print("New fairness for {}: {}".format(
        destinationCommunity, newDestinationFairness))

    return fairnessDictionary, newStartFairness, newDestinationFairness


def sendNodes(startCommunityName, startCommunityNodes, destinationCommunities, fairnessDictionary, globalFairness, startCommunityType):
    """Move candidate nodes out of one community until its balance reaches the global one.

    Nodes are taken from the front of ``startCommunityNodes`` and credited to
    the current destination community; once that destination's balance reaches
    ``globalFairness`` the next destination is tried. Only the counters in
    ``fairnessDictionary`` change; the identity of a moved node is discarded.

    Args:
        startCommunityName: name of the community giving nodes away.
        startCommunityNodes: list of candidate nodes; consumed in place
            (one ``pop(0)`` per move).
        destinationCommunities: ordered list of receiving community names.
        fairnessDictionary: per-community counts and "balance"; updated in place.
        globalFairness: target balance.
        startCommunityType: group (0 or 1) of the nodes being moved.

    Returns:
        The updated ``fairnessDictionary``. With no destination community the
        dictionary is returned unchanged instead of raising IndexError.
    """
    if len(destinationCommunities) == 0:
        return fairnessDictionary

    destinationCounter = 0
    startCommunityCurrentFairness = fairnessDictionary[startCommunityName]["balance"]
    destinationCommunityCurrentFairness = fairnessDictionary[
        destinationCommunities[destinationCounter]]["balance"]

    while (startCommunityCurrentFairness < globalFairness
           and len(startCommunityNodes) > 0
           and destinationCounter < len(destinationCommunities)):
        destinationName = destinationCommunities[destinationCounter]
        print("Destination Community: {}".format(destinationName))
        print("Destination Community Current Fairness: {}".format(destinationCommunityCurrentFairness))

        if destinationCommunityCurrentFairness >= globalFairness:
            # This destination is balanced enough; advance to the next one.
            destinationCounter += 1
            if destinationCounter < len(destinationCommunities):
                destinationCommunityCurrentFairness = fairnessDictionary[
                    destinationCommunities[destinationCounter]]["balance"]
        else:
            # The node itself is dropped; only the counters are adjusted.
            startCommunityNodes.pop(0)
            fairnessDictionary, startCommunityCurrentFairness, destinationCommunityCurrentFairness = updateFairnesDictionary(
                fairnessDictionary, startCommunityName, destinationName, startCommunityType)

    return fairnessDictionary


def fixCommunityFairness(graph, fairnessDictionary, lowInCommunityDegreeNodes):
    """Rebalance the communities whose balance is below the global balance.

    Communities below the global balance are split into those dominated by
    group 0 and the rest. For each of them the weakly attached nodes of the
    relevant group are collected and handed to ``sendNodes``, with the
    communities of the opposite list as destinations.

    Args:
        graph: graph carrying the protected attribute on its nodes.
        fairnessDictionary: per-community counts and "balance"; the helpers
            update it in place.
        lowInCommunityDegreeNodes: dict mapping community name to
            ``{node: degree}`` of its weakly attached nodes.

    Returns:
        The updated fairness dictionary, or None (after printing a message)
        when either of the two lists of unbalanced communities is empty.
    """
    globalFairness = calculate_global_fairness(fairnessDictionary)
    underBalanced = getCommunitiesWithLowFairness(fairnessDictionary, globalFairness)
    zeroCommunities, oneCommunities = seperateLowFairnessCommunities(
        underBalanced, fairnessDictionary)

    print(len(zeroCommunities))
    print(len(oneCommunities))
    if not zeroCommunities or not oneCommunities:
        print("Fairness cannot  be fixed")
        return None

    # Candidate nodes willing to leave: members of the over-represented group.
    zeroCommunitiesDict = {
        name: getLowInCommunityDegreeNodesProtectedAttribute(
            graph, lowInCommunityDegreeNodes[name], 0)
        for name in zeroCommunities
    }
    oneCommunitiesDict = {
        name: getLowInCommunityDegreeNodesProtectedAttribute(
            graph, lowInCommunityDegreeNodes[name], 1)
        for name in oneCommunities
    }

    for name in zeroCommunities:
        print("Test for {}".format(name))
        print()
        fairnessDictionary = sendNodes(
            name, zeroCommunitiesDict[name], oneCommunities,
            fairnessDictionary, globalFairness, 0)
    for name in oneCommunities:
        print("test for {}".format(name))
        fairnessDictionary = sendNodes(
            name, oneCommunitiesDict[name], zeroCommunities,
            fairnessDictionary, globalFairness, 1)

    return fairnessDictionary


In [6]:
def convert_Communities_To_Fairness_CSV(paths):
    """Turn stored community assignments into per-community fairness CSV files.

    Each name in ``paths`` is read from "communities/<name>" as a tab-separated
    file without header (column 0 = node, column 1 = community id). The file
    name prefix selects the notebook-level graph and attribute to evaluate, and
    the result is written to "fairness_CSV/<name>_fairness.csv". Names with an
    unknown prefix are read but produce no output.

    Args:
        paths: iterable of file names inside the "communities" folder.
    """
    # The lambdas defer the lookup of the notebook globals, so only the graph
    # of a dataset that actually occurs in ``paths`` has to exist.
    # NOTE(review): pokec uses the full pokec_test graph while the other
    # datasets use their largest-component graph - confirm this is intended.
    datasetByPrefix = (
        ("twitch", lambda: twitchGamersCCGraph, "mature"),
        ("gplus", lambda: gplusCCGraph, "gender"),
        ("facebook", lambda: facebookLargestCCGraph, "gender"),
        ("deezer", lambda: deezerLargestCCGraph, "gender"),
        ("pokec", lambda: pokec_test, "gender"),
    )
    for path in paths:
        communityDF = pd.read_csv("communities/{}".format(path), sep="\t", header=None)
        communitiesList = communityDF.groupby(1).aggregate(lambda x: list(x))[0].to_list()
        for prefix, graphGetter, attribute in datasetByPrefix:
            if path.startswith(prefix):
                protected_count = calculate_Fairness(graphGetter(), communitiesList, attribute)
                pd.DataFrame(protected_count).T.to_csv(
                    "fairness_CSV/{}_fairness.csv".format(path))
                break
        

In [7]:
def most_frequent(list):
    """Return the element that occurs most often in ``list``.

    Args:
        list: non-empty list of hashable elements (the parameter name shadows
            the builtin and is kept only for interface compatibility).

    Returns:
        The element with the highest ``list.count``.

    Raises:
        ValueError: if ``list`` is empty.
    """
    candidates = set(list)
    return max(candidates, key=lambda item: list.count(item))


In [6]:
# Execution Time 2mins
# gplus_graph = nx.read_edgelist("{}gplus/gplus_combined.txt".format(path), nodetype=str, delimiter=" ", create_using=nx.DiGraph())


In [8]:
# Load the undirected Google+ edge list (space separated, node ids kept as strings).
# The former ".format(path)" was dropped: the string has no "{}" placeholder, so the
# call never changed the file name.
# NOTE(review): unlike most loaders this file is not read from under `path` - confirm its location.
gplus_graph = nx.read_edgelist(
    "gplus/gplus_combined_undirected.txt", nodetype=str, delimiter=" ", create_using=nx.Graph())


In [9]:
# Write the "gender" attribute onto gplus_graph's nodes in place. The names printed
# below are the .egofeat files whose processing raised an error and was skipped.
getProtectedAttributesGPlus(gplus_graph)


101560853443212199687.egofeat
109327480479767108490.egofeat
115625564993990145546.egofeat
116899029375914044550.egofeat


In [10]:
# Restrict the analysis to the connected component with the most nodes.
gplusCC = max(nx.connected_components(gplus_graph), key=len)
gplusCCGraph = gplus_graph.subgraph(gplusCC)


In [10]:
# Louvain communities of the largest Google+ component.
# NOTE(review): no seed is passed, so the partition may not be reproducible across runs - confirm.
louvain_communities_Gplus = nx.algorithms.community.louvain_communities(
    gplusCCGraph)


In [11]:
# Persist the partition: one "<node>\t<community index>" line per node.
community_counter = 0
with open("output/gplusLouvain.txt", "w") as f:
    for community in louvain_communities_Gplus:
        for node in community:
            f.write("{}\t{}\n".format(node, community_counter))
        community_counter += 1


In [12]:
# Per-community gender counts and balance for the Louvain partition.
# (The former "genderCountGplus = {}" was a dead store, overwritten right away.)
genderCountGplus = calculate_Fairness(
    gplusCCGraph, louvain_communities_Gplus, 'gender')


In [13]:
genderCountGplus


{'Community_0': {0: 9951, 1: 11309, 'balance': 0.8799186488637368},
 'Community_1': {0: 143, 1: 363, 'balance': 0.3939393939393939},
 'Community_2': {0: 869, 1: 2758, 'balance': 0.3150833937635968},
 'Community_3': {0: 78, 1: 108, 'balance': 0.7222222222222222},
 'Community_4': {0: 61, 1: 465, 'balance': 0.13118279569892474},
 'Community_5': {0: 14, 1: 32, 'balance': 0.4375},
 'Community_6': {0: 12444, 1: 13850, 'balance': 0.8984837545126354},
 'Community_7': {0: 163, 1: 352, 'balance': 0.4630681818181818},
 'Community_8': {0: 224, 1: 246, 'balance': 0.9105691056910569},
 'Community_9': {0: 111, 1: 353, 'balance': 0.31444759206798867},
 'Community_10': {0: 5, 1: 40, 'balance': 0.125},
 'Community_11': {0: 746, 1: 650, 'balance': 0.871313672922252},
 'Community_12': {0: 5488, 1: 2258, 'balance': 0.41144314868804666},
 'Community_13': {0: 2277, 1: 1775, 'balance': 0.7795344751866491},
 'Community_14': {0: 5, 1: 18, 'balance': 0.2777777777777778},
 'Community_15': {0: 534, 1: 1529, 'balan

In [15]:
# Label-propagation communities of the largest Google+ component.
label_propagation_Gplus = nx.algorithms.community.label_propagation_communities(
    gplusCCGraph)


In [16]:
# Persist the partition: one "<node>\t<community index>" line per node.
community_counter = 0
with open("output/gplusLabelPropagation.txt", "w") as f:
    for community in label_propagation_Gplus:
        for node in community:
            f.write("{}\t{}\n".format(node, community_counter))
        community_counter += 1


In [10]:
# Twitch gamers graph: comma-separated edge list under `path`, node ids parsed as int.
twitchGamers_graph = nx.read_edgelist(
    "{}twitch_gamers/large_twitch_edges.csv".format(path), nodetype=int, delimiter=",")


In [11]:
# Twitch gamers features; the "numeric_id" and "mature" columns are used by
# getProtectedAttributesTwitch.
twitchGamers_features = pd.read_csv(
    "{}twitch_gamers/large_twitch_features.csv".format(path))


In [12]:
# Write the "mature" attribute onto the graph's nodes in place (return value unused).
getProtectedAttributesTwitch(twitchGamers_graph, twitchGamers_features)


In [13]:
# Restrict the analysis to the connected component with the most nodes.
twitchGamersCC = max(nx.connected_components(twitchGamers_graph), key=len)
twitchGamersCCGraph = twitchGamers_graph.subgraph(twitchGamersCC)


In [10]:
# Execution time: about 5 minutes.
# Louvain communities of the largest Twitch component.
louvain_communities_twitch = nx.algorithms.community.louvain_communities(
    twitchGamersCCGraph)


In [18]:
# Persist the partition: one "<node>\t<community index>" line per node.
community_counter = 0
with open("output/twitchLouvain.txt", "w") as f:
    for community in louvain_communities_twitch:
        for node in community:
            f.write("{}\t{}\n".format(node, community_counter))
        community_counter += 1


In [22]:
# Per-community counts and balance of the "mature" attribute (despite the variable
# name, this is not gender) for the Twitch Louvain partition.
# (The former "louvain_gender_count = {}" was a dead store, overwritten right away.)
louvain_gender_count = calculate_Fairness(
    twitchGamers_graph, louvain_communities_twitch, "mature")
louvain_gender_count


{'Community_0': {0: 3486, 1: 1523, 'balance': 0.4368904188181297},
 'Community_1': {0: 357, 1: 186, 'balance': 0.5210084033613446},
 'Community_2': {0: 10551, 1: 6717, 'balance': 0.6366221211259596},
 'Community_3': {0: 3754, 1: 1288, 'balance': 0.34310069259456577},
 'Community_4': {0: 237, 1: 266, 'balance': 0.8909774436090225},
 'Community_5': {0: 478, 1: 230, 'balance': 0.4811715481171548},
 'Community_6': {0: 189, 1: 153, 'balance': 0.8095238095238095},
 'Community_7': {0: 6981, 1: 10957, 'balance': 0.6371269508077029},
 'Community_8': {0: 556, 1: 173, 'balance': 0.31115107913669066},
 'Community_9': {0: 19502, 1: 16437, 'balance': 0.8428366321402933},
 'Community_10': {0: 3311, 1: 5490, 'balance': 0.6030965391621129},
 'Community_11': {0: 3446, 1: 1177, 'balance': 0.3415554265815438},
 'Community_12': {0: 1419, 1: 792, 'balance': 0.5581395348837209},
 'Community_13': {0: 910, 1: 254, 'balance': 0.27912087912087913},
 'Community_14': {0: 12945, 1: 20515, 'balance': 0.6310017060687

In [19]:
# Execution time: about 12 minutes.
# Label-propagation communities of the largest Twitch component.
twitchGamersCCGraphComms = nx.algorithms.community.label_propagation_communities(
    twitchGamersCCGraph)


In [22]:
# Materialise the communities as a list so they can be iterated more than once.
# TODO: maybe get the top 5 communities?
twitchLabelPropagationList = list(twitchGamersCCGraphComms)


In [25]:
# Persist the partition: one "<node>\t<community index>" line per node.
community_counter = 0
with open("output/twitchLabelPropagation.txt", "w") as f:
    for community in twitchLabelPropagationList:
        for node in community:
            f.write("{}\t{}\n".format(node, community_counter))
        community_counter += 1


In [15]:
# Per-community counts and balance of the "mature" attribute for the Twitch
# label-propagation partition.
# (The former "genderCountTwitch = {}" was a dead store, overwritten right away.)
genderCountTwitch = calculate_Fairness(
    twitchGamers_graph, twitchLabelPropagationList, "mature")


In [16]:
genderCountTwitch


{'Community_0': {0: 83131, 1: 76960, 'balance': 0.9257677641313108},
 'Community_1': {0: 1563, 1: 1017, 'balance': 0.6506717850287908},
 'Community_2': {0: 2366, 1: 777, 'balance': 0.32840236686390534},
 'Community_3': {0: 473, 1: 135, 'balance': 0.2854122621564482},
 'Community_4': {0: 2, 1: 0, 'balance': 0.0},
 'Community_5': {0: 580, 1: 53, 'balance': 0.09137931034482759},
 'Community_6': {0: 854, 1: 44, 'balance': 0.05152224824355972},
 'Community_7': {0: 1, 1: 1, 'balance': 1.0},
 'Community_8': {0: 0, 1: 2, 'balance': 0.0},
 'Community_9': {0: 2, 1: 0, 'balance': 0.0},
 'Community_10': {0: 1, 1: 1, 'balance': 1.0},
 'Community_11': {0: 0, 1: 2, 'balance': 0.0},
 'Community_12': {0: 2, 1: 0, 'balance': 0.0},
 'Community_13': {0: 6, 1: 0, 'balance': 0.0},
 'Community_14': {0: 2, 1: 0, 'balance': 0.0},
 'Community_15': {0: 4, 1: 0, 'balance': 0.0},
 'Community_16': {0: 2, 1: 0, 'balance': 0.0},
 'Community_17': {0: 2, 1: 1, 'balance': 0.5},
 'Community_18': {0: 1, 1: 1, 'balance': 1

In [17]:
# Execution time: about 2m 32s.
# Kernighan-Lin bisection of the largest Twitch component into two parts.
twitchKernighan = nx.community.kernighan_lin_bisection(
    twitchGamersCCGraph, weight="weight")


In [19]:
# Counts and balance of the "mature" attribute for the two Kernighan-Lin parts.
# (The former "genderCounttwitchKernighan = {}" was a dead store, overwritten right away.)
genderCounttwitchKernighan = calculate_Fairness(
    twitchGamers_graph, twitchKernighan, "mature")


In [21]:
genderCounttwitchKernighan


{'Community_0': {0: 50786, 1: 33271, 'balance': 0.6551214901744575},
 'Community_1': {0: 38295, 1: 45762, 'balance': 0.836829684017307}}

In [14]:
# Deezer Europe graph: comma-separated edge list under `path`, node ids parsed as int.
deezer_graph = nx.read_edgelist(
    "{}deezer_europe/deezer_europe/deezer_europe_edges.csv".format(path), nodetype=int, delimiter=",")


In [15]:
# Read the Deezer genders ("id" and "target" columns are used later).
# NOTE(review): `path` already ends with "/", so this template yields "Datasets//deezer_europe/...".
deezer_gendersDf = pd.read_csv(
    "{}/deezer_europe/deezer_europe/deezer_europe_target.csv".format(path))


In [16]:
# Write the "gender" attribute onto the graph's nodes in place (return value unused).
getProtectedAttributesDeezer(deezer_graph, deezer_gendersDf)


In [17]:
# Restrict the analysis to the connected component with the most nodes.
deezerLargestCC = max(nx.connected_components(deezer_graph), key=len)
deezerLargestCCGraph = deezer_graph.subgraph(deezerLargestCC)


In [29]:
# Label-propagation communities of the largest Deezer component.
deezerLabelPropagationComms = nx.algorithms.community.label_propagation_communities(
    deezerLargestCCGraph)


In [30]:
# Materialise the communities as a list so they can be iterated more than once.
deezerLabelPropagationList = list(deezerLabelPropagationComms)


In [31]:
# Persist the partition: one "<node>\t<community index>" line per node.
community_counter = 0
with open("output/deezerLabelPropagation.txt", "w") as f:
    for community in deezerLabelPropagationList:
        for node in community:
            f.write("{}\t{}\n".format(node, community_counter))
        community_counter += 1


In [32]:
# Louvain communities of the largest Deezer component.
# NOTE(review): no seed is passed, so the partition may not be reproducible across runs - confirm.
louvain_communities_deezer = nx.community.louvain_communities(
    deezerLargestCCGraph)


In [34]:
# Persist the partition: one "<node>\t<community index>" line per node.
community_counter = 0
with open("output/deezerLouvain.txt", "w") as f:
    for community in louvain_communities_deezer:
        for node in community:
            f.write("{}\t{}\n".format(node, community_counter))
        community_counter += 1


In [10]:
# Execution time 2m 26.1s
# pokec_graph = nx.read_edgelist("{}/pokec/soc-pokec-relationships.txt".format(path), nodetype=int, delimiter="\t", create_using=nx.DiGraph())


In [18]:
# Undirected Pokec graph: tab-separated edge list, node ids parsed as int.
# NOTE(review): this file is read relative to the working directory, not from under `path` - confirm.
pokec_test = nx.read_edgelist("pokec/soc-pokec-undirected.txt",
                              nodetype=int, delimiter="\t", create_using=nx.Graph())


In [19]:
# Pokec profiles, keeping only columns 0 and 3 (used as node id and gender by
# getProtectedAttributesPokec).
# NOTE(review): `path` already ends with "/", so this template yields "Datasets//pokec/...".
pokec_features = pd.read_csv(
    "{}/pokec/soc-pokec-profiles.txt".format(path), delimiter="\t", header=None, usecols=[0, 3])


In [20]:
# Write the "gender" attribute onto the graph's nodes in place (return value unused).
getProtectedAttributesPokec(pokec_test, pokec_features)


In [21]:
# Restrict the analysis to the connected component with the most nodes.
pokecCC = max(nx.connected_components(pokec_test), key=len)
pokecCCGraph = pokec_test.subgraph(pokecCC)


In [11]:
# Label-propagation communities of the largest Pokec component.
labelPropagationPokec = nx.algorithms.community.label_propagation_communities(
    pokecCCGraph)


In [12]:
# Persist the partition: one "<node>\t<community index>" line per node.
community_counter = 0
with open("output/pokecLabelPropagation.txt", "w") as f:
    for community in labelPropagationPokec:
        for node in community:
            f.write("{}\t{}\n".format(node, community_counter))
        community_counter += 1


In [12]:
# Louvain communities of the largest Pokec component.
# NOTE(review): no seed is passed, so the partition may not be reproducible across runs - confirm.
louvain_communities_Pokec = nx.algorithms.community.louvain_communities(
    pokecCCGraph)


In [13]:
# Persist the partition: one "<node>\t<community index>" line per node.
community_counter = 0
with open("output/pokecLouvain.txt", "w") as f:
    for community in louvain_communities_Pokec:
        for node in community:
            f.write("{}\t{}\n".format(node, community_counter))
        community_counter += 1


In [63]:
# Combined Facebook ego-network graph: space-separated edge list under `path`, int node ids.
facebook_graph_all = nx.read_edgelist(
    "{}facebook/facebook/facebook_combined.txt".format(path), nodetype=int, delimiter=" ")


In [64]:
# Write the "gender" attribute onto the graph's nodes in place (return value unused).
getProtectedAttributesFacebook(facebook_graph_all)


In [65]:
# Restrict the analysis to the connected component with the most nodes.
facebookLargestCC = max(nx.connected_components(facebook_graph_all), key=len)
facebookLargestCCGraph = facebook_graph_all.subgraph(facebookLargestCC)


In [77]:
# Label-propagation communities of the largest Facebook component.
facebookComms = nx.community.label_propagation_communities(
    facebookLargestCCGraph)


In [78]:
# Materialise the communities as a list so they can be iterated more than once.
facebookLabelPropagationList = list(facebookComms)


In [43]:
# Persist the partition: one "<node>\t<community index>" line per node.
community_counter = 0
with open("output/facebookLabelPropagation.txt", "w") as f:
    for community in facebookLabelPropagationList:
        for node in community:
            f.write("{}\t{}\n".format(node, community_counter))
        community_counter += 1


In [168]:
# Per-community gender counts and balance for the Facebook label-propagation partition.
# (The former "genderCountFacebook = {}" was a dead store, overwritten right away.)
genderCountFacebook = calculate_Fairness(
    facebook_graph_all, facebookLabelPropagationList, 'gender')


In [132]:
# Balance of the two gender groups over all Facebook communities together.
print(calculate_global_fairness(genderCountFacebook))
# print(calculate_global_fairness2(genderCountFacebook))

0.6114217252396166


In [169]:
# In-community degree of every node, per label-propagation community.
testcommDegree = countInCommunityDegree(facebookLargestCCGraph, facebookLabelPropagationList)
# testcommDegree


# Nodes whose in-community degree is below 30% of their community's size.
facebookLowDegreeNodes = getLowInCommunityDegreeNodes(testcommDegree)
# facebookLowDegreeNodes

In [170]:
genderCountFacebook

{'Community_0': {0: 122, 1: 76, 'balance': 0.6229508196721312},
 'Community_1': {0: 22, 1: 14, 'balance': 0.6363636363636364},
 'Community_2': {0: 8, 1: 2, 'balance': 0.25},
 'Community_3': {0: 4, 1: 4, 'balance': 1.0},
 'Community_4': {0: 5, 1: 3, 'balance': 0.6},
 'Community_5': {0: 23, 1: 11, 'balance': 0.4782608695652174},
 'Community_6': {0: 2, 1: 0, 'balance': 0.0},
 'Community_7': {0: 168, 1: 47, 'balance': 0.27976190476190477},
 'Community_8': {0: 8, 1: 8, 'balance': 1.0},
 'Community_9': {0: 2, 1: 1, 'balance': 0.5},
 'Community_10': {0: 1, 1: 2, 'balance': 0.5},
 'Community_11': {0: 674, 1: 356, 'balance': 0.5281899109792285},
 'Community_12': {0: 4, 1: 2, 'balance': 0.5},
 'Community_13': {0: 5, 1: 2, 'balance': 0.4},
 'Community_14': {0: 1, 1: 2, 'balance': 0.5},
 'Community_15': {0: 3, 1: 0, 'balance': 0.0},
 'Community_16': {0: 383, 1: 369, 'balance': 0.9634464751958225},
 'Community_17': {0: 6, 1: 4, 'balance': 0.6666666666666666},
 'Community_18': {0: 0, 1: 2, 'balance'

In [171]:
# NOTE: the helpers update genderCountFacebook in place and, unless the function bails
# out with None, return that same dictionary - so newFairness and genderCountFacebook
# refer to one object after this cell and re-running it is not idempotent.
newFairness = fixCommunityFairness(facebookLargestCCGraph, genderCountFacebook, facebookLowDegreeNodes)

25
7
Test for Community_2

Test for Community_4

Test for Community_5

Destination Community: Community_10
Destination Community Current Fairness: 0.5
New fairness for Community_5: 0.5
New fairness for Community_10: 1.0
Destination Community: Community_10
Destination Community Current Fairness: 1.0
Destination Community: Community_14
Destination Community Current Fairness: 0.5
New fairness for Community_5: 0.5238095238095238
New fairness for Community_14: 1.0
Destination Community: Community_14
Destination Community Current Fairness: 1.0
Destination Community: Community_18
Destination Community Current Fairness: 0.0
New fairness for Community_5: 0.55
New fairness for Community_18: 0.5
Destination Community: Community_18
Destination Community Current Fairness: 0.5
New fairness for Community_5: 0.5789473684210527
New fairness for Community_18: 1.0
Test for Community_6

Test for Community_7

Destination Community: Community_10
Destination Community Current Fairness: 1.0
Destination Commun

In [173]:
newFairness

{'Community_0': {0: 122, 1: 76, 'balance': 0.6229508196721312},
 'Community_1': {0: 22, 1: 14, 'balance': 0.6363636363636364},
 'Community_2': {0: 8, 1: 2, 'balance': 0.25},
 'Community_3': {0: 4, 1: 4, 'balance': 1.0},
 'Community_4': {0: 5, 1: 3, 'balance': 0.6},
 'Community_5': {0: 19, 1: 11, 'balance': 0.5789473684210527},
 'Community_6': {0: 2, 1: 0, 'balance': 0.0},
 'Community_7': {0: 162, 1: 47, 'balance': 0.29012345679012347},
 'Community_8': {0: 8, 1: 8, 'balance': 1.0},
 'Community_9': {0: 2, 1: 1, 'balance': 0.5},
 'Community_10': {0: 2, 1: 2, 'balance': 1.0},
 'Community_11': {0: 674, 1: 356, 'balance': 0.5281899109792285},
 'Community_12': {0: 4, 1: 2, 'balance': 0.5},
 'Community_13': {0: 5, 1: 2, 'balance': 0.4},
 'Community_14': {0: 2, 1: 2, 'balance': 1.0},
 'Community_15': {0: 3, 1: 0, 'balance': 0.0},
 'Community_16': {0: 383, 1: 369, 'balance': 0.9634464751958225},
 'Community_17': {0: 6, 1: 4, 'balance': 0.6666666666666666},
 'Community_18': {0: 2, 1: 2, 'balance'

In [88]:
genderCountFacebook

{'Community_0': {0: 122, 1: 76, 'balance': 0.6229508196721312},
 'Community_1': {0: 22, 1: 14, 'balance': 0.6363636363636364},
 'Community_2': {0: 8, 1: 2, 'balance': 0.25},
 'Community_3': {0: 4, 1: 4, 'balance': 1.0},
 'Community_4': {0: 5, 1: 3, 'balance': 0.6},
 'Community_5': {0: 23, 1: 11, 'balance': 0.4782608695652174},
 'Community_6': {0: 2, 1: 0, 'balance': 0.0},
 'Community_7': {0: 165, 1: 47, 'balance': 0.28484848484848485},
 'Community_8': {0: 8, 1: 8, 'balance': 1.0},
 'Community_9': {0: 2, 1: 1, 'balance': 0.5},
 'Community_10': {0: 1, 1: 2, 'balance': 0.5},
 'Community_11': {0: 674, 1: 356, 'balance': 0.5281899109792285},
 'Community_12': {0: 4, 1: 2, 'balance': 0.5},
 'Community_13': {0: 5, 1: 2, 'balance': 0.4},
 'Community_14': {0: 1, 1: 2, 'balance': 0.5},
 'Community_15': {0: 3, 1: 0, 'balance': 0.0},
 'Community_16': {0: 383, 1: 369, 'balance': 0.9634464751958225},
 'Community_17': {0: 6, 1: 4, 'balance': 0.6666666666666666},
 'Community_18': {0: 1, 1: 2, 'balance'

In [70]:
genderCountFacebook


{'Community_0': {0: 122, 1: 76, 'balance': 0.6229508196721312},
 'Community_1': {0: 22, 1: 14, 'balance': 0.6363636363636364},
 'Community_2': {0: 8, 1: 2, 'balance': 0.25},
 'Community_3': {0: 4, 1: 4, 'balance': 1.0},
 'Community_4': {0: 5, 1: 3, 'balance': 0.6},
 'Community_5': {0: 23, 1: 11, 'balance': 0.4782608695652174},
 'Community_6': {0: 2, 1: 0, 'balance': 0.0},
 'Community_7': {0: 168, 1: 47, 'balance': 0.27976190476190477},
 'Community_8': {0: 8, 1: 8, 'balance': 1.0},
 'Community_9': {0: 2, 1: 1, 'balance': 0.5},
 'Community_10': {0: 1, 1: 2, 'balance': 0.5},
 'Community_11': {0: 674, 1: 356, 'balance': 0.5281899109792285},
 'Community_12': {0: 4, 1: 2, 'balance': 0.5},
 'Community_13': {0: 5, 1: 2, 'balance': 0.4},
 'Community_14': {0: 1, 1: 2, 'balance': 0.5},
 'Community_15': {0: 3, 1: 0, 'balance': 0.0},
 'Community_16': {0: 383, 1: 369, 'balance': 0.9634464751958225},
 'Community_17': {0: 6, 1: 4, 'balance': 0.6666666666666666},
 'Community_18': {0: 0, 1: 2, 'balance'

In [41]:
# Louvain communities of the largest Facebook component.
# NOTE(review): no seed is passed, so the partition may not be reproducible across runs - confirm.
louvain_communities_facebook = nx.community.louvain_communities(
    facebookLargestCCGraph)


In [44]:
# Persist the partition: one "<node>\t<community index>" line per node.
community_counter = 0
with open("output/facebookLouvain.txt", "w") as f:
    for community in louvain_communities_facebook:
        for node in community:
            f.write("{}\t{}\n".format(node, community_counter))
        community_counter += 1


In [45]:
# Girvan-Newman on the largest Facebook component. The result is only stored here;
# no later cell visible in this notebook consumes it.
girvan_newman_facebook = nx.algorithms.community.girvan_newman(
    facebookLargestCCGraph)


In [25]:
# Convert every stored partition in "communities/" into a fairness CSV. The graphs
# referenced by convert_Communities_To_Fairness_CSV must already exist for the
# datasets whose files are present.
communitiesPathList = os.listdir("communities")
# communitiesPathList
convert_Communities_To_Fairness_CSV(communitiesPathList)


In [47]:
# Small built-in graph used to try out the fairness helpers.
karate = nx.karate_club_graph()


In [42]:
# Random G(n=100, p=0.4) graph; not referenced by any later cell visible here.
testGraph = nx.gnp_random_graph(100,0.4)

In [56]:
# Assign a random binary "gender" to every karate node. A dedicated, seeded generator
# keeps the assignment reproducible across kernel restarts, and the keys are taken
# from the graph itself instead of assuming the node ids are 0..n-1.
# (`random` is imported in the notebook's import cell.)
attribute_rng = random.Random(0)
attrbutes = {}
for node in karate.nodes():
    attrbutes[node] = attribute_rng.randint(0, 1)
attrbutes

{0: 0,
 1: 0,
 2: 0,
 3: 0,
 4: 0,
 5: 1,
 6: 1,
 7: 1,
 8: 0,
 9: 1,
 10: 1,
 11: 0,
 12: 0,
 13: 1,
 14: 1,
 15: 1,
 16: 0,
 17: 0,
 18: 0,
 19: 1,
 20: 1,
 21: 0,
 22: 0,
 23: 0,
 24: 0,
 25: 0,
 26: 0,
 27: 0,
 28: 0,
 29: 0,
 30: 0,
 31: 0,
 32: 1,
 33: 1}

In [57]:
# Store the random labels on the karate nodes under the "gender" attribute.
nx.set_node_attributes(karate, attrbutes, 'gender')


In [58]:
# Label-propagation communities of the karate graph.
karateLabelPropagation = nx.algorithms.community.label_propagation_communities(
    karate)


In [59]:
# Per-community gender counts and balance for the karate partition.
test = calculate_Fairness(karate, karateLabelPropagation, "gender")
test


{'Community_0': {0: 11, 1: 4, 'balance': 0.36363636363636365},
 'Community_1': {0: 10, 1: 6, 'balance': 0.6},
 'Community_2': {0: 1, 1: 2, 'balance': 0.5}}

In [60]:
# In-community degree of every karate node.
# NOTE: this rebinds testcommDegree, which the Facebook cells also use.
testcommDegree = countInCommunityDegree(karate, karateLabelPropagation)
testcommDegree


{'Community_0': {0: 12,
  1: 7,
  3: 5,
  4: 2,
  7: 3,
  10: 2,
  11: 1,
  12: 2,
  13: 3,
  17: 2,
  19: 2,
  21: 2,
  24: 2,
  25: 2,
  31: 3},
 'Community_1': {32: 11,
  33: 14,
  2: 5,
  8: 4,
  9: 2,
  14: 2,
  15: 2,
  18: 2,
  20: 2,
  22: 2,
  23: 4,
  26: 2,
  27: 3,
  28: 2,
  29: 4,
  30: 3},
 'Community_2': {16: 2, 5: 2, 6: 2}}

In [61]:
# Recomputes the in-community degrees (same call as the previous cell); the bare
# expression below is not displayed because it is not the cell's last statement.
testcommDegree = countInCommunityDegree(karate, karateLabelPropagation)
testcommDegree


# Nodes whose in-community degree is below 30% of their community's size.
karateLowDegreeNodes = getLowInCommunityDegreeNodes(testcommDegree)
karateLowDegreeNodes


{'Community_0': {11: 1, 4: 2, 10: 2, 12: 2, 17: 2, 19: 2, 21: 2, 24: 2, 25: 2},
 'Community_1': {9: 2, 14: 2, 15: 2, 18: 2, 20: 2, 22: 2, 26: 2, 28: 2},
 'Community_2': {}}

In [62]:
# Try the rebalancing on the karate partition; it prints a message and returns None
# when there is no under-balanced community on one of the two sides.
fixCommunityFairness(karate, test, karateLowDegreeNodes)


Fairness cannot  be fixed


In [20]:
# Weakly attached nodes of Community_0 whose gender is 0.
getLowInCommunityDegreeNodesProtectedAttribute(
    karate, karateLowDegreeNodes["Community_0"], 0)


[10, 11, 12, 17, 19]

In [89]:
# Show the {node: in-community degree} mapping of each community.
for community in testcommDegree.values():
    print(community)


{0: 12, 1: 7, 3: 5, 4: 2, 7: 3, 10: 2, 11: 1, 12: 2, 13: 3, 17: 2, 19: 2, 21: 2, 24: 2, 25: 2, 31: 3}
{32: 11, 33: 14, 2: 5, 8: 4, 9: 2, 14: 2, 15: 2, 18: 2, 20: 2, 22: 2, 23: 4, 26: 2, 27: 3, 28: 2, 29: 4, 30: 3}
{16: 2, 5: 2, 6: 2}


In [82]:
# countInCommunityDegree names its keys "Community_<index>"; the former key
# "Community0" (no underscore) raises KeyError against the current helper.
test1 = testcommDegree["Community_0"]
test1


{0: 12,
 1: 7,
 3: 5,
 4: 2,
 7: 3,
 10: 2,
 11: 1,
 12: 2,
 13: 3,
 17: 2,
 19: 2,
 21: 2,
 24: 2,
 25: 2,
 31: 3}

In [88]:
# Manual check of the threshold used by getLowInCommunityDegreeNodes: keep the nodes
# whose degree is below 30% of the community size.
test2 = dict(filter(lambda elem: elem[1] < len(
    test1.keys()) * 30 / 100, test1.items()))
test2


{4: 2,
 7: 3,
 10: 2,
 11: 1,
 12: 2,
 13: 3,
 17: 2,
 19: 2,
 21: 2,
 24: 2,
 25: 2,
 31: 3}