In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import os
import glob
import itertools
import time


In [None]:
# Root folder of the raw datasets; loading cells build file names with
# "...".format(path).
path = "Datasets/"

In [None]:
def getProtectedAttributesTwitch(G, df):
    """Attach the Twitch ``protected`` attribute to the nodes of ``G``.

    Parameters
    ----------
    G : graph whose nodes are annotated in place.
    df : table with a ``numeric_id`` column (node id) and a ``protected``
        column (attribute value).

    Returns
    -------
    The same graph object ``G``, now carrying the ``"protected"`` attribute.
    """
    protected_attributes = dict(zip(df["numeric_id"], df["protected"]))
    # set_node_attributes mutates G and returns None, so return G itself
    # rather than the result of the call.
    nx.set_node_attributes(G, protected_attributes, "protected")
    return G


def getProtectedAttributesDeezer(G, df):
    """Attach the Deezer ``protected`` attribute to the nodes of ``G``.

    Parameters
    ----------
    G : graph whose nodes are annotated in place.
    df : table with an ``id`` column (node id) and a ``target`` column
        (value stored as the protected attribute).

    Returns
    -------
    The same graph object ``G``, now carrying the ``"protected"`` attribute.
    """
    protected_attributes = dict(zip(df["id"], df["target"]))
    # set_node_attributes works in place and returns None; return G instead.
    nx.set_node_attributes(G, protected_attributes, "protected")
    return G


def getProtectedAttributesPokec(G, df):
    """Attach the Pokec ``protected`` attribute to the nodes of ``G``.

    Parameters
    ----------
    G : graph whose nodes are annotated in place.
    df : table whose column ``0`` holds the node id and column ``3`` the
        value stored as the protected attribute.

    Returns
    -------
    The same graph object ``G``, now carrying the ``"protected"`` attribute.
    """
    protected_attributes = dict(zip(df[0], df[3]))
    # set_node_attributes works in place and returns None; return G instead.
    nx.set_node_attributes(G, protected_attributes, "protected")
    return G


def getprotectedAttributesDict_Facebook(featuresDF, featureNameDF, egoFeatDF):
    """Map column 0 of the feature tables to their gender feature value.

    The gender column is located through ``featureNameDF``: the first row
    whose column ``1`` equals ``"gender;anonymized"`` gives the feature index,
    and the value is read from column ``index + 1`` of both tables.

    Entries from ``egoFeatDF`` override entries from ``featuresDF`` on equal
    keys.

    NOTE(review): this treats column 0 of ``egoFeatDF`` as a node id exactly
    like ``featuresDF`` - confirm the .egofeat files really carry the id in
    their first column.
    """
    is_gender = featureNameDF[1] == "gender;anonymized"
    gender_column = featureNameDF.index[is_gender].to_list()[0] + 1
    wanted = [0, gender_column]
    merged = {}
    # Ego table last so that its values win on duplicate keys.
    for frame in (featuresDF[wanted], egoFeatDF[wanted]):
        merged.update(zip(frame[0], frame[gender_column]))
    return merged


def getprotectedAttributesDict_GPlus(featuresDF, gender_index, egoFeatDF):
    """Map column 0 of the feature tables to their gender feature value.

    ``gender_index`` is the position of the gender feature; its value is read
    from column ``gender_index + 1`` of ``featuresDF`` and ``egoFeatDF``.
    Entries from ``egoFeatDF`` override entries from ``featuresDF`` on equal
    keys.
    """
    gender_column = gender_index + 1
    alters = featuresDF[[0, gender_column]]
    ego = egoFeatDF[[0, gender_column]]
    return {
        **dict(zip(alters[0], alters[gender_column])),
        # Unpacked second so the ego rows take precedence.
        **dict(zip(ego[0], ego[gender_column])),
    }


def getEgoFeats(path):
    """List the ego-network feature files found directly inside ``path``.

    Parameters
    ----------
    path : directory to scan (not recursive).

    Returns
    -------
    Three lists of file names (without the directory): files ending in
    ``.feat``, in ``.featnames`` and in ``.egofeat``. Each list is sorted by
    file name.
    """
    featFiles = []
    featNameFiles = []
    egoFeatFiles = []
    # os.listdir yields entries in arbitrary order. Callers pair the three
    # lists by position, so sort to make the i-th entries line up (assuming
    # every ego network ships all three files) and to keep runs reproducible.
    for file in sorted(os.listdir(path)):
        if file.endswith(".feat"):
            featFiles.append(file)
        elif file.endswith(".featnames"):
            featNameFiles.append(file)
        elif file.endswith(".egofeat"):
            egoFeatFiles.append(file)
    return featFiles, featNameFiles, egoFeatFiles


def getProtectedAttributesFacebook(G):
    """Attach the Facebook gender feature to ``G`` as the ``protected`` attribute.

    Reads every ``.feat`` / ``.featnames`` / ``.egofeat`` triple found in
    ``Datasets/facebook/facebook/``, extracts the gender value per node and
    stores it on the nodes of ``G`` in place.

    Returns
    -------
    The same graph object ``G``.
    """
    node_gender_dict = {}
    localpath = "Datasets/facebook/facebook/"
    # The three lists are paired by position below; sort each one so that the
    # pairing does not depend on the directory listing order.
    fbFeatFiles, fbFeatNameFiles, fbEgoFeatFiles = (
        sorted(names) for names in getEgoFeats(localpath))
    for index in range(len(fbFeatFiles)):
        localFeaturesDF = pd.read_csv(
            localpath + fbFeatFiles[index], sep=" ", header=None)
        localFeatureNamesDf = pd.read_csv(
            localpath + fbFeatNameFiles[index], sep=" ", header=None)
        localEgoFeatDf = pd.read_csv(
            localpath + fbEgoFeatFiles[index], sep=" ", header=None)
        protectedAttrDict = getprotectedAttributesDict_Facebook(
            localFeaturesDF, localFeatureNamesDf, localEgoFeatDf)
        node_gender_dict.update(protectedAttrDict)

    # set_node_attributes works in place and returns None; return G instead.
    nx.set_node_attributes(G, node_gender_dict, "protected")
    return G


def getProtectedAttributesGPlus(G):
    """Attach the Google+ gender feature to ``G`` as the ``protected`` attribute.

    Reads every ``.feat`` / ``.egofeat`` pair found in ``Datasets/gplus/`` and
    takes feature index 0 as the gender value. Ego networks whose files cannot
    be processed are reported and skipped (best effort).

    Returns
    -------
    The same graph object ``G``, annotated in place.
    """
    node_gender_dict = {}
    localpath = "Datasets/gplus/"
    # Lists are paired by position; sort them so the pairing does not depend
    # on the directory listing order. The .featnames list is not needed here.
    gplusFeatFiles, _, gplusEgoFeatFiles = (
        sorted(names) for names in getEgoFeats(localpath))
    for index in range(len(gplusFeatFiles)):
        try:
            localFeaturesDF = pd.read_csv(
                localpath + gplusFeatFiles[index], sep=" ", header=None)
            localEgoFeatDf = pd.read_csv(
                localpath + gplusEgoFeatFiles[index], sep=" ", header=None)
            protectedAttrDict = getprotectedAttributesDict_GPlus(
                localFeaturesDF, 0, localEgoFeatDf)
            node_gender_dict.update(protectedAttrDict)
        except Exception as error:
            # Keep the best-effort behaviour, but do not swallow
            # KeyboardInterrupt/SystemExit and say why the file was skipped.
            print("Skipping {}: {!r}".format(gplusFeatFiles[index], error))

    # set_node_attributes works in place and returns None; return G instead.
    nx.set_node_attributes(G, node_gender_dict, "protected")
    return G


In [None]:
def initialize_community_attribute_Counter(communitiesList):
    """Create a zeroed counter for every community.

    Returns a dict keyed ``"Community_<i>"`` (``i`` being the position in
    ``communitiesList``) whose values are fresh ``{0: 0, 1: 0}`` dicts.
    """
    return {
        "Community_{}".format(position): {0: 0, 1: 0}
        for position in range(len(communitiesList))
    }


def count_protected_attributes_frequency(G, communitiesList, protectedAttributeCountDict, attribute):
    """Tally, per community, how many nodes have ``attribute`` equal to 0 or not.

    For every node of the i-th community the value ``G.nodes()[node][attribute]``
    is looked up: a value equal to 0 increments bucket ``0`` of
    ``"Community_<i>"``, any other value increments bucket ``1``. Lookups that
    raise ``KeyError`` are skipped.

    ``protectedAttributeCountDict`` is updated in place and returned.
    """
    for position, members in enumerate(communitiesList):
        label = "Community_{}".format(position)
        for member in members:
            try:
                bucket = 0 if G.nodes()[member][attribute] == 0 else 1
                protectedAttributeCountDict[label][bucket] += 1
            except KeyError:
                continue
    return protectedAttributeCountDict


def calculate_community_balance(protectedAttributeCountDict):
    """Add a ``"balance"`` entry to every non-empty community counter.

    The balance is the smaller of the two counts divided by the larger one.
    Communities whose two counts are both 0 are left without a ``"balance"``
    key. The dictionary is modified in place and returned.
    """
    for counts in protectedAttributeCountDict.values():
        zeros, ones = counts[0], counts[1]
        if zeros == 0 and ones == 0:
            continue
        counts["balance"] = ones / zeros if zeros >= ones else zeros / ones
    return protectedAttributeCountDict


def calculate_Fairness(G, communitiesList, attribute):
    """Compute per-community counts and balance of ``attribute``.

    Chains the three steps defined above: create zeroed counters, count the
    attribute values of each community's nodes, then add the balance.
    Returns the resulting ``{"Community_<i>": {...}}`` dictionary.
    """
    return calculate_community_balance(
        count_protected_attributes_frequency(
            G,
            communitiesList,
            initialize_community_attribute_Counter(communitiesList),
            attribute,
        )
    )


In [None]:
def countInCommunityDegree(G, communities):
    """Compute every node's degree inside its own community.

    For the i-th community the subgraph ``G.subgraph(community)`` is taken and
    the degree of each of its nodes is recorded. Returns
    ``{"Community_<i>": {node: degree_within_community}}``.
    """
    degreesByCommunity = {}
    for position, members in enumerate(communities):
        induced = G.subgraph(members)
        degreesByCommunity["Community_{}".format(position)] = {
            member: induced.degree(member) for member in induced.nodes()
        }
    return degreesByCommunity


def getLowInCommunityDegreeNodes(communitiesNodeDegrees, percentage=40):
    """Select, per community, the nodes that are weakly tied to it.

    Parameters
    ----------
    communitiesNodeDegrees : ``{community_name: {node: in_community_degree}}``.
    percentage : cutoff as a percentage of the community size. A node is kept
        when its in-community degree is strictly below
        ``size * percentage / 100``. Defaults to 40, the value that used to be
        hard-coded.

    Returns
    -------
    ``{community_name: {node: degree}}`` containing only the selected nodes,
    ordered by ascending degree (ties keep their original order).
    """
    lowInCommunityDegreeNodes = {}
    for communityName, degrees in communitiesNodeDegrees.items():
        cutoff = len(degrees) * percentage / 100
        weaklyTied = [(node, degree)
                      for node, degree in degrees.items() if degree < cutoff]
        # list.sort is stable, so equal degrees keep insertion order.
        weaklyTied.sort(key=lambda pair: pair[1])
        lowInCommunityDegreeNodes[communityName] = dict(weaklyTied)
    return lowInCommunityDegreeNodes


def getLowInCommunityDegreeNodesProtectedAttribute(graph, lowInCommunityDegreeNodes, protectedAttributeLabel):
    """Keep the low-degree nodes whose ``"protected"`` value matches a label.

    ``lowInCommunityDegreeNodes`` is a ``{node: degree}`` dict for one
    community. Nodes for which ``graph.nodes()[node]["protected"]`` cannot be
    looked up (``KeyError``) are skipped. Returns the matching nodes as a list,
    in the dict's iteration order.
    """
    selected = []
    for candidate, _degree in lowInCommunityDegreeNodes.items():
        try:
            label = graph.nodes()[candidate]["protected"]
        except KeyError:
            continue
        if label == protectedAttributeLabel:
            selected.append(candidate)
    return selected


def calculate_global_fairness(fairnessDictionary):
    """Return the balance of the two attribute values over all communities.

    Sums bucket ``0`` and bucket ``1`` across every community and returns the
    smaller total divided by the larger one, or ``0`` when that division
    raises ``ZeroDivisionError``.
    """
    zeroTotal = sum(info[0] for info in fairnessDictionary.values())
    oneTotal = sum(info[1] for info in fairnessDictionary.values())
    if zeroTotal > oneTotal:
        numerator, denominator = oneTotal, zeroTotal
    else:
        numerator, denominator = zeroTotal, oneTotal
    try:
        return numerator / denominator
    except ZeroDivisionError:
        return 0


def getCommunitiesWithLowFairness(fairnessDictionary, threshold):
    """Return the names of communities whose balance is below ``threshold``.

    Parameters
    ----------
    fairnessDictionary : ``{community_name: {0: n0, 1: n1, "balance": b}}``.
    threshold : communities with ``balance < threshold`` are reported.

    Returns
    -------
    List of community names, in dictionary order.
    """
    lowFairnessCommunities = []
    for communityName, communityInfo in fairnessDictionary.items():
        # calculate_community_balance leaves empty communities without a
        # "balance" entry; they have nothing to rebalance, so skip them
        # instead of failing with KeyError.
        balance = communityInfo.get("balance")
        if balance is None:
            continue
        if balance < threshold:
            lowFairnessCommunities.append(communityName)
    return lowFairnessCommunities


def seperateLowFairnessCommunities(lowFairnesCommunities, fairnessDictionary):
    """Split community names by which attribute value dominates.

    A community goes to the first list when its bucket ``0`` count is strictly
    greater than its bucket ``1`` count, otherwise to the second list.
    Returns ``(zeroCommunities, oneCommunities)``.
    """
    zeroMajority, oneMajority = [], []
    for name in lowFairnesCommunities:
        counts = fairnessDictionary[name]
        target = zeroMajority if counts[0] > counts[1] else oneMajority
        target.append(name)
    return zeroMajority, oneMajority


def _communityBalance(counts):
    """Return smaller/larger of ``counts[0]`` and ``counts[1]``; 0 if both are 0."""
    zeros, ones = counts[0], counts[1]
    if zeros == 0 and ones == 0:
        # An emptied community has no defined ratio; report 0 like
        # calculate_global_fairness does instead of dividing by zero.
        return 0
    if zeros > ones:
        return ones / zeros
    return zeros / ones


def updateFairnesDictionary(fairnessDictionary, startCommunity, destinationCommunity, startCommunityType):
    """Move one node of type ``startCommunityType`` between two communities.

    Only the bookkeeping is updated: the count of ``startCommunityType`` is
    decremented in ``startCommunity`` and incremented in
    ``destinationCommunity``, and both ``"balance"`` entries are recomputed.
    ``fairnessDictionary`` is modified in place.

    Returns
    -------
    ``(fairnessDictionary, newStartFairness, newDestinationFairness)``.
    """
    fairnessDictionary[startCommunity][startCommunityType] -= 1
    fairnessDictionary[destinationCommunity][startCommunityType] += 1

    newStartFairness = _communityBalance(fairnessDictionary[startCommunity])
    newDestinationFairness = _communityBalance(
        fairnessDictionary[destinationCommunity])

    fairnessDictionary[startCommunity]["balance"] = newStartFairness
    fairnessDictionary[destinationCommunity]["balance"] = newDestinationFairness

    return fairnessDictionary, newStartFairness, newDestinationFairness


def sendNodes(startCommunityName, startCommunityNodes, destinationCommunities, fairnessDictionary, globalFairness, startCommunityType):
    """Move nodes out of one community until its balance reaches the global one.

    Nodes are taken from the front of ``startCommunityNodes`` (the list is
    consumed in place) and credited to the current destination community.
    When a destination's balance reaches ``globalFairness`` the next one in
    ``destinationCommunities`` is used. The loop stops when the start
    community is balanced enough, no candidate nodes remain, or the
    destinations are exhausted.

    Only the counts in ``fairnessDictionary`` are updated (in place); the
    identity of the moved node is not recorded anywhere.

    Returns
    -------
    The updated ``fairnessDictionary``.
    """
    destinationCommunitiesNameList = destinationCommunities
    # Without any destination there is nothing to do; return early instead of
    # indexing an empty list.
    if len(destinationCommunitiesNameList) == 0:
        return fairnessDictionary

    destinationCounter = 0
    startCommunityCurrentFairness = fairnessDictionary[startCommunityName]["balance"]
    destinationCommunityCurrentFairness = fairnessDictionary[
        destinationCommunitiesNameList[destinationCounter]]["balance"]

    while (startCommunityCurrentFairness < globalFairness
           and len(startCommunityNodes) > 0
           and destinationCounter < len(destinationCommunitiesNameList)):
        if destinationCommunityCurrentFairness >= globalFairness:
            # Current destination is balanced enough; advance to the next one.
            destinationCounter += 1
            if destinationCounter < len(destinationCommunitiesNameList):
                destinationCommunityCurrentFairness = fairnessDictionary[
                    destinationCommunitiesNameList[destinationCounter]]["balance"]
        else:
            # The node itself is discarded; only the counts change.
            startCommunityNodes.pop(0)
            (fairnessDictionary,
             startCommunityCurrentFairness,
             destinationCommunityCurrentFairness) = updateFairnesDictionary(
                fairnessDictionary, startCommunityName,
                destinationCommunitiesNameList[destinationCounter],
                startCommunityType)

    return fairnessDictionary


def fixCommunityFairness(graph, fairnessDictionary, lowInCommunityDegreeNodes):
    """Rebalance unfair communities by moving weakly tied majority nodes.

    Communities whose balance is below the global balance are split by their
    majority attribute value. Low in-community-degree nodes of the majority
    type are then moved (count-wise) from each such community into the
    communities dominated by the other value.

    Parameters
    ----------
    graph : graph whose nodes carry the ``"protected"`` attribute.
    fairnessDictionary : ``{community_name: {0: n0, 1: n1, "balance": b}}``;
        left unmodified.
    lowInCommunityDegreeNodes : ``{community_name: {node: degree}}``.

    Returns
    -------
    A new fairness dictionary with the updated counts and balances. When
    either majority group is empty nothing can be exchanged and an unchanged
    copy is returned.
    """
    # dict.copy() would share the per-community dicts, which are mutated while
    # nodes are moved. Copy them too so the caller's "old" fairness stays
    # intact and can be compared with the result.
    newFairnessDictionary = {
        name: dict(info) for name, info in fairnessDictionary.items()}

    global_community_fairness = calculate_global_fairness(newFairnessDictionary)
    lowFairnessCommunities = getCommunitiesWithLowFairness(
        newFairnessDictionary, global_community_fairness)
    zeroCommunities, oneCommunities = seperateLowFairnessCommunities(
        lowFairnessCommunities, newFairnessDictionary)

    if len(zeroCommunities) == 0 or len(oneCommunities) == 0:
        print("Fairness cannot  be fixed")
        return newFairnessDictionary

    # Candidate nodes that may leave each community: weakly tied nodes of the
    # community's majority type.
    zeroCommunitiesDict = {
        community: getLowInCommunityDegreeNodesProtectedAttribute(
            graph, lowInCommunityDegreeNodes[community], 0)
        for community in zeroCommunities}
    oneCommunitiesDict = {
        community: getLowInCommunityDegreeNodesProtectedAttribute(
            graph, lowInCommunityDegreeNodes[community], 1)
        for community in oneCommunities}

    for community in zeroCommunities:
        newFairnessDictionary = sendNodes(
            community, zeroCommunitiesDict[community], oneCommunities,
            newFairnessDictionary, global_community_fairness, 0)
    for community in oneCommunities:
        newFairnessDictionary = sendNodes(
            community, oneCommunitiesDict[community], zeroCommunities,
            newFairnessDictionary, global_community_fairness, 1)

    return newFairnessDictionary


In [None]:
def convert_Communities_To_Fairness_CSV(paths):
    """Write one fairness CSV per comE community file.

    Each file name in ``paths`` is read from ``comE_communities/communities/``
    as a tab-separated ``node<TAB>community`` table. The dataset is chosen from
    the file-name prefix, the per-community counts and balance of the
    ``"protected"`` attribute are computed on that dataset's graph, and the
    result is written as ``<file name>_fairness.csv``. Files whose name matches
    no known prefix are read but produce no output.

    The graphs are notebook globals and must have been loaded beforehand.
    """
    # (prefix, lazy graph lookup, output template). The lambdas defer the
    # global lookup so that only the graph actually needed must exist.
    # NOTE(review): pokec writes to fairness_CSV/ while the others write to
    # comE_communities/output/ - kept as is, confirm this is intended.
    targets = (
        ("twitch", lambda: twitchGamersCCGraph,
         "comE_communities/output/{}_fairness.csv"),
        ("gplus", lambda: gplus_graph,
         "comE_communities/output/{}_fairness.csv"),
        ("facebook", lambda: facebookLargestCCGraph,
         "comE_communities/output/{}_fairness.csv"),
        ("deezer", lambda: deezerLargestCCGraph,
         "comE_communities/output/{}_fairness.csv"),
        ("pokec", lambda: pokec_test,
         "fairness_CSV/{}_fairness.csv"),
    )
    for path in paths:
        communityDF = pd.read_csv("comE_communities/communities/{}".format(path), sep="\t", header=None)
        # Group node ids (column 0) by community id (column 1).
        communityListDf = communityDF.groupby(1).aggregate(lambda x: list(x))
        communitiesList = communityListDf[0].to_list()
        for prefix, getGraph, outputTemplate in targets:
            if path.startswith(prefix):
                protected_count = calculate_Fairness(
                    getGraph(), communitiesList, "protected")
                # Transpose so that every community becomes one CSV row.
                pd.DataFrame(protected_count).T.to_csv(
                    outputTemplate.format(path))
                break
        

In [1]:
def calculateNewFairnessPipeline(paths):
    """Print fairness before and after rebalancing for each community file.

    Each file name in ``paths`` is read from ``communities/`` as a
    tab-separated ``node<TAB>community`` table. The dataset is chosen from the
    file-name prefix; for it the per-community fairness is computed, the
    communities are rebalanced with ``fixCommunityFairness`` and the average
    and global fairness of both states are printed. Files whose name matches
    no known prefix are read but produce no output.

    The graphs are notebook globals and must have been loaded beforehand.
    """
    # (prefix, lazy graph lookup); the lambdas defer the global lookup so that
    # only the graph actually needed must exist.
    graphsByPrefix = (
        ("twitch", lambda: twitchGamersCCGraph),
        ("gplus", lambda: gplusCCGraph),
        ("facebook", lambda: facebookLargestCCGraph),
        ("deezer", lambda: deezerLargestCCGraph),
        ("pokec", lambda: pokecCCGraph),
    )
    for path in paths:
        communityDF = pd.read_csv("communities/{}".format(path), sep="\t", header=None)
        # Group node ids (column 0) by community id (column 1).
        communityListDf = communityDF.groupby(1).aggregate(lambda x: list(x))
        communitiesList = communityListDf[0].to_list()
        for prefix, getGraph in graphsByPrefix:
            if not path.startswith(prefix):
                continue
            graph = getGraph()
            oldFairness = calculate_Fairness(graph, communitiesList, "protected")
            # Snapshot the "old" numbers before the repair step runs, so the
            # comparison below cannot be affected by in-place updates of
            # oldFairness.
            oldFairnessdf = pd.DataFrame(oldFairness).T
            inCommunityDegreeDict = countInCommunityDegree(graph, communitiesList)
            LowDegreeNodes = getLowInCommunityDegreeNodes(inCommunityDegreeDict)
            newFairness = fixCommunityFairness(graph, oldFairness, LowDegreeNodes)
            newFairnessdf = pd.DataFrame(newFairness).T

            print("Old Fairness")
            average_fairness_local(oldFairnessdf, path)
            print("New Fairness")
            average_fairness_local(newFairnessdf, path)
            print("======================================================================")
            break

In [None]:
def average_fairness_local(df, path):
    """Print the average and the global fairness of one fairness table.

    Parameters
    ----------
    df : table with one row per community and the columns ``0``, ``1``
        (attribute counts) and ``"balance"``.
    path : label used in the printed messages.

    The average fairness is the mean of the ``"balance"`` column; the global
    fairness is the smaller column total divided by the larger one (0 when
    both totals are 0).
    """
    zeroCounts = df[0].sum()
    oneCounts = df[1].sum()
    if zeroCounts == 0 and oneCounts == 0:
        # Same convention as calculate_global_fairness: no nodes, balance 0.
        balance = 0
    elif zeroCounts > oneCounts:
        balance = oneCounts/zeroCounts
    else:
        balance = zeroCounts/oneCounts
    # Select the column as a Series so that mean() is a scalar; df[["balance"]]
    # would print a whole Series repr inside the message.
    averageFairnes = df["balance"].mean()
    print("Average Fairness for {} is {}".format(path, averageFairnes))
    print("Global Fairness for {} is {}".format(path, balance))

In [None]:
def average_fairness(paths):
    """Print the average and the global fairness of saved fairness CSV files.

    Each file name in ``paths`` is read from ``comE_communities/output/``. The
    CSV must provide the columns ``"0"``, ``"1"`` (attribute counts) and
    ``"balance"``. The average fairness is the mean of ``"balance"``; the
    global fairness is the smaller column total divided by the larger one
    (0 when both totals are 0).
    """
    for path in paths:
        fairnessDf = pd.read_csv("comE_communities/output/{}".format(path), sep=",")
        zeroCounts = fairnessDf["0"].sum()
        oneCounts = fairnessDf["1"].sum()
        # Divide only by the larger total: min(a/b, b/a) divides by zero as
        # soon as one attribute value is absent.
        if zeroCounts == 0 and oneCounts == 0:
            balance = 0
        elif zeroCounts > oneCounts:
            balance = oneCounts/zeroCounts
        else:
            balance = zeroCounts/oneCounts
        # Series.mean() gives a scalar; fairnessDf[["balance"]].mean() would
        # print a Series repr inside the message.
        averageFairnes = fairnessDf["balance"].mean()
        print("Average Fairness for {} is {}".format(path, averageFairnes))
        print("Global Fairness for {} is {}".format(path, balance))
        print("=====================================================================================")

In [None]:
def most_frequent(list):
    """Return an element that occurs most often in ``list``.

    Every distinct element is ranked by ``list.count``; the parameter name
    shadows the built-in ``list`` inside this function.
    """
    distinct = set(list)
    return max(distinct, key=lambda item: list.count(item))


# Google Plus

In [None]:
# Execution Time 2mins
# gplus_graph = nx.read_edgelist("{}gplus/gplus_combined.txt".format(path), nodetype=str, delimiter=" ", create_using=nx.DiGraph())


In [None]:
# Load the undirected Google+ edge list with string node ids.
# NOTE(review): the path literal has no "{}" placeholder, so .format(path) is a
# no-op and the file is looked up relative to the working directory.
gplus_graph = nx.read_edgelist("gplus/gplus_combined_undirected.txt".format(path), nodetype=str, delimiter=" ", create_using=nx.Graph())


In [None]:
# Annotate the Google+ nodes in place with the "protected" attribute.
getProtectedAttributesGPlus(gplus_graph)

In [None]:
# Restrict the Google+ graph to its largest connected component.
gplusCC = max(nx.connected_components(gplus_graph), key=len)
gplusCCGraph = gplus_graph.subgraph(gplusCC)


# Twitch Gamers

In [None]:
# Twitch gamers graph: comma-separated edge list with integer node ids.
twitchGamers_graph = nx.read_edgelist(
    "{}twitch_gamers/large_twitch_edges.csv".format(path), nodetype=int, delimiter=",")

In [None]:
# Twitch gamers node features; getProtectedAttributesTwitch reads its
# "numeric_id" and "protected" columns.
twitchGamers_features = pd.read_csv(
    "{}twitch_gamers/large_twitch_features.csv".format(path))


In [None]:
# Annotate the Twitch nodes in place with the "protected" attribute.
getProtectedAttributesTwitch(twitchGamers_graph, twitchGamers_features)

In [None]:
# Restrict the Twitch graph to its largest connected component.
twitchGamersCC = max(nx.connected_components(twitchGamers_graph), key=len)
twitchGamersCCGraph = twitchGamers_graph.subgraph(twitchGamersCC)

# Deezer


In [None]:
# Deezer Europe graph: comma-separated edge list with integer node ids.
deezer_graph = nx.read_edgelist(
    "{}deezer_europe/deezer_europe/deezer_europe_edges.csv".format(path), nodetype=int, delimiter=",")


In [None]:
# Deezer target table; getProtectedAttributesDeezer reads its "id" and
# "target" columns. With path ending in "/" the formatted name contains "//".
deezer_gendersDf = pd.read_csv(
    "{}/deezer_europe/deezer_europe/deezer_europe_target.csv".format(path))

In [None]:
# Annotate the Deezer nodes in place with the "protected" attribute.
getProtectedAttributesDeezer(deezer_graph, deezer_gendersDf)

In [None]:
# Restrict the Deezer graph to its largest connected component.
deezerLargestCC = max(nx.connected_components(deezer_graph), key=len)
deezerLargestCCGraph = deezer_graph.subgraph(deezerLargestCC)

# Pokec

In [None]:
# Execution time 2m 26.1s
# pokec_graph = nx.read_edgelist("{}/pokec/soc-pokec-relationships.txt".format(path), nodetype=int, delimiter="\t", create_using=nx.DiGraph())

In [None]:
# Undirected Pokec graph: tab-separated edge list with integer node ids, read
# relative to the working directory (the name is not built from `path`).
pokec_test = nx.read_edgelist("pokec/soc-pokec-undirected.txt",
                              nodetype=int, delimiter="\t", create_using=nx.Graph())


In [None]:
# Pokec profiles: only columns 0 and 3 are loaded, which
# getProtectedAttributesPokec uses as node id and protected value.
pokec_features = pd.read_csv(
    "{}/pokec/soc-pokec-profiles.txt".format(path), delimiter="\t", header=None, usecols=[0, 3])

In [None]:
# Annotate the Pokec nodes in place with the "protected" attribute.
getProtectedAttributesPokec(pokec_test, pokec_features)

In [None]:
# Restrict the Pokec graph to its largest connected component.
pokecCC = max(nx.connected_components(pokec_test), key=len)
pokecCCGraph = pokec_test.subgraph(pokecCC)


# Facebook

In [None]:
# Combined Facebook ego-network graph: space-separated edge list with integer
# node ids.
facebook_graph_all = nx.read_edgelist(
    "{}facebook/facebook/facebook_combined.txt".format(path), nodetype=int, delimiter=" ")

In [None]:
# Annotate the Facebook nodes in place with the "protected" attribute.
getProtectedAttributesFacebook(facebook_graph_all)

In [None]:
# Restrict the Facebook graph to its largest connected component.
facebookLargestCC = max(nx.connected_components(facebook_graph_all), key=len)
facebookLargestCCGraph = facebook_graph_all.subgraph(facebookLargestCC)

# ALL TOGETHER


In [None]:
# One-shot loader: repeats the per-dataset loading steps of the sections above
# in a single cell (load graph, attach "protected", keep the largest connected
# component) for Google+, Twitch, Deezer, Pokec and Facebook.

# --- Google+ ---
# NOTE(review): the path literal has no "{}" placeholder, so .format(path) is a
# no-op and the file is looked up relative to the working directory.
gplus_graph = nx.read_edgelist("gplus/gplus_combined_undirected.txt".format(path), nodetype=str, delimiter=" ", create_using=nx.Graph())

getProtectedAttributesGPlus(gplus_graph)
gplusCC = max(nx.connected_components(gplus_graph), key=len)
gplusCCGraph = gplus_graph.subgraph(gplusCC)

# --- Twitch ---
twitchGamers_graph = nx.read_edgelist(
    "{}twitch_gamers/large_twitch_edges.csv".format(path), nodetype=int, delimiter=",")

twitchGamers_features = pd.read_csv(
    "{}twitch_gamers/large_twitch_features.csv".format(path))


getProtectedAttributesTwitch(twitchGamers_graph, twitchGamers_features)
twitchGamersCC = max(nx.connected_components(twitchGamers_graph), key=len)
twitchGamersCCGraph = twitchGamers_graph.subgraph(twitchGamersCC)

# --- Deezer ---
deezer_graph = nx.read_edgelist(
    "{}deezer_europe/deezer_europe/deezer_europe_edges.csv".format(path), nodetype=int, delimiter=",")

deezer_gendersDf = pd.read_csv(
    "{}/deezer_europe/deezer_europe/deezer_europe_target.csv".format(path))

getProtectedAttributesDeezer(deezer_graph, deezer_gendersDf)

deezerLargestCC = max(nx.connected_components(deezer_graph), key=len)
deezerLargestCCGraph = deezer_graph.subgraph(deezerLargestCC)


# --- Pokec ---
pokec_test = nx.read_edgelist("pokec/soc-pokec-undirected.txt",
                              nodetype=int, delimiter="\t", create_using=nx.Graph())

pokec_features = pd.read_csv(
    "{}/pokec/soc-pokec-profiles.txt".format(path), delimiter="\t", header=None, usecols=[0, 3])

getProtectedAttributesPokec(pokec_test, pokec_features)

pokecCC = max(nx.connected_components(pokec_test), key=len)
pokecCCGraph = pokec_test.subgraph(pokecCC)

# --- Facebook ---
facebook_graph_all = nx.read_edgelist(
    "{}facebook/facebook/facebook_combined.txt".format(path), nodetype=int, delimiter=" ")

getProtectedAttributesFacebook(facebook_graph_all)

facebookLargestCC = max(nx.connected_components(facebook_graph_all), key=len)
facebookLargestCCGraph = facebook_graph_all.subgraph(facebookLargestCC)

In [None]:
# Worked example on the saved Google+ Louvain communities: compare the mean
# community balance before and after fixCommunityFairness.

# The file is a tab-separated node<TAB>community table; group node ids
# (column 0) by community id (column 1).
communityDF = pd.read_csv("communities/gplusLouvain.txt", sep="\t", header=None)
communityListDf = communityDF.groupby(1).aggregate(lambda x: list(x))
communitiesList = communityListDf[0].to_list()

# Not the last expression of the cell, so this line displays nothing.
communitiesList

oldFairness = calculate_Fairness(gplusCCGraph, communitiesList, "protected")
# Transpose so that every community becomes one row.
df = pd.DataFrame(oldFairness)
df = df.T
print("Old Fairness")
print(df[["balance"]].mean())


# oldFairness
inCommunityDegreeDict = countInCommunityDegree(gplusCCGraph, communitiesList)
# inCommunityDegreeDict
LowDegreeNodes = getLowInCommunityDegreeNodes(inCommunityDegreeDict)
# LowDegreeNodes
newFairness = fixCommunityFairness(gplusCCGraph, oldFairness, LowDegreeNodes)
dfNew = pd.DataFrame(newFairness)
dfNew = dfNew.T
print("New Fairness")
print(dfNew[["balance"]].mean())


In [None]:
# Summary statistics of oldFairness as it stands when this cell runs
# (one row per community after the transpose).
df = pd.DataFrame(oldFairness)
df = df.T
df.describe()

In [None]:
# Summary statistics of the rebalanced fairness table (one row per community).
dfNew = pd.DataFrame(newFairness)
dfNew = dfNew.T
dfNew.describe()

# Community Detection

## Google Plus Communities


In [None]:
# Louvain communities of the Google+ giant component.
# NOTE(review): no seed is passed; confirm whether the partition needs to be
# reproducible between runs.
louvain_communities_Gplus = nx.algorithms.community.louvain_communities(gplusCCGraph)


In [None]:
# Legacy code for extracting the communities from the Louvain algorithm to CSV

# community_counter = 0
# with open("output/gplusLouvain.txt", "w") as f:
#     for community in louvain_communities_Gplus:
#         for node in community:
#             f.write("{}\t{}\n".format(node, community_counter))
#         community_counter += 1


In [None]:
# Per-community counts and balance of the protected attribute for the Louvain
# communities of the Google+ giant component.
# The second argument must be a list of communities (not the graph itself),
# and node attributes are stored under "protected", not 'gender'.
genderCountGplus = calculate_Fairness(
    gplusCCGraph, louvain_communities_Gplus, "protected")


In [None]:
# Label-propagation communities of the Google+ giant component.
label_propagation_Gplus = nx.algorithms.community.label_propagation_communities(
    gplusCCGraph)


In [None]:
# Legacy code for extracting the communities from the Label Propagation algorithm to CSV
# community_counter = 0
# with open("output/gplusLabelPropagation.txt", "w") as f:
#     for community in label_propagation_Gplus:
#         for node in community:
#             f.write("{}\t{}\n".format(node, community_counter))
#         community_counter += 1


## Twitch Communities


In [None]:
# Execution time: about 5 minutes.
# Louvain communities of the Twitch giant component.
louvain_communities_twitch = nx.algorithms.community.louvain_communities(
    twitchGamersCCGraph)


In [None]:
# Legacy code for extracting the communities from the Louvain algorithm to CSV
# community_counter = 0
# with open("output/twitchLouvain.txt", "w") as f:
#     for community in louvain_communities_twitch:
#         for node in community:
#             f.write("{}\t{}\n".format(node, community_counter))
#         community_counter += 1


In [None]:
# Count the "protected" attribute per Louvain community of the Twitch graph
# and display the resulting dictionary. The initial {} is overwritten by the
# call below.
louvain_gender_count = {}
louvain_gender_count = calculate_Fairness(
    twitchGamers_graph, louvain_communities_twitch, "protected")
louvain_gender_count


In [None]:
# Execution time: about 12 minutes.
# Label-propagation communities of the Twitch giant component, materialised
# into a list so they can be iterated more than once.
twitchGamersCCGraphComms = nx.algorithms.community.label_propagation_communities(
    twitchGamersCCGraph)
twitchLabelPropagationList = list(twitchGamersCCGraphComms)

In [None]:
# Legacy code for extracting the communities from the Label Propagation algorithm to CSV
# community_counter = 0
# with open("output/twitchLabelPropagation.txt", "w") as f:
#     for community in twitchLabelPropagationList:
#         for node in community:
#             f.write("{}\t{}\n".format(node, community_counter))
#         community_counter += 1


In [None]:
# Count the "protected" attribute per label-propagation community of the
# Twitch graph. The initial {} is overwritten by the call below.
genderCountTwitch = {}
genderCountTwitch = calculate_Fairness(
    twitchGamers_graph, twitchLabelPropagationList, "protected")


## Deezer Communities

In [None]:
# Label-propagation communities of the Deezer giant component.
deezerLabelPropagationComms = nx.algorithms.community.label_propagation_communities(
    deezerLargestCCGraph)


In [None]:
# Materialise the communities into a list so they can be reused.
deezerLabelPropagationList = list(deezerLabelPropagationComms)


In [None]:
# Legacy code for extracting the communities from the Label Propagation algorithm to CSV
# community_counter = 0
# with open("output/deezerLabelPropagation.txt", "w") as f:
#     for community in deezerLabelPropagationList:
#         for node in community:
#             f.write("{}\t{}\n".format(node, community_counter))
#         community_counter += 1


In [None]:
# Louvain communities of the Deezer giant component.
louvain_communities_deezer = nx.community.louvain_communities(
    deezerLargestCCGraph)


In [None]:
# Legacy code for extracting the communities from the Louvain algorithm to CSV
# community_counter = 0
# with open("output/deezerLouvain.txt", "w") as f:
#     for community in louvain_communities_deezer:
#         for node in community:
#             f.write("{}\t{}\n".format(node, community_counter))
#         community_counter += 1


## Pokec Communities

In [None]:
# Label-propagation communities of the Pokec giant component.
labelPropagationPokec = nx.algorithms.community.label_propagation_communities(
    pokecCCGraph)


In [None]:
# Legacy code for extracting the communities from the Label Propagation algorithm to CSV
# community_counter = 0
# with open("output/pokecLabelPropagation.txt", "w") as f:
#     for community in labelPropagationPokec:
#         for node in community:
#             f.write("{}\t{}\n".format(node, community_counter))
#         community_counter += 1


In [None]:
# Louvain communities of the Pokec giant component.
louvain_communities_Pokec = nx.algorithms.community.louvain_communities(
    pokecCCGraph)


In [None]:
# Legacy code for extracting the communities from the Louvain algorithm to CSV
# community_counter = 0
# with open("output/pokecLouvain.txt", "w") as f:
#     for community in louvain_communities_Pokec:
#         for node in community:
#             f.write("{}\t{}\n".format(node, community_counter))
#         community_counter += 1


## Facebook Communities


In [None]:
# Label-propagation communities of the Facebook giant component.
facebookComms = nx.community.label_propagation_communities(
    facebookLargestCCGraph)


In [None]:
# Materialise the communities into a list so they can be reused.
facebookLabelPropagationList = list(facebookComms)


In [None]:
# community_counter = 0
# with open("output/facebookLabelPropagation.txt", "w") as f:
#     for community in facebookLabelPropagationList:
#         for node in community:
#             f.write("{}\t{}\n".format(node, community_counter))
#         community_counter += 1


In [None]:
# Per-community counts and balance for the Facebook label-propagation
# communities. getProtectedAttributesFacebook stores the gender value under
# the "protected" node attribute, so that is the name to look up; asking for
# 'gender' finds nothing and leaves every count at zero.
genderCountFacebook = calculate_Fairness(
    facebook_graph_all, facebookLabelPropagationList, "protected")


In [None]:
# Balance of the two attribute values summed over all Facebook communities.
print(calculate_global_fairness(genderCountFacebook))
# print(calculate_global_fairness2(genderCountFacebook))

In [None]:
# In-community degree of every node, per Facebook community.
testcommDegree = countInCommunityDegree(facebookLargestCCGraph, facebookLabelPropagationList)
# testcommDegree


# Nodes weakly tied to their community (candidates for being moved).
facebookLowDegreeNodes = getLowInCommunityDegreeNodes(testcommDegree)
# facebookLowDegreeNodes

In [None]:
# Summary statistics of the Facebook fairness table (one row per community).
# The bare name on the next line is not the cell's last expression, so it
# displays nothing.
genderCountFacebook
df = pd.DataFrame(genderCountFacebook)
df = df.T
df.describe()

In [None]:
# Rebalance the Facebook communities using the weakly tied nodes found above.
newFairness = fixCommunityFairness(facebookLargestCCGraph, genderCountFacebook, facebookLowDegreeNodes)

In [None]:
# Summary statistics of the rebalanced Facebook fairness table.
# The bare name on the next line is not the cell's last expression, so it
# displays nothing.
newFairness
dfNew = pd.DataFrame(newFairness)
dfNew = dfNew.T
dfNew.describe()

In [None]:
# Louvain communities of the Facebook giant component.
louvain_communities_facebook = nx.community.louvain_communities(
    facebookLargestCCGraph)


In [None]:
# community_counter = 0
# with open("output/facebookLouvain.txt", "w") as f:
#     for community in louvain_communities_facebook:
#         for node in community:
#             f.write("{}\t{}\n".format(node, community_counter))
#         community_counter += 1


## Read comE outputs communities

In [None]:
# List the comE community files and write one fairness CSV per file.
# os.path.join builds the folder name portably; a backslash inside the literal
# is an invalid escape sequence ("\c") and only works as a separator on
# Windows, while convert_Communities_To_Fairness_CSV reads from
# "comE_communities/communities/".
communitiesPathList = os.listdir(os.path.join("comE_communities", "communities"))
# communitiesPathList
convert_Communities_To_Fairness_CSV(communitiesPathList)
