# Minority Detection

Detect the minority nodes of the listed datasets. Each tuple in the `GRAPHS` list holds the basename of a dataset (the edge-list file name without its suffix, e.g. `.txt`) and a flag indicating whether the graph is directed: `True` for directed, `False` for undirected.

**NOTE:** Regenerating on directed graphs can produce different results.

In [1]:
import os
import networkx as nx

# local imports
import communities as cm

In [2]:
# Available datasets, each as (basename, is_directed).
CONGRESS = ("congress", True)
EMAIL_EU = ("email_eu", True)
WIKI_VOTE = ("wiki_vote", True)
FACEBOOK = ("facebook", False)
LASTFM = ("lastfm", False)
DEEZER = ("deezer", False)

# Datasets to process in this run.
GRAPHS = [FACEBOOK]

# Directory holding the edge-list inputs; the minority lists are written
# here as well.
INPUT_DIR = "../../input/"

# Community detection threshold:
# a partition must contain MORE than this many nodes to be
# treated as a community in this experiment.
COMM_THRESHOLD = 2

# Minority detection threshold:
# the fraction of the graph's nodes assigned to the minority group.
MINORITY_RATIO = 0.15

In [3]:
def list_to_file(file_path, values):
    """Write every item of `values` to `file_path`, one item per line.

    The file is opened in 'w' mode, so any existing content is replaced.
    Each item is converted with `str()` and followed by a newline.
    """
    with open(file_path, 'w') as out:
        # The generator is consumed inside the `with`, so the file is opened
        # (and truncated) before `values` is iterated.
        out.writelines(str(item) + '\n' for item in values)

In [4]:
# Produce one `<basename>.minorities` file per dataset listed in GRAPHS.
for g, directed in GRAPHS:
    # input edge list and output node list share the dataset basename
    graph_path = os.path.join(INPUT_DIR, g + ".txt")
    minorities_path = os.path.join(INPUT_DIR, g + ".minorities")

    # load the graph and fetch its communities from the local
    # `communities` module, using the variant matching the directed flag
    if not directed:
        G = nx.read_edgelist(graph_path, nodetype=int, create_using=nx.Graph())
        comms = cm.undirected_comms(G)
    else:
        G = nx.read_edgelist(graph_path, nodetype=int, create_using=nx.DiGraph())
        comms = cm.directed_comms(G)

    # minority budget: MINORITY_RATIO of all nodes in the graph
    num_nodes = G.number_of_nodes()
    num_minorities = round(MINORITY_RATIO * num_nodes)

    # smallest communities first; partitions at or below the size
    # threshold are not counted as communities and are dropped
    comms.sort(key=len)
    comms = [comm for comm in comms if not len(comm) <= COMM_THRESHOLD]

    # concatenate the members community by community, keeping that order
    flat_comms = []
    for comm in comms:
        flat_comms.extend(comm)

    # the budget is filled from the smallest communities upward; the last
    # community reached may be included only partially
    minorities = flat_comms[:num_minorities]

    # sanity check: no node may be selected twice
    assert len(minorities) == len(set(minorities))
    list_to_file(minorities_path, minorities)