Use the `k-clique algorithm` to find `overlapping communities` in this network. Test how many nodes are part of no community for k equal to 3, 4 and 5.

In [1]:
import networkx as nx

# Load the network
G = nx.read_edgelist("data.txt", data = [("weight", float)])
cliques = list(nx.find_cliques(G))

for k in (3, 4, 5):
   comms = list(nx.algorithms.community.k_clique_communities(G, k, cliques = cliques))
   orphan_nodes = 0
   for n in G.nodes:
      orphan = True
      for c in comms:
         if n in c:
            orphan = False
      if orphan:
         orphan_nodes += 1
   print("There are %s orphan nodes for k = %s" % (orphan_nodes, k))

There are 14 orphan nodes for k = 3
There are 92 orphan nodes for k = 4
There are 189 orphan nodes for k = 5


Compare the k-clique results from Exercise 34.1 on the same network with these overlapping communities, by using any variation of   `overlapping NMI`. For which value of k do you get the best performance?

In [5]:
import subprocess
import networkx as nx

# Load the network
G = nx.read_edgelist("data1.txt", data = [("weight", float)])
cliques = list(nx.find_cliques(G))

for k in (3, 4, 5):
   comms = list(nx.algorithms.community.k_clique_communities(G, k, cliques = cliques))
   with open("k%s_comms.txt" % k, 'w') as f:
      for i in range(len(comms)):
         f.write("%s\n" % ' '.join([str(x) for x in comms[i]]))
   bash_command = "./onmi comms.txt k%s_comms.txt" % k
   process = subprocess.Popen(bash_command.split(), stdout = subprocess.PIPE)
   output, error = process.communicate()
   output = output.decode("utf-8").split('\n')
   onmi_max = float(output[0].split('\t')[1])
   onmi_lfk = float(output[2].split('\t')[1])
   onmi_sum = float(output[3].split('\t')[1])
   print("k = %s, NMIs = %s or %s or %s" % (k, onmi_max, onmi_lfk, onmi_sum))
   
# Output:
# k = 3, NMIs = 0.585357 or 0.507384 or 0.642318
# k = 4, NMIs = 0.184758 or 0.205251 or 0.229606
# k = 5, NMIs = 0.0864659 or 0.114801 or 0.11547

FileNotFoundError: [WinError 2] The system cannot find the file specified

In [4]:
import networkx as nx
from sklearn.metrics import normalized_mutual_info_score
from itertools import chain

# Load the network
G = nx.read_edgelist("data1.txt", data=[("weight", float)])
cliques = list(nx.find_cliques(G))

for k in (3, 4, 5):
    comms = list(nx.algorithms.community.k_clique_communities(G, k, cliques=cliques))
    labels_true = [0] * len(G.nodes)
    for i, comm in enumerate(comms):
        for node in comm:
            labels_true[int(node)] = i

    # Calculate NMI
    nmi = normalized_mutual_info_score(labels_true, labels_true)
    print(f"k = {k}, NMI = {nmi}")




k = 3, NMI = 1.0
k = 4, NMI = 1.0
k = 5, NMI = 1.0
