/
ego_splitting.py
99 lines (88 loc) · 3.56 KB
/
ego_splitting.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
"""Ego-Splitter class"""
import community
import networkx as nx
from tqdm import tqdm
class EgoNetSplitter(object):
"""An implementation of `"Ego-Splitting" see:
https://www.eecs.yorku.ca/course_archive/2017-18/F/6412/reading/kdd17p145.pdf
From the KDD '17 paper "Ego-Splitting Framework: from Non-Overlapping to Overlapping Clusters".
The tool first creates the egonets of nodes.
A persona-graph is created which is clustered by the Louvain method.
The resulting overlapping cluster memberships are stored as a dictionary.
Args:
resolution (float): Resolution parameter of Python Louvain. Default 1.0.
"""
def __init__(self, resolution=1.0):
self.resolution = resolution
def _create_egonet(self, node):
"""
Creating an ego net, extracting personas and partitioning it.
Args:
node: Node ID for egonet (ego node).
"""
ego_net_minus_ego = self.graph.subgraph(self.graph.neighbors(node))
components = {i: n for i, n in enumerate(nx.connected_components(ego_net_minus_ego))}
new_mapping = {}
personalities = []
for k, v in components.items():
personalities.append(self.index)
for other_node in v:
new_mapping[other_node] = self.index
self.index = self.index+1
self.components[node] = new_mapping
self.personalities[node] = personalities
def _create_egonets(self):
"""
Creating an egonet for each node.
"""
self.components = {}
self.personalities = {}
self.index = 0
print("Creating egonets.")
for node in tqdm(self.graph.nodes()):
self._create_egonet(node)
def _map_personalities(self):
"""
Mapping the personas to new nodes.
"""
self.personality_map = {p: n for n in self.graph.nodes() for p in self.personalities[n]}
def _get_new_edge_ids(self, edge):
"""
Getting the new edge identifiers.
Args:
edge: Edge being mapped to the new identifiers.
"""
return (self.components[edge[0]][edge[1]], self.components[edge[1]][edge[0]])
def _create_persona_graph(self):
"""
Create a persona graph using the egonet components.
"""
print("Creating the persona graph.")
self.persona_graph_edges = [self._get_new_edge_ids(e) for e in tqdm(self.graph.edges())]
self.persona_graph = nx.from_edgelist(self.persona_graph_edges)
def _create_partitions(self):
"""
Creating a non-overlapping clustering of nodes in the persona graph.
"""
print("Clustering the persona graph.")
self.partitions = community.best_partition(self.persona_graph, resolution=self.resolution)
self.overlapping_partitions = {node: [] for node in self.graph.nodes()}
for node, membership in self.partitions.items():
self.overlapping_partitions[self.personality_map[node]].append(membership)
def fit(self, graph):
"""
Fitting an Ego-Splitter clustering model.
Arg types:
* **graph** *(NetworkX graph)* - The graph to be clustered.
"""
self.graph = graph
self._create_egonets()
self._map_personalities()
self._create_persona_graph()
self._create_partitions()
def get_memberships(self):
r"""Getting the cluster membership of nodes.
Return types:
* **memberships** *(dictionary of lists)* - Cluster memberships.
"""
return self.overlapping_partitions