Skip to content

Commit

Permalink
assert each cluster is a set
Browse files Browse the repository at this point in the history
  • Loading branch information
j-i-l committed Feb 20, 2020
1 parent bfc8dcc commit a5317f4
Show file tree
Hide file tree
Showing 3 changed files with 225 additions and 8 deletions.
Binary file added majortrack/.method.py.swp
Binary file not shown.
215 changes: 215 additions & 0 deletions majortrack/method.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Module documentation goes here
and here
and ...
"""


class MajorTrack(object):
r"""
Parameters
===========
clusterings: list, dict
Sequence of clusterings.
**If provided as a `dict`**:
keys: float, datetime
The time points.
values: list, dict
The membership list of each clustering indicating to which cluster
a data source belongs.
See :obj:`~MajorTrack.memberships` for details.
\**kwargs optional parameter:
timepoints: list
The time points of each clustering.
Note
-----
If `clusterings` if of type `dict` then the keys will be used as time
points and this optional parameter is ignored, even if provided.
group_matchup_method: str (default='fraction')
Set the method to calculate the similarity between two clusters from
different clusterings. By default the fraction of identical members is
used as explained in
`the original article <https://arxiv.org/abs/1912.04261>'_.
Attributes
==========
group_matchup: list
Holds for each time point the tracing and mapping sets of all clusters.
Each element is a `dict` with the keys ``'forward'`` and ``'backward'``.
Both hold a `dict` indicating for a cluster the best matching cluster
along with the similarity score of the particular relation in a `tuple`.
Example
-------
:: code-block: python
self.group_matchup[1] = {
'backward': {0: (0, 1.0), ...},
^ ^ ^
| | similarity score
| cluster from previous time point
cluster from current time point.
}
"""
def __init__(self, clusterings, **kwargs):
assert isinstance(clusterings, (list, dict))
if isinstance(clusterings, list):
self.timepoints = kwargs.pop(
'timepoints', list(range(len(clusterings)))
)
self.clusterings = clusterings
# sort both clusterings and timepoints according to timepoints
self.timepoints, self.clusterings = zip(
*sorted(
zip(self.timepoints, self.clusterings),
key=lambda x: x[0]
)
)
else:
self.timepoints = sorted(clusterings.keys())
self.clusterings = list(clusterings[tp] for tp in self.timepoints)

self.group_matchup_method = kwargs.get(
'group_matchup_method',
'fraction'
)

def get_group_matchup(self, matchup_method=None):
r"""
Determine majority relation between neighbouring snapshots.
Parameters
===========
matchup_method: str (default=None)
If provided this overwrites `self.group_matchup_method. It determines
the method to use when calculating similarities between clusters from
neighbouring snapshots.
Returns
=======
self: :class:`.MajorTrack`
with new attribute :ref:`group_matchup`.
########
Between each pair of consecutive time points all groups are compared
and matched (if possible) using `matchup_method`.
Set:
----
- self.group_matchup: List holding for each time point a dictionary
with 'backward'/'forward' matchups. A matchup is a dict indicating
for each group (id) the best match. The best match is given by a
tuple with group id and similarity score.
E.g.: self.group_matchup[1] = {
'backward': {0: (0, 1.0), ...},
'forward': {0: (1, 0.7), ...}
}
"""
if matchup_method is None:
matchup_method = self.group_matchup_method
# if self.group_matchup:
self.group_matchup = []
# if self.group_similarities:
self.group_similarities = []
self.group_matchup.append(
{
'backward': {
_group_id: (None, None)
for _group_id in range(len(self.groupings[0]))
}
}
)
self.group_similarities.append(
{
'backward': {
_group_id: None
for _group_id in range(len(self.groupings[0]))
}
}
)
for _idx in range(self.length - 1):
_group_similarities = self._get_group_similarities(
_idx, _idx + 1,
method=matchup_method
)
# set forward matchup/similarities for current step
self.group_matchup[-1][
'forward'
] = _group_similarities['forward']['matchup']
self.group_similarities[-1][
'forward'
] = _group_similarities['forward']['similarities']
# create backward matchup/similarities for next step
self.group_matchup.append(
{'backward': _group_similarities[
'backward'
]['matchup']}
)
self.group_similarities.append(
{'backward': _group_similarities[
'backward'
]['similarities']}
)
# complete forward matchup/similarites with None's
self.group_matchup[-1]['forward'] = {
_group_id: (None, None)
for _group_id in range(len(self.groupings[-1]))
}
self.group_similarities[-1]['forward'] = {
_group_id: None
for _group_id in range(len(self.groupings[-1]))
}

def get_span(self, idx, span_set, get_indivs=True):
r"""
Create the tracer tree.
Parameters
===========
####
Get the span (time forward)
"""
span_tree = {}
if isinstance(span_set, int):
span_tree[idx] = [self.groupings[idx][span_set]]
elif isinstance(span_set, str):
span_tree[idx] = filter(
lambda g: span_set in g,
self.groupings[idx]
)
else:
span_tree[idx] = [span_set]
current_set = set.union(*span_tree[idx])
for _idx in range(idx + 1, self.length):
next_groupings = self.groupings[_idx]
next_contained = [*filter(
lambda grp: any([memb in current_set for memb in grp]),
next_groupings
)]
if next_contained:
span_tree[_idx] = next_contained
current_set = set.union(*next_contained)
else:
break
if get_indivs:
return span_tree
else:
span_tree_idxs = {}
for _idx in span_tree:
_span_set = set.union(*span_tree[_idx])
span_tree_idxs[_idx] = [
i for i in range(len(self.groupings[_idx]))
if any([
el in _span_set
for el in self.groupings[_idx][i]
])
]
return span_tree_idxs
18 changes: 10 additions & 8 deletions majortrack/tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class MajorTrack(object):
Determine if :obj:`~LazyList`'s should be used to store data about
dynamic clusters or normal lists.
Most likely you want to use normal lists.
.. bibliography:: ../references.bib
Attributes
Expand Down Expand Up @@ -102,7 +102,7 @@ class MajorTrack(object):
group_mappings: list(list)
Holds for each slice a list of mapping sets. The list is ordered like
:attr:`~.MajorTrack.grougings`.
:obj:`~.MajorTrack.grougings`.
Example
--------
Expand Down Expand Up @@ -234,6 +234,7 @@ def __init__(self, clusterings, history, **kwargs):
else:
self.timepoints = sorted(clusterings.keys())
self.clusterings = list(clusterings[tp] for tp in self.timepoints)
assert isinstance(self.clusterings[0], set)
self.length = len(self.timepoints)
# now determine the slice widths
self.slice_widths = kwargs.get('slice_widths', None)
Expand Down Expand Up @@ -382,11 +383,11 @@ def combined_population(
If further arguments are provided (all have to be unnamed), then the
union is taken between all of these time points.
Example
-------
.. code-block:: python
self.resident_population(2,4,5)
This will return the combined population of the time points 2, 4
Expand Down Expand Up @@ -846,14 +847,15 @@ def get_group_matchup(self, matchup_method=None):
Parameters
===========
matchup_method: str (default=None)
If provided this overwrites :attr:`~.MajorTrack.group_matchup_method`.
If provided this overwrites
:attr:`~majortrack.MajorTrack.group_matchup_method`.
It determines the method to use when calculating similarities between
clusters from neighbouring snapshots.
Returns
=======
self: :class:`.MajorTrack`
with new attribute :attr:`~.MajorTrack.group_matchup`.
with new attribute :obj:`~.MajorTrack.group_matchup`.
"""
if matchup_method is None:
Expand Down Expand Up @@ -1956,7 +1958,7 @@ def get_community_shrinkages(self,):
None: None
Adds new attributes:
- attr:`~.MajorTrack.community_shrinkages`
- :attr:`~.MajorTrack.community_shrinkages`
"""
# birth events are not growth events
self.community_shrinkages = [[]]
Expand Down Expand Up @@ -2073,7 +2075,7 @@ def get_auto_corrs(self, residents=True):
None: None
Adds new attributes:
- attr:`~.MajorTrack.community_autocorrs`
- :attr:`~.MajorTrack.community_autocorrs`
"""
self.community_autocorrs = {}
for idx in range(1, self.length):
Expand Down

0 comments on commit a5317f4

Please sign in to comment.