Skip to content

Commit

Permalink
Improve efficiency of activity deduplication algorithm.
Browse files Browse the repository at this point in the history
  • Loading branch information
cpfair committed Feb 25, 2016
1 parent 1de30ea commit 89deb62
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 9 deletions.
21 changes: 21 additions & 0 deletions tapiriik/services/interchange.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,27 @@ def __eq__(self, other):
def __ne__(self, other):
return not self.__eq__(other)

# We define ascending as most-recent first.
# For simplicity, we compare without timezones.
# The only place this ordering is (intentionally...) used, it doesn't matter.
def __gt__(self, other):
try:
return self.StartTime.replace(tzinfo=None) < other.StartTime.replace(tzinfo=None)
except AttributeError:
return self.StartTime.replace(tzinfo=None) < other.replace(tzinfo=None)

def __ge__(self, other):
try:
return self.StartTime.replace(tzinfo=None) <= other.StartTime.replace(tzinfo=None)
except AttributeError:
return self.StartTime.replace(tzinfo=None) <= other.replace(tzinfo=None)

def __lt__(self, other):
return not self.__ge__(other)

def __le__(self, other):
return not self.__gt__(other)


class UploadedActivity (Activity):
pass # will contain list of which service instances contain this activity - not really merited
Expand Down
25 changes: 18 additions & 7 deletions tapiriik/sync/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@
import pytz
import kombu
import json
import bisect

# Set this up seperate from the logger used in this scope, so services logging messages are caught and logged into user's files.
# Set this up separate from the logger used in this scope, so services logging messages are caught and logged into user's files.
_global_logger = logging.getLogger("tapiriik")

_global_logger.setLevel(logging.DEBUG)
Expand Down Expand Up @@ -439,8 +440,13 @@ def _accumulateActivities(self, conn, svcActivities, no_add=False):
if act.TZ and not hasattr(act.TZ, "localize"):
raise ValueError("Got activity with TZ type " + str(type(act.TZ)) + " instead of a pytz timezone")
# Used to ensureTZ() right here - doubt it's needed any more?
existElsewhere = [
x for x in self._activities if
# Binsearch to find which activities actually need individual attention.
# Otherwise it's O(mn^2).
# self._activities is sorted most recent first
relevantActivitiesStart = bisect.bisect_left(self._activities, act.StartTime + timezoneErrorPeriod)
relevantActivitiesEnd = bisect.bisect_right(self._activities, act.StartTime - timezoneErrorPeriod, lo=relevantActivitiesStart)
extantActIter = (
x for x in (self._activities[idx] for idx in range(relevantActivitiesStart, relevantActivitiesEnd)) if
(
# Identical
x.UID == act.UID
Expand Down Expand Up @@ -481,9 +487,14 @@ def _accumulateActivities(self, conn, svcActivities, no_add=False):
and
# Prevents closely-spaced activities of known different type from being lumped together - esp. important for manually-enetered ones
(x.Type == ActivityType.Other or act.Type == ActivityType.Other or x.Type == act.Type or ActivityType.AreVariants([act.Type, x.Type]))
]
if len(existElsewhere) > 0:
existingActivity = existElsewhere[0]
)

try:
existingActivity = next(extantActIter)
except StopIteration:
existingActivity = None

if existingActivity:
# we don't merge the exclude values here, since at this stage the services have the option of just not returning those activities
if act.TZ is not None and existingActivity.TZ is None:
existingActivity.TZ = act.TZ
Expand Down Expand Up @@ -522,7 +533,7 @@ def _accumulateActivities(self, conn, svcActivities, no_add=False):
act.UIDs = existingActivity.UIDs # stop the circular inclusion, not that it matters
continue
if not no_add:
self._activities.append(act)
bisect.insort_left(self._activities, act)

def _determineEligibleRecipientServices(self, activity, recipientServices):
from tapiriik.auth import User
Expand Down
4 changes: 2 additions & 2 deletions tapiriik/testing/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,8 +393,8 @@ def test_activity_coalesce(self):
s._accumulateActivities(recB, [copy.deepcopy(actB)])

self.assertEqual(len(s._activities), 2)
act = s._activities[0]
self.assertEqual(act.Type, actA.Type)
self.assertEqual(s._activities[0].Type, actB.Type)
self.assertEqual(s._activities[1].Type, actA.Type)

def test_eligibility_excluded(self):
user = TestTools.create_mock_user()
Expand Down

0 comments on commit 89deb62

Please sign in to comment.