From 444c095e7ca21e8f9bac35901b4648b869091d7f Mon Sep 17 00:00:00 2001
From: "K. Shankari" <shankari@eecs.berkeley.edu>
Date: Thu, 6 Jul 2017 20:45:01 -0700
Subject: [PATCH] Add support to reset habitica autocheck tasks to a particular
 point

Currently, this assumes that no points were awarded after that point in time.
If points were awarded, but incorrectly, we don't have many options for
fixing them, since there is no way to "set points".

TODO: investigate whether decrementing the task counter the appropriate number
of times gives an idempotent result.
---
 bin/ext_service/reset_habitica_timestamps.py  | 74 +++++++++++++++++++
 .../habitica/auto_tasks/active_distance.py    |  9 ++-
 emission/net/ext_service/habitica/executor.py | 50 +++++++++++++
 .../tests/netTests/TestHabiticaAutocheck.py   | 57 +++++++++++---
 4 files changed, 178 insertions(+), 12 deletions(-)
 create mode 100644 bin/ext_service/reset_habitica_timestamps.py

diff --git a/bin/ext_service/reset_habitica_timestamps.py b/bin/ext_service/reset_habitica_timestamps.py
new file mode 100644
index 000000000..61527988d
--- /dev/null
+++ b/bin/ext_service/reset_habitica_timestamps.py
@@ -0,0 +1,74 @@
+"""
+Script to reset the habitica autocheck task timestamps to a particular date.
+Options documented in
+https://github.com/e-mission/e-mission-server/issues/333#issuecomment-312464984
+"""
+import logging
+
+import argparse
+import uuid
+import arrow
+import copy
+import pymongo
+
+import emission.net.ext_service.habitica.executor as enehe
+import emission.core.get_database as edb
+
+def _get_user_list(args):
+    # The selectors are mutually exclusive, so the first one set decides.
+    if args.all:
+        return _find_all_users()
+    if args.platform:
+        return _find_platform_users(args.platform)
+    if args.email_list:
+        return _email_2_user_list(args.email_list)
+    assert args.user_list is not None
+    return [uuid.UUID(u) for u in args.user_list]
+
+def _find_platform_users(platform):
+    return edb.get_timeseries_db().find({'metadata.platform': platform}).distinct('user_id')
+
+def _find_all_users():
+    return edb.get_timeseries_db().find().distinct('user_id')
+
+def _email_2_user_list(email_list):
+    import emission.core.wrapper.user as ecwu  # missing at file level -> NameError
+    return [ecwu.User.fromEmail(e) for e in email_list]  # NOTE(review): confirm these are UUIDs
+
+if __name__ == '__main__':
+    logging.basicConfig(level=logging.DEBUG)
+
+    parser = argparse.ArgumentParser(description="Reset the habitica pipeline.  Does NOT delete points, so to avoid double counting, use only in situations where the original run would not have given any points")
+    # Options corresponding to
+    # https://github.com/e-mission/e-mission-server/issues/333#issuecomment-312464984
+    group = parser.add_mutually_exclusive_group(required=True)
+    group.add_argument("-a", "--all", action="store_true", default=False,
+        help="reset the pipeline for all users")
+    group.add_argument("-p", "--platform", choices = ['android', 'ios'],
+                        help="reset the pipeline for all on the specified platform")
+    group.add_argument("-u", "--user_list", nargs='+',
+        help="user ids to reset the pipeline for")
+    group.add_argument("-e", "--email_list", nargs='+',
+        help="email addresses to reset the pipeline for")
+    parser.add_argument("date",
+        help="date to reset the pipeline to. Format 'YYYY-mm-dd' e.g. 2016-02-17. Interpreted in UTC, so 2016-02-17 will reset the pipeline to 2016-02-16T16:00:00-08:00 in the pacific time zone")
+    parser.add_argument("-n", "--dry_run", action="store_true", default=False,
+                        help="do everything except actually perform the operations")
+
+    args = parser.parse_args()
+    print(args)
+
+    print("Resetting timestamps to %s" % args.date)
+    print("WARNING! Any points awarded after that date will be double counted!")
+    # Convert the requested date into the epoch timestamp to reset to
+    day_dt = arrow.get(args.date, "YYYY-MM-DD")
+    logging.debug("day_dt is %s" % day_dt)
+    day_ts = day_dt.timestamp
+    logging.debug("day_ts is %s" % day_ts)
+    user_list = _get_user_list(args)
+    logging.info("received list with %s users" % len(user_list))
+    logging.info("first few entries are %s" % user_list[0:5])
+    for user_id in user_list:
+        logging.info("resetting user %s to ts %s" % (user_id, day_ts))
+        enehe.reset_all_tasks_to_ts(user_id, day_ts, args.dry_run)
+
diff --git a/emission/net/ext_service/habitica/auto_tasks/active_distance.py b/emission/net/ext_service/habitica/auto_tasks/active_distance.py
index c57af5e70..779ba519e 100644
--- a/emission/net/ext_service/habitica/auto_tasks/active_distance.py
+++ b/emission/net/ext_service/habitica/auto_tasks/active_distance.py
@@ -4,6 +4,7 @@
 import logging
 import arrow
 import attrdict as ad
+import copy
 
 # Our imports
 import emission.core.get_database as edb
@@ -97,5 +98,11 @@ def give_points(user_id, task, curr_state):
     logging.debug("Returning %s" % new_state)
     return new_state
 
-
+def reset_to_ts(user_id, ts, task, curr_state):
+    # Work on a shallow copy and only move last_timestamp back to ts.
+    # The leftover walk/bike counts cannot be recomputed without re-running
+    # from scratch, so they are left untouched; the error is at most 1 point.
+    reset_state = copy.copy(curr_state)
+    reset_state['last_timestamp'] = ts
+    return reset_state
 
diff --git a/emission/net/ext_service/habitica/executor.py b/emission/net/ext_service/habitica/executor.py
index 73991eb2b..377d385c9 100644
--- a/emission/net/ext_service/habitica/executor.py
+++ b/emission/net/ext_service/habitica/executor.py
@@ -45,6 +45,47 @@ def give_points_for_task(user_id, task):
                   (map_fn, task.task_id, new_state))
     save_task_state(user_id, task, new_state)
 
+def reset_all_tasks_to_ts(user_id, ts, is_dry_run):
+    # Fetch the user's tasks from habitica, then reset the ones under "data"
+    logging.debug("Entering habitica reset for user %s" % user_id)
+    habitica_task_result = get_tasks_from_habitica(user_id)
+    logging.debug("Retrieved %d from habitica for user %s" % (len(habitica_task_result["data"]), user_id))
+    reset_tasks_to_ts(user_id, ts, habitica_task_result["data"], is_dry_run)
+
+def reset_tasks_to_ts(user_id, ts, habitica_tasks, is_dry_run):
+    """
+    Reset each autocheckable task to ts; split out so tests can pass munged tasks
+    :param user_id: user id
+    :param ts: timestamp handed to each task's reset function
+    :param habitica_tasks: list of habitica tasks
+    :param is_dry_run: passed through to reset_task_to_ts, which skips the save
+    """
+    # Filter out manual and convert auto to wrapper
+    auto_tasks = get_autocheckable(habitica_tasks)
+    logging.debug("after autocheckable filter %s -> %s" % (len(habitica_tasks),
+                                                           len(auto_tasks)))
+    for task in auto_tasks:
+        logging.debug("About to reset timestamp for user %s, task %s" % (user_id,
+                                                                     task.task_id))
+        try:
+            reset_task_to_ts(user_id, ts, is_dry_run, task)
+        except Exception as e:
+            logging.error("While processing task %s, found error %s" %
+                          (task.task_id, e))
+
+def reset_task_to_ts(user_id, ts, is_dry_run, task):
+    curr_state = get_task_state(user_id, task)
+    logging.debug("for task %s, curr_state = %s" % (task.task_id, curr_state))
+    reset_fn = get_reset_fn(task.mapper)
+    # TODO: Figure out if we should pass in the args separately
+    new_state = reset_fn(user_id, ts, task, curr_state)
+    logging.debug("after running reset function %s for task %s, new_state = %s" %
+                  (reset_fn, task.task_id, new_state))
+    if is_dry_run:  # dry run: the new state is computed and logged, never saved
+        logging.info("is_dry_run = True, not saving the state")
+    else:
+        save_task_state(user_id, task, new_state)
+
 def get_tasks_from_habitica(user_id):
     tasks_uri = "/api/v3/tasks/user"
     # Get all tasks from the user
@@ -102,6 +143,15 @@ def get_map_fn(fn_name):
     module = importlib.import_module(module_name)
     return getattr(module, "give_points")
 
+# Resolve a mapper name to the reset_to_ts function of its auto_tasks module
+def get_reset_fn(fn_name):
+    import importlib
+
+    mapper_module_name = get_module_name(fn_name)
+    logging.debug("module_name = %s" % mapper_module_name)
+    mapper_module = importlib.import_module(mapper_module_name)
+    return getattr(mapper_module, "reset_to_ts")
+
 def get_module_name(fn_name):
     return "emission.net.ext_service.habitica.auto_tasks.{key}".format(
         key=fn_name)
diff --git a/emission/tests/netTests/TestHabiticaAutocheck.py b/emission/tests/netTests/TestHabiticaAutocheck.py
index 24b6d12d5..cde9e09dd 100644
--- a/emission/tests/netTests/TestHabiticaAutocheck.py
+++ b/emission/tests/netTests/TestHabiticaAutocheck.py
@@ -104,16 +104,16 @@ def _fillModeDistanceDuration(self, section_list):
   def testAutomaticRewardActiveTransportation(self):
     # Create a task that we can retrieve later
 
-    new_task_text = randomGen()
-    new_habit = {'type': "habit", 'text': new_task_text,
+    self.new_task_text = randomGen()
+    new_habit = {'type': "habit", 'text': self.new_task_text,
                  'notes': 'AUTOCHECK: {"mapper": "active_distance",'
                           '"args": {"walk_scale": 1000, "bike_scale": 3000}}'}
     habit_id = proxy.create_habit(self.testUUID, new_habit)
 
-    dummy_task = enehat.Task()
-    dummy_task.task_id = habit_id
+    self.dummy_task = enehat.Task()
+    self.dummy_task.task_id = habit_id
     logging.debug("in testAutomaticRewardActiveTransportation,"
-        "the new habit id is = %s and task is %s" % (habit_id, dummy_task))
+        "the new habit id is = %s and task is %s" % (habit_id, self.dummy_task))
 
     #Create test data -- code copied from TestTimeGrouping
     key = (2016, 5, 3)
@@ -142,27 +142,62 @@ def testAutomaticRewardActiveTransportation(self):
     logging.debug("in testAutomaticRewardActiveTransportation, result = %s" % summary_ts)
     
     #Get user data before scoring
-    user_before = autocheck.get_task_state(self.testUUID, dummy_task)
+    user_before = autocheck.get_task_state(self.testUUID, self.dummy_task)
     self.assertIsNone(user_before)
 
     # Needed to work, otherwise sections from may won't show up in the query!
     modification = {"last_timestamp": arrow.Arrow(2016,5,1).timestamp, "bike_count": 0, "walk_count":0}
-    autocheck.save_task_state(self.testUUID, dummy_task, modification)
+    autocheck.save_task_state(self.testUUID, self.dummy_task, modification)
 
-    user_before = autocheck.get_task_state(self.testUUID, dummy_task)
+    user_before = autocheck.get_task_state(self.testUUID, self.dummy_task)
     self.assertEqual(int(user_before['bike_count']), 0)
 
     habits_before = proxy.habiticaProxy(self.testUUID, 'GET', "/api/v3/tasks/user?type=habits", None).json()
-    bike_pts_before = [habit['history'] for habit in habits_before['data'] if habit['text'] == new_task_text]
+    bike_pts_before = [habit['history'] for habit in habits_before['data'] if habit['text'] == self.new_task_text]
     #Score points
     autocheck.give_points_for_all_tasks(self.testUUID)
     #Get user data after scoring and check results
-    user_after = autocheck.get_task_state(self.testUUID, dummy_task)
+    user_after = autocheck.get_task_state(self.testUUID, self.dummy_task)
     self.assertEqual(int(user_after['bike_count']),1500)
     habits_after = proxy.habiticaProxy(self.testUUID, 'GET', "/api/v3/tasks/user?type=habits", None).json()
-    bike_pts_after = [habit['history'] for habit in habits_after['data'] if habit['text'] == new_task_text]
+    bike_pts_after = [habit['history'] for habit in habits_after['data'] if habit['text'] == self.new_task_text]
     self.assertTrue(len(bike_pts_after[0]) - len(bike_pts_before[0]) == 2)
 
+  def testResetActiveTransportation(self):
+    # Run the reward test first so that task state and points already exist
+    self.testAutomaticRewardActiveTransportation()
+    #Get user data before resetting
+    user_before = autocheck.get_task_state(self.testUUID, self.dummy_task)
+    self.assertEqual(int(user_before['bike_count']), 1500)
+
+    habits_before = proxy.habiticaProxy(self.testUUID, 'GET', "/api/v3/tasks/user?type=habits", None).json()
+    bike_pts_before = [habit['history'] for habit in habits_before['data'] if habit['text'] == self.new_task_text]
+
+    #Reset
+    reset_ts = arrow.Arrow(2016,5,3,9).timestamp
+    autocheck.reset_all_tasks_to_ts(self.testUUID, reset_ts, is_dry_run=False)
+
+    # Check that the stored state now carries the reset timestamp
+    user_after = autocheck.get_task_state(self.testUUID, self.dummy_task)
+    self.assertEqual(int(user_after['last_timestamp']), reset_ts)
+
+    # Re-score points
+    # Sections after reset_ts should be rewarded again. NOTE(review): the
+    # original note expected an additional 2.5 + 3.5 km = 6km here -- confirm
+    autocheck.give_points_for_all_tasks(self.testUUID)
+
+    #Get user data after scoring and check results
+    # The assertion expects the leftover bike_count to be 0 after re-scoring.
+    # NOTE(review): the original note expected it to stay at 1500 -- confirm
+    user_rescored = autocheck.get_task_state(self.testUUID, self.dummy_task)
+    self.assertEqual(int(user_rescored['bike_count']), 0)
+
+    # Expect 3 more history entries. NOTE(review): original note guessed "6 points more?"
+    habits_after = proxy.habiticaProxy(self.testUUID, 'GET', "/api/v3/tasks/user?type=habits", None).json()
+    bike_pts_after = [habit['history'] for habit in habits_after['data'] if habit['text'] == self.new_task_text]
+    logging.debug("bike_pts_after = %s" % (len(bike_pts_after[0]) - len(bike_pts_before[0])))
+    self.assertEqual(len(bike_pts_after[0]) - len(bike_pts_before[0]), 3)
+
 def randomGen():
     alphabet = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
     length = 5