Merge pull request #4 from cmudig/interaction-logging

Interaction logging
cmudig · Sep 13, 2021 · 1101e7d · 1101e7d
2 parents d58c766 + 1d31dbd
commit 1101e7d
Show file tree

Hide file tree

Showing 13 changed files with 354 additions and 73 deletions.
diff --git a/README.md b/README.md
@@ -16,7 +16,7 @@ pip install emblaze
 
 The widget should work out of the box when you run `jupyter lab` (see example code below).
 
-*Jupyter Notebook note:* If you are using Jupyter Notebook 5.2 or earlier, you may also need to enable
+_Jupyter Notebook note:_ If you are using Jupyter Notebook 5.2 or earlier, you may also need to enable
 the nbextension:
 
 ```bash
@@ -25,7 +25,7 @@ jupyter nbextension enable --py --sys-prefix emblaze
 
 ## Standalone Demo
 
-Although the full application is designed to work as a Jupyter widget, you can run a standalone version with most of the available features directly in your browser. To do so, simply run the following command after pip-installing the package (note: you do *not* need to clone the repository to run the standalone app):
+Although the full application is designed to work as a Jupyter widget, you can run a standalone version with most of the available features directly in your browser. To do so, simply run the following command after pip-installing the package (note: you do _not_ need to clone the repository to run the standalone app):
 
 ```bash
 python -m emblaze.server
@@ -168,6 +168,32 @@ with open(os.path.join(data_dir, dataset_name, "thumbnails.json"), "w") as file:
     json.dump(thumbnails.to_json(), file)
 ```
 
+### Deployment
+
+First, clean all npm build intermediates:
+
+```
+npm run clean
+```
+
+Bump the widget version in `emblaze/_version.py` and `package.json` if applicable. Then build the notebook widgets and standalone app:
+
+```
+npm run build:all
+```
+
+Run the packaging script to generate the wheel for distribution:
+
+```
+python -m build
+```
+
+Upload to PyPI (replace `<VERSION>` with the version number):
+
+```
+twine upload dist/emblaze-<VERSION>*
+```
+
 ### Development Notes
 
 - Svelte transitions don't seem to work well as they force an expensive re-layout operation. Avoid using them during interactions.
diff --git a/emblaze/recommender.py b/emblaze/recommender.py
@@ -43,14 +43,17 @@ def _make_neighbor_mat(self, neighbors, num_columns):
 
     def _pairwise_jaccard_distances(self, neighbors):
         """Computes the jaccard distance between each row of the given set of neighbors."""
+        lengths = np.array([len(n) for n in neighbors], dtype=np.uint16)
+        if np.sum(lengths) == 0:
+            return np.zeros((len(neighbors), len(neighbors)))
+
         # Make a one-hot matrix of neighbors
         neighbor_mat = self._make_neighbor_mat(neighbors, max(np.max([n for x in neighbors for n in x]) + 1, len(neighbors)))
         # Calculate intersection of sets using dot product
         intersection = np.dot(neighbor_mat, neighbor_mat.T)
         del neighbor_mat
 
         # Use set trick: len(x | y) = len(x) + len(y) - len(x & y)
-        lengths = np.array([len(n) for n in neighbors], dtype=np.uint16)
         length_sums = lengths[:,np.newaxis] + lengths[np.newaxis,:]
         union = np.maximum(length_sums - intersection, np.array([1], dtype=np.uint16), casting='no')
         del length_sums
@@ -182,7 +185,8 @@ def query(self, ids_of_interest=None, filter_ids=None, frame_idx=None, preview_f
                 neighbor_ids = None    
 
             for cluster in self.clusters[frame_key]:
-                frame_labels = "{} &rarr; {}".format(self.embeddings[cluster['frame']].label, self.embeddings[cluster['previewFrame']].label)
+                frame_labels = "{} &rarr; {}".format(self.embeddings[cluster['frame']].label or "Frame " + str(cluster['frame']),
+                                                     self.embeddings[cluster['previewFrame']].label or "Frame " + str(cluster['previewFrame']))
                 base_score = (cluster['consistency'] + cluster['innerChange'] + cluster['gain'] + cluster['loss']) * np.log(len(cluster['ids']))
                 if filter_set is not None:
                     if not cluster['ids'] & filter_set:

diff --git a/emblaze/thumbnails.py b/emblaze/thumbnails.py
@@ -44,6 +44,20 @@ def __init__(self, names, descriptions=None, ids=None):
         if descriptions is not None:
             self.data.set_field(Field.DESCRIPTION, descriptions)
 
+    def name(self, ids=None):
+        """
+        Looks up the NAME field of the underlying data.
+
+        ids: Optional ID or collection of IDs, forwarded unchanged to the
+            field lookup. When omitted, None is forwarded, which the original
+            docstring describes as "all points".
+        """
+        names = self.data.field(Field.NAME, ids=ids)
+        return names
+
+    def description(self, ids=None):
+        """
+        Looks up the DESCRIPTION field of the underlying data.
+
+        ids: Optional ID or collection of IDs, forwarded unchanged to the
+            field lookup. When omitted, None is forwarded, which the original
+            docstring describes as "all points".
+
+        NOTE(review): the original docstring says None is returned when
+        descriptions are not present; that depends on how self.data.field
+        treats a missing field, which is not visible here - confirm.
+        """
+        descriptions = self.data.field(Field.DESCRIPTION, ids=ids)
+        return descriptions
+
     def to_json(self):
         result = super().to_json()
         names = self.data.field(Field.NAME)
@@ -78,26 +92,6 @@ def from_json(data, ids=None):
         descriptions = [items[id_val].get("description", "") for id_val in ids]
         return TextThumbnails(names, descriptions, ids)
 
-    @staticmethod
-    def from_json(data, ids=None):
-        """
-        Builds a TextThumbnails object from a JSON object. The provided object should
-        have an "items" key with a dictionary mapping ID values to text thumbnail
-        objects, each of which must have a 'name' and optionally 'description' keys.
-        """
-        assert "items" in data, "JSON object must contain an 'items' field"
-        items = data["items"]
-        if ids is None:
-            try:
-                ids = [int(id_val) for id_val in list(items.keys())]
-                items = {int(k): v for k, v in items.items()}
-            except:
-                ids = list(items.keys())
-            ids = sorted(ids)
-        names = [items[id_val]["name"] for id_val in ids]
-        descriptions = [items[id_val].get("description", "") for id_val in ids]
-        return TextThumbnails(names, descriptions, ids)
-
     def __getitem__(self, ids):
         """
         Returns text thumbnail information for the given IDs.
@@ -143,9 +137,9 @@ def __init__(self, images, spritesheets=None, ids=None, grid_dimensions=None, im
         """
         super().__init__("spritesheet")
         if spritesheets is not None:
-            self.images = None
             self.ids = ImageThumbnails._get_spritesheet_ids(spritesheets)
             self.spritesheets = spritesheets
+            self.images = None
         else:
             self.images = images
             self.ids = ids or np.arange(len(images))
@@ -173,14 +167,44 @@ def __getitem__(self, ids):
             return [self[id_val] for id_val in ids]
         else:
             result = {}
-            if self.images is not None:
-                result["image"] = self.images[self._id_index[ids]]
+            result["image"] = self.image(ids)
             if self.text_data is not None:
                 result["name"] = self.text_data.field(Field.NAME, ids)
                 if self.text_data.has_field(Field.DESCRIPTION):
                     result["description"] = self.text_data.field(Field.DESCRIPTION, ids)
             return result
 
+    def image(self, ids=None):
+        """
+        Returns the image(s) for the given ID or set of IDs, or the images for
+        all points if ids is not provided.
+
+        ids: A single ID (anything accepted by int()), a list, tuple, set or
+            numpy array of IDs, or None to get the full images container.
+
+        A single ID yields self.images indexed by one position; a collection
+        yields self.images indexed by the list of positions, in iteration
+        order of ids.
+        """
+        if self.images is None:
+            # Objects built from spritesheets start with images set to None,
+            # so rebuild the raw images on first access and keep them.
+            self.images = self._make_raw_images()
+
+        if ids is None:
+            # Without this guard the call fell through to int(None) and
+            # raised TypeError instead of returning all points as documented.
+            return self.images
+
+        if isinstance(ids, (list, tuple, np.ndarray, set)):
+            index = [self._id_index[int(id_val)] for id_val in ids]
+        else:
+            index = self._id_index[int(ids)]
+
+        return self.images[index]
+
+    def name(self, ids=None):
+        """
+        Looks up the NAME field of the attached text data.
+
+        ids: Optional ID or collection of IDs, forwarded unchanged to the
+            field lookup (None when omitted).
+
+        Returns None when this object has no text data.
+        """
+        text_data = self.text_data
+        if text_data is not None:
+            return text_data.field(Field.NAME, ids=ids)
+        return None
+
+    def description(self, ids=None):
+        """
+        Looks up the DESCRIPTION field of the attached text data.
+
+        ids: Optional ID or collection of IDs, forwarded unchanged to the
+            field lookup (None when omitted).
+
+        Returns None when this object has no text data.
+        NOTE(review): the result when text data exists but has no description
+        field depends on text_data.field, which is not visible here - confirm.
+        """
+        text_data = self.text_data
+        if text_data is not None:
+            return text_data.field(Field.DESCRIPTION, ids=ids)
+        return None
+
     def to_json(self):
         result = super().to_json()
         result["spritesheets"] = self.spritesheets
@@ -215,8 +239,8 @@ def from_json(data, ids=None):
         descriptions = None
         if "items" in data:
             items = data["items"]
-            names = [items[str(id_val)]["name"] for id_val in ids]
-            descriptions = [items[str(id_val)].get("description", "") for id_val in ids]
+            names = [items[id_val]["name"] for id_val in ids]
+            descriptions = [items[id_val].get("description", "") for id_val in ids]
 
         return ImageThumbnails(None,
                                spritesheets=spritesheets,
@@ -232,7 +256,33 @@ def _get_spritesheet_ids(spritesheets):
         except:
             pass
         ids = sorted(ids)
-        return ids
+        return np.array(ids)
+
+    def _make_raw_images(self):
+        """
+        Regenerates and returns the original images matrix based on
+        self.spritesheets and self.ids.
+
+        Returns a uint8 array of shape (len(self.ids), height, width, 4).
+        Height and width are read from the first frame of the first
+        spritesheet; every other frame is assumed to have the same size.
+        Rows for IDs that appear in no spritesheet stay all-zero, and those
+        IDs are reported with a printed message.
+
+        Raises ValueError if self.spritesheets is empty.
+        """
+        # Explicit check instead of assert so it still runs under python -O.
+        if not len(self.spritesheets):
+            raise ValueError("spritesheets is empty")
+
+        # Any frame can provide the thumbnail size; take the first one.
+        first_frames = next(iter(self.spritesheets.values()))["spec"]["frames"]
+        first_frame = next(iter(first_frames.values()))["frame"]
+        cols = first_frame["w"]
+        rows = first_frame["h"]
+
+        result = np.zeros((len(self.ids), rows, cols, 4), dtype=np.uint8)
+        seen_ids = set()
+        for spritesheet in self.spritesheets.values():
+            buffer = BytesIO(base64.b64decode(spritesheet["image"].encode('ascii')))
+            # Force four channels so grayscale, RGB or palette PNGs still fit
+            # the (rows, cols, 4) slots of the result array.
+            img = np.array(Image.open(buffer, formats=('PNG',)).convert("RGBA"))
+
+            for id_val, image_spec in spritesheet["spec"]["frames"].items():
+                frame = image_spec["frame"]
+                result[self._id_index[int(id_val)]] = img[frame["y"]:frame["y"] + frame["h"],
+                                                          frame["x"]:frame["x"] + frame["w"]]
+                seen_ids.add(int(id_val))
+
+        # Report only IDs that are genuinely absent from every spritesheet.
+        missing_ids = set(self.ids.tolist()) - seen_ids
+        if missing_ids:
+            print("missing ids when loading images from spritesheets:", missing_ids)
+        return result
 
     def make_spritesheets(self, images, ids, grid_dimensions=None, image_size=None):
         """

diff --git a/emblaze/utils.py b/emblaze/utils.py
@@ -1,6 +1,11 @@
+import sys
 import numpy as np
 from affine import Affine
 from numba import jit
+import json
+import datetime
+import platform
+import os
 
 class Field:
     """Standardized field names for embeddings and projections. These data can
@@ -110,3 +115,36 @@ def inverse_intersection(seqs1, seqs2, mask_ids, outer):
         if len(set1) or len(set2):
             distances[i] = 1 / (1 + num_intersection)
     return distances
+
+class LoggingHelper:
+    """
+    Writes and/or updates a JSON file with interaction information.
+
+    The file holds a single JSON object with header fields ("timestamp",
+    "platform", "version", plus any caller-supplied extras) and a "logs"
+    list to which entries are appended.
+    """
+    def __init__(self, filepath, addl_info=None):
+        """
+        filepath: Path of the JSON log file. If it already exists it is left
+            untouched; otherwise it is created with a fresh header.
+        addl_info: Optional dict merged into the header of a newly created
+            file (its keys override the default header keys).
+        """
+        self.filepath = filepath
+
+        if not os.path.exists(self.filepath):
+            current_data = {
+                "timestamp": str(datetime.datetime.now()),
+                "platform": platform.platform(),
+                "version": sys.version,
+                "logs": []
+            }
+            if addl_info is not None:
+                current_data.update(addl_info)
+            self._write(current_data)
+
+    def _write(self, data):
+        """Serializes data to the log file, replacing it in a single step."""
+        # Write to a sibling temp file first so an interrupted dump never
+        # leaves a truncated, unparseable log file behind.
+        tmp_path = "{}.tmp".format(self.filepath)
+        with open(tmp_path, "w") as file:
+            json.dump(data, file)
+        os.replace(tmp_path, self.filepath)
+
+    def add_logs(self, entries):
+        """
+        Adds a list of logging entries to the log file.
+
+        entries: Iterable of JSON-serializable entries. An empty or None
+            value is a no-op and the file is not rewritten.
+        """
+        if not entries:
+            return
+
+        with open(self.filepath, "r") as file:
+            current_data = json.load(file)
+
+        # Tolerate a pre-existing file that has no "logs" list yet.
+        current_data.setdefault("logs", []).extend(entries)
+
+        self._write(current_data)
diff --git a/emblaze/viewer.py b/emblaze/viewer.py
@@ -15,7 +15,7 @@
 from .frame_colors import compute_colors
 from .datasets import EmbeddingSet
 from .thumbnails import Thumbnails
-from .utils import Field, SidebarPane, matrix_to_affine, affine_to_matrix, DataType, PreviewMode
+from .utils import Field, LoggingHelper, SidebarPane, matrix_to_affine, affine_to_matrix, DataType, PreviewMode
 from .recommender import SelectionRecommender
 from datetime import datetime
 import json
@@ -91,6 +91,16 @@ class Viewer(DOMWidget):
     previewMode = Unicode("").tag(sync=True)
     previewParameters = Dict({}).tag(sync=True)
 
+    # List of past interactions with the widget. When saveInteractionsFlag is
+    # set to True by the widget, the backend will save the interaction history
+    # to file using the loggingHelper.
+    interactionHistory = List([]).tag(sync=True)
+    saveInteractionsFlag = Bool(False).tag(sync=True)
+
+    # Whether to save interaction history/logs to file
+    loggingEnabled = Bool(False).tag(sync=True)
+    loggingHelper = None
+
     def __init__(self, *args, **kwargs):
         """
         embeddings: An EmbeddingSet object.
@@ -102,7 +112,12 @@ def __init__(self, *args, **kwargs):
         self.saveSelectionFlag = False
         self.loadSelectionFlag = False
         self.selectionList = []
-        self.performanceSuggestionsMode = len(self.embeddings[0]) >= PERFORMANCE_SUGGESTIONS_ENABLE
+        if self.loggingEnabled:
+            self.loggingHelper = LoggingHelper('emblaze_logs_{}.json'.format(datetime.now().strftime("%Y%m%d_%H%M%S")),
+                                               {'numFrames': len(self.embeddings),
+                                                'numPoints': len(self.embeddings[0])})
+
+        self._update_performance_suggestions_mode()
         if not self.colorScheme:
             self.colorScheme = self.detect_color_scheme()
         if not self.previewMode:
@@ -328,14 +343,18 @@ def _observe_suggestion_flag(self, change):
         if change.new and not self.loadingSuggestions:
             self._update_suggested_selections()
 
+    def _update_performance_suggestions_mode(self):
+        """
+        Sets performanceSuggestionsMode to whether the length of the first
+        frame multiplied by the number of frames reaches
+        PERFORMANCE_SUGGESTIONS_ENABLE.
+        """
+        first_frame_size = len(self.embeddings[0])
+        frame_count = len(self.embeddings)
+        total_size = first_frame_size * frame_count
+        self.performanceSuggestionsMode = total_size >= PERFORMANCE_SUGGESTIONS_ENABLE
+
     def _update_suggested_selections_background(self):
         """Function that runs in the background to recompute suggested selections."""
         self.recomputeSuggestionsFlag = False
         if self.loadingSuggestions: 
             return
 
         filter_points = None
-        self.performanceSuggestionsMode = len(self.embeddings[0]) >= PERFORMANCE_SUGGESTIONS_ENABLE
+        self._update_performance_suggestions_mode()
         if self.performanceSuggestionsMode:
             # Check if sufficiently few points are visible to show suggestions
             if self.filterIDs and len(self.filterIDs) <= PERFORMANCE_SUGGESTIONS_RECOMPUTE:
@@ -415,3 +434,14 @@ def _update_suggested_selections(self):
         """Recomputes the suggested selections."""
         thread = threading.Thread(target=self._update_suggested_selections_background)
         thread.start()
+
+    @observe("saveInteractionsFlag")
+    def _save_interactions(self, change):
+        """
+        The widget sets the flag to save interaction history periodically
+        because we can't use a timer in the backend.
+
+        change: The change notification; only a truthy change.new triggers
+            a save.
+
+        When a logging helper exists, the accumulated interactionHistory is
+        appended to its log file. In every case the history is then cleared
+        and the flag reset so that the next request can be observed.
+        """
+        if not change.new:
+            return
+
+        # loggingHelper is only created in __init__ when loggingEnabled is
+        # set; without this guard a save request raised AttributeError on None.
+        if self.loggingHelper is not None:
+            self.loggingHelper.add_logs(self.interactionHistory)
+        self.interactionHistory = []
+        self.saveInteractionsFlag = False