Skip to content

Commit

Permalink
Merge pull request #4 from cmudig/interaction-logging
Browse files Browse the repository at this point in the history
Interaction logging
  • Loading branch information
venkatesh-sivaraman committed Sep 13, 2021
2 parents d58c766 + 1d31dbd commit 1101e7d
Show file tree
Hide file tree
Showing 13 changed files with 354 additions and 73 deletions.
30 changes: 28 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ pip install emblaze

The widget should work out of the box when you run `jupyter lab` (see example code below).

*Jupyter Notebook note:* If you are using Jupyter Notebook 5.2 or earlier, you may also need to enable
_Jupyter Notebook note:_ If you are using Jupyter Notebook 5.2 or earlier, you may also need to enable
the nbextension:

```bash
Expand All @@ -25,7 +25,7 @@ jupyter nbextension enable --py --sys-prefix emblaze

## Standalone Demo

Although the full application is designed to work as a Jupyter widget, you can run a standalone version with most of the available features directly in your browser. To do so, simply run the following command after pip-installing the package (note: you do *not* need to clone the repository to run the standalone app):
Although the full application is designed to work as a Jupyter widget, you can run a standalone version with most of the available features directly in your browser. To do so, simply run the following command after pip-installing the package (note: you do _not_ need to clone the repository to run the standalone app):

```bash
python -m emblaze.server
Expand Down Expand Up @@ -168,6 +168,32 @@ with open(os.path.join(data_dir, dataset_name, "thumbnails.json"), "w") as file:
json.dump(thumbnails.to_json(), file)
```

### Deployment

First clean all npm build intermediates:

```
npm run clean
```

Bump the widget version in `emblaze/_version.py` and `package.json` if applicable. Then build the notebook widgets and standalone app:

```
npm run build:all
```

Build the source distribution and wheel into `dist/` (this uses the `build` package, which must be installed):

```
python -m build
```

Upload to PyPI (replace `<VERSION>` with the version number):

```
twine upload dist/emblaze-<VERSION>*
```

### Development Notes

- Svelte transitions don't seem to work well as they force an expensive re-layout operation. Avoid using them during interactions.
8 changes: 6 additions & 2 deletions emblaze/recommender.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,17 @@ def _make_neighbor_mat(self, neighbors, num_columns):

def _pairwise_jaccard_distances(self, neighbors):
"""Computes the jaccard distance between each row of the given set of neighbors."""
lengths = np.array([len(n) for n in neighbors], dtype=np.uint16)
if np.sum(lengths) == 0:
return np.zeros((len(neighbors), len(neighbors)))

# Make a one-hot matrix of neighbors
neighbor_mat = self._make_neighbor_mat(neighbors, max(np.max([n for x in neighbors for n in x]) + 1, len(neighbors)))
# Calculate intersection of sets using dot product
intersection = np.dot(neighbor_mat, neighbor_mat.T)
del neighbor_mat

# Use set trick: len(x | y) = len(x) + len(y) - len(x & y)
lengths = np.array([len(n) for n in neighbors], dtype=np.uint16)
length_sums = lengths[:,np.newaxis] + lengths[np.newaxis,:]
union = np.maximum(length_sums - intersection, np.array([1], dtype=np.uint16), casting='no')
del length_sums
Expand Down Expand Up @@ -182,7 +185,8 @@ def query(self, ids_of_interest=None, filter_ids=None, frame_idx=None, preview_f
neighbor_ids = None

for cluster in self.clusters[frame_key]:
frame_labels = "{} &rarr; {}".format(self.embeddings[cluster['frame']].label, self.embeddings[cluster['previewFrame']].label)
frame_labels = "{} &rarr; {}".format(self.embeddings[cluster['frame']].label or "Frame " + str(cluster['frame']),
self.embeddings[cluster['previewFrame']].label or "Frame " + str(cluster['previewFrame']))
base_score = (cluster['consistency'] + cluster['innerChange'] + cluster['gain'] + cluster['loss']) * np.log(len(cluster['ids']))
if filter_set is not None:
if not cluster['ids'] & filter_set:
Expand Down
102 changes: 76 additions & 26 deletions emblaze/thumbnails.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,20 @@ def __init__(self, names, descriptions=None, ids=None):
if descriptions is not None:
self.data.set_field(Field.DESCRIPTION, descriptions)

def name(self, ids=None):
    """
    Looks up the name field of the underlying text data.

    ids: an ID or collection of IDs to look up. It is forwarded unchanged to
        self.data.field, so leaving it as None requests the names of all
        points.
    """
    name_field = Field.NAME
    return self.data.field(name_field, ids=ids)

def description(self, ids=None):
    """
    Looks up the description field of the underlying text data.

    ids: an ID or collection of IDs to look up. It is forwarded unchanged to
        self.data.field, so leaving it as None requests the descriptions of
        all points. The original documentation states that None is returned
        if descriptions are not present.
    """
    description_field = Field.DESCRIPTION
    return self.data.field(description_field, ids=ids)

def to_json(self):
result = super().to_json()
names = self.data.field(Field.NAME)
Expand Down Expand Up @@ -78,26 +92,6 @@ def from_json(data, ids=None):
descriptions = [items[id_val].get("description", "") for id_val in ids]
return TextThumbnails(names, descriptions, ids)

@staticmethod
def from_json(data, ids=None):
    """
    Builds a TextThumbnails object from a JSON object.

    data: a dictionary with an "items" key that maps ID values to text
        thumbnail objects. Each thumbnail object must have a 'name' key and
        may have a 'description' key (a missing description becomes "").
    ids: optional sequence of IDs to load, in the given order. If None, all
        keys of "items" are used in sorted order; the keys are converted to
        integers when every one of them can be, and kept as-is otherwise.

    Returns a TextThumbnails object built from the names, descriptions and
    IDs. Raises an AssertionError if data has no "items" field.
    """
    assert "items" in data, "JSON object must contain an 'items' field"
    items = data["items"]
    if ids is None:
        try:
            ids = [int(id_val) for id_val in items.keys()]
            items = {int(k): v for k, v in items.items()}
        except (ValueError, TypeError):
            # At least one key is not integer-like, so keep the keys as they
            # are. Only conversion failures are handled here: a bare except
            # would also swallow KeyboardInterrupt and unrelated errors.
            ids = list(items.keys())
        ids = sorted(ids)
    names = [items[id_val]["name"] for id_val in ids]
    descriptions = [items[id_val].get("description", "") for id_val in ids]
    return TextThumbnails(names, descriptions, ids)

def __getitem__(self, ids):
"""
Returns text thumbnail information for the given IDs.
Expand Down Expand Up @@ -143,9 +137,9 @@ def __init__(self, images, spritesheets=None, ids=None, grid_dimensions=None, im
"""
super().__init__("spritesheet")
if spritesheets is not None:
self.images = None
self.ids = ImageThumbnails._get_spritesheet_ids(spritesheets)
self.spritesheets = spritesheets
self.images = None
else:
self.images = images
self.ids = ids or np.arange(len(images))
Expand Down Expand Up @@ -173,14 +167,44 @@ def __getitem__(self, ids):
return [self[id_val] for id_val in ids]
else:
result = {}
if self.images is not None:
result["image"] = self.images[self._id_index[ids]]
result["image"] = self.image(ids)
if self.text_data is not None:
result["name"] = self.text_data.field(Field.NAME, ids)
if self.text_data.has_field(Field.DESCRIPTION):
result["description"] = self.text_data.field(Field.DESCRIPTION, ids)
return result

def image(self, ids=None):
    """
    Returns the image(s) for the given ID or set of IDs, or all images if ids
    is not provided.

    ids: None to get every image, a single ID (anything int() accepts), or a
        list, numpy array or set of IDs.

    If self.images is None, the image array is first rebuilt with
    self._make_raw_images() and stored on self.images for later calls.
    """
    if self.images is None:
        self.images = self._make_raw_images()

    if ids is None:
        # Without this branch the code below would evaluate int(None) and
        # raise a TypeError instead of returning everything as documented.
        return self.images

    if isinstance(ids, (list, np.ndarray, set)):
        index = [self._id_index[int(id_val)] for id_val in ids]
    else:
        index = self._id_index[int(ids)]

    return self.images[index]

def name(self, ids=None):
    """
    Looks up the name field of the attached text data.

    ids: an ID or collection of IDs, forwarded unchanged to
        self.text_data.field (None requests all points).

    Returns None when self.text_data is None, i.e. names are not available.
    """
    text_data = self.text_data
    if text_data is not None:
        return text_data.field(Field.NAME, ids=ids)
    return None

def description(self, ids=None):
    """
    Looks up the description field of the attached text data.

    ids: an ID or collection of IDs, forwarded unchanged to
        self.text_data.field (None requests all points).

    Returns None when self.text_data is None; otherwise returns whatever
    self.text_data.field yields for the description field.
    """
    text_data = self.text_data
    if text_data is not None:
        return text_data.field(Field.DESCRIPTION, ids=ids)
    return None

def to_json(self):
result = super().to_json()
result["spritesheets"] = self.spritesheets
Expand Down Expand Up @@ -215,8 +239,8 @@ def from_json(data, ids=None):
descriptions = None
if "items" in data:
items = data["items"]
names = [items[str(id_val)]["name"] for id_val in ids]
descriptions = [items[str(id_val)].get("description", "") for id_val in ids]
names = [items[id_val]["name"] for id_val in ids]
descriptions = [items[id_val].get("description", "") for id_val in ids]

return ImageThumbnails(None,
spritesheets=spritesheets,
Expand All @@ -232,7 +256,33 @@ def _get_spritesheet_ids(spritesheets):
except:
pass
ids = sorted(ids)
return ids
return np.array(ids)

def _make_raw_images(self):
    """
    Rebuilds the stacked image array from self.spritesheets and self.ids.

    The result is a uint8 array with one slot per entry of self.ids and four
    channels per pixel. The slot height and width are taken from the first
    frame of the first spritesheet. Each frame listed in a spritesheet spec is
    cropped out of the decoded PNG and stored at the position given by
    self._id_index. IDs in self.ids that never appear in any spritesheet keep
    an all-zero image, and a message listing them is printed.
    """
    assert len(self.spritesheets), "spritesheets is empty"

    # Use one arbitrary frame to determine the size of every slot
    first_sheet = list(self.spritesheets.values())[0]
    sample_frames = first_sheet["spec"]["frames"]
    sample_frame = list(sample_frames.values())[0]["frame"]
    cols = sample_frame["w"]
    rows = sample_frame["h"]

    result = np.zeros((len(self.ids), rows, cols, 4), dtype=np.uint8)
    seen_ids = set()
    for sheet in self.spritesheets.values():
        # The sheet image is stored as a base64-encoded PNG string
        png_bytes = base64.b64decode(sheet["image"].encode('ascii'))
        img = np.array(Image.open(BytesIO(png_bytes), formats=('PNG',)))

        for id_val, image_spec in sheet["spec"]["frames"].items():
            frame = image_spec["frame"]
            top = frame["y"]
            bottom = top + frame["h"]
            left = frame["x"]
            right = left + frame["w"]
            patch = img[top:bottom, left:right]
            sprite_id = int(id_val)
            result[self._id_index[sprite_id]] = patch
            seen_ids.add(sprite_id)

    expected_ids = set(self.ids.tolist())
    if len(seen_ids & expected_ids) != len(self.ids):
        print("missing ids when loading images from spritesheets:", expected_ids - seen_ids)
    return result

def make_spritesheets(self, images, ids, grid_dimensions=None, image_size=None):
"""
Expand Down
38 changes: 38 additions & 0 deletions emblaze/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import sys
import numpy as np
from affine import Affine
from numba import jit
import json
import datetime
import platform
import os

class Field:
"""Standardized field names for embeddings and projections. These data can
Expand Down Expand Up @@ -110,3 +115,36 @@ def inverse_intersection(seqs1, seqs2, mask_ids, outer):
if len(set1) or len(set2):
distances[i] = 1 / (1 + num_intersection)
return distances

class LoggingHelper:
    """
    Writes and/or updates a JSON file with interaction information.

    The file contains a single JSON object. When this helper creates the file
    it stores a creation timestamp, the platform string, the Python version,
    any additional info supplied by the caller, and a "logs" list to which
    entries are appended by add_logs.
    """
    def __init__(self, filepath, addl_info=None):
        """
        filepath: path of the JSON log file. If a file already exists at this
            path it is left untouched, and later entries are appended to it.
        addl_info: optional dictionary of extra JSON-serializable values that
            are merged into the header of a newly created file.
        """
        super().__init__()
        self.filepath = filepath

        if not os.path.exists(self.filepath):
            current_data = {
                "timestamp": str(datetime.datetime.now()),
                "platform": platform.platform(),
                "version": sys.version,
                "logs": []
            }
            if addl_info is not None:
                current_data.update(addl_info)
            self._write(current_data)

    def _read(self):
        """Loads and returns the JSON object currently stored in the log file."""
        with open(self.filepath, "r", encoding="utf-8") as file:
            return json.load(file)

    def _write(self, data):
        """Overwrites the log file with the given JSON-serializable object."""
        with open(self.filepath, "w", encoding="utf-8") as file:
            json.dump(data, file)

    def add_logs(self, entries):
        """
        Adds a list of logging entries to the log file.

        The whole file is read, extended and written back on every call.
        """
        current_data = self._read()

        # A file that existed before this helper was created may not have a
        # "logs" list yet; create it rather than failing with a KeyError.
        current_data.setdefault("logs", []).extend(entries)

        self._write(current_data)
36 changes: 33 additions & 3 deletions emblaze/viewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from .frame_colors import compute_colors
from .datasets import EmbeddingSet
from .thumbnails import Thumbnails
from .utils import Field, SidebarPane, matrix_to_affine, affine_to_matrix, DataType, PreviewMode
from .utils import Field, LoggingHelper, SidebarPane, matrix_to_affine, affine_to_matrix, DataType, PreviewMode
from .recommender import SelectionRecommender
from datetime import datetime
import json
Expand Down Expand Up @@ -91,6 +91,16 @@ class Viewer(DOMWidget):
previewMode = Unicode("").tag(sync=True)
previewParameters = Dict({}).tag(sync=True)

# List of past interactions with the widget. When saveInteractionsFlag is
# set to True by the widget, the backend will save the interaction history
# to file using the loggingHelper.
interactionHistory = List([]).tag(sync=True)
saveInteractionsFlag = Bool(False).tag(sync=True)

# Whether to save interaction history/logs to file
loggingEnabled = Bool(False).tag(sync=True)
loggingHelper = None

def __init__(self, *args, **kwargs):
"""
embeddings: An EmbeddingSet object.
Expand All @@ -102,7 +112,12 @@ def __init__(self, *args, **kwargs):
self.saveSelectionFlag = False
self.loadSelectionFlag = False
self.selectionList = []
self.performanceSuggestionsMode = len(self.embeddings[0]) >= PERFORMANCE_SUGGESTIONS_ENABLE
if self.loggingEnabled:
self.loggingHelper = LoggingHelper('emblaze_logs_{}.json'.format(datetime.now().strftime("%Y%m%d_%H%M%S")),
{'numFrames': len(self.embeddings),
'numPoints': len(self.embeddings[0])})

self._update_performance_suggestions_mode()
if not self.colorScheme:
self.colorScheme = self.detect_color_scheme()
if not self.previewMode:
Expand Down Expand Up @@ -328,14 +343,18 @@ def _observe_suggestion_flag(self, change):
if change.new and not self.loadingSuggestions:
self._update_suggested_selections()

def _update_performance_suggestions_mode(self):
    """
    Decides whether suggestions should be computed in performance mode.

    Performance mode is switched on when the length of the first embedding
    multiplied by the number of embeddings reaches
    PERFORMANCE_SUGGESTIONS_ENABLE.
    """
    points_per_frame = len(self.embeddings[0])
    num_frames = len(self.embeddings)
    total_size = points_per_frame * num_frames
    self.performanceSuggestionsMode = total_size >= PERFORMANCE_SUGGESTIONS_ENABLE

def _update_suggested_selections_background(self):
"""Function that runs in the background to recompute suggested selections."""
self.recomputeSuggestionsFlag = False
if self.loadingSuggestions:
return

filter_points = None
self.performanceSuggestionsMode = len(self.embeddings[0]) >= PERFORMANCE_SUGGESTIONS_ENABLE
self._update_performance_suggestions_mode()
if self.performanceSuggestionsMode:
# Check if sufficiently few points are visible to show suggestions
if self.filterIDs and len(self.filterIDs) <= PERFORMANCE_SUGGESTIONS_RECOMPUTE:
Expand Down Expand Up @@ -415,3 +434,14 @@ def _update_suggested_selections(self):
"""Recomputes the suggested selections."""
thread = threading.Thread(target=self._update_suggested_selections_background)
thread.start()

@observe("saveInteractionsFlag")
def _save_interactions(self, change):
    """
    The widget sets the flag to save interaction history periodically
    because we can't use a timer in the backend.

    When the flag becomes true, the accumulated interactionHistory is
    appended to the log file (if a logging helper exists), the history is
    emptied, and the flag is reset to False.
    """
    if not change.new:
        return

    # loggingHelper stays None unless logging was enabled when the viewer
    # was created. Skip the write in that case instead of raising an
    # AttributeError, but still clear the history and reset the flag.
    if self.loggingHelper is not None:
        self.loggingHelper.add_logs(self.interactionHistory)
    self.interactionHistory = []
    self.saveInteractionsFlag = False

0 comments on commit 1101e7d

Please sign in to comment.