Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Interaction logging #4

Merged
merged 6 commits into from
Sep 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
30 changes: 28 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ pip install emblaze

The widget should work out of the box when you run `jupyter lab` (see example code below).

*Jupyter Notebook note:* If you are using Jupyter Notebook 5.2 or earlier, you may also need to enable
_Jupyter Notebook note:_ If you are using Jupyter Notebook 5.2 or earlier, you may also need to enable
the nbextension:

```bash
Expand All @@ -25,7 +25,7 @@ jupyter nbextension enable --py --sys-prefix emblaze

## Standalone Demo

Although the full application is designed to work as a Jupyter widget, you can run a standalone version with most of the available features directly in your browser. To do so, simply run the following command after pip-installing the package (note: you do *not* need to clone the repository to run the standalone app):
Although the full application is designed to work as a Jupyter widget, you can run a standalone version with most of the available features directly in your browser. To do so, simply run the following command after pip-installing the package (note: you do _not_ need to clone the repository to run the standalone app):

```bash
python -m emblaze.server
Expand Down Expand Up @@ -168,6 +168,32 @@ with open(os.path.join(data_dir, dataset_name, "thumbnails.json"), "w") as file:
json.dump(thumbnails.to_json(), file)
```

### Deployment

First clean all npm build intermediates:

```
npm run clean
```

Bump the widget version in `emblaze/_version.py` and `package.json` if applicable. Then build the notebook widgets and standalone app:

```
npm run build:all
```

Run the packaging script to generate the wheel for distribution:

```
python -m build
```

Upload to PyPI (replace `<VERSION>` with the version number):

```
twine upload dist/emblaze-<VERSION>*
```

### Development Notes

- Svelte transitions don't seem to work well as they force an expensive re-layout operation. Avoid using them during interactions.
8 changes: 6 additions & 2 deletions emblaze/recommender.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,17 @@ def _make_neighbor_mat(self, neighbors, num_columns):

def _pairwise_jaccard_distances(self, neighbors):
"""Computes the jaccard distance between each row of the given set of neighbors."""
lengths = np.array([len(n) for n in neighbors], dtype=np.uint16)
if np.sum(lengths) == 0:
return np.zeros((len(neighbors), len(neighbors)))

# Make a one-hot matrix of neighbors
neighbor_mat = self._make_neighbor_mat(neighbors, max(np.max([n for x in neighbors for n in x]) + 1, len(neighbors)))
# Calculate intersection of sets using dot product
intersection = np.dot(neighbor_mat, neighbor_mat.T)
del neighbor_mat

# Use set trick: len(x | y) = len(x) + len(y) - len(x & y)
lengths = np.array([len(n) for n in neighbors], dtype=np.uint16)
length_sums = lengths[:,np.newaxis] + lengths[np.newaxis,:]
union = np.maximum(length_sums - intersection, np.array([1], dtype=np.uint16), casting='no')
del length_sums
Expand Down Expand Up @@ -182,7 +185,8 @@ def query(self, ids_of_interest=None, filter_ids=None, frame_idx=None, preview_f
neighbor_ids = None

for cluster in self.clusters[frame_key]:
frame_labels = "{} &rarr; {}".format(self.embeddings[cluster['frame']].label, self.embeddings[cluster['previewFrame']].label)
frame_labels = "{} &rarr; {}".format(self.embeddings[cluster['frame']].label or "Frame " + str(cluster['frame']),
self.embeddings[cluster['previewFrame']].label or "Frame " + str(cluster['previewFrame']))
base_score = (cluster['consistency'] + cluster['innerChange'] + cluster['gain'] + cluster['loss']) * np.log(len(cluster['ids']))
if filter_set is not None:
if not cluster['ids'] & filter_set:
Expand Down
102 changes: 76 additions & 26 deletions emblaze/thumbnails.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,20 @@ def __init__(self, names, descriptions=None, ids=None):
if descriptions is not None:
self.data.set_field(Field.DESCRIPTION, descriptions)

def name(self, ids=None):
    """
    Looks up the name field in this object's data store.

    ids: optional ID or collection of IDs to look up. When omitted, the
        name(s) for all points are returned.
    """
    names = self.data.field(Field.NAME, ids=ids)
    return names

def description(self, ids=None):
    """
    Looks up the description field in this object's data store.

    ids: optional ID or collection of IDs to look up. When omitted, the
        description(s) for all points are returned.

    Returns None if descriptions are not present.
    """
    descriptions = self.data.field(Field.DESCRIPTION, ids=ids)
    return descriptions

def to_json(self):
result = super().to_json()
names = self.data.field(Field.NAME)
Expand Down Expand Up @@ -78,26 +92,6 @@ def from_json(data, ids=None):
descriptions = [items[id_val].get("description", "") for id_val in ids]
return TextThumbnails(names, descriptions, ids)

# NOTE(review): this definition looks identical to the from_json that ends
# immediately above it; if both live in the same class, the later one silently
# wins. Confirm whether one of them should be removed.
@staticmethod
def from_json(data, ids=None):
"""
Builds a TextThumbnails object from a JSON object. The provided object should
have an "items" key with a dictionary mapping ID values to text thumbnail
objects, each of which must have a 'name' and optionally 'description' keys.

data: mapping that contains an "items" entry (an AssertionError is raised
otherwise).
ids: optional collection of keys of data["items"] to load. When None, the
keys of data["items"] are used, converted to int if every key parses as an
integer and left unchanged otherwise.

Items without a 'description' key get an empty-string description.
"""
assert "items" in data, "JSON object must contain an 'items' field"
items = data["items"]
if ids is None:
try:
# Convert both the ID list and the lookup table to integer keys so the
# two stay consistent with each other.
ids = [int(id_val) for id_val in list(items.keys())]
items = {int(k): v for k, v in items.items()}
# NOTE(review): bare except also swallows KeyboardInterrupt/SystemExit;
# presumably only the ValueError from int() on a non-numeric key is intended.
except:
# Fall back to the raw (unconverted) keys.
ids = list(items.keys())
# NOTE(review): indentation is not visible here, so it is unclear whether this
# sort applies only to derived IDs or also to caller-supplied ids - confirm.
ids = sorted(ids)
names = [items[id_val]["name"] for id_val in ids]
descriptions = [items[id_val].get("description", "") for id_val in ids]
return TextThumbnails(names, descriptions, ids)

def __getitem__(self, ids):
"""
Returns text thumbnail information for the given IDs.
Expand Down Expand Up @@ -143,9 +137,9 @@ def __init__(self, images, spritesheets=None, ids=None, grid_dimensions=None, im
"""
super().__init__("spritesheet")
if spritesheets is not None:
self.images = None
self.ids = ImageThumbnails._get_spritesheet_ids(spritesheets)
self.spritesheets = spritesheets
self.images = None
else:
self.images = images
self.ids = ids or np.arange(len(images))
Expand Down Expand Up @@ -173,14 +167,44 @@ def __getitem__(self, ids):
return [self[id_val] for id_val in ids]
else:
result = {}
if self.images is not None:
result["image"] = self.images[self._id_index[ids]]
result["image"] = self.image(ids)
if self.text_data is not None:
result["name"] = self.text_data.field(Field.NAME, ids)
if self.text_data.has_field(Field.DESCRIPTION):
result["description"] = self.text_data.field(Field.DESCRIPTION, ids)
return result

def image(self, ids=None):
    """
    Returns the image(s) for the given ID or set of IDs, or all points if ids
    is not provided.

    ids: a single ID, a collection of IDs (list, tuple, set or numpy array),
        or None to return every image.

    The raw image matrix is rebuilt from the spritesheets on first use when
    it has not been loaded yet, and cached on self.images.

    Raises KeyError if an ID is not present in the ID index.
    """
    if self.images is None:
        self.images = self._make_raw_images()

    # The documented "no ids" case: int(None) would raise a TypeError, so
    # return the whole image matrix directly.
    if ids is None:
        return self.images

    if isinstance(ids, (list, tuple, np.ndarray, set)):
        # Translate each ID into its row position in the image matrix.
        index = [self._id_index[int(id_val)] for id_val in ids]
    else:
        index = self._id_index[int(ids)]

    return self.images[index]

def name(self, ids=None):
    """
    Looks up the name field in the attached text data.

    ids: optional ID or collection of IDs to look up. When omitted, the
        name(s) for all points are returned.

    Returns None if names are not available (no text data attached).
    """
    text_data = self.text_data
    if text_data is not None:
        return text_data.field(Field.NAME, ids=ids)
    return None

def description(self, ids=None):
    """
    Looks up the description field in the attached text data.

    ids: optional ID or collection of IDs to look up. When omitted, the
        description(s) for all points are returned.

    Returns None if descriptions are not present (including when no text
    data is attached).
    """
    text_data = self.text_data
    if text_data is not None:
        return text_data.field(Field.DESCRIPTION, ids=ids)
    return None

def to_json(self):
result = super().to_json()
result["spritesheets"] = self.spritesheets
Expand Down Expand Up @@ -215,8 +239,8 @@ def from_json(data, ids=None):
descriptions = None
if "items" in data:
items = data["items"]
names = [items[str(id_val)]["name"] for id_val in ids]
descriptions = [items[str(id_val)].get("description", "") for id_val in ids]
names = [items[id_val]["name"] for id_val in ids]
descriptions = [items[id_val].get("description", "") for id_val in ids]

return ImageThumbnails(None,
spritesheets=spritesheets,
Expand All @@ -232,7 +256,33 @@ def _get_spritesheet_ids(spritesheets):
except:
pass
ids = sorted(ids)
return ids
return np.array(ids)

def _make_raw_images(self):
"""
Regenerates and returns the original images matrix based on self.spritesheets
and self.ids.

Returns a uint8 array of shape (len(self.ids), height, width, 4), where row i
holds the image whose ID maps to i in self._id_index. Rows for IDs that do not
appear in any spritesheet are left as zeros, and a message listing them is
printed.
"""
assert len(self.spritesheets), "spritesheets is empty"
# Use the first frame of the first spritesheet to determine the image size;
# every frame is assumed to share these dimensions - TODO confirm.
random_spec = self.spritesheets[list(self.spritesheets.keys())[0]]["spec"]["frames"]
random_frame = random_spec[list(random_spec.keys())[0]]["frame"]
cols = random_frame["w"]
rows = random_frame["h"]

# Four channels per pixel; presumably the spritesheet PNGs are RGBA - confirm.
result = np.zeros((len(self.ids), rows, cols, 4), dtype=np.uint8)
seen_ids = set()
for key, spritesheet in self.spritesheets.items():
# The sheet image is stored as base64 text; decode it and read it as a PNG.
buffer = BytesIO(base64.b64decode(spritesheet["image"].encode('ascii')))
img = np.array(Image.open(buffer, formats=('PNG',)))

for id_val, image_spec in spritesheet["spec"]["frames"].items():
frame = image_spec["frame"]
# Copy this frame's rectangle out of the sheet into the row for its ID.
result[self._id_index[int(id_val)]] = img[frame["y"]:frame["y"] + frame["h"],
frame["x"]:frame["x"] + frame["w"]]
seen_ids.add(int(id_val))
# Report (but do not fail on) IDs that were expected but never found.
if len(seen_ids & set(self.ids.tolist())) != len(self.ids):
print("missing ids when loading images from spritesheets:", set(self.ids.tolist()) - seen_ids)
return result

def make_spritesheets(self, images, ids, grid_dimensions=None, image_size=None):
"""
Expand Down
38 changes: 38 additions & 0 deletions emblaze/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import sys
import numpy as np
from affine import Affine
from numba import jit
import json
import datetime
import platform
import os

class Field:
"""Standardized field names for embeddings and projections. These data can
Expand Down Expand Up @@ -110,3 +115,36 @@ def inverse_intersection(seqs1, seqs2, mask_ids, outer):
if len(set1) or len(set2):
distances[i] = 1 / (1 + num_intersection)
return distances

class LoggingHelper:
    """
    Keeps a JSON file of interaction records up to date.

    The file holds a single JSON object with session metadata and a "logs"
    list that grows as entries are added.
    """
    def __init__(self, filepath, addl_info=None):
        """
        filepath: path of the JSON log file.
        addl_info: optional dictionary merged into the file's top-level
            metadata when the file is first created.
        """
        super().__init__()
        self.filepath = filepath

        # An existing file is left untouched so later entries append to it.
        if os.path.exists(self.filepath):
            return

        header = dict(
            timestamp=str(datetime.datetime.now()),
            platform=platform.platform(),
            version=sys.version,
            logs=[],
        )
        if addl_info is not None:
            header.update(addl_info)
        self._store(header)

    def _store(self, contents):
        """Overwrites the log file with the given JSON-serializable object."""
        with open(self.filepath, "w") as sink:
            json.dump(contents, sink)

    def add_logs(self, entries):
        """
        Appends a list of logging entries to the "logs" list in the log file.
        """
        # The whole file is re-read and rewritten on every call.
        with open(self.filepath, "r") as source:
            contents = json.load(source)

        contents["logs"] += entries
        self._store(contents)
36 changes: 33 additions & 3 deletions emblaze/viewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from .frame_colors import compute_colors
from .datasets import EmbeddingSet
from .thumbnails import Thumbnails
from .utils import Field, SidebarPane, matrix_to_affine, affine_to_matrix, DataType, PreviewMode
from .utils import Field, LoggingHelper, SidebarPane, matrix_to_affine, affine_to_matrix, DataType, PreviewMode
from .recommender import SelectionRecommender
from datetime import datetime
import json
Expand Down Expand Up @@ -91,6 +91,16 @@ class Viewer(DOMWidget):
previewMode = Unicode("").tag(sync=True)
previewParameters = Dict({}).tag(sync=True)

# List of past interactions with the widget. When saveInteractionsFlag is
# set to True by the widget, the backend will save the interaction history
# to file using the loggingHelper.
interactionHistory = List([]).tag(sync=True)
saveInteractionsFlag = Bool(False).tag(sync=True)

# Whether to save interaction history/logs to file
loggingEnabled = Bool(False).tag(sync=True)
loggingHelper = None

def __init__(self, *args, **kwargs):
"""
embeddings: An EmbeddingSet object.
Expand All @@ -102,7 +112,12 @@ def __init__(self, *args, **kwargs):
self.saveSelectionFlag = False
self.loadSelectionFlag = False
self.selectionList = []
self.performanceSuggestionsMode = len(self.embeddings[0]) >= PERFORMANCE_SUGGESTIONS_ENABLE
if self.loggingEnabled:
self.loggingHelper = LoggingHelper('emblaze_logs_{}.json'.format(datetime.now().strftime("%Y%m%d_%H%M%S")),
{'numFrames': len(self.embeddings),
'numPoints': len(self.embeddings[0])})

self._update_performance_suggestions_mode()
if not self.colorScheme:
self.colorScheme = self.detect_color_scheme()
if not self.previewMode:
Expand Down Expand Up @@ -328,14 +343,18 @@ def _observe_suggestion_flag(self, change):
if change.new and not self.loadingSuggestions:
self._update_suggested_selections()

def _update_performance_suggestions_mode(self):
    """
    Decides whether suggestions should be computed in performance mode.

    Performance mode is switched on when the number of points in the first
    frame times the number of frames reaches PERFORMANCE_SUGGESTIONS_ENABLE.
    """
    points_in_first_frame = len(self.embeddings[0])
    frame_count = len(self.embeddings)
    total_size = points_in_first_frame * frame_count
    self.performanceSuggestionsMode = total_size >= PERFORMANCE_SUGGESTIONS_ENABLE

def _update_suggested_selections_background(self):
"""Function that runs in the background to recompute suggested selections."""
self.recomputeSuggestionsFlag = False
if self.loadingSuggestions:
return

filter_points = None
self.performanceSuggestionsMode = len(self.embeddings[0]) >= PERFORMANCE_SUGGESTIONS_ENABLE
self._update_performance_suggestions_mode()
if self.performanceSuggestionsMode:
# Check if sufficiently few points are visible to show suggestions
if self.filterIDs and len(self.filterIDs) <= PERFORMANCE_SUGGESTIONS_RECOMPUTE:
Expand Down Expand Up @@ -415,3 +434,14 @@ def _update_suggested_selections(self):
"""Recomputes the suggested selections."""
thread = threading.Thread(target=self._update_suggested_selections_background)
thread.start()

@observe("saveInteractionsFlag")
def _save_interactions(self, change):
    """
    Writes the accumulated interaction history to the log file.

    The widget sets the flag to save interaction history periodically
    because we can't use a timer in the backend.

    change: the change notification for saveInteractionsFlag; nothing
        happens unless its new value is truthy.
    """
    if not change.new:
        return

    # loggingHelper stays None unless logging was enabled when the viewer
    # was constructed; without this guard the call below would raise an
    # AttributeError. The history is kept in that case, not discarded.
    if self.loggingHelper is not None:
        self.loggingHelper.add_logs(self.interactionHistory)
        self.interactionHistory = []

    # Always lower the flag so the widget can request another save later.
    self.saveInteractionsFlag = False