In [None]:
# 1 · install required libraries
# Uncomment on a fresh environment. Prefer `%pip` over `!pip` so the packages are
# installed into the environment the running kernel uses.
# %pip install -q --upgrade plotly anywidget pandas polars numpy umap-learn jscatter seaborn ipywidgets traitlets

# NOTE(review): this cell only prints; nothing here installs packages or enables a
# widget manager, so the message below is informational at best.
print("✅ Libraries ready & custom-widget manager enabled")

In [4]:
# 2 · ensure project code is importable and reload edited modules
# NOTE(review): `pprint` is imported but not used anywhere in the visible cells.
import sys, pathlib, importlib, pprint

# Resolve the current working directory and make it importable, so the
# `helpers` and `widget` packages are found when they live next to the notebook.
root = pathlib.Path(".").resolve()
if str(root) not in sys.path:
    sys.path.append(str(root))

# These imports must come after the sys.path tweak above.
import helpers.data_loader as dl
import widget.nmf_widget as nmfw

# Reload modules during development
# (picks up edits to the .py files without restarting the kernel; `dl` is
# reloaded first, then `nmfw`).
importlib.reload(dl)
importlib.reload(nmfw)

print("✅ helpers & widget packages reloaded from", root)

✅ helpers & widget packages reloaded from /net/users/voy/nmfwidget


In [5]:
# 3 · quick data sanity-check (optional)
# Loads the prepared data described by config.json and prints the size of each
# of the three returned objects.
H, ids, cancers = dl.get_prepared_data("config.json")
print("Matrix shape:", H.shape)
print("Sample IDs  :", len(ids))
# NOTE(review): len(cancers) counts entries, not distinct values. The recorded
# output shows the same number as for the sample IDs, so this looks like one
# label per sample rather than a count of cancer types — confirm.
print("Cancer types:", len(cancers))

Matrix shape: (410, 16)
Sample IDs  : 410
Cancer types: 410


In [6]:
import pandas as pd
import numpy as np
import umap
import jscatter
import seaborn as sns
import ipywidgets as widgets
from IPython.display import display, clear_output
import warnings

# --- Unified Visualization Journey Orchestration ---

# 1. Load the data used for the UMAP embedding (the NMF widget loads its own
# K-specific data from the same config file).
# `k_filename=None` is passed so the loader picks its default dataset.
component_matrix_all, all_sample_ids, cancer_types_all = dl.get_prepared_data(
    "config.json", k_filename=None
)

# 2. Perform UMAP dimensionality reduction on the comprehensive data.
# `record=True` captures warnings instead of showing them, so every captured
# warning has to be reported explicitly below; otherwise it is silently lost.
with warnings.catch_warnings(record=True) as w:
    warnings.simplefilter("always")
    u = umap.UMAP(n_components=2, random_state=42).fit_transform(component_matrix_all)

# Report every distinct warning raised during the fit (not just the last one),
# keeping first-seen order. `umap_warning_message` stays a plain string so any
# later cell that reads it keeps working; it is empty when nothing was raised.
umap_warning_messages = list(
    dict.fromkeys(f"{caught.category.__name__}: {caught.message}" for caught in w)
)
umap_warning_message = "\n".join(umap_warning_messages)

if umap_warning_messages:
    print(*(f"UMAP Warning: {text}" for text in umap_warning_messages), sep="\n")

# 3. Prepare a DataFrame with UMAP data and metadata (this is the source for
# the jscatter plot and for the selection table). It gets a default RangeIndex,
# so row position i corresponds to point index i in the scatter plot.
original_umap_df = pd.DataFrame(
    {
        "Sample ID": all_sample_ids,
        "Cancer Type": cancer_types_all,
        "UMAP-1": u[:, 0],
        "UMAP-2": u[:, 1],
    }
)

# 4. Create the UMAP scatter plot
# One colour per distinct cancer type, assigned in sorted order so the mapping
# is stable between runs.
unique_cancer_types = sorted(set(cancer_types_all))
color_key = dict(
    zip(unique_cancer_types, sns.color_palette("turbo", len(unique_cancer_types)))
)

scatter_plot = (
    jscatter.Scatter(
        data=original_umap_df,  # Initial data: all samples from the original full dataframe
        x="UMAP-1",
        y="UMAP-2",
        color_by="Cancer Type",
        color_map=color_key,
        height=600,
        # NOTE(review): `lasso_callback` does not appear to be a documented
        # jscatter option and may simply be ignored — confirm against the
        # installed version before relying on it.
        lasso_callback=True,
    )
    .tooltip(properties=["Sample ID", "Cancer Type"])
    .size(
        default=5  # Increase the size of all dots
    )
    .options(
        {
            "aspectRatio": 1.0  # Set the aspect ratio to 1 for equal axes
        }
    )
)

# 5. Create an instance of the NMF Heatmap widget
# NOTE(review): the recorded cell output contains a traceback from
# NMFHeatmapWidget.__init__ (`ValueError: '' not in array` while assigning
# `k_dropdown.value`). The widget is still displayed afterwards, so the error
# seems to be handled inside the widget module — worth checking there.
heatmap_widget = nmfw.NMFHeatmapWidget(cfg_path="config.json")

# 6. Create an output widget for the selection data table
data_table_output = widgets.Output()
with data_table_output:
    print("Select points on either plot to see details.")

# --- Central Selection Management Logic ---

# Global variable to store the currently active selection (master list of sample IDs)
current_active_selection_ids = []

# Re-entrancy guard: True while update_ui_from_selection is pushing a selection
# into the widgets. Programmatic widget changes fire the same observers as user
# input; without the guard those echo events would re-enter the function and
# overwrite the selection that is currently being applied.
_selection_update_in_progress = False


def update_ui_from_selection(source_widget: str, selected_ids: list[str]):
    """
    Central function to update all UI components based on a new selection.

    Keeps the two views mutually exclusive: a heatmap selection filters the UMAP
    plot to the selected samples, a UMAP selection highlights those samples in
    the heatmap. The table below the plots always lists the active selection.

    Args:
        source_widget: "NMF Heatmap" or "UMAP" — the view the selection came from.
            Any other value only updates the stored selection and the table.
        selected_ids: Sample IDs of the new selection. ``None`` or an empty list
            clears the selection.

    Side effects:
        Rebinds the module-level ``current_active_selection_ids`` and updates
        ``scatter_plot``, ``heatmap_widget`` and ``data_table_output``.
    """
    global current_active_selection_ids, _selection_update_in_progress

    # Ignore the echo events triggered by our own widget updates further down.
    if _selection_update_in_progress:
        return

    # Copy the incoming list so later changes to a widget trait cannot silently
    # alter the stored selection; treat None as "nothing selected".
    new_selection_ids = [] if selected_ids is None else list(selected_ids)

    # Sort lists for an order-insensitive comparison to avoid unnecessary updates
    if sorted(new_selection_ids) == sorted(current_active_selection_ids):
        return

    _selection_update_in_progress = True
    try:
        current_active_selection_ids = new_selection_ids
        print(
            f"Global selection updated by {source_widget}: {len(current_active_selection_ids)} samples."
        )

        # Boolean row mask over the full table; reused for the plot filter and the table.
        selection_mask = original_umap_df["Sample ID"].isin(current_active_selection_ids)

        # 1. Update the view that did NOT originate the selection
        if source_widget == "NMF Heatmap":
            if current_active_selection_ids:
                # Show only the selected samples. Filtering (rather than swapping
                # the plot's data) keeps point indices equal to row positions in
                # original_umap_df, which umap_selection_handler relies on.
                scatter_plot.filter(np.flatnonzero(selection_mask.to_numpy()))
            else:
                # Heatmap selection cleared: show all samples again.
                scatter_plot.filter(None)
            # Drop any lasso selection left over in the scatter plot.
            scatter_plot.selection([])

            # Also clear any external highlight on the heatmap, so its own
            # selection is not doubled by a highlight.
            heatmap_widget.update_highlight_from_external([])

        elif source_widget == "UMAP":
            # Highlight NMF heatmap based on UMAP selection
            heatmap_widget.update_highlight_from_external(current_active_selection_ids)
            # Clear the heatmap's own selection to keep the views mutually exclusive
            heatmap_widget.selected_sample_ids_from_heatmap = []

        # 2. Update Data Table
        with data_table_output:
            clear_output(wait=True)  # Replace the previous table without flicker
            if current_active_selection_ids:
                display(original_umap_df[selection_mask].style.hide(axis="index"))
            else:
                print("No points selected.")
    finally:
        # Always release the guard, even if a widget update raised.
        _selection_update_in_progress = False


# 7. Define handlers for widget-specific selections
# (These handlers will call the central update_ui_from_selection function)


def umap_selection_handler(change):
    """
    Handler for selection changes originating from the UMAP plot.

    ``change.new`` holds the selected point indices. They may arrive as a NumPy
    array, a plain list/tuple, or ``None``; all are accepted. The indices are
    row positions in ``original_umap_df`` — filtering the plot hides points but
    does not renumber them (per the jscatter API; see references).
    """
    raw_indices = change.new
    point_positions = [] if raw_indices is None else [int(i) for i in raw_indices]

    if point_positions:
        selected_ids = original_umap_df.iloc[point_positions]["Sample ID"].tolist()
    else:
        selected_ids = []

    update_ui_from_selection("UMAP", selected_ids)


def nmf_heatmap_selection_handler(change):
    """Forward a heatmap selection change to the central selection manager.

    ``change.new`` is the new value of the observed
    ``selected_sample_ids_from_heatmap`` trait and is passed on unchanged,
    tagged with the source name "NMF Heatmap".
    """
    update_ui_from_selection("NMF Heatmap", change.new)


# 8. Observe selection changes on both plots
# For jscatter the observer is attached to the underlying widget object
# (`scatter_plot.widget`) and listens to its `selection` trait.
scatter_plot.widget.observe(umap_selection_handler, names=["selection"])
# The heatmap widget is observed directly on its
# `selected_sample_ids_from_heatmap` trait.
heatmap_widget.observe(
    nmf_heatmap_selection_handler, names=["selected_sample_ids_from_heatmap"]
)


# 9. Arrange the widgets for display (Heatmap top, UMAP + DataTable bottom)
app_layout = widgets.VBox(
    [
        heatmap_widget,  # NMF Heatmap and its controls
        widgets.HBox(
            [  # UMAP plot and Data Table
                scatter_plot.show(),
                data_table_output,
            ]
        ),
    ]
)

# 10. Display the final interactive application
display(app_layout)

# Example of programmatic access to selected data (run in a new cell after interaction)
# The `current_active_selection_ids` global variable holds the current selection.
# You can use it like this:
# final_selected_df = original_umap_df[original_umap_df["Sample ID"].isin(current_active_selection_ids)]
# print(f"Programmatically accessed {len(final_selected_df)} selected samples:")
# display(final_selected_df)

Loading component colors from: nmf_component_color_map.json
Loaded component colors: {'Comp_0': '#332288', 'Comp_1': '#88CCEE', 'Comp_2': '#117733', 'Comp_3': '#DDCC77', 'Comp_4': '#CC6677', 'Comp_5': '#882255', 'Comp_6': '#AA4499', 'Comp_7': '#44AA99', 'Comp_8': '#999933', 'Comp_9': '#DDDDDD', 'Comp_10': '#E69F00', 'Comp_11': '#56B4E9', 'Comp_12': '#009E73', 'Comp_13': '#F0E442', 'Comp_14': '#0072B2', 'Comp_15': '#D55E00'}
Component color mapping: {'Component 1': {'color': '#332288', 'matched_key': 'Comp_0'}, 'Component 2': {'color': '#88CCEE', 'matched_key': 'Comp_1'}, 'Component 7': {'color': '#AA4499', 'matched_key': 'Comp_6'}, 'Component 11': {'color': '#E69F00', 'matched_key': 'Comp_10'}, 'Component 3': {'color': '#117733', 'matched_key': 'Comp_2'}, 'Component 5': {'color': '#CC6677', 'matched_key': 'Comp_4'}, 'Component 8': {'color': '#44AA99', 'matched_key': 'Comp_7'}, 'Component 9': {'color': '#999933', 'matched_key': 'Comp_8'}, 'Component 4': {'color': '#DDCC77', 'matched_key

Traceback (most recent call last):
  File "/net/users/voy/nmfwidget/.env/lib/python3.10/site-packages/ipywidgets/widgets/widget_selection.py", line 133, in findvalue
    return next(x for x in array if compare(x, value))
StopIteration

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/net/users/voy/nmfwidget/.env/lib/python3.10/site-packages/ipywidgets/widgets/widget_selection.py", line 242, in _validate_value
    return findvalue(self._options_values, value, self.equals) if value is not None else None
  File "/net/users/voy/nmfwidget/.env/lib/python3.10/site-packages/ipywidgets/widgets/widget_selection.py", line 135, in findvalue
    raise ValueError('%r not in array'%value)
ValueError: '' not in array

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/net/users/voy/nmfwidget/widget/nmf_widget.py", line 87, in __init__
    self.k_dropdown.value = self._current_

VBox(children=(NMFHeatmapWidget(), HBox(children=(VBox(children=(HBox(children=(VBox(children=(Button(icon='ar…

In [None]:
# Example of programmatic access to selected data (run in a new cell after interaction)
# The `current_active_selection_ids` global variable holds the current selection.
# You can use it like this:
# final_selected_df = original_umap_df[original_umap_df["Sample ID"].isin(current_active_selection_ids)]
# print(f"Programmatically accessed {len(final_selected_df)} selected samples:")
# display(final_selected_df)