In [None]:

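# Installation (kept for portability; skip if the packages are already installed).
# Either install the packages into your own environment or uncomment the lines below.
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
# %pip install --upgrade pip
# %pip install datashader holoviews bokeh hvplot colorcet pandas numpy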



# Datashader + HoloViews — Fast, Interactive, No Matplotlib
This notebook uses **Datashader** with **HoloViews**/**Bokeh** to render massive datasets quickly.
- No Matplotlib.
- Works with millions of points via server-side rasterization.
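- Switch between **datashade** (aggregates *and* colormaps in Python, returning an RGB image) and **rasterize** (returns the numeric aggregate and lets Bokeh apply the colormap, which enables colorbars) as needed. Both re-aggregate on zoom/pan while a Python kernel is running.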


In [None]:

import numpy as np, pandas as pd
import holoviews as hv
import hvplot.pandas  # noqa: F401 registers hvplot
from holoviews.operation.datashader import datashade, rasterize, shade, dynspread
import datashader as ds
import colorcet as cc

# Activate the Bokeh plotting backend for HoloViews.
hv.extension("bokeh")

# Session-wide plot defaults, keyed by element type. Points and Curve share the
# same interactive tool set; only the figure dimensions differ.
hv.opts.defaults(
    hv.opts.RGB(height=600, width=800),
    hv.opts.Points(
        height=600,
        width=800,
        tools=["hover", "box_zoom", "wheel_zoom", "pan", "reset"],
    ),
    hv.opts.Curve(
        height=300,
        width=900,
        tools=["hover", "box_zoom", "wheel_zoom", "pan", "reset"],
    ),
)

# Record the library versions next to the output for reproducibility.
print("Versions -> HoloViews:", hv.__version__, "| Datashader:", ds.__version__)


## Generate synthetic datasets

In [None]:

# One seeded generator shared by the random draws in this notebook, so a fresh
# Restart & Run All reproduces the same data.
rng = np.random.default_rng(42)

def gen_points(n, name):
    """Build a synthetic, correlated 2-D point cloud with a class label.

    Parameters
    ----------
    n : int
        Number of rows to generate.
    name : str
        Value written to every row of the ``set`` column.

    Returns
    -------
    pandas.DataFrame
        Columns ``x`` (standard normal), ``y`` (``0.5 * x`` plus standard
        normal noise), ``category`` (categorical dtype with categories
        ``A``/``B``/``C`` drawn with probabilities 0.5/0.3/0.2) and ``set``
        (``name`` repeated on every row).

    Notes
    -----
    Values are drawn from the module-level ``rng`` in the order x, y-noise,
    category, so the result depends on how much of that stream was consumed
    before the call.
    """
    labels = ["A", "B", "C"]
    x = rng.normal(0, 1, n)
    y = 0.5 * x + rng.normal(0, 1, n)
    drawn = rng.choice(labels, n, p=[0.5, 0.3, 0.2])
    # Store the labels as a categorical instead of object-dtype strings:
    # Datashader's ds.count_cat (used later in this notebook) is documented to
    # need a categorical column, and a categorical keeps one small integer code
    # per row rather than one Python string object per row.
    cat = pd.Categorical(drawn, categories=labels)
    return pd.DataFrame({"x": x, "y": y, "category": cat, "set": name})

# Both frames draw from the same seeded generator, so the order matters:
# `small` (100k rows) is generated first, then `big` (5M rows – shows datashader benefits).
small, big = (
    gen_points(size, label)
    for size, label in ((100_000, "small"), (5_000_000, "big"))
)
(small.shape, big.shape)


## Massive scatter — Datashade (on the fly)

In [None]:

# Wrap the large frame as a HoloViews element: x/y are the key dimensions and
# `category` is carried along as a value dimension for later cells.
points = hv.Points(big, kdims=["x", "y"], vdims=["category"])

# datashade: count points per pixel for the current viewport and colormap the result.
shaded = datashade(
    points,
    aggregator=ds.count(),
    cmap=cc.fire,
    width=900,
    height=600,
)
# dynspread post-processes the shaded image with the given threshold / max_px settings.
dyn = dynspread(shaded, max_px=3, threshold=0.5)

(
    dyn
    .relabel("Datashader: 5M points (count)")
    .opts(active_tools=["wheel_zoom"])
)


## Categorical density — count per class with datashader

In [None]:

# Per-class counts: one aggregate per value of `category`, shaded with a fixed
# colour per class (A blue, B green, C red).
cat_agg = datashade(
    points,
    aggregator=ds.count_cat("category"),
    color_key={"A": "#3b82f6", "B": "#10b981", "C": "#ef4444"},
    height=600,
    width=900,
)
dynspread(cat_agg).relabel("Datashader: categorical count (A/B/C)")


## Rasterize (pre-aggregated binning) for stable interactions

In [None]:

# rasterize() returns the numeric aggregate as an Image (not an RGB), so Bokeh
# applies the colormap and a colorbar can be shown.
# The width/height passed to rasterize() are parameters of the aggregation
# operation, not plot options, and the notebook-wide defaults only size
# RGB/Points/Curve. Without an explicit size the Image plot falls back to the
# backend's default dimensions, so set the figure size here as well.
ras = rasterize(points, aggregator=ds.count(), width=900, height=600).opts(
    width=900,
    height=600,
    colorbar=True,
    cmap=cc.fire,
)
ras.relabel("Rasterize: 5M points (count)")


## Time series — 1M points downsampled via Datashader

In [None]:

n = 1_000_000
# One sample per second. Lowercase "s" is the seconds alias; the uppercase "S"
# spelling is deprecated in recent pandas releases and emits a FutureWarning.
t = pd.date_range("2020-01-01", periods=n, freq="s")
# 200 sine periods (0 .. 400*pi) plus Gaussian noise from the shared seeded generator.
v = np.sin(np.linspace(0, 400*np.pi, n)) + 0.1 * rng.normal(size=n)
ts = pd.DataFrame({"t": t, "v": v})

curve = hv.Curve(ts, kdims="t", vdims="v")
ts_ds = datashade(curve, aggregator=ds.any())  # or ds.mean("v") after rasterize
# The datashaded result is an RGB image, so the hv.opts.Curve size defaults
# (900x300) do not apply to it; request the wide, short layout explicitly.
ts_ds.relabel("Datashader: 1M-point time series").opts(width=900, height=300)


## Linked zoom & inspect (Points + datashade background)

In [None]:

# Hybrid view: a 50k-row random subsample is drawn as real Points (so tooltips
# work) on top of a datashaded image of the full data set (so rendering stays fast).
subsample = big.sample(n=50_000, random_state=1)
raw_pts = hv.Points(subsample, kdims=["x", "y"], vdims=["category"])
raw_pts = raw_pts.opts(color="category", alpha=0.3, size=4)
bg = datashade(points, aggregator=ds.count(), cmap=cc.kbc)
(bg * raw_pts).relabel("Hybrid: datashade background + sampled points with hover")


## Tips


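- Prefer **`datashade()`** when you want Datashader itself to shade huge point clouds (e.g. categorical colour mixing); it re-aggregates on every pan/zoom and returns an RGB image.
- Use **`rasterize()`** when the plot needs the numeric aggregate — colorbars and hover values; it also re-aggregates on every pan/zoom, with the colormap applied by Bokeh.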
- For categories: `aggregator=ds.count_cat("category")` with a `color_key`.
- Combine **datashaded background** with a **small raw subsample** for rich hovers without killing performance.
- No Matplotlib anywhere.
