# Dashboard Notebook

In [None]:


# === ================================================================================
# === Dashboard v1.0 — dashboard for the Data Rainbow project
# === ================================================================================

import os, os.path as osp, json, re, zipfile
from urllib.parse import urlparse, quote as _urlquote

import pandas as pd
import numpy as np
import ipywidgets as W
from IPython.display import display

import leafmap  # (ipyleaflet backend)
from ipyleaflet import (
    Marker, MarkerCluster, Popup, AwesomeIcon, WidgetControl,
    FullScreenControl, ZoomControl
)

class DataRainbowDashboard:
    """Interactive map dashboard for geotagged audio / video / image media.

    Instantiating the class runs the whole startup sequence (see ``__init__``):
    optional unzip of sample media, loading of the CSV metadata, widget
    construction, map creation, panel layout, event wiring and a first render.
    All paths below are relative to the current working directory.
    """
    # ----------------------------------------------------------------------------------------
    # Config — file names & folders used by the dashboard
    # ----------------------------------------------------------------------------------------
    MEDIA_CSV  = "media.csv"    # Required: metadata table
    LOCS_CSV   = "location.csv" # Optional: used when media.csv has location_tag
    MEDIA_DIR  = "data"         # All media files live in ./data
    SAMPLE_ZIP = "sample.zip"   # Optional: when present, auto-unzips into ./data at first run

    # ----------------------------------------------------------------------------------------
    # Colors & Icons — visual mapping for markers
    # ----------------------------------------------------------------------------------------
    # Marker colour names; mk_color_map() cycles through them in order.
    PALETTE = ["red","darkred","blue","cadetblue","green","darkgreen","orange",
               "purple","darkpurple","pink","gray","black","lightblue"]
    # Media type -> icon name. NOTE(review): the visible part of render() hard-codes the
    # same three names instead of reading this mapping — confirm whether it is still used.
    FA_BY_TYPE = {"audio":"music", "video":"video-camera", "image":"image"}

    # Leading 8-digit YYYYMMDD date plus any remainder; fallback pattern for _parse_date_any().
    _DATE8 = re.compile(r"^(\d{4})(\d{2})(\d{2})(.*)$")

    def __init__(self):
        """Initialise state, then run the full startup sequence.

        Construction has side effects: it touches the file system (unzip, CSV
        reads), builds and displays widgets, and performs the first render.
        """
        # State (matches procedural version behavior)
        self.DATA = pd.DataFrame()      # media rows joined with coordinates (set by _load_data)
        self.media = pd.DataFrame()     # normalised media.csv table (set by _load_data)
        self.DATA_LAYERS = []           # map layers added by render(); removed by _clear_data_layers
        self.DYNAMIC = {"legend": None, "notice": None}  # overlay controls; removed by _clear_dynamic

        # Bookkeeping read by render() for its auto-fit decision.
        self.FIRST_RENDER = True
        self.LAST_FILTER_SIG = None
        self.LAST_TYPE_SEL = None
        self.LAST_EXTENT = None

        self.w = {}  # widgets

        # Startup sequence
        # Order matters: data must exist before widgets derive their options from it,
        # and widgets/map must exist before layout, event wiring and the first render.
        self._ensure_unzipped()
        self._load_data()
        self._build_widgets()
        self._init_map()
        self._layout_panel()
        self._wire_events()  # defined outside the visible part of this file
        self.render()

    # ===================================== Utilities ============================================
    
    # Ensure working directory is the repo root so relative paths (media.csv, etc.) work
    import os
    from pathlib import Path

    def set_repo_cwd():
        """Change the working directory to the repo root and return it.

        Walks from the current directory up through its parents and picks the
        first directory containing ``media.csv`` or ``location.csv``. If none is
        found the working directory is left untouched.

        Returns:
            pathlib.Path: the directory now used as working directory.
        """
        # Imported locally on purpose: names bound in a class body are not visible
        # from functions defined inside it, so the class-level ``Path`` import above
        # cannot be resolved here and the call below would raise NameError.
        import os
        from pathlib import Path

        here = Path.cwd()
        for p in (here, *here.parents):
            if (p / "media.csv").exists() or (p / "location.csv").exists():
                os.chdir(p)
                print(f"Working directory set to repo root: {p}")
                return p
        print(f"Repo root not found; staying in: {here}")
        return here

    # Evaluated once, while the class body executes (i.e. at import / cell run time).
    REPO_ROOT = set_repo_cwd()
    
    @staticmethod
    def infer_mime(url: str, fallback="audio/wav"):
        ext = osp.splitext(urlparse(str(url)).path)[1].lower()
        return {
            ".wav":"audio/wav",".mp3":"audio/mpeg",".m4a":"audio/mp4",".ogg":"audio/ogg",
            ".flac":"audio/flac",".aac":"audio/aac",
            ".mp4":"video/mp4",".webm":"video/webm",".mov":"video/quicktime",".m4v":"video/x-m4v",
            ".jpg":"image/jpeg",".jpeg":"image/jpeg",".png":"image/png",
            ".gif":"image/gif",".webp":"image/webp"
        }.get(ext, fallback)

    @staticmethod
    def first_text(*vals):
        for v in vals:
            if isinstance(v, str):
                s = v.strip()
                if s and s.lower() not in ("nan","na","none","null"):
                    return s
        return None

    def _parse_date_any(self, val):
        if val is None or (isinstance(val, float) and np.isnan(val)): return pd.NaT
        s = str(val).strip()
        if not s or s.lower() in ("nan","na","none","null"): return pd.NaT
        ts = pd.to_datetime(s, errors="coerce")
        if pd.isna(ts):
            m = self._DATE8.match(s)
            if m:
                y, mo, d, rest = m.groups()
                ts = pd.to_datetime(f"{y}-{mo}-{d}{rest}".strip(), errors="coerce")
        return ts

    @staticmethod
    def coalesce_device_col(df_):
        if "device" in df_.columns and "device_meta" in df_.columns:
            s = df_["device"].astype(str)
            s = s.where(s.str.strip().ne(""), df_["device_meta"].astype(str))
        elif "device" in df_.columns:
            s = df_["device"].astype(str)
        elif "device_meta" in df_.columns:
            s = df_["device_meta"].astype(str)
        else:
            s = pd.Series(["unknown"]*len(df_))
        return s.replace({"nan": np.nan}).fillna("unknown").str.strip().replace({"": "unknown"})

    import os, os.path as osp
    from urllib.parse import quote as _urlquote

    def _files_prefix(self) -> str:
        """
        Correct Jupyter 'files' handler prefix:
        - Binder/JupyterHub: '/user/<id>/files/'
        - Local classic: '/files/'
        """
        base = (
            os.environ.get("JUPYTERHUB_SERVICE_PREFIX")  # Binder / JupyterHub
            or os.environ.get("NB_PREFIX")               # sometimes set by Jupyter
            or ""
        )
        base = (base or "").rstrip("/")
        return f"{base}/files/" if base else "/files/"

    def _local_media_url(self, row) -> str:
        """
        Resolve a local media URL for a given row (Series).
        Returns a '/.../files/...' URL or '' if the file isn't present.
        """
        fname = row.get("file_name") or row.get("filename")
        if isinstance(fname, str) and fname.strip():
            path = osp.join(self.MEDIA_DIR, fname)
            if osp.exists(path):
                rel = osp.relpath(path, start=".").replace("\\", "/")
                return self._files_prefix() + _urlquote(rel)
        return ""

    @staticmethod
    def _guess_type_from_ext_or_field(row):
        for k in ("type", "media_type", "data_type"):
            v = str(row.get(k) or "").strip().lower()
            if v in ("audio","video","image"):
                return v
        ext = str(row.get("extension") or "").strip().lstrip(".").lower()
        if not ext:
            val = row.get("file_name") or row.get("filename") or ""
            ext = osp.splitext(str(val))[1].lstrip(".").lower()
        if ext in {"wav","mp3","m4a","ogg","flac","aac"}: return "audio"
        if ext in {"mp4","mov","m4v","webm"}: return "video"
        if ext in {"jpg","jpeg","png","gif","webp"}: return "image"
        return "audio"

    def _format_time_title(self, row):
        v = row.get("date_norm", None)
        if v is not None and not (isinstance(v, float) and np.isnan(v)):
            ts = pd.to_datetime(v, errors="coerce")
            if pd.notna(ts):
                if ts.hour + ts.minute + ts.second > 0:
                    return ts.strftime("%Y-%m-%d %H:%M:%S")
                return ts.strftime("%Y-%m-%d")
        dt_str = row.get("datetime")
        if isinstance(dt_str, str) and dt_str.strip(): return dt_str.strip()
        donly = row.get("date_only")
        if pd.notna(donly): return str(donly)
        return (row.get("display_name") or "Media")

    def _format_time_only(self, row):
        v = row.get("date_norm", None)
        ts = pd.to_datetime(v, errors="coerce")
        if pd.notna(ts) and (ts.hour + ts.minute + ts.second > 0):
            return ts.strftime("%H:%M:%S")
        t = row.get("time")
        if isinstance(t, str) and t.strip(): return t.strip()
        return ""

    # ================================= Files / Loading / Normalization ===========================
    def _ensure_unzipped(self):
        os.makedirs(self.MEDIA_DIR, exist_ok=True)
        has_media = any(osp.isfile(osp.join(self.MEDIA_DIR, f)) for f in os.listdir(self.MEDIA_DIR))
        if has_media:
            print(f"Media present in '{self.MEDIA_DIR}', skip unzip.")
            return
        if not osp.exists(self.SAMPLE_ZIP):
            print("sample.zip not found in root.")
            return
        try:
            with zipfile.ZipFile(self.SAMPLE_ZIP, "r") as zf:
                zf.extractall(self.MEDIA_DIR)
            print(f"Unzipped '{self.SAMPLE_ZIP}' → '{self.MEDIA_DIR}'.")
        except Exception as e:
            print(f"Unzip failed: {e}")

    def _load_data(self):
        if not osp.exists(self.MEDIA_CSV):
            raise FileNotFoundError(f"Missing {self.MEDIA_CSV}")

        media = pd.read_csv(self.MEDIA_CSV).copy()
        media.columns = [c.strip().lower() for c in media.columns]

        media["display_name"] = media.apply(
            lambda r: self.first_text(r.get("display_name"), r.get("title"), r.get("file_name"),
                                      r.get("name"), r.get("stem")) or "Media",
            axis=1
        )
        media["type"] = media.apply(self._guess_type_from_ext_or_field, axis=1)
        media["device_display"] = self.coalesce_device_col(media)

        if "date" in media.columns:
            d1 = media["date"].apply(self._parse_date_any)
        else:
            d1 = pd.Series([pd.NaT]*len(media))
        d2 = media["datetime"].apply(self._parse_date_any) if "datetime" in media.columns else pd.Series([pd.NaT]*len(media))
        if "time" in media.columns and "date" in media.columns:
            dt_from_pair = pd.to_datetime(
                media["date"].astype(str).str.strip() + " " + media["time"].astype(str).str.strip(),
                errors="coerce"
            )
            d2 = d2.fillna(dt_from_pair)
        media["date_norm"] = d1.fillna(d2)
        media["date_only"] = media["date_norm"].dt.date

        media["url"] = media.apply(self._local_media_url, axis=1)

        def _has_local_sample(r):
            fname = r.get("file_name") or r.get("filename")
            if isinstance(fname, str) and fname.strip():
                p = osp.join(self.MEDIA_DIR, fname)
                if osp.exists(p):
                    return True
            return False
        media["has_media_local"] = media.apply(_has_local_sample, axis=1)

        # Coordinates: join via location_tag OR direct lat/lon
        if "location_tag" in media.columns:
            if not osp.exists(self.LOCS_CSV):
                raise FileNotFoundError(
                    f"media.csv includes 'location_tag' but {self.LOCS_CSV} is missing. "
                    "Either provide location.csv or drop 'location_tag' and add lat/lon directly in media.csv."
                )
            locs = pd.read_csv(self.LOCS_CSV).copy()
            locs.columns = [c.strip().lower() for c in locs.columns]
            for req_col in ["location", "longitude", "latitude"]:
                if req_col not in locs.columns:
                    raise ValueError("location.csv must have columns: location, longitude, latitude")
            locs2 = locs.rename(columns={"longitude":"lon","latitude":"lat"})
            for c in ["lat","lon"]: locs2[c] = pd.to_numeric(locs2[c], errors="coerce")
            DATA = media.merge(
                locs2[["location","lat","lon"]],
                left_on="location_tag", right_on="location", how="left"
            )
        else:
            cand_lat = next((c for c in ["lat","latitude"] if c in media.columns), None)
            cand_lon = next((c for c in ["lon","longitude"] if c in media.columns), None)
            if cand_lat and cand_lon:
                DATA = media.copy()
                DATA["lat"] = pd.to_numeric(DATA[cand_lat], errors="coerce")
                DATA["lon"] = pd.to_numeric(DATA[cand_lon], errors="coerce")
                if "location" not in DATA.columns and "location_tag" in DATA.columns:
                    DATA["location"] = DATA["location_tag"]
            else:
                raise ValueError(
                    "No 'location_tag' column and no direct coordinates found. "
                    "Provide either 'location_tag' (and a location.csv) OR lat/lon (or latitude/longitude) in media.csv."
                )

        DATA["lat"] = pd.to_numeric(DATA["lat"], errors="coerce")
        DATA["lon"] = pd.to_numeric(DATA["lon"], errors="coerce")

        dedup_keys = [k for k in ["file_name","date_only","lat","lon","type"] if k in DATA.columns]
        if dedup_keys:
            DATA = DATA.sort_values(by=["date_norm", "file_name"], na_position="last").drop_duplicates(
                subset=dedup_keys, keep="first"
            )

        self.media = media
        self.DATA  = DATA

    # ================================= Widgets / Map / Layout ===================================
    def _uniq(self, series):
        if series is None: return []
        return sorted(pd.Series(series).dropna().astype(str).unique().tolist())

    def _build_widgets(self):
        """Create every control widget and store it in ``self.w``.

        Filter options are derived from the already loaded ``self.media`` /
        ``self.DATA``. Also sets ``self.date_ui`` (the widget to place in the
        layout), ``self.get_date_range`` (a zero-argument callable returning
        ``(start, end)``) and ``self.date_watch_widgets`` (the date widgets
        created here), using the first date-picker backend that can be built.
        """
        w = self.w

        # Option lists for the multi-selects; fall back to fixed defaults when a column is absent.
        types   = self._uniq(self.media["type"]) if "type" in self.media else ["audio"]
        devices = self._uniq(self.DATA["device_display"]) if "device_display" in self.DATA else ["unknown"]
        places  = self._uniq(self.DATA["location"]) if "location" in self.DATA else []

        # Visible row count of a multi-select, clamped to the range 3..10.
        def _rows(n): return max(3, min(10, int(n)))

        w["title_html"] = W.HTML("<div style='font-size:22px;font-weight:800;color:#1f4b9a'>Sound Atlas Dataset Dashboard</div>")
        # NOTE(review): _layout_panel() builds its own status widgets for the header;
        # these two are not placed in the layout by the visible code.
        w["status1"]    = W.HTML("")
        w["status3"]    = W.HTML("")

        w["search"] = W.Text(placeholder="Search: name / location / device…", layout=W.Layout(width="100%"))
        # Location starts with nothing selected (apply_filters treats that as "no filter");
        # device and type start with everything selected.
        w["place"]  = W.SelectMultiple(options=places,  value=(),             description="Location",
                                       rows=_rows(len(places)), layout=W.Layout(width="100%"))
        w["device"] = W.SelectMultiple(options=devices, value=tuple(devices), description="Device",
                                       rows=_rows(len(devices)), layout=W.Layout(width="100%"))
        w["type"]   = W.SelectMultiple(options=types,   value=tuple(types),   description="Type",
                                       rows=_rows(len(types)), layout=W.Layout(width="100%"))

        w["only_samples"] = W.Checkbox(value=False, description="Only items with sample clip")

        # Initial date range = full span of the data (None when there are no dates).
        all_dates = self.DATA["date_only"].dropna() if "date_only" in self.DATA else pd.Series([], dtype="object")
        dmin = all_dates.min() if not all_dates.empty else None
        dmax = all_dates.max() if not all_dates.empty else None

        # Use calendar as date picker
        # Backends are tried in order: ipyvuetify -> ipydatetime -> plain ipywidgets.
        # Any exception (typically a missing package) moves on to the next one.
        self.date_watch_widgets = []
        try:
            import ipyvuetify as v
            init_model = [dmin.isoformat(), dmax.isoformat()] if (dmin and dmax) else []
            w["datepicker"] = v.DatePicker(range=True, v_model=init_model, full_width=True, elevation=1,
                                           show_current=True, no_title=True)
            self.date_ui = w["datepicker"]
            def _get_date_range():
                # v_model holds up to two ISO date strings; missing entries become None.
                vm = w["datepicker"].v_model or []
                from datetime import date
                s = date.fromisoformat(vm[0]) if len(vm)>=1 and vm[0] else None
                e = date.fromisoformat(vm[1]) if len(vm)>=2 and vm[1] else None
                return s, e
            self.get_date_range = _get_date_range
            self.date_watch_widgets = [w["datepicker"]]
        except Exception:
            try:
                from ipydatetime import DateRangePicker
                w["date_range"] = DateRangePicker(start=dmin, end=dmax, layout=W.Layout(width="100%"))
                self.date_ui = w["date_range"]
                def _get_date_range():
                    return (w["date_range"].start, w["date_range"].end)
                self.get_date_range = _get_date_range
                self.date_watch_widgets = [w["date_range"]]
            except Exception:
                # Last resort: two independent ipywidgets date pickers side by side.
                w["date_start"] = W.DatePicker(description="Start", value=dmin, layout=W.Layout(width="49%"))
                w["date_end"]   = W.DatePicker(description="End",   value=dmax, layout=W.Layout(width="49%"))
                self.date_ui = W.HBox([w["date_start"], w["date_end"]], layout=W.Layout(width="100%", gap="8px"))
                def _get_date_range():
                    return (w["date_start"].value, w["date_end"].value)
                self.get_date_range = _get_date_range
                self.date_watch_widgets = [w["date_start"], w["date_end"]]

        # Display options; the option values ("Cluster", "device", "type", "compact", ...) are read by render().
        w["cluster"] = W.ToggleButtons(options=[("Cluster pins","Cluster"), ("Single markers","Markers")],
                                       value="Cluster", description="Display")
        w["colorby"] = W.ToggleButtons(options=["device","type"], value="device", description="Color by")
        w["iconby"]  = W.ToggleButtons(options=[("Type icons","type"), ("Plain dot","dot")],
                                       value="type", description="Icon style")
        w["popup"]   = W.ToggleButtons(options=["compact","detailed"], value="compact", description="Popup")

        # Action buttons; no click handlers are attached in this method.
        w["export_c"] = W.Button(description="Export CSV")
        w["export_g"] = W.Button(description="Export GeoJSON")
        w["reset"]    = W.Button(description="Reset filters", button_style="warning")

    def _init_map(self):
        """Create the leafmap map in ``self.m`` with a minimal set of controls."""
        # Initial view: fixed centre/zoom; the same centre is the fallback view in _fit_bounds_from_df.
        self.m = leafmap.Map(center=(23.565,119.579), zoom=11, basemap="OpenStreetMap")
        self.m.layout = W.Layout(width="66%", height="85vh")
        # Remove every control the map was created with, then add back only zoom + fullscreen.
        # list(...) snapshots the controls because the collection changes during removal.
        for ctrl in list(self.m.controls): self.m.remove_control(ctrl)
        self.m.add_control(ZoomControl(position="topleft"))
        self.m.add(FullScreenControl())
        # NOTE(review): presumably keeps marker popups open on map clicks — confirm against ipyleaflet docs.
        self.m.close_popup_on_click = False

    def _layout_panel(self):
        """Assemble the side panel next to the map and display the whole dashboard.

        Requires ``_build_widgets`` and ``_init_map`` to have run. Sets
        ``self.w_status1`` / ``self.w_status3``, the header lines that
        ``render`` updates.
        """
        w = self.w

        def group_box(title, *kids):
            # A titled, bordered card that stacks the given child widgets vertically.
            return W.VBox(
                [W.HTML(f"<div style='font-weight:700;color:#1f4b9a;margin-bottom:6px'>{title}</div>"), *kids],
                layout=W.Layout(width="100%", padding="10px", margin="8px 0",
                                border="1px solid #d0d7de", border_radius="8px", background_color="white")
            )

        filters_group = group_box(
            "Filters",
            w["search"], w["place"], w["device"], w["type"], w["only_samples"],
            W.HTML("<b>Date range</b>"), self.date_ui,
            W.HBox([w["reset"]], layout=W.Layout(justify_content="flex-end", width="100%")),
        )

        display_group = group_box(
            "Display",
            W.HBox([w["cluster"], w["colorby"], w["iconby"], w["popup"]], layout=W.Layout(gap="6px", flex_wrap="wrap"))
        )

        export_group  = group_box("Export", W.HBox([w["export_c"], w["export_g"]], layout=W.Layout(gap="8px")))

        # Header = title plus two fresh, initially empty status lines. These are new widgets,
        # not w["status1"] / w["status3"] from _build_widgets.
        header = W.VBox([w["title_html"], W.HTML(""), W.HTML("")], layout=W.Layout(margin="0 0 8px 0"))
        self.w_status1, self.w_status3 = header.children[1], header.children[2]

        panel = W.VBox([header, filters_group, display_group, export_group],
                       layout=W.Layout(width="34%", padding="8px", border="1px solid #ddd",
                                       margin="0 0 0 8px", overflow_y="visible"))

        # Map (66% wide, set in _init_map) on the left, panel (34%) on the right.
        display(W.HBox([self.m, panel], layout=W.Layout(width="100%")))

    # =================================== Filtering / Helpers ====================================
    def _text_hit(self, row, q):
        if not q: return True
        q = q.lower().strip()
        for c in ["display_name","file_name","name","location","device","device_meta","device_display"]:
            v = row.get(c)
            if isinstance(v, str) and q in v.lower():
                return True
        return False

    def apply_filters(self):
        f = self.DATA.copy()
        if self.w["place"].value and "location" in f.columns:
            f = f[f["location"].astype(str).isin(list(self.w["place"].value))]
        if self.w["device"].value and "device_display" in f.columns:
            f = f[f["device_display"].astype(str).isin(list(self.w["device"].value))]
        if self.w["type"].value and "type" in f.columns:
            f = f[f["type"].astype(str).isin(list(self.w["type"].value))]

        if self.w["only_samples"].value and "has_media_local" in f.columns:
            f = f[f["has_media_local"]]

        s, e = self.get_date_range()
        if s and not e: e = s
        if e and not s: s = e
        if s and e and "date_only" in f.columns:
            dser = f["date_only"]
            f = f[(dser >= s) & (dser <= e)]

        if self.w["search"].value.strip():
            f = f[f.apply(lambda r: self._text_hit(r, self.w["search"].value), axis=1)]

        if "lat" in f.columns and "lon" in f.columns:
            f = f.dropna(subset=["lat","lon"])
        else:
            f = f.iloc[0:0]
        return f

    def mk_color_map(self, values):
        cm, i = {}, 0
        for v in values:
            if v not in cm:
                cm[v] = self.PALETTE[i % len(self.PALETTE)]; i += 1
        return cm

    # ==================================== Popup Rendering =======================================
    @staticmethod
    def _section_header(title: str, count: int):
        return (
            f"<div style='margin:10px 0 6px 0;padding:6px 8px;background:#f6f8fa;"
            f"border:1px solid #d0d7de;border-radius:6px;font-weight:700'>"
            f"{title} <span style='font-weight:500;color:#555'>( {count} )</span>"
            f"</div>"
        )

    def popup_group_html(self, group_df: pd.DataFrame, mode="compact"):
        g = group_df.copy()
        first = g.iloc[0]
        date_header = str(first.get("date_only") or "")
        location_header = first.get("location") or ""

        def _time_key(row):
            v = row.get("date_norm")
            ts = pd.to_datetime(v, errors="coerce")
            return (ts.hour if pd.notna(ts) else 99, ts.minute if pd.notna(ts) else 99, ts.second if pd.notna(ts) else 99)

        sec_order = [("audio","Audio"),("video","Video"),("image","Image")]
        sections_html, total_items = [], 0

        for tkey, ttitle in sec_order:
            sec = g[g["type"].astype(str).str.lower().eq(tkey)].copy()
            if sec.empty: continue
            sec["_tkey"] = sec.apply(_time_key, axis=1)
            sec = sec.sort_values(by=["_tkey"]).drop(columns=["_tkey"])
            total_items += len(sec)

            html = [self._section_header(ttitle, len(sec))]
            for i, (_, row) in enumerate(sec.iterrows(), start=1):
                url  = row.get("url") or self._local_media_url(row)
                time_only = self._format_time_only(row)
                duration  = str(row.get("duration") or "").strip()
                fname     = row.get("file_name") or row.get("filename") or ""

                mime = self.infer_mime(url)
                if mime.startswith("image/"):
                    media = f'<img src="{url}" style="max-width:300px;border:1px solid #ddd;border-radius:4px;">'
                elif mime.startswith("video/"):
                    media = f'<video controls preload="none" style="width:300px;" playsinline><source src="{url}" type="{mime}"></video>'
                else:
                    media = f'<audio controls preload="none" style="width:300px;"><source src="{url}" type="{mime}"></audio>'

                missing = "" if url else "<div style='color:#b00020;margin-top:4px'>Media not found in ./data</div>"

                extra = ""
                if mode == "detailed":
                    exclude = {
                        "lat","lon","url","file_name","filename","date_only","date_norm",
                        "location","duration","type","display_name","device","device_meta","device_display",
                        "datetime","time"
                    }
                    kv = []
                    for k, v in row.items():
                        if k in exclude: continue
                        if isinstance(v, float) and np.isnan(v): continue
                        if v is None or (isinstance(v, str) and v.strip()==""): continue
                        kv.append(f"{k}: {v}")
                    if kv: extra = "<div style='color:#666;margin-top:4px'>" + "<br>".join(kv) + "</div>"

                html.append(
                    f"""
                    <div style="margin:8px 0 14px 0;padding:6px 8px;border-left:3px solid #eaecef;">
                      <div style="font-weight:600;margin:0 0 6px 0">item #{i}</div>
                      <div>time: <b>{time_only or '—'}</b></div>
                      <div>duration: <b>{duration or '—'}</b></div>
                      <div>file name: <code>{fname or '—'}</code></div>
                      {media}
                      {missing}
                      {extra}
                    </div>
                    """
                )
            sections_html.append("".join(html))

        header_html = f"""
            <div style="margin-bottom:8px">
              <div><b>date:</b> {date_header or '—'}</div>
              <div><b>location:</b> {location_header or '—'}</div>
              <div style="margin:6px 0 10px 0;"><b>items on this date:</b> {total_items}</div>
            </div>
        """
        body = "".join(sections_html)
        return f"<div style='max-height:60vh; overflow:auto; padding-right:6px'>{header_html}{body}</div>"

    # ===================================== Auto-fit helpers =====================================
    def _fit_bounds_from_df(self, df, *, min_zoom=11, single_zoom=16, pad_ratio=0.06):
        if df is None or df.empty or "lat" not in df or "lon" not in df:
            self.m.center = (23.565, 119.579); self.m.zoom = min_zoom
            return

        s = df.copy()
        s["lat"] = pd.to_numeric(s["lat"], errors="coerce")
        s["lon"] = pd.to_numeric(s["lon"], errors="coerce")
        s = s.dropna(subset=["lat","lon"])
        if s.empty:
            self.m.center = (23.565, 119.579); self.m.zoom = min_zoom
            return

        uniq_locs = s[["lat","lon"]].drop_duplicates()
        if len(uniq_locs) == 1:
            self.m.center = (float(uniq_locs["lat"].iloc[0]), float(uniq_locs["lon"].iloc[0]))
            self.m.zoom = single_zoom
            return

        lat_min, lat_max = float(s["lat"].min()), float(s["lat"].max())
        lon_min, lon_max = float(s["lon"].min()), float(s["lon"].max())
        if not all(map(np.isfinite, [lat_min, lat_max, lon_min, lon_max])):
            self.m.center = (23.565, 119.579); self.m.zoom = min_zoom
            return

        dlat = (lat_max - lat_min)
        dlon = (lon_max - lon_min)
        pad_lat = max(dlat * pad_ratio, 1e-5)
        pad_lon = max(dlon * pad_ratio, 1e-5)
        self.m.fit_bounds([[lat_min - pad_lat, lon_min - pad_lon],
                           [lat_max + pad_lat, lon_max + pad_lon]])

    def _filter_signature(self):
        s, e = self.get_date_range()
        return (
            tuple(sorted(self.w["place"].value)) if hasattr(self.w["place"], "value") else (),
            tuple(sorted(self.w["device"].value)) if hasattr(self.w["device"], "value") else (),
            tuple(sorted(self.w["type"].value)) if hasattr(self.w["type"], "value") else (),
            bool(self.w["only_samples"].value) if hasattr(self.w["only_samples"], "value") else False,
            (str(s), str(e)),
            (self.w["search"].value or "").strip().lower(),
            self.w["cluster"].value, self.w["colorby"].value, self.w["iconby"].value, self.w["popup"].value
        )

    def _compute_extent_tuple(self, df):
        if df is None or df.empty or "lat" not in df or "lon" not in df:
            return ("empty",)
        s = df.copy()
        s["lat"] = pd.to_numeric(s["lat"], errors="coerce")
        s["lon"] = pd.to_numeric(s["lon"], errors="coerce")
        s = s.dropna(subset=["lat","lon"])
        if s.empty:
            return ("empty",)
        lat_min, lat_max = float(s["lat"].min()), float(s["lat"].max())
        lon_min, lon_max = float(s["lon"].min()), float(s["lon"].max())
        group_count = int(s.groupby(["lat","lon","date_only"], dropna=False).size().shape[0])
        uniq_loc_count = int(s[["lat","lon"]].dropna().drop_duplicates().shape[0])
        return (round(lat_min, 6), round(lat_max, 6), round(lon_min, 6), round(lon_max, 6), group_count, uniq_loc_count)

    # ======================================= Render =============================================
    def _clear_dynamic(self):
        for k, ctrl in list(self.DYNAMIC.items()):
            if ctrl is not None:
                try: self.m.remove_control(ctrl)
                except Exception: pass
                self.DYNAMIC[k] = None

    def _clear_data_layers(self):
        for lyr in self.DATA_LAYERS:
            try: self.m.remove_layer(lyr)
            except Exception: pass
        self.DATA_LAYERS = []

    def render(self, *_):
        f = self.apply_filters()
        n = len(f)

        # Header texts
        self.w_status1.value = f"<div style='font-size:14px;color:#333'>Showing <b>{n}</b> items</div>"
        s, e = self.get_date_range()
        if s or e:
            self.w_status3.value = (
                f"<div style='font-size:13px;color:#666'>Date: {s if s else e} — {e if e else s}</div>"
                if (s and e) else f"<div style='font-size:13px;color:#666'>Date: {s or e}</div>"
            )
        else:
            self.w_status3.value = f"<div style='font-size:13px;color:#666'>Date: all</div>"

        # Reset layers & overlays (leave basemap intact)
        self._clear_data_layers()
        self._clear_dynamic()

        # Auto-fit decision
        new_sig = self._filter_signature()
        curr_type_sel = tuple(sorted(self.w["type"].value)) if hasattr(self.w["type"], "value") else ()
        type_changed = (curr_type_sel != (self.LAST_TYPE_SEL or ()))
        new_extent = self._compute_extent_tuple(f)
        extent_changed = (new_extent != self.LAST_EXTENT)
        should_autofit = self.FIRST_RENDER or (new_sig != self.LAST_FILTER_SIG) or type_changed or extent_changed

        if n > 0:
            # Color mapping by current choice (device/type)
            color_key = "device_display" if self.w["colorby"].value == "device" else "type"
            uniq = sorted([str(x) for x in f[color_key].fillna("unknown").astype(str).unique()])
            color_map = self.mk_color_map(uniq)

            # Group rows by (lat, lon, date_only), one location marker per group
            f["lat"] = f["lat"].astype(float)
            f["lon"] = f["lon"].astype(float)
            grouped_iter = f.groupby(["lat", "lon", "date_only"], dropna=False)

            markers = []
            for (_, _, _), g in grouped_iter:
                first = g.iloc[0]
                keyval = str(first.get(color_key, "unknown"))
                color = color_map.get(keyval, "gray")
                icon = AwesomeIcon(
                    name=(
                        "music" if str(first.get("type", "")).lower() == "audio"
                        else "video-camera" if str(first.get("type", "")).lower() == "video"
                        else "image"
                    ) if self.w["iconby"].value == "type" else "circle",
                    marker_color=color,
                    icon_color="white",
                )
                loc = (float(g["lat"].iloc[0]), float(g["lon"].iloc[0]))
                mk = Marker(location=loc, title=self._format_time_title(first), icon=icon, draggable=False)

                pop = Popup(
                    child=W.HTML(value=self.popup_group_html(g, mode=self.w["popup"].value)),
                    max_width=400, min_width=340, auto_close=False, close_button=True, keep_in_view=True,
                )
                mk.popup = pop

                def _bind_open_popup(marker, group_df):
                    def _cb(**kwargs):
                        marker.popup.child.value = self.popup_group_html(group_df, mode=self.w["popup"].value)
                        marker.open_popup()
                    return _cb
                mk.on_click(_bind_open_popup(mk, g.copy()))

                markers.append(mk)

            if self.w["cluster"].value == "Cluster":
                cluster_layer = MarkerCluster(markers=markers)
                self.m.add_layer(cluster_layer); self.DATA_LAYERS.append(cluster_layer)
            else:
                for mk in markers:
                    self.m.add_layer(mk); self.DATA_LAYERS.append(mk)

            # Legend overlay (counts by chosen color_key)
            counts = f[color_key].fillna("unknown").astype(str).value_counts().to_dict()
            legend_items = "".join(
                f'<div style="margin:2px 0;"><span style="display:inline-block;width:12px;height:20px;'
                f'background:{col};margin-right:6px;border:1px solid #333;"></span>{lab} ({counts.get(lab,0)})</div>'
                for lab, col in color_map.items()
            )
            legend_html = W.HTML(value=f"""
            <div style="background: white; padding: 8px 10px; border: 1px solid #bbb; border-radius: 6px;
                        box-shadow: 0 1px 3px rgba(0,0,0,.2); font-size: 14px; max-height: 40vh; overflow:auto;">
              <b>Color by: {'device' if self.w["colorby"].value=='device' else 'type'}</b><br>
              {legend_items}
            </div>
            """)
            ctrl = WidgetControl(widget=legend_html, position="bottomleft")
            self.m.add_control(ctrl); self.DYNAMIC["legend"] = ctrl

            if should_autofit:
                try:
                    self._fit_bounds_from_df(f, min_zoom=11, single_zoom=16, pad_ratio=0.06)
                except Exception:
                    self.m.center = (23.565, 119.579); self.m.zoom = 11

        else:
            notice = W.HTML(value="""
            <div style="background: rgba(255,255,255,.9); padding: 8px 10px; border: 1px solid #bbb; border-radius: 6px;
                        box-shadow: 0 1px 3px rgba(0,0,0,.2); font-size: 13px;">
              <b>No matching items</b> — adjust filters to see markers.
            </div>
            """)
            ctrl = WidgetControl(widget=notice, position="topleft")
            self.m.add_control(ctrl); self.DYNAMIC["notice"] = ctrl

            if should_autofit:
                self.m.center = (23.565, 119.579); self.m.zoom = 11

        # Update trackers
        self.FIRST_RENDER  = False
        self.LAST_FILTER_SIG = new_sig
        self.LAST_TYPE_SEL   = curr_type_sel
        self.LAST_EXTENT     = new_extent

    # =================================== Actions / Events =======================================
    def on_reset(self, _=None):
        """Restore every filter/display control to its default and redraw.

        Search text is cleared, place/type options are rebuilt from the loaded
        tables, every device and type is selected, the date control (whichever
        variant exists in ``self.w``) is set to the full date span found in
        ``self.DATA["date_only"]``, and the display toggles go back to
        Cluster / device / type / compact. Ends with one explicit ``render()``.

        Args:
            _: Ignored; accepts the button instance passed by ``on_click``.
        """
        w = self.w

        # --- Text / selection filters -------------------------------------------------------
        w["search"].value = ""
        if "location" in self.DATA:
            w["place"].options = self._uniq(self.DATA["location"])
        else:
            w["place"].options = []
        w["place"].value = ()

        device_names = self._uniq(self.DATA["device_display"])
        w["device"].value = tuple(device_names or ["unknown"])

        if "type" in self.media:
            w["type"].options = self._uniq(self.media["type"])
        else:
            w["type"].options = ["audio"]
        # Read the options back from the widget so the selection matches what it stored.
        w["type"].value = tuple(w["type"].options)

        # --- Date span of the whole dataset -------------------------------------------------
        if "date_only" in self.DATA:
            known_dates = self.DATA["date_only"].dropna()
        else:
            known_dates = pd.Series([], dtype="object")
        if known_dates.empty:
            dmin = None
            dmax = None
        else:
            dmin = known_dates.min()
            dmax = known_dates.max()

        # --- Date control: three possible widget variants -----------------------------------
        if "datepicker" in w:
            if dmin and dmax:
                w["datepicker"].v_model = [dmin.isoformat(), dmax.isoformat()]
            else:
                w["datepicker"].v_model = []
        elif "date_range" in w:
            w["date_range"].start = dmin
            w["date_range"].end = dmax
        else:
            for key, bound in (("date_start", dmin), ("date_end", dmax)):
                if key in w:
                    w[key].value = bound

        # --- Display toggles ----------------------------------------------------------------
        w["only_samples"].value = False
        display_defaults = (
            ("cluster", "Cluster"),
            ("colorby", "device"),
            ("iconby", "type"),
            ("popup", "compact"),
        )
        for key, default in display_defaults:
            w[key].value = default

        self.render()

    def export_csv(self, _=None):
        """Save the currently filtered rows as ``filtered_export.csv``.

        The table comes from ``self.apply_filters()`` and is written without
        the index column; a confirmation with the row count is then shown in
        the ``w_status1`` HTML widget.

        Args:
            _: Ignored; accepts the button instance passed by ``on_click``.
        """
        out_path = "filtered_export.csv"
        filtered = self.apply_filters()
        filtered.to_csv(out_path, index=False)

        n_rows = len(filtered)
        self.w_status1.value = (
            "<div style='font-size:14px;color:#333'>"
            f"Saved CSV • {n_rows} rows → <code>{out_path}</code></div>"
        )

    def export_geojson(self, _=None):
        """Save the currently filtered rows as ``filtered_export.geojson``.

        Every row returned by ``self.apply_filters()`` becomes one GeoJSON
        ``Point`` feature located at ``[lon, lat]``; all remaining columns are
        stored as the feature's properties.

        Property values are converted to JSON-native types before writing:
        dates/timestamps become ISO-8601 strings, numpy scalars become Python
        scalars, and missing values (NaN, NaT, NA, non-finite floats) become
        ``null``. Without this, ``json.dump`` raises ``TypeError`` as soon as a
        row carries a date object (e.g. in ``date_only``).

        Rows whose ``lat``/``lon`` cannot be read as finite numbers are
        skipped, because a GeoJSON position must be numeric and ``NaN`` is not
        valid JSON. The status line reports the number of features written.

        Args:
            _: Ignored; accepts the button instance passed by ``on_click``.
        """
        f = self.apply_filters()
        path = "filtered_export.geojson"

        def json_safe(v):
            """Return a JSON-serializable equivalent of one cell value."""
            if v is None or isinstance(v, (str, bool, int)):
                return v
            if isinstance(v, float):  # also covers np.float64 (a float subclass)
                return v if np.isfinite(v) else None
            if isinstance(v, dict):
                return {str(k): json_safe(x) for k, x in v.items()}
            if isinstance(v, (list, tuple, set, np.ndarray)):
                return [json_safe(x) for x in v]
            if pd.isna(v):  # NaT, pd.NA, np.datetime64("NaT")
                return None
            if isinstance(v, np.datetime64):
                return pd.Timestamp(v).isoformat()
            if isinstance(v, np.generic):  # np.int64, np.bool_, ...
                return json_safe(v.item())
            if hasattr(v, "isoformat"):  # date, datetime, time, pd.Timestamp
                return v.isoformat()
            # Last resort for exotic cell types: keep the export going.
            return str(v)

        def feat(row):
            """Build one Point feature, or None when the row has no usable position."""
            try:
                lon, lat = float(row["lon"]), float(row["lat"])
            except (TypeError, ValueError):
                return None
            if not (np.isfinite(lon) and np.isfinite(lat)):
                return None
            return {
                "type": "Feature",
                "geometry": {"type": "Point", "coordinates": [lon, lat]},
                "properties": {
                    k: json_safe(v)
                    for k, v in row.drop(labels=["lat", "lon"]).items()
                },
            }

        candidates = (feat(r) for _, r in f.iterrows())
        features = [ft for ft in candidates if ft is not None]
        gj = {"type": "FeatureCollection", "features": features}
        with open(path, "w", encoding="utf-8") as fp:
            json.dump(gj, fp, ensure_ascii=False)
        self.w_status1.value = f"<div style='font-size:14px;color:#333'>Saved GeoJSON • {len(features)} features → <code>{path}</code></div>"

    def _rerender(self, *_): self.render()

    def _wire_events(self):
        w = self.w
        w["reset"].on_click(self.on_reset)
        w["export_c"].on_click(self.export_csv)
        w["export_g"].on_click(self.export_geojson)

        watch = [w["search"], w["place"], w["device"], w["type"], w["only_samples"],
                 w["cluster"], w["colorby"], w["iconby"], w["popup"]] + self.date_watch_widgets
        for ctrl in watch:
            if hasattr(ctrl, "observe"):
                if hasattr(ctrl, "v_model"): ctrl.observe(self._rerender, names="v_model")
                else:                        ctrl.observe(self._rerender, names="value")

# ---- Run (instantiate) ------------------------------------------------------------------------
# Construct the dashboard once when this cell runs and keep it under a module-level
# name so later cells can reach its attributes (the map widget is read from `.m`).
_DASHBOARD_INSTANCE = DataRainbowDashboard()



Media present in 'data', skip unzip.


HBox(children=(Map(center=[23.565, 119.579], close_popup_on_click=False, controls=(ZoomControl(options=['posit…

In [None]:
# Expose the ipyleaflet/leafmap widget for export & Live Code:
# bind the dashboard's map (`.m`) to the short top-level name `m`.
m = _DASHBOARD_INSTANCE.m
# Optional: uncomment the line below to display just the map widget as this cell's output
# m


In [None]:

# Static export — only performed when the environment sets EXPORT_STATIC=1 (CI)
import os
from ipywidgets.embed import embed_minimal_html

if os.getenv("EXPORT_STATIC") == "1":
    _static_dir = "docs/_static"
    _static_html = "docs/_static/dashboard_static.html"

    # Make sure the output folder exists, then embed the map widget `m` into a standalone page.
    os.makedirs(_static_dir, exist_ok=True)
    embed_minimal_html(_static_html, views=[m], title="Data Rainbow — Static Dashboard")
    print(f"Exported {_static_html}")