# Results archive inspection

This notebook automatically inspects every `.zip` file stored in the `results` directory.
It parses the filename of each archive to extract useful metadata, relies on the included
`manifest.json` file to map Redis database dumps to their logical meaning, and previews
all extracted files directly below. Large files are truncated to their first few kilobytes
so the notebook stays responsive.

In [None]:

from pathlib import Path
from dataclasses import dataclass
import os

import base64
import binascii
import json
import zipfile

# Directory scanned for result archives (relative to the working directory).
RESULTS_DIR = Path("results")
# Added to the numeric host fragment of an archive name to obtain the host id
# (see parse_zip_metadata).
HOST_BASE = 9
# Short label for each database index listed in a manifest; indexes missing
# from this table are reported as "Unknown" by the code that looks them up.
DB_LABELS = {
    0: "DATA",
    1: "CAN",
    2: "R",
    3: "NR",
    4: "CAR",
    5: "AR",
    6: "GP",
    7: "BP",
    8: "PR",
    9: "AP",
    10: "LOGS",
}
# Members up to this size are read completely when an archive is collected.
MAX_FULL_BYTES = 200_000
# Number of leading bytes read from members that are not read completely.
MAX_PREVIEW_BYTES = 10_000

# Inventory of every ZIP archive directly inside RESULTS_DIR, sorted by path.
zip_paths = sorted(RESULTS_DIR.glob("*.zip"))
zip_names = [path.name for path in zip_paths]
zip_inventory = {
    "results_dir": str(RESULTS_DIR),
    "count": len(zip_paths),
    "found": bool(zip_paths),
    "paths": [str(path) for path in zip_paths],
    "names": zip_names,
}

# Optional pre-selection through environment variables; the index variable is
# tried first, then the name variable.
env_selected_index = os.environ.get("RESULTS_SELECTED_ZIP_INDEX")
env_selected_zip = os.environ.get("RESULTS_SELECTED_ZIP")
selected_zip_index = None
selected_zip_name = None

if env_selected_index is not None:
    try:
        candidate_index = int(env_selected_index)
    except ValueError:
        # A non-numeric index is ignored rather than treated as an error.
        candidate_index = None
    if isinstance(candidate_index, int) and 0 <= candidate_index < len(zip_paths):
        selected_zip_index = candidate_index
        selected_zip_name = zip_names[selected_zip_index]
if selected_zip_name is None and env_selected_zip in zip_names:
    selected_zip_name = env_selected_zip
    selected_zip_index = zip_names.index(selected_zip_name)
# Fall back to the first archive when nothing valid was pre-selected.
if selected_zip_name is None and zip_names:
    selected_zip_index = 0
    selected_zip_name = zip_names[0]

selected_zip_path = zip_paths[selected_zip_index] if selected_zip_index is not None else None

# Interactive override: list the archives and let the user pick one by index
# or by exact file name. An empty answer keeps the selection made above.
if zip_names:
    print("Available ZIP archives:")
    for index, name in enumerate(zip_names):
        print(f"[{index}] {name}")
    user_choice = input("Select ZIP by index or name (press Enter to keep current selection): ").strip()
    if user_choice:
        resolved_index = None
        try:
            resolved_index = int(user_choice)
        except ValueError:
            resolved_index = None
        if resolved_index is not None and 0 <= resolved_index < len(zip_paths):
            selected_zip_index = resolved_index
            selected_zip_name = zip_names[selected_zip_index]
        elif user_choice in zip_names:
            selected_zip_name = user_choice
            selected_zip_index = zip_names.index(selected_zip_name)
        else:
            print("Invalid selection, keeping previous choice.")
    # Recompute the path because the index may have changed above.
    selected_zip_path = zip_paths[selected_zip_index] if selected_zip_index is not None else None
    if selected_zip_name is not None and selected_zip_path is not None:
        print(f"Current selection: [{selected_zip_index}] {selected_zip_name}")
    else:
        print("Current selection: none")
else:
    print("No ZIP archives found in results directory.")

# Record the final selection next to the inventory.
zip_inventory["selection"] = {
    "name": selected_zip_name,
    "index": selected_zip_index,
    "path": str(selected_zip_path) if selected_zip_path else None,
}


In [None]:
def format_bytes(size):
    """Return a human-readable representation of a file size.

    Args:
        size: Size in bytes, or ``None`` when the size is unknown.

    Returns:
        ``"-"`` for ``None``; a whole number of bytes (``"512 B"``) below
        1024; otherwise a two-decimal value in KB, MB, GB or TB (binary
        multiples). Values beyond the TB range are still expressed in TB.
    """
    if size is None:
        return "-"
    value = float(size)
    if value < 1024:
        return f"{int(value)} B"
    for unit in ("KB", "MB", "GB"):
        value /= 1024
        if value < 1024:
            return f"{value:.2f} {unit}"
    # TB is the largest unit, so it absorbs everything that is left.
    return f"{value / 1024:.2f} TB"


def parse_zip_metadata(zip_path):
    """Extract dataset, class, completion flag, and host numbers from the archive name.

    The archive stem is split from the right into four ``_``-separated parts:
    ``<dataset>_<class>_<completion>_<host>``. The dataset part may itself
    contain underscores.

    Archives that do not follow this convention (fewer than three underscores)
    no longer raise ``ValueError``: the whole stem is reported as the dataset
    and the remaining name-derived fields are ``None``, so one stray archive
    cannot abort the inventory of the whole directory.

    Args:
        zip_path: ``pathlib.Path`` of an existing archive (its size is read
            with ``stat()``).

    Returns:
        A dict with the keys ``zip_path``, ``zip_name``, ``dataset``,
        ``class``, ``completion_raw``, ``is_completed``, ``size_bytes``,
        ``size_text``, ``host_offset`` and ``host_id``.
    """
    parts = zip_path.stem.rsplit("_", 3)
    if len(parts) == 4:
        dataset, class_name, completion_flag, host_fragment = parts
    else:
        # Unconventional name: keep the stem, leave the rest unknown.
        dataset = zip_path.stem
        class_name = completion_flag = host_fragment = None

    host_offset = None
    host_id = None
    if host_fragment is not None:
        try:
            host_offset = int(host_fragment)
        except ValueError:
            host_offset = None
        else:
            host_id = host_offset + HOST_BASE

    # Only the literal words "true"/"false" (any case) are understood.
    is_completed = None
    if completion_flag is not None:
        is_completed = {"true": True, "false": False}.get(completion_flag.lower())

    size_bytes = zip_path.stat().st_size
    return {
        "zip_path": str(zip_path),
        "zip_name": zip_path.name,
        "dataset": dataset,
        "class": class_name,
        "completion_raw": completion_flag,
        "is_completed": is_completed,
        "size_bytes": size_bytes,
        "size_text": format_bytes(size_bytes),
        "host_offset": host_offset,
        "host_id": host_id,
    }


def detect_root_prefix(archive, zip_path):
    """Return ``"<archive stem>/"`` when some file lives under it, else ``""``.

    Only file members are considered; directory entries are skipped.
    """
    candidate = zip_path.stem + "/"
    for member in archive.infolist():
        if member.is_dir():
            continue
        if member.filename.startswith(candidate):
            return candidate
    return ""


def resolve_manifest(archive, zip_path):
    """Locate and parse ``manifest.json`` inside the archive.

    Prefixes are tried in this order: ``"<archive stem>/"``, every directory
    entry of the archive (except those under ``__MACOSX/``), and finally the
    archive root. The first prefix under which ``manifest.json`` exists wins.

    Returns:
        A ``(prefix, manifest)`` tuple.

    Raises:
        KeyError: No ``manifest.json`` was found under any candidate prefix.
    """
    directory_prefixes = []
    for member in archive.infolist():
        if not member.is_dir():
            continue
        name = member.filename
        if name.startswith("__MACOSX/"):
            continue
        directory_prefixes.append(name if name.endswith("/") else name + "/")

    # dict.fromkeys drops duplicates while keeping first-seen order.
    ordered_prefixes = dict.fromkeys([f"{zip_path.stem}/", *directory_prefixes, ""])
    for prefix in ordered_prefixes:
        try:
            with archive.open(f"{prefix}manifest.json") as handle:
                return prefix, json.load(handle)
        except KeyError:
            continue
    raise KeyError("manifest.json not found")


class DumpDecodeError(RuntimeError):
    """Raised when a Redis DUMP payload (or its base64 wrapper) cannot be decoded."""


@dataclass
class DumpSections:
    # The three parts of a raw DUMP value as produced by split_dump_sections.
    # Everything before the 10-byte footer.
    payload: bytes
    # Two footer bytes read as a little-endian unsigned integer.
    version: int
    # The last eight bytes of the dump, kept verbatim.
    checksum: bytes


class _LengthEncoding:
    """Result of reading a length header.

    Either ``value`` holds a plain length, or ``encoding`` holds the
    identifier of a special string encoding (and ``value`` is ``None``).
    """

    __slots__ = ("value", "encoding")

    def __init__(self, value=None, encoding=None):
        self.value, self.encoding = value, encoding


RDB_ENCODING_INT8 = 0
RDB_ENCODING_INT16 = 1
RDB_ENCODING_INT32 = 2
RDB_ENCODING_LZF = 3


def split_dump_sections(raw: bytes) -> DumpSections:
    """Separate a Redis dump into payload, RDB version, and checksum.

    The last ten bytes form the footer: a two-byte little-endian version
    followed by an eight-byte checksum. Everything before it is the payload.

    Raises:
        DumpDecodeError: The input is shorter than the ten-byte footer.
    """
    if len(raw) < 10:
        raise DumpDecodeError("DUMP payload is too short to contain metadata")
    body, footer = raw[:-10], raw[-10:]
    return DumpSections(
        payload=body,
        version=int.from_bytes(footer[:2], "little", signed=False),
        checksum=footer[2:],
    )


def _read_length_info(buffer: bytes, offset: int):
    """Read an RDB length header starting at ``offset``.

    The two high bits of the first byte select the form:

    * ``00`` - the low six bits are the length;
    * ``01`` - 14-bit length (low six bits plus the next byte);
    * ``10`` - a big-endian length in the following bytes: eight bytes when
      the first byte is ``0x81`` (64-bit form), four bytes otherwise;
    * ``11`` - no length; the low six bits identify a special string encoding.

    The 64-bit form used to be read as a 32-bit length, which returned a wrong
    length and left the cursor in the middle of the header.

    Returns:
        ``(_LengthEncoding, next_offset)`` where ``next_offset`` points just
        past the header.

    Raises:
        DumpDecodeError: ``offset`` is outside the buffer or the header is
            truncated.
    """
    if offset >= len(buffer):
        raise DumpDecodeError("Offset out of range while reading length")
    first = buffer[offset]
    prefix = first >> 6
    if prefix == 0:
        return _LengthEncoding(first & 0x3F), offset + 1
    if prefix == 1:
        if offset + 1 >= len(buffer):
            raise DumpDecodeError("Truncated 14-bit encoded length")
        length = ((first & 0x3F) << 8) | buffer[offset + 1]
        return _LengthEncoding(length), offset + 2
    if prefix == 2:
        if first == 0x81:
            width, message = 8, "Truncated 64-bit encoded length"
        else:
            width, message = 4, "Truncated 32-bit encoded length"
        end = offset + 1 + width
        if end > len(buffer):
            raise DumpDecodeError(message)
        length = int.from_bytes(buffer[offset + 1 : end], "big", signed=False)
        return _LengthEncoding(length), end
    return _LengthEncoding(None, first & 0x3F), offset + 1


def lzf_decompress(data: bytes, expected_length: int) -> bytes:
    """Decompress an LZF stream and verify the resulting size.

    A control byte below 32 introduces a literal run of ``control + 1``
    bytes. Any other control byte is a back-reference: its top three bits
    give the copy length (extended by one extra byte when they equal 7, then
    increased by 2) and its low five bits plus the following byte give the
    distance back into the output produced so far.

    Raises:
        DumpDecodeError: The stream is truncated, a reference points outside
            the output, or the result does not have ``expected_length`` bytes.
    """
    out = bytearray()
    pos = 0
    total = len(data)
    while pos < total:
        control = data[pos]
        pos += 1

        if control < 32:
            stop = pos + control + 1
            if stop > total:
                raise DumpDecodeError("Truncated literal LZF sequence")
            out += data[pos:stop]
            pos = stop
            continue

        count = control >> 5
        source = len(out) - ((control & 0x1F) << 8) - 1
        if count == 7:
            if pos >= total:
                raise DumpDecodeError("Truncated LZF sequence while extending length")
            count += data[pos]
            pos += 1
        if pos >= total:
            raise DumpDecodeError("Truncated LZF sequence while resolving reference")
        source -= data[pos]
        pos += 1
        count += 2
        if source < 0:
            raise DumpDecodeError("Negative LZF reference")
        # Copy byte by byte: the source range may overlap the bytes being
        # appended, which is how LZF encodes repeated runs.
        for index in range(source, source + count):
            if index >= len(out):
                raise DumpDecodeError("LZF reference out of range")
            out.append(out[index])

    if len(out) != expected_length:
        raise DumpDecodeError(
            f"Unexpected decompressed length: expected {expected_length}, got {len(out)}"
        )
    return bytes(out)


def _decode_special_encoding(buffer: bytes, offset: int, encoding: int):
    """Decode a specially encoded string whose body starts at ``offset``.

    Integer encodings are little-endian signed values returned as their ASCII
    decimal text. The LZF encoding stores the compressed length, the original
    length, and then the compressed bytes.

    Returns:
        ``(decoded_bytes, next_offset)``.

    Raises:
        DumpDecodeError: Unknown encoding, truncated data, or invalid LZF data.
    """
    fixed_width = {
        RDB_ENCODING_INT8: (1, "Truncated 8-bit encoded integer"),
        RDB_ENCODING_INT16: (2, "Truncated 16-bit encoded integer"),
        RDB_ENCODING_INT32: (4, "Truncated 32-bit encoded integer"),
    }
    if encoding in fixed_width:
        width, truncated_message = fixed_width[encoding]
        stop = offset + width
        if stop > len(buffer):
            raise DumpDecodeError(truncated_message)
        number = int.from_bytes(buffer[offset:stop], "little", signed=True)
        return str(number).encode("ascii"), stop

    if encoding != RDB_ENCODING_LZF:
        raise DumpDecodeError("Unknown string encoding")

    packed_len, cursor = _read_length_info(buffer, offset)
    plain_len, body_start = _read_length_info(buffer, cursor)
    if packed_len.value is None or plain_len.value is None:
        raise DumpDecodeError("Invalid LZF length encoding")
    body_end = body_start + packed_len.value
    if body_end > len(buffer):
        raise DumpDecodeError("Truncated encoded string")
    return lzf_decompress(buffer[body_start:body_end], plain_len.value), body_end


def _read_encoded_string(buffer: bytes, offset: int):
    """Read one string (plain or specially encoded) starting at ``offset``.

    Returns:
        ``(string_bytes, next_offset)``.

    Raises:
        DumpDecodeError: The buffer ends before the announced string length.
    """
    header, cursor = _read_length_info(buffer, offset)
    if header.encoding is not None:
        return _decode_special_encoding(buffer, cursor, header.encoding)
    stop = cursor + header.value
    if stop > len(buffer):
        raise DumpDecodeError("Truncated encoded string")
    return buffer[cursor:stop], stop


def decode_string_from_dump(raw: bytes) -> bytes:
    """Return the string value stored in a raw Redis DUMP blob.

    Raises:
        DumpDecodeError: The dump is malformed, has an empty payload, or its
            leading type byte is not 0 (the only type handled here).
    """
    payload = split_dump_sections(raw).payload
    if len(payload) == 0:
        raise DumpDecodeError("Empty payload")
    type_marker = payload[0]
    if type_marker != 0:
        raise DumpDecodeError(f"Non-string object type: {type_marker}")
    return _read_encoded_string(payload, 1)[0]


def decode_bytes(value: str) -> bytes:
    """Decode a base64 text field into raw bytes.

    Raises:
        DumpDecodeError: ``value`` is not a ``str``, is not ASCII, or is not
            valid base64.
    """
    if isinstance(value, str):
        try:
            ascii_payload = value.encode("ascii")
            return base64.b64decode(ascii_payload)
        except (UnicodeEncodeError, binascii.Error) as exc:
            raise DumpDecodeError(f"Invalid base64 payload: {exc}") from exc
    raise DumpDecodeError("Encoded value must be a string")


def decode_key(entry):
    """Return the raw key bytes of a backup entry (base64 text under ``"key"``)."""
    encoded_key = entry["key"]
    return decode_bytes(encoded_key)


def text_preview(value: bytes, limit: int = 120) -> str:
    """Decode ``value`` as UTF-8 (invalid bytes replaced) and cap it at ``limit`` characters.

    Text over the limit is cut to ``limit - 1`` characters followed by ``"."``.
    """
    decoded = value.decode("utf-8", errors="replace")
    return decoded if len(decoded) <= limit else decoded[: limit - 1] + "."


def try_decode_value(entry):
    """Best-effort decoding of a backup entry's value.

    Never raises ``DumpDecodeError``; failures are reported in the returned
    details instead.

    Returns:
        ``(preview, details)``. ``preview`` is a short text or a
        ``<placeholder>``. ``details`` is a copy of the entry's ``"value"``
        mapping when the value is missing or not valid base64; otherwise it
        is a dict with ``dump_size`` plus, depending on how far decoding got,
        ``rdb_version``, ``checksum``, ``decoded_bytes``, ``dump_error`` or
        ``decode_error``.
    """
    stored = dict(entry.get("value") or {})
    encoded = stored.get("data")
    if not encoded:
        return "<no value>", stored

    try:
        dump = decode_bytes(encoded)
    except DumpDecodeError as exc:
        stored["decode_error"] = str(exc)
        return "<invalid base64>", stored

    report = {"dump_size": len(dump)}
    try:
        parts = split_dump_sections(dump)
    except DumpDecodeError as exc:
        report["dump_error"] = str(exc)
        return "<invalid dump>", report
    report["rdb_version"] = parts.version
    report["checksum"] = parts.checksum.hex()

    kind = entry.get("type")
    if kind != "string":
        # Only string values are decoded; other types are just sized.
        return f"<{kind} - {len(parts.payload)} bytes>", report

    try:
        text_bytes = decode_string_from_dump(dump)
    except DumpDecodeError as exc:
        report["decode_error"] = str(exc)
        return "<string not decoded>", report
    report["decoded_bytes"] = text_bytes
    return text_preview(text_bytes), report


def shorten_text(text: str, limit: int = 600) -> str:
    """Break up runs of four backticks and cap the text at ``limit`` characters.

    Text over the limit is cut to ``limit - 1`` characters followed by ``"."``.
    """
    cleaned = text.replace("````", "``` `")
    if len(cleaned) <= limit:
        return cleaned
    return cleaned[: limit - 1] + "."


def summarise_backup_entries(entries, limit: int = 3):
    """Render the first ``limit`` backup entries as a list of text lines.

    Each entry contributes a key line, a type/TTL line, the (shortened) value
    and, when decoding failed, a warning line. A final line reports how many
    entries were left out.
    """
    if not entries:
        return ["> No entries stored in this backup."]

    report = []
    for position, record in enumerate(entries[:limit], start=1):
        try:
            key_text = decode_key(record).decode("utf-8", errors="replace") or "<empty key>"
        except (KeyError, DumpDecodeError) as exc:
            key_text = f"<unable to decode key: {exc}>"

        preview, details = try_decode_value(record)
        kind = record.get("type", "unknown")
        ttl = record.get("pttl")
        ttl_text = str(ttl) if isinstance(ttl, int) else "persistent"
        report.append(f"Entry {position}: key `{key_text}`")
        report.append(f"Type: `{kind}`; TTL (ms): `{ttl_text}`")

        # Prefer the fully decoded value over the already-truncated preview.
        raw_value = details.get("decoded_bytes")
        if isinstance(raw_value, (bytes, bytearray)):
            body = raw_value.decode("utf-8", errors="replace")
        else:
            body = str(preview)
        report.append(shorten_text(body))

        problem = details.get("decode_error") or details.get("dump_error")
        if problem:
            report.append(f"Warning: {problem}")

    hidden = len(entries) - limit
    if hidden > 0:
        report.append(f"Additional entries not shown: {hidden}")
    return report


def build_backup_preview(data):
    """Condense a parsed backup document into a small preview dict.

    Missing or empty ``metadata``/``entries`` are treated as empty; the key
    count falls back to the number of entries.
    """
    meta = data.get("metadata") or {}
    records = data.get("entries") or []
    preview = {}
    preview["key_count"] = meta.get("key_count", len(records))
    preview["created_at"] = meta.get("created_at_utc")
    preview["source"] = meta.get("source") or {}
    preview["type_summary"] = meta.get("type_summary") or {}
    preview["sample_entries"] = summarise_backup_entries(records)
    return preview


def try_render_backup_preview(relative_name: str, payload: bytes):
    """Return a backup preview for ``payload``, or ``None`` if it is not a backup.

    A backup is a UTF-8 JSON object containing both an ``"entries"`` and a
    ``"metadata"`` key. ``relative_name`` is accepted but not used.
    """
    try:
        document = json.loads(payload.decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError):
        return None
    looks_like_backup = (
        isinstance(document, dict)
        and "entries" in document
        and "metadata" in document
    )
    return build_backup_preview(document) if looks_like_backup else None


def get_relative_member_name(info, prefix):
    """Return the member's file name with a leading ``prefix`` removed, if present."""
    name = info.filename
    has_prefix = bool(prefix) and name.startswith(prefix)
    return name[len(prefix):] if has_prefix else name


def is_logs_entry(relative_name):
    """Tell whether a member path is the top-level ``logs`` entry or lies below it.

    Backslashes are treated as separators, and every leading ``.`` or ``/``
    character is ignored before the first path component is compared.
    """
    cleaned = relative_name.replace('\\', '/').lstrip('./')
    first_component, _, _ = cleaned.partition('/')
    return first_component == 'logs'


In [None]:

import math

# JSON members larger than this are not parsed by collect_archive_data; they
# are flagged as truncated and only get a text preview.
READ_JSON_LIMIT_BYTES = 5_000_000

def collect_archive_data(zip_path):
    """Open one result archive and gather everything the notebook displays.

    Steps:

    1. Parse the archive name (``parse_zip_metadata``).
    2. Locate ``manifest.json``; if that fails for any reason, fall back to
       guessing the root prefix and continue without a manifest.
    3. Build one overview row per database listed in the manifest.
    4. Walk every file member except those under ``logs`` and store either
       its parsed JSON or a text preview of at most 1000 characters.

    JSON members larger than ``READ_JSON_LIMIT_BYTES`` are not parsed. They
    used to be read completely into memory just to produce the 1000-character
    preview; now only ``MAX_PREVIEW_BYTES`` are read for them, as for any
    other large member.

    Args:
        zip_path: ``pathlib.Path`` of the archive.

    Returns:
        A dict with the keys ``zip_name``, ``zip_path``, ``metadata``,
        ``manifest``, ``manifest_prefix``, ``db_overview``, ``members`` and
        ``backups`` (parsed backup documents keyed by relative member name).
    """
    result = {
        'zip_name': zip_path.name,
        'zip_path': str(zip_path),
        'metadata': parse_zip_metadata(zip_path),
        'manifest': None,
        'manifest_prefix': '',
        'db_overview': [],
        'members': [],
        'backups': {},
    }
    with zipfile.ZipFile(zip_path) as archive:
        try:
            prefix, manifest = resolve_manifest(archive, zip_path)
        except Exception:
            # Deliberately broad: a missing or unreadable manifest must not
            # prevent the rest of the archive from being previewed.
            prefix = detect_root_prefix(archive, zip_path)
            manifest = None
        result['manifest'] = manifest
        result['manifest_prefix'] = prefix

        if isinstance(manifest, dict):
            # "or" guards against explicit nulls in the manifest.
            files_map = manifest.get('files') or {}
            for db_index in manifest.get('databases') or []:
                file_name = files_map.get(str(db_index))
                if not file_name:
                    continue
                try:
                    size = archive.getinfo(f"{prefix}{file_name}").file_size
                except KeyError:
                    # Listed in the manifest but absent from the archive.
                    size = None
                result['db_overview'].append({
                    'db_index': db_index,
                    'label': DB_LABELS.get(db_index, 'Unknown'),
                    'json_file': file_name,
                    'size_bytes': size,
                    'size_text': format_bytes(size) if size is not None else None,
                })

        members = sorted(
            (info for info in archive.infolist() if not info.is_dir()),
            key=lambda info: info.filename,
        )
        for info in members:
            relative = get_relative_member_name(info, prefix)
            if is_logs_entry(relative):
                continue
            size = info.file_size
            is_json = relative.endswith('.json')
            json_too_large = is_json and size > READ_JSON_LIMIT_BYTES
            parse_json = is_json and not json_too_large
            entry = {
                'relative_name': relative,
                'size_bytes': size,
                'size_text': format_bytes(size),
                'json_data': None,
                'json_truncated': json_too_large,
                'text_preview': None,
                'backup_preview': None,
            }
            # Read everything only when it is small or will actually be parsed.
            read_entire = size <= MAX_FULL_BYTES or parse_json
            with archive.open(info.filename) as handle:
                payload = handle.read() if read_entire else handle.read(MAX_PREVIEW_BYTES)

            data = None
            if parse_json:
                try:
                    data = json.loads(payload.decode('utf-8'))
                except (ValueError, RecursionError):
                    # Covers invalid UTF-8, invalid JSON and absurd nesting.
                    data = None

            if data is not None:
                entry['json_data'] = data
                # Same test as try_render_backup_preview, without parsing the
                # payload a second time.
                if isinstance(data, dict) and 'entries' in data and 'metadata' in data:
                    entry['backup_preview'] = build_backup_preview(data)
                    result['backups'][relative] = data
            else:
                entry['text_preview'] = payload.decode('utf-8', errors='replace')[:1000]
            result['members'].append(entry)
    return result

def _format_number(value: float) -> str:
    """Format a number compactly for display.

    Infinities and NaN get symbolic names; magnitudes of 1000 and above use
    three significant digits, everything else six.
    """
    if math.isinf(value):
        return '-∞' if value < 0 else '∞'
    if math.isnan(value):
        return 'NaN'
    spec = '.3g' if abs(value) >= 1_000 else '.6g'
    return format(value, spec)

def _short_text(text: str, limit: int = 120) -> str:
    """Cap ``text`` at ``limit`` characters, ending cut text with an ellipsis."""
    if len(text) > limit:
        return text[: limit - 1] + '…'
    return text

def _inline_summary(value, depth: int = 0) -> str:
    """Return a one-token description of a decoded JSON value.

    Containers are reduced to their size, numbers are formatted compactly,
    strings are shortened and quoted, and ``None`` and booleans use their
    JSON spellings (``null``, ``true``, ``false``).

    Booleans are handled before numbers because ``bool`` is a subclass of
    ``int``; they used to be rendered as ``1`` and ``0``.

    Args:
        value: Any value produced by ``json.loads``.
        depth: Accepted for call compatibility; not used.
    """
    if value is None:
        return 'null'
    if isinstance(value, bool):
        return 'true' if value else 'false'
    if isinstance(value, dict):
        return f"object({len(value)})"
    if isinstance(value, list):
        return f"array({len(value)})"
    if isinstance(value, (int, float)):
        return _format_number(value)
    if isinstance(value, str):
        return repr(_short_text(value))
    return repr(value)

def _extract_metadata_from_chunk(text: str):
    """Pull the ``"metadata"`` object out of a possibly truncated JSON text.

    The object is located by searching for the first ``"metadata"`` key and
    the first ``{`` after it, then decoded in place with the JSON parser.
    Using the parser instead of counting braces means braces inside string
    values no longer cut the object short.

    Args:
        text: The beginning of a JSON document; it may end mid-value.

    Returns:
        The decoded metadata dict, or ``{}`` when the key is missing or the
        object is incomplete or invalid.
    """
    key_pos = text.find('"metadata"')
    if key_pos == -1:
        return {}
    brace_start = text.find('{', key_pos)
    if brace_start == -1:
        return {}
    try:
        # raw_decode parses exactly one value and ignores whatever follows.
        metadata, _ = json.JSONDecoder().raw_decode(text, brace_start)
    except json.JSONDecodeError:
        return {}
    return metadata

def _fetch_backup_metadata(zip_path, member_name, size_limit=2_000_000):
    """Read the first ``size_limit`` bytes of a member and extract its metadata.

    Returns ``{}`` when the archive has no member of that name.
    """
    try:
        with zipfile.ZipFile(zip_path) as archive, archive.open(member_name) as handle:
            head = handle.read(size_limit)
    except KeyError:
        return {}
    return _extract_metadata_from_chunk(head.decode('utf-8', errors='replace'))

def _describe_json(value, depth: int = 0):
    """Describe a decoded JSON value as ``(headline, detail_lines)``.

    Objects list up to five key names in the headline and summarise up to
    three values; arrays report their length and up to three sample items;
    scalars are summarised inline with no detail lines.
    """
    if isinstance(value, dict):
        names = list(value.keys())
        overflow = "…" if len(names) > 5 else ''
        headline = f"object with {len(names)} keys: {', '.join(names[:5])}" + overflow
        details = [
            f"{name}: {_inline_summary(value[name], depth + 1)}"
            for name in names[:3]
        ]
        if len(names) > 3:
            details.append('…')
        return headline, details

    if isinstance(value, list):
        count = len(value)
        details = []
        if value:
            rendered = ', '.join(_inline_summary(item, depth + 1) for item in value[:3])
            more = '…' if count > 3 else ''
            details.append(f"sample: {rendered}{more}")
        return f"array with {count} items", details

    return _inline_summary(value, depth), []

def summarise_endpoint_univers(series_map):
    """Summarise a mapping of feature name to a list of points.

    Returns ``(headline, details)``. Details cover the first five features in
    sorted name order; the headline reports the feature count and the distinct
    series lengths found among those inspected features.
    """
    if not isinstance(series_map, dict):
        return 'time series (unexpected structure)', []

    feature_total = len(series_map)
    seen_lengths = set()
    details = []
    for name in sorted(series_map.keys())[:5]:
        points = series_map.get(name)
        if not isinstance(points, list):
            details.append(f"{name}: unexpected {type(points).__name__}")
            continue
        seen_lengths.add(len(points))
        rendered = ', '.join(
            _format_number(point) if isinstance(point, (int, float)) else str(point)
            for point in points[:3]
        )
        more = '…' if len(points) > 3 else ''
        details.append(f"{name}: len={len(points)}, sample=[{rendered}{more}]")
    if feature_total > 5:
        details.append('…')

    headline = f"time series (Endpoint Univers) with {feature_total} features"
    if seen_lengths:
        ordered = sorted(seen_lengths)
        if len(ordered) == 1:
            headline += f"; length {ordered[0]}"
        else:
            headline += "; lengths " + ', '.join(str(length) for length in ordered)
    return headline, details

def summarise_random_forest(trees):
    """Summarise a list of tree dicts as ``(headline, details)``.

    Details describe the first five trees using their ``tree_id``,
    ``feature`` and ``value`` fields.
    """
    if not isinstance(trees, list):
        return 'random forest (unexpected structure)', []

    def describe(tree):
        if not isinstance(tree, dict):
            return f"tree: unexpected {type(tree).__name__}"
        tree_id = tree.get('tree_id')
        label = 'tree' if tree_id is None else f"tree {tree_id}"
        feature = tree.get('feature')
        if feature is None:
            feature = '?'
        value = tree.get('value')
        if isinstance(value, (int, float)):
            threshold = _format_number(value)
        else:
            threshold = str(value)
        return f"{label}: root feature {feature}, threshold {threshold}"

    details = [describe(tree) for tree in trees[:5]]
    if len(trees) > 5:
        details.append('…')
    return f"random forest with {len(trees)} trees", details

def summarise_rf_optimization(result):
    """Summarise a random-forest optimisation result as ``(headline, details)``.

    The headline carries the numeric scores and iteration count when present;
    details list up to five best parameters plus the validation flag and
    timestamp when available.
    """
    if not isinstance(result, dict):
        return 'RF optimisation summary (unexpected structure)', []

    best = result.get('best_params') or {}
    param_names = list(best.keys())

    highlights = []
    for label, field in (('best CV', 'best_cv_score'), ('test', 'test_score')):
        score = result.get(field)
        if isinstance(score, (int, float)):
            highlights.append(f"{label} {score:.3f}")
    n_iter = result.get('n_iter')
    if isinstance(n_iter, int):
        highlights.append(f"n_iter {n_iter}")

    headline = "RF optimisation results"
    if highlights:
        headline = headline + " (" + ', '.join(highlights) + ")"

    details = [f"best_params.{name} = {best[name]}" for name in param_names[:5]]
    if len(param_names) > 5:
        details.append('…')
    used_test = result.get('used_test_for_validation')
    if isinstance(used_test, bool):
        details.append(f"used_test_for_validation: {used_test}")
    timestamp = result.get('timestamp')
    if timestamp:
        details.append(f"timestamp: {timestamp}")
    return headline, details

def summarise_entry(key, value_json, value_text, skip_sample_keys=True):
    """Summarise one decoded entry, optionally hiding ``sample_*`` keys.

    Apart from the skip rule this is the same dispatch as
    ``summarise_entry_generic``, so it delegates to that function instead of
    keeping a second copy of the rules that could drift apart.

    Args:
        key: Entry key as text.
        value_json: Parsed JSON value, or ``None`` if the value is not JSON.
        value_text: Raw text of the value, used when there is no JSON.
        skip_sample_keys: When true, keys starting with ``sample_`` are
            skipped.

    Returns:
        ``(headline, details)``, or ``(None, None)`` for a skipped key.
    """
    if skip_sample_keys and key.startswith('sample_'):
        return None, None
    return summarise_entry_generic(key, value_json, value_text)

def summarise_entry_generic(key, value_json, value_text):
    """Summarise one decoded entry as ``(headline, details)``.

    Without parsed JSON the shortened raw text is returned. Otherwise the
    keys ``EU``, ``RF`` and ``RF_OPTIMIZATION_RESULTS`` get their dedicated
    summaries and any other key gets the generic JSON description.
    """
    if value_json is None:
        return _short_text(value_text), []
    specialised = {
        'EU': summarise_endpoint_univers,
        'RF': summarise_random_forest,
        'RF_OPTIMIZATION_RESULTS': summarise_rf_optimization,
    }
    summarise = specialised.get(key, _describe_json)
    return summarise(value_json)

# Parse the name of, and collect the contents of, every discovered archive.
archives_metadata = [parse_zip_metadata(path) for path in zip_paths]
archives_data = [collect_archive_data(path) for path in zip_paths]
# Per-archive lookups keyed by archive file name.
manifests_by_archive = {item['zip_name']: item['manifest'] for item in archives_data}
manifest_prefix_by_archive = {item['zip_name']: item.get('manifest_prefix', '') for item in archives_data}
backups_by_archive = {item['zip_name']: item['backups'] for item in archives_data}
# Views on the currently selected archive; each is None (or '' for the prefix)
# when nothing is selected.
selected_archive_data = next((item for item in archives_data if item['zip_name'] == selected_zip_name), None)
selected_manifest = manifests_by_archive.get(selected_zip_name)
selected_manifest_prefix = manifest_prefix_by_archive.get(selected_zip_name, '')
selected_backups = backups_by_archive.get(selected_zip_name)

# Name of the file that the manifest associates with database 0.
selected_db0_file_name = None
if selected_manifest:
    files_map = selected_manifest.get('files', {}) or {}
    selected_db0_file_name = files_map.get('0')

# Parsed backup document for database 0, if it was collected.
selected_db0_backup = None
if selected_backups and selected_db0_file_name:
    selected_db0_backup = selected_backups.get(selected_db0_file_name)

# Decode every DB 0 entry into key text, value text and, when the value text
# parses as JSON, the parsed value.
selected_db0_entries = []
selected_db0_values = []
if selected_db0_backup:
    selected_db0_entries = selected_db0_backup.get('entries') or []
    for entry in selected_db0_entries:
        try:
            key_bytes = decode_key(entry)
            key_text = key_bytes.decode('utf-8', errors='replace')
        except Exception as exc:
            # Keep going with a placeholder key instead of dropping the entry.
            key_text = f'<unable to decode key: {exc}>'
        preview, details = try_decode_value(entry)
        value_bytes = details.get('decoded_bytes') if isinstance(details, dict) else None
        if isinstance(value_bytes, (bytes, bytearray)):
            value_text = value_bytes.decode('utf-8', errors='replace')
        else:
            # No decoded bytes: fall back to the preview/placeholder text.
            value_text = str(preview)
        value_json = None
        if isinstance(value_text, str):
            try:
                value_json = json.loads(value_text)
            except Exception:
                value_json = None
        selected_db0_values.append({
            'key': key_text,
            'type': entry.get('type'),
            'ttl_ms': entry.get('pttl'),
            'value_text': value_text,
            'value_bytes': value_bytes,
            'value_json': value_json,
            'details': details,
        })

# Lookup by key text; if two entries share a key text the later one wins.
selected_db0_values_by_key = {item['key']: item for item in selected_db0_values}

# Entries of the backup that the manifest associates with database 1.
db1_entries = []
if selected_manifest and selected_backups is not None:
    files_map = selected_manifest.get('files', {}) or {}
    db1_file = files_map.get('1')
    if db1_file:
        data = selected_backups.get(db1_file) if isinstance(selected_backups, dict) else None
        if isinstance(data, dict):
            db1_entries = data.get('entries') or []

# One summary row per DB 1 entry, decoded the same way as the DB 0 entries.
db1_entries_summary = []
for entry in db1_entries:
    try:
        key_bytes = decode_key(entry)
        key_text = key_bytes.decode('utf-8', errors='replace')
    except Exception as exc:
        key_text = f'<unable to decode key: {exc}>'
    preview, details = try_decode_value(entry)
    value_bytes = details.get('decoded_bytes') if isinstance(details, dict) else None
    if isinstance(value_bytes, (bytes, bytearray)):
        value_text = value_bytes.decode('utf-8', errors='replace')
    else:
        value_text = str(preview)
    value_json = None
    if isinstance(value_text, str):
        try:
            value_json = json.loads(value_text)
        except Exception:
            value_json = None
    # Unlike DB 0, keys starting with "sample_" are not skipped here.
    headline, extra = summarise_entry_generic(key_text, value_json, value_text)
    db1_entries_summary.append({
        'key': key_text,
        'type': entry.get('type'),
        'ttl_ms': entry.get('pttl'),
        'headline': headline,
        'details': extra,
    })

# Key count and type summary for databases 1 through 9 of the selected archive.
other_db_summaries = []
if selected_manifest and selected_backups is not None:
    files_map = selected_manifest.get('files', {}) or {}
    # Note: this rebinds the module-level selected_zip_path.
    selected_zip_path = Path(selected_archive_data['zip_path']) if selected_archive_data else None
    for db_index in range(1, 10):
        file_name = files_map.get(str(db_index))
        if not file_name or selected_zip_path is None:
            continue
        label = DB_LABELS.get(db_index, 'Unknown')
        data = selected_backups.get(file_name) if isinstance(selected_backups, dict) else None
        metadata = {}
        if isinstance(data, dict):
            metadata = data.get('metadata') or {}
        if not metadata:
            # No metadata from the collected backups: read just the start of
            # the member from the archive and extract its metadata object.
            member_name = f"{selected_manifest_prefix}{file_name}"
            metadata = _fetch_backup_metadata(selected_zip_path, member_name)
        key_count = metadata.get('key_count') if isinstance(metadata, dict) else None
        type_summary = metadata.get('type_summary') if isinstance(metadata, dict) else None
        other_db_summaries.append({
            'db_index': db_index,
            'label': label,
            'file_name': file_name,
            'key_count': key_count,
            'type_summary': type_summary if isinstance(type_summary, dict) else None,
        })

# Summary rows for DB 0; keys starting with "sample_" are left out.
selected_db0_summary = []
for item in selected_db0_values:
    headline, extra = summarise_entry(item['key'], item['value_json'], item['value_text'], skip_sample_keys=True)
    # (None, None) is summarise_entry's marker for a skipped key.
    if headline is None and extra is None:
        continue
    selected_db0_summary.append({
        'key': item['key'],
        'type': item['type'],
        'ttl_ms': item['ttl_ms'],
        'headline': headline,
        'details': extra,
    })

# Print the DB 0 summary: key line, headline, then indented detail lines.
if selected_db0_summary:
    print(f"DB 0 entries for {selected_zip_name}:")
    for entry in selected_db0_summary:
        # A non-integer TTL is shown as "persistent".
        ttl = entry['ttl_ms'] if isinstance(entry['ttl_ms'], int) else 'persistent'
        print(f"{entry['key']} (type={entry['type']}, ttl={ttl})")
        print(f"{entry['headline']}")
        for detail in entry['details']:
            print(f"    {detail}")
else:
    print('No DB 0 data available for the current selection.')



In [None]:
from collections import defaultdict
import json
import statistics

def summarise_db10_workers(selected_zip_name, *, show_summary=True):
    """Aggregate the DB 10 entries of the selected archive per worker.

    The DB 10 dump is located through the ``'10'`` entry of the manifest's
    ``files`` mapping. It is taken from ``selected_backups`` when it is
    already loaded there, otherwise it is read from the ZIP archive itself.
    Every entry whose value decodes to a JSON object is folded into the
    statistics of its worker. The outcome is published in the module-level
    ``DB10_WORKER_CACHE`` dict with the keys ``zip_name``, ``db10_file``,
    ``entries``, ``db10_data``, ``worker_stats`` and ``worker_summaries``.
    The cache is reset to ``{}`` first and stays empty when the dump cannot
    be located or loaded.

    Args:
        selected_zip_name: Name of the archive being inspected; it is stored
            in the cache and used in the printed messages.
        show_summary: When true, print a header line followed by two lines
            of statistics per worker.

    Returns:
        None. Results are delivered through ``DB10_WORKER_CACHE`` and stdout.
    """
    global DB10_WORKER_CACHE
    DB10_WORKER_CACHE = {}
    if not selected_zip_name:
        print("No archive selected.")
        return
    files_map = (selected_manifest or {}).get('files', {}) or {}
    db10_file = files_map.get('10')
    if not db10_file:
        print("No DB 10 available for the current selection.")
        return
    backups = selected_backups if isinstance(selected_backups, dict) else {}
    db10_data = backups.get(db10_file)
    if not isinstance(db10_data, dict):
        # Not preloaded: fall back to reading the dump from the archive.
        zip_path_str = selected_archive_data.get('zip_path') if selected_archive_data else None
        if zip_path_str:
            zip_path = Path(zip_path_str)
            if zip_path.exists():
                try:
                    with zipfile.ZipFile(zip_path) as archive:
                        payload = archive.read(f"{selected_manifest_prefix}{db10_file}")
                    db10_data = json.loads(payload.decode('utf-8', errors='replace'))
                except Exception:
                    # Best effort: any read/parse failure is reported below
                    # as "Unable to load DB 10 data."
                    db10_data = None
    if not isinstance(db10_data, dict):
        print("Unable to load DB 10 data.")
        return
    entries = db10_data.get('entries') or []
    if not entries:
        print("No entries available in DB 10.")
        DB10_WORKER_CACHE = {
            'zip_name': selected_zip_name,
            'db10_file': db10_file,
            'entries': entries,
            'db10_data': db10_data,
            'worker_stats': {},
            'worker_summaries': [],
        }
        return

    def as_mapping(value):
        # Nested sections are only usable when they are JSON objects.
        return value if isinstance(value, dict) else {}

    def new_stats():
        # Fresh accumulator for a worker seen for the first time.
        return {
            'records': 0,
            'iterations': [],
            'queue_sizes': [],
            'car_queue_sizes': [],
            'total_seconds': [],
            'car_seconds': [],
            'can_seconds': [],
            'car_results': defaultdict(int),
            'can_results': defaultdict(int),
            'outcomes': defaultdict(int),
            'events': [],
        }

    worker_stats = {}
    for entry in entries:
        preview, details = try_decode_value(entry)
        raw_bytes = details.get('decoded_bytes') if isinstance(details, dict) else None
        if isinstance(raw_bytes, (bytes, bytearray)):
            text = raw_bytes.decode('utf-8', errors='replace')
        elif isinstance(preview, (bytes, bytearray)):
            text = preview.decode('utf-8', errors='replace')
        else:
            text = str(preview)
        try:
            payload = json.loads(text)
        except (ValueError, RecursionError):
            continue
        if not isinstance(payload, dict):
            # Valid JSON that is not an object (list, number, string, ...)
            # cannot carry worker fields; skip it instead of crashing.
            continue
        worker_id = payload.get('worker_id')
        if not worker_id:
            # Derive the worker from the entry key: the part before the
            # last ':' when there is one, otherwise the whole key.
            try:
                key_text = decode_key(entry).decode('utf-8', errors='replace')
            except Exception:
                key_text = entry.get('key')
            if isinstance(key_text, str) and ':' in key_text:
                worker_id = key_text.rsplit(':', 1)[0]
            else:
                worker_id = str(key_text)
        stats = worker_stats.get(worker_id)
        if stats is None:
            stats = worker_stats[worker_id] = new_stats()
        stats['records'] += 1
        iteration = payload.get('iteration')
        if isinstance(iteration, (int, float)):
            stats['iterations'].append(int(iteration))
        queue_size = payload.get('queue_size')
        if isinstance(queue_size, (int, float)):
            stats['queue_sizes'].append(float(queue_size))
        car_queue_size = payload.get('car_queue_size')
        if isinstance(car_queue_size, (int, float)):
            stats['car_queue_sizes'].append(float(car_queue_size))
        timings = as_mapping(payload.get('timings'))
        total_seconds = timings.get('total_seconds')
        if isinstance(total_seconds, (int, float)):
            stats['total_seconds'].append(float(total_seconds))
        car_seconds = timings.get('car_seconds')
        if isinstance(car_seconds, (int, float)):
            stats['car_seconds'].append(float(car_seconds))
        can_seconds = timings.get('can_seconds')
        if isinstance(can_seconds, (int, float)):
            stats['can_seconds'].append(float(can_seconds))
        car_result = as_mapping(payload.get('car_processing')).get('result')
        if car_result:
            stats['car_results'][car_result] += 1
        can_result = as_mapping(payload.get('can_processing')).get('result')
        if can_result:
            stats['can_results'][can_result] += 1
        for outcome_key, outcome_value in as_mapping(payload.get('outcomes')).items():
            # Only boolean flags are counted, as "<name>=T" / "<name>=F".
            if isinstance(outcome_value, bool):
                label = f"{outcome_key}={'T' if outcome_value else 'F'}"
                stats['outcomes'][label] += 1
        stats['events'].append(payload)

    def summarize(values):
        # (min, mean, max) of the samples, or three Nones without samples.
        if not values:
            return (None, None, None)
        return (min(values), statistics.mean(values), max(values))

    def format_number(value, digits=1):
        return f"{value:.{digits}f}" if isinstance(value, (int, float)) else '-'

    def format_hours(values):
        # Sum of the samples (seconds) expressed in hours; 0.0 without data.
        total = sum(values)
        return total / 3600 if total else 0.0

    def format_counter(mapping):
        if not mapping:
            return '-'
        items = sorted(dict(mapping).items())
        return ', '.join(f"{key}:{value}" for key, value in items)

    workers = []
    for worker_id, stats in worker_stats.items():
        queue_min, queue_avg, queue_max = summarize(stats['queue_sizes'])
        car_min, car_avg, car_max = summarize(stats['car_queue_sizes'])
        workers.append({
            'worker_id': worker_id,
            'records': stats['records'],
            'iter_min': min(stats['iterations']) if stats['iterations'] else None,
            'iter_max': max(stats['iterations']) if stats['iterations'] else None,
            'queue_avg': queue_avg,
            'car_queue_avg': car_avg,
            'queue_range': (queue_min, queue_max),
            'car_queue_range': (car_min, car_max),
            'total_hours': format_hours(stats['total_seconds']),
            'car_hours': format_hours(stats['car_seconds']),
            'can_hours': format_hours(stats['can_seconds']),
            'car_results': dict(stats['car_results']),
            'can_results': dict(stats['can_results']),
            'outcomes': dict(stats['outcomes']),
        })

    workers.sort(key=lambda item: item['worker_id'])
    DB10_WORKER_CACHE = {
        'zip_name': selected_zip_name,
        'db10_file': db10_file,
        'entries': entries,
        'db10_data': db10_data,
        'worker_stats': worker_stats,
        'worker_summaries': workers,
    }
    if not show_summary:
        return
    print(f"DB 10: {len(entries)} events, {len(workers)} workers for {selected_zip_name}")
    for worker in workers:
        iter_range = f"{worker['iter_min']}–{worker['iter_max']}" if worker['iter_min'] is not None else '-'
        queue_range = worker['queue_range']
        car_queue_range = worker['car_queue_range']
        queue_range_text = f"{format_number(queue_range[0])}-{format_number(queue_range[1])}" if queue_range[0] is not None else '-'
        car_range_text = f"{format_number(car_queue_range[0])}-{format_number(car_queue_range[1])}" if car_queue_range[0] is not None else '-'
        queue_avg_text = format_number(worker['queue_avg'])
        car_queue_avg_text = format_number(worker['car_queue_avg'])
        total_hours_text = format_number(worker['total_hours'], digits=2)
        car_hours_text = format_number(worker['car_hours'], digits=2)
        can_hours_text = format_number(worker['can_hours'], digits=2)
        print(
            f"- {worker['worker_id']}: records={worker['records']}, iter={iter_range}, "
            f"queue_avg={queue_avg_text}, car_queue_avg={car_queue_avg_text}, "
            f"queue_range={queue_range_text}, car_queue_range={car_range_text}, "
            f"h_tot={total_hours_text}, h_car={car_hours_text}, h_can={can_hours_text}"
        )
        print(
            "  "
            + " | ".join([
                f"car_results[{format_counter(worker['car_results'])}]",
                f"can_results[{format_counter(worker['can_results'])}]",
                f"outcomes[{format_counter(worker['outcomes'])}]",
            ])
        )

# Run the DB 10 summary for the currently selected archive when this cell
# executes; this also fills DB10_WORKER_CACHE.
summarise_db10_workers(selected_zip_name)


In [None]:
import os
import statistics

def _select_worker_id(worker_stats):
    """Interactively pick one of the workers found in DB 10.

    The starting selection is, in order of precedence: the
    ``RESULTS_SELECTED_WORKER`` environment variable, the module-level
    ``SELECTED_DB10_WORKER``, then the first worker in sorted order. The
    workers are listed with the current selection marked by ``*`` and the
    user may answer with a list index or a worker name. An empty or invalid
    answer keeps the current selection.

    Args:
        worker_stats: Mapping keyed by worker id.

    Returns:
        The selected worker id (also stored in ``SELECTED_DB10_WORKER``), or
        None when ``worker_stats`` is empty.
    """
    if not worker_stats:
        return None
    workers = sorted(worker_stats)
    selected = globals().get('SELECTED_DB10_WORKER')
    env_selected = os.environ.get('RESULTS_SELECTED_WORKER')
    if env_selected in worker_stats:
        selected = env_selected
    if selected not in worker_stats:
        selected = workers[0]
    print("Workers found in DB 10:")
    for idx, wid in enumerate(workers):
        marker = "*" if wid == selected else " "
        print(f"{marker}[{idx}] {wid}")
    try:
        user_choice = input("Select worker by index or name (press Enter to keep the current selection): ").strip()
    except EOFError:
        # No input stream available: behave as if Enter was pressed.
        user_choice = ''
    if user_choice:
        resolved = None
        # isdecimal() only accepts characters int() can parse; isdigit()
        # also accepts e.g. superscript digits, which make int() raise.
        if user_choice.isdecimal():
            index = int(user_choice)
            if 0 <= index < len(workers):
                resolved = workers[index]
        if resolved is None and user_choice in worker_stats:
            resolved = user_choice
        if resolved is None:
            print("Invalid selection, keeping the current worker.")
        else:
            selected = resolved
    globals()['SELECTED_DB10_WORKER'] = selected
    return selected

def inspect_db10_worker(worker_id=None, *, max_events=5, sort_by='iteration', interactive=False):
    """Print the statistics and the last events of one DB 10 worker.

    The per-worker statistics come from ``DB10_WORKER_CACHE``; the cache is
    rebuilt through ``summarise_db10_workers`` when it is missing or belongs
    to a different archive than ``selected_zip_name``.

    Args:
        worker_id: Worker to inspect. When omitted, the module-level
            ``SELECTED_DB10_WORKER`` is used.
        max_events: Number of events to show, taken from the end of the
            sorted list. A falsy or non-positive value shows all events.
        sort_by: ``'timestamp'`` orders events by ``timestamp_start`` and
            then iteration; any other value orders by iteration and then
            ``timestamp_start``.
        interactive: When true, prompt on stdin to confirm or change the
            worker.

    Returns:
        None. Everything is printed to stdout.
    """
    cache = globals().get('DB10_WORKER_CACHE')
    if not cache or cache.get('zip_name') != selected_zip_name:
        summarise_db10_workers(selected_zip_name, show_summary=False)
        cache = globals().get('DB10_WORKER_CACHE')
    if not cache:
        print("DB 10 statistics are not available.")
        return
    worker_stats = cache.get('worker_stats') or {}
    if not worker_stats:
        print("No workers found in DB 10.")
        return
    if worker_id and worker_id in worker_stats:
        globals()['SELECTED_DB10_WORKER'] = worker_id
    selected_worker = worker_id or globals().get('SELECTED_DB10_WORKER')
    if selected_worker not in worker_stats:
        if interactive:
            selected_worker = _select_worker_id(worker_stats)
        else:
            print('Specify a worker_id or call the function with interactive=True to pick one interactively.')
            print('Available workers:')
            for wid in sorted(worker_stats):
                print(f' - {wid}')
            return
    elif interactive:
        change = input("Press Enter to keep the current worker or type 'c' to choose a different worker: ").strip().lower()
        if change == 'c':
            selected_worker = _select_worker_id(worker_stats)
        else:
            globals()['SELECTED_DB10_WORKER'] = selected_worker
    else:
        globals()['SELECTED_DB10_WORKER'] = selected_worker
    if selected_worker not in worker_stats:
        print('No worker selected.')
        return
    stats = worker_stats[selected_worker]

    def fmt(value, digits=2):
        return f"{value:.{digits}f}" if isinstance(value, (int, float)) else '-'

    def safe_mean(values):
        return statistics.mean(values) if values else None

    iterations = stats.get('iterations', [])
    queue_sizes = stats.get('queue_sizes', [])
    car_queue_sizes = stats.get('car_queue_sizes', [])
    total_seconds = stats.get('total_seconds', [])
    car_seconds = stats.get('car_seconds', [])
    can_seconds = stats.get('can_seconds', [])

    print(f"Worker {selected_worker} in DB 10 ({cache.get('zip_name')}):")
    print(f"- recorded events: {stats.get('records', 0)}")
    if iterations:
        print(f"- iteration range: {min(iterations)}–{max(iterations)}")
    else:
        print("- iteration range: -")
    if queue_sizes:
        print(
            f"- queue size: avg {fmt(safe_mean(queue_sizes), 1)}, "
            f"min {fmt(min(queue_sizes), 0)}, max {fmt(max(queue_sizes), 0)}"
        )
    else:
        print("- queue size: no data")
    if car_queue_sizes:
        print(
            f"- CAR queue size: avg {fmt(safe_mean(car_queue_sizes), 1)}, "
            f"min {fmt(min(car_queue_sizes), 0)}, max {fmt(max(car_queue_sizes), 0)}"
        )
    else:
        print("- CAR queue size: no data")

    def sum_hours(values):
        # Total of the samples (seconds) in hours, or '-' without samples.
        return fmt(sum(values) / 3600 if values else None, 2)

    print(f"- total hours: {sum_hours(total_seconds)}")
    print(f"  - CAR: {sum_hours(car_seconds)}")
    print(f"  - CAN: {sum_hours(can_seconds)}")

    def format_counter(mapping):
        if not mapping:
            return '-'
        items = sorted(dict(mapping).items())
        return ', '.join(f"{key}:{value}" for key, value in items)

    print(f"- CAR outcomes: {format_counter(stats.get('car_results'))}")
    print(f"- CAN outcomes: {format_counter(stats.get('can_results'))}")
    print(f"- outcomes: {format_counter(stats.get('outcomes'))}")

    events = list(stats.get('events') or [])
    if not events:
        print("No event details stored.")
        return

    def event_key(event):
        # Only a missing iteration sorts last. Testing truthiness instead
        # would also push the legitimate iteration 0 to the end.
        iteration = event.get('iteration')
        iteration_rank = iteration if iteration is not None else float('inf')
        timestamp = event.get('timestamp_start') or ''
        if sort_by == 'timestamp':
            return (timestamp, iteration_rank)
        return (iteration_rank, timestamp)

    events_sorted = sorted(events, key=event_key)
    selected_events = events_sorted[-max_events:] if max_events and max_events > 0 else events_sorted
    print(f"Showing {len(selected_events)} events out of {len(events_sorted)} (sorted by {sort_by}).")

    for event in selected_events:
        iteration = event.get('iteration')
        timestamp_start = event.get('timestamp_start')
        timestamp_end = event.get('timestamp_end')
        queue_size = event.get('queue_size')
        car_queue_size = event.get('car_queue_size')
        timings = event.get('timings') or {}
        car_processing = event.get('car_processing') or {}
        can_processing = event.get('can_processing') or {}
        outcomes = event.get('outcomes') or {}
        line = (
            f"- iter={iteration}, start={timestamp_start}, end={timestamp_end}, "
            f"queue={queue_size}, car_queue={car_queue_size}, "
            f"tot_s={fmt(timings.get('total_seconds'), 1)}, "
            f"car={car_processing.get('result') or '-'} ({fmt(timings.get('car_seconds'), 1)}s), "
            f"can={can_processing.get('result') or '-'} ({fmt(timings.get('can_seconds'), 1)}s)"
        )
        print(line)
        extra_parts = []
        if car_processing.get('time_seconds'):
            extra_parts.append(f"car_step={fmt(car_processing.get('time_seconds'), 1)}s")
        if can_processing.get('time_seconds'):
            extra_parts.append(f"can_step={fmt(can_processing.get('time_seconds'), 1)}s")
        # NOTE(review): 'extensions' and 'raw_info' are read from the event
        # itself here, while build_db10_worker_report reads them from the
        # car_processing / can_processing sections - confirm which is right.
        extensions = event.get('extensions')
        if isinstance(extensions, dict) and extensions:
            ext_summary = ', '.join(f"{k}:{v}" for k, v in extensions.items())
            extra_parts.append(f"extensions[{ext_summary}]")
        raw_info = event.get('raw_info')
        if isinstance(raw_info, dict) and raw_info:
            info_summary = ', '.join(f"{k}:{v}" for k, v in raw_info.items())
            extra_parts.append(f"raw_info[{info_summary}]")
        if outcomes:
            outcomes_summary = ', '.join(
                f"{key}={'T' if bool(value) else 'F'}" for key, value in sorted(outcomes.items())
            )
            extra_parts.append(f"outcomes[{outcomes_summary}]")
        if extra_parts:
            print("  " + " | ".join(extra_parts))

# Set to True to enable the interactive worker selection.
RUN_INTERACTIVE_INSPECTION = False
if RUN_INTERACTIVE_INSPECTION:
    inspect_db10_worker(max_events=5, interactive=True)


In [None]:
import statistics

def build_db10_worker_report(*, max_events=5, sort_by='iteration'):
    """Build a plain-dict report of the DB 10 statistics of every worker.

    The statistics come from ``DB10_WORKER_CACHE``; the cache is rebuilt
    through ``summarise_db10_workers`` when it is missing or belongs to a
    different archive than ``selected_zip_name``. The finished report is
    also stored in the module-level ``DB10_WORKER_REPORT``.

    Args:
        max_events: Number of events kept per worker, taken from the end of
            the sorted list. ``None`` or a non-positive value keeps all.
        sort_by: ``'timestamp'`` orders events by ``timestamp_start`` and
            then iteration; any other value orders by iteration and then
            ``timestamp_start``.

    Returns:
        A dict with ``zip_name``, ``worker_count`` and ``workers`` (worker
        id -> summary), or ``{}`` when no statistics are available.
    """
    cache = globals().get('DB10_WORKER_CACHE')
    if not cache or cache.get('zip_name') != selected_zip_name:
        summarise_db10_workers(selected_zip_name, show_summary=False)
        cache = globals().get('DB10_WORKER_CACHE')
    if not cache:
        print('DB 10 statistics are not available.')
        return {}
    worker_stats = cache.get('worker_stats') or {}
    if not worker_stats:
        print('No workers found in DB 10.')
        return {}

    def safe_mean(values):
        return statistics.mean(values) if values else None

    def hours(values):
        # Total of the samples (seconds) in hours; 0.0 without samples.
        return sum(values) / 3600 if values else 0.0

    def event_key(event):
        # Only a missing iteration sorts last. Testing truthiness instead
        # would also push the legitimate iteration 0 to the end.
        iteration = event.get('iteration')
        iteration_rank = iteration if iteration is not None else float('inf')
        timestamp = event.get('timestamp_start') or ''
        if sort_by == 'timestamp':
            return (timestamp, iteration_rank)
        return (iteration_rank, timestamp)

    def serialize_event(event):
        # Keep a fixed subset of the event fields in the report.
        timings = event.get('timings') or {}
        car_processing = event.get('car_processing') or {}
        can_processing = event.get('can_processing') or {}
        return {
            'iteration': event.get('iteration'),
            'timestamp_start': event.get('timestamp_start'),
            'timestamp_end': event.get('timestamp_end'),
            'queue_size': event.get('queue_size'),
            'car_queue_size': event.get('car_queue_size'),
            'timings': {
                'total_seconds': timings.get('total_seconds'),
                'car_seconds': timings.get('car_seconds'),
                'can_seconds': timings.get('can_seconds'),
            },
            'car_processing': {
                'result': car_processing.get('result'),
                'time_seconds': car_processing.get('time_seconds'),
                'raw_info': car_processing.get('raw_info'),
                'extensions': car_processing.get('extensions'),
            },
            'can_processing': {
                'result': can_processing.get('result'),
                'time_seconds': can_processing.get('time_seconds'),
                'raw_info': can_processing.get('raw_info'),
                'extensions': can_processing.get('extensions'),
            },
            'outcomes': event.get('outcomes'),
        }

    report = {
        'zip_name': cache.get('zip_name'),
        'worker_count': len(worker_stats),
        'workers': {}
    }

    for worker_id in sorted(worker_stats):
        stats = worker_stats[worker_id]
        iterations = stats.get('iterations') or []
        queue_sizes = stats.get('queue_sizes') or []
        car_queue_sizes = stats.get('car_queue_sizes') or []
        summary = {
            'records': stats.get('records', 0),
            'iteration_range': [min(iterations), max(iterations)] if iterations else None,
            'queue': {
                'min': min(queue_sizes) if queue_sizes else None,
                'mean': safe_mean(queue_sizes),
                'max': max(queue_sizes) if queue_sizes else None,
            },
            'car_queue': {
                'min': min(car_queue_sizes) if car_queue_sizes else None,
                'mean': safe_mean(car_queue_sizes),
                'max': max(car_queue_sizes) if car_queue_sizes else None,
            },
            'timings_hours': {
                'total': hours(stats.get('total_seconds') or []),
                'car': hours(stats.get('car_seconds') or []),
                'can': hours(stats.get('can_seconds') or []),
            },
            'car_results': dict(stats.get('car_results') or {}),
            'can_results': dict(stats.get('can_results') or {}),
            'outcomes': dict(stats.get('outcomes') or {}),
        }
        events = list(stats.get('events') or [])
        events_sorted = sorted(events, key=event_key)
        if max_events is not None and max_events > 0:
            events_sorted = events_sorted[-max_events:]
        summary['events'] = [serialize_event(event) for event in events_sorted]
        report['workers'][worker_id] = summary

    globals()['DB10_WORKER_REPORT'] = report
    print(f"Report built for {report['worker_count']} workers.")
    return report

# Build the report when this cell executes. The function already stores it
# in DB10_WORKER_REPORT on success; this assignment also covers the case in
# which it returns {} because no statistics are available.
DB10_WORKER_REPORT = build_db10_worker_report(max_events=5)


In [None]:
import math
import json
import re
import statistics
from collections import OrderedDict
from typing import Iterable
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
from IPython.display import display
import pandas as pd

# Human-readable names for the short database labels (the keys match the
# values of DB_LABELS defined at the top of the notebook).
DB_DISPLAY_NAMES = {
    'DATA': 'Data',
    'CAN': 'Candidate reasons',
    'R': 'Reasons',
    'NR': 'Non-reasons',
    'CAR': 'Candidate anti-reasons',
    'AR': 'Anti-reasons',
    'GP': 'Good profiles',
    'BP': 'Bad profiles',
    'PR': 'Preferred reasons',
    'AP': 'Anti-reason profiles',
    'LOGS': 'Worker iteration logs',
}

# Shorthands for the two display names used in the titles and labels below.
CANDIDATE_REASONS_NAME = DB_DISPLAY_NAMES['CAN']
CANDIDATE_ANTI_REASONS_NAME = DB_DISPLAY_NAMES['CAR']

# Per-worker columns; the names match keys of the rows produced by
# _flatten_worker_summary.
BASE_COLUMNS = [
    'worker_id',
    'records',
    'iter_min',
    'iter_max',
    'queue_min',
    'queue_mean',
    'queue_max',
    'car_queue_min',
    'car_queue_mean',
    'car_queue_max',
    'hours_total',
    'hours_car',
    'hours_can',
]

# Scatter plot definitions in the shape read by _plot_scatter: 'x' and 'y'
# are column names; a None 'xlabel'/'ylabel' makes _plot_scatter fall back
# to the display name of the column.
SCATTER_PLOTS = [
    {
        'title': 'Queue size vs iteration range',
        'x': 'iter_max',
        'y': 'queue_mean',
        'xlabel': None,
        'ylabel': None,
    },
    {
        'title': f'{CANDIDATE_ANTI_REASONS_NAME} queue vs queue size',
        'x': 'queue_mean',
        'y': 'car_queue_mean',
        'xlabel': None,
        'ylabel': None,
    },
    {
        'title': 'Total hours vs records',
        'x': 'records',
        'y': 'hours_total',
        'xlabel': None,
        'ylabel': None,
    },
    {
        'title': f'Processing hours ({CANDIDATE_ANTI_REASONS_NAME} vs {CANDIDATE_REASONS_NAME})',
        'x': 'hours_car',
        'y': 'hours_can',
        'xlabel': None,
        'ylabel': None,
    },
]

# Bar plot definitions; the keys mirror the keyword parameters of _plot_bar
# (presumably each dict is unpacked into it - the consumer is not shown here).
BAR_PLOTS = [
    {
        'columns': ['queue_mean'],
        'title': 'Average queue size per worker',
        'ylabel': 'Average queue size',
        'sort_by': 'queue_mean',
    },
    {
        'columns': ['car_queue_mean'],
        'title': f'Average {CANDIDATE_ANTI_REASONS_NAME} queue size per worker',
        'ylabel': f'Average {CANDIDATE_ANTI_REASONS_NAME} queue size',
        'sort_by': 'car_queue_mean',
    },
    {
        'columns': ['records'],
        'title': 'Events processed per worker',
        'ylabel': 'Events',
        'sort_by': 'records',
    },
]

# Stacked bar definitions; each 'prefix' is one of the column prefixes that
# _flatten_worker_summary generates for result and outcome counters.
STACKED_BAR_CONFIG = [
    {
        'prefix': 'car_result_',
        'title': f'{CANDIDATE_ANTI_REASONS_NAME} results per worker',
        'ylabel': 'Count',
    },
    {
        'prefix': 'can_result_',
        'title': f'{CANDIDATE_REASONS_NAME} results per worker',
        'ylabel': 'Count',
    },
    {
        'prefix': 'outcome_',
        'title': 'Outcome flags per worker',
        'ylabel': 'Count',
    },
]

# Histogram definitions, one per column (the consumer is not shown here).
HISTOGRAMS = [
    {
        'column': 'queue_mean',
        'title': 'Distribution of average queue size',
        'xlabel': None,
    },
    {
        'column': 'hours_total',
        'title': 'Distribution of total processing hours',
        'xlabel': None,
    },
]

# Extra scatter plots as (column, column, title) tuples - presumably x, y and
# title; verify against the consumer, which is not shown here.
ADDITIONAL_SCATTER_PREFIX_PAIRS = [
    ('car_result_CONFIRMED_AR', 'car_result_NOT_AR', f'{CANDIDATE_ANTI_REASONS_NAME} confirmed vs not'),
    ('can_result_GOOD', 'hours_total', f"{CANDIDATE_REASONS_NAME} GOOD vs total hours"),
]

def _ensure_report():
    """Return the worker report for the selected archive, building it if needed.

    The module-level ``DB10_WORKER_REPORT`` is reused when it is non-empty
    and was built for ``selected_zip_name``; otherwise a new report is built
    with ``max_events=5``.
    """
    cached = globals().get('DB10_WORKER_REPORT')
    if cached and cached.get('zip_name') == selected_zip_name:
        return cached
    return build_db10_worker_report(max_events=5)


def _flatten_worker_summary(report):
    """Turn the nested worker report into one flat row per worker.

    Workers are processed in sorted id order and numbered from 1. Every row
    carries the same columns: the fixed identification/statistics columns
    followed by one ``car_result_*``, ``can_result_*`` and ``outcome_*``
    counter per key seen on any worker (0 where a worker lacks the key).

    Args:
        report: Report dict holding a ``workers`` mapping of id -> summary.

    Returns:
        A list of ``OrderedDict`` rows; empty when there are no workers.
    """
    workers = report.get('workers') or {}

    def collect_keys(section):
        # Sorted union of the counter names used by any worker.
        seen = set()
        for worker_summary in workers.values():
            seen |= set((worker_summary.get(section) or {}).keys())
        return sorted(seen)

    car_keys = collect_keys('car_results')
    can_keys = collect_keys('can_results')
    outcome_keys = collect_keys('outcomes')

    rows = []
    for position, worker_id in enumerate(sorted(workers), start=1):
        summary = workers[worker_id]
        queue = summary.get('queue') or {}
        car_queue = summary.get('car_queue') or {}
        timings = summary.get('timings_hours') or {}
        iter_low, iter_high = (summary.get('iteration_range') or [None, None])[:2]
        row = OrderedDict([
            ('worker_id', worker_id),
            ('worker_index', position),
            ('worker_label', f"W{position:02d}"),
            ('worker_short_name', worker_id.split(':')[-1]),
            ('records', summary.get('records')),
            ('iter_min', iter_low),
            ('iter_max', iter_high),
            ('queue_min', queue.get('min')),
            ('queue_mean', queue.get('mean')),
            ('queue_max', queue.get('max')),
            ('car_queue_min', car_queue.get('min')),
            ('car_queue_mean', car_queue.get('mean')),
            ('car_queue_max', car_queue.get('max')),
            ('hours_total', timings.get('total')),
            ('hours_car', timings.get('car')),
            ('hours_can', timings.get('can')),
        ])
        counters = (
            ('car_result_', car_keys, summary.get('car_results') or {}),
            ('can_result_', can_keys, summary.get('can_results') or {}),
            ('outcome_', outcome_keys, summary.get('outcomes') or {}),
        )
        for prefix, names, counts in counters:
            for name in names:
                row[f'{prefix}{name}'] = counts.get(name, 0)
        rows.append(row)
    return rows


def _filter_valid_points(df: pd.DataFrame, x_key: str, y_key: str):
    """Return the plottable ``(x, y, worker_label)`` triples of ``df``.

    Rows with a missing value in either column are dropped, then only rows
    whose two values are both ``int``/``float`` instances are kept.

    Args:
        df: Frame holding ``x_key``, ``y_key`` and ``worker_label`` columns.
        x_key: Name of the column used for the x coordinate.
        y_key: Name of the column used for the y coordinate.

    Returns:
        A list of ``(x, y, worker_label)`` tuples; empty when no row
        qualifies.
    """
    subset = df.dropna(subset=[x_key, y_key])
    if subset.empty:
        return []
    pair = subset[[x_key, y_key]]
    # DataFrame.applymap is deprecated since pandas 2.1 in favour of
    # DataFrame.map; use the new name when this pandas version has it.
    elementwise = pair.map if hasattr(pd.DataFrame, 'map') else pair.applymap
    is_number = elementwise(lambda v: isinstance(v, (int, float))).all(axis=1)
    subset = subset[is_number]
    if subset.empty:
        return []
    return list(zip(subset[x_key], subset[y_key], subset['worker_label']))


def _token_to_display(token: str) -> str:
    """Return the human-readable form of one label token.

    A token that (case-insensitively) names a database label is replaced by
    its display name. Any other token is capitalized; a token that is not
    all upper case has its underscores turned into spaces first.
    """
    canonical = token.upper()
    candidate_names = {
        'CAR': CANDIDATE_ANTI_REASONS_NAME,
        'CAN': CANDIDATE_REASONS_NAME,
    }
    # DB_DISPLAY_NAMES takes precedence over the candidate-name fallback.
    for table in (DB_DISPLAY_NAMES, candidate_names):
        if canonical in table:
            return table[canonical]
    text = token if token.isupper() else token.replace('_', ' ')
    return text.capitalize()


def _format_result_label(prefix: str, raw: str) -> str:
    """Build the display label of a result counter column.

    ``raw`` is split on underscores; a trailing token that names a database
    label is dropped and the remaining tokens are lower-cased and converted
    with ``_token_to_display``. With no tokens left the descriptor is
    ``'Total'``.
    """
    tokens = raw.split('_') if raw else []
    if tokens and tokens[-1].upper() in DB_DISPLAY_NAMES:
        tokens.pop()
    if tokens:
        descriptor = ' '.join(_token_to_display(piece.lower()) for piece in tokens)
    else:
        descriptor = 'Total'
    return f"{prefix} result: {descriptor}"


def _format_outcome_label(raw: str) -> str:
    """Build the display label of an outcome counter column.

    ``raw`` has the form ``name`` or ``name=value``. The name is split on
    underscores and converted token by token with ``_token_to_display``; a
    ``T``/``F``/``TRUE``/``FALSE`` value (any case) is spelled out as
    ``True``/``False`` and any other value is shown unchanged.
    """
    name, separator, flag = raw.partition('=')
    if name:
        label = ' '.join(_token_to_display(piece) for piece in name.split('_'))
    else:
        label = 'Outcome'
    if not separator:
        return f"Outcome: {label}"
    spelled_out = {'T': 'True', 'F': 'False', 'TRUE': 'True', 'FALSE': 'False'}
    value_text = spelled_out.get(flag.upper(), flag)
    return f"Outcome: {label} ({value_text})"


def _rename_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` with its columns renamed to their display names.

    ``car_result_*``, ``can_result_*`` and ``outcome_*`` columns get a
    generated label, the known fixed columns get a hand-written one, and
    every other column keeps its name.
    """
    anti_name = CANDIDATE_ANTI_REASONS_NAME
    reasons_name = CANDIDATE_REASONS_NAME
    fixed_names = {
        'worker_label': 'Worker',
        'worker_id': 'Worker ID',
        'records': 'Records',
        'iter_min': 'Iteration min',
        'iter_max': 'Iteration max',
        'queue_min': 'Queue min',
        'queue_mean': 'Queue mean',
        'queue_max': 'Queue max',
        'car_queue_min': f'{anti_name} queue min',
        'car_queue_mean': f'{anti_name} queue mean',
        'car_queue_max': f'{anti_name} queue max',
        'hours_total': 'Total hours',
        'hours_car': f'{anti_name} hours',
        'hours_can': f'{reasons_name} hours',
    }

    def display_name(column):
        # The prefix checks run before the fixed-name lookup.
        if column.startswith('car_result_'):
            return _format_result_label(anti_name, column[len('car_result_'):])
        if column.startswith('can_result_'):
            return _format_result_label(reasons_name, column[len('can_result_'):])
        if column.startswith('outcome_'):
            return _format_outcome_label(column[len('outcome_'):])
        return fixed_names.get(column, column)

    return df.rename(columns={column: display_name(column) for column in df.columns})


def _column_display_name(column: str) -> str:
    """Return the display name ``_rename_columns`` gives to ``column``."""
    # Rename a one-column empty frame so the naming rules live in one place.
    return _rename_columns(pd.DataFrame(columns=[column])).columns[0]


def _plot_scatter(df: pd.DataFrame, config: dict):
    """Draw one annotated scatter plot described by ``config``.

    ``config`` supplies ``title``, the ``x``/``y`` column names and the
    optional ``xlabel``/``ylabel``; a missing or empty label falls back to
    the display name of the column. Each point is annotated with its worker
    label. A notice is printed instead when no valid point exists.
    """
    title = config['title']
    x_column = config['x']
    y_column = config['y']
    points = _filter_valid_points(df, x_column, y_column)
    if not points:
        print(f"Plot '{title}' skipped: insufficient data.")
        return
    plt.figure(figsize=(8, 5))
    x_values = tuple(point[0] for point in points)
    y_values = tuple(point[1] for point in points)
    plt.scatter(x_values, y_values, alpha=0.7)
    for x_value, y_value, worker_label in points:
        plt.annotate(worker_label, (x_value, y_value), textcoords='offset points', xytext=(5, 3), fontsize=8)
    plt.title(title)
    plt.xlabel(config.get('xlabel') or _column_display_name(x_column))
    plt.ylabel(config.get('ylabel') or _column_display_name(y_column))
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()


def _plot_bar(df: pd.DataFrame, *, columns: Iterable[str], title: str, ylabel: str, stacked: bool = False, sort_by: str | None = None):
    """Draw a per-worker bar chart of the requested columns.

    Args:
        df: Frame with a ``worker_label`` column plus the value columns.
        columns: Value columns to plot; names missing from ``df`` are
            ignored.
        title: Plot title, also used in the "skipped" notices.
        ylabel: Label of the y axis.
        stacked: Stack the columns instead of drawing them side by side.
        sort_by: Optional column whose values order the workers, largest
            first.

    Returns:
        None. The chart is shown, or a notice is printed when there is
        nothing to plot.
    """
    available_columns = [col for col in columns if col in df.columns]
    if not available_columns:
        print(f"Plot '{title}' skipped: columns not available.")
        return
    subset = df[['worker_label'] + available_columns].copy()
    subset = subset.dropna(how='all', subset=available_columns)
    if subset.empty:
        print(f"Plot '{title}' skipped: insufficient data.")
        return
    subset[available_columns] = subset[available_columns].apply(pd.to_numeric, errors='coerce')
    subset = subset.dropna()
    if subset.empty:
        print(f"Plot '{title}' skipped: non-numeric data.")
        return
    renamed_subset = _rename_columns(subset)
    plot_df = renamed_subset.set_index('Worker')
    value_columns = []
    for original in available_columns:
        display_name = _column_display_name(original)
        if display_name in plot_df.columns:
            value_columns.append(display_name)
    if not value_columns:
        print(f"Plot '{title}' skipped: no numeric columns available after renaming.")
        return
    sort_column = _column_display_name(sort_by) if sort_by else None
    if sort_column and sort_column in plot_df.columns:
        plot_df = plot_df.sort_values(sort_column, ascending=False)
    # DataFrame.plot opens a figure of its own unless it is given an axes.
    # Creating the sized axes first and passing it in avoids a stray empty
    # figure and makes the requested figsize apply to the real chart.
    _, ax = plt.subplots(figsize=(max(8, len(plot_df) * 0.5), 5))
    plot_df[value_columns].plot(kind='bar', stacked=stacked, alpha=0.8, ax=ax)
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Worker')
    plt.xticks(rotation=45, ha='right')
    plt.grid(True, axis='y', alpha=0.3)
    plt.tight_layout()
    plt.show()


def _collect_heatmap_rows(entries):
    extracted = []
    for entry in entries or []:
        try:
            key_bytes = decode_key(entry)
            key_text = key_bytes.decode('utf-8', errors='replace')
        except Exception:
            continue
        preview, details = try_decode_value(entry)
        timestamp = None
        if isinstance(preview, bytes):
            preview = preview.decode('utf-8', errors='replace')
        if isinstance(preview, str):
            candidate = preview.strip()
            if candidate:
                timestamp = candidate
        if timestamp is None and isinstance(details, dict):
            decoded_bytes = details.get('decoded_bytes')
            if isinstance(decoded_bytes, (bytes, bytearray)):
                candidate = decoded_bytes.decode('utf-8', errors='replace').strip()
                if candidate:
                    timestamp = candidate
        extracted.append((timestamp, key_text))
    if not extracted:
        return np.empty((0, 0), dtype=int), []
    def _sort_key(item):
        ts, _ = item
        if ts:
            try:
                return datetime.fromisoformat(ts)
            except ValueError:
                pass
        return datetime.max
    extracted.sort(key=_sort_key)
    width = max(len(key) for _, key in extracted)
    matrix = np.full((len(extracted), width), np.nan, dtype=float)
    labels = []
    for row_idx, (ts, key_text) in enumerate(extracted):
        if len(key_text) != width:
            key_text = key_text.ljust(width, '0')
        matrix[row_idx, :] = [1 if ch == '1' else 0 for ch in key_text]
        labels.append(ts or 'n/a')
    return matrix, labels
def render_bitmap_heatmaps():
    """Show one heatmap per bitmap database present in the selected archive.

    The manifest's ``files`` map is used to find the backup file of each
    database index; databases without a file, without a dict payload, or with
    an empty matrix are silently skipped. A message is printed when no backups
    are loaded or when no heatmap could be produced.
    """
    targets = [
        (db_index, DB_DISPLAY_NAMES.get(short_name, fallback))
        for db_index, short_name, fallback in (
            (2, 'R', 'Reasons'),
            (3, 'NR', 'Non-reasons'),
            (5, 'AR', 'Anti-reasons'),
            (6, 'GP', 'Good profiles'),
            (7, 'BP', 'Bad profiles'),
            (8, 'PR', 'Preferred reasons'),
            (9, 'AP', 'Anti-reason profiles'),
        )
    ]
    if not (selected_manifest and selected_backups):
        print('Bitmap heatmaps unavailable: no backups loaded.')
        return

    files_map = selected_manifest.get('files') or {}
    backups = selected_backups if isinstance(selected_backups, dict) else None
    drawn = 0
    for db_index, label in targets:
        file_name = files_map.get(str(db_index))
        if not file_name:
            continue
        payload = None if backups is None else backups.get(file_name)
        if not isinstance(payload, dict):
            continue
        matrix, row_labels = _collect_heatmap_rows(payload.get('entries'))
        if matrix.size == 0:
            continue
        drawn += 1

        # Grow the figure with the number of rows so tick labels stay legible.
        fig, ax = plt.subplots(figsize=(12, max(4, matrix.shape[0] * 0.25)))
        image = ax.imshow(matrix, aspect='auto', cmap='viridis')
        ax.set_title(f"{label} bitmap heatmap (timestamp order)")
        ax.set_xlabel('Bitmap position')
        ax.set_ylabel('Timestamp')
        ax.set_yticks(range(len(row_labels)))
        ax.set_yticklabels(row_labels, fontsize=8)
        colorbar = plt.colorbar(image, ax=ax)
        colorbar.set_label('Bit value')
        plt.tight_layout()
        plt.show()

    if drawn == 0:
        print('No bitmap heatmaps generated (missing or empty data).')
def render_db0_eu_analysis():
    """Print a summary of the DB 0 ``EU`` entry and plot its feature series.

    The entry is looked up under ``'EU'`` in the module-level
    ``selected_db0_values_by_key`` mapping; its ``value_json`` is expected to
    map feature names to lists of values. Values that cannot be converted to
    ``float`` are kept as NaN, so they still count towards a series' length.

    Two charts are drawn: the series length per feature, and a box plot of the
    non-NaN values of every feature that has any. A message is printed and the
    function returns early when the entry, the map, or all series are missing.
    """
    entry_map = globals().get('selected_db0_values_by_key') or {}
    eu_entry = entry_map.get('EU') if isinstance(entry_map, dict) else None
    if not eu_entry:
        print('DB 0 EU entry not available for the current selection.')
        return
    series_map = eu_entry.get('value_json') if isinstance(eu_entry, dict) else None
    if not isinstance(series_map, dict) or not series_map:
        print('DB 0 EU entry does not contain a time series map.')
        return

    def _to_float(value):
        try:
            return float(value)
        except (TypeError, ValueError, OverflowError):
            return float('nan')

    feature_names = sorted(series_map)
    cleaned_series = [
        [_to_float(value) for value in series_map[name]] if isinstance(series_map[name], list) else []
        for name in feature_names
    ]
    lengths = np.array([len(series) for series in cleaned_series])
    if lengths.max() == 0:
        print('DB 0 EU entry does not contain numeric time series.')
        return

    print('DB 0 EU summary:')
    print(f'  features: {len(feature_names)}')
    print(f'  min length: {lengths.min()}')
    print(f'  max length: {lengths.max()}')
    print(f'  average length: {lengths.mean():.2f}')

    fig, axes = plt.subplots(2, 1, figsize=(12, 8), sharex=False)
    axes[0].bar(range(len(feature_names)), lengths, color='tab:blue')
    axes[0].set_ylabel('Length')
    axes[0].set_title('EU series lengths per feature')
    axes[0].set_xticks(range(len(feature_names)))
    axes[0].set_xticklabels(feature_names, rotation=90, fontsize=6)

    # Keep each feature name paired with its values: features with no finite
    # value are left out of the box plot, so the tick labels must be built from
    # the features actually drawn rather than from the full name list.
    box_names = []
    box_values = []
    for name, series in zip(feature_names, cleaned_series):
        values = np.asarray(series, dtype=float)
        values = values[~np.isnan(values)]
        if values.size:
            box_names.append(name)
            box_values.append(values)
    if box_values:
        axes[1].boxplot(box_values, vert=True, patch_artist=True)
        axes[1].set_title('EU feature distribution (box plot)')
        axes[1].set_ylabel('Value')
        axes[1].set_xticks(range(1, len(box_names) + 1))
        axes[1].set_xticklabels(box_names, rotation=90, fontsize=6)
    axes[1].set_xlabel('Feature index')
    plt.tight_layout()
    plt.show()
def _coerce_numeric_series(data):
    if data is None:
        return []
    if isinstance(data, bool):
        return []
    if isinstance(data, (int, float)):
        return [float(data)]
    if isinstance(data, str):
        trimmed = data.strip()
        if not trimmed:
            return []
        try:
            return [float(trimmed)]
        except ValueError:
            try:
                parsed = json.loads(trimmed)
            except json.JSONDecodeError:
                tokens = []
                for token in trimmed.replace(',', ' ').split():
                    try:
                        tokens.append(float(token))
                    except ValueError:
                        continue
                return tokens
            else:
                return _coerce_numeric_series(parsed)
    if isinstance(data, (list, tuple)):
        collected = []
        for item in data:
            if isinstance(item, dict):
                handled = False
                for candidate in ('value', 'y', 'val', 'score'):
                    if candidate in item:
                        nested = _coerce_numeric_series(item[candidate])
                        if nested:
                            collected.extend(nested)
                            handled = True
                        break
                if handled:
                    continue
                nested = _coerce_numeric_series(list(item.values()))
                if nested:
                    collected.extend(nested)
            else:
                nested = _coerce_numeric_series(item)
                if nested:
                    collected.extend(nested)
        return collected
    if isinstance(data, dict):
        for candidate in (
            'series',
            'values',
            'data',
            'points',
            'samples',
            'sample',
            'payload',
            'entries',
            'items',
            'measurements',
            'sample_dict',
        ):
            if candidate in data:
                nested = _coerce_numeric_series(data[candidate])
                if nested:
                    return nested
        numeric_items = []
        for idx, (key, value) in enumerate(data.items()):
            floats = _coerce_numeric_series(value)
            if not floats:
                continue
            if len(floats) == 1:
                numeric_items.append(((idx, 0), key, floats[0]))
            else:
                for offset, val in enumerate(floats):
                    numeric_items.append(((idx, offset), f"{key}[{offset}]", val))
        if not numeric_items:
            return []
        def _dict_sort_key(order, raw_key):
            order_token = tuple(order)
            if isinstance(raw_key, (int, float)):
                return (0, float(raw_key), order_token)
            if isinstance(raw_key, str):
                stripped = raw_key.strip()
                try:
                    return (0, float(stripped), order_token)
                except ValueError:
                    numbers = re.findall(r'-?\d+(?:\.\d+)?', stripped)
                    if numbers:
                        return (1, tuple(float(num) for num in numbers), order_token)
                    return (2, stripped.lower(), order_token)
            return (3, str(raw_key), order_token)
        numeric_items.sort(key=lambda item: _dict_sort_key(item[0], item[1]))
        return [value for _, _, value in numeric_items]
    return []
def _extract_sample_series(entry):
    if not isinstance(entry, dict):
        return []
    value_json = entry.get('value_json')
    series = _coerce_numeric_series(value_json)
    if series:
        return series
    value_text = entry.get('value_text')
    if isinstance(value_text, str):
        series = _coerce_numeric_series(value_text)
        if series:
            return series
    value_bytes = entry.get('value_bytes')
    if isinstance(value_bytes, (bytes, bytearray)):
        try:
            decoded = value_bytes.decode('utf-8', errors='replace')
        except Exception:
            decoded = ''
        if decoded:
            series = _coerce_numeric_series(decoded)
            if series:
                return series
    details = entry.get('details')
    if isinstance(details, dict):
        decoded_bytes = details.get('decoded_bytes')
        if isinstance(decoded_bytes, (bytes, bytearray)):
            try:
                decoded = decoded_bytes.decode('utf-8', errors='replace')
            except Exception:
                decoded = ''
            if decoded:
                series = _coerce_numeric_series(decoded)
                if series:
                    return series
    return []
def _extract_sample_timestamp(meta_entry):
    if not isinstance(meta_entry, dict):
        return None
    value_json = meta_entry.get('value_json')
    if isinstance(value_json, dict):
        for key in ('timestamp', 'created_at', 'created'):
            ts = value_json.get(key)
            if isinstance(ts, str) and ts:
                return ts
    value_text = meta_entry.get('value_text') if isinstance(meta_entry, dict) else None
    if isinstance(value_text, str):
        trimmed = value_text.strip()
        if trimmed.startswith('{') and trimmed.endswith('}'):
            try:
                parsed = json.loads(trimmed)
            except json.JSONDecodeError:
                parsed = None
            if isinstance(parsed, dict):
                for key in ('timestamp', 'created_at', 'created'):
                    ts = parsed.get(key)
                    if isinstance(ts, str) and ts:
                        return ts
        else:
            return trimmed or None
    return None
def render_db0_sample_timeseries():
    """Plot every ``sample_*`` series of DB 0 on a two-column grid of charts.

    Entries are read from the module-level ``selected_db0_values_by_key``
    mapping. Keys starting with ``sample_`` (and not ending in ``_meta``) are
    plotted when they hold a numeric series; the matching ``<key>_meta`` entry
    supplies an optional timestamp shown in the chart title.

    Charts are ordered by ISO timestamp first; samples whose timestamp is
    missing or unparseable follow, ordered by key. A message is printed and the
    function returns early when no series is available.
    """
    entry_map = globals().get('selected_db0_values_by_key') or {}
    if not isinstance(entry_map, dict):
        print('DB 0 sample entries not available for the current selection.')
        return
    collected = []
    for key, entry in entry_map.items():
        # Non-string keys cannot be sample names; skip them instead of failing.
        if not isinstance(key, str) or not key.startswith('sample_') or key.endswith('_meta'):
            continue
        series = _extract_sample_series(entry)
        if not series:
            continue
        meta_entry = entry_map.get(f"{key}_meta")
        timestamp = _extract_sample_timestamp(meta_entry) if isinstance(meta_entry, dict) else None
        collected.append((timestamp, key, series))
    if not collected:
        print('No DB 0 sample time series available.')
        return

    def _sort_key(item):
        ts, key, _ = item
        if ts:
            # Older interpreters reject the 'Z' suffix; spell it as an offset.
            iso_text = ts[:-1] + '+00:00' if ts[-1] in 'Zz' else ts
            try:
                parsed = datetime.fromisoformat(iso_text)
                offset = parsed.utcoffset()
                if offset is not None:
                    # Aware and naive datetimes cannot be compared, so aware
                    # values are normalised to naive UTC before sorting.
                    parsed = parsed.replace(tzinfo=None) - offset
            except (ValueError, OverflowError):
                pass
            else:
                return (0, parsed)
        # The leading group number keeps datetimes and key strings from being
        # compared with each other, which would raise TypeError.
        return (1, key)

    collected.sort(key=_sort_key)
    cols = 2
    rows = (len(collected) + cols - 1) // cols
    fig, axes = plt.subplots(rows, cols, figsize=(14, rows * 3), squeeze=False)
    flat_axes = axes.flatten()
    # Hide the unused cell when the number of series is odd.
    for ax in flat_axes[len(collected):]:
        ax.axis('off')
    for ax, (timestamp, key, series) in zip(flat_axes, collected):
        ax.plot(range(len(series)), series, marker='o', linewidth=1)
        title = f"{key} ({timestamp})" if timestamp else key
        ax.set_title(title, fontsize=8)
        ax.set_xlabel('Index')
        ax.set_ylabel('Value')
    plt.tight_layout()
    plt.show()
def _plot_histogram(df: pd.DataFrame, *, column: str, title: str, xlabel: str):
    if column not in df.columns:
        print(f"Histogram '{title}' skipped: column not available.")
        return
    series = pd.to_numeric(df[column], errors='coerce').dropna()
    if series.empty:
        print(f"Histogram '{title}' skipped: insufficient data.")
        return
    plt.figure(figsize=(8, 5))
    plt.hist(series, bins=min(25, len(series)), alpha=0.7)
    label = xlabel or _column_display_name(column)
    plt.title(title)
    plt.xlabel(label)
    plt.ylabel('Number of workers')
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()


def render_worker_report():
    """Display the per-worker tables followed by every configured plot.

    Rows come from ``_flatten_worker_summary(_ensure_report())`` and are sorted
    by ``worker_index``. Two tables are displayed (base columns, then the
    ``car_result_*`` / ``can_result_*`` / ``outcome_*`` detail columns), then
    the scatter, bar, stacked-bar and histogram plots described by the
    module-level plot configurations. Prints a message and returns when there
    are no rows.
    """
    rows = _flatten_worker_summary(_ensure_report())
    if not rows:
        print('No data available for the worker report.')
        return

    df = pd.DataFrame(rows).sort_values('worker_index')

    detail_prefixes = ('car_result_', 'can_result_', 'outcome_')
    extra_columns = sorted(col for col in df.columns if col.startswith(detail_prefixes))

    # Base table: label and id first, then the remaining base columns.
    wanted_base = ['worker_label', 'worker_id']
    wanted_base.extend(col for col in BASE_COLUMNS if col != 'worker_id')
    display(_rename_columns(df[[col for col in wanted_base if col in df.columns]]))

    print('Detailed results per worker:')
    wanted_detail = ['worker_label', *extra_columns]
    display(_rename_columns(df[[col for col in wanted_detail if col in df.columns]]))

    for scatter_config in SCATTER_PLOTS:
        _plot_scatter(df, scatter_config)

    for x_key, y_key, pair_title in ADDITIONAL_SCATTER_PREFIX_PAIRS:
        if not (x_key in df.columns and y_key in df.columns):
            continue
        pair_config = {
            'title': pair_title,
            'x': x_key,
            'y': y_key,
            'xlabel': _column_display_name(x_key),
            'ylabel': _column_display_name(y_key),
        }
        _plot_scatter(df, pair_config)

    for bar_config in BAR_PLOTS:
        _plot_bar(
            df,
            columns=bar_config['columns'],
            title=bar_config['title'],
            ylabel=bar_config['ylabel'],
            stacked=False,
            sort_by=bar_config.get('sort_by'),
        )

    for stacked_config in STACKED_BAR_CONFIG:
        prefixed = sorted(col for col in df.columns if col.startswith(stacked_config['prefix']))
        if not prefixed:
            print(f"Plot '{stacked_config['title']}' skipped: no columns with prefix {stacked_config['prefix']!r}.")
            continue
        _plot_bar(
            df,
            columns=prefixed,
            title=stacked_config['title'],
            ylabel=stacked_config['ylabel'],
            stacked=True,
        )

    for hist_config in HISTOGRAMS:
        _plot_histogram(
            df,
            column=hist_config['column'],
            title=hist_config['title'],
            xlabel=hist_config['xlabel'],
        )

# Render each report section exactly once; the repeated calls that used to
# follow re-drew identical tables and figures.
render_worker_report()
render_db0_eu_analysis()
render_db0_sample_timeseries()
render_bitmap_heatmaps()
