# tg-checkstats — Histogram Web UI exploration

This notebook helps you sanity-check the UI artifacts under `derived/ui/` and do quick exploratory analysis (month → week grid → weekday/hour histograms).

## Setup (uv)

```bash
uv venv
uv pip install -e ".[dev]"
```

Then open this notebook in VS Code (or Jupyter) and select the `.venv` kernel.

In [None]:
from __future__ import annotations

from pathlib import Path
import json

import pandas as pd
import matplotlib.pyplot as plt

# Point this at an existing run dir.
RUN_DIR = Path("runs/freifahren_leipzig_20260204_144216")

# Everything the notebook reads lives either directly in the run dir
# (metadata) or under its derived/ui/ folder (the CSV artifacts).
UI_DIR = RUN_DIR.joinpath("derived", "ui")
RUN_METADATA = RUN_DIR.joinpath("run_metadata.json")

ui_artifact_names = (
    "month_counts.csv",
    "day_counts.csv",
    "day_hour_counts.csv",
    "month_weekday_stats.csv",
    "calendar_day_index.csv",
)
required = [RUN_METADATA, *(UI_DIR / name for name in ui_artifact_names)]

# Fail early, listing every absent file at once rather than only the first.
missing = [path for path in required if not path.exists()]
if missing:
    listing = "\n".join(map(str, missing))
    raise FileNotFoundError("Missing required UI artifacts:\n" + listing)

print("OK: required artifacts exist")
print("run:", RUN_DIR)


In [None]:
# Load the run metadata and echo the fields needed to interpret the plots.
meta = json.loads(RUN_METADATA.read_text(encoding="utf-8"))

# A section may be missing or null in the JSON; treat both as an empty dict.
meta_config = meta.get("config") or {}
meta_counts = meta.get("counts") or {}
dataset = meta.get("dataset") or {}

timezone = meta_config.get("timezone", "Europe/Berlin")
policy = meta_config.get("event_count_policy")

print("timezone:", timezone)
print("event_count_policy:", policy)
print(
    "dataset start/end:",
    dataset.get("start_berlin_date"),
    dataset.get("end_berlin_date"),
)
print(
    "matched total (messages/events):",
    meta_counts.get("events_matched_total"),
    meta_counts.get("events_weight_total"),
)


## Month overview

`month_counts.csv` contains both metrics:
- `month_check_message_count`
- `month_check_event_count`

If everything is zero, the UI will look "empty" (because bars are height 0). That usually means no matches were detected in this run.

In [None]:
# Monthly totals table; previewed here so the column names are visible.
month_counts_path = UI_DIR / "month_counts.csv"
months = pd.read_csv(month_counts_path)
months.head()

In [None]:
# Rank the months by each metric (largest first) and keep the first ten rows.
ranked_by_messages = months.sort_values(by="month_check_message_count", ascending=False)
ranked_by_events = months.sort_values(by="month_check_event_count", ascending=False)
top_messages = ranked_by_messages.iloc[:10]
top_events = ranked_by_events.iloc[:10]

# Show each ranking next to its matching per-day rate column.
message_columns = ["month", "month_check_message_count", "messages_per_day_in_range"]
event_columns = ["month", "month_check_event_count", "events_per_day_in_range"]
display(top_messages.loc[:, message_columns])
display(top_events.loc[:, event_columns])


In [None]:
# One bar chart per metric, stacked vertically; both panels share the styling.
panels = (
    ("month_check_message_count", "Monthly totals (messages)"),
    ("month_check_event_count", "Monthly totals (events)"),
)

fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(12, 6), constrained_layout=True)

for ax, (column, title) in zip(axes, panels):
    ax.bar(months["month"], months[column], color="#197fe6")
    ax.set_title(title)
    # Month labels are long; rotate them so they do not overlap.
    ax.tick_params(axis="x", rotation=90)

plt.show()

## Month detail: week grid

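The grid is built from `day_counts.csv` (dense) for the month with the highest message count: rows are `week_start_date`, columns are `weekday_idx`, and each cell holds the summed count for that day.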

In [None]:
# Per-day counts, with the date column parsed to plain `datetime.date` objects.
days = pd.read_csv(UI_DIR / "day_counts.csv").assign(
    date=lambda frame: pd.to_datetime(frame["date"]).dt.date
)

# Use the month with the highest message count as the one to drill into.
busiest_row = months.sort_values("month_check_message_count", ascending=False).iloc[0]
month = str(busiest_row["month"])
print("selected month:", month)

# Day rows for that month only, plus a day-of-month column.
in_selected_month = days["month"] == month
m = days.loc[in_selected_month].assign(
    day=lambda frame: pd.to_datetime(frame["date"]).dt.day
)
m.head()

In [None]:
if m.empty:
    raise ValueError("No day rows for selected month")


def _week_grid(value_column: str) -> pd.DataFrame:
    """Pivot the selected month's day rows into a week-by-weekday table.

    Rows are `week_start_date`, columns are `weekday_idx`, and each cell is
    the sum of `value_column`; combinations with no rows are filled with 0.
    """
    return pd.pivot_table(
        m,
        index="week_start_date",
        columns="weekday_idx",
        values=value_column,
        aggfunc="sum",
        fill_value=0,
    )


grid_messages = _week_grid("check_message_count")
grid_events = _week_grid("check_event_count")

display(grid_messages)
display(grid_events)


## Week detail: weekday/hour histograms

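`day_hour_counts.csv` is sparse; a missing `(date, hour)` row implies a count of 0. The cells below take the week of the first day row in the selected month and draw one hourly histogram of message counts per day.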

In [None]:
# Hourly counts, with the date column parsed to plain `datetime.date` objects
# so it can be compared directly against the entries of `week_days`.
day_hours = pd.read_csv(UI_DIR / "day_hour_counts.csv").assign(
    date=lambda frame: pd.to_datetime(frame["date"]).dt.date
)

# Week of the first day row in the selected month's table.
first_week_value = m["week_start_date"].iloc[0]
week_start = pd.to_datetime(first_week_value).date()

# Seven consecutive calendar dates starting at week_start.
week_days = [
    stamp.date()
    for stamp in pd.date_range(start=pd.Timestamp(week_start), periods=7, freq="D")
]

print("selected week_start_date:", week_start)
print("week days:", week_days)


In [None]:
# One hourly histogram per day of the selected week, stacked vertically.
# Labels are looked up by each date's real weekday (Monday == 0) rather than
# by loop position, so they stay correct even if the week does not start on
# a Monday.
weekday_labels = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]

# The hourly file is sparse, so left-join onto a full 0..23 frame to get a bar
# slot for every hour. Built once because it does not change per day.
all_hours = pd.DataFrame({"hour": range(24)})

fig, axes = plt.subplots(7, 1, figsize=(12, 10), sharex=True, constrained_layout=True)

for ax, day in zip(axes, week_days):
    # Keep only the columns that are plotted so the zero-fill below cannot
    # touch unrelated columns such as `date`.
    subset = day_hours.loc[day_hours["date"] == day, ["hour", "check_message_count"]]
    merged = all_hours.merge(subset, on="hour", how="left")
    message_counts = merged["check_message_count"].fillna(0)

    ax.bar(merged["hour"], message_counts, color="#197fe6")
    ax.set_ylabel(weekday_labels[day.weekday()])
    ax.set_ylim(bottom=0)

axes[-1].set_xlabel("hour")
fig.suptitle(f"Week {week_start} — hourly histograms (messages)")
plt.show()
