# Slack warehouse channel overview (regex parsed)

Lightweight, no-deps notebook to browse the processed Slack messages.
- Input: `training/peft/data/slack_messages_parsed.jsonl` (produced by `extract_slack_regex.py`).
- Each row: one user's messages grouped within a 30-minute window, plus a direction tag, rescue/drop-off locations, and regex-parsed items.

Edit `DATA_PATH` below if you regenerate the file at a different location.

In [None]:
import json, pathlib
from collections import Counter

# Location of the parsed JSONL file. The path is relative, so it is resolved
# against the working directory the notebook kernel runs in.
DATA_PATH = pathlib.Path("training/peft/data/slack_messages_parsed.jsonl")

In [None]:
def load_jsonl(path):
    """Load a JSON Lines file into a list of parsed objects.

    Args:
        path: Location of the file, as a string or ``pathlib.Path``.

    Returns:
        A list with one decoded JSON value per non-blank line, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    rows = []
    # Decode as UTF-8 explicitly: the platform default encoding is not
    # guaranteed to be UTF-8 and would fail on emoji / non-ASCII text.
    with pathlib.Path(path).open(encoding="utf-8") as f:
        for line in f:
            # Blank lines (e.g. a trailing newline at end of file) are not
            # records; json.loads would raise on them.
            if not line.strip():
                continue
            rows.append(json.loads(line))
    return rows

# Parse the whole file up front; the analysis cells below all read `records`.
records = load_jsonl(DATA_PATH)
# Trailing expression so the notebook displays how many rows were loaded.
len(records)

In [None]:
def percent(n, d):
    """Format ``n / d`` as a percentage string with one decimal place.

    A zero (or otherwise falsy) denominator yields the literal ``"0%"``
    instead of dividing.
    """
    if not d:
        return "0%"
    share = 100 * n / d
    return f"{share:.1f}%"

# How many grouped records ended up with at least one parsed item.
with_items = sum(bool(rec.get("items")) for rec in records)

# Tally records per direction tag; a missing or empty tag counts as "unknown".
direction_counts = Counter(
    (rec.get("direction", "") or "unknown") for rec in records
)

print(f"Total grouped records: {len(records)}")
print(f"Records with parsed items: {with_items} ({percent(with_items, len(records))})")
print("Direction counts:")
# most_common() with no argument lists every direction, most frequent first.
for k, v in direction_counts.most_common():
    print(f"  {k:8s} {v}")

In [None]:
def top_counter(counter, n=12):
    """Return the ``n`` most common ``(key, count)`` pairs of ``counter``.

    The result is a new list ordered from most to least frequent.
    """
    return list(counter.most_common(n))

# Frequency of each location value; records with a missing or empty
# location are left out of the tally.
rescue_counter = Counter(
    filter(None, (rec.get("rescue_location") for rec in records))
)
drop_counter = Counter(
    filter(None, (rec.get("drop_off_location") for rec in records))
)

print("Top rescue locations:")
for name, count in top_counter(rescue_counter):
    print(f"  {name}: {count}")

print("\nTop drop-off locations:")
for name, count in top_counter(drop_counter):
    print(f"  {name}: {count}")

In [None]:
# Sum quantities per item name, with names normalized by lower-casing and
# stripping surrounding whitespace so spelling variants share one bucket.
item_counter = Counter()
for r in records:
    # `items` may be absent or an explicit null in the JSON; treat both as an
    # empty list instead of failing when iterating over None.
    for item in r.get("items") or []:
        # A missing, null, or zero quantity counts as a single unit.
        # NOTE(review): assumes quantity is numeric when present - confirm
        # against the output of extract_slack_regex.py.
        qty = item.get("quantity") or 1
        # A null name would break `.lower()`; fall back to "" so the entry is
        # skipped like any other unnamed item.
        name = (item.get("name") or "").lower().strip()
        if not name:
            continue
        item_counter[name] += qty

print("Top items (by summed quantities):")
for name, count in item_counter.most_common(15):
    print(f"  {name}: {count}")

In [None]:
def sample_records(direction=None, limit=5):
    """Print a preview of records that have parsed items.

    Reads the module-level ``records`` list and prints, for each selected
    record, its direction and locations, its first four items, and the first
    400 characters of its raw messages joined with " | ".

    Args:
        direction: Direction tag to filter on. A falsy value (``None`` or
            ``""``) disables the filter. Records whose direction is missing
            or empty are treated as ``"unknown"`` for matching.
        limit: Maximum number of records to print. Zero or a negative value
            prints nothing; ``None`` means no limit.
    """
    # Check the limit up front: testing it only after printing would always
    # show at least one record, even for limit=0.
    if limit is not None and limit <= 0:
        return
    shown = 0
    for rec in records:
        # Normalize a missing/empty tag to "unknown" so that filtering on
        # "unknown" can actually select untagged records.
        if direction and (rec.get("direction") or "unknown") != direction:
            continue
        items = rec.get("items")
        if not items:
            continue
        # `raw_messages` may be absent or null; either way show empty text.
        msg = " | ".join(rec.get("raw_messages") or [])[:400]
        print("---")
        print(f"direction: {rec.get('direction')} | rescue: {rec.get('rescue_location')} | drop: {rec.get('drop_off_location')}")
        print(f"items: {items[:4]}")
        print(f"text: {msg}\n")
        shown += 1
        if limit is not None and shown >= limit:
            break

# Show a few outbound examples. Pass another direction tag ("inbound", "both",
# "unknown") to filter differently, or None to sample across all directions.
sample_records(direction="outbound", limit=5)