# Slack warehouse channel overview (regex parsed)

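Lightweight notebook for browsing the processed Slack messages. The summary cells use only the Python standard library; the interactive record browser at the end additionally needs `ipywidgets`.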
- Input: `training/peft/data/slack_messages_parsed.jsonl` (produced by `extract_slack_regex.py`).
- Each row: the messages one user posted within a 30-minute window, grouped together, plus a direction tag, rescue/drop-off locations, and the regex-parsed items.

The first code cell looks for the file in a few likely locations; edit `DATA_PATH` there if you regenerate the file somewhere else.

In [19]:
import json, pathlib, subprocess, sys
from collections import Counter

# Locate the parsed JSONL whether the working directory is the repo root or
# training/peft. When `__file__` is not defined (the usual case in a notebook
# kernel) BASE falls back to the current working directory.
if "__file__" in globals():
    BASE = pathlib.Path(__file__).resolve().parent
else:
    BASE = pathlib.Path(".").resolve()

CANDIDATE_PATHS = [
    pathlib.Path("training/peft/data/slack_messages_parsed.jsonl"),
    BASE.joinpath("data", "slack_messages_parsed.jsonl"),
    BASE.parent.joinpath("data", "slack_messages_parsed.jsonl"),
    BASE.parent.joinpath("training", "peft", "data", "slack_messages_parsed.jsonl"),
]

# The first candidate that exists on disk wins; if none exists, keep the first
# (repo-root-relative) entry as the default.
DATA_PATH = next(filter(pathlib.Path.exists, CANDIDATE_PATHS), CANDIDATE_PATHS[0])


In [20]:
def ensure_parsed_file(path: pathlib.Path) -> pathlib.Path:
    """Return ``path`` as a ``Path``, generating the file first if it is missing.

    If the file does not exist and ``extract_slack_regex.py`` is found in
    ``BASE``, that script is run once (with its own directory as the working
    directory) and the path is checked again.

    Raises:
        FileNotFoundError: the file is still missing after the optional
            extractor run, or the extractor script was not found.
        subprocess.CalledProcessError: the extractor exited with a non-zero
            status (``check=True``).
    """
    target = pathlib.Path(path)
    if not target.exists():
        extractor = BASE / "extract_slack_regex.py"
        generated = False
        if extractor.exists():
            print(f"{target} not found. Running extractor once…")
            subprocess.run([sys.executable, str(extractor)], check=True, cwd=extractor.parent)
            generated = target.exists()
        if not generated:
            raise FileNotFoundError(f"Could not find {target}. Run extract_slack_regex.py to generate it.")
    return target

def load_jsonl(path):
    """Read a JSON Lines file into a list of decoded objects.

    Args:
        path: ``str`` or path-like location of the ``.jsonl`` file.

    Returns:
        list: one decoded JSON value per non-blank line, in file order.

    Raises:
        FileNotFoundError: ``path`` does not exist.
        json.JSONDecodeError: a non-blank line is not valid JSON.
    """
    path = pathlib.Path(path)
    # Decode explicitly as UTF-8: the messages contain non-ASCII characters
    # (e.g. curly apostrophes) and the platform default encoding is not UTF-8
    # everywhere.
    with path.open(encoding="utf-8") as f:
        # Blank lines (such as a trailing newline at the end of the file) are
        # skipped instead of being handed to json.loads, which would reject them.
        return [json.loads(line) for line in f if line.strip()]

# Load every grouped record into memory (generating the file first if needed).
# The trailing expression makes the record count the cell's output.
parsed_path = ensure_parsed_file(DATA_PATH)
records = load_jsonl(parsed_path)
len(records)


2634

In [21]:
def percent(n, d):
    """Format ``n / d`` as a percentage with one decimal place.

    Returns the string ``"0%"`` when ``d`` is zero (or otherwise falsy) so the
    caller never divides by zero.
    """
    if not d:
        return "0%"
    ratio = 100 * n / d
    return f"{ratio:.1f}%"

# Headline numbers: how many grouped records have parsed items, and how the
# direction tags are distributed (a missing or empty tag counts as "unknown").
with_items = len([rec for rec in records if rec.get("items")])

direction_counts = Counter()
for rec in records:
    direction_counts[rec.get("direction", "") or "unknown"] += 1

print(f"Total grouped records: {len(records)}")
print(f"Records with parsed items: {with_items} ({percent(with_items, len(records))})")
print("Direction counts:")
for label, total in direction_counts.most_common():
    print(f"  {label:8s} {total}")

Total grouped records: 2634
Records with parsed items: 2205 (83.7%)
Direction counts:
  outbound 1076
  unknown  659
  inbound  600
  both     299


In [22]:
def top_counter(counter, n=12):
    """Return the ``n`` most common ``(key, count)`` pairs of ``counter`` as a list."""
    pairs = []
    for key, count in counter.most_common(n):
        pairs.append((key, count))
    return pairs

# Tally how often each raw location string appears; records with an empty or
# missing location are left out. No normalization is applied here, so spelling
# variants of the same place are counted separately.
rescue_counter = Counter(filter(None, (r.get("rescue_location") for r in records)))
drop_counter = Counter(filter(None, (r.get("drop_off_location") for r in records)))

for heading, counter in (
    ("Top rescue locations:", rescue_counter),
    ("\nTop drop-off locations:", drop_counter),
):
    print(heading)
    for name, count in top_counter(counter):
        print(f"  {name}: {count}")

Top rescue locations:
  Aldi Hodgkins: 32
  SL Mariano's: 14
  SL Mariano’s: 13
  Aldi Lyons: 6
  UC: 5
  Aldi Cicero today, dropped off @ UC: 5
  South Loop Marianos today, dropped off @ UC: 5
  Englewood Aldi: 4
  Wicker Park Aldi: 4
  Cicero Aldi: 4
  Marillac: 3
  WP Aldi: 3

Top drop-off locations:
  distro: 20
  UC: 18
  love fridges: 17
  Port &amp: 13
  Love Fridge: 11
  reporting: 11
  Pilsen shelter: 11
  love fridge: 10
  BYP: 9
  Austin distro: 8
  grabs: 6
  UC:: 6


In [23]:
# Sum the parsed quantities per lower-cased, stripped item name.
# Quantities are added as-is: the item's `unit` is not consulted, so counts of
# boxes, cases, etc. end up in the same total.
item_counter = Counter()
for r in records:
    # `dict.get(key, default)` only falls back when the key is absent; a JSON
    # null comes back as None. `or []` / `or ""` cover both cases, matching how
    # `items` is read elsewhere in this notebook.
    for item in r.get("items") or []:
        name = (item.get("name") or "").lower().strip()
        if not name:
            continue
        # A missing, null, or zero quantity is counted as one.
        item_counter[name] += item.get("quantity") or 1

print("Top items (by summed quantities):")
for name, count in item_counter.most_common(15):
    print(f"  {name}: {count}")

Top items (by summed quantities):
  bread: 1592.0
  apples: 1135.75
  milk: 795.5
  oranges: 756.5
  eggs: 646.0
  cereal: 618.5
  deli: 581.0
  carrots: 522.0
  potatoes: 506.0
  broccoli: 483.5
  meat: 476.0
  grapes: 404.0
  cookies: 394.0
  lettuce: 361.0
  almond milk: 348.0


In [24]:
def sample_records(direction=None, limit=5):
    """Print up to ``limit`` records that have parsed items.

    For each record this prints a separator, a line with the direction and
    locations, the first four parsed items, and the raw messages joined with
    " | " and truncated to 400 characters.

    Args:
        direction: direction tag to match (e.g. "inbound", "outbound", "both",
            "unknown"). A falsy value disables the filter. Records whose tag
            is missing or empty are matched by "unknown", consistent with how
            the summary counts treat them.
        limit: maximum number of records to print; ``0`` or less prints nothing.
    """
    shown = 0
    for rec in records:
        # Check the budget before printing so limit=0 really prints nothing.
        if shown >= limit:
            break
        tag = rec.get("direction") or "unknown"
        if direction and tag != direction:
            continue
        items = rec.get("items")
        if not items:
            continue
        # `or []` tolerates a record whose raw_messages is null.
        msg = " | ".join(rec.get("raw_messages") or [])[:400]
        print("---")
        print(f"direction: {rec.get('direction')} | rescue: {rec.get('rescue_location')} | drop: {rec.get('drop_off_location')}")
        print(f"items: {items[:4]}")
        print(f"text: {msg}\n")
        shown += 1

# Print five outbound samples. Other values to try for `direction` are
# "inbound", "both", "unknown", or None for no direction filter.
sample_records(limit=5, direction="outbound")

---
direction: outbound | rescue:  | drop: 
items: [{'name': 'pork ribs', 'quantity': 2.0, 'unit': 'box', 'estimated_lbs': 36.0, 'subcategory': 'meat'}, {'name': 'corn', 'quantity': 2.0, 'unit': 'box', 'estimated_lbs': 36.0, 'subcategory': ''}]
text: Grabbed 2 boxes of pork ribs, 2 boxes of corn and one small crate of corn, and the things labeled 'Mordion '

---
direction: outbound | rescue:  | drop: 
items: [{'name': 'random meat', 'quantity': 6.0, 'unit': 'box', 'estimated_lbs': 108.0, 'subcategory': 'meat'}, {'name': 'baby shampoo', 'quantity': 1.0, 'unit': 'box', 'estimated_lbs': 18.0, 'subcategory': ''}]
text: I stopped by today and grabbed 6 boxes of random meat, one box of baby shampoo and a box of random dry/ pantry goods.

---
direction: outbound | rescue:  | drop: the other half of produce and greens
items: [{'name': 'the assorted leftover produce', 'quantity': 0.5, 'unit': None, 'estimated_lbs': 2.5, 'subcategory': 'produce'}, {'name': 'produce', 'quantity': 0.5, 'unit': Non

In [28]:

import re
from IPython.display import display, HTML
import ipywidgets as widgets

# Normalization helpers reused for location grouping

def _normalize_loc(value):
    """Reduce a location string to a lookup key.

    Lower-cases the text, replaces every run of characters other than
    ``a-z``, ``0-9`` and space with a single space, trims the ends and
    collapses repeated whitespace. ``None`` and other falsy values give ``''``.
    """
    clean = re.sub(r'[^a-z0-9 ]+', ' ', str(value or '').lower()).strip()
    return re.sub(r'\s+', ' ', clean)


# Maps a normalized key (the output of `_normalize_loc`) to its display name.
# Keys therefore never contain punctuation: an apostrophe, straight or curly,
# has already been turned into a space ("Mariano's" -> "mariano s").
LOCATION_ALIASES = {
    'aldi wp': 'Aldi Wicker Park',
    'wp aldi': 'Aldi Wicker Park',
    'aldi wicker park': 'Aldi Wicker Park',
    'wicker park aldi': 'Aldi Wicker Park',
    'aldi n milwaukee': 'Aldi Wicker Park',
    'aldi n milwaukee ave': 'Aldi Wicker Park',
    'aldis wp': 'Aldi Wicker Park',
    'aldis wicker park': 'Aldi Wicker Park',
    'aldi hodgkins': 'Aldi Hodgkins',
    'hodgkins aldi': 'Aldi Hodgkins',
    'aldi lyons': 'Aldi Lyons',
    'lyons aldi': 'Aldi Lyons',
    'aldi cicero': 'Aldi Cicero',
    'cicero aldi': 'Aldi Cicero',
    'aldi englewood': 'Aldi Englewood',
    'englewood aldi': 'Aldi Englewood',
    # Straight and curly apostrophe spellings both normalize to "mariano s".
    'sl mariano s': "SL Mariano's",
    'sl marianos': "SL Mariano's",
    'uc': 'UC',
}


def canonical_loc(value):
    """Return the display name for a raw location string.

    Args:
        value: raw location text from a record; may be ``None`` or empty.

    Returns:
        ``''`` for a falsy ``value``; the aliased name when the normalized
        form of ``value`` is a key of ``LOCATION_ALIASES``; otherwise
        ``value`` itself, unchanged.
    """
    if not value:
        return ''
    return LOCATION_ALIASES.get(_normalize_loc(value), value)

# Build filtered list once for navigation

def filter_records(direction=None, has_items=True, contains=None):
    """Return the records that pass the direction, items and text filters.

    Args:
        direction: direction tag to match; a falsy value disables the filter.
            A record whose tag is missing or empty is matched by "unknown",
            the same label the record card shows for it.
        has_items: ``True`` keeps only records with parsed items, ``False``
            keeps only records without, ``None`` keeps both.
        contains: case-insensitive substring to look for in the record's raw
            messages (joined with " | "); ignored unless it is a non-empty
            string.

    Returns:
        list: the matching record dicts, in their original order.
    """
    needle = contains.lower() if isinstance(contains, str) and contains else None
    out = []
    for rec in records:
        if direction and (rec.get('direction') or 'unknown') != direction:
            continue
        items_present = bool(rec.get('items'))
        if has_items is True and not items_present:
            continue
        if has_items is False and items_present:
            continue
        if needle is not None:
            # Only build the searchable text when a text filter is active;
            # `or []` tolerates a record whose raw_messages is null.
            text = ' | '.join(rec.get('raw_messages') or [])
            if needle not in text.lower():
                continue
        out.append(rec)
    return out

def render_record(rec):
    """Display one grouped record as an HTML card.

    The card shows the record id, direction, canonical rescue/drop-off
    locations, the raw messages joined with " | ", and a table of the parsed
    items (or a "No items parsed." note).

    Every value taken from the record is HTML-escaped before it is placed in
    the markup, so message text containing ``<``, ``>`` or ``&`` is shown
    literally instead of being parsed as tags. Values are unescaped first
    because the data appears to already contain entities such as ``&amp;``;
    without that step those would be double-escaped.

    Args:
        rec: record dict. The keys read are ``id``, ``direction``,
            ``rescue_location``, ``drop_off_location``, ``raw_messages`` and
            ``items`` (dicts with ``quantity``, ``unit``, ``name``,
            ``estimated_lbs`` and ``subcategory``).
    """
    # Imported locally so the function does not depend on an import elsewhere.
    from html import escape, unescape

    def text(value):
        # Normalize any existing entities, then escape for safe embedding.
        return escape(unescape(str(value)))

    def cell(value):
        # A JSON null (e.g. an item without a unit) becomes an empty cell
        # rather than the literal string "None".
        return '' if value is None else text(value)

    rescue = text(canonical_loc(rec.get('rescue_location')) or '—')
    drop = text(canonical_loc(rec.get('drop_off_location')) or '—')
    direction = text(rec.get('direction') or 'unknown')
    msg = text(' | '.join(rec.get('raw_messages') or []))

    columns = ('quantity', 'unit', 'name', 'estimated_lbs', 'subcategory')
    rows = [
        "<tr>" + ''.join(f"<td>{cell(item.get(key))}</td>" for key in columns) + "</tr>"
        for item in (rec.get('items') or [])
    ]
    if rows:
        items_table = (
            "<table style='border-collapse:collapse;'>"
            "<tr><th>qty</th><th>unit</th><th>name</th><th>~lbs</th><th>subcat</th></tr>"
            + ''.join(rows)
            + "</table>"
        )
    else:
        items_table = "<em>No items parsed.</em>"

    card = (
        f"<div style='border:1px solid #ccc;padding:12px;border-radius:8px;'>"
        f"<div><strong>ID</strong> {text(rec.get('id'))} · <strong>Direction</strong> {direction} · "
        f"<strong>Rescue</strong> {rescue} · <strong>Drop</strong> {drop}</div>"
        f"<div style='margin-top:8px;'><strong>Raw message</strong><br><div style='white-space:pre-wrap'>{msg}</div></div>"
        f"<div style='margin-top:8px;'><strong>Items</strong><br>{items_table}</div>"
        f"</div>"
    )
    display(HTML(card))

# Widgets

# Filter controls: direction tag, whether parsed items are required, and a
# substring to look for in the raw messages.
direction_dd = widgets.Dropdown(
    options=[
        ('Any', ''),
        ('Inbound', 'inbound'),
        ('Outbound', 'outbound'),
        ('Both', 'both'),
        ('Unknown', 'unknown'),
    ],
    description='Direction',
    layout=widgets.Layout(width='220px'),
)
items_dd = widgets.Dropdown(
    options=[
        ('Must have items', True),
        ('No items', False),
        ('Either', None),
    ],
    description='Items',
    layout=widgets.Layout(width='220px'),
)
text_filter = widgets.Text(
    description='Contains',
    placeholder='substring',
    layout=widgets.Layout(width='260px'),
)

# Navigation buttons, the "position / total" label, and the area the current
# record is rendered into.
prev_btn = widgets.Button(description='Prev', icon='arrow-left')
next_btn = widgets.Button(description='Next', icon='arrow-right')
status = widgets.Label()
output = widgets.Output()

# Shared navigation state: the currently filtered records and the index of the
# one on screen.
state = {'records': records, 'index': 0}


def refresh_records(change=None):
    """Re-apply the filters from the current widget values and show the first match.

    ``change`` is the notification argument passed when this function is used
    as an ``observe`` callback; it is not inspected.
    """
    wanted_direction = direction_dd.value or None
    wanted_text = text_filter.value or None
    matches = filter_records(
        direction=wanted_direction,
        has_items=items_dd.value,
        contains=wanted_text,
    )
    state['records'] = matches
    state['index'] = 0  # always restart at the first match after a filter change
    update_view()


def update_view():
    """Redraw the output area and the position label for the current selection.

    Clears the output, sets the label to "<position> / <total>" ("0 / 0" when
    nothing matches), then renders either the selected record or a
    "No matches" note inside the output area.
    """
    output.clear_output()
    matches = state['records']
    count = len(matches)
    position = state['index']
    status.value = f"{position+1 if count else 0} / {count}"
    with output:
        if count:
            render_record(matches[position])
        else:
            display(HTML("<em>No matches</em>"))


def on_prev(_):
    """Step back one record, wrapping from the first to the last; no-op when nothing matches."""
    matches = state['records']
    if matches:
        state['index'] = (state['index'] - 1) % len(matches)
        update_view()

def on_next(_):
    """Step forward one record, wrapping from the last to the first; no-op when nothing matches."""
    matches = state['records']
    if matches:
        state['index'] = (state['index'] + 1) % len(matches)
        update_view()

# Any change to a filter control rebuilds the match list; the two buttons step
# through it.
direction_dd.observe(refresh_records, names='value')
items_dd.observe(refresh_records, names='value')
text_filter.observe(refresh_records, names='value')
prev_btn.on_click(on_prev)
next_btn.on_click(on_next)

# Lay the controls out in one row above the output area, then render the first
# record using the widgets' initial values.
controls = widgets.HBox([direction_dd, items_dd, text_filter, prev_btn, next_btn, status])
display(controls, output)
refresh_records()


--- id 8 direction outbound rescue - drop -
raw message:
Grabbed 2 boxes of pork ribs, 2 boxes of corn and one small crate of corn, and the things labeled 'Mordion '
items:
qty | unit | name      | ~lbs | subcat----+------+-----------+------+-------2.0 | box  | pork ribs | 36.0 | meat  2.0 | box  | corn      | 36.0 |       

--- id 9 direction inbound rescue - drop -
raw message:
Dropped 15 cases persian cucumbers, 19 cases roma tomatoes, 4 cases plums, 12 cases oranges
items:
qty  | unit | name              | ~lbs  | subcat -----+------+-------------------+-------+--------15.0 | case | persian cucumbers | 270.0 |        19.0 | case | roma tomatoes     | 342.0 | produce4.0  | case | plums             | 72.0  |        12.0 | case | oranges           | 216.0 | produce

--- id 12 direction unknown rescue - drop -
raw message:
1.5 cs bell pepper
2 cs jalapeno
45 dz eggs
2 cs corn
1 cs apples
1 cs oranges
50 lb bag potatoes - still in my car :o(

freezer:
2 cs chix q
1 cs big bags precooked