# utils

> Word-counting and display-formatting utilities (dates and audio filenames) for the selection step

In [None]:
#| default_exp utils

In [None]:
#| export
from datetime import datetime
from pathlib import Path

## Word Operations

In [None]:
#| export
def count_words(
    text: str  # Input text; may be empty or None
) -> int:  # Number of words found
    """Return how many whitespace-separated tokens `text` contains.

    Falsy input (empty string or `None`) counts as zero words. Runs of
    consecutive whitespace are treated as a single separator.
    """
    return len(text.split()) if text else 0

## Date Formatting

In [None]:
#| export
def format_date(
    created_at: str  # ISO date string, Unix timestamp, or similar
) -> str:  # Formatted date for display
    """Format a date value for human-readable display (e.g., 'Jan 20, 2026').

    Parsing is attempted in this order:

    1. Unix timestamp in seconds, accepted only if it falls between
       2000-01-01 and 2100-01-01 UTC; rendered in the local timezone.
    2. SQLite / ISO 8601 date-times, with or without fractional seconds and
       with or without a UTC offset (`Z`, `+00:00`, `+0000`), then date-only
       strings. The calendar date is shown as written, without converting
       between timezones.

    Empty or `None` input yields "Unknown". Anything unparseable is returned
    as text, truncated to at most 16 characters.
    """
    if not created_at:
        return "Unknown"

    display_format = "%b %d, %Y"  # e.g., "Jan 20, 2026"

    # Try to parse as Unix timestamp first (float or int).
    try:
        timestamp = float(created_at)
        # Sanity check: only accept values after year 2000 and before year
        # 2100, so short numeric strings such as "2026" are not misread.
        if 946684800 < timestamp < 4102444800:
            return datetime.fromtimestamp(timestamp).strftime(display_format)
    except (ValueError, TypeError, OverflowError, OSError):
        # Not numeric (ValueError/TypeError), an int too large for float
        # (OverflowError), or rejected by the platform (OSError): fall
        # through to the string formats below.
        pass

    date_formats = (
        "%Y-%m-%d %H:%M:%S.%f",    # SQLite with microseconds
        "%Y-%m-%d %H:%M:%S",       # SQLite without microseconds
        "%Y-%m-%dT%H:%M:%S.%f",    # ISO 8601 with microseconds
        "%Y-%m-%dT%H:%M:%S",       # ISO 8601 without microseconds
        "%Y-%m-%d %H:%M:%S.%f%z",  # SQLite with microseconds and UTC offset
        "%Y-%m-%d %H:%M:%S%z",     # SQLite with UTC offset
        "%Y-%m-%dT%H:%M:%S.%f%z",  # ISO 8601 with microseconds and offset / Z
        "%Y-%m-%dT%H:%M:%S%z",     # ISO 8601 with offset / Z
        "%Y-%m-%d",                # Date only
    )

    raw = str(created_at)
    candidate = raw.strip()  # Hoisted: identical for every format attempt
    for fmt in date_formats:
        try:
            parsed = datetime.strptime(candidate, fmt)
        except ValueError:
            continue
        return parsed.strftime(display_format)

    # Fallback: show the raw text, truncated so it stays short in the UI.
    return raw[:16]

## Filename Formatting

In [None]:
#| export
def format_audio_filename(
    audio_path: str  # Path to the audio file, or "Unknown"
) -> str:  # Final path component, for display
    """Return the file name portion of `audio_path` for display.

    A missing path (empty or `None`) or the literal "Unknown" is reported
    as "Unknown Source".
    """
    has_source = bool(audio_path) and audio_path != "Unknown"
    return Path(audio_path).name if has_source else "Unknown Source"

## Tests

In [None]:
# Empty, single-word, and multi-word inputs, checked in one comparison.
assert [count_words(s) for s in ("", "hello", "The art of war")] == [0, 1, 4]
print("count_words tests passed")

In [None]:
# Missing values map to "Unknown".
assert [format_date(v) for v in ("", None)] == ["Unknown", "Unknown"]
# Date-only and SQLite-style date-time strings render the same calendar date.
assert [format_date(v) for v in ("2026-01-20", "2026-01-20 14:30:00")] == [
    "Jan 20, 2026",
    "Jan 20, 2026",
]
print("format_date tests passed")

In [None]:
# A real path yields its file name; "Unknown" and empty input yield the placeholder.
assert [
    format_audio_filename(p) for p in ("/home/user/audio/test.wav", "Unknown", "")
] == ["test.wav", "Unknown Source", "Unknown Source"]
print("format_audio_filename tests passed")

In [None]:
#| hide
# Export the cells marked `#| export` to the module named by `default_exp`.
import nbdev
nbdev.nbdev_export()