attach-dev · hammadtq · Jul 25, 2025 · Jul 15, 2025 · Jul 15, 2025 · Jul 15, 2025
diff --git a/.env.example b/.env.example
@@ -19,6 +19,9 @@ WEAVIATE_URL=http://localhost:8081
 MAX_TOKENS_PER_MIN=60000
 QUOTA_ENCODING=cl100k_base
 
+# Usage metering backend: null (default, metering disabled), prometheus, or openmeter
+USAGE_METERING=null
+
 # Development: Auth0 credentials for dev_login script
 # AUTH0_DOMAIN=your-domain.auth0.com
 # AUTH0_CLIENT=your-client-id

diff --git a/AGENTS.md b/AGENTS.md
@@ -13,3 +13,18 @@ This repo is used for the Attach Gateway service. Follow these guidelines for co
 
 ## Development Tools
 - Code should be formatted with `black` and imports sorted with `isort`.
+
+## 🔒  Memory & /mem/events are **read-only**
+
+> **Do not touch any memory-related code.**
+
+* **Off-limits files / symbols**  
+  * `mem/**`
+  * `main.py` → the `/mem/events` route and **all** `MemoryEvent` logic  
+  * Any Weaviate queries, inserts, or schema
+
+* PRs that change, remove, or “refactor” these areas **will be rejected**.  
+  Only work on the explicitly assigned task (e.g. billing hooks).
+
+* If your change needs to interact with memory, open an issue first and wait
+  for maintainer approval.
diff --git a/README.md b/README.md
@@ -234,36 +234,82 @@ curl -X POST /v1/logs \
 # => HTTP/1.1 202 Accepted
 ```
 
+## Usage hooks
+
+Emit token usage metrics for every request. Choose a backend via
+`USAGE_METERING` (alias `USAGE_BACKEND`):
+
+```bash
+export USAGE_METERING=prometheus  # or openmeter, or null to disable
+```
+
+A Prometheus counter `attach_usage_tokens_total{user,direction,model}` is
+exposed for Grafana dashboards.
+Set `USAGE_METERING=null` (the default) to disable metering entirely.
+
+> **⚠️ Usage hooks depend on the quota middleware.**  
+> Make sure `MAX_TOKENS_PER_MIN` is set (any positive number) so the  
+> `TokenQuotaMiddleware` is enabled; the middleware is what records usage  
+> events that feed Prometheus.
+
+```bash
+# Enable usage tracking (set any reasonable limit)
+export MAX_TOKENS_PER_MIN=60000
+export USAGE_METERING=prometheus
+```
+
+#### OpenMeter (Stripe / ClickHouse)
+
+```bash
+# No additional dependencies needed - uses direct HTTP API
+export MAX_TOKENS_PER_MIN=60000              # Required: enables quota middleware
+export USAGE_METERING=openmeter              # Required: activates OpenMeter backend  
+export OPENMETER_API_KEY=your-api-key-here   # Required: API authentication
+export OPENMETER_URL=https://openmeter.cloud # Optional: defaults to https://openmeter.cloud
+```
+
+Events are sent directly to OpenMeter's HTTP API and are processed by the LLM tokens meter for billing integration with Stripe.
+
+> **⚠️ All three variables are required for OpenMeter to work:**  
+> - `MAX_TOKENS_PER_MIN` enables the quota middleware that records usage events  
+> - `USAGE_METERING=openmeter` activates the OpenMeter backend  
+> - `OPENMETER_API_KEY` provides authentication to OpenMeter's API  
+
+The gateway gracefully falls back to `NullUsageBackend` if any required variable is missing.
+
+### Scraping metrics
+
+```bash
+curl -H "Authorization: Bearer $JWT" http://localhost:8080/metrics
+```
+
 ## Token quotas
 
 Attach Gateway can enforce per-user token limits. Install the optional
-dependency with `pip install attach-gateway[quota]` and set
+dependency with `pip install attach-dev[quota]` and set
 `MAX_TOKENS_PER_MIN` in your environment to enable the middleware. The
 counter defaults to the `cl100k_base` encoding; override with
 `QUOTA_ENCODING` if your model uses a different tokenizer. The default
 in-memory store works in a single process and is not shared between
 workers—requests retried across processes may be double-counted. Use Redis
 for production deployments.
+If `tiktoken` is missing, a byte-count fallback is used, which counts about
+four times as many tokens as the `cl100k_base` tokenizer. Install `tiktoken`
+in production.
 
 ### Enable token quotas
 
 ```bash
 # Optional: Enable token quotas
 export MAX_TOKENS_PER_MIN=60000
-pip install tiktoken  # or pip install attach-gateway[quota]
+pip install tiktoken  # or pip install attach-dev[quota]
 ```
 
 To customize the tokenizer:
 ```bash
 export QUOTA_ENCODING=cl100k_base  # default
 ```
 
-## Roadmap
-
-* **v0.2** — Protected‑resource metadata endpoint (OAuth 2.1), enhanced DID resolvers.  
-* **v0.3** — Token‑exchange (RFC 8693) for on‑behalf‑of delegation.  
-* **v0.4** — Attach Store v1 (Git‑style, policy guards).
-
 ---
 
 ## License

diff --git a/attach/__init__.py b/attach/__init__.py
@@ -4,11 +4,20 @@
 Add OIDC SSO, agent-to-agent handoff, and pluggable memory to any Python project.
 """
 
-__version__ = "0.2.2"
+__version__ = "0.3.7"
 __author__ = "Hammad Tariq"
 __email__ = "hammad@attach.dev"
 
-# Clean imports - no sys.path hacks needed since everything will be in the wheel
-from .gateway import create_app, AttachConfig
+# Resolve gateway symbols lazily (PEP 562) so importing `attach` does not fail
+# early, while `AttachConfig` stays the real class (isinstance/annotations work).
+def __getattr__(name):
+    """Import ``create_app`` / ``AttachConfig`` from ``.gateway`` on first access."""
+    if name in ("create_app", "AttachConfig"):
+        from . import gateway
+
+        value = getattr(gateway, name)
+        globals()[name] = value  # cache: later lookups bypass __getattr__
+        return value
+    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
 
 __all__ = ["create_app", "AttachConfig", "__version__"] 
diff --git a/attach/__main__.py b/attach/__main__.py
@@ -2,7 +2,7 @@
 CLI entry point - replaces the need for main.py in wheel
 """
 import uvicorn
-from .gateway import create_app
+import click
 
 def main():
     """Run Attach Gateway server"""
@@ -13,17 +13,42 @@ def main():
     except ImportError:
         pass  # python-dotenv not installed, that's OK for production
 
-    import click
-
     @click.command()
     @click.option("--host", default="0.0.0.0", help="Host to bind to")
     @click.option("--port", default=8080, help="Port to bind to") 
     @click.option("--reload", is_flag=True, help="Enable auto-reload")
     def cli(host: str, port: int, reload: bool):
-        app = create_app()
-        uvicorn.run(app, host=host, port=port, reload=reload)
+        try:
+            # Import here AFTER .env is loaded and CLI is parsed
+            from .gateway import create_app
+            app = create_app()
+            uvicorn.run(app, host=host, port=port, reload=reload)
+        except RuntimeError as e:
+            _friendly_exit(e)
+        except Exception as e:  # unexpected crash
+            click.echo(f"❌ Startup failed: {e}", err=True)
+            raise click.Abort()
 
     cli()
 
+def _friendly_exit(err):
+    """Re-raise *err* as a ``click.ClickException`` with setup hints appended."""
+    headline = f"❌ {err}\n\n"
+
+    # Errors naming OPENMETER_API_KEY get the metering fix; any other error
+    # is answered with the OIDC environment-variable checklist.
+    if "OPENMETER_API_KEY" in str(err):
+        hint = ("💡 Fix:\n"
+                "   export OPENMETER_API_KEY=\"sk_live_...\"\n"
+                "   (or) export USAGE_METERING=null    # to disable metering\n\n"
+                "📖 See README.md for complete setup")
+    else:
+        hint = ("💡 Required environment variables:\n"
+                "   export OIDC_ISSUER=\"https://your-domain.auth0.com/\"\n"
+                "   export OIDC_AUD=\"your-api-identifier\"\n\n"
+                "📖 See README.md for complete setup instructions")
+
+    raise click.ClickException(headline + hint)
+
 if __name__ == "__main__":
     main() 
diff --git a/attach/gateway.py b/attach/gateway.py
@@ -3,24 +3,33 @@
 """
 
 import os
+from contextlib import asynccontextmanager
 from typing import Optional
 
 import weaviate
 from fastapi import APIRouter, FastAPI, HTTPException, Request
+from fastapi.middleware.cors import CORSMiddleware
+from starlette.middleware.base import BaseHTTPMiddleware
 from pydantic import BaseModel
 
 from a2a.routes import router as a2a_router
-
-# Clean relative imports
-from auth import verify_jwt
 from auth.oidc import _require_env
-
-# from logs import router as logs_router
+import logs
+logs_router = logs.router
 from mem import get_memory_backend
 from middleware.auth import jwt_auth_mw
-from middleware.quota import TokenQuotaMiddleware
 from middleware.session import session_mw
 from proxy.engine import router as proxy_router
+from usage.factory import _select_backend, get_usage_backend
+from usage.metrics import mount_metrics
+from utils.env import int_env
+
+# Guard TokenQuotaMiddleware import (matches main.py pattern)
+try:
+    from middleware.quota import TokenQuotaMiddleware
+    QUOTA_AVAILABLE = True
+except ImportError:  # optional extra not installed
+    QUOTA_AVAILABLE = False
 
 # Import version from parent package
 from . import __version__
@@ -49,7 +58,7 @@ async def get_memory_events(request: Request, limit: int = 10):
             return {"data": {"Get": {"MemoryEvent": []}}}
 
         result = (
-            client.query.get("MemoryEvent", ["timestamp", "role", "content"])
+            client.query.get("MemoryEvent", ["timestamp", "event", "user", "state"])
             .with_additional(["id"])
             .with_limit(limit)
             .with_sort([{"path": ["timestamp"], "order": "desc"}])
@@ -97,6 +106,21 @@ class AttachConfig(BaseModel):
     auth0_client: Optional[str] = None
 
 
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Attach the usage backend at startup and close it on shutdown."""
+    app.state.usage = get_usage_backend(_select_backend())
+    mount_metrics(app)
+
+    try:
+        yield
+    finally:
+        # Runs even when serving is cancelled or fails, so the backend is not leaked.
+        aclose = getattr(app.state.usage, "aclose", None)
+        if aclose is not None:
+            await aclose()
+
+
 def create_app(config: Optional[AttachConfig] = None) -> FastAPI:
     """
     Create a FastAPI app with Attach Gateway functionality
@@ -127,17 +151,38 @@ def create_app(config: Optional[AttachConfig] = None) -> FastAPI:
         title="Attach Gateway",
         description="Identity & Memory side-car for LLM engines",
         version=__version__,
+        lifespan=lifespan,
+    )
+
+    @app.get("/auth/config")
+    async def auth_config():
+        return {
+            "domain": config.auth0_domain,
+            "client_id": config.auth0_client,
+            "audience": config.oidc_audience,
+        }
+
+    # NOTE(review): Starlette puts the last-added middleware outer-most, so CORS (added first) is inner-most
+    app.add_middleware(
+        CORSMiddleware,
+        allow_origins=["http://localhost:9000", "http://127.0.0.1:9000"],
+        allow_methods=["*"],
+        allow_headers=["*"],
+        allow_credentials=True,
     )
+
+    # NOTE(review): with a 60000 default, `limit` is presumably never None, so this gate is always open — confirm
+    limit = int_env("MAX_TOKENS_PER_MIN", 60000)
+    if QUOTA_AVAILABLE and limit is not None:
+        app.add_middleware(TokenQuotaMiddleware)
 
-    # Add middleware
-    app.middleware("http")(jwt_auth_mw)
-    app.middleware("http")(session_mw)
-    app.add_middleware(TokenQuotaMiddleware)
+    app.add_middleware(BaseHTTPMiddleware, dispatch=jwt_auth_mw)
+    app.add_middleware(BaseHTTPMiddleware, dispatch=session_mw)
 
     # Add routes
-    app.include_router(a2a_router)
+    app.include_router(a2a_router, prefix="/a2a")
     app.include_router(proxy_router)
-    # app.include_router(logs_router)
+    app.include_router(logs_router)
     app.include_router(mem_router)
 
     # Setup memory backend

diff --git a/logs.py → logs/__init__.py b/logs.py → logs/__init__.py