# 🔬 Trifecta Telemetry Analysis

This notebook provides comprehensive analysis of Trifecta CLI telemetry data including:
- Performance metrics (P50/P95/P99 latencies)
- Command usage patterns  
- Search effectiveness
- Cold vs Warm run comparisons
- Token usage analysis

---
## 1. Setup & Data Loading

In [1]:
import json
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from pathlib import Path
from datetime import datetime, timedelta
import sys
import warnings

warnings.filterwarnings('ignore', category=FutureWarning)

pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

In [2]:
print("🔧 Environment Check")
print("=" * 50)
print(f"Python: {sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}")
print(f"Working directory: {Path.cwd()}")

# Auto-detect repository root by looking for telemetry directory
possible_paths = [
    Path(".."),                          # telemetry_analysis/ -> repo root
    Path.cwd().parent,                   # Same as above, explicit
    Path("."),                           # Current directory (for testing)
    Path("../trifecta_dope"),             # debug_terminal/ -> repo root
]

REPO_ROOT = None
for path in possible_paths:
    if (path / "_ctx" / "telemetry" / "events.jsonl").exists():
        REPO_ROOT = path.resolve()
        break

if REPO_ROOT is None:
    raise RuntimeError(
        "Cannot proceed without valid telemetry path. "
        "Expected _ctx/telemetry/events.jsonl not found in any searched location. "
        "Please run this notebook from the trifecta_dope directory."
    )
else:
    print(f"✅ Auto-detected repo root: {REPO_ROOT}")

# Analysis window: 7 days provides recent data while ensuring sufficient volume
# Set to 0 for all-time analysis (may be slow for large datasets)
DAYS = 7

# Operations for cold/warm performance comparison
# Maps display name -> (section, operation, metric_key) for navigating telemetry JSON
# Used to extract timing data from last_run_cold.json and last_run_warm.json
OPERATIONS = {
    'Context Build': ('ctx', 'build', 'total_time_ms'),  # Full context rebuild
    'AST Parse': ('ast', 'parse', 'total_time_ms'),       # AST parsing operation
    'Context Sync': ('ctx', 'sync', 'total_time_ms'),     # Context sync to disk
}

print(f"\nAnalyzing last {DAYS} days" if DAYS > 0 else "Analyzing all data")

🔧 Environment Check
Python: 3.14.2
Working directory: /Users/felipe_gonzalez/Developer/agent_h/trifecta_dope/telemetry_analysis
✅ Auto-detected repo root: /Users/felipe_gonzalez/Developer/agent_h/trifecta_dope

Analyzing last 7 days


## Load Telemetry Data

In [3]:
def load_telemetry(repo_root: Path, days: int = 7) -> pd.DataFrame:    """Load and parse telemetry events from events.jsonl into a pandas DataFrame.        This function:    - Loads JSONL-format telemetry events    - Parses timestamps and filters by date range    - Extracts nested fields (status, hits, tokens, warnings)    - Handles missing columns and malformed data gracefully        Args:        repo_root: Repository root path containing _ctx/telemetry/ directory        days: Number of days to analyze (0 = all data, N = last N days)        Returns:        DataFrame with columns: timestamp, date, timing_ms, status, hits,                                 total_tokens, retrieved_tokens, warnings_count, cmd        Raises:        FileNotFoundError: If telemetry file doesn't exist at expected path        ValueError: If no events found or required columns are missing    """    tel_dir = repo_root / "_ctx" / "telemetry"    events_path = tel_dir / "events.jsonl"        if not events_path.exists():        raise FileNotFoundError(f"Telemetry file not found: {events_path}")        # Load events with error logging    events = []    decode_errors = 0    with open(events_path, 'r') as f:        for line_num, line in enumerate(f, 1):            if line.strip():                try:                    events.append(json.loads(line))                except json.JSONDecodeError as e:                    decode_errors += 1                    if decode_errors <= 3:  # Show first 3 errors                        print(f"⚠️  JSON decode error at line {line_num}: {e}")        if decode_errors > 0:        print(f"⚠️  Total JSON decode errors: {decode_errors} (skipped)")        if not events:        raise ValueError(f"No events found in {events_path}")        # Convert to DataFrame    df = pd.DataFrame(events)        # Handle malformed timestamps by coercing to NaT, then drop invalid rows    # This prevents AttributeError when accessing .dt.date later    # Normalize all timestamps to UTC naive to avoid timezone comparison issues    df['timestamp'] = pd.to_datetime(df['ts'], errors='coerce', utc=True).dt.tz_localize(None)    before_count = len(df)    na_count = df['timestamp'].isna().sum()        if na_count > 0:        print(f"⚠️  Dropping {na_count} events with unparseable timestamps")        df = df.dropna(subset=['timestamp'])    df['date'] = df['timestamp'].dt.date        # Validate required columns exist before accessing (telemetry schema may vary)    required_cols = ['timing_ms', 'result', 'warnings', 'cmd']    missing_cols = [col for col in required_cols if col not in df.columns]    if missing_cols:        print(f"❌ Schema validation failed!")        print(f"   Missing columns: {missing_cols}")        print(f"   Available columns: {list(df.columns)}")        raise ValueError(f"Missing required columns: {missing_cols}")        # Extract nested fields    df['timing_ms'] = df['timing_ms'].fillna(0).astype(int)    df['status'] = df['result'].apply(lambda x: x.get('status', 'unknown') if isinstance(x, dict) else 'unknown')    df['hits'] = df['result'].apply(lambda x: x.get('hits', 0) if isinstance(x, dict) else 0)        # Token extraction from telemetry events    # Schema: tokens field may be missing (older events) or present (newer events)    # Expected structure when present: {'total_tokens': int, 'retrieved_tokens': int}    # Commands with tokens: ctx.search, ctx.build (varies by command type)    if 'tokens' in df.columns:        def safe_get_tokens(row):            """Extract token counts from a telemetry event row.                        Args:                row: DataFrame row (Series) with 'tokens' column containing dict or NaN                        Returns:                Tuple of (total_tokens, retrieved_tokens), defaulting to (0, 0)            """            tokens = row.get('tokens', {}) if isinstance(row, dict) else row            if isinstance(tokens, dict):                return tokens.get('total_tokens', 0), tokens.get('retrieved_tokens', 0)            return 0, 0                token_data = df.apply(safe_get_tokens, axis=1, result_type='expand')        df['total_tokens'] = token_data[0].astype(int)        df['retrieved_tokens'] = token_data[1].astype(int)    else:        df['total_tokens'] = 0        df['retrieved_tokens'] = 0        df['warnings_count'] = df['warnings'].apply(lambda x: len(x) if isinstance(x, list) else 0)        # Filter by date    # Note: cutoff uses local time, all timestamps are normalized to UTC    if days > 0:        cutoff = pd.Timestamp(datetime.now()).tz_localize(None)        df = df[df['timestamp'] >= cutoff]        print(f"Filtered to last {days} days (since {cutoff.date()})")        return df# Load the data with error handlingtry:    events_df = load_telemetry(REPO_ROOT, DAYS)    print(f"\n✅ Loaded {len(events_df)} events")        if events_df['total_tokens'].sum() == 0:        print("⚠️  No token data available in telemetry events")except FileNotFoundError as e:    print(f"❌ Telemetry file not found!")    print(f"   Searched path: {REPO_ROOT / '_ctx' / 'telemetry'}")    print(f"   Working directory: {Path.cwd()}")    print(f"   Please ensure telemetry data exists")    raiseexcept ValueError as e:    print(f"❌ Data validation error!")    print(f"   Issue: {e}")    print(f"   Check telemetry format and version compatibility")    raiseexcept Exception as e:    print(f"❌ Unexpected error loading telemetry!")    print(f"   Error type: {type(e).__name__}")    print(f"   Message: {e}")    raise

❌ Unexpected error loading telemetry!
   Error type: TypeError
   Message: Invalid comparison between dtype=datetime64[ns, UTC-03:00] and datetime


TypeError: Invalid comparison between dtype=datetime64[ns, UTC-03:00] and datetime

## 2. Data Overview

This section provides basic statistics about the telemetry dataset:
- Total event volume and date range
- Command distribution  
- Run identification (if available)

**What to look for:**
- Sufficient data volume (100+ events recommended for statistical significance)
- Recent data (check date range matches expectations)
- Expected command types (ctx.build, ctx.search, etc.)

In [None]:
if events_df.empty:
    print("⚠️  No data available after filtering!")
else:
    print("📊 Data Overview")
    print("=" * 50)
    print(f"Total Events:       {len(events_df):,}")
    print(f"Date Range:         {events_df['date'].min()} to {events_df['date'].max()}")
    print(f"Unique Commands:    {events_df['cmd'].nunique()}")
    
    # run_id column may not exist in all telemetry versions
    if 'run_id' in events_df.columns:
        print(f"Total Runs:         {events_df['run_id'].nunique()}")
    else:
        print(f"Total Runs:         N/A (no run_id column)")
    
    print(f"\nCommand Types:")
    print(events_df['cmd'].value_counts().to_string())

## 3. Performance Analysis

Analyzes latency distributions across commands to identify performance bottlenecks.

In [None]:
timed = events_df[events_df['timing_ms'] > 0].copy()
print(f"Events with timing data: {len(timed)}")

if not timed.empty:
    percentiles = timed.groupby('cmd')['timing_ms'].agg([
        ('Count', 'count'),
        ('P50', 'median'),
        ('P95', lambda x: x.quantile(0.95)),
        ('P99', lambda x: x.quantile(0.99)),
        ('Mean', 'mean'),
        ('Max', 'max')
    ]).round(1)
    
    percentiles = percentiles.sort_values('Count', ascending=False)
    percentiles
else:
    print("⚠️  No timing data available")
    # Initialize empty DataFrame to prevent NameError if cells run out of order
    percentiles = pd.DataFrame()

In [None]:
if not timed.empty:
    top_cmds = timed['cmd'].value_counts().head(10).index
    filtered = timed[timed['cmd'].isin(top_cmds)]
    
    fig = px.box(
        filtered, 
        x='cmd', 
        y='timing_ms', 
        title='Latency Distribution by Command (Top 10)',
        labels={'cmd': 'Command', 'timing_ms': 'Latency (ms)'}
    )
    fig.update_layout(yaxis_type="log", height=500)
    fig.show()
else:
    print("⚠️  No timing data available for chart")

In [None]:
if not timed.empty:    fig = px.histogram(        timed,        x='timing_ms',        title='Overall Latency Distribution',        labels={'timing_ms': 'Latency (ms)'},        nbins=50,        log_y=True    )    fig.update_layout(xaxis_type="log")    fig.show()else:    print("⚠️  No timing data available for chart")

## 4. Command Usage Patterns

Examines which commands are used most frequently to understand usage patterns.

In [None]:
cmd_counts = events_df['cmd'].value_counts()

fig = px.pie(
    values=cmd_counts.values, 
    names=cmd_counts.index, 
    title='Command Distribution',
    hole=0.3
)
fig.update_layout(height=500)
fig.show()

In [None]:
fig = px.bar(
    x=cmd_counts.index,
    y=cmd_counts.values,
    title='Command Frequency',
    labels={'x': 'Command', 'y': 'Count'},
    text=cmd_counts.values
)
fig.update_traces(textposition='outside')
fig.update_layout(height=500)
fig.show()

## 5. Search Effectiveness

Analyzes ctx.search hit rates to evaluate search quality.

**Gauge thresholds:**
- **< 50%**: Poor performance (red zone)
- **50-80%**: Acceptable (yellow zone)  
- **> 80%**: Target (green zone)

In [None]:
searches = events_df[events_df['cmd'] == 'ctx.search'].copy()

# Initialize with default value - allows downstream cells to check safely
# even if this cell is re-run out of order
hit_rate = None

if len(searches) > 0:
    total_searches = len(searches)
    with_hits = (searches['hits'] > 0).sum()
    hit_rate = (with_hits / total_searches * 100) if total_searches > 0 else 0
    
    print(f"🔍 Search Analysis")
    print("=" * 50)
    print(f"Total Searches:      {total_searches}")
    print(f"With Hits:           {with_hits} ({hit_rate:.1f}%)")
    print(f"Zero Hits:           {total_searches - with_hits} ({100-hit_rate:.1f}%)")
    
    fig = go.Figure(go.Indicator(
        mode="gauge+number",
        value=hit_rate,
        title={'text': "Search Hit Rate (%)"},
        gauge={
            'axis': {'range': [0, 100]},
            'bar': {'color': "darkblue"},
            'steps': [
                {'range': [0, 50], 'color': "lightgray"},   # Below 50%: poor performance
                {'range': [50, 80], 'color': "gray"},       # 50-80%: acceptable
            ],
            'threshold': {
                'line': {'color': "red", 'width': 4},
                'thickness': 0.75,
                'value': 80  # 80% is the target for good search effectiveness
            }
        }
    ))
    fig.update_layout(height=400)
    fig.show()
else:
    print("No search events found.")

In [None]:
if 'searches' in locals() and len(searches) > 0:
    # Use named aggregation to avoid FutureWarning from deprecated .apply()
    daily_hit_rate = searches.groupby('date').agg(
        hit_rate=('hits', lambda x: (x > 0).sum() / len(x) * 100)
    ).reset_index()
    
    fig = px.line(
        daily_hit_rate,
        x='date',
        y='hit_rate',
        title='Daily Search Hit Rate',
        labels={'date': 'Date', 'hit_rate': 'Hit Rate (%)'},
        markers=True
    )
    fig.update_layout(height=400)
    fig.show()
else:
    print("⚠️  No search events available for chart")

## 6. Cold vs Warm Comparison

Analyzes cache effectiveness by comparing cold (no cache) vs warm (cached) run performance.

In [None]:
tel_dir = REPO_ROOT / "_ctx" / "telemetry"

cold_run = None
warm_run = None

cold_path = tel_dir / "last_run_cold.json"
warm_path = tel_dir / "last_run_warm.json"

if cold_path.exists():
    with open(cold_path) as f:
        cold_run = json.load(f)
    print(f"✅ Loaded cold run data: {cold_path}")
else:
    print("⚠️  Cold run data not found")

if warm_path.exists():
    with open(warm_path) as f:
        warm_run = json.load(f)
    print(f"✅ Loaded warm run data: {warm_path}")
else:
    print("⚠️  Warm run data not found")

In [None]:
if cold_run and warm_run:
    comparison_data = []
    
    for display_name, (section, op, metric) in OPERATIONS.items():
        cold_time = cold_run.get(section, {}).get(op, {}).get(metric, 0)
        warm_time = warm_run.get(section, {}).get(op, {}).get(metric, 0)
        speedup = cold_time / warm_time if warm_time > 0 else 1.0
        
        comparison_data.append({
            'Operation': display_name,
            'Cold (ms)': cold_time,
            'Warm (ms)': warm_time,
            'Speedup': f"{speedup:.2f}x" if speedup != 1.0 else 'N/A'
        })
    
    comparison_df = pd.DataFrame(comparison_data)
    comparison_df
else:
    print("⚠️  Cold/warm data not available for comparison")

In [None]:
if cold_run and warm_run:
    categories = []
    cold_times = []
    warm_times = []
    
    for display_name, (section, op, metric) in OPERATIONS.items():
        cold_time = cold_run.get(section, {}).get(op, {}).get(metric, 0)
        warm_time = warm_run.get(section, {}).get(op, {}).get(metric, 0)
        
        if cold_time > 0 or warm_time > 0:
            categories.append(display_name)
            cold_times.append(cold_time)
            warm_times.append(warm_time)
    
    if categories:
        fig = go.Figure(data=[
            go.Bar(name='Cold Run', x=categories, y=cold_times, marker_color='lightblue'),
            go.Bar(name='Warm Run', x=categories, y=warm_times, marker_color='lightgreen')
        ])
        
        fig.update_layout(
            title='Cold vs Warm Run Latency Comparison',
            xaxis_title='Operation',
            yaxis_title='Time (ms)',
            barmode='group',
            height=400
        )
        fig.show()
else:
    print("⚠️  Cold/warm data not available for chart")

In [None]:
# Initialize speedup with default value - allows safe checking in later cells
speedup = None

if cold_run and warm_run:
    cold_time = cold_run.get('ctx', {}).get('build', {}).get('total_time_ms', 0)
    warm_time = warm_run.get('ctx', {}).get('build', {}).get('total_time_ms', 0)
    
    if warm_time > 0:
        speedup = cold_time / warm_time
        
        # Gauge range: 0.5 (slowdown) to max(3.0, speedup*1.2) (excellent speedup)
        # Steps: <1.0 is slowdown (gray), >1.0 is improvement (green)
        fig = go.Figure(go.Indicator(
            mode="gauge+number",
            value=speedup,
            title={'text': f"Warm vs Cold Speedup (Context Build)"},
            gauge={
                'axis': {'range': [0.5, max(3.0, speedup * 1.2)]},
                'bar': {'color': "green"},
                'steps': [
                    {'range': [0.5, 1.0], 'color': "lightgray"},
                    {'range': [1.0, 2.0], 'color': "lightgreen"},
                ],
            }
        ))
        fig.update_layout(height=400)
        fig.show()
        
        print(f"\n📈 Performance Improvement")
        print(f"Cold Build Time:  {cold_time}ms")
        print(f"Warm Build Time:  {warm_time}ms")
        print(f"Speedup:          {speedup:.2f}x")
    else:
        print("⚠️  Warm time is 0, cannot calculate speedup")
else:
    print("⚠️  Cold/warm data not available for speedup")

## 7. Token Usage Analysis

Evaluates token consumption patterns and retrieval efficiency across commands.

In [None]:
token_usage = events_df.groupby('cmd')[['total_tokens', 'retrieved_tokens']].sum()
token_usage = token_usage.sort_values('total_tokens', ascending=False)
token_usage = token_usage[token_usage['total_tokens'] > 0]

# Safe division: use where() to handle edge cases
token_usage['efficiency'] = (
    token_usage['retrieved_tokens'] / token_usage['total_tokens'] * 100
).where(token_usage['total_tokens'] > 0, 0).round(1)
token_usage

In [None]:
if not token_usage.empty:
    fig = px.bar(
        x=token_usage.index,
        y=token_usage['total_tokens'],
        title='Total Token Usage by Command',
        labels={'x': 'Command', 'y': 'Total Tokens'},
        text=token_usage['total_tokens']
    )
    fig.update_traces(textposition='outside')
    fig.update_layout(height=500)
    fig.show()
else:
    print("⚠️  No token data available")

In [None]:
cmd_token_stats = events_df[events_df['total_tokens'] > 0].groupby('cmd').agg({
    'total_tokens': 'sum',
    'retrieved_tokens': 'sum',
    'run_id': 'count'
}).rename(columns={'run_id': 'count'})

cmd_token_stats = cmd_token_stats.reset_index()

if not cmd_token_stats.empty:
    fig = px.scatter(
        cmd_token_stats,
        x='total_tokens',
        y='retrieved_tokens',
        size='count',
        hover_data=['cmd'],
        title='Token Efficiency: Total vs Retrieved',
        labels={'total_tokens': 'Total Tokens', 'retrieved_tokens': 'Retrieved Tokens', 'count': 'Event Count'}
    )
    max_tokens = cmd_token_stats['total_tokens'].max()
    fig.add_shape(
        type="line", 
        x0=0, y0=0, 
        x1=max_tokens, y1=max_tokens,
        line=dict(dash="dash", color="red")
    )
    fig.update_layout(height=500)
    fig.show()
else:
    print("⚠️  No token data available for chart")

## 8. Timeline Analysis

Examines event patterns and command usage over time to identify trends.

In [None]:
daily_counts = events_df.groupby('date').size().reset_index()
daily_counts.columns = ['date', 'count']

fig = px.line(
    daily_counts,
    x='date',
    y='count',
    title='Events Over Time',
    labels={'date': 'Date', 'count': 'Event Count'},
    markers=True
)
fig.update_layout(height=400)
fig.show()