# Phase 5 — Live Integration Tests: Sync Engine + Patcher + Submitter

Tests the **production** `CalendarSubmitter`, `TimeboxPatcher`, and `sync_engine`
against the real Google Calendar MCP server.

**Prerequisites:**
- Calendar MCP running: `docker compose up -d calendar-mcp`
- `.env` loaded with `OPENAI_API_KEY` / `OPENROUTER_API_KEY`
- A clean test date (2026-02-15) in the calendar

**Pipeline under test:**
1. Fetch remote → `gcal_response_to_tb_plan` → `TBPlan`
2. Build / generate `TBPlan` (manual or LLM)
3. `CalendarSubmitter.submit_plan()` → sync to GCal
4. `TimeboxPatcher.apply_patch()` (LLM) → refined `TBPlan`
5. Re-submit refined plan → verify incremental ops
6. `CalendarSubmitter.undo_last()` → rollback
7. Cleanup

## 1 · Imports & Environment

In [None]:
# Force reimport of modules to pick up code fixes.
# Each module is imported and then immediately reloaded, in this order:
# sync_engine, submitter, patching. The statement order is kept exactly as-is.
# NOTE(review): presumably submitter/patching import from sync_engine, which
# would be why it is reloaded first — confirm against the module sources.
import importlib
import fateforger.agents.timeboxing.sync_engine as _se
importlib.reload(_se)
import fateforger.agents.timeboxing.submitter as _sub
importlib.reload(_sub)
import fateforger.agents.timeboxing.patching as _pat
importlib.reload(_pat)

# Re-import the updated symbols so the names bound in this kernel are the ones
# defined by the freshly reloaded modules.
from fateforger.agents.timeboxing.sync_engine import (
    gcal_response_to_tb_plan, plan_sync, execute_sync, undo_sync,
    base32hex_id, is_owned_event, SyncTransaction, SyncOpType,
)
from fateforger.agents.timeboxing.submitter import CalendarSubmitter
# The underscore-prefixed names are private helpers of the patching module;
# later cells call them directly.
from fateforger.agents.timeboxing.patching import (
    TimeboxPatcher, _build_context, _extract_patch, _PATCHER_SYSTEM_PROMPT,
)

print("‚úÖ sync_engine + submitter + patching reloaded")

In [None]:
"""Cell 1: Imports from PRODUCTION modules + env verification."""

import asyncio
import json
import os
from datetime import date as date_type, time, timedelta

from dotenv import load_dotenv

load_dotenv()

# ‚îÄ‚îÄ Production imports ‚îÄ‚îÄ
from fateforger.agents.timeboxing.tb_models import (
    ET, TBEvent, TBPlan, FixedStart, FixedWindow, AfterPrev,
)
from fateforger.agents.timeboxing.tb_ops import (
    TBPatch, apply_tb_ops, AddEvents, RemoveEvent, UpdateEvent,
)
from fateforger.agents.timeboxing.sync_engine import (
    gcal_response_to_tb_plan, plan_sync, execute_sync, undo_sync,
    base32hex_id, is_owned_event, SyncTransaction, SyncOpType,
)
from fateforger.agents.timeboxing.submitter import CalendarSubmitter
from fateforger.agents.timeboxing.patching import TimeboxPatcher
from fateforger.adapters.calendar.models import GCalEventsResponse

# MCP
from autogen_ext.tools.mcp import McpWorkbench, StreamableHttpServerParams

# ── Config ──
# MCP_URL can be overridden through the environment; TZ and PLAN_DATE are
# fixed for this notebook and used as defaults by the helper functions.
MCP_URL = os.getenv("MCP_CALENDAR_SERVER_URL", "http://localhost:3000")
TZ = "Europe/Amsterdam"
PLAN_DATE = date_type(2026, 2, 15)  # safe future date

print(f"MCP_URL:    {MCP_URL}")
print(f"PLAN_DATE:  {PLAN_DATE}")
print(f"TZ:         {TZ}")
# Report both LLM credentials named in the prerequisites: the LLM cells of this
# notebook describe their client as "Gemini via OpenRouter", so showing only
# the OpenAI key would give a misleading "MISSING" for an OpenRouter-only setup.
# Only presence is printed, never the key value itself.
print(f"OPENAI key: {'set ‚úì' if os.getenv('OPENAI_API_KEY') else 'MISSING'}")
print(f"OPENROUTER key: {'set ‚úì' if os.getenv('OPENROUTER_API_KEY') else 'MISSING'}")
print(f"LLM provider: {os.getenv('LLM_PROVIDER', 'openai')}")
print("‚úÖ Imports OK")

## 2 · MCP Connectivity + Helper Functions

In [None]:
"""Cell 2: MCP connectivity test + fetch/cleanup helpers."""


def _extract_mcp_text(result) -> str:
    """Return the textual payload of an MCP tool result.

    Looks at ``result.result``; when that is a non-empty list, the first
    element's ``text`` attribute is returned if truthy, otherwise its
    ``content`` attribute if truthy, otherwise ``""``. For anything else
    (no ``result`` attribute, not a list, empty list) the function falls
    back to ``str(result)``.
    """
    items = getattr(result, "result", None)
    # Guard clause: nothing list-like to unpack, so stringify the whole object.
    if not isinstance(items, list) or not items:
        return str(result)

    head = items[0]
    # Try the two known attribute names in priority order.
    for attr in ("text", "content"):
        value = getattr(head, attr, None)
        if value:
            return value
    return ""


async def fetch_remote_state(
    plan_date: date_type = PLAN_DATE, tz: str = TZ,
) -> tuple[TBPlan, dict[str, str], GCalEventsResponse]:
    """Fetch current calendar state, return (TBPlan, event_id_map, raw response).

    Args:
        plan_date: The calendar day whose events are listed.
        tz: IANA timezone name. It defines where the queried day starts and
            ends, and is forwarded to ``gcal_response_to_tb_plan`` as
            ``tz_name``.

    Returns:
        A 3-tuple of the plan and id map produced by
        ``gcal_response_to_tb_plan`` plus the validated ``GCalEventsResponse``.

    Raises:
        zoneinfo.ZoneInfoNotFoundError: if ``tz`` is not a known timezone key.
    """
    # Function-scope imports: the notebook's import cell does not bring in
    # `datetime.datetime` or `zoneinfo`.
    from datetime import datetime, time as day_time
    from zoneinfo import ZoneInfo

    # Derive the UTC offset from `tz` on `plan_date` rather than hard-coding
    # "+01:00", which silently ignored the `tz` argument and is wrong for DST
    # dates or other zones. For the defaults (2026-02-15, Europe/Amsterdam)
    # this yields exactly the same strings as before.
    zone = ZoneInfo(tz)
    window_start = datetime.combine(plan_date, day_time(0, 0, 0), tzinfo=zone)
    window_end = datetime.combine(plan_date, day_time(23, 59, 59), tzinfo=zone)

    wb = McpWorkbench(StreamableHttpServerParams(url=MCP_URL, timeout=15))
    async with wb:
        result = await wb.call_tool(
            "list-events",
            arguments={
                "calendarId": "primary",
                "timeMin": window_start.isoformat(),
                "timeMax": window_end.isoformat(),
            },
        )
    # The tool result carries the JSON document as text; validate it into the
    # response model before converting to a TBPlan.
    raw = _extract_mcp_text(result)
    resp = GCalEventsResponse.model_validate_json(raw)
    plan, id_map = gcal_response_to_tb_plan(resp, plan_date=plan_date, tz_name=tz)
    return plan, id_map, resp


async def cleanup_test_events(plan_date: date_type = PLAN_DATE) -> int:
    """Delete ALL agent-owned (fftb*) events on the test date.

    Fetches the remote state for *plan_date*, keeps the id-map entries whose
    GCal id satisfies ``is_owned_event``, and issues one ``delete-event`` call
    per entry. Returns the number of owned events found (0 when there are
    none); the individual delete results are not inspected.
    """
    _, remote_ids, _ = await fetch_remote_state(plan_date)
    targets = [
        (plan_key, event_id)
        for plan_key, event_id in remote_ids.items()
        if is_owned_event(event_id)
    ]
    if len(targets) == 0:
        print("  No owned events to clean up.")
        return 0

    server = StreamableHttpServerParams(url=MCP_URL, timeout=15)
    wb = McpWorkbench(server)
    async with wb:
        for plan_key, event_id in targets:
            request = {"calendarId": "primary", "eventId": event_id}
            await wb.call_tool("delete-event", arguments=request)
            print(f"  üóë  Deleted {event_id[:20]}‚Ä¶ ({plan_key})")
    return len(targets)


# ── Quick connectivity check ──
# Top-level `await`: this only runs in an environment that supports it
# (e.g. a notebook cell), not as a plain Python module.
# A successful fetch with the default date/timezone doubles as the
# reachability test for the MCP server.
remote, id_map, raw_resp = await fetch_remote_state()
print(f"‚úÖ MCP reachable ‚Äî {len(raw_resp.events)} events on {PLAN_DATE}")
print(f"   Remote TBPlan has {len(remote.events)} events")
print(f"   Event ID map: {len(id_map)} entries")

# Clean slate for the test: remove owned events left on PLAN_DATE by earlier
# runs. `deleted` is the count returned by cleanup_test_events().
deleted = await cleanup_test_events()
print(f"üßπ Cleaned {deleted} stale test events")

## 3 · Phase 5a: Build a TBPlan + Submit to GCal (deterministic, no LLM)

Build a hand-crafted `TBPlan` and submit it via `CalendarSubmitter.submit_plan()`.
Verifies the full create path: `plan_sync → execute_sync → GCal events appear`.

In [None]:
"""Cell 3: Create a baseline TBPlan and submit it to GCal."""

# ── Build a simple morning plan ──
# Five events mixing the three placement kinds imported above:
# FixedWindow (explicit start/end), AfterPrev (duration only) and
# FixedStart (start + duration).
# NOTE(review): presumably AfterPrev events are chained onto the end of the
# preceding event when times are resolved — confirm in tb_models.
baseline_plan = TBPlan(
    date=PLAN_DATE,
    tz=TZ,
    events=[
        TBEvent(n="Morning routine", t=ET.H, d="Wake up, shower, coffee",
                p=FixedWindow(st=time(7, 0), et=time(7, 30))),
        TBEvent(n="Deep work: Project Alpha", t=ET.DW, d="Focus on core feature",
                p=AfterPrev(dur=timedelta(hours=1, minutes=30))),
        TBEvent(n="Coffee break", t=ET.R, d="Recharge",
                p=AfterPrev(dur=timedelta(minutes=15))),
        TBEvent(n="Standup meeting", t=ET.M, d="Team sync",
                p=FixedStart(st=time(9, 15), dur=timedelta(minutes=30))),
        TBEvent(n="Shallow work: emails", t=ET.SW, d="Inbox zero",
                p=AfterPrev(dur=timedelta(minutes=45))),
    ],
)

# Verify times resolve correctly. Each resolved entry is read as a mapping
# with the keys 'start_time', 'end_time', 'n' and 't'.
resolved = baseline_plan.resolve_times()
print("Baseline plan:")
for r in resolved:
    print(f"  {r['start_time']} ‚Äì {r['end_time']}  {r['n']} ({r['t']})")

# ── Submit via CalendarSubmitter ──
# This `submitter` instance is reused by the later cells (incremental submit,
# undo, second iteration).
submitter = CalendarSubmitter(server_url=MCP_URL, timeout_s=15.0)

# Remote state is treated as empty (owned events were just cleaned up), so
# the submit is diffed against an empty plan and an empty id map.
# NOTE(review): events on PLAN_DATE that are not agent-owned are not part of
# this baseline — confirm that is intended.
empty_remote = TBPlan(date=PLAN_DATE, tz=TZ, events=[])
empty_id_map: dict[str, str] = {}

tx = await submitter.submit_plan(
    baseline_plan,
    remote=empty_remote,
    event_id_map=empty_id_map,
)

print(f"\nüì§ Submit result: {tx.status}")
print(f"   Ops: {len(tx.ops)}")
# Pair each op with its result; zip() stops at the shorter of the two lists.
for i, (op, res) in enumerate(zip(tx.ops, tx.results)):
    ok = "‚úÖ" if res.get("ok") else "‚ùå"
    print(f"   {ok} {op.op_type.value}: {op.gcal_event_id[:20]}‚Ä¶")

# One op is expected per baseline event (five in total).
assert tx.status == "committed", f"Expected committed, got {tx.status}"
assert len(tx.ops) == 5, f"Expected 5 creates, got {len(tx.ops)}"
print("\n‚úÖ Phase 5a: Baseline plan submitted successfully")

## 4 · Phase 5b: Verify Round-Trip (fetch back from GCal)

Fetch the calendar state back and verify the events we just created are there.

In [None]:
"""Cell 4: Verify events appeared in GCal."""

# Fresh read of PLAN_DATE after the baseline submit.
remote_after, id_map_after, _ = await fetch_remote_state()

print(f"Remote plan after submit: {len(remote_after.events)} events")
for ev in remote_after.events:
    # Not every placement object has both `st` and `et`; print "?" for a
    # missing one instead of raising AttributeError.
    st = ev.p.st if hasattr(ev.p, "st") else "?"
    et = ev.p.et if hasattr(ev.p, "et") else "?"
    print(f"  {st} ‚Äì {et}  {ev.n} ({ev.t.value})")

print(f"\nEvent ID map ({len(id_map_after)} entries):")
for key, gcal_id in id_map_after.items():
    # Marker distinguishes ids accepted by is_owned_event() from all others.
    owned = "ü§ñ" if is_owned_event(gcal_id) else "üë§"
    print(f"  {owned} {key} ‚Üí {gcal_id[:24]}‚Ä¶")

# Verify we can find all our events: at least the five baseline events must be
# present as owned ids (">=" rather than "==" tolerates extra owned events).
owned_count = sum(1 for v in id_map_after.values() if is_owned_event(v))
assert owned_count >= 5, f"Expected ‚â•5 owned events, got {owned_count}"
print(f"\n‚úÖ Phase 5b: {owned_count} owned events verified in GCal")

## 5 · Phase 5c: LLM-Powered Patch (TimeboxPatcher)

Use the production `TimeboxPatcher` to apply a user refinement instruction.
This exercises the full LLM → `TBPatch` → `apply_tb_ops` pipeline.

In [None]:
"""Cell 5: LLM-powered refinement via TimeboxPatcher.

Uses the project's standard Gemini model via OpenRouter (build_autogen_chat_client).
Avoids output_content_type=TBPatch because OpenAI SDK's structured output
rejects ``oneOf`` from Pydantic discriminated unions. Instead we include the
TBPatch JSON schema in the system prompt and parse the text response.
"""
import json
from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.messages import TextMessage
from autogen_core import CancellationToken
from fateforger.llm import build_autogen_chat_client
from fateforger.agents.timeboxing.patching import (
    _build_context,
    _extract_patch,
    _PATCHER_SYSTEM_PROMPT,
)

# Re-fetch the current remote state as our "current" plan
current_plan, current_id_map, _ = await fetch_remote_state()
print(f"Current plan: {len(current_plan.events)} events")

# ── Build model client using the project's standard factory (Gemini via OpenRouter) ──
# `model_client` is reused by the second refinement cell.
model_client = build_autogen_chat_client("timebox_patcher")

# Build context the same way apply_patch does.
# Arguments: the current plan, the user instruction, and two empty lists whose
# meaning is defined by _build_context (not visible in this notebook).
context = _build_context(current_plan, (
    "Add a 30-minute lunch break at 12:00 (regeneration type). "
    "Also add a 45-minute deep work session right after lunch."
), [], [])

# Include the TBPatch JSON schema in the system prompt so the LLM
# produces valid JSON without needing response_format (which rejects oneOf).
# `system_msg` is also reused by the second refinement cell.
schema_json = json.dumps(TBPatch.model_json_schema(), indent=2)
system_msg = (
    _PATCHER_SYSTEM_PROMPT
    + f"\n\nTBPatch JSON Schema:\n```json\n{schema_json}\n```"
    + "\n\nReturn ONLY the raw TBPatch JSON object ‚Äî no markdown fences, no commentary."
)

agent = AssistantAgent(
    name="TimeboxPatcherAgent",
    model_client=model_client,
    system_message=system_msg,
    reflect_on_tool_use=False,
)

print("‚è≥ Calling LLM for patch (Gemini via OpenRouter)...")

# Bound the LLM round-trip: asyncio.wait_for raises a timeout error after
# 120 seconds instead of letting the cell hang.
response = await asyncio.wait_for(
    agent.on_messages(
        [TextMessage(content=context, source="user")],
        CancellationToken(),
    ),
    timeout=120.0,
)

# Parse the raw text response into TBPatch.
# `raw` is only used for the debug print (first 600 characters); the patch
# itself is extracted from the full response object.
raw = response.chat_message.content
print(f"Raw response ({len(raw)} chars):\n{raw[:600]}")

patch = _extract_patch(response)
# `patched_plan` is the desired state submitted by the incremental-sync cell.
patched_plan = apply_tb_ops(current_plan, patch)

print(f"\nüìù Patch produced {len(patch.ops)} ops:")
for op in patch.ops:
    print(f"  {op.op} ‚Äî {op}")

print(f"\nPatched plan: {len(patched_plan.events)} events")
patched_resolved = patched_plan.resolve_times()
for r in patched_resolved:
    print(f"  {r['start_time']} ‚Äì {r['end_time']}  {r['n']} ({r['t']})")

# Basic sanity checks: the instruction asks for two additions, but only a net
# growth of at least one event is required because LLM output may vary.
assert len(patched_plan.events) >= len(current_plan.events) + 1, (
    f"Expected at least 1 new event, got {len(patched_plan.events)} vs {len(current_plan.events)}"
)
print("\n‚úÖ Phase 5c: LLM patch applied successfully")

## 6 · Phase 5d: Incremental Sync (patched plan → GCal)

Submit the patched plan with the remote snapshot as baseline.
`plan_sync` should produce only the new/changed ops — NOT re-create everything.

In [None]:
"""Cell 6: Submit the patched plan incrementally."""

# Re-fetch remote state (baseline after cell 3 submit)
remote_before_patch, id_map_before_patch, _ = await fetch_remote_state()

# Use the submitter to diff and sync: the freshly fetched remote plan and id
# map are the baseline, `patched_plan` (from the LLM patch cell) is the target.
tx2 = await submitter.submit_plan(
    patched_plan,
    remote=remote_before_patch,
    event_id_map=id_map_before_patch,
)

print(f"üì§ Incremental sync: {tx2.status}")
print(f"   Total ops: {len(tx2.ops)}")

# Bucket the ops by type for the summary line.
creates = [op for op in tx2.ops if op.op_type == SyncOpType.CREATE]
updates = [op for op in tx2.ops if op.op_type == SyncOpType.UPDATE]
deletes = [op for op in tx2.ops if op.op_type == SyncOpType.DELETE]
print(f"   Creates: {len(creates)}, Updates: {len(updates)}, Deletes: {len(deletes)}")

for i, (op, res) in enumerate(zip(tx2.ops, tx2.results)):
    ok = "‚úÖ" if res.get("ok") else "‚ùå"
    print(f"   {ok} {op.op_type.value}: {op.gcal_event_id[:20]}‚Ä¶")

# "partial" is accepted here as well as "committed".
# NOTE(review): nothing below asserts that the op set is actually incremental
# (e.g. fewer creates than plan events); only the status is checked.
assert tx2.status in ("committed", "partial"), f"Unexpected status: {tx2.status}"
print(f"\n‚úÖ Phase 5d: Incremental sync completed ‚Äî {len(creates)} creates, {len(updates)} updates")

## 7 · Phase 5e: Verify Final Calendar State

Fetch the calendar again and verify all events match the patched plan.

In [None]:
"""Cell 7: Verify final calendar state matches patched plan."""

final_remote, final_id_map, _ = await fetch_remote_state()

print(f"Final calendar state: {len(final_remote.events)} events")
for ev in final_remote.events:
    st = ev.p.st if hasattr(ev.p, "st") else "?"
    et = ev.p.et if hasattr(ev.p, "et") else "?"
    print(f"  {st} ‚Äì {et}  {ev.n} ({ev.t.value})")

# Verify no remaining diff between desired and remote
ops_remaining = plan_sync(
    final_remote, patched_plan, final_id_map, calendar_id="primary",
)
print(f"\nRemaining sync ops: {len(ops_remaining)}")
for op in ops_remaining:
    print(f"  ‚ö†Ô∏è  {op.op_type.value}: {op.gcal_event_id[:20]}‚Ä¶")

# The number of owned events should match patched plan events
owned_final = sum(1 for v in final_id_map.values() if is_owned_event(v))
print(f"\nOwned events: {owned_final}, Patched plan events: {len(patched_plan.events)}")
print("‚úÖ Phase 5e: Final state verified")

## 8 · Phase 5f: Undo Last Transaction

Test `CalendarSubmitter.undo_last()` — it should reverse the incremental sync from cell 6.

In [None]:
"""Cell 8: Test undo_last() — reverse the incremental sync."""

# Uses the same `submitter` instance that performed the earlier submits.
undo_tx = await submitter.undo_last()

# undo_last() may return None; in that case the cell only prints a warning and
# performs no assertions.
if undo_tx is None:
    print("‚ö†Ô∏è  No transaction to undo (submitter may have been reset)")
else:
    print(f"‚Ü©Ô∏è  Undo result: {undo_tx.status}")
    print(f"   Undo ops: {len(undo_tx.ops)}")
    for i, (op, res) in enumerate(zip(undo_tx.ops, undo_tx.results)):
        ok = "‚úÖ" if res.get("ok") else "‚ùå"
        print(f"   {ok} {op.op_type.value}: {op.gcal_event_id[:20]}‚Ä¶")

    # Verify state after undo: should be back to baseline (5 original events).
    # NOTE(review): the counts below are printed but not asserted; only the
    # transaction status is checked.
    after_undo, after_undo_map, _ = await fetch_remote_state()
    owned_after_undo = sum(1 for v in after_undo_map.values() if is_owned_event(v))
    print(f"\nAfter undo: {len(after_undo.events)} events ({owned_after_undo} owned)")
    assert undo_tx.status in ("undone", "undo_partial"), f"Got {undo_tx.status}"
    print("‚úÖ Phase 5f: Undo successful")

## 9 · Phase 5g: Second LLM Refinement Iteration

Apply another refinement to validate the iterate-until-pass loop:
1. Re-fetch remote (should reflect baseline after undo)
2. Ask LLM to make a different change
3. Submit and verify

In [None]:
"""Cell 9: Second refinement iteration ‚Äî rename + reschedule an event."""

# Re-fetch current remote state after undo
iter2_remote, iter2_map, _ = await fetch_remote_state()
print(f"Starting plan: {len(iter2_remote.events)} events")

# Build context for second refinement
context2 = _build_context(iter2_remote, (
    "Rename 'Shallow work: emails' to 'Admin: inbox + Slack'. "
    "Also move the standup meeting 15 minutes earlier to 09:00."
), [], [])

# Reuse the same agent setup from Cell 5
agent2 = AssistantAgent(
    name="TimeboxPatcherAgent",
    model_client=model_client,
    system_message=system_msg,
    reflect_on_tool_use=False,
)

print("‚è≥ Calling LLM for patch 2 (Gemini via OpenRouter)...")
response2 = await asyncio.wait_for(
    agent2.on_messages(
        [TextMessage(content=context2, source="user")],
        CancellationToken(),
    ),
    timeout=120.0,
)

raw2 = response2.chat_message.content
print(f"Raw response ({len(raw2)} chars):\n{raw2[:600]}")

patch2 = _extract_patch(response2)
patched2 = apply_tb_ops(iter2_remote, patch2)

print(f"\nüìù Patch 2: {len(patch2.ops)} ops:")
for op in patch2.ops:
    print(f"  {op.op} ‚Äî {op}")

patched2_resolved = patched2.resolve_times()
print(f"\nPatched plan v2: {len(patched2.events)} events")
for r in patched2_resolved:
    print(f"  {r['start_time']} ‚Äì {r['end_time']}  {r['n']} ({r['t']})")

# Submit v2
tx3 = await submitter.submit_plan(
    patched2, remote=iter2_remote, event_id_map=iter2_map,
)
print(f"\nüì§ Submit v2: {tx3.status}, {len(tx3.ops)} ops")
for op, res in zip(tx3.ops, tx3.results):
    ok = "‚úÖ" if res.get("ok") else "‚ùå"
    print(f"   {ok} {op.op_type.value}: {op.gcal_event_id[:20]}‚Ä¶")

print("\n‚úÖ Phase 5g: Second iteration completed")

## 10 · Phase 6: Cleanup + Final Validation Report

Clean up all test events and print the final summary.

In [None]:
"""Cell 10: Cleanup and final validation report."""

# ── Clean up all test events ──
deleted_final = await cleanup_test_events()
print(f"üßπ Final cleanup: {deleted_final} events deleted")

# Verify clean slate: re-fetch and require that no id in the map is still
# recognised as agent-owned.
verify_remote, verify_map, _ = await fetch_remote_state()
owned_remaining = sum(1 for v in verify_map.values() if is_owned_event(v))
assert owned_remaining == 0, f"Cleanup failed: {owned_remaining} owned events remain"
print(f"‚úÖ Calendar clean ‚Äî {owned_remaining} owned events remain")

# ── Final report ──
# NOTE(review): the statuses below are literals, not derived from the earlier
# cells. The report is only meaningful when every cell was run in order and
# none of their assertions failed.
print("\n" + "=" * 60)
print("  PHASE 5 INTEGRATION TEST ‚Äî FINAL REPORT")
print("=" * 60)
results = {
    "5a ‚Äî Baseline submit":    "‚úÖ PASS",
    "5b ‚Äî Round-trip verify":  "‚úÖ PASS",
    "5c ‚Äî LLM patch":          "‚úÖ PASS",
    "5d ‚Äî Incremental sync":   "‚úÖ PASS",
    "5e ‚Äî Final state verify": "‚úÖ PASS",
    "5f ‚Äî Undo":               "‚úÖ PASS",
    "5g ‚Äî Second iteration":   "‚úÖ PASS",
    "Cleanup":                 "‚úÖ PASS",
}
for test, status in results.items():
    print(f"  {status}  {test}")
print("=" * 60)
print("  ALL PHASE 5 TESTS PASSED üéâ")
print("=" * 60)