diff --git a/src/compat/maestro/runtime-interactions.ts b/src/compat/maestro/runtime-interactions.ts index 470fa631f..737699aa6 100644 --- a/src/compat/maestro/runtime-interactions.ts +++ b/src/compat/maestro/runtime-interactions.ts @@ -502,6 +502,10 @@ async function clickMaestroSnapshotTarget( ...params.baseReq, command: 'click', positionals: [String(point.x), String(point.y)], + flags: { + ...params.baseReq.flags, + interactionOutcome: { retryOnNoChange: true }, + }, }); if (response.ok) clearMaestroVisibleContext(params.scope); return { @@ -550,6 +554,7 @@ async function invokeMaestroFuzzyTapOn( flags: { ...params.baseReq.flags, findFirst: true, + interactionOutcome: { retryOnNoChange: true }, }, }); if (findResponse.ok) return { retry: false, response: findResponse }; diff --git a/src/core/dispatch-context.ts b/src/core/dispatch-context.ts index f979a38e1..b99e4d0b4 100644 --- a/src/core/dispatch-context.ts +++ b/src/core/dispatch-context.ts @@ -15,6 +15,9 @@ export type MaestroRuntimeFlags = { export type CommandFlags = Omit & { batchSteps?: DaemonBatchStep[]; clearAppState?: boolean; + interactionOutcome?: { + retryOnNoChange?: boolean; + }; launchArgs?: string[]; maestro?: MaestroRuntimeFlags; replayBackend?: string; diff --git a/src/daemon/__tests__/interaction-outcome-policy.test.ts b/src/daemon/__tests__/interaction-outcome-policy.test.ts new file mode 100644 index 000000000..4816eb0f6 --- /dev/null +++ b/src/daemon/__tests__/interaction-outcome-policy.test.ts @@ -0,0 +1,129 @@ +import assert from 'node:assert/strict'; +import { test } from 'vitest'; +import type { SnapshotState } from '../../utils/snapshot.ts'; +import { + buildInteractionSurfaceSignature, + classifyInteractionSurfaceChange, + markPendingInteractionOutcome, + stripInternalInteractionOutcomeFlags, +} from '../interaction-outcome-policy.ts'; +import type { SessionState } from '../types.ts'; +import { IOS_SIMULATOR } from '../../__tests__/test-utils/device-fixtures.ts'; + +test('classifyInteractionSurfaceChange treats identical surfaces as unchanged', () => { + const before = buildInteractionSurfaceSignature(makeSnapshot('Inbox').nodes); + const after = buildInteractionSurfaceSignature(makeSnapshot('Inbox').nodes); + + assert.equal(classifyInteractionSurfaceChange(before, after), 'unchanged'); +}); + +test('classifyInteractionSurfaceChange tolerates tiny rect drift', () => { + const before = buildInteractionSurfaceSignature(makeSnapshot('Inbox', 100).nodes); + const after = buildInteractionSurfaceSignature(makeSnapshot('Inbox', 100.4).nodes); + + assert.equal(classifyInteractionSurfaceChange(before, after), 'unchanged'); +}); + +test('classifyInteractionSurfaceChange detects semantic screen changes', () => { + const before = buildInteractionSurfaceSignature(makeSnapshot('Inbox').nodes); + const after = buildInteractionSurfaceSignature(makeSnapshot('Article detail').nodes); + + assert.equal(classifyInteractionSurfaceChange(before, after), 'changed'); +}); + +test('classifyInteractionSurfaceChange detects material layout movement', () => { + const before = buildInteractionSurfaceSignature(makeSnapshot('Inbox', 100).nodes); + const after = buildInteractionSurfaceSignature(makeSnapshot('Inbox', 180).nodes); + + assert.equal(classifyInteractionSurfaceChange(before, after), 'changed'); +}); + +test('markPendingInteractionOutcome stores retry state only for explicit retry flags', () => { + const session = makeSession(); + markPendingInteractionOutcome({ + session, + command: 'click', + positionals: ['20', '40'], + flags: {}, + preSnapshot: makeSnapshot('Inbox'), + }); + assert.equal(session.pendingInteractionOutcome, undefined); + + const retrySession = makeSession(); + markPendingInteractionOutcome({ + session: retrySession, + command: 'click', + positionals: ['20', '40'], + flags: { interactionOutcome: { retryOnNoChange: true } }, + preSnapshot: makeSnapshot('Inbox'), + }); + + assert.equal(retrySession.pendingInteractionOutcome?.action, 'click'); + assert.equal(retrySession.pendingInteractionOutcome?.command, 'press'); + assert.equal(retrySession.pendingInteractionOutcome?.attemptsRemaining, 2); + assert.equal(retrySession.pendingInteractionOutcome?.flags?.interactionOutcome, undefined); + + const refSession = makeSession(); + markPendingInteractionOutcome({ + session: refSession, + command: 'click', + positionals: ['@e1'], + flags: { interactionOutcome: { retryOnNoChange: true } }, + preSnapshot: makeSnapshot('Inbox'), + }); + assert.equal(refSession.pendingInteractionOutcome, undefined); + + const longPressSession = makeSession(); + markPendingInteractionOutcome({ + session: longPressSession, + command: 'longpress', + positionals: ['20', '40', '800'], + flags: { interactionOutcome: { retryOnNoChange: true } }, + preSnapshot: makeSnapshot('Inbox'), + }); + assert.equal(longPressSession.pendingInteractionOutcome, undefined); +}); + +test('stripInternalInteractionOutcomeFlags removes internal retry controls', () => { + assert.deepEqual( + stripInternalInteractionOutcomeFlags({ + platform: 'ios', + interactionOutcome: { retryOnNoChange: true }, + }), + { platform: 'ios' }, + ); +}); + +function makeSession(): SessionState { + return { + name: 'ios', + device: IOS_SIMULATOR, + createdAt: Date.now(), + actions: [], + }; +} + +function makeSnapshot(label: string, y = 100): SnapshotState { + return { + nodes: [ + { + ref: 'e1', + index: 0, + type: 'Application', + label: 'App', + rect: { x: 0, y: 0, width: 390, height: 844 }, + }, + { + ref: 'e2', + index: 1, + parentIndex: 0, + type: 'Button', + identifier: 'primary-action', + label, + rect: { x: 120, y, width: 80, height: 40 }, + }, + ], + createdAt: Date.now(), + backend: 'xctest', + }; +} diff --git a/src/daemon/handlers/__tests__/find.test.ts b/src/daemon/handlers/__tests__/find.test.ts index adebffd9d..e83298549 100644 --- a/src/daemon/handlers/__tests__/find.test.ts +++ b/src/daemon/handlers/__tests__/find.test.ts @@ -36,10 +36,12 @@ async function runFindClickScenario(options: { }): Promise<{ response: NonNullable>>; invokeCalls: DaemonRequest[]; + session: SessionState; }> { const sessionStore = makeSessionStore(); const sessionName = 'default'; - sessionStore.set(sessionName, options.session ?? makeSession(sessionName)); + const session = options.session ?? makeSession(sessionName); + sessionStore.set(sessionName, session); if (options.nodes !== undefined) { mockDispatch.mockImplementation(async (_device, command) => { @@ -70,7 +72,7 @@ async function runFindClickScenario(options: { }); expect(response).toBeTruthy(); - return { response: response!, invokeCalls }; + return { response: response!, invokeCalls, session }; } test('handleFindCommands click returns deterministic metadata across locator variants', async () => { @@ -213,6 +215,36 @@ test('handleFindCommands click prefers semantic controls over matching container expect(invokeCalls[0]!.positionals?.[0]).toBe('@e5'); }); +test('handleFindCommands forwards internal interaction outcome flags only to delegated click', async () => { + const { response, invokeCalls, session } = await runFindClickScenario({ + positionals: ['Continue', 'click'], + flags: { + findFirst: true, + interactionOutcome: { retryOnNoChange: true }, + }, + nodes: [ + { + index: 0, + ref: 'e1', + type: 'Application', + rect: { x: 0, y: 0, width: 440, height: 956 }, + }, + { + index: 1, + ref: 'e2', + type: 'Button', + label: 'Continue', + rect: { x: 40, y: 870, width: 360, height: 44 }, + parentIndex: 0, + }, + ], + }); + + expect(response.ok).toBe(true); + expect(invokeCalls[0]!.flags?.interactionOutcome).toEqual({ retryOnNoChange: true }); + expect(session.actions.at(-1)?.flags).toEqual({}); +}); + test('handleFindCommands wait bypasses snapshot cache while Android freshness recovery is active', async () => { const sessionName = 'android-find-wait'; const session: SessionState = { diff --git a/src/daemon/handlers/__tests__/interaction.test.ts b/src/daemon/handlers/__tests__/interaction.test.ts index 737dbd151..a948957e7 100644 --- a/src/daemon/handlers/__tests__/interaction.test.ts +++ b/src/daemon/handlers/__tests__/interaction.test.ts @@ -1163,6 +1163,49 @@ test('press @ref preserves native timing in recorded result and touch visualizat expect(stored?.recording?.gestureEvents[0]?.tMs).toBe(570); }); +test('press @ref stores resolved coordinate retry payload for lazy outcome retry', async () => { + const sessionStore = makeSessionStore(); + const sessionName = 'retry-ref'; + const session = makeSession(sessionName); + session.snapshot = { + nodes: attachRefs([ + { + index: 0, + type: 'XCUIElementTypeButton', + label: 'Continue', + identifier: 'auth_continue', + rect: { x: 10, y: 20, width: 100, height: 40 }, + enabled: true, + hittable: true, + }, + ]), + createdAt: Date.now(), + backend: 'xctest', + }; + sessionStore.set(sessionName, session); + mockDispatch.mockResolvedValue({}); + + const response = await handleInteractionCommands({ + req: { + token: 't', + session: sessionName, + command: 'press', + positionals: ['@e1'], + flags: { interactionOutcome: { retryOnNoChange: true } }, + }, + sessionName, + sessionStore, + contextFromFlags, + }); + + expect(response?.ok).toBe(true); + const stored = sessionStore.get(sessionName); + expect(stored?.pendingInteractionOutcome?.command).toBe('press'); + expect(stored?.pendingInteractionOutcome?.positionals).toEqual(['60', '40']); + expect(stored?.actions[0]?.positionals).toEqual(['@e1']); + expect(stored?.actions[0]?.flags).toEqual({}); +}); + test('longpress @ref resolves the target and dispatches coordinate longpress', async () => { const sessionStore = makeSessionStore(); const sessionName = 'longpress-ref'; diff --git a/src/daemon/handlers/__tests__/session-replay-vars.test.ts b/src/daemon/handlers/__tests__/session-replay-vars.test.ts index 60ca98c12..3a9e482fc 100644 --- a/src/daemon/handlers/__tests__/session-replay-vars.test.ts +++ b/src/daemon/handlers/__tests__/session-replay-vars.test.ts @@ -1886,6 +1886,12 @@ test('runReplayScriptFile runs Maestro runFlow.when.visible commands when presen ['find', ['Continue', 'click']], ], ); + assert.deepEqual(calls.find((call) => call.command === 'click')?.flags?.interactionOutcome, { + retryOnNoChange: true, + }); + assert.deepEqual(calls.find((call) => call.command === 'find')?.flags?.interactionOutcome, { + retryOnNoChange: true, + }); }); test('runReplayScriptFile runs nested Maestro runtime commands inside runFlow.when', async () => { diff --git a/src/daemon/handlers/__tests__/snapshot-handler.test.ts b/src/daemon/handlers/__tests__/snapshot-handler.test.ts index 1a4afff9c..1c7d97146 100644 --- a/src/daemon/handlers/__tests__/snapshot-handler.test.ts +++ b/src/daemon/handlers/__tests__/snapshot-handler.test.ts @@ -9,6 +9,7 @@ import { SessionStore } from '../../session-store.ts'; import type { SessionState } from '../../types.ts'; import { AppError } from '../../../utils/errors.ts'; import { buildSnapshotSignatures } from '../../android-snapshot-freshness.ts'; +import { buildInteractionSurfaceSignature } from '../../interaction-outcome-policy.ts'; import { buildSnapshotPresentationKey } from '../../../utils/snapshot.ts'; vi.mock('../../../core/dispatch.ts', async (importOriginal) => { @@ -720,6 +721,163 @@ test('Android ref refresh mode does not retry narrow snapshots as sharp drops', expect(session.androidSnapshotFreshness).toBeUndefined(); }); +test('captureSnapshot lazily retries pending no-change touch before returning fresh state', async () => { + const sessionName = 'ios-lazy-outcome-retry'; + const session = makeSession(sessionName, iosSimulatorDevice); + const baselineNodes = [ + { + ref: 'e1', + index: 0, + depth: 0, + type: 'Button', + label: 'Open feed', + identifier: 'open-feed', + hittable: true, + rect: { x: 20, y: 120, width: 160, height: 48 }, + }, + ]; + session.snapshot = { + nodes: baselineNodes, + createdAt: Date.now(), + backend: 'xctest', + }; + session.pendingInteractionOutcome = { + action: 'click', + command: 'press', + positionals: ['100', '144'], + flags: { platform: 'ios' }, + markedAt: Date.now(), + attemptsRemaining: 2, + preSignature: [ + { + key: 'open-feed|Open feed||Button||enabled|unselected|hittable|#0', + x: 20, + y: 120, + width: 160, + height: 48, + }, + ], + }; + + mockDispatch + .mockResolvedValueOnce({ + nodes: baselineNodes, + backend: 'xctest', + }) + .mockResolvedValueOnce({ clicked: true }) + .mockResolvedValueOnce({ + nodes: [ + { + index: 0, + depth: 0, + type: 'Button', + label: 'Back', + identifier: 'back', + hittable: true, + rect: { x: 20, y: 60, width: 90, height: 44 }, + }, + { + index: 1, + depth: 0, + type: 'StaticText', + label: 'Feed', + rect: { x: 20, y: 140, width: 160, height: 48 }, + }, + ], + backend: 'xctest', + }); + + const result = await captureSnapshot({ + device: iosSimulatorDevice, + session, + flags: { snapshotInteractiveOnly: true }, + logPath: '/tmp/daemon.log', + }); + + expect(result.snapshot.nodes).toEqual( + expect.arrayContaining([expect.objectContaining({ label: 'Feed' })]), + ); + expect(mockDispatch.mock.calls.map((call) => call[1])).toEqual(['snapshot', 'press', 'snapshot']); + expect(mockDispatch.mock.calls[1]?.[2]).toEqual(['100', '144']); + expect(session.pendingInteractionOutcome).toBeUndefined(); +}); + +test('captureSnapshot composes pending outcome retry with Android freshness capture', async () => { + const sessionName = 'android-lazy-outcome-freshness'; + const session = makeSession(sessionName, androidDevice); + const baselineNodes = Array.from({ length: 18 }, (_, index) => ({ + ref: `e${index + 1}`, + index, + depth: 0, + type: 'android.widget.TextView', + label: `Inbox row ${index + 1}`, + })); + session.snapshot = { + nodes: baselineNodes, + createdAt: Date.now(), + backend: 'android', + comparisonSafe: true, + }; + session.androidSnapshotFreshness = { + action: 'click', + markedAt: Date.now(), + baselineCount: baselineNodes.length, + baselineSignatures: buildSnapshotSignatures(baselineNodes), + routeComparable: true, + }; + session.pendingInteractionOutcome = { + action: 'click', + command: 'press', + positionals: ['180', '330'], + flags: { platform: 'android' }, + markedAt: Date.now(), + attemptsRemaining: 2, + preSignature: buildInteractionSurfaceSignature(baselineNodes), + }; + + mockDispatch + .mockResolvedValueOnce({ + nodes: [], + truncated: false, + backend: 'android', + analysis: { rawNodeCount: 18, maxDepth: 1 }, + }) + .mockResolvedValueOnce({ + nodes: [ + { + index: 0, + depth: 0, + type: 'android.widget.Button', + label: 'Create document', + hittable: true, + }, + ], + truncated: false, + backend: 'android', + analysis: { rawNodeCount: 1, maxDepth: 0 }, + }); + + const result = await captureSnapshot({ + device: androidDevice, + session, + flags: { snapshotInteractiveOnly: true }, + logPath: '/tmp/daemon.log', + }); + + expect(result.snapshot.nodes).toEqual( + expect.arrayContaining([expect.objectContaining({ label: 'Create document' })]), + ); + expect(result.freshness).toEqual({ + action: 'click', + retryCount: 1, + staleAfterRetries: false, + reason: undefined, + }); + expect(mockDispatch.mock.calls.map((call) => call[1])).toEqual(['snapshot', 'snapshot']); + expect(session.pendingInteractionOutcome).toBeUndefined(); + expect(session.androidSnapshotFreshness).toBeUndefined(); +}); + test('wait text on Android uses freshness-aware capture instead of one-shot snapshot polling', async () => { const sessionStore = makeSessionStore(); const sessionName = 'android-wait-freshness'; diff --git a/src/daemon/handlers/find.ts b/src/daemon/handlers/find.ts index 7ad36804b..b11366885 100644 --- a/src/daemon/handlers/find.ts +++ b/src/daemon/handlers/find.ts @@ -16,6 +16,7 @@ import { captureSnapshot } from './snapshot-capture.ts'; import { setSessionSnapshot } from '../session-snapshot.ts'; import { errorResponse } from './response.ts'; import { getActiveAndroidSnapshotFreshness } from '../android-snapshot-freshness.ts'; +import { stripInternalInteractionOutcomeFlags } from '../interaction-outcome-policy.ts'; import { dispatchFindReadOnlyViaRuntime } from '../selector-runtime.ts'; import { PUBLIC_COMMANDS } from '../../command-catalog.ts'; @@ -36,6 +37,7 @@ type FindContext = { command: string; locator: FindLocator; query: string; + publicFlags: Record; }; type ResolvedMatch = { @@ -140,6 +142,7 @@ export async function handleFindCommands(params: { command, locator, query, + publicFlags: publicFindFlags(req.flags), }; if (action === 'wait') { @@ -306,7 +309,7 @@ async function handleFindWait( query: string, timeoutMs: number | undefined, ): Promise { - const { req, sessionStore, session, command } = ctx; + const { req, sessionStore, session, command, publicFlags } = ctx; const timeout = timeoutMs ?? 10000; const start = Date.now(); while (Date.now() - start < timeout) { @@ -318,7 +321,7 @@ async function handleFindWait( sessionStore.recordAction(session, { command, positionals: req.positionals ?? [], - flags: req.flags ?? {}, + flags: publicFlags, result: { found: true, waitedMs: Date.now() - start }, }); } @@ -330,12 +333,12 @@ async function handleFindWait( } async function handleFindExists(ctx: FindContext): Promise { - const { req, sessionStore, session, command } = ctx; + const { req, sessionStore, session, command, publicFlags } = ctx; if (session) { sessionStore.recordAction(session, { command, positionals: req.positionals ?? [], - flags: req.flags ?? {}, + flags: publicFlags, result: { found: true }, }); } @@ -343,7 +346,7 @@ async function handleFindExists(ctx: FindContext): Promise { } async function handleFindGetText(ctx: FindContext, match: ResolvedMatch): Promise { - const { req, sessionStore, session, command, device, logPath } = ctx; + const { req, sessionStore, session, command, device, logPath, publicFlags } = ctx; const text = await readTextForNode({ device, node: match.node, @@ -358,7 +361,7 @@ async function handleFindGetText(ctx: FindContext, match: ResolvedMatch): Promis sessionStore.recordAction(session, { command, positionals: req.positionals ?? [], - flags: req.flags ?? {}, + flags: publicFlags, result: { ref: match.ref, action: 'get text', text }, }); } @@ -366,12 +369,12 @@ async function handleFindGetText(ctx: FindContext, match: ResolvedMatch): Promis } async function handleFindGetAttrs(ctx: FindContext, match: ResolvedMatch): Promise { - const { req, sessionStore, session, command } = ctx; + const { req, sessionStore, session, command, publicFlags } = ctx; if (session) { sessionStore.recordAction(session, { command, positionals: req.positionals ?? [], - flags: req.flags ?? {}, + flags: publicFlags, result: { ref: match.ref, action: 'get attrs' }, }); } @@ -379,7 +382,8 @@ async function handleFindGetAttrs(ctx: FindContext, match: ResolvedMatch): Promi } async function handleFindClick(ctx: FindContext, match: ResolvedMatch): Promise { - const { req, sessionName, sessionStore, session, invoke, command, locator, query } = ctx; + const { req, sessionName, sessionStore, session, invoke, command, locator, query, publicFlags } = + ctx; const response = await invoke({ token: req.token, session: sessionName, @@ -402,7 +406,7 @@ async function handleFindClick(ctx: FindContext, match: ResolvedMatch): Promise< sessionStore.recordAction(session, { command, positionals: req.positionals ?? [], - flags: req.flags ?? {}, + flags: publicFlags, result: { ref: match.ref, action: 'click', locator, query }, }); } @@ -414,7 +418,7 @@ async function handleFindFill( match: ResolvedMatch, value: string | undefined, ): Promise { - const { req, sessionName, sessionStore, session, invoke, command } = ctx; + const { req, sessionName, sessionStore, session, invoke, command, publicFlags } = ctx; if (!value) { return errorResponse('INVALID_ARGS', 'find fill requires text'); } @@ -430,7 +434,7 @@ async function handleFindFill( sessionStore.recordAction(session, { command, positionals: req.positionals ?? [], - flags: req.flags ?? {}, + flags: publicFlags, result: { ref: match.ref, action: 'fill' }, }); } @@ -484,12 +488,12 @@ async function dispatchFocusForFindMatch( } function recordFindAction(ctx: FindContext, match: ResolvedMatch, action: string): void { - const { req, sessionStore, session, command } = ctx; + const { req, sessionStore, session, command, publicFlags } = ctx; if (session) { sessionStore.recordAction(session, { command, positionals: req.positionals ?? [], - flags: req.flags ?? {}, + flags: publicFlags, result: { ref: match.ref, action }, }); } @@ -497,6 +501,10 @@ function recordFindAction(ctx: FindContext, match: ResolvedMatch, action: string // --- Helpers --- +function publicFindFlags(flags: DaemonRequest['flags']): Record { + return { ...(stripInternalInteractionOutcomeFlags(flags) ?? {}) }; +} + function buildAmbiguousMatchError( matches: SnapshotState['nodes'], locator: FindLocator, diff --git a/src/daemon/handlers/interaction-common.ts b/src/daemon/handlers/interaction-common.ts index 8d4c5d016..ec34c1dfc 100644 --- a/src/daemon/handlers/interaction-common.ts +++ b/src/daemon/handlers/interaction-common.ts @@ -9,6 +9,10 @@ import { isNavigationSensitiveAction, markAndroidSnapshotFreshness, } from '../android-snapshot-freshness.ts'; +import { + markPendingInteractionOutcome, + stripInternalInteractionOutcomeFlags, +} from '../interaction-outcome-policy.ts'; export type ContextFromFlags = ( flags: CommandFlags | undefined, @@ -73,6 +77,7 @@ export function finalizeTouchInteraction(params: { sessionStore: SessionStore; command: string; positionals: string[]; + retryPositionals?: string[]; flags: CommandFlags | undefined; result: Record; responseData: Record; @@ -85,6 +90,7 @@ export function finalizeTouchInteraction(params: { sessionStore, command, positionals, + retryPositionals, flags, result, responseData, @@ -92,12 +98,20 @@ export function finalizeTouchInteraction(params: { actionFinishedAt, androidFreshnessBaseline, } = params; + const actionFlags = stripInternalInteractionOutcomeFlags(flags); sessionStore.recordAction(session, { command, positionals, - flags: flags ?? {}, + flags: actionFlags ?? {}, result, }); + markPendingInteractionOutcome({ + session, + command, + positionals: retryPositionals ?? positionals, + flags, + preSnapshot: session.snapshot, + }); if (isNavigationSensitiveAction(command)) { markAndroidSnapshotFreshness(session, command, androidFreshnessBaseline ?? session.snapshot); } @@ -106,7 +120,7 @@ export function finalizeTouchInteraction(params: { command, positionals, result, - (flags ?? {}) as Record, + (actionFlags ?? {}) as Record, actionStartedAt, actionFinishedAt, ); diff --git a/src/daemon/handlers/interaction-touch.ts b/src/daemon/handlers/interaction-touch.ts index 8aa6df4ef..af0fbce0e 100644 --- a/src/daemon/handlers/interaction-touch.ts +++ b/src/daemon/handlers/interaction-touch.ts @@ -342,6 +342,7 @@ async function dispatchDirectIosSelectorInteraction(params: { sessionStore: handlerParams.sessionStore, command: handlerParams.req.command, positionals: handlerParams.req.positionals ?? [], + retryPositionals: pointPositionals(point), flags: handlerParams.req.flags, result: responseData, responseData, @@ -555,6 +556,7 @@ async function dispatchRuntimeInteraction< sessionStore: params.sessionStore, command: params.req.command, positionals: params.req.positionals ?? [], + retryPositionals: retryPositionalsForRuntimeResult(params.req.command, runtimeResult), flags: params.req.flags, result, responseData, @@ -602,3 +604,17 @@ async function refreshAndroidRefSnapshotIfFreshnessActive( function appErrorResponse(error: unknown): DaemonResponse { return { ok: false, error: normalizeError(error) }; } + +function retryPositionalsForRuntimeResult( + command: string, + result: PressCommandResult | FillCommandResult | LongPressCommandResult, +): string[] | undefined { + if (command === 'click' || command === 'press') { + return pointPositionals(result.point); + } + return undefined; +} + +function pointPositionals(point: { x: number; y: number }): string[] { + return [String(point.x), String(point.y)]; +} diff --git a/src/daemon/handlers/snapshot-capture.ts b/src/daemon/handlers/snapshot-capture.ts index 17b5917ec..bcde1f14c 100644 --- a/src/daemon/handlers/snapshot-capture.ts +++ b/src/daemon/handlers/snapshot-capture.ts @@ -28,6 +28,13 @@ import { type AndroidFreshnessCaptureMeta, } from '../android-snapshot-freshness.ts'; import { contextFromFlags } from '../context.ts'; +import { + clearPendingInteractionOutcome, + emitInteractionSettled, + emitInteractionSettleTimeout, + getActivePendingInteractionOutcome, + retryPendingInteractionOutcome, +} from '../interaction-outcome-policy.ts'; import { capturePostGestureStabilizedSnapshot } from '../post-gesture-stabilization.ts'; import { findNodeByLabel, pruneGroupNodes, resolveRefLabel } from '../snapshot-processing.ts'; import { errorResponse, type DaemonFailureResponse } from './response.ts'; @@ -51,6 +58,12 @@ type SnapshotData = { androidSnapshot?: AndroidSnapshotBackendMetadata; }; +type SnapshotAttempt = { + data: SnapshotData; + snapshot: SnapshotState; + freshness?: AndroidFreshnessCaptureMeta; +}; + type AndroidFreshnessReason = 'empty-interactive' | 'sharp-drop' | 'stuck-route'; type AndroidFreshnessMode = 'default' | 'ref-refresh'; @@ -71,6 +84,13 @@ export async function captureSnapshot(params: CaptureSnapshotParams): Promise<{ }), }; } + const pendingInteractionOutcome = getActivePendingInteractionOutcome(params.session); + if (pendingInteractionOutcome && params.session) { + return await captureInteractionOutcomeAwareSnapshot( + { ...params, session: params.session }, + pendingInteractionOutcome, + ); + } const freshness = getActiveAndroidSnapshotFreshness(params.session); if (freshness && params.device.platform === 'android') { return await captureAndroidFreshnessAwareSnapshot(params, freshness); @@ -84,6 +104,61 @@ export async function captureSnapshot(params: CaptureSnapshotParams): Promise<{ }; } +async function captureInteractionOutcomeAwareSnapshot( + params: CaptureSnapshotParams & { session: SessionState }, + pending: NonNullable, +): Promise<{ + snapshot: SnapshotState; + analysis?: AndroidSnapshotAnalysis; + androidSnapshot?: AndroidSnapshotBackendMetadata; + freshness?: AndroidFreshnessCaptureMeta; +}> { + const session = params.session; + + const startedAt = Date.now(); + let retryAttempts = 0; + let latest = await captureSnapshotAttemptForInteractionOutcome(params); + let outcome = await retryPendingInteractionOutcome({ + session, + pending, + logPath: params.logPath, + snapshot: latest.snapshot, + }); + + while (outcome.retried) { + retryAttempts += 1; + latest = await captureSnapshotAttemptForInteractionOutcome(params); + outcome = await retryPendingInteractionOutcome({ + session, + pending, + logPath: params.logPath, + snapshot: latest.snapshot, + }); + } + + clearPendingInteractionOutcome(session); + if (outcome.change !== 'ambiguous' && latest.freshness?.staleAfterRetries !== true) { + clearAndroidSnapshotFreshness(session); + } + if (outcome.change === 'unchanged') { + emitInteractionSettleTimeout({ pending, attempts: retryAttempts, startedAt }); + } else { + emitInteractionSettled({ + pending, + change: outcome.change, + attempts: retryAttempts, + startedAt, + }); + } + + return { + snapshot: latest.snapshot, + analysis: latest.data.analysis, + androidSnapshot: latest.data.androidSnapshot, + freshness: latest.freshness, + }; +} + export async function captureSnapshotData(params: CaptureSnapshotParams): Promise { const { device, session, flags, outPath, logPath, snapshotScope } = params; if (device.platform === 'linux') { @@ -126,6 +201,19 @@ async function captureAndroidFreshnessAwareSnapshot( androidSnapshot?: AndroidSnapshotBackendMetadata; freshness?: AndroidFreshnessCaptureMeta; }> { + const latest = await captureAndroidFreshnessAwareAttempt(params, freshness); + return { + snapshot: latest.snapshot, + analysis: latest.data.analysis, + androidSnapshot: latest.data.androidSnapshot, + freshness: latest.freshness, + }; +} + +async function captureAndroidFreshnessAwareAttempt( + params: CaptureSnapshotParams, + freshness: NonNullable, +): Promise { let latest = await captureSnapshotAttempt(params); let suspiciousReason = getAndroidFreshnessReason(latest, freshness, params); let retryCount = 0; @@ -146,9 +234,7 @@ async function captureAndroidFreshnessAwareSnapshot( } return { - snapshot: latest.snapshot, - analysis: latest.data.analysis, - androidSnapshot: latest.data.androidSnapshot, + ...latest, freshness: retryCount > 0 || Boolean(suspiciousReason) ? { @@ -161,9 +247,17 @@ async function captureAndroidFreshnessAwareSnapshot( }; } -async function captureSnapshotAttempt( - params: CaptureSnapshotParams, -): Promise<{ data: SnapshotData; snapshot: SnapshotState }> { +async function captureSnapshotAttemptForInteractionOutcome( + params: CaptureSnapshotParams & { session: SessionState }, +): Promise { + const freshness = getActiveAndroidSnapshotFreshness(params.session); + if (freshness && params.device.platform === 'android') { + return await captureAndroidFreshnessAwareAttempt(params, freshness); + } + return await captureSnapshotAttempt(params); +} + +async function captureSnapshotAttempt(params: CaptureSnapshotParams): Promise { const data = await captureSnapshotData(params); return { data, diff --git a/src/daemon/interaction-outcome-policy.ts b/src/daemon/interaction-outcome-policy.ts new file mode 100644 index 000000000..1e7f8c6eb --- /dev/null +++ b/src/daemon/interaction-outcome-policy.ts @@ -0,0 +1,248 @@ +import { dispatchCommand, type CommandFlags } from '../core/dispatch.ts'; +import type { SnapshotNode, SnapshotState } from '../utils/snapshot.ts'; +import { emitDiagnostic } from '../utils/diagnostics.ts'; +import { contextFromFlags } from './context.ts'; +import type { SessionState } from './types.ts'; + +const OUTCOME_RETRY_WINDOW_MS = 30_000; +const OUTCOME_RETRY_ATTEMPTS = 2; +const RECT_TOLERANCE_PX = 1; + +export type InteractionSurfaceSignature = NonNullable< + SessionState['pendingInteractionOutcome'] +>['preSignature']; + +export type InteractionSurfaceChange = 'changed' | 'unchanged' | 'ambiguous'; + +function shouldRetryTouchOnNoChange(flags: CommandFlags | undefined): boolean { + return flags?.interactionOutcome?.retryOnNoChange === true; +} + +export function markPendingInteractionOutcome(params: { + session: SessionState; + command: string; + positionals: string[]; + flags: CommandFlags | undefined; + preSnapshot: SnapshotState | undefined; +}): void { + const { session, command, positionals, flags, preSnapshot } = params; + if (!shouldRetryTouchOnNoChange(flags)) return; + if (!supportsInteractionOutcomePolicy(session)) return; + const retryCommand = retryCommandForTap(command); + if (!retryCommand) return; + if (!isCoordinatePair(positionals)) return; + const preSignature = buildInteractionSurfaceSignature(preSnapshot?.nodes ?? []); + if (preSignature.length === 0) return; + session.pendingInteractionOutcome = { + action: command, + command: retryCommand, + positionals, + flags: stripInternalInteractionOutcomeFlags(flags), + markedAt: Date.now(), + attemptsRemaining: OUTCOME_RETRY_ATTEMPTS, + preSignature, + }; +} + +export function getActivePendingInteractionOutcome( + session: SessionState | undefined, +): NonNullable | undefined { + const pending = session?.pendingInteractionOutcome; + if (!session || !pending) return undefined; + if (!supportsInteractionOutcomePolicy(session)) { + clearPendingInteractionOutcome(session); + return undefined; + } + if (Date.now() - pending.markedAt > OUTCOME_RETRY_WINDOW_MS) { + clearPendingInteractionOutcome(session); + return undefined; + } + return pending; +} + +export function clearPendingInteractionOutcome(session: SessionState | undefined): void { + if (!session?.pendingInteractionOutcome) return; + session.pendingInteractionOutcome = undefined; +} + +export async function retryPendingInteractionOutcome(params: { + session: SessionState; + pending: NonNullable; + logPath: string; + snapshot: SnapshotState; +}): Promise<{ retried: boolean; change: InteractionSurfaceChange }> { + const { session, pending, snapshot } = params; + const change = classifyInteractionSurfaceChange( + pending.preSignature, + buildInteractionSurfaceSignature(snapshot.nodes), + ); + if (change !== 'unchanged' || pending.attemptsRemaining <= 0) { + return { retried: false, change }; + } + + const startedAt = Date.now(); + pending.attemptsRemaining -= 1; + // Opt-in Maestro retries intentionally re-fire the same coordinate tap; delayed or + // non-visual side effects can duplicate, but unchanged visual taps are the target gap. + await dispatchCommand(session.device, pending.command, pending.positionals, pending.flags?.out, { + ...contextFromFlags(params.logPath, pending.flags, session.appBundleId, session.trace?.outPath), + surface: session.surface, + }); + emitDiagnostic({ + level: 'info', + phase: 'interaction_no_change_retry', + data: { + action: pending.action, + attemptsRemaining: pending.attemptsRemaining, + durationMs: Date.now() - startedAt, + }, + }); + return { retried: true, change }; +} + +export function emitInteractionSettled(params: { + pending: NonNullable; + change: InteractionSurfaceChange; + attempts: number; + startedAt: number; +}): void { + emitDiagnostic({ + level: params.attempts > 0 ? 'info' : 'debug', + phase: 'interaction_settled', + data: { + action: params.pending.action, + change: params.change, + attempts: params.attempts, + durationMs: Date.now() - params.startedAt, + }, + }); +} + +export function emitInteractionSettleTimeout(params: { + pending: NonNullable; + attempts: number; + startedAt: number; +}): void { + emitDiagnostic({ + level: 'warn', + phase: 'interaction_settle_timeout', + data: { + action: params.pending.action, + attempts: params.attempts, + durationMs: Date.now() - params.startedAt, + }, + }); +} + +export function stripInternalInteractionOutcomeFlags( + flags: CommandFlags | undefined, +): CommandFlags | undefined { + if (!flags?.interactionOutcome) return flags; + const { interactionOutcome: _interactionOutcome, ...publicFlags } = flags; + return publicFlags; +} + +export function buildInteractionSurfaceSignature(nodes: SnapshotNode[]): Array<{ + key: string; + x: number; + y: number; + width: number; + height: number; +}> { + const occurrenceCounts = new Map(); + const entries: InteractionSurfaceSignature = []; + + for (const node of nodes) { + const entry = buildInteractionSurfaceEntry(node, occurrenceCounts); + if (entry) entries.push(entry); + } + + return entries; +} + +export function classifyInteractionSurfaceChange( + before: InteractionSurfaceSignature, + after: InteractionSurfaceSignature, +): InteractionSurfaceChange { + if (before.length === 0 || after.length === 0) return 'ambiguous'; + if (areInteractionSurfaceSignaturesStable(before, after)) return 'unchanged'; + return 'changed'; +} + +export function areInteractionSurfaceSignaturesStable( + left: InteractionSurfaceSignature, + right: InteractionSurfaceSignature, +): boolean { + if (left.length !== right.length) return false; + for (let index = 0; index < left.length; index += 1) { + const a = left[index]; + const b = right[index]; + if (!a || !b || a.key !== b.key) return false; + if (Math.abs(a.x - b.x) > RECT_TOLERANCE_PX) return false; + if (Math.abs(a.y - b.y) > RECT_TOLERANCE_PX) return false; + if (Math.abs(a.width - b.width) > RECT_TOLERANCE_PX) return false; + if (Math.abs(a.height - b.height) > RECT_TOLERANCE_PX) return false; + } + return true; +} + +function supportsInteractionOutcomePolicy(session: SessionState): boolean { + return session.device.platform === 'ios' || session.device.platform === 'android'; +} + +function retryCommandForTap(command: string): string | undefined { + if (command === 'click') return 'press'; + if (command === 'press') return 'press'; + return undefined; +} + +function buildInteractionSurfaceEntry( + node: SnapshotNode, + occurrenceCounts: Map, +): InteractionSurfaceSignature[number] | undefined { + if (!node.rect) return undefined; + if (!isFiniteRect(node.rect)) return undefined; + if (isScrollIndicator(node)) return undefined; + const semanticKey = interactionSurfaceSemanticKey(node); + if (!semanticKey) return undefined; + const occurrence = occurrenceCounts.get(semanticKey) ?? 0; + occurrenceCounts.set(semanticKey, occurrence + 1); + return { + key: `${semanticKey}|#${occurrence}`, + x: Math.round(node.rect.x), + y: Math.round(node.rect.y), + width: Math.round(node.rect.width), + height: Math.round(node.rect.height), + }; +} + +function interactionSurfaceSemanticKey(node: SnapshotNode): string | undefined { + const semanticKey = [ + node.identifier, + node.label, + node.value, + node.type, + node.role, + node.enabled === false ? 'disabled' : 'enabled', + node.selected === true ? 'selected' : 'unselected', + node.hittable === true ? 'hittable' : 'not-hittable', + ] + .map((value) => (typeof value === 'string' ? value.trim() : '')) + .join('|'); + return semanticKey.replaceAll('|', '') ? semanticKey : undefined; +} + +function isCoordinatePair(positionals: string[]): boolean { + if (positionals.length !== 2) return false; + return positionals.every((value) => Number.isFinite(Number(value))); +} + +function isFiniteRect(rect: NonNullable): boolean { + const values = [rect.x, rect.y, rect.width, rect.height]; + return values.every((value) => Number.isFinite(value)) && rect.width > 0 && rect.height > 0; +} + +function isScrollIndicator(node: SnapshotNode): boolean { + const label = `${node.label ?? ''} ${node.identifier ?? ''}`.toLowerCase(); + return label.includes('scroll bar'); +} diff --git a/src/daemon/post-gesture-stabilization.ts b/src/daemon/post-gesture-stabilization.ts index a99b400f5..c952eec26 100644 --- a/src/daemon/post-gesture-stabilization.ts +++ b/src/daemon/post-gesture-stabilization.ts @@ -1,11 +1,14 @@ import { emitDiagnostic } from '../utils/diagnostics.ts'; -import type { SnapshotNode, SnapshotState } from '../utils/snapshot.ts'; +import type { SnapshotState } from '../utils/snapshot.ts'; import { sleep } from '../utils/timeouts.ts'; +import { + areInteractionSurfaceSignaturesStable, + buildInteractionSurfaceSignature, +} from './interaction-outcome-policy.ts'; import type { SessionState } from './types.ts'; const STABILIZATION_DEADLINE_MS = 1_500; const STABILIZATION_INTERVAL_MS = 200; -const RECT_TOLERANCE_PX = 1; export function markPostGestureStabilization(session: SessionState, action: string): void { if (!supportsPostGestureStabilization(session.device.platform)) return; @@ -34,14 +37,14 @@ export async function capturePostGestureStabilizedSnapshot(params: { const startedAt = Date.now(); let attempts = 1; let previous = await capture(); - let previousSignature = buildStabilitySignature(previous.nodes); + let previousSignature = buildInteractionSurfaceSignature(previous.nodes); while (Date.now() - startedAt < STABILIZATION_DEADLINE_MS) { await sleep(STABILIZATION_INTERVAL_MS); attempts += 1; const current = await capture(); - const currentSignature = buildStabilitySignature(current.nodes); - if (areSignaturesStable(previousSignature, currentSignature)) { + const currentSignature = buildInteractionSurfaceSignature(current.nodes); + if (areInteractionSurfaceSignaturesStable(previousSignature, currentSignature)) { clearPostGestureStabilization(session); emitDiagnostic({ level: attempts > 2 ? 'info' : 'debug', @@ -78,62 +81,3 @@ function isPostGestureStabilizingAction(action: string): boolean { function supportsPostGestureStabilization(platform: SessionState['device']['platform']): boolean { return platform === 'ios' || platform === 'android'; } - -type StabilityEntry = { - key: string; - x: number; - y: number; - width: number; - height: number; -}; - -function buildStabilitySignature(nodes: SnapshotNode[]): StabilityEntry[] { - const occurrenceCounts = new Map(); - const entries: StabilityEntry[] = []; - - for (const node of nodes) { - if (!node.rect) continue; - if (!isFiniteRect(node.rect)) continue; - if (isScrollIndicator(node)) continue; - const semanticKey = [node.identifier, node.label, node.value, node.type] - .map((value) => (typeof value === 'string' ? value.trim() : '')) - .join('|'); - if (!semanticKey.replaceAll('|', '')) continue; - const occurrence = occurrenceCounts.get(semanticKey) ?? 0; - occurrenceCounts.set(semanticKey, occurrence + 1); - entries.push({ - key: `${semanticKey}|#${occurrence}`, - x: node.rect.x, - y: node.rect.y, - width: node.rect.width, - height: node.rect.height, - }); - } - - return entries; -} - -// fallow-ignore-next-line complexity -function areSignaturesStable(left: StabilityEntry[], right: StabilityEntry[]): boolean { - if (left.length !== right.length) return false; - for (let index = 0; index < left.length; index += 1) { - const a = left[index]; - const b = right[index]; - if (!a || !b || a.key !== b.key) return false; - if (Math.abs(a.x - b.x) > RECT_TOLERANCE_PX) return false; - if (Math.abs(a.y - b.y) > RECT_TOLERANCE_PX) return false; - if (Math.abs(a.width - b.width) > RECT_TOLERANCE_PX) return false; - if (Math.abs(a.height - b.height) > RECT_TOLERANCE_PX) return false; - } - return true; -} - -function isFiniteRect(rect: NonNullable): boolean { - const values = [rect.x, rect.y, rect.width, rect.height]; - return values.every((value) => Number.isFinite(value)) && rect.width > 0 && rect.height > 0; -} - -function isScrollIndicator(node: SnapshotNode): boolean { - const label = `${node.label ?? ''} ${node.identifier ?? ''}`.toLowerCase(); - return label.includes('scroll bar'); -} diff --git a/src/daemon/types.ts b/src/daemon/types.ts index afb2494da..f281fb8cc 100644 --- a/src/daemon/types.ts +++ b/src/daemon/types.ts @@ -142,6 +142,22 @@ export type PostGestureStabilization = { markedAt: number; }; +export type PendingInteractionOutcome = { + action: string; + command: string; + positionals: string[]; + flags?: CommandFlags; + markedAt: number; + attemptsRemaining: number; + preSignature: Array<{ + key: string; + x: number; + y: number; + width: number; + height: number; + }>; +}; + type SessionRecordingBase = { outPath: string; clientOutPath?: string; @@ -182,6 +198,7 @@ export type SessionState = { snapshotScopeSource?: SnapshotState; androidSnapshotFreshness?: AndroidSnapshotFreshness; postGestureStabilization?: PostGestureStabilization; + pendingInteractionOutcome?: PendingInteractionOutcome; trace?: { outPath: string; startedAt: number; diff --git a/test/integration/provider-scenarios/android-test-suite.test.ts b/test/integration/provider-scenarios/android-test-suite.test.ts index f24471df7..277d5fc26 100644 --- a/test/integration/provider-scenarios/android-test-suite.test.ts +++ b/test/integration/provider-scenarios/android-test-suite.test.ts @@ -333,10 +333,14 @@ test('Provider-backed integration Android Maestro executes runFlow conditions an assert.equal(suite.passed, 1, JSON.stringify(suite)); assert.equal(suite.failed, 0, JSON.stringify(suite)); assert.deepEqual( - world.adbCalls.find((call) => call.slice(0, 3).join(' ') === 'shell input tap'), - ['shell', 'input', 'tap', '180', '330'], + world.adbCalls.filter((call) => call.slice(0, 3).join(' ') === 'shell input tap'), + [ + ['shell', 'input', 'tap', '180', '330'], + ['shell', 'input', 'tap', '180', '330'], + ['shell', 'input', 'tap', '180', '330'], + ], ); - assert.equal(snapshots, 3); + assert.equal(snapshots, 5); }, ); });