diff --git a/KeyType/Logic/Completion/CompletionController.swift b/KeyType/Logic/Completion/CompletionController.swift index 97c04d5..4ad228e 100644 --- a/KeyType/Logic/Completion/CompletionController.swift +++ b/KeyType/Logic/Completion/CompletionController.swift @@ -577,13 +577,21 @@ final class CompletionController { let healExtraTokens = healSlack > 0 ? 1 : 0 // Completion length is user-configurable (Settings) and maps to the decoder's token/width budget. let length = settings.completionLength + // Clipboard and OCR are background context, not text to reproduce; carry them so the output + // filter can drop a completion that just parrots them verbatim. History is excluded — it is + // already same-app/domain scoped and echoing the user's own recurring phrases is intended. + let injectedContext = Self.injectedContext( + pasteboardText: sideContext.pasteboardText, + screenText: sideContext.screenText + ) let request = CompletionRequest( context: context, prompt: promptResult.prompt, requiredPrefixBytes: requiredPrefixBytes, mode: policy.completionMode, maxCompletionTokens: length.maxCompletionTokens + healExtraTokens, - maxDisplayWidth: length.maxDisplayWidth + healSlack + maxDisplayWidth: length.maxDisplayWidth + healSlack, + injectedContext: injectedContext ) rememberFullPromptDebug( for: request, @@ -886,11 +894,19 @@ final class CompletionController { return (cached, true) } + // Scope history to the focused app. Cross-app recent samples bleed unrelated content into the + // prompt — e.g. a Notes draft about an API key surfacing as a verbatim suggestion in a fresh + // Gmail message — which the small model tends to parrot. Same-app history still personalizes + // tone/recurring phrases without leaking content across contexts. + // Normalize an empty domain to nil so it can't collapse the same-app filter to `domain == ""` + // and silently drop all real history for the app. + let scopedDomain = context.target.domain.flatMap { $0.isEmpty ? nil : $0 } let query = WritingHistoryQuery( bundleIdentifier: context.target.bundleIdentifier, - domain: context.target.domain, + domain: scopedDomain, typingContext: context.typingContext, - language: context.detectedLanguage + language: context.detectedLanguage, + sameAppOnly: true ) let previousUserInputs = settings.historyEnabled ? history.samples(for: query) @@ -970,6 +986,59 @@ final class CompletionController { case notApplicable } + /// Clipboard + OCR text injected into the prompt, as the echo guard consumes it. History is + /// intentionally excluded (same-app/domain scoped; echoing the user's own phrases is intended). + private static func injectedContext(pasteboardText: String?, screenText: String?) -> [String] { + [pasteboardText, screenText].compactMap { $0 } + } + + /// Re-check the context-dependent suppression nets against the *live* context before re-showing a + /// cached completion. The candidate was filtered once at generation time, but reuse re-shows it + /// without going back through the pipeline, and the inputs those nets key off can change after the + /// fact: + /// - prefix-repetition / suffix-overlap key off `beforeCursor`/`afterCursor`, which grow as the + /// user types through the suggestion — a tail clean at anchor time can become a verbatim + /// repetition (or suffix duplication) of text just typed; + /// - the echo guard keys off injected clipboard/OCR context, which can change mid-burst or differ + /// from when an older reused snapshot was generated. We check it against the currently-frozen + /// side context (already cached, so no hot-path pasteboard read). + /// Returns `true` when the remaining text is still safe to show. + private func reuseRemainingPassesLiveGuards(remaining: String, context: TextFieldContext) -> Bool { + Self.reuseRemainingIsSafe( + remaining: remaining, + context: context, + injectedContext: Self.injectedContext( + pasteboardText: frozenSideContext?.pasteboardText, + screenText: frozenSideContext?.screenText + ) + ) + } + + /// Pure decision behind `reuseRemainingPassesLiveGuards`, factored out so the reuse-safety rules + /// are unit-testable without constructing a controller. `true` when `remaining` is still safe to + /// re-show against the given live context and injected side context. + nonisolated static func reuseRemainingIsSafe( + remaining: String, + context: TextFieldContext, + injectedContext: [String] + ) -> Bool { + guard !remaining.isEmpty else { return true } + if PrefixRepetitionGuard.repeatsPrefix(completion: remaining, beforeCursor: context.beforeCursor) { + return false + } + if SuffixOverlapGuard.duplicatesSuffix( + completion: remaining, + beforeCursor: context.beforeCursor, + afterCursor: context.afterCursor + ) { + return false + } + if ContextEchoGuard.echoesInjectedContext(completion: remaining, injectedContext: injectedContext) { + return false + } + return true + } + @discardableResult private func applyReuseHistoryIfUseful( for live: TextFieldContext, @@ -980,6 +1049,11 @@ final class CompletionController { switch reuseHistory.decision(for: live) { case let .reuse(reuse): + guard reuseRemainingPassesLiveGuards(remaining: reuse.remainingText, context: live) else { + predictionLog.append("REUSE rejected by live guard remaining=\"\(PredictionLog.escape(reuse.remainingText))\"") + clearCompletion() + return .mustRecompute + } anchorText = reuse.anchorText anchorContext = reuse.anchorContext if updateLatestContext { latestContext = live } @@ -1292,6 +1366,10 @@ final class CompletionController { ) -> Bool { switch decision { case let .reuse(reuse): + guard reuseRemainingPassesLiveGuards(remaining: reuse.remainingText, context: optimistic) else { + predictionLog.append("REUSE rejected by live guard remaining=\"\(PredictionLog.escape(reuse.remainingText))\"") + return false + } anchorText = reuse.anchorText anchorContext = reuse.anchorContext latestContext = optimistic diff --git a/KeyType/Logic/Context/ScreenContextController.swift b/KeyType/Logic/Context/ScreenContextController.swift index 55c5c5c..c788a06 100644 --- a/KeyType/Logic/Context/ScreenContextController.swift +++ b/KeyType/Logic/Context/ScreenContextController.swift @@ -99,6 +99,11 @@ final class ScreenContextController { let key = windowKey(for: snapshot) guard key != lastWindowKey else { return } lastWindowKey = key + // Drop the previous window's cached OCR *before* kicking off the new (async) capture, so a + // completion fired in the just-focused window can't be fed the prior window's screen text + // while the fresh capture is still in flight. Without this, switching browser tabs/windows + // leaks the old page's text (e.g. a "2 of 10 …" results counter) into the new one's prompt. + engine.clear() capture(for: snapshot) } @@ -120,7 +125,18 @@ final class ScreenContextController { // screen context carries only the *surrounding* on-screen text. let context = snapshot.context let fieldText = context.beforeCursor + context.afterCursor - engine.refresh(pid: pid, fieldText: fieldText) + // The caret location lets the capturer pick the right window when the app has several open, + // so screen context can't bleed in text from a different window of the same app. `caretRect` + // is in AppKit space (bottom-left origin) but ScreenCaptureKit window frames are in CG space + // (top-left origin), so convert before handing it down — otherwise the Y axes don't match and + // the wrong window (or none) is selected. + let focusPoint = snapshot.caretRect.flatMap { rect -> CGPoint? in + DisplayCoordinateConverter.coreGraphicsPoint( + fromAppKitPoint: CGPoint(x: rect.midX, y: rect.midY), + displays: ScreenDisplayGeometryProvider.current() + ) + } + engine.refresh(pid: pid, fieldText: fieldText, focusPoint: focusPoint) } // MARK: - Eligibility diff --git a/KeyTypeTests/KeyTypeTests.swift b/KeyTypeTests/KeyTypeTests.swift index 27cd589..41524b1 100644 --- a/KeyTypeTests/KeyTypeTests.swift +++ b/KeyTypeTests/KeyTypeTests.swift @@ -273,6 +273,48 @@ struct KeyTypeTests { #expect(advanced == nil) } + // MARK: - Reuse re-check (H2) + + @Test func reuseRejectsRemainingThatRepeatsRecentlyTypedText() { + // As the user types through a cached suggestion, beforeCursor grows; a tail that becomes a + // verbatim repetition of just-typed text must not be re-shown via reuse. + let context = TextFieldContext( + beforeCursor: "You can use it to access the OpenAI. And", + target: Self.target + ) + #expect( + CompletionController.reuseRemainingIsSafe( + remaining: " you can use it to access the OpenAI again", + context: context, + injectedContext: [] + ) == false + ) + } + + @Test func reuseRejectsRemainingThatEchoesInjectedClipboard() { + // A cached completion (clean at anchor time) must not be re-shown if it now parrots the + // currently-injected clipboard/OCR context. + let context = TextFieldContext(beforeCursor: "Hi Molly,", target: Self.target) + #expect( + CompletionController.reuseRemainingIsSafe( + remaining: " if you require maintenance of UPS systems or", + context: context, + injectedContext: ["if you require maintenance of UPS systems or backup power, call us."] + ) == false + ) + } + + @Test func reuseAllowsGenuineRemaining() { + let context = TextFieldContext(beforeCursor: "Hi Molly,", target: Self.target) + #expect( + CompletionController.reuseRemainingIsSafe( + remaining: " hope you are doing well today", + context: context, + injectedContext: ["if you require maintenance of UPS systems or backup power, call us."] + ) + ) + } + @Test func promotionCachePromotesLowerRankedBranchWhenTopIsInvalidated() { let cache = Self.promotionCache(candidates: [ "ship it today", diff --git a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AlphanumericNormalizer.swift b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AlphanumericNormalizer.swift new file mode 100644 index 0000000..725cea4 --- /dev/null +++ b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AlphanumericNormalizer.swift @@ -0,0 +1,16 @@ +import Foundation + +/// Shared text normalization for the content-overlap guards (`SuffixOverlapGuard`, +/// `PrefixRepetitionGuard`, `ContextEchoGuard`). Comparisons are done on case-folded alphanumeric +/// scalars only, so differences in whitespace, punctuation, and stray symbol glyphs the model +/// sometimes prepends ("**", "•") don't defeat a match. +enum AlphanumericNormalizer { + /// Case-folded string of only the alphanumeric scalars in `text`. + static func normalize(_ text: String) -> String { + var result = String.UnicodeScalarView() + for scalar in text.lowercased().unicodeScalars where CharacterSet.alphanumerics.contains(scalar) { + result.append(scalar) + } + return String(result) + } +} diff --git a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AutocompleteCore.swift b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AutocompleteCore.swift index 60d7108..210d725 100644 --- a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AutocompleteCore.swift +++ b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/AutocompleteCore.swift @@ -135,6 +135,12 @@ public struct CompletionRequest: Equatable { public var mode: CompletionMode public var maxCompletionTokens: Int public var maxDisplayWidth: Int + /// Side-context text injected into the prompt that the user did NOT type — clipboard contents and + /// on-screen OCR text. Carried alongside the request so the output filter can drop a completion + /// that merely parrots it verbatim (`ContextEchoGuard`). Writing-history samples are deliberately + /// excluded: they are scoped to the same app/domain and reproducing the user's own recurring + /// phrases is the point of that feature. + public var injectedContext: [String] public init( context: TextFieldContext, @@ -142,7 +148,8 @@ public struct CompletionRequest: Equatable { requiredPrefixBytes: [UInt8] = [], mode: CompletionMode = .prose, maxCompletionTokens: Int = 4, - maxDisplayWidth: Int = 80 + maxDisplayWidth: Int = 80, + injectedContext: [String] = [] ) { self.context = context self.prompt = prompt @@ -150,6 +157,7 @@ public struct CompletionRequest: Equatable { self.mode = mode self.maxCompletionTokens = maxCompletionTokens self.maxDisplayWidth = maxDisplayWidth + self.injectedContext = injectedContext } } @@ -202,6 +210,13 @@ public enum SuppressionReason: Equatable { /// A mid-line / fill-in-the-middle completion that is too long or too low-probability to show /// without risking a wrong suggestion. case lowConfidenceMidLine + /// The completion reproduces a phrase that is already present in the recent text before the caret. + /// Accepting it would create a verbatim repetition loop. See `PrefixRepetitionGuard`. + case repeatsRecentPrefix + /// The completion verbatim-reproduces a span of injected side context the user did not type + /// (clipboard, on-screen OCR text) — the small model parroting context instead of predicting. + /// See `ContextEchoGuard`. + case echoesInjectedContext case noCandidate } diff --git a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/ContextEchoGuard.swift b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/ContextEchoGuard.swift new file mode 100644 index 0000000..9eec7b9 --- /dev/null +++ b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/ContextEchoGuard.swift @@ -0,0 +1,69 @@ +import Foundation + +/// Shared "does this completion reproduce a phrase from some text" test, used by both +/// `PrefixRepetitionGuard` (against the recent typed prefix) and `ContextEchoGuard` (against injected +/// side context). Two shapes are detected on case-folded alphanumerics: +/// +/// 1. **Whole** — the entire (normalized) completion is a substring of the text. A strong signal, so +/// a short match (`minimumWhole`) is enough. +/// 2. **Leading** — the completion *begins* with a run that appears in the text and then diverges, so +/// shape 1 misses it. A leading run of length ≥ `minimumLeading` exists iff the leading slice of +/// exactly that length is a substring (any longer contained run has it as a prefix), so one +/// `contains` decides it. The larger floor keeps chance word collisions from firing. +enum RepeatedSpanDetector { + static func reproduces( + normalizedCompletion: String, + within normalizedText: String, + minimumWhole: Int, + minimumLeading: Int + ) -> Bool { + guard !normalizedCompletion.isEmpty, !normalizedText.isEmpty else { return false } + + if normalizedCompletion.count >= minimumWhole, + normalizedText.contains(normalizedCompletion) { + return true + } + + guard normalizedCompletion.count >= minimumLeading else { return false } + return normalizedText.contains(String(normalizedCompletion.prefix(minimumLeading))) + } +} + +/// Detects completions that merely parrot injected side context — clipboard contents or on-screen +/// OCR text the prompt carries but the user did not type. The small model frequently copies such +/// context verbatim instead of using it as background (e.g. text copied from a localhost page in +/// one browser surfacing as a suggestion in a different app's compose field). +/// +/// Writing-history samples are intentionally NOT passed here: they are already scoped to the same +/// app/domain, and reproducing the user's own recurring phrases (a signature, a stock reply) is the +/// purpose of that personalization — suppressing it would be a regression. +public enum ContextEchoGuard { + + /// `true` when `completion` verbatim-reproduces a span of any string in `injectedContext`. + /// + /// `minimumWhole` is a touch higher than `PrefixRepetitionGuard`'s because the injected corpus is + /// larger (more chance of an incidental short match); `minimumLeading` matches it. + public static func echoesInjectedContext( + completion: String, + injectedContext: [String], + minimumWhole: Int = 12, + minimumLeading: Int = 16 + ) -> Bool { + guard !injectedContext.isEmpty else { return false } + let normalizedCompletion = AlphanumericNormalizer.normalize(completion) + guard !normalizedCompletion.isEmpty else { return false } + + for sample in injectedContext { + let normalizedSample = AlphanumericNormalizer.normalize(sample) + if RepeatedSpanDetector.reproduces( + normalizedCompletion: normalizedCompletion, + within: normalizedSample, + minimumWhole: minimumWhole, + minimumLeading: minimumLeading + ) { + return true + } + } + return false + } +} diff --git a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/PrefixRepetitionGuard.swift b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/PrefixRepetitionGuard.swift new file mode 100644 index 0000000..69dc43f --- /dev/null +++ b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/PrefixRepetitionGuard.swift @@ -0,0 +1,49 @@ +import Foundation + +/// Detects completions that would create a verbatim repetition loop by reproducing a phrase already +/// present in the recent text before the cursor. +/// +/// The failure mode this guards against: the model predicts " i want to write about" after +/// "...AI meetup." because that exact phrase already appeared earlier in the text. If the user +/// accepts it, the sentence repeats — and the model will predict the same continuation again, +/// looping indefinitely. +/// +/// Two repetition shapes are caught, both on case-folded alphanumerics within the last +/// `lookbackCharacters` of `beforeCursor`: +/// +/// 1. **Whole-completion** — the entire suggestion already appears verbatim in the recent text. A +/// strong signal, so a short match (`minimumAlphanumericLength`) is enough. +/// 2. **Leading** — the suggestion *begins* by reproducing a recent phrase and then diverges +/// ("…access the OpenAI" + " API to do X"). The whole string is no longer a substring, so shape 1 +/// misses it; this catches it when the repeated leading run is long enough +/// (`minimumLeadingRepeat`) to be a genuine loop rather than a chance word collision. +/// +/// The minimum lengths keep short common phrases ("the", "and") from triggering false positives. +public enum PrefixRepetitionGuard { + + /// `true` when `completion` reproduces a phrase that already appears in the recent prefix, + /// meaning accepting it would create a repetition. + public static func repeatsPrefix( + completion: String, + beforeCursor: String, + lookbackCharacters: Int = 300, + minimumAlphanumericLength: Int = 8, + minimumLeadingRepeat: Int = 16 + ) -> Bool { + let normalizedCompletion = AlphanumericNormalizer.normalize(completion) + + // Only look back a bounded window — we don't want to suppress completions that share a + // common phrase with text written hours ago in a very long document. + let lookback = String(beforeCursor.suffix(lookbackCharacters)) + let normalizedPrefix = AlphanumericNormalizer.normalize(lookback) + + // Shape 1 (whole) catches a short verbatim repeat; shape 2 (leading) catches a repeat that + // then diverges. See `RepeatedSpanDetector`. + return RepeatedSpanDetector.reproduces( + normalizedCompletion: normalizedCompletion, + within: normalizedPrefix, + minimumWhole: minimumAlphanumericLength, + minimumLeading: minimumLeadingRepeat + ) + } +} diff --git a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/SuffixOverlapGuard.swift b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/SuffixOverlapGuard.swift index 0695059..3e00aa5 100644 --- a/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/SuffixOverlapGuard.swift +++ b/Packages/AutocompleteCore/Sources/AutocompleteCore/Completion/SuffixOverlapGuard.swift @@ -159,11 +159,7 @@ public enum SuffixOverlapGuard { /// Case-folded string of only the alphanumeric scalars — drops whitespace, punctuation, and any /// stray symbol glyphs the model prepends, so the comparison is on real content. static func normalizedAlphanumerics(_ text: String) -> String { - var result = String.UnicodeScalarView() - for scalar in text.lowercased().unicodeScalars where CharacterSet.alphanumerics.contains(scalar) { - result.append(scalar) - } - return String(result) + AlphanumericNormalizer.normalize(text) } /// Whether the last scalar of `text` is a word character (letter or digit) — i.e. the caret is diff --git a/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/ContextEchoGuardTests.swift b/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/ContextEchoGuardTests.swift new file mode 100644 index 0000000..4256e7f --- /dev/null +++ b/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/ContextEchoGuardTests.swift @@ -0,0 +1,65 @@ +import AutocompleteCore +import XCTest + +final class ContextEchoGuardTests: XCTestCase { + + func testFiresWhenCompletionEchoesClipboardVerbatim() { + // The reported case: text copied from a localhost page in another browser is injected as + // clipboard context and parroted into a fresh Gmail draft. + let clipboard = "if you require maintenance of UPS systems or backup power, contact us." + XCTAssertTrue( + ContextEchoGuard.echoesInjectedContext( + completion: " if you require maintenance of UPS systems or", + injectedContext: [clipboard] + ) + ) + } + + func testFiresOnLeadingEchoThatThenDiverges() { + let screen = "The private key for the OpenAI API is stored in the vault." + XCTAssertTrue( + ContextEchoGuard.echoesInjectedContext( + completion: " the private key for the OpenAI API is yours to keep forever", + injectedContext: [screen] + ) + ) + } + + func testChecksAllInjectedSources() { + XCTAssertTrue( + ContextEchoGuard.echoesInjectedContext( + completion: " maintenance of UPS systems is required", + injectedContext: ["unrelated clipboard text", "notes about maintenance of UPS systems here"] + ) + ) + } + + func testDoesNotFireWithoutInjectedContext() { + XCTAssertFalse( + ContextEchoGuard.echoesInjectedContext( + completion: " if you require maintenance of UPS systems or", + injectedContext: [] + ) + ) + } + + func testAllowsGenuineCompletionNotInContext() { + let clipboard = "if you require maintenance of UPS systems or backup power, contact us." + XCTAssertFalse( + ContextEchoGuard.echoesInjectedContext( + completion: " hope you are doing well", + injectedContext: [clipboard] + ) + ) + } + + func testDoesNotFireOnShortIncidentalOverlap() { + // A short common run ("if you ") must not be enough to suppress a real continuation. + XCTAssertFalse( + ContextEchoGuard.echoesInjectedContext( + completion: " if you can", + injectedContext: ["if you require maintenance of UPS systems"] + ) + ) + } +} diff --git a/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/PrefixRepetitionGuardTests.swift b/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/PrefixRepetitionGuardTests.swift new file mode 100644 index 0000000..c387416 --- /dev/null +++ b/Packages/AutocompleteCore/Tests/AutocompleteCoreTests/Completion/PrefixRepetitionGuardTests.swift @@ -0,0 +1,117 @@ +import AutocompleteCore +import XCTest + +final class PrefixRepetitionGuardTests: XCTestCase { + + // MARK: - Whole-completion repetition + + func testFiresWhenWholeCompletionRepeatsRecentPhrase() { + let before = "This is the private key for the OpenAI API. You can use it to access the OpenAI. And" + XCTAssertTrue( + PrefixRepetitionGuard.repeatsPrefix( + completion: " you can use it to access the OpenAI", + beforeCursor: before + ) + ) + } + + func testIgnoresPunctuationAndCaseDifferences() { + let before = "I went to the AI meetup. I want to write about" + XCTAssertTrue( + PrefixRepetitionGuard.repeatsPrefix( + completion: " i want to write about,", + beforeCursor: before + ) + ) + } + + // MARK: - Leading repetition that then diverges (the loop shape) + + func testFiresWhenCompletionLeadsWithRepeatThenDiverges() { + // The repeated phrase is followed by genuinely new text, so the *whole* completion is no + // longer a substring of the prefix — only the leading run is. + let before = "This is the private key for the OpenAI API. You can use it to access the OpenAI. And" + XCTAssertTrue( + PrefixRepetitionGuard.repeatsPrefix( + completion: " you can use it to access the OpenAI API to do whatever you want", + beforeCursor: before + ) + ) + } + + // MARK: - Negatives + + func testAllowsGenuineContinuation() { + let before = "This is the private key for the OpenAI API. You can use it to access the OpenAI. And" + XCTAssertFalse( + PrefixRepetitionGuard.repeatsPrefix( + completion: " keep it somewhere safe", + beforeCursor: before + ) + ) + } + + func testDoesNotFireOnShortCommonLeadingWord() { + // A short leading collision ("the ") must not be enough to suppress a real continuation. + let before = "I saw the dog run across the" + XCTAssertFalse( + PrefixRepetitionGuard.repeatsPrefix( + completion: " street quickly", + beforeCursor: before + ) + ) + } + + func testDoesNotFireOnShortCompletion() { + let before = "the quick brown fox jumps over the" + XCTAssertFalse( + PrefixRepetitionGuard.repeatsPrefix( + completion: " lazy", + beforeCursor: before + ) + ) + } + + func testLeadingRepeatThresholdBoundary() { + // The leading-divergence shape requires a repeated run of ≥16 normalized alphanumeric chars. + // "abcdefghijklmno" is 15 → must NOT fire on leading-only; "abcdefghijklmnop" is 16 → fires. + let before15 = "abcdefghijklmno was here earlier in the document somewhere" + XCTAssertFalse( + PrefixRepetitionGuard.repeatsPrefix( + completion: "abcdefghijklmno then something new entirely", + beforeCursor: before15 + ), + "15-char leading run is below the threshold" + ) + let before16 = "abcdefghijklmnop was here earlier in the document somewhere" + XCTAssertTrue( + PrefixRepetitionGuard.repeatsPrefix( + completion: "abcdefghijklmnop then something new entirely", + beforeCursor: before16 + ), + "16-char leading run meets the threshold" + ) + } + + func testWholeCompletionRepeatBoundaryIsEightChars() { + // The whole-completion shape uses the lower ≥8 floor; "abcdefg" (7) must not fire. + XCTAssertFalse( + PrefixRepetitionGuard.repeatsPrefix(completion: " abcdefg", beforeCursor: "abcdefg earlier") + ) + XCTAssertTrue( + PrefixRepetitionGuard.repeatsPrefix(completion: " abcdefgh", beforeCursor: "abcdefgh earlier") + ) + } + + func testRespectsLookbackWindow() { + // The repeated phrase sits far outside the lookback window, so it should not be suppressed. + let filler = String(repeating: "x ", count: 400) + let before = "you can use it to access the OpenAI" + filler + XCTAssertFalse( + PrefixRepetitionGuard.repeatsPrefix( + completion: " you can use it to access the OpenAI", + beforeCursor: before + ) + ) + } +} diff --git a/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Filtering/CandidateFilter.swift b/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Filtering/CandidateFilter.swift index 8e2474d..c59fc20 100644 --- a/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Filtering/CandidateFilter.swift +++ b/Packages/ConstrainedGeneration/Sources/ConstrainedGeneration/Filtering/CandidateFilter.swift @@ -121,6 +121,33 @@ public final class DefaultCandidateFilter: CandidateFiltering { return .duplicatesAfterCursor } + // The content-overlap nets below judge the text that will actually be inserted. When the + // prompt was healed (ADR-019) the candidate re-emits the already-typed stem (" coll…"); strip + // it so the comparison is against the genuinely-new continuation, not the stem the user typed. + let insertedText = Self.healStripped(candidate.text, request: request) + + // 7b. Prefix-repetition net: the completion reproduces a phrase already in the recent + // preceding text, so accepting it would create a verbatim repetition loop. + // Typical failure: small model predicts "i want to write about" after "…AI meetup." + // because that exact phrase appeared earlier in the text. See PrefixRepetitionGuard. + if PrefixRepetitionGuard.repeatsPrefix( + completion: insertedText, + beforeCursor: request.context.beforeCursor + ) { + return .repeatsRecentPrefix + } + + // 7c. Context-echo net: the completion verbatim-reproduces injected side context the user did + // not type (clipboard / on-screen OCR). The small model parrots such context instead of + // using it as background — e.g. text copied from one app surfacing in another's compose + // field. Writing-history samples are excluded upstream (see `CompletionRequest`). + if ContextEchoGuard.echoesInjectedContext( + completion: insertedText, + injectedContext: request.injectedContext + ) { + return .echoesInjectedContext + } + // 8. Mid-line confidence net. Native FIM is useful only when it is both short and highly // likely; longer middle spans have been low-precision in edge data. Keep this deliberately // conservative so re-enabled mid-line favors suppression over wrong visible text. @@ -158,6 +185,15 @@ public final class DefaultCandidateFilter: CandidateFiltering { return meanLogProbability < minimumMidLineMeanLogProbability } + // MARK: - Heal-aware text + + /// The text that will actually be inserted: for a healed request (ADR-019) the candidate re-emits + /// the already-typed stem, so strip it back off; otherwise the candidate text is inserted as-is. + static func healStripped(_ text: String, request: CompletionRequest) -> String { + guard !request.requiredPrefixBytes.isEmpty else { return text } + return MidWordHealing.strip(text, heal: String(decoding: request.requiredPrefixBytes, as: UTF8.self)) + } + // MARK: - Required prefix /// `true` when `bytes` is consistent with `prefix`: either it begins with the whole prefix or @@ -210,9 +246,7 @@ public final class DefaultCandidateFilter: CandidateFiltering { // For a healed request (ADR-019) the candidate re-emits the typed stem (`" coll…"`); strip it // so the leading word is the genuinely-new continuation rather than an empty leading-space run // — otherwise healed mid-word completions slip past the net entirely (ADR-025 follow-up). - let judged = request.requiredPrefixBytes.isEmpty - ? candidate.text - : MidWordHealing.strip(candidate.text, heal: String(decoding: request.requiredPrefixBytes, as: UTF8.self)) + let judged = Self.healStripped(candidate.text, request: request) let lead = CurrentWordTypoGuard.leadingWord(of: judged) guard !lead.isEmpty else { return false } // completion opened on a boundary — not our word @@ -244,9 +278,7 @@ public final class DefaultCandidateFilter: CandidateFiltering { let stem = CurrentWordTypoGuard.trailingWord(of: request.context.beforeCursor) guard !stem.isEmpty else { return false } // model started a fresh word — leave it - let judged = request.requiredPrefixBytes.isEmpty - ? candidate.text - : MidWordHealing.strip(candidate.text, heal: String(decoding: request.requiredPrefixBytes, as: UTF8.self)) + let judged = Self.healStripped(candidate.text, request: request) let lead = CurrentWordTypoGuard.leadingWord(of: judged) guard !lead.isEmpty else { return false } // completion opened on a boundary — not our word diff --git a/Packages/ConstrainedGeneration/Tests/ConstrainedGenerationTests/Filtering/CandidateFilterTests.swift b/Packages/ConstrainedGeneration/Tests/ConstrainedGenerationTests/Filtering/CandidateFilterTests.swift index 72ffc26..78f6cd2 100644 --- a/Packages/ConstrainedGeneration/Tests/ConstrainedGenerationTests/Filtering/CandidateFilterTests.swift +++ b/Packages/ConstrainedGeneration/Tests/ConstrainedGenerationTests/Filtering/CandidateFilterTests.swift @@ -19,7 +19,8 @@ final class CandidateFilterTests: XCTestCase { target: AppTarget = CandidateFilterTests.target, placeholder: String? = nil, labels: [String] = [], - traits: TextFieldTraits = TextFieldTraits() + traits: TextFieldTraits = TextFieldTraits(), + injectedContext: [String] = [] ) -> CompletionRequest { let context = TextFieldContext( beforeCursor: beforeCursor, @@ -36,7 +37,8 @@ final class CandidateFilterTests: XCTestCase { requiredPrefixBytes: requiredPrefixBytes, mode: mode, maxCompletionTokens: maxCompletionTokens, - maxDisplayWidth: maxDisplayWidth + maxDisplayWidth: maxDisplayWidth, + injectedContext: injectedContext ) } @@ -379,6 +381,66 @@ final class CandidateFilterTests: XCTestCase { ) } + // MARK: - Prefix-repetition net + + func testSuppressesPrefixRepetitionLoop() { + let filter = DefaultCandidateFilter() + XCTAssertEqual( + filter.suppressionReason( + for: candidate(" you can use it to access the OpenAI API to do anything"), + request: request(beforeCursor: "You can use it to access the OpenAI. And") + ), + .repeatsRecentPrefix + ) + } + + func testPrefixRepetitionJudgedAfterHealingStem() { + // H1: under healing the candidate re-emits the typed stem (" ex"); the repetition check must + // run on the *inserted* text (stem stripped). The stripped continuation "ample data set here" + // reproduces an earlier phrase, but the RAW candidate ("example data set here") does NOT + // appear contiguously in the prefix — so this only fires if the heal stem is stripped first. + let filter = DefaultCandidateFilter() + XCTAssertEqual( + filter.suppressionReason( + for: candidate(" example data set here"), + request: request( + beforeCursor: "ample data set here is good. Give me an ex", + requiredPrefixBytes: Array(" ex".utf8) + ) + ), + .repeatsRecentPrefix + ) + } + + // MARK: - Context-echo net + + func testSuppressesEchoOfClipboardContext() { + let filter = DefaultCandidateFilter() + XCTAssertEqual( + filter.suppressionReason( + for: candidate(" if you require maintenance of UPS systems or backup"), + request: request( + beforeCursor: "Hi Molly,", + injectedContext: ["if you require maintenance of UPS systems or backup power, call us."] + ) + ), + .echoesInjectedContext + ) + } + + func testKeepsCompletionNotPresentInInjectedContext() { + let filter = DefaultCandidateFilter() + XCTAssertNil( + filter.suppressionReason( + for: candidate(" hope you are well"), + request: request( + beforeCursor: "Hi Molly,", + injectedContext: ["if you require maintenance of UPS systems or backup power, call us."] + ) + ) + ) + } + // MARK: - Dead-end mid-word net (ADR-052) /// A recogniser whose `canCompleteWord` only accepts an explicit set of viable prefixes. diff --git a/Packages/MacContextCapture/Sources/MacContextCapture/CaretGeometry/AXCaretGeometryResolver.swift b/Packages/MacContextCapture/Sources/MacContextCapture/CaretGeometry/AXCaretGeometryResolver.swift index ac130f8..13125ad 100644 --- a/Packages/MacContextCapture/Sources/MacContextCapture/CaretGeometry/AXCaretGeometryResolver.swift +++ b/Packages/MacContextCapture/Sources/MacContextCapture/CaretGeometry/AXCaretGeometryResolver.swift @@ -865,19 +865,7 @@ enum AXCaretHelper { @MainActor private static func displayGeometries() -> [DisplayGeometry] { - NSScreen.screens.compactMap { screen in - guard let number = screen.deviceDescription[NSDeviceDescriptionKey("NSScreenNumber")] as? NSNumber else { - return nil - } - - let displayID = CGDirectDisplayID(number.uint32Value) - return DisplayGeometry( - appKitFrame: screen.frame, - visibleFrame: screen.visibleFrame, - coreGraphicsBounds: CGDisplayBounds(displayID), - backingScaleFactor: screen.backingScaleFactor - ) - } + ScreenDisplayGeometryProvider.current() } @MainActor @@ -921,6 +909,27 @@ public struct DisplayGeometry: Equatable { } } +/// Reads the live `[DisplayGeometry]` from `NSScreen`. Separated from the pure +/// `DisplayCoordinateConverter` so the conversion math stays testable without a real display. +public enum ScreenDisplayGeometryProvider { + @MainActor + public static func current() -> [DisplayGeometry] { + NSScreen.screens.compactMap { screen in + guard let number = screen.deviceDescription[NSDeviceDescriptionKey("NSScreenNumber")] as? NSNumber else { + return nil + } + + let displayID = CGDirectDisplayID(number.uint32Value) + return DisplayGeometry( + appKitFrame: screen.frame, + visibleFrame: screen.visibleFrame, + coreGraphicsBounds: CGDisplayBounds(displayID), + backingScaleFactor: screen.backingScaleFactor + ) + } + } +} + /// Pure CG <-> AppKit coordinate conversion against a set of synthetic or real /// `DisplayGeometry` values. Kept side-effect-free so unit tests don't need `NSScreen`. public enum DisplayCoordinateConverter { @@ -980,6 +989,31 @@ public enum DisplayCoordinateConverter { ) } + /// Inverse of `appKitRect(fromCoreGraphicsRect:)` for a point: maps an AppKit (bottom-left origin) + /// global point to a CoreGraphics (top-left origin) global point. Needed because caret geometry is + /// stored in AppKit space while ScreenCaptureKit window frames are in CG space. Returns `nil` when + /// the point lands on no known display. + public static func coreGraphicsPoint( + fromAppKitPoint point: CGPoint, + displays: [DisplayGeometry] + ) -> CGPoint? { + guard let display = bestDisplay( + for: CGRect(origin: point, size: .zero), + displays: displays, + keyPath: \.appKitFrame + ) else { + return nil + } + // Invert: appKit.x = appKitFrame.minX + (cg.x - cgBounds.minX) + // appKit.y = appKitFrame.maxY - (cg.y - cgBounds.minY) (height 0) + let localX = point.x - display.appKitFrame.minX + let localY = display.appKitFrame.maxY - point.y + return CGPoint( + x: display.coreGraphicsBounds.minX + localX, + y: display.coreGraphicsBounds.minY + localY + ) + } + private static func bestDisplay( for rect: CGRect, displays: [DisplayGeometry], diff --git a/Packages/MacContextCapture/Sources/MacContextCapture/Screen/ScreenWindowSelector.swift b/Packages/MacContextCapture/Sources/MacContextCapture/Screen/ScreenWindowSelector.swift index 2fa544a..ba52957 100644 --- a/Packages/MacContextCapture/Sources/MacContextCapture/Screen/ScreenWindowSelector.swift +++ b/Packages/MacContextCapture/Sources/MacContextCapture/Screen/ScreenWindowSelector.swift @@ -19,19 +19,25 @@ public struct ScreenWindowCandidate: Equatable { public var isOnScreen: Bool /// `windowLayer` — normal app windows are layer 0; menus/panels/overlays sit above. public var layer: Int + /// Front-to-back position (0 = frontmost), from ScreenCaptureKit's window ordering. Used to pick + /// the frontmost window when several overlap the caret. Defaults high so synthetic candidates that + /// don't set it sort last on this key and fall through to the area tiebreak. + public var zOrder: Int public init( windowID: CGWindowID, processID: pid_t, frame: CGRect, isOnScreen: Bool, - layer: Int + layer: Int, + zOrder: Int = .max ) { self.windowID = windowID self.processID = processID self.frame = frame self.isOnScreen = isOnScreen self.layer = layer + self.zOrder = zOrder } } @@ -40,12 +46,18 @@ public enum ScreenWindowSelector { static let minimumWidth: CGFloat = 200 static let minimumHeight: CGFloat = 120 - /// Picks the window to capture for `pid`: the focused app's main content window. Prefers - /// on-screen, normal-layer (0) windows and, among equals, the largest one (tie-broken by the - /// lowest window id for determinism). Returns `nil` when the app has no suitable window. + /// Picks the window to capture for `pid`: the focused app's main content window. When + /// `focusPoint` (the caret location, in global top-left screen coordinates) is supplied and + /// lands inside one or more of the app's windows, only those are considered — this disambiguates + /// multiple windows of the same app so OCR reads the window the user is actually typing in, not + /// just the largest one. (Without it, a second window of the same app could bleed its text into + /// the prompt's screen context.) Among the remaining windows, prefers on-screen, normal-layer (0) + /// ones and, among equals, the largest (tie-broken by the lowest window id for determinism). + /// Returns `nil` when the app has no suitable window. public static func selectWindowID( forPID pid: pid_t, - from candidates: [ScreenWindowCandidate] + from candidates: [ScreenWindowCandidate], + focusPoint: CGPoint? = nil ) -> CGWindowID? { let eligible = candidates.filter { candidate in candidate.processID == pid @@ -54,11 +66,23 @@ public enum ScreenWindowSelector { } guard !eligible.isEmpty else { return nil } - let ranked = eligible.sorted { lhs, rhs in + // If we know where the caret is, prefer the window(s) containing it. Fall back to the full + // set when the point lands in none of them (e.g. caret geometry unavailable/stale), so we + // never regress to returning nil just because the point missed. + let containing = focusPoint.map { point in + eligible.filter { $0.frame.contains(point) } + } ?? [] + let pool = containing.isEmpty ? eligible : containing + + let ranked = pool.sorted { lhs, rhs in if lhs.isOnScreen != rhs.isOnScreen { return lhs.isOnScreen } let lhsNormalLayer = lhs.layer == 0 let rhsNormalLayer = rhs.layer == 0 if lhsNormalLayer != rhsNormalLayer { return lhsNormalLayer } + // Frontmost wins. This is decisive when several windows overlap the caret (the focused + // window is on top); otherwise candidates share the default zOrder and the area tiebreak + // below applies, preserving the largest-content-window heuristic. + if lhs.zOrder != rhs.zOrder { return lhs.zOrder < rhs.zOrder } let lhsArea = lhs.frame.width * lhs.frame.height let rhsArea = rhs.frame.width * rhs.frame.height if lhsArea != rhsArea { return lhsArea > rhsArea } diff --git a/Packages/MacContextCapture/Sources/MacContextCapture/Screen/WindowOCRCaptureEngine.swift b/Packages/MacContextCapture/Sources/MacContextCapture/Screen/WindowOCRCaptureEngine.swift index 82b304f..7933a14 100644 --- a/Packages/MacContextCapture/Sources/MacContextCapture/Screen/WindowOCRCaptureEngine.swift +++ b/Packages/MacContextCapture/Sources/MacContextCapture/Screen/WindowOCRCaptureEngine.swift @@ -19,8 +19,9 @@ public protocol ScreenWindowTextCapturing: Sendable { /// Capture the focused window for `pid` and return its OCR'd text, or `nil` if there's no /// suitable window / no recognised text. `fieldText` is the focused field's own text (already /// captured via Accessibility); lines matching it are stripped so screen context doesn't - /// duplicate the field. - func captureWindowText(pid: pid_t, fieldText: String, maxLines: Int, maxChars: Int) async throws -> String? + /// duplicate the field. `focusPoint` (caret location, global top-left screen coordinates) + /// disambiguates multiple windows of the same app so the correct one is read. + func captureWindowText(pid: pid_t, fieldText: String, focusPoint: CGPoint?, maxLines: Int, maxChars: Int) async throws -> String? } /// `ScreenTextProviding` cache fed by an out-of-band capturer. Main-actor isolated: the completion @@ -49,10 +50,11 @@ public final class WindowOCRCaptureEngine: ScreenTextProviding { } /// Kick off a fresh capture for `pid`, superseding any in-flight one. `fieldText` is the focused - /// field's own text, stripped from the OCR so screen context doesn't echo it. Fire-and-forget: - /// the cache updates when the capture completes. A failed/empty capture clears the cache so a - /// stale reading can't outlive the window it came from. - public func refresh(pid: pid_t, fieldText: String) { + /// field's own text, stripped from the OCR so screen context doesn't echo it. `focusPoint` is the + /// caret location (global top-left screen coordinates) used to pick the right window when the app + /// has several. Fire-and-forget: the cache updates when the capture completes. A failed/empty + /// capture clears the cache so a stale reading can't outlive the window it came from. + public func refresh(pid: pid_t, fieldText: String, focusPoint: CGPoint? = nil) { inFlight?.cancel() let capturer = self.capturer let maxLines = self.maxLines @@ -61,6 +63,7 @@ public final class WindowOCRCaptureEngine: ScreenTextProviding { let text = try? await capturer.captureWindowText( pid: pid, fieldText: fieldText, + focusPoint: focusPoint, maxLines: maxLines, maxChars: maxChars ) @@ -89,10 +92,14 @@ public struct ScreenCaptureKitWindowTextCapturer: ScreenWindowTextCapturing { self.maxCaptureDimension = maxCaptureDimension } - public func captureWindowText(pid: pid_t, fieldText: String, maxLines: Int, maxChars: Int) async throws -> String? { + public func captureWindowText(pid: pid_t, fieldText: String, focusPoint: CGPoint?, maxLines: Int, maxChars: Int) async throws -> String? { let content = try await SCShareableContent.excludingDesktopWindows(false, onScreenWindowsOnly: true) - let candidates = content.windows.map(ScreenWindowCandidate.init(window:)) - guard let windowID = ScreenWindowSelector.selectWindowID(forPID: pid, from: candidates), + // `content.windows` is front-to-back; the index is the z-order the selector uses to break ties + // between windows that overlap the caret. + let candidates = content.windows.enumerated().map { index, window in + ScreenWindowCandidate(window: window, zOrder: index) + } + guard let windowID = ScreenWindowSelector.selectWindowID(forPID: pid, from: candidates, focusPoint: focusPoint), let window = content.windows.first(where: { $0.windowID == windowID }) else { return nil } @@ -118,13 +125,14 @@ public struct ScreenCaptureKitWindowTextCapturer: ScreenWindowTextCapturing { } private extension ScreenWindowCandidate { - init(window: SCWindow) { + init(window: SCWindow, zOrder: Int) { self.init( windowID: window.windowID, processID: window.owningApplication?.processID ?? -1, frame: window.frame, isOnScreen: window.isOnScreen, - layer: window.windowLayer + layer: window.windowLayer, + zOrder: zOrder ) } } diff --git a/Packages/MacContextCapture/Tests/MacContextCaptureTests/CaretGeometry/DisplayCoordinateConverterTests.swift b/Packages/MacContextCapture/Tests/MacContextCaptureTests/CaretGeometry/DisplayCoordinateConverterTests.swift index 61a3943..82cbf1b 100644 --- a/Packages/MacContextCapture/Tests/MacContextCaptureTests/CaretGeometry/DisplayCoordinateConverterTests.swift +++ b/Packages/MacContextCapture/Tests/MacContextCaptureTests/CaretGeometry/DisplayCoordinateConverterTests.swift @@ -60,6 +60,63 @@ final class DisplayCoordinateConverterTests: XCTestCase { XCTAssertEqual(first.minY, 860, accuracy: 0.001) } + func testAppKitPointToCGFlipsAroundDisplayHeight() throws { + // AppKit (bottom-left) y=960 should map back to CG (top-left) y=120 on a 1080-tall display. + let cg = try XCTUnwrap( + DisplayCoordinateConverter.coreGraphicsPoint( + fromAppKitPoint: CGPoint(x: 50, y: 960), + displays: [singleDisplay] + ) + ) + XCTAssertEqual(cg.x, 50, accuracy: 0.001) + XCTAssertEqual(cg.y, 120, accuracy: 0.001) + } + + func testCGRectToAppKitPointRoundTrips() throws { + // The point conversion must invert the rect conversion: a caret's CG midpoint → AppKit → CG + // returns the original midpoint. This is the exact path used for window selection. + let cgRect = CGRect(x: 300, y: 220, width: 2, height: 24) + let appKit = try XCTUnwrap( + DisplayCoordinateConverter.appKitRect(fromCoreGraphicsRect: cgRect, displays: [singleDisplay]) + ) + let backToCG = try XCTUnwrap( + DisplayCoordinateConverter.coreGraphicsPoint( + fromAppKitPoint: CGPoint(x: appKit.midX, y: appKit.midY), + displays: [singleDisplay] + ) + ) + XCTAssertEqual(backToCG.x, cgRect.midX, accuracy: 0.001) + XCTAssertEqual(backToCG.y, cgRect.midY, accuracy: 0.001) + } + + func testAppKitPointToCGOnSecondaryDisplay() throws { + let secondary = DisplayGeometry( + appKitFrame: CGRect(x: 1920, y: 180, width: 1440, height: 900), + visibleFrame: CGRect(x: 1920, y: 204, width: 1440, height: 876), + coreGraphicsBounds: CGRect(x: 1920, y: 0, width: 1440, height: 900), + backingScaleFactor: 2 + ) + // AppKit point inside the secondary display. localY = appKitFrame.maxY(1080) - 1010 = 70, + // so CG y = coreGraphicsBounds.minY(0) + 70 = 70. + let cg = try XCTUnwrap( + DisplayCoordinateConverter.coreGraphicsPoint( + fromAppKitPoint: CGPoint(x: 2500, y: 1010), + displays: [singleDisplay, secondary] + ) + ) + XCTAssertEqual(cg.x, 2500, accuracy: 0.001) + XCTAssertEqual(cg.y, 70, accuracy: 0.001) + } + + func testAppKitPointToCGReturnsNilOutsideAllDisplays() { + XCTAssertNil( + DisplayCoordinateConverter.coreGraphicsPoint( + fromAppKitPoint: CGPoint(x: 9000, y: 9000), + displays: [singleDisplay] + ) + ) + } + func testMultiDisplayPicksContainingDisplay() throws { let primary = singleDisplay // Secondary 1440x900 sitting to the right of the primary in CG space; AppKit places it diff --git a/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/ScreenWindowSelectorTests.swift b/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/ScreenWindowSelectorTests.swift index 86a2adb..a8ead11 100644 --- a/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/ScreenWindowSelectorTests.swift +++ b/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/ScreenWindowSelectorTests.swift @@ -8,9 +8,10 @@ final class ScreenWindowSelectorTests: XCTestCase { pid: pid_t, frame: CGRect, onScreen: Bool = true, - layer: Int = 0 + layer: Int = 0, + zOrder: Int = .max ) -> ScreenWindowCandidate { - ScreenWindowCandidate(windowID: id, processID: pid, frame: frame, isOnScreen: onScreen, layer: layer) + ScreenWindowCandidate(windowID: id, processID: pid, frame: frame, isOnScreen: onScreen, layer: layer, zOrder: zOrder) } func testReturnsNilWhenNoWindowMatchesPID() { @@ -48,6 +49,46 @@ final class ScreenWindowSelectorTests: XCTestCase { XCTAssertEqual(ScreenWindowSelector.selectWindowID(forPID: 42, from: candidates), 2) } + func testFocusPointPicksWindowContainingCaretOverLarger() { + // Two windows of the same app: the caret is in the smaller one, which must win over the + // larger window the area-based ranking would otherwise pick. + let candidates = [ + candidate(id: 1, pid: 42, frame: CGRect(x: 0, y: 0, width: 1600, height: 1000)), + candidate(id: 2, pid: 42, frame: CGRect(x: 1700, y: 0, width: 600, height: 400)) + ] + let caret = CGPoint(x: 1750, y: 50) + XCTAssertEqual(ScreenWindowSelector.selectWindowID(forPID: 42, from: candidates, focusPoint: caret), 2) + } + + func testOverlappingWindowsAtCaretPickFrontmost() { + // A small compose window (frontmost, z=0) floats over a large background window (z=1); the + // caret falls inside both. The frontmost must win even though the background is larger. + let candidates = [ + candidate(id: 1, pid: 42, frame: CGRect(x: 0, y: 0, width: 1600, height: 1000), zOrder: 1), + candidate(id: 2, pid: 42, frame: CGRect(x: 100, y: 100, width: 500, height: 400), zOrder: 0) + ] + let caret = CGPoint(x: 200, y: 200) // inside both + XCTAssertEqual(ScreenWindowSelector.selectWindowID(forPID: 42, from: candidates, focusPoint: caret), 2) + } + + func testFallbackPrefersFrontmostWhenNoCaret() { + // With no caret info, the frontmost window is a better guess than the largest. + let candidates = [ + candidate(id: 1, pid: 42, frame: CGRect(x: 0, y: 0, width: 1600, height: 1000), zOrder: 1), + candidate(id: 2, pid: 42, frame: CGRect(x: 0, y: 0, width: 800, height: 600), zOrder: 0) + ] + XCTAssertEqual(ScreenWindowSelector.selectWindowID(forPID: 42, from: candidates), 2) + } + + func testFocusPointFallsBackToRankingWhenOutsideAllWindows() { + let candidates = [ + candidate(id: 1, pid: 42, frame: CGRect(x: 0, y: 0, width: 400, height: 300)), + candidate(id: 2, pid: 42, frame: CGRect(x: 0, y: 0, width: 1200, height: 800)) + ] + let caret = CGPoint(x: 9000, y: 9000) + XCTAssertEqual(ScreenWindowSelector.selectWindowID(forPID: 42, from: candidates, focusPoint: caret), 2) + } + func testCaptureScaleDownscalesLargeWindows() { let scale = ScreenWindowSelector.captureScale(for: CGSize(width: 3200, height: 1800), maxDimension: 1600) XCTAssertEqual(scale, 0.5, accuracy: 0.0001) diff --git a/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/WindowOCRCaptureEngineTests.swift b/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/WindowOCRCaptureEngineTests.swift index 17b463e..7a0f21c 100644 --- a/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/WindowOCRCaptureEngineTests.swift +++ b/Packages/MacContextCapture/Tests/MacContextCaptureTests/Screen/WindowOCRCaptureEngineTests.swift @@ -5,7 +5,7 @@ import XCTest final class WindowOCRCaptureEngineTests: XCTestCase { private struct FakeCapturer: ScreenWindowTextCapturing { let result: String? - func captureWindowText(pid: pid_t, fieldText: String, maxLines: Int, maxChars: Int) async throws -> String? { + func captureWindowText(pid: pid_t, fieldText: String, focusPoint: CGPoint?, maxLines: Int, maxChars: Int) async throws -> String? { result } } diff --git a/Packages/Personalization/Sources/Personalization/PersistentWritingHistoryStore.swift b/Packages/Personalization/Sources/Personalization/PersistentWritingHistoryStore.swift index 183771b..9f9f67a 100644 --- a/Packages/Personalization/Sources/Personalization/PersistentWritingHistoryStore.swift +++ b/Packages/Personalization/Sources/Personalization/PersistentWritingHistoryStore.swift @@ -202,6 +202,12 @@ public final class PersistentWritingHistoryStore: WritingHistoryStoring, @unchec .filter(Column("charCount") >= query.minimumCharacters) if let bundle = query.bundleIdentifier, query.sameAppOnly { request = request.filter(Column("appBundleIdentifier") == bundle) + // Web fields: keep only the focused domain's rows so a different tab in the same + // browser can't fill the row budget (and the in-memory selection then drops any + // that slip through). Native apps have a nil domain and are unaffected. + if let domain = query.domain, !domain.isEmpty { + request = request.filter(Column("domain") == domain) + } } if let language = query.language { // Keep rows whose language matches or is unknown (conservative). @@ -232,7 +238,14 @@ enum WritingHistorySelection { let sameApp = candidates.filter { entry in guard let bundle = query.bundleIdentifier else { return true } - return entry.appBundleIdentifier == bundle + guard entry.appBundleIdentifier == bundle else { return false } + // For web fields the bundle is the browser, so several sites share it. Require a matching + // domain so content from a different tab (or an unknown-domain sample) can't be treated as + // same-context and bleed in. Native apps have no domain, so this is inert for them. + if let queryDomain = query.domain, !queryDomain.isEmpty { + return entry.domain == queryDomain + } + return true } let crossApp = candidates.filter { entry in guard let bundle = query.bundleIdentifier else { return false } diff --git a/Packages/Personalization/Tests/PersonalizationTests/PersonalizationTests.swift b/Packages/Personalization/Tests/PersonalizationTests/PersonalizationTests.swift index d787dba..81bbc4c 100644 --- a/Packages/Personalization/Tests/PersonalizationTests/PersonalizationTests.swift +++ b/Packages/Personalization/Tests/PersonalizationTests/PersonalizationTests.swift @@ -41,6 +41,25 @@ final class PersonalizationTests: XCTestCase { XCTAssertTrue(store.samples(for: WritingHistoryQuery(bundleIdentifier: "com.app.mail")).isEmpty) } + func testPersistentStoreDomainScopingExcludesOtherTabs() throws { + // DB-level coverage for the domain filter (the production path): two sites in the same browser + // bundle must not share context, and a nil-domain row must not leak into a domain-scoped query. + let (store, url) = try makeTempStore() + defer { try? FileManager.default.removeItem(at: url) } + + store.record(WritingHistorySample(text: "Draft about quarterly revenue numbers here.", appBundleIdentifier: "com.browser", domain: "mail.google.com")) + store.record(WritingHistorySample(text: "you can use it to access the OpenAI API key.", appBundleIdentifier: "com.browser", domain: "platform.openai.com")) + store.record(WritingHistorySample(text: "Some unknown-domain text from this browser.", appBundleIdentifier: "com.browser", domain: nil)) + + let result = store.samples(for: WritingHistoryQuery( + bundleIdentifier: "com.browser", + domain: "mail.google.com", + minimumCharacters: 1, + sameAppOnly: true + )) + XCTAssertEqual(result, ["Draft about quarterly revenue numbers here."]) + } + func testPersistentStoreDedupesIdenticalSample() throws { let (store, url) = try makeTempStore() defer { try? FileManager.default.removeItem(at: url) } @@ -108,6 +127,56 @@ final class PersonalizationTests: XCTestCase { XCTAssertEqual(result, ["Newer note from this same app here."]) } + func testSameAppOnlyExcludesCrossAppContent() { + // Regression: a recent sample from another app must never be injected when the query is + // same-app-scoped — otherwise unrelated content (e.g. a Notes draft) bleeds into another + // app's prompt and the model parrots it verbatim. + let now = Date() + let entries = [ + WritingHistorySample(text: "you can use it to access the OpenAI API.", appBundleIdentifier: "com.app.notes", updatedAt: now), + WritingHistorySample(text: "Hi Molly, hope you are doing well today.", appBundleIdentifier: "com.app.mail", updatedAt: now.addingTimeInterval(-100)) + ] + let result = WritingHistorySelection.select(from: entries, query: WritingHistoryQuery( + bundleIdentifier: "com.app.mail", + minimumCharacters: 1, + sameAppOnly: true + )) + XCTAssertEqual(result, ["Hi Molly, hope you are doing well today."]) + XCTAssertFalse(result.contains { $0.contains("OpenAI") }, "cross-app content must not leak") + } + + func testSameAppScopingExcludesOtherWebDomains() { + // Two tabs in the same browser (same bundle) must not share context: a sample from another + // site, or one with no recorded domain, must not be injected into the focused domain's prompt. + let now = Date() + let entries = [ + WritingHistorySample(text: "Draft about quarterly revenue numbers.", appBundleIdentifier: "com.browser", domain: "mail.google.com", updatedAt: now), + WritingHistorySample(text: "you can use it to access the OpenAI API.", appBundleIdentifier: "com.browser", domain: "platform.openai.com", updatedAt: now), + WritingHistorySample(text: "Some unknown-domain text from this browser.", appBundleIdentifier: "com.browser", domain: nil, updatedAt: now) + ] + let result = WritingHistorySelection.select(from: entries, query: WritingHistoryQuery( + bundleIdentifier: "com.browser", + domain: "mail.google.com", + minimumCharacters: 1, + sameAppOnly: true + )) + XCTAssertEqual(result, ["Draft about quarterly revenue numbers."]) + } + + func testNativeAppScopingIsUnaffectedByDomain() { + // A native app has no domain; same-app scoping must still return its samples. + let now = Date() + let entries = [ + WritingHistorySample(text: "A note typed in the native app here.", appBundleIdentifier: "com.app.notes", domain: nil, updatedAt: now) + ] + let result = WritingHistorySelection.select(from: entries, query: WritingHistoryQuery( + bundleIdentifier: "com.app.notes", + minimumCharacters: 1, + sameAppOnly: true + )) + XCTAssertEqual(result, ["A note typed in the native app here."]) + } + // MARK: - Telemetry func testTelemetryRatesAndPercentiles() { diff --git a/Packages/Prompting/Sources/Prompting/WritingHistory.swift b/Packages/Prompting/Sources/Prompting/WritingHistory.swift index d67fc3d..e136e3a 100644 --- a/Packages/Prompting/Sources/Prompting/WritingHistory.swift +++ b/Packages/Prompting/Sources/Prompting/WritingHistory.swift @@ -108,7 +108,14 @@ public struct InMemoryWritingHistoryStore: WritingHistoryProviding { let sameApp = candidates.filter { entry in guard let bundle = query.bundleIdentifier else { return true } - return entry.appBundleIdentifier == bundle + guard entry.appBundleIdentifier == bundle else { return false } + // For web fields the bundle is the browser, shared across sites; require a matching domain + // so a different tab's content can't be treated as same-context. Native apps have no + // domain, so this is inert for them. Mirrors `WritingHistorySelection` in Personalization. + if let queryDomain = query.domain, !queryDomain.isEmpty { + return entry.domain == queryDomain + } + return true } let crossApp = candidates.filter { entry in guard let bundle = query.bundleIdentifier else { return false }