diff --git a/.omd/project-memory.json b/.omd/project-memory.json new file mode 100644 index 0000000..984d440 --- /dev/null +++ b/.omd/project-memory.json @@ -0,0 +1,244 @@ +{ + "version": "1.0.0", + "lastScanned": 1772269310504, + "projectRoot": "/Users/hubo/.superset/worktrees/screentranslate/featadd-translate-engine", + "techStack": { + "languages": [], + "frameworks": [], + "packageManager": null, + "runtime": null + }, + "build": { + "buildCommand": null, + "testCommand": null, + "lintCommand": null, + "devCommand": null, + "scripts": {} + }, + "conventions": { + "namingStyle": null, + "importStyle": null, + "testPattern": null, + "fileOrganization": null + }, + "structure": { + "isMonorepo": false, + "workspaces": [], + "mainDirectories": [ + "docs" + ], + "gitBranches": { + "defaultBranch": "main", + "branchingStrategy": null + } + }, + "customNotes": [], + "directoryMap": { + "Build": { + "path": "Build", + "purpose": "Build output", + "fileCount": 1, + "lastAccessed": 1772269310491, + "keyFiles": [] + }, + "ScreenTranslate": { + "path": "ScreenTranslate", + "purpose": null, + "fileCount": 0, + "lastAccessed": 1772269310492, + "keyFiles": [] + }, + "ScreenTranslate.xcodeproj": { + "path": "ScreenTranslate.xcodeproj", + "purpose": null, + "fileCount": 1, + "lastAccessed": 1772269310492, + "keyFiles": [ + "project.pbxproj" + ] + }, + "ScreenTranslateTests": { + "path": "ScreenTranslateTests", + "purpose": null, + "fileCount": 5, + "lastAccessed": 1772269310493, + "keyFiles": [ + "KeyboardShortcutTests.swift", + "README.md", + "ScreenTranslateErrorTests.swift", + "ShortcutRecordingTypeTests.swift", + "TextTranslationErrorTests.swift" + ] + }, + "docs": { + "path": "docs", + "purpose": "Documentation", + "fileCount": 6, + "lastAccessed": 1772269310493, + "keyFiles": [ + "README.md", + "api-reference.md", + "architecture.md", + "components.md", + "developer-guide.md" + ] + }, + "skills": { + "path": "skills", + "purpose": null, + "fileCount": 0, + "lastAccessed": 
1772269310494, + "keyFiles": [] + }, + "tasks": { + "path": "tasks", + "purpose": null, + "fileCount": 6, + "lastAccessed": 1772269310494, + "keyFiles": [ + "prd-.md", + "prd-macos-screentranslate.md", + "prd-screencoder-kiss-translator.md", + "prd-screencoder.md", + "prd-text-translation.json" + ] + }, + "ScreenTranslate/App": { + "path": "ScreenTranslate/App", + "purpose": "Application code", + "fileCount": 2, + "lastAccessed": 1772269310495, + "keyFiles": [ + "AppDelegate.swift", + "ScreenTranslateApp.swift" + ] + }, + "ScreenTranslate/Models": { + "path": "ScreenTranslate/Models", + "purpose": "Data models", + "fileCount": 23, + "lastAccessed": 1772269310495, + "keyFiles": [ + "Annotation.swift", + "AppLanguage.swift", + "AppSettings.swift" + ] + }, + "ScreenTranslate/Services": { + "path": "ScreenTranslate/Services", + "purpose": "Business logic services", + "fileCount": 26, + "lastAccessed": 1772269310495, + "keyFiles": [ + "AccessibilityPermissionChecker.swift", + "AppleTranslationProvider.swift", + "ClaudeVLMProvider.swift" + ] + } + }, + "hotPaths": [ + { + "path": "ScreenTranslate/Services/PaddleOCREngine.swift", + "accessCount": 17, + "lastAccessed": 1772277198204, + "type": "file" + }, + { + "path": "ScreenTranslate/Services/Security/KeychainService.swift", + "accessCount": 14, + "lastAccessed": 1772277575721, + "type": "file" + }, + { + "path": "ScreenTranslate/Models/AppSettings.swift", + "accessCount": 13, + "lastAccessed": 1772277135092, + "type": "directory" + }, + { + "path": "ScreenTranslate/Resources/en.lproj/Localizable.strings", + "accessCount": 6, + "lastAccessed": 1772277251555, + "type": "file" + }, + { + "path": "ScreenTranslate/Services/PaddleOCRVLMProvider.swift", + "accessCount": 6, + "lastAccessed": 1772277354512, + "type": "directory" + }, + { + "path": "ScreenTranslate/Features/Settings/SettingsViewModel.swift", + "accessCount": 5, + "lastAccessed": 1772277092578, + "type": "directory" + }, + { + "path": "ScreenTranslate/Resources", 
+ "accessCount": 4, + "lastAccessed": 1772271181502, + "type": "directory" + }, + { + "path": "ScreenTranslate/Features/Settings/EngineSettingsTab.swift", + "accessCount": 4, + "lastAccessed": 1772274724758, + "type": "directory" + }, + { + "path": "ScreenTranslate/Resources/zh-Hans.lproj/Localizable.strings", + "accessCount": 4, + "lastAccessed": 1772277283269, + "type": "file" + }, + { + "path": "ScreenTranslate/Features/TranslationFlow/TranslationFlowController.swift", + "accessCount": 3, + "lastAccessed": 1772270086752, + "type": "file" + }, + { + "path": "ScreenTranslate", + "accessCount": 3, + "lastAccessed": 1772271133277, + "type": "directory" + }, + { + "path": "ScreenTranslate/Features/Capture/ScreenDetector.swift", + "accessCount": 3, + "lastAccessed": 1772277067534, + "type": "directory" + }, + { + "path": "ScreenTranslate/Features/Capture/CaptureManager.swift", + "accessCount": 2, + "lastAccessed": 1772277030873, + "type": "directory" + }, + { + "path": "", + "accessCount": 1, + "lastAccessed": 1772269949125, + "type": "directory" + }, + { + "path": "ScreenTranslate/Errors/ScreenTranslateError.swift", + "accessCount": 1, + "lastAccessed": 1772269960584, + "type": "file" + }, + { + "path": "ScreenTranslate/Models/VLMProviderType.swift", + "accessCount": 1, + "lastAccessed": 1772269960585, + "type": "file" + }, + { + "path": "ScreenTranslate/Services/ScreenCoderEngine.swift", + "accessCount": 1, + "lastAccessed": 1772270035460, + "type": "file" + } + ], + "userDirectives": [] +} \ No newline at end of file diff --git a/.omd/sessions/0c203f54-c10d-4417-8115-005c18e9036b.json b/.omd/sessions/0c203f54-c10d-4417-8115-005c18e9036b.json new file mode 100644 index 0000000..666c2c8 --- /dev/null +++ b/.omd/sessions/0c203f54-c10d-4417-8115-005c18e9036b.json @@ -0,0 +1,8 @@ +{ + "session_id": "0c203f54-c10d-4417-8115-005c18e9036b", + "ended_at": "2026-02-28T09:01:44.054Z", + "reason": "other", + "agents_spawned": 0, + "agents_completed": 0, + "modes_used": [] +} \ 
No newline at end of file diff --git a/ScreenTranslate/Features/Capture/ScreenDetector.swift b/ScreenTranslate/Features/Capture/ScreenDetector.swift index c65d166..d8d9ffa 100644 --- a/ScreenTranslate/Features/Capture/ScreenDetector.swift +++ b/ScreenTranslate/Features/Capture/ScreenDetector.swift @@ -122,16 +122,27 @@ actor ScreenDetector { } /// Checks if the app has screen recording permission. - /// Uses CGPreflightScreenCaptureAccess() which does NOT trigger system dialog. - /// This API is deprecated in macOS 15 but still works correctly. + /// Uses SCShareableContent to actually verify permission works (not just cached status). /// - Parameter silent: If true, suppresses logging (default: true) /// - Returns: True if permission is granted func hasPermission(silent: Bool = true) async -> Bool { - // Use CGPreflightScreenCaptureAccess - does NOT trigger dialog - let granted = CGPreflightScreenCaptureAccess() - cachedPermissionStatus = granted - if !silent { print("[ScreenDetector] Permission check: \(granted ? 
"granted" : "denied")") } - return granted + // Quick check first using CGPreflightScreenCaptureAccess + guard CGPreflightScreenCaptureAccess() else { + cachedPermissionStatus = false + if !silent { print("[ScreenDetector] Permission check: denied (CGPreflight)") } + return false + } + // Actually verify by trying to get shareable content + do { + _ = try await SCShareableContent.current + cachedPermissionStatus = true + if !silent { print("[ScreenDetector] Permission check: granted") } + return true + } catch { + cachedPermissionStatus = false + if !silent { print("[ScreenDetector] Permission check: denied (SCShareableContent)") } + return false + } } /// Forces a fresh permission check (clears cache) diff --git a/ScreenTranslate/Features/Settings/EngineSettingsTab.swift b/ScreenTranslate/Features/Settings/EngineSettingsTab.swift index fee7765..b7f42bd 100644 --- a/ScreenTranslate/Features/Settings/EngineSettingsTab.swift +++ b/ScreenTranslate/Features/Settings/EngineSettingsTab.swift @@ -221,6 +221,90 @@ struct PaddleOCRStatusSection: View { .frame(maxWidth: 300) } } + + // MLX-VLM settings (only show when mode is precise and not using cloud) + if viewModel.paddleOCRMode == .precise && !viewModel.paddleOCRUseCloud { + Divider() + .gridCellUnsizedAxes(.horizontal) + + GridRow { + Text(localized("settings.paddleocr.useMLXVLM")) + .foregroundStyle(.secondary) + .gridColumnAlignment(.trailing) + Toggle("", isOn: $viewModel.paddleOCRUseMLXVLM) + .toggleStyle(.checkbox) + .onChange(of: viewModel.paddleOCRUseMLXVLM) { _, newValue in + if newValue { + viewModel.checkMLXVLMServerStatus() + } + } + } + + if viewModel.paddleOCRUseMLXVLM { + // MLX-VLM server status + GridRow { + Text(localized("settings.paddleocr.mlxVLMStatus")) + .foregroundStyle(.secondary) + .gridColumnAlignment(.trailing) + HStack { + if viewModel.isCheckingMLXVLMServer { + ProgressView() + .controlSize(.small) + Text(localized("settings.paddleocr.mlxVLMChecking")) + .foregroundStyle(.secondary) + } 
else { + Image(systemName: viewModel.isMLXVLMServerRunning ? "checkmark.circle.fill" : "xmark.circle.fill") + .foregroundStyle(viewModel.isMLXVLMServerRunning ? .green : .red) + Text(viewModel.isMLXVLMServerRunning + ? localized("settings.paddleocr.mlxVLMRunning") + : localized("settings.paddleocr.mlxVLMNotRunning")) + .foregroundStyle(.secondary) + } + + Button { + viewModel.checkMLXVLMServerStatus() + } label: { + Image(systemName: "arrow.clockwise") + } + .buttonStyle(.borderless) + .controlSize(.small) + } + } + + GridRow { + Text(localized("settings.paddleocr.mlxVLMServerURL")) + .foregroundStyle(.secondary) + .gridColumnAlignment(.trailing) + TextField("", text: $viewModel.paddleOCRMLXVLMServerURL) + .textFieldStyle(.roundedBorder) + .frame(maxWidth: 300) + } + + GridRow { + Text(localized("settings.paddleocr.mlxVLMModelName")) + .foregroundStyle(.secondary) + .gridColumnAlignment(.trailing) + TextField("", text: $viewModel.paddleOCRMLXVLMModelName) + .textFieldStyle(.roundedBorder) + .frame(maxWidth: 300) + } + } else { + // Local model directory for native backend (when not using MLX-VLM) + GridRow { + Text(localized("settings.paddleocr.localVLModelDir")) + .foregroundStyle(.secondary) + .gridColumnAlignment(.trailing) + VStack(alignment: .leading, spacing: 4) { + TextField("", text: $viewModel.paddleOCRLocalVLModelDir) + .textFieldStyle(.roundedBorder) + .frame(maxWidth: 300) + Text(localized("settings.paddleocr.localVLModelDir.hint")) + .font(.caption) + .foregroundStyle(.tertiary) + } + } + } + } } // Description @@ -263,5 +347,11 @@ struct PaddleOCRStatusSection: View { } } .padding(.top, 8) + .onAppear { + // Auto-check MLX-VLM server status when section appears + if viewModel.paddleOCRUseMLXVLM { + viewModel.checkMLXVLMServerStatus() + } + } } } diff --git a/ScreenTranslate/Features/Settings/SettingsViewModel.swift b/ScreenTranslate/Features/Settings/SettingsViewModel.swift index 723b130..fa4d25e 100644 --- 
a/ScreenTranslate/Features/Settings/SettingsViewModel.swift +++ b/ScreenTranslate/Features/Settings/SettingsViewModel.swift @@ -126,6 +126,36 @@ final class SettingsViewModel { set { settings.paddleOCRCloudAPIKey = newValue } } + /// Whether to use MLX-VLM inference framework + var paddleOCRUseMLXVLM: Bool { + get { settings.paddleOCRUseMLXVLM } + set { settings.paddleOCRUseMLXVLM = newValue } + } + + /// MLX-VLM server URL + var paddleOCRMLXVLMServerURL: String { + get { settings.paddleOCRMLXVLMServerURL } + set { settings.paddleOCRMLXVLMServerURL = newValue } + } + + /// MLX-VLM model name + var paddleOCRMLXVLMModelName: String { + get { settings.paddleOCRMLXVLMModelName } + set { settings.paddleOCRMLXVLMModelName = newValue } + } + + /// Local VL model directory (for native backend) + var paddleOCRLocalVLModelDir: String { + get { settings.paddleOCRLocalVLModelDir } + set { settings.paddleOCRLocalVLModelDir = newValue } + } + + /// Whether MLX-VLM server is running + var isMLXVLMServerRunning: Bool = false + + /// Whether MLX-VLM server check is in progress + var isCheckingMLXVLMServer: Bool = false + // MARK: - VLM Test State /// Whether VLM API test is in progress @@ -761,7 +791,7 @@ final class SettingsViewModel { func refreshPaddleOCRStatus() { PaddleOCRChecker.resetCache() PaddleOCRChecker.checkAvailabilityAsync() - + Task { for _ in 0..<20 { try? 
await Task.sleep(for: .milliseconds(250)) @@ -773,6 +803,11 @@ final class SettingsViewModel { isPaddleOCRInstalled = PaddleOCRChecker.isAvailable paddleOCRVersion = PaddleOCRChecker.version paddleOCRInstallError = nil + + // Auto-check MLX-VLM server status if enabled + if paddleOCRUseMLXVLM { + checkMLXVLMServerStatus() + } } } } @@ -824,6 +859,43 @@ final class SettingsViewModel { NSPasteboard.general.setString(command, forType: .string) } + // MARK: - MLX-VLM Server Management + + func checkMLXVLMServerStatus() { + guard paddleOCRUseMLXVLM else { return } + + isCheckingMLXVLMServer = true + + Task.detached { [serverURL = paddleOCRMLXVLMServerURL] in + var isRunning = false + + do { + guard let url = URL(string: serverURL) else { + await MainActor.run { + self.isMLXVLMServerRunning = false + self.isCheckingMLXVLMServer = false + } + return + } + + // Try to connect to the server with a short timeout + let request = URLRequest(url: url, timeoutInterval: 3.0) + let (_, response) = try await URLSession.shared.data(for: request) + + if let httpResponse = response as? 
HTTPURLResponse { + isRunning = (200...299).contains(httpResponse.statusCode) + } + } catch { + isRunning = false + } + + await MainActor.run { + self.isMLXVLMServerRunning = isRunning + self.isCheckingMLXVLMServer = false + } + } + } + // MARK: - VLM API Test /// Tests the VLM API connectivity with current configuration diff --git a/ScreenTranslate/Features/TranslationFlow/TranslationFlowController.swift b/ScreenTranslate/Features/TranslationFlow/TranslationFlowController.swift index ff447c1..80a055c 100644 --- a/ScreenTranslate/Features/TranslationFlow/TranslationFlowController.swift +++ b/ScreenTranslate/Features/TranslationFlow/TranslationFlowController.swift @@ -347,7 +347,16 @@ final class TranslationFlowController { if case .analysisFailure = error { let settings = AppSettings.shared errorDetails += "\n\nProvider: \(settings.vlmProvider.localizedName)" - errorDetails += "\nModel: \(settings.vlmModelName)" + // Show appropriate model info based on provider type + switch settings.vlmProvider { + case .paddleocr: + if settings.paddleOCRUseMLXVLM { + errorDetails += "\nModel: \(settings.paddleOCRMLXVLMModelName)" + } + // For local/cloud PaddleOCR modes, model info is not applicable + default: + errorDetails += "\nModel: \(settings.vlmModelName)" + } } // Add provider info for translation errors diff --git a/ScreenTranslate/Models/AppSettings.swift b/ScreenTranslate/Models/AppSettings.swift index 9c020d7..43dc65c 100644 --- a/ScreenTranslate/Models/AppSettings.swift +++ b/ScreenTranslate/Models/AppSettings.swift @@ -88,6 +88,11 @@ final class AppSettings { static let paddleOCRUseCloud = prefix + "paddleOCRUseCloud" static let paddleOCRCloudBaseURL = prefix + "paddleOCRCloudBaseURL" static let paddleOCRCloudAPIKey = prefix + "paddleOCRCloudAPIKey" + // MLX-VLM Configuration (for Apple Silicon optimization) + static let paddleOCRUseMLXVLM = prefix + "paddleOCRUseMLXVLM" + static let paddleOCRMLXVLMServerURL = prefix + "paddleOCRMLXVLMServerURL" + static let 
paddleOCRMLXVLMModelName = prefix + "paddleOCRMLXVLMModelName" + static let paddleOCRLocalVLModelDir = prefix + "paddleOCRLocalVLModelDir" } // MARK: - Properties @@ -325,6 +330,26 @@ final class AppSettings { } } + /// Whether to use MLX-VLM inference framework (Apple Silicon optimization) + var paddleOCRUseMLXVLM: Bool { + didSet { save(paddleOCRUseMLXVLM, forKey: Keys.paddleOCRUseMLXVLM) } + } + + /// MLX-VLM server URL (default: http://localhost:8111) + var paddleOCRMLXVLMServerURL: String { + didSet { save(paddleOCRMLXVLMServerURL, forKey: Keys.paddleOCRMLXVLMServerURL) } + } + + /// MLX-VLM model name (default: PaddlePaddle/PaddleOCR-VL-1.5) + var paddleOCRMLXVLMModelName: String { + didSet { save(paddleOCRMLXVLMModelName, forKey: Keys.paddleOCRMLXVLMModelName) } + } + + /// Local VL model directory (for native backend without MLX-VLM server) + var paddleOCRLocalVLModelDir: String { + didSet { save(paddleOCRLocalVLModelDir, forKey: Keys.paddleOCRLocalVLModelDir) } + } + // MARK: - Initialization private init() { @@ -430,6 +455,12 @@ final class AppSettings { // Load PaddleOCR cloud API key from Keychain (secure storage) paddleOCRCloudAPIKey = Self.loadPaddleOCRAPIKeyFromKeychain() + // Load MLX-VLM configuration + paddleOCRUseMLXVLM = defaults.object(forKey: Keys.paddleOCRUseMLXVLM) as? Bool ?? false + paddleOCRMLXVLMServerURL = defaults.string(forKey: Keys.paddleOCRMLXVLMServerURL) ?? "http://localhost:8111" + paddleOCRMLXVLMModelName = defaults.string(forKey: Keys.paddleOCRMLXVLMModelName) ?? "PaddlePaddle/PaddleOCR-VL-1.5" + paddleOCRLocalVLModelDir = defaults.string(forKey: Keys.paddleOCRLocalVLModelDir) ?? "" + Logger.settings.info("ScreenCapture launched - settings loaded from: \(loadedLocation.path)") } @@ -479,8 +510,17 @@ final class AppSettings { paddleOCRCloudAPIKey = "" // Delete PaddleOCR cloud API key from Keychain Task.detached { - try? 
await KeychainService.shared.deletePaddleOCRCredentials() + do { + try await KeychainService.shared.deletePaddleOCRCredentials() + } catch { + Logger.settings.error("Failed to delete PaddleOCR credentials from keychain: \(error.localizedDescription)") + } } + // Reset MLX-VLM settings + paddleOCRUseMLXVLM = false + paddleOCRMLXVLMServerURL = "http://localhost:8111" + paddleOCRMLXVLMModelName = "PaddlePaddle/PaddleOCR-VL-1.5" + paddleOCRLocalVLModelDir = "" // Reset multi-engine configuration - directly create defaults, don't load from persistence engineSelectionMode = .primaryWithFallback var defaultConfigs: [TranslationEngineType: TranslationEngineConfig] = [:] diff --git a/ScreenTranslate/Resources/en.lproj/Localizable.strings b/ScreenTranslate/Resources/en.lproj/Localizable.strings index dd7c874..2977c6f 100644 --- a/ScreenTranslate/Resources/en.lproj/Localizable.strings +++ b/ScreenTranslate/Resources/en.lproj/Localizable.strings @@ -594,6 +594,16 @@ "settings.paddleocr.useCloud" = "Use Cloud API"; "settings.paddleocr.cloudBaseURL" = "Cloud API URL"; "settings.paddleocr.cloudAPIKey" = "API Key"; +"settings.paddleocr.useMLXVLM" = "Use MLX-VLM (Apple Silicon)"; +"settings.paddleocr.mlxVLMServerURL" = "MLX-VLM Server URL"; +"settings.paddleocr.mlxVLMModelName" = "Model Name"; +"settings.paddleocr.mlxVLMStatus" = "Server Status"; +"settings.paddleocr.mlxVLMChecking" = "Checking..."; +"settings.paddleocr.mlxVLMRunning" = "Running"; +"settings.paddleocr.mlxVLMNotRunning" = "Not Running"; +"settings.paddleocr.localVLModelDir" = "Local Model Directory"; +"settings.paddleocr.localVLModelDir.hint" = "Path to local PaddleOCR-VL model (e.g. ~/.paddlex/official_models/PaddleOCR-VL-1.5)"; +"error.paddleocr.notInstalled" = "PaddleOCR is not installed. 
Install it using: pip3 install paddleocr paddlepaddle"; /* ======================================== @@ -668,9 +678,9 @@ "translationFlow.error.title.translation" = "Translation Failed"; "translationFlow.error.title.rendering" = "Rendering Failed"; "translationFlow.error.unknown" = "An unknown error occurred."; -"translationFlow.error.analysis %@" = "Analysis failed: %@"; -"translationFlow.error.translation %@" = "Translation failed: %@"; -"translationFlow.error.rendering %@" = "Rendering failed: %@"; +"translationFlow.error.analysis" = "Analysis failed: %@"; +"translationFlow.error.translation" = "Translation failed: %@"; +"translationFlow.error.rendering" = "Rendering failed: %@"; "translationFlow.error.cancelled" = "Translation was cancelled."; "translationFlow.error.noTextFound" = "No text found in the selected area."; "translationFlow.error.translation.engine" = "Translation Engine"; diff --git a/ScreenTranslate/Resources/zh-Hans.lproj/Localizable.strings b/ScreenTranslate/Resources/zh-Hans.lproj/Localizable.strings index f573774..bee7b0c 100644 --- a/ScreenTranslate/Resources/zh-Hans.lproj/Localizable.strings +++ b/ScreenTranslate/Resources/zh-Hans.lproj/Localizable.strings @@ -594,6 +594,16 @@ "settings.paddleocr.useCloud" = "使用云端 API"; "settings.paddleocr.cloudBaseURL" = "云端 API 地址"; "settings.paddleocr.cloudAPIKey" = "API 密钥"; +"settings.paddleocr.useMLXVLM" = "使用 MLX-VLM (Apple Silicon)"; +"settings.paddleocr.mlxVLMServerURL" = "MLX-VLM 服务地址"; +"settings.paddleocr.mlxVLMModelName" = "模型名称"; +"settings.paddleocr.mlxVLMStatus" = "服务状态"; +"settings.paddleocr.mlxVLMChecking" = "检测中..."; +"settings.paddleocr.mlxVLMRunning" = "运行中"; +"settings.paddleocr.mlxVLMNotRunning" = "未运行"; +"settings.paddleocr.localVLModelDir" = "本地模型目录"; +"settings.paddleocr.localVLModelDir.hint" = "本地 PaddleOCR-VL 模型路径(如 ~/.paddlex/official_models/PaddleOCR-VL-1.5)"; +"error.paddleocr.notInstalled" = "PaddleOCR 未安装。请使用以下命令安装:pip3 install paddleocr paddlepaddle"; /* 
======================================== @@ -668,9 +678,9 @@ "translationFlow.error.title.translation" = "翻译失败"; "translationFlow.error.title.rendering" = "渲染失败"; "translationFlow.error.unknown" = "发生未知错误。"; -"translationFlow.error.analysis %@" = "分析失败:%@"; -"translationFlow.error.translation %@" = "翻译失败:%@"; -"translationFlow.error.rendering %@" = "渲染失败:%@"; +"translationFlow.error.analysis" = "分析失败:%@"; +"translationFlow.error.translation" = "翻译失败:%@"; +"translationFlow.error.rendering" = "渲染失败:%@"; "translationFlow.error.cancelled" = "翻译已取消。"; "translationFlow.error.noTextFound" = "选中区域未找到文字。"; "translationFlow.error.translation.engine" = "翻译引擎"; diff --git a/ScreenTranslate/Services/PaddleOCREngine.swift b/ScreenTranslate/Services/PaddleOCREngine.swift index a4e6569..3a3a043 100644 --- a/ScreenTranslate/Services/PaddleOCREngine.swift +++ b/ScreenTranslate/Services/PaddleOCREngine.swift @@ -54,6 +54,18 @@ actor PaddleOCREngine { /// Cloud API key var cloudAPIKey: String + /// Whether to use MLX-VLM inference framework (Apple Silicon optimization) + var useMLXVLM: Bool + + /// MLX-VLM server URL + var mlxVLMServerURL: String + + /// MLX-VLM model name + var mlxVLMModelName: String + + /// Local VL model directory (for native backend) + var localVLModelDir: String + static let `default` = Configuration( languages: [.chinese, .english], minimumConfidence: 0.0, @@ -63,7 +75,11 @@ actor PaddleOCREngine { mode: .fast, useCloud: false, cloudBaseURL: "", - cloudAPIKey: "" + cloudAPIKey: "", + useMLXVLM: false, + mlxVLMServerURL: "http://localhost:8111", + mlxVLMModelName: "PaddlePaddle/PaddleOCR-VL-1.5", + localVLModelDir: "" ) } @@ -230,13 +246,32 @@ actor PaddleOCREngine { "--use_angle_cls", config.useDirectionClassify ? "true" : "false" ] case .precise: - // Precise mode: use doc_parser with VL-1.5 (~12s) - return [ + // Precise mode: use doc_parser with VL-1.5 + var args = [ "doc_parser", "-i", imagePath, "--pipeline_version", "v1.5", "--device", config.useGPU ? 
"gpu" : "cpu" ] + + // Choose backend: MLX-VLM server or native (local model) + if config.useMLXVLM { + args += [ + "--vl_rec_backend", "mlx-vlm-server", + "--vl_rec_server_url", config.mlxVLMServerURL, + "--vl_rec_api_model_name", config.mlxVLMModelName + ] + } else if !config.localVLModelDir.isEmpty { + // Use native backend with local model + // Expand tilde in path (e.g., ~/.paddlex -> /Users/xxx/.paddlex) + let expandedPath = NSString(string: config.localVLModelDir).expandingTildeInPath + args += [ + "--vl_rec_backend", "native", + "--vl_rec_model_dir", expandedPath + ] + } + + return args } } @@ -579,8 +614,7 @@ actor PaddleOCREngine { } content = content.replacingOccurrences(of: "[,", with: "[") content = content.replacingOccurrences(of: ",]", with: "]") - // Handle edge case of empty nested arrays - content = content.replacingOccurrences(of: "[]", with: "[]") + // Empty nested arrays ("[]") are left unchanged return content } @@ -638,7 +672,7 @@ enum PaddleOCREngineError: LocalizedError, Sendable { ) case .recognitionFailed: return NSLocalizedString( - "error.ocr.recognition.failed", + "error.ocr.failed", comment: "Text recognition failed" ) case .invalidOutput: diff --git a/ScreenTranslate/Services/PaddleOCRVLMProvider.swift b/ScreenTranslate/Services/PaddleOCRVLMProvider.swift index 5f08bb6..5d44146 100644 --- a/ScreenTranslate/Services/PaddleOCRVLMProvider.swift +++ b/ScreenTranslate/Services/PaddleOCRVLMProvider.swift @@ -58,7 +58,7 @@ struct PaddleOCRVLMProvider: VLMProvider, Sendable { if !config.useCloud { guard await PaddleOCREngine.shared.isAvailable else { throw VLMProviderError.invalidConfiguration( - "PaddleOCR is not installed. 
Install it using: pip3 install paddleocr paddlepaddle" + NSLocalizedString("error.paddleocr.notInstalled", comment: "PaddleOCR not installed error") ) } } @@ -80,6 +80,10 @@ struct PaddleOCRVLMProvider: VLMProvider, Sendable { config.useCloud = settings.paddleOCRUseCloud config.cloudBaseURL = settings.paddleOCRCloudBaseURL config.cloudAPIKey = settings.paddleOCRCloudAPIKey + config.useMLXVLM = settings.paddleOCRUseMLXVLM + config.mlxVLMServerURL = settings.paddleOCRMLXVLMServerURL + config.mlxVLMModelName = settings.paddleOCRMLXVLMModelName + config.localVLModelDir = settings.paddleOCRLocalVLModelDir return config } @@ -226,16 +230,28 @@ private struct MergedLine { /// Checks if a character is CJK (Chinese/Japanese/Korean) private static func isCJKChar(_ char: Character) -> Bool { - let scalar = char.unicodeScalars.first?.value ?? 0 - // CJK Unified Ideographs: U+4E00-U+9FFF - // CJK Unified Ideographs Extension A: U+3400-U+4DBF - // Hiragana: U+3040-U+309F - // Katakana: U+30A0-U+30FF - // Hangul Syllables: U+AC00-U+D7AF - return (0x4E00...0x9FFF).contains(scalar) || - (0x3400...0x4DBF).contains(scalar) || - (0x3040...0x309F).contains(scalar) || - (0x30A0...0x30FF).contains(scalar) || - (0xAC00...0xD7AF).contains(scalar) + // Check every Unicode scalar, since a Character may be composed of several scalars + for scalar in char.unicodeScalars { + let value = scalar.value + // CJK Unified Ideographs: U+4E00-U+9FFF + // CJK Unified Ideographs Extension A: U+3400-U+4DBF + // Hiragana: U+3040-U+309F + // Katakana: U+30A0-U+30FF + // Hangul Syllables: U+AC00-U+D7AF + // CJK Symbols and Punctuation: U+3000-U+303F + // Halfwidth and Fullwidth Forms: U+FF00-U+FFEF + // CJK Extensions B-F and Compatibility Ideographs Supplement: U+20000-U+2FA1F + if (0x4E00...0x9FFF).contains(value) || + (0x3400...0x4DBF).contains(value) || + (0x3040...0x309F).contains(value) || + (0x30A0...0x30FF).contains(value) || + (0xAC00...0xD7AF).contains(value) || + (0x3000...0x303F).contains(value) || + (0xFF00...0xFFEF).contains(value) || + 
(0x20000...0x2FA1F).contains(value) { + return true + } + } + return false } } diff --git a/ScreenTranslate/Services/Security/KeychainService.swift b/ScreenTranslate/Services/Security/KeychainService.swift index 0ae63b7..de5a649 100644 --- a/ScreenTranslate/Services/Security/KeychainService.swift +++ b/ScreenTranslate/Services/Security/KeychainService.swift @@ -48,6 +48,19 @@ actor KeychainService { additional: additionalData ) + try saveCredentialsInternal( + credentials: credentials, + account: engine.rawValue, + label: engine.rawValue + ) + } + + /// Internal helper for saving credentials to keychain + /// - Parameters: + /// - credentials: The credentials to save + /// - account: The account identifier for the keychain item + /// - label: A descriptive label for logging + private func saveCredentialsInternal(credentials: StoredCredentials, account: String, label: String) throws { guard let encodedData = try? JSONEncoder().encode(credentials) else { throw KeychainError.invalidData } @@ -55,7 +68,7 @@ actor KeychainService { let query: [String: Any] = [ kSecClass as String: kSecClassGenericPassword, kSecAttrService as String: service, - kSecAttrAccount as String: engine.rawValue + kSecAttrAccount as String: account ] // Check if item exists and update it, or add new if not found @@ -68,27 +81,27 @@ actor KeychainService { ] let updateStatus = SecItemUpdate(query as CFDictionary, updateQuery as CFDictionary) guard updateStatus == errSecSuccess else { - logger.error("Failed to update credentials for \(engine.rawValue): \(updateStatus)") + logger.error("Failed to update credentials for \(label): \(updateStatus)") throw KeychainError.unexpectedStatus(updateStatus) } - logger.info("Updated credentials for \(engine.rawValue)") + logger.info("Updated credentials for \(label)") } else if status == errSecItemNotFound { // Item doesn't exist - add new let addQuery: [String: Any] = [ kSecClass as String: kSecClassGenericPassword, kSecAttrService as String: service, - 
kSecAttrAccount as String: engine.rawValue, + kSecAttrAccount as String: account, kSecValueData as String: encodedData, kSecAttrAccessible as String: kSecAttrAccessibleWhenUnlocked ] let addStatus = SecItemAdd(addQuery as CFDictionary, nil) guard addStatus == errSecSuccess else { - logger.error("Failed to save credentials for \(engine.rawValue): \(addStatus)") + logger.error("Failed to save credentials for \(label): \(addStatus)") throw KeychainError.unexpectedStatus(addStatus) } - logger.info("Saved credentials for \(engine.rawValue)") + logger.info("Saved credentials for \(label)") } else { - logger.error("Failed to check credentials for \(engine.rawValue): \(status)") + logger.error("Failed to check credentials for \(label): \(status)") throw KeychainError.unexpectedStatus(status) } } @@ -183,50 +196,11 @@ actor KeychainService { /// - compatibleId: The compatible engine identifier (e.g., "custom:0", "custom:1") func saveCredentials(apiKey: String, forCompatibleId compatibleId: String) throws { let credentials = StoredCredentials(apiKey: apiKey) - - guard let encodedData = try? 
JSONEncoder().encode(credentials) else { - throw KeychainError.invalidData - } - - let query: [String: Any] = [ - kSecClass as String: kSecClassGenericPassword, - kSecAttrService as String: service, - kSecAttrAccount as String: compatibleId - ] - - // Check if item exists and update it, or add new if not found - let status = SecItemCopyMatching(query as CFDictionary, nil) - if status == errSecSuccess { - // Item exists - update it - let updateQuery: [String: Any] = [ - kSecValueData as String: encodedData, - kSecAttrAccessible as String: kSecAttrAccessibleWhenUnlocked - ] - let updateStatus = SecItemUpdate(query as CFDictionary, updateQuery as CFDictionary) - guard updateStatus == errSecSuccess else { - logger.error("Failed to update credentials for \(compatibleId): \(updateStatus)") - throw KeychainError.unexpectedStatus(updateStatus) - } - logger.info("Updated credentials for compatible engine \(compatibleId)") - } else if status == errSecItemNotFound { - // Item doesn't exist - add new - let addQuery: [String: Any] = [ - kSecClass as String: kSecClassGenericPassword, - kSecAttrService as String: service, - kSecAttrAccount as String: compatibleId, - kSecValueData as String: encodedData, - kSecAttrAccessible as String: kSecAttrAccessibleWhenUnlocked - ] - let addStatus = SecItemAdd(addQuery as CFDictionary, nil) - guard addStatus == errSecSuccess else { - logger.error("Failed to save credentials for \(compatibleId): \(addStatus)") - throw KeychainError.unexpectedStatus(addStatus) - } - logger.info("Saved credentials for compatible engine \(compatibleId)") - } else { - logger.error("Failed to check credentials for \(compatibleId): \(status)") - throw KeychainError.unexpectedStatus(status) - } + try saveCredentialsInternal( + credentials: credentials, + account: compatibleId, + label: "compatible engine \(compatibleId)" + ) } /// Retrieve stored credentials for a compatible engine instance @@ -320,53 +294,12 @@ actor KeychainService { /// Save PaddleOCR cloud API 
key /// - Parameter apiKey: The API key to store func savePaddleOCRCredentials(apiKey: String) throws { - let account = Self.paddleOCRAccount - let credentials = StoredCredentials(apiKey: apiKey) - - guard let encodedData = try? JSONEncoder().encode(credentials) else { - throw KeychainError.invalidData - } - - let query: [String: Any] = [ - kSecClass as String: kSecClassGenericPassword, - kSecAttrService as String: service, - kSecAttrAccount as String: account - ] - - // Check if item exists and update it, or add new if not found - let status = SecItemCopyMatching(query as CFDictionary, nil) - if status == errSecSuccess { - // Item exists - update it - let updateQuery: [String: Any] = [ - kSecValueData as String: encodedData, - kSecAttrAccessible as String: kSecAttrAccessibleWhenUnlocked - ] - let updateStatus = SecItemUpdate(query as CFDictionary, updateQuery as CFDictionary) - guard updateStatus == errSecSuccess else { - logger.error("Failed to update PaddleOCR cloud credentials: \(updateStatus)") - throw KeychainError.unexpectedStatus(updateStatus) - } - logger.info("Updated PaddleOCR cloud credentials") - } else if status == errSecItemNotFound { - // Item doesn't exist - add new - let addQuery: [String: Any] = [ - kSecClass as String: kSecClassGenericPassword, - kSecAttrService as String: service, - kSecAttrAccount as String: account, - kSecValueData as String: encodedData, - kSecAttrAccessible as String: kSecAttrAccessibleWhenUnlocked - ] - let addStatus = SecItemAdd(addQuery as CFDictionary, nil) - guard addStatus == errSecSuccess else { - logger.error("Failed to save PaddleOCR cloud credentials: \(addStatus)") - throw KeychainError.unexpectedStatus(addStatus) - } - logger.info("Saved PaddleOCR cloud credentials") - } else { - logger.error("Failed to check PaddleOCR cloud credentials: \(status)") - throw KeychainError.unexpectedStatus(status) - } + try saveCredentialsInternal( + credentials: credentials, + account: Self.paddleOCRAccount, + label: "PaddleOCR 
cloud" + ) } /// Retrieve stored PaddleOCR cloud API key