diff --git a/ScreenTranslate/Features/Capture/CaptureManager.swift b/ScreenTranslate/Features/Capture/CaptureManager.swift index 814059f..f093b85 100644 --- a/ScreenTranslate/Features/Capture/CaptureManager.swift +++ b/ScreenTranslate/Features/Capture/CaptureManager.swift @@ -43,9 +43,27 @@ actor CaptureManager { // MARK: - Permission Handling /// Checks if the app has screen recording permission. - /// Uses CGPreflightScreenCaptureAccess() which does NOT trigger dialog. - /// - Returns: True if permission is granted + /// Uses SCShareableContent to actually verify permission works (not just cached status). + /// - Returns: True if permission is granted and functional var hasPermission: Bool { + get async { + // Quick check first + guard CGPreflightScreenCaptureAccess() else { + return false + } + // Actually verify by trying to get shareable content + do { + _ = try await SCShareableContent.current + return true + } catch { + return false + } + } + } + + /// Synchronous permission check using only CGPreflightScreenCaptureAccess. + /// Use only when async check is not possible. + var hasPermissionSync: Bool { CGPreflightScreenCaptureAccess() } @@ -70,8 +88,8 @@ actor CaptureManager { isCapturing = true defer { isCapturing = false } - // Check permission - guard hasPermission else { + // Check permission using async method + guard await hasPermission else { throw ScreenTranslateError.permissionDenied } @@ -139,8 +157,8 @@ actor CaptureManager { isCapturing = true defer { isCapturing = false } - // Check permission - guard hasPermission else { + // Check permission using async method + guard await hasPermission else { throw ScreenTranslateError.permissionDenied } diff --git a/ScreenTranslate/Features/Onboarding/OnboardingViewModel.swift b/ScreenTranslate/Features/Onboarding/OnboardingViewModel.swift index f36ba12..622ffe7 100644 --- a/ScreenTranslate/Features/Onboarding/OnboardingViewModel.swift +++ b/ScreenTranslate/Features/Onboarding/OnboardingViewModel.swift @@ -144,30 +144,26 @@ final class OnboardingViewModel { func checkPermissions() { hasAccessibilityPermission = AccessibilityPermissionChecker.hasPermission - // Check screen recording permission using multiple methods for reliability - hasScreenRecordingPermission = checkScreenRecordingPermission() + // Check screen recording permission using async method + Task { + hasScreenRecordingPermission = await checkScreenRecordingPermission() + } } - /// Checks screen recording permission using multiple methods for reliability - private func checkScreenRecordingPermission() -> Bool { - // Method 1: CGPreflightScreenCaptureAccess (may not work in all cases) - if CGPreflightScreenCaptureAccess() { - return true + /// Checks screen recording permission using ScreenCaptureKit for reliable detection + private func checkScreenRecordingPermission() async -> Bool { + // First do a quick check with CGPreflightScreenCaptureAccess + if !CGPreflightScreenCaptureAccess() { + return false } - // Method 2: Check if we can see windows from other apps - // If we have permission, we should see windows from other apps - let windowList = CGWindowListCopyWindowInfo([.optionOnScreenOnly], kCGNullWindowID) as? [[String: Any]] ?? [] - let ownPID = ProcessInfo.processInfo.processIdentifier - - // Count windows from other processes - let otherAppWindows = windowList.filter { window in - guard let ownerPID = window[kCGWindowOwnerPID as String] as? Int32 else { return false } - return ownerPID != ownPID + // Verify by actually trying to get shareable content + do { + _ = try await SCShareableContent.current + return true + } catch { + return false } - - // If we can see windows from other apps, we likely have permission - return otherAppWindows.count > 3 } /// Requests screen recording permission @@ -234,8 +230,8 @@ final class OnboardingViewModel { switch type { case .screenRecording: - // Use multiple methods to check permission without triggering dialog - let granted = checkScreenRecordingPermission() + // Use async ScreenCaptureKit check for reliable detection + let granted = await checkScreenRecordingPermission() if granted { hasScreenRecordingPermission = true permissionCheckTask = nil diff --git a/ScreenTranslate/Features/Settings/EngineSettingsTab.swift b/ScreenTranslate/Features/Settings/EngineSettingsTab.swift index 7b7b4d1..fee7765 100644 --- a/ScreenTranslate/Features/Settings/EngineSettingsTab.swift +++ b/ScreenTranslate/Features/Settings/EngineSettingsTab.swift @@ -37,98 +37,231 @@ struct VLMConfigurationSection: View { } } .pickerStyle(.segmented) - .frame(maxWidth: 300) + .frame(maxWidth: 400) } + } - GridRow { - Text(localized("settings.vlm.apiKey")) - .foregroundStyle(.secondary) - .gridColumnAlignment(.trailing) - HStack { - if showAPIKey { - TextField("", text: $viewModel.vlmAPIKey) - .textFieldStyle(.roundedBorder) - } else { - SecureField("", text: $viewModel.vlmAPIKey) - .textFieldStyle(.roundedBorder) + // PaddleOCR specific section + if viewModel.vlmProvider == .paddleocr { + PaddleOCRStatusSection(viewModel: viewModel) + } else { + // Standard VLM configuration for API-based providers + Grid(alignment: .leading, horizontalSpacing: 16, verticalSpacing: 12) { + GridRow { + Text(localized("settings.vlm.apiKey")) + .foregroundStyle(.secondary) + .gridColumnAlignment(.trailing) + HStack { + if showAPIKey { + TextField("", text: $viewModel.vlmAPIKey) + .textFieldStyle(.roundedBorder) + } else { + SecureField("", text: $viewModel.vlmAPIKey) + .textFieldStyle(.roundedBorder) + } + Button { + showAPIKey.toggle() + } label: { + Image(systemName: showAPIKey ? "eye.slash" : "eye") + } + .buttonStyle(.borderless) } - Button { - showAPIKey.toggle() - } label: { - Image(systemName: showAPIKey ? "eye.slash" : "eye") + .frame(maxWidth: 300) + } + + if !viewModel.vlmProvider.requiresAPIKey { + GridRow { + Color.clear.gridCellUnsizedAxes([.horizontal, .vertical]) + Text(localized("settings.vlm.apiKey.optional")) + .font(.caption) + .foregroundStyle(.secondary) } - .buttonStyle(.borderless) } - .frame(maxWidth: 300) - } - if !viewModel.vlmProvider.requiresAPIKey { GridRow { - Color.clear.gridCellUnsizedAxes([.horizontal, .vertical]) - Text(localized("settings.vlm.apiKey.optional")) - .font(.caption) + Text(localized("settings.vlm.baseURL")) .foregroundStyle(.secondary) + .gridColumnAlignment(.trailing) + TextField("", text: $viewModel.vlmBaseURL) + .textFieldStyle(.roundedBorder) + .frame(maxWidth: 300) + } + + GridRow { + Text(localized("settings.vlm.model")) + .foregroundStyle(.secondary) + .gridColumnAlignment(.trailing) + TextField("", text: $viewModel.vlmModelName) + .textFieldStyle(.roundedBorder) + .frame(maxWidth: 300) + } + } + + Text(viewModel.vlmProvider.providerDescription) + .font(.caption) + .foregroundStyle(.secondary) + + // Test API Connection Button + HStack { + Button { + viewModel.testVLMAPI() + } label: { + HStack(spacing: 6) { + if viewModel.isTestingVLM { + ProgressView() + .controlSize(.small) + } + Image(systemName: "bolt.fill") + Text(localized("settings.vlm.test.button")) + } + } + .buttonStyle(.bordered) + .controlSize(.small) + .disabled(viewModel.isTestingVLM) + + Spacer() + + if let result = viewModel.vlmTestResult { + HStack(spacing: 4) { + Image(systemName: viewModel.vlmTestSuccess ? "checkmark.circle.fill" : "xmark.circle.fill") + .foregroundStyle(viewModel.vlmTestSuccess ? Color.green : Color.red) + Text(result) + .font(.caption) + .foregroundStyle(viewModel.vlmTestSuccess ? .secondary : Color.red) + .lineLimit(2) + } + } + } + .padding(.top, 8) + } + } + .padding() + .background(Color(.controlBackgroundColor)) + .cornerRadius(8) + } +} + +// MARK: - PaddleOCR Status Section + +struct PaddleOCRStatusSection: View { + @Bindable var viewModel: SettingsViewModel + + var body: some View { + VStack(alignment: .leading, spacing: 12) { + // Status + HStack { + Image(systemName: viewModel.isPaddleOCRInstalled ? "checkmark.circle.fill" : "exclamationmark.triangle.fill") + .foregroundStyle(viewModel.isPaddleOCRInstalled ? .green : .orange) + + if viewModel.isPaddleOCRInstalled { + Text(localized("settings.paddleocr.ready")) + .foregroundStyle(.secondary) + if let version = viewModel.paddleOCRVersion, !version.isEmpty { + Text("(\(version))") + .font(.caption) + .foregroundStyle(.tertiary) } + } else { + Text(localized("settings.paddleocr.not.installed.message")) + .foregroundStyle(.secondary) } + } + // Mode selection + Grid(alignment: .leading, horizontalSpacing: 16, verticalSpacing: 12) { GridRow { - Text(localized("settings.vlm.baseURL")) + Text(localized("settings.paddleocr.mode")) .foregroundStyle(.secondary) .gridColumnAlignment(.trailing) - TextField("", text: $viewModel.vlmBaseURL) - .textFieldStyle(.roundedBorder) - .frame(maxWidth: 300) + Picker("", selection: $viewModel.paddleOCRMode) { + ForEach(PaddleOCRMode.allCases, id: \.self) { mode in + VStack(alignment: .leading) { + Text(mode.localizedName) + }.tag(mode) + } + } + .pickerStyle(.segmented) + .frame(maxWidth: 300) } + // Mode description GridRow { - Text(localized("settings.vlm.model")) + Color.clear.gridCellUnsizedAxes([.horizontal, .vertical]) + Text(viewModel.paddleOCRMode.description) + .font(.caption) + .foregroundStyle(.tertiary) + } + + // Cloud API toggle + GridRow { + Text(localized("settings.paddleocr.useCloud")) .foregroundStyle(.secondary) .gridColumnAlignment(.trailing) - TextField("", text: $viewModel.vlmModelName) - .textFieldStyle(.roundedBorder) - .frame(maxWidth: 300) + Toggle("", isOn: $viewModel.paddleOCRUseCloud) + .toggleStyle(.checkbox) + } + + // Cloud API settings (only show when useCloud is true) + if viewModel.paddleOCRUseCloud { + GridRow { + Text(localized("settings.paddleocr.cloudBaseURL")) + .foregroundStyle(.secondary) + .gridColumnAlignment(.trailing) + TextField("", text: $viewModel.paddleOCRCloudBaseURL) + .textFieldStyle(.roundedBorder) + .frame(maxWidth: 300) + } + + GridRow { + Text(localized("settings.paddleocr.cloudAPIKey")) + .foregroundStyle(.secondary) + .gridColumnAlignment(.trailing) + SecureField("", text: $viewModel.paddleOCRCloudAPIKey) + .textFieldStyle(.roundedBorder) + .frame(maxWidth: 300) + } } } - Text(viewModel.vlmProvider.providerDescription) + // Description + Text(localized("settings.paddleocr.description")) .font(.caption) - .foregroundStyle(.secondary) + .foregroundStyle(.tertiary) - // Test API Connection Button - HStack { - Button { - viewModel.testVLMAPI() - } label: { - HStack(spacing: 6) { - if viewModel.isTestingVLM { + // Install instructions or button + if !viewModel.isPaddleOCRInstalled { + VStack(alignment: .leading, spacing: 8) { + if viewModel.isInstallingPaddleOCR { + HStack { ProgressView() .controlSize(.small) + Text(localized("settings.paddleocr.installing")) + .foregroundStyle(.secondary) } - Image(systemName: "bolt.fill") - Text(localized("settings.vlm.test.button")) - } - } - .buttonStyle(.bordered) - .controlSize(.small) - .disabled(viewModel.isTestingVLM) + } else { + HStack(spacing: 12) { + Button(localized("settings.paddleocr.install.button")) { + viewModel.installPaddleOCR() + } + .buttonStyle(.bordered) + .controlSize(.small) - Spacer() + Button(localized("settings.paddleocr.copy.command.button")) { + viewModel.copyPaddleOCRInstallCommand() + } + .buttonStyle(.borderless) + .controlSize(.small) + } - if let result = viewModel.vlmTestResult { - HStack(spacing: 4) { - Image(systemName: viewModel.vlmTestSuccess ? "checkmark.circle.fill" : "xmark.circle.fill") - .foregroundStyle(viewModel.vlmTestSuccess ? Color.green : Color.red) - Text(result) - .font(.caption) - .foregroundStyle(viewModel.vlmTestSuccess ? .secondary : Color.red) - .lineLimit(2) + if let error = viewModel.paddleOCRInstallError { + Text(error) + .font(.caption) + .foregroundStyle(.red) + } } } } - .padding(.top, 8) } - .padding() - .background(Color(.controlBackgroundColor)) - .cornerRadius(8) + .padding(.top, 8) } } diff --git a/ScreenTranslate/Features/Settings/SettingsViewModel.swift b/ScreenTranslate/Features/Settings/SettingsViewModel.swift index 22e3832..723b130 100644 --- a/ScreenTranslate/Features/Settings/SettingsViewModel.swift +++ b/ScreenTranslate/Features/Settings/SettingsViewModel.swift @@ -100,6 +100,32 @@ final class SettingsViewModel { /// PaddleOCR version if installed var paddleOCRVersion: String? + // MARK: - PaddleOCR Settings + + /// PaddleOCR mode: fast or precise + var paddleOCRMode: PaddleOCRMode { + get { settings.paddleOCRMode } + set { settings.paddleOCRMode = newValue } + } + + /// Whether to use cloud API + var paddleOCRUseCloud: Bool { + get { settings.paddleOCRUseCloud } + set { settings.paddleOCRUseCloud = newValue } + } + + /// Cloud API base URL + var paddleOCRCloudBaseURL: String { + get { settings.paddleOCRCloudBaseURL } + set { settings.paddleOCRCloudBaseURL = newValue } + } + + /// Cloud API key + var paddleOCRCloudAPIKey: String { + get { settings.paddleOCRCloudAPIKey } + set { settings.paddleOCRCloudAPIKey = newValue } + } + // MARK: - VLM Test State /// Whether VLM API test is in progress @@ -375,34 +401,33 @@ final class SettingsViewModel { // Check folder access permission by testing if we can write to the save location hasFolderAccessPermission = checkFolderAccess(to: saveLocation) - // Check screen recording permission - // Try CGPreflightScreenCaptureAccess first, then fallback to window count check - hasScreenRecordingPermission = checkScreenRecordingPermission() + // Check screen recording permission using ScreenCaptureKit + // Cancel any existing task to avoid race conditions + permissionCheckTask?.cancel() - isCheckingPermissions = false + permissionCheckTask = Task { + let granted = await checkScreenRecordingPermission() + self.hasScreenRecordingPermission = granted + self.isCheckingPermissions = false + permissionCheckTask = nil + } } - /// Checks screen recording permission using multiple methods for reliability - private func checkScreenRecordingPermission() -> Bool { - // Method 1: CGPreflightScreenCaptureAccess (may not work in all cases) - if CGPreflightScreenCaptureAccess() { - return true + /// Checks screen recording permission using ScreenCaptureKit for reliable detection + private func checkScreenRecordingPermission() async -> Bool { + // First do a quick check with CGPreflightScreenCaptureAccess + if !CGPreflightScreenCaptureAccess() { + return false } - - // Method 2: Check if we can see windows from other apps - // If we have permission, we should see windows from other apps - let windowList = CGWindowListCopyWindowInfo([.optionOnScreenOnly], kCGNullWindowID) as? [[String: Any]] ?? [] - let ownPID = ProcessInfo.processInfo.processIdentifier - - // Count windows from other processes - let otherAppWindows = windowList.filter { window in - guard let ownerPID = window[kCGWindowOwnerPID as String] as? Int32 else { return false } - return ownerPID != ownPID + + // Verify by actually trying to get shareable content + // This ensures permission is truly granted (not just cached) + do { + _ = try await SCShareableContent.current + return true + } catch { + return false } - - // If we can see windows from other apps, we likely have permission - // (There should be at least a few windows from Finder, Dock, etc.) - return otherAppWindows.count > 3 } /// Checks if we have write access to the specified folder @@ -486,8 +511,8 @@ final class SettingsViewModel { switch type { case .screenRecording: - // Use CGPreflightScreenCaptureAccess to check without triggering dialog - let granted = CGPreflightScreenCaptureAccess() + // Use the same reliable check method + let granted = await checkScreenRecordingPermission() if granted { hasScreenRecordingPermission = true permissionCheckTask = nil @@ -871,6 +896,59 @@ final class SettingsViewModel { return try await testClaudeConnection(baseURL: baseURL, apiKey: apiKey, modelName: modelName) case .ollama: return try await testOllamaConnection(baseURL: baseURL, modelName: modelName) + case .paddleocr: + return try await testPaddleOCRConnection() + } + } + + /// Tests PaddleOCR availability - checks cloud mode first, then local + private func testPaddleOCRConnection() async throws -> (success: Bool, message: String) { + let settings = AppSettings.shared + + // If cloud mode is enabled, test cloud connectivity first + if settings.paddleOCRUseCloud { + let cloudBaseURL = settings.paddleOCRCloudBaseURL.trimmingCharacters(in: .whitespaces) + guard !cloudBaseURL.isEmpty, + let url = URL(string: cloudBaseURL) else { + throw VLMProviderError.invalidConfiguration("PaddleOCR cloud base URL is not configured") + } + + // Test cloud API connectivity with a simple request + var request = URLRequest(url: url) + request.timeoutInterval = 10 + + // Add API key if configured + let apiKey = settings.paddleOCRCloudAPIKey.trimmingCharacters(in: .whitespaces) + if !apiKey.isEmpty { + request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization") + } + + do { + let (_, response) = try await URLSession.shared.data(for: request) + guard let httpResponse = response as? HTTPURLResponse else { + throw VLMProviderError.invalidResponse("Invalid HTTP response from PaddleOCR cloud") + } + switch httpResponse.statusCode { + case 200, 404: // 404 is acceptable - means server is reachable + return (true, "PaddleOCR cloud is reachable") + case 401, 403: + throw VLMProviderError.authenticationFailed + default: + throw VLMProviderError.invalidResponse("HTTP \(httpResponse.statusCode)") + } + } catch let error as VLMProviderError { + throw error + } catch { + throw VLMProviderError.invalidConfiguration("PaddleOCR cloud is not reachable: \(error.localizedDescription)") + } + } + + // Local mode - check if PaddleOCR is installed + let isAvailable = await PaddleOCREngine.shared.isAvailable + if isAvailable { + return (true, "PaddleOCR is ready") + } else { + throw VLMProviderError.invalidConfiguration("PaddleOCR is not installed. Install it using: pip3 install paddleocr paddlepaddle") } } diff --git a/ScreenTranslate/Models/AppSettings.swift b/ScreenTranslate/Models/AppSettings.swift index 359ef81..9c020d7 100644 --- a/ScreenTranslate/Models/AppSettings.swift +++ b/ScreenTranslate/Models/AppSettings.swift @@ -1,6 +1,31 @@ import Foundation import SwiftUI import os +import Security + +/// PaddleOCR mode selection +enum PaddleOCRMode: String, Codable, CaseIterable, Sendable { + case fast + case precise + + var localizedName: String { + switch self { + case .fast: + return NSLocalizedString("settings.paddleocr.mode.fast", comment: "Fast mode") + case .precise: + return NSLocalizedString("settings.paddleocr.mode.precise", comment: "Precise mode") + } + } + + var description: String { + switch self { + case .fast: + return NSLocalizedString("settings.paddleocr.mode.fast.description", comment: "~1s, uses groupIntoLines") + case .precise: + return NSLocalizedString("settings.paddleocr.mode.precise.description", comment: "~12s, VL-1.5 model") + } + } +} /// User preferences persisted across sessions via UserDefaults. /// All properties automatically sync to UserDefaults with the `ScreenTranslate.` prefix. @@ -58,6 +83,11 @@ final class AppSettings { static let sceneBindings = prefix + "sceneBindings" static let parallelEngines = prefix + "parallelEngines" static let compatibleProviderConfigs = prefix + "compatibleProviderConfigs" + // PaddleOCR Configuration + static let paddleOCRMode = prefix + "paddleOCRMode" + static let paddleOCRUseCloud = prefix + "paddleOCRUseCloud" + static let paddleOCRCloudBaseURL = prefix + "paddleOCRCloudBaseURL" + static let paddleOCRCloudAPIKey = prefix + "paddleOCRCloudAPIKey" } // MARK: - Properties @@ -262,6 +292,39 @@ final class AppSettings { didSet { saveCompatibleConfigs() } } + // MARK: - PaddleOCR Configuration + + /// PaddleOCR mode: fast (ocr command) or precise (doc_parser VL-1.5) + var paddleOCRMode: PaddleOCRMode { + didSet { save(paddleOCRMode.rawValue, forKey: Keys.paddleOCRMode) } + } + + /// Whether to use cloud API instead of local CLI + var paddleOCRUseCloud: Bool { + didSet { save(paddleOCRUseCloud, forKey: Keys.paddleOCRUseCloud) } + } + + /// Cloud API base URL (for third-party PaddleOCR cloud services) + var paddleOCRCloudBaseURL: String { + didSet { save(paddleOCRCloudBaseURL, forKey: Keys.paddleOCRCloudBaseURL) } + } + + /// Cloud API key (stored securely in Keychain, not UserDefaults) + var paddleOCRCloudAPIKey: String { + didSet { + // Capture the value on the actor before spawning detached task + let capturedKey = paddleOCRCloudAPIKey + // Save to Keychain asynchronously + Task.detached { + do { + try await KeychainService.shared.savePaddleOCRCredentials(apiKey: capturedKey) + } catch { + Logger.settings.error("Failed to save PaddleOCR cloud API key to Keychain: \(error)") + } + } + } + } + // MARK: - Initialization private init() { @@ -358,6 +421,15 @@ final class AppSettings { parallelEngines = Self.loadParallelEngines() compatibleProviderConfigs = Self.loadCompatibleConfigs() + // Load PaddleOCR configuration + paddleOCRMode = defaults.string(forKey: Keys.paddleOCRMode) + .flatMap { PaddleOCRMode(rawValue: $0) } ?? .fast + paddleOCRUseCloud = defaults.object(forKey: Keys.paddleOCRUseCloud) as? Bool ?? false + paddleOCRCloudBaseURL = defaults.string(forKey: Keys.paddleOCRCloudBaseURL) ?? "" + + // Load PaddleOCR cloud API key from Keychain (secure storage) + paddleOCRCloudAPIKey = Self.loadPaddleOCRAPIKeyFromKeychain() + Logger.settings.info("ScreenCapture launched - settings loaded from: \(loadedLocation.path)") } @@ -400,6 +472,15 @@ final class AppSettings { onboardingCompleted = false translateAndInsertSourceLanguage = .auto translateAndInsertTargetLanguage = nil + // Reset PaddleOCR settings + paddleOCRMode = .fast + paddleOCRUseCloud = false + paddleOCRCloudBaseURL = "" + paddleOCRCloudAPIKey = "" + // Delete PaddleOCR cloud API key from Keychain + Task.detached { + try? await KeychainService.shared.deletePaddleOCRCredentials() + } // Reset multi-engine configuration - directly create defaults, don't load from persistence engineSelectionMode = .primaryWithFallback var defaultConfigs: [TranslationEngineType: TranslationEngineConfig] = [:] @@ -453,6 +534,31 @@ final class AppSettings { return try? JSONDecoder().decode(CodableColor.self, from: data) } + // MARK: - Keychain Helpers + + /// Load PaddleOCR cloud API key from Keychain synchronously + private static func loadPaddleOCRAPIKeyFromKeychain() -> String { + // Use shared constants from KeychainService + let query: [String: Any] = [ + kSecClass as String: kSecClassGenericPassword, + kSecAttrService as String: KeychainService.serviceIdentifier, + kSecAttrAccount as String: KeychainService.paddleOCRAccount, + kSecReturnData as String: true, + kSecMatchLimit as String: kSecMatchLimitOne + ] + + var result: CFTypeRef? + let status = SecItemCopyMatching(query as CFDictionary, &result) + + guard status == errSecSuccess, + let data = result as? Data, + let credentials = try? JSONDecoder().decode(StoredCredentials.self, from: data) else { + return "" + } + + return credentials.apiKey + } + // MARK: - Multi-Engine Persistence Helpers private func saveEngineConfigs() { diff --git a/ScreenTranslate/Models/VLMProviderType.swift b/ScreenTranslate/Models/VLMProviderType.swift index e4cc19a..bf35e38 100644 --- a/ScreenTranslate/Models/VLMProviderType.swift +++ b/ScreenTranslate/Models/VLMProviderType.swift @@ -12,6 +12,7 @@ enum VLMProviderType: String, CaseIterable, Sendable, Codable, Identifiable { case openai = "openai" case claude = "claude" case ollama = "ollama" + case paddleocr = "paddleocr" var id: String { rawValue } @@ -24,6 +25,8 @@ enum VLMProviderType: String, CaseIterable, Sendable, Codable, Identifiable { return NSLocalizedString("vlm.provider.claude", comment: "Claude") case .ollama: return NSLocalizedString("vlm.provider.ollama", comment: "Ollama") + case .paddleocr: + return NSLocalizedString("vlm.provider.paddleocr", comment: "PaddleOCR") } } @@ -45,6 +48,11 @@ enum VLMProviderType: String, CaseIterable, Sendable, Codable, Identifiable { "vlm.provider.ollama.description", comment: "Local Ollama server" ) + case .paddleocr: + return NSLocalizedString( + "vlm.provider.paddleocr.description", + comment: "Local OCR engine (free, offline)" + ) } } @@ -57,6 +65,8 @@ enum VLMProviderType: String, CaseIterable, Sendable, Codable, Identifiable { return "https://api.anthropic.com/v1" case .ollama: return "http://localhost:11434" + case .paddleocr: + return "" } } @@ -69,6 +79,8 @@ enum VLMProviderType: String, CaseIterable, Sendable, Codable, Identifiable { return "claude-sonnet-4-20250514" case .ollama: return "llava" + case .paddleocr: + return "" } } @@ -77,7 +89,7 @@ enum VLMProviderType: String, CaseIterable, Sendable, Codable, Identifiable { switch self { case .openai, .claude: return true - case .ollama: + case .ollama, .paddleocr: return false } } diff --git a/ScreenTranslate/Resources/en.lproj/Localizable.strings b/ScreenTranslate/Resources/en.lproj/Localizable.strings index 619dd4e..dd7c874 100644 --- a/ScreenTranslate/Resources/en.lproj/Localizable.strings +++ b/ScreenTranslate/Resources/en.lproj/Localizable.strings @@ -581,6 +581,19 @@ "settings.paddleocr.install.hint" = "Requires Python 3 and pip installed on your system."; "settings.paddleocr.copy.command" = "Copy Command"; "settings.paddleocr.refresh" = "Refresh Status"; +"settings.paddleocr.ready" = "PaddleOCR is ready"; +"settings.paddleocr.not.installed.message" = "PaddleOCR is not installed"; +"settings.paddleocr.description" = "PaddleOCR is a local OCR engine. It's free, works offline, and doesn't require an API key."; +"settings.paddleocr.install.button" = "Install PaddleOCR"; +"settings.paddleocr.copy.command.button" = "Copy Install Command"; +"settings.paddleocr.mode" = "Mode"; +"settings.paddleocr.mode.fast" = "Fast"; +"settings.paddleocr.mode.precise" = "Precise"; +"settings.paddleocr.mode.fast.description" = "~1s, fast OCR with line grouping"; +"settings.paddleocr.mode.precise.description" = "~12s, VL-1.5 model with higher accuracy"; +"settings.paddleocr.useCloud" = "Use Cloud API"; +"settings.paddleocr.cloudBaseURL" = "Cloud API URL"; +"settings.paddleocr.cloudAPIKey" = "API Key"; /* ======================================== @@ -601,9 +614,11 @@ "vlm.provider.openai" = "OpenAI"; "vlm.provider.claude" = "Claude"; "vlm.provider.ollama" = "Ollama"; +"vlm.provider.paddleocr" = "PaddleOCR"; "vlm.provider.openai.description" = "OpenAI GPT-4 Vision API"; "vlm.provider.claude.description" = "Anthropic Claude Vision API"; "vlm.provider.ollama.description" = "Local Ollama server"; +"vlm.provider.paddleocr.description" = "Local OCR engine (free, offline)"; /* ======================================== diff --git a/ScreenTranslate/Resources/zh-Hans.lproj/Localizable.strings b/ScreenTranslate/Resources/zh-Hans.lproj/Localizable.strings index bcb5432..f573774 100644 --- a/ScreenTranslate/Resources/zh-Hans.lproj/Localizable.strings +++ b/ScreenTranslate/Resources/zh-Hans.lproj/Localizable.strings @@ -581,6 +581,19 @@ "settings.paddleocr.install.hint" = "需要在系统上安装 Python 3 和 pip。"; "settings.paddleocr.copy.command" = "复制命令"; "settings.paddleocr.refresh" = "刷新状态"; +"settings.paddleocr.ready" = "PaddleOCR 已就绪"; +"settings.paddleocr.not.installed.message" = "PaddleOCR 未安装"; +"settings.paddleocr.description" = "PaddleOCR 是本地 OCR 引擎。免费、离线可用,无需 API 密钥。"; +"settings.paddleocr.install.button" = "安装 PaddleOCR"; +"settings.paddleocr.copy.command.button" = "复制安装命令"; +"settings.paddleocr.mode" = "模式"; +"settings.paddleocr.mode.fast" = "快速"; +"settings.paddleocr.mode.precise" = "精确"; +"settings.paddleocr.mode.fast.description" = "~1秒,快速 OCR 并自动合并行"; +"settings.paddleocr.mode.precise.description" = "~12秒,VL-1.5 模型,更高精度"; +"settings.paddleocr.useCloud" = "使用云端 API"; +"settings.paddleocr.cloudBaseURL" = "云端 API 地址"; +"settings.paddleocr.cloudAPIKey" = "API 密钥"; /* ======================================== @@ -601,9 +614,11 @@ "vlm.provider.openai" = "OpenAI"; "vlm.provider.claude" = "Claude"; "vlm.provider.ollama" = "Ollama"; +"vlm.provider.paddleocr" = "PaddleOCR"; "vlm.provider.openai.description" = "OpenAI GPT-4 Vision API"; "vlm.provider.claude.description" = "Anthropic Claude Vision API"; "vlm.provider.ollama.description" = "本地 Ollama 服务器"; +"vlm.provider.paddleocr.description" = "本地 OCR 引擎(免费、离线可用)"; /* ======================================== diff --git a/ScreenTranslate/Services/PaddleOCREngine.swift b/ScreenTranslate/Services/PaddleOCREngine.swift index 96dff27..a4e6569 100644 --- a/ScreenTranslate/Services/PaddleOCREngine.swift +++ b/ScreenTranslate/Services/PaddleOCREngine.swift @@ -42,12 +42,28 @@ actor PaddleOCREngine { /// Detection model type var detectionModel: DetectionModel + /// OCR mode: fast (ocr command) or precise (doc_parser VL-1.5) + var mode: PaddleOCRMode + + /// Whether to use cloud API + var useCloud: Bool + + /// Cloud API base URL + var cloudBaseURL: String + + /// Cloud API key + var cloudAPIKey: String + static let `default` = Configuration( languages: [.chinese, .english], minimumConfidence: 0.0, useGPU: false, useDirectionClassify: true, - detectionModel: .default + detectionModel: .default, + mode: .fast, + useCloud: false, + cloudBaseURL: "", + cloudAPIKey: "" ) } @@ -140,7 +156,7 @@ actor PaddleOCREngine { let result = try await executePaddleOCR(arguments: arguments) // Parse output - let observations = try parsePaddleOCROutput(result, imageSize: CGSize(width: image.width, height: image.height)) + let observations = try parsePaddleOCROutput(result, imageSize: CGSize(width: image.width, height: image.height), mode: config.mode) // Filter by confidence let filteredTexts = observations.filter { $0.confidence >= config.minimumConfidence } @@ -203,18 +219,25 @@ actor PaddleOCREngine { /// Builds command line arguments for PaddleOCR private func buildArguments(config: Configuration, imagePath: String) -> [String] { - var args = [ - "ocr", - "-i", imagePath, - "--lang", "ch" - ] - - if config.useGPU { - args.append("--device") - args.append("gpu") + switch config.mode { + case .fast: + // Fast mode: use ocr command (~1s) + let langCode = config.languages.contains(.chinese) ? "ch" : "en" + return [ + "ocr", + "-i", imagePath, + "--lang", langCode, + "--use_angle_cls", config.useDirectionClassify ? "true" : "false" + ] + case .precise: + // Precise mode: use doc_parser with VL-1.5 (~12s) + return [ + "doc_parser", + "-i", imagePath, + "--pipeline_version", "v1.5", + "--device", config.useGPU ? "gpu" : "cpu" + ] } - - return args } /// Executes PaddleOCR with the given arguments @@ -298,8 +321,8 @@ actor PaddleOCREngine { return nil } - /// Parses PaddleOCR JSON output into OCRText observations - private func parsePaddleOCROutput(_ output: String, imageSize: CGSize) throws -> [OCRText] { + /// Parses PaddleOCR output into OCRText observations + private func parsePaddleOCROutput(_ output: String, imageSize: CGSize, mode: PaddleOCRMode) throws -> [OCRText] { var observations: [OCRText] = [] guard let startIndex = output.firstIndex(of: "{"), @@ -310,36 +333,150 @@ actor PaddleOCREngine { let jsonLike = String(output[startIndex...endIndex]) let cleanedJson = convertPythonDictToJson(jsonLike) - + Logger.ocr.debug("Cleaned JSON: \(cleanedJson.prefix(500))") - guard let jsonData = cleanedJson.data(using: .utf8), - let json = try? JSONSerialization.jsonObject(with: jsonData) as? [String: Any], - let res = json["res"] as? [String: Any] else { - Logger.ocr.error("Failed to parse JSON") + guard let jsonData = cleanedJson.data(using: .utf8) else { + Logger.ocr.error("Failed to convert cleaned JSON to data") + return observations + } + + // Try to parse JSON and log detailed error + var json: [String: Any]? + do { + json = try JSONSerialization.jsonObject(with: jsonData) as? [String: Any] + } catch { + Logger.ocr.error("JSON parse error: \(error.localizedDescription)") + // Log the problematic JSON (last 1000 chars to find the issue) + if let jsonStr = String(data: jsonData, encoding: .utf8) { + Logger.ocr.error("JSON end portion: ...\(jsonStr.suffix(500))") + } + return observations + } + + guard let json = json else { + Logger.ocr.error("Failed to parse JSON as dictionary") + return observations + } + + guard let res = json["res"] as? [String: Any] else { + Logger.ocr.error("No 'res' key in JSON. Keys: \(json.keys.joined(separator: ", "))") return observations } + switch mode { + case .fast: + // Fast mode: parse rec_texts format + observations = try parseFastModeOutput(res: res, imageSize: imageSize) + case .precise: + // Precise mode: parse doc_parser output format: parsing_res_list + observations = try parsePreciseModeOutput(res: res, imageSize: imageSize) + } + + return observations + } + + /// Parse fast mode output (ocr command) + private func parseFastModeOutput(res: [String: Any], imageSize: CGSize) throws -> [OCRText] { + var observations: [OCRText] = [] + + // Fast mode output has parallel arrays: rec_texts, rec_scores, rec_boxes guard let recTexts = res["rec_texts"] as? [String] else { - Logger.ocr.error("No rec_texts found") + Logger.ocr.error("No rec_texts found in fast mode output. Keys: \(res.keys.joined(separator: ", "))") return observations } - - let recScores = res["rec_scores"] as? [Double] ?? [] - let recBoxes = res["rec_boxes"] as? [[Int]] ?? [] - - Logger.ocr.info("Found \(recTexts.count) texts, \(recBoxes.count) boxes") + + // Get rec_boxes and rec_scores (optional) + let recBoxes = res["rec_boxes"] as? [[Double]] + let recScores = res["rec_scores"] as? [Double] + + Logger.ocr.info("Found \(recTexts.count) text blocks from fast mode") for (index, text) in recTexts.enumerated() { - let confidence = index < recScores.count ? Float(recScores[index]) : 0.5 - + guard !text.isEmpty else { continue } + + // Get bounding box from rec_boxes (format: [[x1, y1, x2, y2], ...]) + var boundingBox: CGRect + if let boxes = recBoxes, index < boxes.count { + let box = boxes[index] + if box.count >= 4 { + let x = CGFloat(box[0]) + let y = CGFloat(box[1]) + let x2 = CGFloat(box[2]) + let y2 = CGFloat(box[3]) + boundingBox = CGRect( + x: x / imageSize.width, + y: y / imageSize.height, + width: (x2 - x) / imageSize.width, + height: (y2 - y) / imageSize.height + ) + } else { + boundingBox = CGRect(x: 0, y: CGFloat(index) * 0.1, width: 1, height: 0.1) + } + } else { + // Fallback: stack vertically + boundingBox = CGRect(x: 0, y: CGFloat(index) * 0.1, width: 1, height: 0.1) + } + + // Get confidence from rec_scores + let confidence: Float + if let scores = recScores, index < scores.count { + confidence = Float(scores[index]) + } else { + confidence = 0.9 + } + + let observation = OCRText( + text: text, + boundingBox: boundingBox, + confidence: confidence + ) + observations.append(observation) + Logger.ocr.debug("Fast mode block: '\(text)', box: \(String(describing: boundingBox))") + } + + return observations + } + + /// Parse precise mode output (doc_parser VL-1.5) + private func parsePreciseModeOutput(res: [String: Any], imageSize: CGSize) throws -> [OCRText] { + var observations: [OCRText] = [] + + // Log all keys in res for debugging + Logger.ocr.info("Precise mode res keys: \(res.keys.joined(separator: ", "))") + + guard let parsingResList = res["parsing_res_list"] as? [[String: Any]] else { + Logger.ocr.error("No parsing_res_list found in res. Available keys: \(res.keys.joined(separator: ", "))") + // Try to log the raw res for debugging + if let resData = try? JSONSerialization.data(withJSONObject: res), + let resStr = String(data: resData, encoding: .utf8) { + Logger.ocr.debug("Raw res content: \(resStr.prefix(1000))") + } + return observations + } + + Logger.ocr.info("Found \(parsingResList.count) blocks from doc_parser") + + for (index, block) in parsingResList.enumerated() { + guard let text = block["block_content"] as? String else { + continue + } + + // Skip non-text blocks (charts, seals, images, etc.) + if let label = block["block_label"] as? String { + let skipLabels = ["chart", "seal", "image", "table", "figure"] + if skipLabels.contains(where: { label.lowercased().contains($0) }) { + Logger.ocr.debug("Skipping non-text block: \(label)") + continue + } + } + var boundingBox: CGRect - if index < recBoxes.count && recBoxes[index].count >= 4 { - let box = recBoxes[index] - let x = CGFloat(box[0]) - let y = CGFloat(box[1]) - let x2 = CGFloat(box[2]) - let y2 = CGFloat(box[3]) + if let bbox = block["block_bbox"] as? [Double], bbox.count >= 4 { + let x = CGFloat(bbox[0]) + let y = CGFloat(bbox[1]) + let x2 = CGFloat(bbox[2]) + let y2 = CGFloat(bbox[3]) boundingBox = CGRect( x: x / imageSize.width, y: y / imageSize.height, @@ -349,14 +486,17 @@ actor PaddleOCREngine { } else { boundingBox = CGRect(x: 0, y: CGFloat(index) * 0.1, width: 1, height: 0.1) } - + + // doc_parser doesn't provide confidence scores per block, use default + let confidence: Float = 0.9 + let observation = OCRText( text: text, boundingBox: boundingBox, confidence: confidence ) observations.append(observation) - Logger.ocr.debug("Text: '\(text)', box: \(String(describing: boundingBox)), confidence: \(confidence)") + Logger.ocr.debug("Block: '\(text)', box: \(String(describing: boundingBox))") } return observations @@ -371,6 +511,13 @@ actor PaddleOCREngine { result = convertNumpyArraysToJson(result) + // Fix float format: "8." -> "8.0", "-5." -> "-5.0" (valid JSON) + let floatPattern = #"(-?\d+)\.\s*([,\]\}])"# + if let regex = try? NSRegularExpression(pattern: floatPattern) { + let range = NSRange(result.startIndex..., in: result) + result = regex.stringByReplacingMatches(in: result, options: [], range: range, withTemplate: "$1.0$2") + } + return result } @@ -409,26 +556,34 @@ actor PaddleOCREngine { private func extractArrayContent(from arrayContent: String) -> String { var content = arrayContent - + + // Remove shape and dtype info if let shapeRange = content.range(of: ", shape=") { content = String(content[.. ScreenAnalysisResult { + // Build configuration from AppSettings first + let config = await buildConfiguration() + + // Check local availability only for local mode + if !config.useCloud { + guard await PaddleOCREngine.shared.isAvailable else { + throw VLMProviderError.invalidConfiguration( + "PaddleOCR is not installed. Install it using: pip3 install paddleocr paddlepaddle" + ) + } + } + + // Perform OCR using PaddleOCREngine with settings + let ocrResult = try await PaddleOCREngine.shared.recognize(image, config: config) + + // Convert OCRResult to ScreenAnalysisResult + return convertToScreenAnalysisResult(ocrResult, mode: config.mode) + } + + // MARK: - Private Methods + + @MainActor + private func buildConfiguration() -> PaddleOCREngine.Configuration { + let settings = AppSettings.shared + var config = PaddleOCREngine.Configuration.default + config.mode = settings.paddleOCRMode + config.useCloud = settings.paddleOCRUseCloud + config.cloudBaseURL = settings.paddleOCRCloudBaseURL + config.cloudAPIKey = settings.paddleOCRCloudAPIKey + return config + } + + private func convertToScreenAnalysisResult(_ ocrResult: OCRResult, mode: PaddleOCRMode) -> ScreenAnalysisResult { + // For precise mode (doc_parser), the output is already in block format, no need to group + // For fast mode (ocr command), we need to group into lines + let segments: [TextSegment] + switch mode { + case .precise: + // Precise mode: already in block format, convert directly + segments = ocrResult.observations.map { observation in + TextSegment( + text: observation.text, + boundingBox: observation.boundingBox, + confidence: observation.confidence + ) + } + case .fast: + // Fast mode: group into lines based on vertical position + let lines = groupIntoLines(ocrResult.observations, imageSize: ocrResult.imageSize) + segments = lines.map { line -> TextSegment in + TextSegment( + text: line.text, + boundingBox: line.boundingBox, + confidence: line.confidence + ) + } + } + + return ScreenAnalysisResult( + segments: segments, + imageSize: ocrResult.imageSize + ) + } + + /// Groups OCR texts into lines based on vertical position overlap + private func groupIntoLines(_ observations: [OCRText], imageSize: CGSize) -> [MergedLine] { + guard !observations.isEmpty else { return [] } + + // Sort by Y position (top to bottom), then by X position (left to right) + let sortedObservations = observations.sorted { a, b in + let yTolerance = min(a.boundingBox.height, b.boundingBox.height) * 0.5 + if abs(a.boundingBox.minY - b.boundingBox.minY) > yTolerance { + return a.boundingBox.minY < b.boundingBox.minY + } + return a.boundingBox.minX < b.boundingBox.minX + } + + var lines: [MergedLine] = [] + var currentLine: MergedLine? + + for observation in sortedObservations { + if let line = currentLine { + // Check if this observation is on the same line (Y position overlap) + let yOverlap = max(0, + min(line.boundingBox.maxY, observation.boundingBox.maxY) - + max(line.boundingBox.minY, observation.boundingBox.minY) + ) + let minHeight = min(line.boundingBox.height, observation.boundingBox.height) + + // If there's significant Y overlap, add to current line + if yOverlap > minHeight * 0.3 { + currentLine = line.merged(with: observation) + } else { + // Start a new line + lines.append(line) + currentLine = MergedLine(from: observation) + } + } else { + currentLine = MergedLine(from: observation) + } + } + + // Don't forget the last line + if let line = currentLine { + lines.append(line) + } + + return lines + } +} + +/// Helper struct to merge OCR texts into lines +private struct MergedLine { + let text: String + let boundingBox: CGRect + let confidence: Float + + init(text: String, boundingBox: CGRect, confidence: Float) { + self.text = text + self.boundingBox = boundingBox + self.confidence = confidence + } + + init(from observation: OCRText) { + self.text = observation.text + self.boundingBox = observation.boundingBox + self.confidence = observation.confidence + } + + func merged(with other: OCRText) -> MergedLine { + // Combine texts with appropriate separator for CJK vs non-CJK + let separator = Self.separator(for: text, and: other.text) + let combinedText = text + separator + other.text + + // Merge bounding boxes + let mergedBox = boundingBox.union(other.boundingBox) + + // Average confidence weighted by text length + let totalLength = text.count + other.text.count + let weightedConfidence: Float + if totalLength == 0 { + // Edge case: both texts are empty, use average of confidences + weightedConfidence = (confidence + other.confidence) / 2.0 + } else { + weightedConfidence = ( + Float(text.count) * confidence + + Float(other.text.count) * other.confidence + ) / Float(totalLength) + } + + return MergedLine( + text: combinedText, + boundingBox: mergedBox, + confidence: weightedConfidence + ) + } + + /// Returns appropriate separator between two text segments based on CJK detection + /// Checks the last character of the first string and the first character of the second string + private static func separator(for first: String, and second: String) -> String { + // Check last character of first string and first character of second string + // This handles mixed-content cases like "Hello世界" correctly + guard let firstLast = first.last, + let secondFirst = second.first else { + return " " // Default to space if either string is empty + } + + let firstLastIsCJK = isCJKChar(firstLast) + let secondFirstIsCJK = isCJKChar(secondFirst) + // No space between CJK characters, space otherwise + return (firstLastIsCJK && secondFirstIsCJK) ? "" : " " + } + + /// Checks if a character is CJK (Chinese/Japanese/Korean) + private static func isCJKChar(_ char: Character) -> Bool { + let scalar = char.unicodeScalars.first?.value ?? 0 + // CJK Unified Ideographs: U+4E00-U+9FFF + // CJK Unified Ideographs Extension A: U+3400-U+4DBF + // Hiragana: U+3040-U+309F + // Katakana: U+30A0-U+30FF + // Hangul Syllables: U+AC00-U+D7AF + return (0x4E00...0x9FFF).contains(scalar) || + (0x3400...0x4DBF).contains(scalar) || + (0x3040...0x309F).contains(scalar) || + (0x30A0...0x30FF).contains(scalar) || + (0xAC00...0xD7AF).contains(scalar) + } +} diff --git a/ScreenTranslate/Services/ScreenCoderEngine.swift b/ScreenTranslate/Services/ScreenCoderEngine.swift index 20025d1..88e9cef 100644 --- a/ScreenTranslate/Services/ScreenCoderEngine.swift +++ b/ScreenTranslate/Services/ScreenCoderEngine.swift @@ -161,6 +161,8 @@ actor ScreenCoderEngine { return ClaudeVLMProvider(configuration: configuration) case .ollama: return OllamaVLMProvider(configuration: configuration) + case .paddleocr: + return PaddleOCRVLMProvider() } } diff --git a/ScreenTranslate/Services/Security/KeychainService.swift b/ScreenTranslate/Services/Security/KeychainService.swift index 937ea42..0ae63b7 100644 --- a/ScreenTranslate/Services/Security/KeychainService.swift +++ b/ScreenTranslate/Services/Security/KeychainService.swift @@ -17,11 +17,17 @@ actor KeychainService { static let shared = KeychainService() /// Service identifier for Keychain items - private let service = "com.screentranslate.credentials" + static let serviceIdentifier = "com.screentranslate.credentials" + + /// PaddleOCR cloud account identifier + static let paddleOCRAccount = "paddleocr_cloud" /// Logger instance private let logger = Logger(subsystem: Bundle.main.bundleIdentifier ?? "ScreenTranslate", category: "KeychainService") + /// Internal service property for instance methods + private var service: String { Self.serviceIdentifier } + private init() {} // MARK: - Public API @@ -308,6 +314,113 @@ actor KeychainService { logger.info("Deleted all credentials") } + + // MARK: - PaddleOCR Cloud Methods + + /// Save PaddleOCR cloud API key + /// - Parameter apiKey: The API key to store + func savePaddleOCRCredentials(apiKey: String) throws { + let account = Self.paddleOCRAccount + + let credentials = StoredCredentials(apiKey: apiKey) + + guard let encodedData = try? JSONEncoder().encode(credentials) else { + throw KeychainError.invalidData + } + + let query: [String: Any] = [ + kSecClass as String: kSecClassGenericPassword, + kSecAttrService as String: service, + kSecAttrAccount as String: account + ] + + // Check if item exists and update it, or add new if not found + let status = SecItemCopyMatching(query as CFDictionary, nil) + if status == errSecSuccess { + // Item exists - update it + let updateQuery: [String: Any] = [ + kSecValueData as String: encodedData, + kSecAttrAccessible as String: kSecAttrAccessibleWhenUnlocked + ] + let updateStatus = SecItemUpdate(query as CFDictionary, updateQuery as CFDictionary) + guard updateStatus == errSecSuccess else { + logger.error("Failed to update PaddleOCR cloud credentials: \(updateStatus)") + throw KeychainError.unexpectedStatus(updateStatus) + } + logger.info("Updated PaddleOCR cloud credentials") + } else if status == errSecItemNotFound { + // Item doesn't exist - add new + let addQuery: [String: Any] = [ + kSecClass as String: kSecClassGenericPassword, + kSecAttrService as String: service, + kSecAttrAccount as String: account, + kSecValueData as String: encodedData, + kSecAttrAccessible as String: kSecAttrAccessibleWhenUnlocked + ] + let addStatus = SecItemAdd(addQuery as CFDictionary, nil) + guard addStatus == errSecSuccess else { + logger.error("Failed to save PaddleOCR cloud credentials: \(addStatus)") + throw KeychainError.unexpectedStatus(addStatus) + } + logger.info("Saved PaddleOCR cloud credentials") + } else { + logger.error("Failed to check PaddleOCR cloud credentials: \(status)") + throw KeychainError.unexpectedStatus(status) + } + } + + /// Retrieve stored PaddleOCR cloud API key + /// - Returns: The stored API key, or nil if not found + func getPaddleOCRCredentials() -> String? { + let account = Self.paddleOCRAccount + + let query: [String: Any] = [ + kSecClass as String: kSecClassGenericPassword, + kSecAttrService as String: service, + kSecAttrAccount as String: account, + kSecReturnData as String: true, + kSecMatchLimit as String: kSecMatchLimitOne + ] + + var result: CFTypeRef? + let status = SecItemCopyMatching(query as CFDictionary, &result) + + guard status == errSecSuccess else { + if status == errSecItemNotFound { + logger.debug("No PaddleOCR cloud credentials found") + return nil + } + logger.error("Failed to retrieve PaddleOCR cloud credentials: \(status)") + return nil + } + + guard let data = result as? Data else { + return nil + } + + let credentials = try? JSONDecoder().decode(StoredCredentials.self, from: data) + return credentials?.apiKey + } + + /// Delete stored PaddleOCR cloud credentials + func deletePaddleOCRCredentials() throws { + let account = Self.paddleOCRAccount + + let query: [String: Any] = [ + kSecClass as String: kSecClassGenericPassword, + kSecAttrService as String: service, + kSecAttrAccount as String: account + ] + + let status = SecItemDelete(query as CFDictionary) + + guard status == errSecSuccess || status == errSecItemNotFound else { + logger.error("Failed to delete PaddleOCR cloud credentials: \(status)") + throw KeychainError.unexpectedStatus(status) + } + + logger.info("Deleted PaddleOCR cloud credentials") + } } // MARK: - Stored Credentials