diff --git a/ScreenTranslate.xcodeproj/project.pbxproj b/ScreenTranslate.xcodeproj/project.pbxproj index 4b2f0ee..4e60fa9 100644 --- a/ScreenTranslate.xcodeproj/project.pbxproj +++ b/ScreenTranslate.xcodeproj/project.pbxproj @@ -27,7 +27,7 @@ /* End PBXFileReference section */ /* Begin PBXFileSystemSynchronizedBuildFileExceptionSet section */ - 862C98FC2F32309800ABAC92 /* PBXFileSystemSynchronizedBuildFileExceptionSet */ = { + 862C98FC2F32309800ABAC92 /* Exceptions for "ScreenTranslate" folder in "ScreenTranslate" target */ = { isa = PBXFileSystemSynchronizedBuildFileExceptionSet; membershipExceptions = ( "Supporting Files/Info.plist", @@ -40,7 +40,7 @@ SC000002 /* ScreenTranslate */ = { isa = PBXFileSystemSynchronizedRootGroup; exceptions = ( - 862C98FC2F32309800ABAC92 /* PBXFileSystemSynchronizedBuildFileExceptionSet */, + 862C98FC2F32309800ABAC92 /* Exceptions for "ScreenTranslate" folder in "ScreenTranslate" target */, ); path = ScreenTranslate; sourceTree = ""; @@ -62,6 +62,13 @@ ); runOnlyForDeploymentPostprocessing = 0; }; + SC000025 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; /* End PBXFrameworksBuildPhase section */ /* Begin PBXGroup section */ @@ -236,16 +243,6 @@ }; /* End PBXSourcesBuildPhase section */ -/* Begin PBXFrameworksBuildPhase section */ - SC000025 /* Frameworks */ = { - isa = PBXFrameworksBuildPhase; - buildActionMask = 2147483647; - files = ( - ); - runOnlyForDeploymentPostprocessing = 0; - }; -/* End PBXFrameworksBuildPhase section */ - /* Begin PBXTargetDependency section */ SC000033 /* PBXTargetDependency */ = { isa = PBXTargetDependency; diff --git a/ScreenTranslate/App/Coordinators/CaptureCoordinator.swift b/ScreenTranslate/App/Coordinators/CaptureCoordinator.swift index 2b32c77..6b88111 100644 --- a/ScreenTranslate/App/Coordinators/CaptureCoordinator.swift +++ b/ScreenTranslate/App/Coordinators/CaptureCoordinator.swift @@ -64,6 +64,12 @@ final class CaptureCoordinator { Task { defer { isCaptureInProgress = false } + guard await CaptureManager.shared.hasPermission else { + logger.info("Screen recording permission denied, triggering drag-to-authorize flow") + PermissionManager.shared.ensureScreenRecordingPermission() + return + } + do { // Get available displays let displays = try await CaptureManager.shared.availableDisplays() @@ -105,6 +111,12 @@ final class CaptureCoordinator { isCaptureInProgress = true Task { + guard await CaptureManager.shared.hasPermission else { + logger.info("Screen recording permission denied, triggering drag-to-authorize flow") + PermissionManager.shared.ensureScreenRecordingPermission() + isCaptureInProgress = false + return + } do { // Present the selection overlay on all displays let overlayController = SelectionOverlayController.shared @@ -144,6 +156,12 @@ final class CaptureCoordinator { isCaptureInProgress = true Task { + guard await CaptureManager.shared.hasPermission else { + logger.info("Screen recording permission denied, triggering drag-to-authorize flow") + PermissionManager.shared.ensureScreenRecordingPermission() + isCaptureInProgress = false + return + } do { let overlayController = SelectionOverlayController.shared diff --git a/ScreenTranslate/App/Coordinators/TextTranslationCoordinator.swift b/ScreenTranslate/App/Coordinators/TextTranslationCoordinator.swift index 3a3e650..3a8d110 100644 --- a/ScreenTranslate/App/Coordinators/TextTranslationCoordinator.swift +++ b/ScreenTranslate/App/Coordinators/TextTranslationCoordinator.swift @@ -87,16 +87,8 @@ final class TextTranslationCoordinator { permissionManager.refreshPermissionStatus() if !permissionManager.hasAccessibilityPermission { - // Directly trigger system permission prompt - permissionManager.requestAccessibilityPermission() - permissionManager.refreshPermissionStatus() - - if permissionManager.hasAccessibilityPermission { - return true - } - - // Still not granted (user denied) — guide to System Settings - permissionManager.showPermissionDeniedError(for: .accessibility) + // Call the drag-to-authorize flow for accessibility + permissionManager.ensureAccessibilityPermissionFlow() return false } return true diff --git a/ScreenTranslate/Extensions/URLExtensions.swift b/ScreenTranslate/Extensions/URLExtensions.swift new file mode 100644 index 0000000..2bfc1dd --- /dev/null +++ b/ScreenTranslate/Extensions/URLExtensions.swift @@ -0,0 +1,13 @@ +import Foundation + +extension URL { + /// Automatically resolves localhost to 127.0.0.1 to avoid IPv6 resolution issues (connection refused ::1) + var resolvingLocalhost: URL { + guard let host = self.host, host.lowercased() == "localhost" else { + return self + } + var components = URLComponents(url: self, resolvingAgainstBaseURL: true) + components?.host = "127.0.0.1" + return components?.url ?? self + } +} diff --git a/ScreenTranslate/Features/Settings/CompatibleEngineConfigSheet.swift b/ScreenTranslate/Features/Settings/CompatibleEngineConfigSheet.swift index b57f0e5..4494b6b 100644 --- a/ScreenTranslate/Features/Settings/CompatibleEngineConfigSheet.swift +++ b/ScreenTranslate/Features/Settings/CompatibleEngineConfigSheet.swift @@ -24,6 +24,11 @@ struct CompatibleEngineConfigSheet: View { @State private var testResult: String? @State private var testSuccess = false + @State private var availableModels: [String] = [] + @State private var isFetchingModels = false + @State private var showErrorAlert = false + @State private var errorMessage = "" + var body: some View { VStack(spacing: 20) { // Header @@ -72,8 +77,40 @@ struct CompatibleEngineConfigSheet: View { .font(.subheadline) .foregroundStyle(.secondary) - TextField("gpt-4o-mini", text: $modelName) - .textFieldStyle(.roundedBorder) + HStack { + TextField("gpt-4o-mini", text: $modelName) + .textFieldStyle(.roundedBorder) + + if !availableModels.isEmpty { + Menu { + ForEach(availableModels, id: \.self) { model in + Button(model) { + modelName = model + } + } + } label: { + Text("") + .frame(width: 8, height: 12) + } + .menuStyle(.borderlessButton) + .fixedSize() + } + + Button { + Task { + await fetchModels() + } + } label: { + if isFetchingModels { + ProgressView() + .controlSize(.small) + } else { + Text(localized("engine.config.fetchModels")) + } + } + .buttonStyle(.bordered) + .disabled(isFetchingModels) + } } // API Key Toggle @@ -162,6 +199,13 @@ struct CompatibleEngineConfigSheet: View { .onAppear { loadConfig() } + .alert(isPresented: $showErrorAlert) { + Alert( + title: Text(localized("engine.config.fetchModels.failed")), + message: Text(errorMessage), + dismissButton: .default(Text(localized("button.ok"))) + ) + } } // MARK: - Computed Properties @@ -242,13 +286,11 @@ struct CompatibleEngineConfigSheet: View { keychain: KeychainService.shared ) - let success = await provider.checkConnection() + try await provider.verifyConnection() await MainActor.run { - testSuccess = success - testResult = success - ? localized("engine.config.test.success") - : localized("engine.config.test.failed") + testSuccess = true + testResult = localized("engine.config.test.success") isTesting = false } } catch { @@ -259,4 +301,26 @@ struct CompatibleEngineConfigSheet: View { } } } + + @MainActor + private func fetchModels() async { + isFetchingModels = true + errorMessage = "" + + let requestURL = baseURL.isEmpty ? "http://localhost:8000/v1" : baseURL + + do { + let models = try await ModelDiscoveryService.fetchModels( + baseURL: requestURL, + apiKey: hasAPIKey ? apiKey : nil, + engineType: "custom" + ) + self.availableModels = models + self.isFetchingModels = false + } catch { + self.errorMessage = error.localizedDescription + self.showErrorAlert = true + self.isFetchingModels = false + } + } } diff --git a/ScreenTranslate/Features/Settings/EngineConfigSheet.swift b/ScreenTranslate/Features/Settings/EngineConfigSheet.swift index 8278581..a268e43 100644 --- a/ScreenTranslate/Features/Settings/EngineConfigSheet.swift +++ b/ScreenTranslate/Features/Settings/EngineConfigSheet.swift @@ -24,6 +24,11 @@ struct EngineConfigSheet: View { @State private var testSuccess = false private let logger = Logger.settings + @State private var availableModels: [String] = [] + @State private var isFetchingModels = false + @State private var showErrorAlert = false + @State private var errorMessage = "" + var body: some View { VStack(spacing: 20) { // Header @@ -132,6 +137,13 @@ struct EngineConfigSheet: View { .onAppear { loadConfig() } + .alert(isPresented: $showErrorAlert) { + Alert( + title: Text(localized("engine.config.fetchModels.failed")), + message: Text(errorMessage), + dismissButton: .default(Text(localized("button.ok"))) + ) + } } // MARK: - View Components @@ -255,8 +267,40 @@ struct EngineConfigSheet: View { .font(.subheadline) .foregroundStyle(.secondary) - TextField(engine.defaultModelName ?? "", text: $modelName) - .textFieldStyle(.roundedBorder) + HStack { + TextField(engine.defaultModelName ?? "", text: $modelName) + .textFieldStyle(.roundedBorder) + + if !availableModels.isEmpty { + Menu { + ForEach(availableModels, id: \.self) { model in + Button(model) { + modelName = model + } + } + } label: { + Text("") + .frame(width: 8, height: 12) + } + .menuStyle(.borderlessButton) + .fixedSize() + } + + Button { + Task { + await fetchModels() + } + } label: { + if isFetchingModels { + ProgressView() + .controlSize(.small) + } else { + Text(localized("engine.config.fetchModels")) + } + } + .buttonStyle(.bordered) + .disabled(isFetchingModels) + } } } @@ -368,13 +412,11 @@ struct EngineConfigSheet: View { } // Test connection - let success = await TranslationService.shared.testConnection(for: engine) + try await TranslationService.shared.verifyConnection(for: engine) await MainActor.run { - testSuccess = success - testResult = success - ? localized("engine.config.test.success") - : localized("engine.config.test.failed") + testSuccess = true + testResult = localized("engine.config.test.success") isTesting = false } } catch { @@ -385,4 +427,26 @@ struct EngineConfigSheet: View { } } } + + @MainActor + private func fetchModels() async { + isFetchingModels = true + errorMessage = "" + + let requestURL = baseURL.isEmpty ? (engine.defaultBaseURL ?? "") : baseURL + + do { + let models = try await ModelDiscoveryService.fetchModels( + baseURL: requestURL, + apiKey: engine.requiresAPIKey ? apiKey : nil, + engineType: engine.rawValue + ) + self.availableModels = models + self.isFetchingModels = false + } catch { + self.errorMessage = error.localizedDescription + self.showErrorAlert = true + self.isFetchingModels = false + } + } } diff --git a/ScreenTranslate/Features/Settings/EngineSettingsTab.swift b/ScreenTranslate/Features/Settings/EngineSettingsTab.swift index fd20324..9e7f66d 100644 --- a/ScreenTranslate/Features/Settings/EngineSettingsTab.swift +++ b/ScreenTranslate/Features/Settings/EngineSettingsTab.swift @@ -21,6 +21,11 @@ struct VLMConfigurationSection: View { @Bindable var viewModel: SettingsViewModel @State private var showAPIKey = false + @State private var availableVLMModels: [String] = [] + @State private var isFetchingVLMModels = false + @State private var showErrorAlert = false + @State private var errorMessage = "" + var body: some View { VStack(alignment: .leading, spacing: 16) { Text(localized("settings.vlm.title")) @@ -108,9 +113,41 @@ struct VLMConfigurationSection: View { Text(localized("settings.vlm.model")) .foregroundStyle(.secondary) .gridColumnAlignment(.trailing) - TextField("", text: $viewModel.vlmModelName) - .textFieldStyle(.roundedBorder) - .frame(maxWidth: 300) + HStack { + TextField("", text: $viewModel.vlmModelName) + .textFieldStyle(.roundedBorder) + + if !availableVLMModels.isEmpty { + Menu { + ForEach(availableVLMModels, id: \.self) { model in + Button(model) { + viewModel.vlmModelName = model + } + } + } label: { + Text("") + .frame(width: 8, height: 12) + } + .menuStyle(.borderlessButton) + .fixedSize() + } + + Button { + Task { + await fetchVLMModels() + } + } label: { + if isFetchingVLMModels { + ProgressView() + .controlSize(.small) + } else { + Text(localized("engine.config.fetchModels")) + } + } + .buttonStyle(.bordered) + .disabled(isFetchingVLMModels) + } + .frame(maxWidth: 300) } } @@ -155,6 +192,40 @@ struct VLMConfigurationSection: View { .padding() .background(Color(.controlBackgroundColor)) .cornerRadius(8) + .alert(isPresented: $showErrorAlert) { + Alert( + title: Text(localized("engine.config.fetchModels.failed")), + message: Text(errorMessage), + dismissButton: .default(Text(localized("button.ok"))) + ) + } + .onChange(of: viewModel.vlmProvider) { _, _ in + availableVLMModels = [] + } + } + + @MainActor + private func fetchVLMModels() async { + isFetchingVLMModels = true + errorMessage = "" + + let provider = viewModel.vlmProvider + let baseURL = viewModel.vlmBaseURL.isEmpty ? provider.defaultBaseURL(glmOCRMode: viewModel.glmOCRMode) : viewModel.vlmBaseURL + let apiKey = viewModel.vlmAPIKey + + do { + let models = try await ModelDiscoveryService.fetchModels( + baseURL: baseURL, + apiKey: provider.requiresAPIKey(glmOCRMode: viewModel.glmOCRMode) ? apiKey : nil, + engineType: provider.rawValue + ) + self.availableVLMModels = models + self.isFetchingVLMModels = false + } catch { + self.errorMessage = error.localizedDescription + self.showErrorAlert = true + self.isFetchingVLMModels = false + } } } @@ -163,6 +234,11 @@ struct VLMConfigurationSection: View { struct PaddleOCRStatusSection: View { @Bindable var viewModel: SettingsViewModel + @State private var availablePaddleModels: [String] = [] + @State private var isFetchingPaddleModels = false + @State private var showErrorAlert = false + @State private var errorMessage = "" + var body: some View { VStack(alignment: .leading, spacing: 12) { // Status @@ -242,9 +318,41 @@ struct PaddleOCRStatusSection: View { Text(localized("settings.paddleocr.cloudModelId")) .foregroundStyle(.secondary) .gridColumnAlignment(.trailing) - TextField("", text: $viewModel.paddleOCRCloudModelId) - .textFieldStyle(.roundedBorder) - .frame(maxWidth: 300) + HStack { + TextField("", text: $viewModel.paddleOCRCloudModelId) + .textFieldStyle(.roundedBorder) + + if !availablePaddleModels.isEmpty { + Menu { + ForEach(availablePaddleModels, id: \.self) { model in + Button(model) { + viewModel.paddleOCRCloudModelId = model + } + } + } label: { + Text("") + .frame(width: 8, height: 12) + } + .menuStyle(.borderlessButton) + .fixedSize() + } + + Button { + Task { + await fetchPaddleModels() + } + } label: { + if isFetchingPaddleModels { + ProgressView() + .controlSize(.small) + } else { + Text(localized("engine.config.fetchModels")) + } + } + .buttonStyle(.bordered) + .disabled(isFetchingPaddleModels) + } + .frame(maxWidth: 300) } } @@ -309,5 +417,38 @@ struct PaddleOCRStatusSection: View { } } .padding(.top, 8) + .alert(isPresented: $showErrorAlert) { + Alert( + title: Text(localized("engine.config.fetchModels.failed")), + message: Text(errorMessage), + dismissButton: .default(Text(localized("button.ok"))) + ) + } + .onChange(of: viewModel.paddleOCRUseCloud) { _, _ in + availablePaddleModels = [] + } + } + + @MainActor + private func fetchPaddleModels() async { + isFetchingPaddleModels = true + errorMessage = "" + + let baseURL = viewModel.paddleOCRCloudBaseURL + let apiKey = viewModel.paddleOCRCloudAPIKey + + do { + let models = try await ModelDiscoveryService.fetchModels( + baseURL: baseURL, + apiKey: apiKey.isEmpty ? nil : apiKey, + engineType: nil + ) + self.availablePaddleModels = models + self.isFetchingPaddleModels = false + } catch { + self.errorMessage = error.localizedDescription + self.showErrorAlert = true + self.isFetchingPaddleModels = false + } } } diff --git a/ScreenTranslate/Features/Settings/MultiEngineSettingsSection.swift b/ScreenTranslate/Features/Settings/MultiEngineSettingsSection.swift index 3101e9e..3c4187d 100644 --- a/ScreenTranslate/Features/Settings/MultiEngineSettingsSection.swift +++ b/ScreenTranslate/Features/Settings/MultiEngineSettingsSection.swift @@ -14,6 +14,8 @@ struct MultiEngineSettingsSection: View { @State private var showingConfigSheet = false @State private var editingConfig: TranslationEngineConfig? @State private var compatibleSheetState: CompatibleSheetState? + + @State private var configuredEngines: Set = [] // Sheet state for compatible engine configuration struct CompatibleSheetState: Identifiable { @@ -38,6 +40,9 @@ struct MultiEngineSettingsSection: View { .padding() .background(Color(.controlBackgroundColor)) .cornerRadius(8) + .onAppear { + checkConfiguredEngines() + } } // MARK: - Selection Mode Section (Horizontal) @@ -360,7 +365,7 @@ struct MultiEngineSettingsSection: View { } } } - .sheet(item: $editingConfig) { config in + .sheet(item: $editingConfig, onDismiss: { checkConfiguredEngines() }) { config in let engine = config.id EngineConfigSheet( engine: engine, @@ -422,13 +427,14 @@ struct MultiEngineSettingsSection: View { @ViewBuilder private func compatibleEngineCard(config: CompatibleTranslationProvider.CompatibleConfig, index: Int) -> some View { + let isSelected = isCompatibleEngineSelected(config) Button { compatibleSheetState = CompatibleSheetState(config: config, index: index) } label: { HStack(spacing: 8) { Image(systemName: "gearshape.2") .font(.body) - .foregroundStyle(Color.accentColor) + .foregroundStyle(isSelected ? Color.accentColor : Color.secondary) VStack(alignment: .leading, spacing: 2) { Text(config.displayName) @@ -437,7 +443,7 @@ struct MultiEngineSettingsSection: View { HStack(spacing: 4) { Circle() - .fill(Color.green) + .fill(isSelected ? Color.green : Color.gray) .frame(width: 6, height: 6) Text(localized("engine.status.configured")) .font(.caption2) @@ -451,8 +457,8 @@ struct MultiEngineSettingsSection: View { Button { setCompatibleAsEngine(config: config) } label: { - Image(systemName: "checkmark.circle") - .foregroundStyle(Color.accentColor) + Image(systemName: isSelected ? "checkmark.circle.fill" : "circle") + .foregroundStyle(isSelected ? Color.green : Color.accentColor) } .buttonStyle(.plain) .help(localized("engine.compatible.useAsEngine")) @@ -468,11 +474,11 @@ struct MultiEngineSettingsSection: View { .help(localized("engine.compatible.delete")) } .padding(8) - .background(Color.accentColor.opacity(0.1)) + .background(isSelected ? Color.accentColor.opacity(0.1) : Color.clear) .cornerRadius(6) .overlay( RoundedRectangle(cornerRadius: 6) - .stroke(Color.accentColor, lineWidth: 1) + .stroke(isSelected ? Color.accentColor : Color.gray.opacity(0.3), lineWidth: 1) ) } .buttonStyle(.plain) @@ -641,7 +647,7 @@ struct MultiEngineSettingsSection: View { let _ = Logger.settings.info("engineCard \(engine.rawValue): isEnabled=\(config.isEnabled), fromDefault=\(viewModel.settings.engineConfigs[engine] == nil)") // Built-in engines (apple, mtranServer) and Ollama don't need API keys // For others, we check if they require API key (simplified check - in real use would check keychain) - let isConfigured = !engine.requiresAPIKey || config.isEnabled + let isConfigured = configuredEngines.contains(engine) Button { editingConfig = config @@ -659,7 +665,7 @@ struct MultiEngineSettingsSection: View { // Show status for all engines HStack(spacing: 4) { Circle() - .fill(isConfigured ? Color.green : Color.orange) + .fill(isConfigured ? (config.isEnabled ? Color.green : Color.gray) : Color.orange) .frame(width: 6, height: 6) Text(isConfigured ? localized("engine.status.configured") : localized("engine.status.unconfigured")) .font(.caption2) @@ -861,6 +867,26 @@ struct MultiEngineSettingsSection: View { case .custom: return "gearshape.2" } } + + private func checkConfiguredEngines() { + Task { + var configured = Set() + for engine in TranslationEngineType.allCases { + if !engine.requiresAPIKey { + configured.insert(engine) + } else { + let hasCreds = await KeychainService.shared.hasCredentials(for: engine) + if hasCreds { + configured.insert(engine) + } + } + } + let finalConfigured = configured + await MainActor.run { + self.configuredEngines = finalConfigured + } + } + } } // MARK: - Flow Layout diff --git a/ScreenTranslate/Features/Settings/SettingsViewModel.swift b/ScreenTranslate/Features/Settings/SettingsViewModel.swift index c835cb8..fcf986f 100644 --- a/ScreenTranslate/Features/Settings/SettingsViewModel.swift +++ b/ScreenTranslate/Features/Settings/SettingsViewModel.swift @@ -840,9 +840,10 @@ final class SettingsViewModel { let effectiveBaseURL = vlmBaseURL.isEmpty ? vlmProvider.defaultBaseURL(glmOCRMode: glmOCRMode) : vlmBaseURL let effectiveModel = vlmModelName.isEmpty ? vlmProvider.defaultModelName(glmOCRMode: glmOCRMode) : vlmModelName - guard let baseURL = URL(string: effectiveBaseURL) else { + guard let parsedBaseURL = URL(string: effectiveBaseURL) else { throw ScreenCoderEngineError.invalidConfiguration("Invalid base URL: \(effectiveBaseURL)") } + let baseURL = parsedBaseURL.resolvingLocalhost if currentVLMRequiresAPIKey && vlmAPIKey.isEmpty { throw ScreenCoderEngineError.invalidConfiguration("API key is required for \(vlmProvider.localizedName)") @@ -956,12 +957,35 @@ final class SettingsViewModel { } } - /// Tests OpenAI API connection by fetching available models + /// Tests OpenAI API connection by sending a tiny placeholder image request private func testOpenAIConnection(baseURL: URL, apiKey: String, modelName: String) async throws -> (success: Bool, message: String) { - var request = URLRequest(url: baseURL.appendingPathComponent("models")) + var request = URLRequest(url: baseURL.appendingPathComponent("chat/completions")) + request.httpMethod = "POST" request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization") + request.setValue("application/json", forHTTPHeaderField: "Content-Type") request.timeoutInterval = 10 + let body: [String: Any] = [ + "model": modelName, + "messages": [ + [ + "role": "user", + "content": [ + ["type": "text", "text": "Test connection."], + [ + "type": "image_url", + "image_url": [ + "url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAE0lEQVR4nGP8//8/AwwwwVl4OQCWbgMF7ZjH1AAAAABJRU5ErkJggg==" + ] + ] + ] + ] + ], + "stream": false, + "max_tokens": 10 + ] + request.httpBody = try JSONSerialization.data(withJSONObject: body) + let (_, response) = try await URLSession.shared.data(for: request) guard let httpResponse = response as? HTTPURLResponse else { @@ -980,13 +1004,40 @@ final class SettingsViewModel { } } - /// Tests Claude API connection + /// Tests Claude API connection by sending a tiny placeholder image request private func testClaudeConnection(baseURL: URL, apiKey: String, modelName: String) async throws -> (success: Bool, message: String) { - var request = URLRequest(url: baseURL.appendingPathComponent("models")) + var request = URLRequest(url: baseURL.appendingPathComponent("v1/messages")) + request.httpMethod = "POST" request.setValue(apiKey, forHTTPHeaderField: "x-api-key") request.setValue("2023-06-01", forHTTPHeaderField: "anthropic-version") + request.setValue("application/json", forHTTPHeaderField: "Content-Type") request.timeoutInterval = 10 + let body: [String: Any] = [ + "model": modelName, + "max_tokens": 10, + "messages": [ + [ + "role": "user", + "content": [ + [ + "type": "image", + "source": [ + "type": "base64", + "media_type": "image/png", + "data": "iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAE0lEQVR4nGP8//8/AwwwwVl4OQCWbgMF7ZjH1AAAAABJRU5ErkJggg==" + ] + ], + [ + "type": "text", + "text": "Test connection." + ] + ] + ] + ] + ] + request.httpBody = try JSONSerialization.data(withJSONObject: body) + let (_, response) = try await URLSession.shared.data(for: request) guard let httpResponse = response as? HTTPURLResponse else { @@ -1054,33 +1105,37 @@ final class SettingsViewModel { } } - /// Tests Ollama connection by checking if server is running + /// Tests Ollama connection by sending a tiny placeholder image request to test generation private func testOllamaConnection(baseURL: URL, modelName: String) async throws -> (success: Bool, message: String) { - var request = URLRequest(url: baseURL.appendingPathComponent("api/tags")) - request.timeoutInterval = 5 + var request = URLRequest(url: baseURL.appendingPathComponent("api/generate")) + request.httpMethod = "POST" + request.setValue("application/json", forHTTPHeaderField: "Content-Type") + request.timeoutInterval = 10 - let (data, response) = try await URLSession.shared.data(for: request) + let body: [String: Any] = [ + "model": modelName, + "prompt": "Test connection.", + "images": ["iVBORw0KGgoAAAANSUhEUgAAAAQAAAAECAIAAAAmkwkpAAAAE0lEQVR4nGP8//8/AwwwwVl4OQCWbgMF7ZjH1AAAAABJRU5ErkJggg=="], + "stream": false, + "options": [ + "num_predict": 10 + ] + ] + request.httpBody = try JSONSerialization.data(withJSONObject: body) - guard let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 200 else { - throw VLMProviderError.networkError("Ollama server not responding") - } + let (_, response) = try await URLSession.shared.data(for: request) - // Check if the configured model is available - struct OllamaTagsResponse: Codable { - struct Model: Codable { - let name: String - } - let models: [Model] + guard let httpResponse = response as? HTTPURLResponse else { + throw VLMProviderError.invalidResponse("Invalid HTTP response") } - let tagsResponse = try JSONDecoder().decode(OllamaTagsResponse.self, from: data) - let availableModels = tagsResponse.models.map { $0.name } - - if availableModels.contains(where: { $0.hasPrefix(modelName) }) { + switch httpResponse.statusCode { + case 200: return (true, String(format: NSLocalizedString("settings.vlm.test.ollama.success", comment: ""), modelName)) - } else { - let modelsList = availableModels.isEmpty ? NSLocalizedString("none", comment: "") : availableModels.joined(separator: ", ") - return (true, String(format: NSLocalizedString("settings.vlm.test.ollama.available", comment: ""), modelsList)) + case 404: + throw VLMProviderError.modelUnavailable("\(modelName). Run 'ollama pull \(modelName)' first.") + default: + throw VLMProviderError.invalidResponse("HTTP \(httpResponse.statusCode)") } } diff --git a/ScreenTranslate/Models/AppSettings.swift b/ScreenTranslate/Models/AppSettings.swift index 0885406..3edee4e 100644 --- a/ScreenTranslate/Models/AppSettings.swift +++ b/ScreenTranslate/Models/AppSettings.swift @@ -741,47 +741,12 @@ final class AppSettings { /// Load PaddleOCR cloud API key from Keychain synchronously private static func loadPaddleOCRAPIKeyFromKeychain() -> String { - // Use shared constants from KeychainService - let query: [String: Any] = [ - kSecClass as String: kSecClassGenericPassword, - kSecAttrService as String: KeychainService.serviceIdentifier, - kSecAttrAccount as String: KeychainService.paddleOCRAccount, - kSecReturnData as String: true, - kSecMatchLimit as String: kSecMatchLimitOne - ] - - var result: CFTypeRef? - let status = SecItemCopyMatching(query as CFDictionary, &result) - - guard status == errSecSuccess, - let data = result as? Data, - let credentials = try? JSONDecoder().decode(StoredCredentials.self, from: data) else { - return "" - } - - return credentials.apiKey + KeychainService.loadPaddleOCRAPIKeySynchronously() } /// Load VLM API key from Keychain synchronously private static func loadVLMAPIKeyFromKeychain() -> String { - let query: [String: Any] = [ - kSecClass as String: kSecClassGenericPassword, - kSecAttrService as String: KeychainService.serviceIdentifier, - kSecAttrAccount as String: "vlm_api_key", - kSecReturnData as String: true, - kSecMatchLimit as String: kSecMatchLimitOne - ] - - var result: CFTypeRef? - let status = SecItemCopyMatching(query as CFDictionary, &result) - - guard status == errSecSuccess, - let data = result as? Data, - let credentials = try? JSONDecoder().decode(StoredCredentials.self, from: data) else { - return "" - } - - return credentials.apiKey + KeychainService.loadVLMAPIKeySynchronously() } // MARK: - Multi-Engine Persistence Helpers diff --git a/ScreenTranslate/Resources/de.lproj/Localizable.strings b/ScreenTranslate/Resources/de.lproj/Localizable.strings index 3f118e9..5852cc2 100644 --- a/ScreenTranslate/Resources/de.lproj/Localizable.strings +++ b/ScreenTranslate/Resources/de.lproj/Localizable.strings @@ -821,3 +821,6 @@ "settings.glmocr.mode.local" = "Lokal"; "settings.glmocr.local.apiKey.optional" = "API-Schlüssel ist für lokale MLX-VLM-Server optional"; "vlm.provider.glmocr.local.description" = "Lokaler MLX-VLM-Server für GLM-OCR"; + +"engine.config.fetchModels" = "Fetch"; +"engine.config.fetchModels.failed" = "Failed to fetch models"; diff --git a/ScreenTranslate/Resources/en.lproj/Localizable.strings b/ScreenTranslate/Resources/en.lproj/Localizable.strings index 6dcd61d..bf94937 100644 --- a/ScreenTranslate/Resources/en.lproj/Localizable.strings +++ b/ScreenTranslate/Resources/en.lproj/Localizable.strings @@ -797,3 +797,6 @@ "settings.glmocr.mode.local" = "Local"; "settings.glmocr.local.apiKey.optional" = "API Key is optional for local MLX-VLM servers"; "vlm.provider.glmocr.local.description" = "Local MLX-VLM server for GLM-OCR"; + +"engine.config.fetchModels" = "Fetch"; +"engine.config.fetchModels.failed" = "Failed to fetch models"; diff --git a/ScreenTranslate/Resources/es.lproj/Localizable.strings b/ScreenTranslate/Resources/es.lproj/Localizable.strings index 98d7e97..f05e454 100644 --- a/ScreenTranslate/Resources/es.lproj/Localizable.strings +++ b/ScreenTranslate/Resources/es.lproj/Localizable.strings @@ -821,3 +821,6 @@ "settings.glmocr.mode.local" = "Local"; "settings.glmocr.local.apiKey.optional" = "La clave API es opcional para servidores MLX-VLM locales"; "vlm.provider.glmocr.local.description" = "Servidor MLX-VLM local para GLM-OCR"; + +"engine.config.fetchModels" = "Fetch"; +"engine.config.fetchModels.failed" = "Failed to fetch models"; diff --git a/ScreenTranslate/Resources/fr.lproj/Localizable.strings b/ScreenTranslate/Resources/fr.lproj/Localizable.strings index e2d3ec8..5cb56ec 100644 --- a/ScreenTranslate/Resources/fr.lproj/Localizable.strings +++ b/ScreenTranslate/Resources/fr.lproj/Localizable.strings @@ -821,3 +821,6 @@ "settings.glmocr.mode.local" = "Local"; "settings.glmocr.local.apiKey.optional" = "La clé API est facultative pour les serveurs MLX-VLM locaux"; "vlm.provider.glmocr.local.description" = "Serveur MLX-VLM local pour GLM-OCR"; + +"engine.config.fetchModels" = "Fetch"; +"engine.config.fetchModels.failed" = "Failed to fetch models"; diff --git a/ScreenTranslate/Resources/it.lproj/Localizable.strings b/ScreenTranslate/Resources/it.lproj/Localizable.strings index f849795..b893f0b 100644 --- a/ScreenTranslate/Resources/it.lproj/Localizable.strings +++ b/ScreenTranslate/Resources/it.lproj/Localizable.strings @@ -822,3 +822,6 @@ "settings.glmocr.mode.local" = "Locale"; "settings.glmocr.local.apiKey.optional" = "La chiave API è opzionale per i server MLX-VLM locali"; "vlm.provider.glmocr.local.description" = "Server MLX-VLM locale per GLM-OCR"; + +"engine.config.fetchModels" = "Fetch"; +"engine.config.fetchModels.failed" = "Failed to fetch models"; diff --git a/ScreenTranslate/Resources/ja.lproj/Localizable.strings b/ScreenTranslate/Resources/ja.lproj/Localizable.strings index a18d79f..8b76bc7 100644 --- a/ScreenTranslate/Resources/ja.lproj/Localizable.strings +++ b/ScreenTranslate/Resources/ja.lproj/Localizable.strings @@ -821,3 +821,6 @@ "settings.glmocr.mode.local" = "ローカル"; "settings.glmocr.local.apiKey.optional" = "ローカル MLX-VLM サーバーでは API キーは不要です"; "vlm.provider.glmocr.local.description" = "GLM-OCR 用ローカル MLX-VLM サーバー"; + +"engine.config.fetchModels" = "Fetch"; +"engine.config.fetchModels.failed" = "Failed to fetch models"; diff --git a/ScreenTranslate/Resources/ko.lproj/Localizable.strings b/ScreenTranslate/Resources/ko.lproj/Localizable.strings index 73cb064..4458dc1 100644 --- a/ScreenTranslate/Resources/ko.lproj/Localizable.strings +++ b/ScreenTranslate/Resources/ko.lproj/Localizable.strings @@ -821,3 +821,6 @@ "settings.glmocr.mode.local" = "로컬"; "settings.glmocr.local.apiKey.optional" = "로컬 MLX-VLM 서버에서는 API 키가 선택 사항입니다"; "vlm.provider.glmocr.local.description" = "GLM-OCR용 로컬 MLX-VLM 서버"; + +"engine.config.fetchModels" = "Fetch"; +"engine.config.fetchModels.failed" = "Failed to fetch models"; diff --git a/ScreenTranslate/Resources/pt.lproj/Localizable.strings b/ScreenTranslate/Resources/pt.lproj/Localizable.strings index f8d1794..f10324e 100644 --- a/ScreenTranslate/Resources/pt.lproj/Localizable.strings +++ b/ScreenTranslate/Resources/pt.lproj/Localizable.strings @@ -836,3 +836,6 @@ Conceda permissão em Configurações do Sistema > Privacidade e Segurança > Mo "settings.glmocr.mode.local" = "Local"; "settings.glmocr.local.apiKey.optional" = "A chave API é opcional para servidores MLX-VLM locais"; "vlm.provider.glmocr.local.description" = "Servidor MLX-VLM local para GLM-OCR"; + +"engine.config.fetchModels" = "Fetch"; +"engine.config.fetchModels.failed" = "Failed to fetch models"; diff --git a/ScreenTranslate/Resources/ru.lproj/Localizable.strings b/ScreenTranslate/Resources/ru.lproj/Localizable.strings index 5b1c8d8..df76e5d 100644 --- a/ScreenTranslate/Resources/ru.lproj/Localizable.strings +++ b/ScreenTranslate/Resources/ru.lproj/Localizable.strings @@ -821,3 +821,6 @@ "settings.glmocr.mode.local" = "Локально"; "settings.glmocr.local.apiKey.optional" = "API-ключ необязателен для локальных серверов MLX-VLM"; "vlm.provider.glmocr.local.description" = "Локальный сервер MLX-VLM для GLM-OCR"; + +"engine.config.fetchModels" = "Fetch"; +"engine.config.fetchModels.failed" = "Failed to fetch models"; diff --git a/ScreenTranslate/Resources/zh-Hans.lproj/Localizable.strings b/ScreenTranslate/Resources/zh-Hans.lproj/Localizable.strings index a45bdff..18128b8 100644 --- a/ScreenTranslate/Resources/zh-Hans.lproj/Localizable.strings +++ b/ScreenTranslate/Resources/zh-Hans.lproj/Localizable.strings @@ -797,3 +797,6 @@ "settings.glmocr.mode.local" = "本地"; "settings.glmocr.local.apiKey.optional" = "本地 MLX-VLM 服务无需 API 密钥"; "vlm.provider.glmocr.local.description" = "本地 MLX-VLM GLM-OCR 服务"; + +"engine.config.fetchModels" = "获取"; +"engine.config.fetchModels.failed" = "获取模型失败"; diff --git a/ScreenTranslate/Services/AppleTranslationProvider.swift b/ScreenTranslate/Services/AppleTranslationProvider.swift index 42640fa..94348ec 100644 --- a/ScreenTranslate/Services/AppleTranslationProvider.swift +++ b/ScreenTranslate/Services/AppleTranslationProvider.swift @@ -43,8 +43,8 @@ actor AppleTranslationProvider: TranslationProvider { } } - func checkConnection() async -> Bool { - true + func verifyConnection() async throws { + // Built-in Apple Translation is always available on system level } private func mapEngineError(_ error: TranslationEngineError) -> TranslationProviderError { diff --git a/ScreenTranslate/Services/ClaudeVLMProvider.swift b/ScreenTranslate/Services/ClaudeVLMProvider.swift index 1f55331..3ae8907 100644 --- a/ScreenTranslate/Services/ClaudeVLMProvider.swift +++ b/ScreenTranslate/Services/ClaudeVLMProvider.swift @@ -247,7 +247,7 @@ struct ClaudeVLMProvider: VLMProvider, Sendable { /// Builds request with custom messages and continuation settings private func buildRequest(messages: [ClaudeMessage], isContinuation: Bool) throws -> URLRequest { - let endpoint = configuration.baseURL.appendingPathComponent("v1/messages") + let endpoint = configuration.baseURL.resolvingLocalhost.appendingPathComponent("v1/messages") var request = URLRequest(url: endpoint) request.httpMethod = "POST" diff --git a/ScreenTranslate/Services/GLMOCRVLMProvider.swift b/ScreenTranslate/Services/GLMOCRVLMProvider.swift index 50a93cf..de1e5b6 100644 --- a/ScreenTranslate/Services/GLMOCRVLMProvider.swift +++ b/ScreenTranslate/Services/GLMOCRVLMProvider.swift @@ -94,7 +94,7 @@ struct GLMOCRVLMProvider: VLMProvider, Sendable { throw VLMProviderError.invalidConfiguration("GLM OCR requires an API key.") } - let endpoint = baseURL.appendingPathComponent("layout_parsing") + let endpoint = baseURL.resolvingLocalhost.appendingPathComponent("layout_parsing") var request = URLRequest(url: endpoint) request.httpMethod = "POST" request.timeoutInterval = timeout @@ -118,7 +118,7 @@ struct GLMOCRVLMProvider: VLMProvider, Sendable { fileDataURI: String, timeout: TimeInterval ) throws -> URLRequest { - let endpoint = baseURL.appendingPathComponent("chat/completions") + let endpoint = baseURL.resolvingLocalhost.appendingPathComponent("chat/completions") var request = URLRequest(url: endpoint) request.httpMethod = "POST" request.timeoutInterval = timeout diff --git a/ScreenTranslate/Services/MTranServerEngine.swift b/ScreenTranslate/Services/MTranServerEngine.swift index a56b61e..75d5e0c 100644 --- a/ScreenTranslate/Services/MTranServerEngine.swift +++ b/ScreenTranslate/Services/MTranServerEngine.swift @@ -159,9 +159,11 @@ actor MTranServerEngine: TranslationProvider { return results } - func checkConnection() async -> Bool { + func verifyConnection() async throws { MTranServerChecker.resetCache() - return MTranServerChecker.isAvailable + guard MTranServerChecker.isAvailable else { + throw TranslationProviderError.connectionFailed("MTranServer is not reachable") + } } // MARK: - Private Methods diff --git a/ScreenTranslate/Services/ModelDiscoveryService.swift b/ScreenTranslate/Services/ModelDiscoveryService.swift new file mode 100644 index 0000000..8a6af54 --- /dev/null +++ b/ScreenTranslate/Services/ModelDiscoveryService.swift @@ -0,0 +1,163 @@ +import Foundation + +/// Service to fetch available model IDs from /models (or Ollama /api/tags) endpoint +enum ModelDiscoveryService { + + // MARK: - API Response Structures + + private struct OpenAIModel: Codable { + let id: String + } + + private struct OpenAIModelsResponse: Codable { + let data: [OpenAIModel] + } + + private struct OllamaModel: Codable { + let name: String + } + + private struct OllamaTagsResponse: Codable { + let models: [OllamaModel] + } + + // MARK: - Public Fetch Method + + /// Fetch model names/IDs from a specific base URL + /// - Parameters: + /// - baseURL: The api base URL string (e.g., "https://api.openai.com/v1") + /// - apiKey: Optional API Key for Authorization + /// - engineType: The engine type string, used to specialize requests (e.g. "ollama") + /// - Returns: A sorted list of available model IDs + static func fetchModels( + baseURL: String, + apiKey: String?, + engineType: String? + ) async throws -> [String] { + var cleanURL = baseURL.trimmingCharacters(in: .whitespacesAndNewlines) + if cleanURL.isEmpty { + throw NSError( + domain: "ModelDiscovery", + code: 400, + userInfo: [NSLocalizedDescriptionKey: "Base URL is empty"] + ) + } + + // Normalize URL protocol + if !cleanURL.lowercased().hasPrefix("http://") && !cleanURL.lowercased().hasPrefix("https://") { + let lowered = cleanURL.lowercased() + let isLocal = lowered.contains("localhost") || + lowered.contains("127.0.0.1") || + lowered.contains("::1") || + lowered.contains("0.0.0.0") || + lowered.hasSuffix(".local") || + lowered.contains(".local:") + if isLocal { + cleanURL = "http://" + cleanURL + } else { + cleanURL = "https://" + cleanURL + } + } + + let isOllama = (engineType?.lowercased() == "ollama" || cleanURL.contains("11434")) + + if isOllama { + // Try Ollama endpoint + let tagsURLString = cleanURL.hasSuffix("/") ? "\(cleanURL)api/tags" : "\(cleanURL)/api/tags" + if let tagsURL = URL(string: tagsURLString) { + var request = URLRequest(url: tagsURL) + request.httpMethod = "GET" + request.timeoutInterval = 10.0 + + do { + let (data, response) = try await URLSession.shared.data(for: request) + if let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 200 { + let decoder = JSONDecoder() + decoder.keyDecodingStrategy = .convertFromSnakeCase + if let decoded = try? decoder.decode(OllamaTagsResponse.self, from: data) { + let models = decoded.models.map { $0.name }.sorted() + if !models.isEmpty { + return models + } + } + } + } catch { + // Fail silently here and fall back to standard /models check + } + } + } + + // Default OpenAI-compatible endpoint + var modelsURLString = cleanURL + if modelsURLString.hasSuffix("/models") { + // Do nothing + } else if modelsURLString.hasSuffix("/") { + modelsURLString += "models" + } else { + modelsURLString += "/models" + } + + guard let url = URL(string: modelsURLString) else { + throw NSError( + domain: "ModelDiscovery", + code: 400, + userInfo: [NSLocalizedDescriptionKey: "Invalid URL: \(modelsURLString)"] + ) + } + + var request = URLRequest(url: url) + request.httpMethod = "GET" + request.timeoutInterval = 10.0 + + if let key = apiKey, !key.isEmpty { + request.setValue("Bearer \(key)", forHTTPHeaderField: "Authorization") + } + + let (data, response) = try await URLSession.shared.data(for: request) + guard let httpResponse = response as? HTTPURLResponse else { + throw NSError( + domain: "ModelDiscovery", + code: 500, + userInfo: [NSLocalizedDescriptionKey: "No response from server"] + ) + } + + guard httpResponse.statusCode == 200 else { + let errorMsg = String(decoding: data, as: UTF8.self) + let hint = errorMsg.isEmpty ? "" : " (\(errorMsg.prefix(100)))" + throw NSError( + domain: "ModelDiscovery", + code: httpResponse.statusCode, + userInfo: [NSLocalizedDescriptionKey: "HTTP \(httpResponse.statusCode)\(hint)"] + ) + } + + let decoder = JSONDecoder() + + // Format 1: Standard OpenAI Response {"data": [{"id": "gpt-4o"}]} + if let decoded = try? decoder.decode(OpenAIModelsResponse.self, from: data) { + return decoded.data.map { $0.id }.sorted() + } + + // Format 2: Direct Array of Model Objects [{"id": "gpt-4o"}] + if let decodedArray = try? decoder.decode([OpenAIModel].self, from: data) { + return decodedArray.map { $0.id }.sorted() + } + + // Format 3: Direct Array of Strings ["gpt-4o", "gpt-4o-mini"] + if let decodedStrings = try? decoder.decode([String].self, from: data) { + return decodedStrings.sorted() + } + + // Format 4: Ollama tags structure returned on /models (sometimes configured on proxy) + if let decodedOllama = try? decoder.decode(OllamaTagsResponse.self, from: data) { + return decodedOllama.models.map { $0.name }.sorted() + } + + throw NSError( + domain: "ModelDiscovery", + code: 422, + userInfo: [NSLocalizedDescriptionKey: "Failed to parse models response. Unsupported JSON format."] + ) + } +} diff --git a/ScreenTranslate/Services/OllamaVLMProvider.swift b/ScreenTranslate/Services/OllamaVLMProvider.swift index f34b4fb..80b1742 100644 --- a/ScreenTranslate/Services/OllamaVLMProvider.swift +++ b/ScreenTranslate/Services/OllamaVLMProvider.swift @@ -83,7 +83,7 @@ struct OllamaVLMProvider: VLMProvider, Sendable { /// Checks if Ollama server is running and accessible private func checkServerAvailability() async -> Bool { - let endpoint = configuration.baseURL.appendingPathComponent("api/tags") + let endpoint = configuration.baseURL.resolvingLocalhost.appendingPathComponent("api/tags") var request = URLRequest(url: endpoint) request.httpMethod = "GET" request.timeoutInterval = 5 // Short timeout for health check @@ -101,7 +101,7 @@ struct OllamaVLMProvider: VLMProvider, Sendable { /// Builds the URLRequest for Ollama Generate API private func buildRequest(base64Image: String) throws -> URLRequest { - let endpoint = configuration.baseURL.appendingPathComponent("api/generate") + let endpoint = configuration.baseURL.resolvingLocalhost.appendingPathComponent("api/generate") var request = URLRequest(url: endpoint) request.httpMethod = "POST" diff --git a/ScreenTranslate/Services/OpenAIVLMProvider.swift b/ScreenTranslate/Services/OpenAIVLMProvider.swift index 8f2da0b..67237de 100644 --- a/ScreenTranslate/Services/OpenAIVLMProvider.swift +++ b/ScreenTranslate/Services/OpenAIVLMProvider.swift @@ -248,7 +248,7 @@ struct OpenAIVLMProvider: VLMProvider, Sendable { } /// Extracts content text and truncation status from OpenAI response - private func extractContentAndStatus(from data: Data) throws -> (content: String, isTruncated: Bool, finishReason: String?) { + func extractContentAndStatus(from data: Data) throws -> (content: String, isTruncated: Bool, finishReason: String?) { logDebug("Received raw response payload: \(data.count) bytes") // Check for error response first @@ -259,7 +259,6 @@ struct OpenAIVLMProvider: VLMProvider, Sendable { // Try to parse as OpenAI response let decoder = JSONDecoder() - decoder.keyDecodingStrategy = .convertFromSnakeCase let openAIResponse: OpenAIChatResponse do { @@ -290,9 +289,17 @@ struct OpenAIVLMProvider: VLMProvider, Sendable { throw VLMProviderError.invalidResponse("No message in choice") } - guard let content = message.content else { - let reason = choice.finishReason ?? "unknown" - throw VLMProviderError.invalidResponse("No content in response (finish_reason: \(reason))") + var content = message.content ?? "" + + // Fallback to reasoningContent or reasoning if content is empty + if content.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + if let reasoning = message.reasoningContent, !reasoning.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + logInfo("Main content is empty, falling back to reasoning_content") + content = reasoning + } else if let reasoningObj = message.reasoning, !reasoningObj.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + logInfo("Main content is empty, falling back to reasoning") + content = reasoningObj + } } let isTruncated = choice.finishReason == "length" @@ -300,59 +307,86 @@ struct OpenAIVLMProvider: VLMProvider, Sendable { } /// Attempts to extract content field manually when JSON decoder fails - private func extractContentManually(from json: String) -> String? { + func extractContentManually(from json: String) -> String? { let patterns = ["\"content\":\"", "\"content\": \""] + let reasoningPatterns = [ + "\"reasoning_content\":\"", + "\"reasoning_content\": \"", + "\"reasoning\":\"", + "\"reasoning\": \"" + ] - for pattern in patterns { - if let range = json.range(of: pattern) { - let start = range.upperBound - - var end = start - var escaped = false - var depth = 0 - var charCount = 0 - - for char in json[start...] { - charCount += 1 - if escaped { - escaped = false - end = json.index(after: end) - } else if char == "\\" { - escaped = true - end = json.index(after: end) - } else if char == "{" || char == "[" { - depth += 1 - end = json.index(after: end) - } else if char == "}" || char == "]" { - depth -= 1 - end = json.index(after: end) - if depth < 0 { break } - } else if char == "\"" && depth == 0 { - break - } else { - end = json.index(after: end) - } - } + var extractedContent: String? = nil - let content = String(json[start.. String? { + guard let range = json.range(of: pattern) else { return nil } + let start = range.upperBound + + var end = start + var escaped = false + var depth = 0 + + for char in json[start...] { + if escaped { + escaped = false + end = json.index(after: end) + } else if char == "\\" { + escaped = true + end = json.index(after: end) + } else if char == "{" || char == "[" { + depth += 1 + end = json.index(after: end) + } else if char == "}" || char == "]" { + depth -= 1 + end = json.index(after: end) + if depth < 0 { break } + } else if char == "\"" && depth == 0 { + break + } else { + end = json.index(after: end) + } + } + + let content = String(json[start.. URLRequest { - let endpoint = configuration.baseURL.appendingPathComponent("chat/completions") + let endpoint = configuration.baseURL.resolvingLocalhost.appendingPathComponent("chat/completions") var request = URLRequest(url: endpoint) request.httpMethod = "POST" @@ -478,7 +512,13 @@ struct OpenAIVLMProvider: VLMProvider, Sendable { } /// Parses the VLM JSON content from assistant message - private func parseVLMContent(_ content: String, wasTruncated: Bool = false) throws -> VLMAnalysisResponse { + func parseVLMContent(_ content: String, wasTruncated: Bool = false) throws -> VLMAnalysisResponse { + guard !content.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty else { + throw VLMProviderError.parsingFailed( + "Received empty response from model. This may happen if the model is not vision-capable, does not support the request format, or has content filters active." + ) + } + var cleanedContent = extractJSON(from: content) // If response was truncated, try to repair the JSON by closing open brackets @@ -487,6 +527,13 @@ struct OpenAIVLMProvider: VLMProvider, Sendable { cleanedContent = attemptToRepairJSON(cleanedContent) } + guard !cleanedContent.isEmpty else { + let escapedRaw = content.replacingOccurrences(of: "\n", with: " ") + throw VLMProviderError.parsingFailed( + "Cleaned content is empty. Raw content received: '\(escapedRaw.prefix(200))'" + ) + } + guard let jsonData = cleanedContent.data(using: .utf8) else { throw VLMProviderError.parsingFailed("Failed to convert content to data") } @@ -505,8 +552,11 @@ struct OpenAIVLMProvider: VLMProvider, Sendable { if wasTruncated { throw VLMProviderError.invalidResponse("Response was truncated due to token limit. Try selecting a smaller area or using a model with larger context window.") } + + let escapedCleaned = cleanedContent.replacingOccurrences(of: "\n", with: " ") + let escapedRaw = content.replacingOccurrences(of: "\n", with: " ") throw VLMProviderError.parsingFailed( - "Failed to parse VLM response JSON: \(error.localizedDescription). Content length: \(cleanedContent.count) chars" + "Failed to parse VLM response JSON: \(error.localizedDescription). Cleaned content (first 200 chars): '\(escapedCleaned.prefix(200))'. Raw content (first 200 chars): '\(escapedRaw.prefix(200))'" ) } } @@ -652,11 +702,13 @@ private struct OpenAIChatRequest: Encodable, Sendable { let messages: [OpenAIChatMessage] let maxTokens: Int let temperature: Double + let stream: Bool = false enum CodingKeys: String, CodingKey { case model, messages case maxTokens = "max_tokens" case temperature + case stream } } @@ -739,6 +791,14 @@ private struct OpenAIChatChoice: Decodable, Sendable { private struct OpenAIResponseMessage: Decodable, Sendable { let role: String? let content: String? + let reasoningContent: String? + let reasoning: String? + + enum CodingKeys: String, CodingKey { + case role, content + case reasoningContent = "reasoning_content" + case reasoning + } } private struct OpenAIUsage: Decodable, Sendable { diff --git a/ScreenTranslate/Services/PermissionManager.swift b/ScreenTranslate/Services/PermissionManager.swift index ccbcdcb..f3710a4 100644 --- a/ScreenTranslate/Services/PermissionManager.swift +++ b/ScreenTranslate/Services/PermissionManager.swift @@ -10,6 +10,7 @@ import ApplicationServices import AppKit import Combine import SystemSettingsKit +import PermissionFlow /// Manager for handling system permissions required by the app. /// Centralizes permission checking, requesting, and caching logic. @@ -27,6 +28,12 @@ final class PermissionManager: ObservableObject { /// Current input monitoring permission status @Published private(set) var hasInputMonitoringPermission: Bool = false + /// Controllers for the PermissionFlow drag & drop authorization overlay + private var flowControllers: [PermissionFlowPane: PermissionFlowController] = [:] + + /// Active tasks for polling permission status during drag authorization + private var flowPollingTasks: [PermissionFlowPane: Task] = [:] + // MARK: - Private Properties /// UserDefaults key for cached accessibility permission status @@ -279,6 +286,111 @@ final class PermissionManager: ObservableObject { func stopPermissionMonitoring() { NotificationCenter.default.removeObserver(self) } + + /// Checks Screen Recording permission and triggers the drag-to-authorize flow if denied. + /// - Returns: True if permission is granted, False if we had to request it (flow started). + @discardableResult + func ensureScreenRecordingPermission() -> Bool { + let isGranted = PermissionStatusRegistry.provider(for: .screenRecording).authorizationState() == .granted + if isGranted { + return true + } + + // Show the drag-to-authorize panel in the center of the main screen + let screenRect: CGRect + if let mainScreen = NSScreen.main { + let frame = mainScreen.frame + screenRect = CGRect(x: frame.midX - 50, y: frame.midY - 50, width: 100, height: 100) + } else { + screenRect = CGRect(x: 100, y: 100, width: 100, height: 100) + } + + let controller = getOrCreateFlowController(for: .screenRecording) + controller.authorize( + pane: .screenRecording, + suggestedAppURLs: [Bundle.main.bundleURL], + sourceFrameInScreen: screenRect + ) + + // Start polling to auto-close when granted + startFlowPolling(for: .screenRecording) + + return false + } + + /// Checks Accessibility permission and triggers the drag-to-authorize flow if denied. + /// - Returns: True if permission is granted, False if we had to request it (flow started). + @discardableResult + func ensureAccessibilityPermissionFlow() -> Bool { + refreshPermissionStatus() + if hasAccessibilityPermission { + return true + } + + // Show the drag-to-authorize panel in the center of the main screen + let screenRect: CGRect + if let mainScreen = NSScreen.main { + let frame = mainScreen.frame + screenRect = CGRect(x: frame.midX - 50, y: frame.midY - 50, width: 100, height: 100) + } else { + screenRect = CGRect(x: 100, y: 100, width: 100, height: 100) + } + + let controller = getOrCreateFlowController(for: .accessibility) + controller.authorize( + pane: .accessibility, + suggestedAppURLs: [Bundle.main.bundleURL], + sourceFrameInScreen: screenRect + ) + + // Start polling to auto-close when granted + startFlowPolling(for: .accessibility) + + return false + } + + /// Gets or creates a PermissionFlowController for a specific pane + private func getOrCreateFlowController(for pane: PermissionFlowPane) -> PermissionFlowController { + if let controller = flowControllers[pane] { + return controller + } + let controller = PermissionFlowController() + flowControllers[pane] = controller + return controller + } + + /// Polls permission status to automatically close the panel once granted + private func startFlowPolling(for pane: PermissionFlowPane) { + flowPollingTasks[pane]?.cancel() + flowPollingTasks[pane] = Task { + for _ in 0..<150 { // Poll for up to 30 seconds + do { + try await Task.sleep(for: .milliseconds(200)) + } catch { + return + } + + let isGranted: Bool + switch pane { + case .screenRecording: + isGranted = PermissionStatusRegistry.provider(for: .screenRecording).authorizationState() == .granted + case .accessibility: + isGranted = AXIsProcessTrusted() + default: + isGranted = false + } + + if isGranted { + if let controller = flowControllers[pane] { + controller.closePanel() + } + refreshPermissionStatus() + flowPollingTasks[pane] = nil + return + } + } + } + } } // MARK: - Convenience Extensions diff --git a/ScreenTranslate/Services/Security/KeychainService.swift b/ScreenTranslate/Services/Security/KeychainService.swift index 835c8de..afcc5e2 100644 --- a/ScreenTranslate/Services/Security/KeychainService.swift +++ b/ScreenTranslate/Services/Security/KeychainService.swift @@ -48,132 +48,34 @@ actor KeychainService { additional: additionalData ) - try saveCredentialsInternal( - credentials: credentials, - account: engine.rawValue, - label: engine.rawValue - ) - } - - /// Internal helper for saving credentials to keychain - /// - Parameters: - /// - credentials: The credentials to save - /// - account: The account identifier for the keychain item - /// - label: A descriptive label for logging - private func saveCredentialsInternal(credentials: StoredCredentials, account: String, label: String) throws { guard let encodedData = try? JSONEncoder().encode(credentials) else { throw KeychainError.invalidData } - let query: [String: Any] = [ - kSecClass as String: kSecClassGenericPassword, - kSecAttrService as String: service, - kSecAttrAccount as String: account - ] - - // Check if item exists and update it, or add new if not found - let status = SecItemCopyMatching(query as CFDictionary, nil) - if status == errSecSuccess { - // Item exists - update it - let updateQuery: [String: Any] = [ - kSecValueData as String: encodedData, - kSecAttrAccessible as String: kSecAttrAccessibleWhenUnlockedThisDeviceOnly - ] - let updateStatus = SecItemUpdate(query as CFDictionary, updateQuery as CFDictionary) - guard updateStatus == errSecSuccess else { - logger.error("Failed to update credentials for \(label): \(updateStatus)") - throw KeychainError.unexpectedStatus(updateStatus) - } - logger.info("Updated credentials for \(label)") - } else if status == errSecItemNotFound { - // Item doesn't exist - add new - let addQuery: [String: Any] = [ - kSecClass as String: kSecClassGenericPassword, - kSecAttrService as String: service, - kSecAttrAccount as String: account, - kSecValueData as String: encodedData, - kSecAttrAccessible as String: kSecAttrAccessibleWhenUnlockedThisDeviceOnly - ] - let addStatus = SecItemAdd(addQuery as CFDictionary, nil) - guard addStatus == errSecSuccess else { - logger.error("Failed to save credentials for \(label): \(addStatus)") - throw KeychainError.unexpectedStatus(addStatus) - } - logger.info("Saved credentials for \(label)") - } else { - logger.error("Failed to check credentials for \(label): \(status)") - throw KeychainError.unexpectedStatus(status) - } + try saveRaw(data: encodedData, account: engine.rawValue) } /// Retrieve stored credentials for an engine /// - Parameter engine: The engine type to get credentials for /// - Returns: The stored credentials, or nil if not found func getCredentials(for engine: TranslationEngineType) throws -> StoredCredentials? { - let query: [String: Any] = [ - kSecClass as String: kSecClassGenericPassword, - kSecAttrService as String: service, - kSecAttrAccount as String: engine.rawValue, - kSecReturnData as String: true, - kSecMatchLimit as String: kSecMatchLimitOne - ] - - var result: CFTypeRef? - let status = SecItemCopyMatching(query as CFDictionary, &result) - - guard status == errSecSuccess else { - if status == errSecItemNotFound { - logger.debug("No credentials found for \(engine.rawValue)") - return nil - } - logger.error("Failed to retrieve credentials for \(engine.rawValue): \(status)") - throw KeychainError.unexpectedStatus(status) - } - - guard let data = result as? Data else { - throw KeychainError.invalidData + guard let data = try loadRaw(account: engine.rawValue) else { + return nil } - - let credentials = try JSONDecoder().decode(StoredCredentials.self, from: data) - logger.debug("Retrieved credentials for \(engine.rawValue)") - return credentials + return try JSONDecoder().decode(StoredCredentials.self, from: data) } /// Delete stored credentials for an engine /// - Parameter engine: The engine type to delete credentials for func deleteCredentials(for engine: TranslationEngineType) throws { - let query: [String: Any] = [ - kSecClass as String: kSecClassGenericPassword, - kSecAttrService as String: service, - kSecAttrAccount as String: engine.rawValue - ] - - let status = SecItemDelete(query as CFDictionary) - - guard status == errSecSuccess || status == errSecItemNotFound else { - logger.error("Failed to delete credentials for \(engine.rawValue): \(status)") - throw KeychainError.unexpectedStatus(status) - } - - logger.info("Deleted credentials for \(engine.rawValue)") + try deleteRaw(account: engine.rawValue) } /// Check if credentials exist for an engine /// - Parameter engine: The engine type to check /// - Returns: True if credentials exist func hasCredentials(for engine: TranslationEngineType) -> Bool { - let query: [String: Any] = [ - kSecClass as String: kSecClassGenericPassword, - kSecAttrService as String: service, - kSecAttrAccount as String: engine.rawValue, - kSecReturnData as String: false, - kSecMatchLimit as String: kSecMatchLimitOne - ] - - var result: CFTypeRef? - let status = SecItemCopyMatching(query as CFDictionary, &result) - - return status == errSecSuccess + return existsRaw(account: engine.rawValue) } /// Get only the API key for an engine (convenience method) @@ -196,21 +98,132 @@ actor KeychainService { /// - compatibleId: The compatible engine identifier (e.g., "custom:0", "custom:1") func saveCredentials(apiKey: String, forCompatibleId compatibleId: String) throws { let credentials = StoredCredentials(apiKey: apiKey) - try saveCredentialsInternal( - credentials: credentials, - account: compatibleId, - label: "compatible engine \(compatibleId)" - ) + guard let encodedData = try? JSONEncoder().encode(credentials) else { + throw KeychainError.invalidData + } + try saveRaw(data: encodedData, account: compatibleId) } /// Retrieve stored credentials for a compatible engine instance /// - Parameter compatibleId: The compatible engine identifier /// - Returns: The stored credentials, or nil if not found func getCredentials(forCompatibleId compatibleId: String) throws -> StoredCredentials? { + guard let data = try loadRaw(account: compatibleId) else { + return nil + } + return try JSONDecoder().decode(StoredCredentials.self, from: data) + } + + /// Check if credentials exist for a compatible engine instance + /// - Parameter compatibleId: The compatible engine identifier + /// - Returns: True if credentials exist + func hasCredentials(forCompatibleId compatibleId: String) -> Bool { + return existsRaw(account: compatibleId) + } + + /// Delete stored credentials for a compatible engine instance + /// - Parameter compatibleId: The compatible engine identifier + func deleteCredentials(forCompatibleId compatibleId: String) throws { + try deleteRaw(account: compatibleId) + } + + /// Delete all stored credentials + func deleteAllCredentials() throws { + try deleteAllRaw() + } + + // MARK: - PaddleOCR Cloud Methods + + /// Save PaddleOCR cloud API key + /// - Parameter apiKey: The API key to store + func savePaddleOCRCredentials(apiKey: String) throws { + let credentials = StoredCredentials(apiKey: apiKey) + guard let encodedData = try? JSONEncoder().encode(credentials) else { + throw KeychainError.invalidData + } + try saveRaw(data: encodedData, account: Self.paddleOCRAccount) + } + + /// Retrieve stored PaddleOCR cloud API key + /// - Returns: The stored API key, or nil if not found + func getPaddleOCRCredentials() -> String? { + do { + guard let data = try loadRaw(account: Self.paddleOCRAccount) else { + return nil + } + let credentials = try JSONDecoder().decode(StoredCredentials.self, from: data) + return credentials.apiKey + } catch { + logger.error("Failed to retrieve PaddleOCR cloud credentials: \(error.localizedDescription)") + return nil + } + } + + /// Delete stored PaddleOCR cloud credentials + func deletePaddleOCRCredentials() throws { + try deleteRaw(account: Self.paddleOCRAccount) + } + + // MARK: - Core Storage Operations + + private func saveRaw(data: Data, account: String) throws { + #if DEBUG + let key = debugKey(for: account) + UserDefaults.standard.set(data, forKey: key) + logger.info("[Debug Storage] Saved credentials for \(account)") + #else let query: [String: Any] = [ kSecClass as String: kSecClassGenericPassword, kSecAttrService as String: service, - kSecAttrAccount as String: compatibleId, + kSecAttrAccount as String: account + ] + + let status = SecItemCopyMatching(query as CFDictionary, nil) + if status == errSecSuccess { + let updateQuery: [String: Any] = [ + kSecValueData as String: data, + kSecAttrAccessible as String: kSecAttrAccessibleWhenUnlockedThisDeviceOnly + ] + let updateStatus = SecItemUpdate(query as CFDictionary, updateQuery as CFDictionary) + guard updateStatus == errSecSuccess else { + logger.error("Failed to update credentials for \(account): \(updateStatus)") + throw KeychainError.unexpectedStatus(updateStatus) + } + logger.info("Updated credentials for \(account)") + } else if status == errSecItemNotFound { + let addQuery: [String: Any] = [ + kSecClass as String: kSecClassGenericPassword, + kSecAttrService as String: service, + kSecAttrAccount as String: account, + kSecValueData as String: data, + kSecAttrAccessible as String: kSecAttrAccessibleWhenUnlockedThisDeviceOnly + ] + let addStatus = SecItemAdd(addQuery as CFDictionary, nil) + guard addStatus == errSecSuccess else { + logger.error("Failed to save credentials for \(account): \(addStatus)") + throw KeychainError.unexpectedStatus(addStatus) + } + logger.info("Saved credentials for \(account)") + } else { + logger.error("Failed to check credentials for \(account): \(status)") + throw KeychainError.unexpectedStatus(status) + } + #endif + } + + private func loadRaw(account: String) throws -> Data? { + #if DEBUG + let key = debugKey(for: account) + let data = UserDefaults.standard.data(forKey: key) + if data != nil { + logger.debug("[Debug Storage] Retrieved credentials for \(account)") + } + return data + #else + let query: [String: Any] = [ + kSecClass as String: kSecClassGenericPassword, + kSecAttrService as String: service, + kSecAttrAccount as String: account, kSecReturnData as String: true, kSecMatchLimit as String: kSecMatchLimitOne ] @@ -220,97 +233,106 @@ actor KeychainService { guard status == errSecSuccess else { if status == errSecItemNotFound { - logger.debug("No credentials found for \(compatibleId)") + logger.debug("No credentials found for \(account)") return nil } - logger.error("Failed to retrieve credentials for \(compatibleId): \(status)") + logger.error("Failed to retrieve credentials for \(account): \(status)") throw KeychainError.unexpectedStatus(status) } - guard let data = result as? Data else { - throw KeychainError.invalidData - } - - let credentials = try JSONDecoder().decode(StoredCredentials.self, from: data) - logger.debug("Retrieved credentials for \(compatibleId)") - return credentials + return result as? Data + #endif } - /// Check if credentials exist for a compatible engine instance - /// - Parameter compatibleId: The compatible engine identifier - /// - Returns: True if credentials exist - func hasCredentials(forCompatibleId compatibleId: String) -> Bool { + private func deleteRaw(account: String) throws { + #if DEBUG + let key = debugKey(for: account) + UserDefaults.standard.removeObject(forKey: key) + logger.info("[Debug Storage] Deleted credentials for \(account)") + #else let query: [String: Any] = [ kSecClass as String: kSecClassGenericPassword, kSecAttrService as String: service, - kSecAttrAccount as String: compatibleId, - kSecReturnData as String: false, - kSecMatchLimit as String: kSecMatchLimitOne + kSecAttrAccount as String: account ] - var result: CFTypeRef? - let status = SecItemCopyMatching(query as CFDictionary, &result) - - return status == errSecSuccess + let status = SecItemDelete(query as CFDictionary) + guard status == errSecSuccess || status == errSecItemNotFound else { + logger.error("Failed to delete credentials for \(account): \(status)") + throw KeychainError.unexpectedStatus(status) + } + logger.info("Deleted credentials for \(account)") + #endif } - /// Delete stored credentials for a compatible engine instance - /// - Parameter compatibleId: The compatible engine identifier - func deleteCredentials(forCompatibleId compatibleId: String) throws { + private func existsRaw(account: String) -> Bool { + #if DEBUG + let key = debugKey(for: account) + return UserDefaults.standard.data(forKey: key) != nil + #else let query: [String: Any] = [ kSecClass as String: kSecClassGenericPassword, kSecAttrService as String: service, - kSecAttrAccount as String: compatibleId + kSecAttrAccount as String: account, + kSecReturnData as String: false, + kSecMatchLimit as String: kSecMatchLimitOne ] - let status = SecItemDelete(query as CFDictionary) - - guard status == errSecSuccess || status == errSecItemNotFound else { - logger.error("Failed to delete credentials for \(compatibleId): \(status)") - throw KeychainError.unexpectedStatus(status) + var result: CFTypeRef? + let status = SecItemCopyMatching(query as CFDictionary, &result) + if status == errSecSuccess { + return true + } else if status == errSecItemNotFound { + return false + } else { + logger.error("Failed to check existence of credentials for \(account): status \(status)") + return false } - - logger.info("Deleted credentials for compatible engine \(compatibleId)") + #endif } - /// Delete all stored credentials - func deleteAllCredentials() throws { + private func deleteAllRaw() throws { + #if DEBUG + let defaults = UserDefaults.standard + let prefix = "com.screentranslate.credentials.debug." + let keysToRemove = defaults.dictionaryRepresentation().keys.filter { $0.hasPrefix(prefix) } + for key in keysToRemove { + defaults.removeObject(forKey: key) + } + logger.info("[Debug Storage] Deleted all credentials") + #else let query: [String: Any] = [ kSecClass as String: kSecClassGenericPassword, kSecAttrService as String: service ] let status = SecItemDelete(query as CFDictionary) - guard status == errSecSuccess || status == errSecItemNotFound else { throw KeychainError.unexpectedStatus(status) } - logger.info("Deleted all credentials") + #endif } - // MARK: - PaddleOCR Cloud Methods - - /// Save PaddleOCR cloud API key - /// - Parameter apiKey: The API key to store - func savePaddleOCRCredentials(apiKey: String) throws { - let credentials = StoredCredentials(apiKey: apiKey) - try saveCredentialsInternal( - credentials: credentials, - account: Self.paddleOCRAccount, - label: "PaddleOCR cloud" - ) + private func debugKey(for account: String) -> String { + return "com.screentranslate.credentials.debug.\(account)" } - /// Retrieve stored PaddleOCR cloud API key - /// - Returns: The stored API key, or nil if not found - func getPaddleOCRCredentials() -> String? { - let account = Self.paddleOCRAccount + // MARK: - Synchronous Keychain Access for AppSettings (Non-isolated static helpers) + static func loadVLMAPIKeySynchronously() -> String { + #if DEBUG + let key = "com.screentranslate.credentials.debug.vlm_api_key" + guard let data = UserDefaults.standard.data(forKey: key), + let credentials = try? JSONDecoder().decode(StoredCredentials.self, from: data) else { + return "" + } + return credentials.apiKey + #else let query: [String: Any] = [ kSecClass as String: kSecClassGenericPassword, - kSecAttrService as String: service, - kSecAttrAccount as String: account, + kSecAttrService as String: KeychainService.serviceIdentifier, + kSecAttrAccount as String: "vlm_api_key", kSecReturnData as String: true, kSecMatchLimit as String: kSecMatchLimitOne ] @@ -318,41 +340,44 @@ actor KeychainService { var result: CFTypeRef? let status = SecItemCopyMatching(query as CFDictionary, &result) - guard status == errSecSuccess else { - if status == errSecItemNotFound { - logger.debug("No PaddleOCR cloud credentials found") - return nil - } - logger.error("Failed to retrieve PaddleOCR cloud credentials: \(status)") - return nil - } - - guard let data = result as? Data else { - return nil + guard status == errSecSuccess, + let data = result as? Data, + let credentials = try? JSONDecoder().decode(StoredCredentials.self, from: data) else { + return "" } - let credentials = try? JSONDecoder().decode(StoredCredentials.self, from: data) - return credentials?.apiKey + return credentials.apiKey + #endif } - /// Delete stored PaddleOCR cloud credentials - func deletePaddleOCRCredentials() throws { - let account = Self.paddleOCRAccount - + static func loadPaddleOCRAPIKeySynchronously() -> String { + #if DEBUG + let key = "com.screentranslate.credentials.debug.\(KeychainService.paddleOCRAccount)" + guard let data = UserDefaults.standard.data(forKey: key), + let credentials = try? JSONDecoder().decode(StoredCredentials.self, from: data) else { + return "" + } + return credentials.apiKey + #else let query: [String: Any] = [ kSecClass as String: kSecClassGenericPassword, - kSecAttrService as String: service, - kSecAttrAccount as String: account + kSecAttrService as String: KeychainService.serviceIdentifier, + kSecAttrAccount as String: KeychainService.paddleOCRAccount, + kSecReturnData as String: true, + kSecMatchLimit as String: kSecMatchLimitOne ] - let status = SecItemDelete(query as CFDictionary) + var result: CFTypeRef? + let status = SecItemCopyMatching(query as CFDictionary, &result) - guard status == errSecSuccess || status == errSecItemNotFound else { - logger.error("Failed to delete PaddleOCR cloud credentials: \(status)") - throw KeychainError.unexpectedStatus(status) + guard status == errSecSuccess, + let data = result as? Data, + let credentials = try? JSONDecoder().decode(StoredCredentials.self, from: data) else { + return "" } - logger.info("Deleted PaddleOCR cloud credentials") + return credentials.apiKey + #endif } } diff --git a/ScreenTranslate/Services/Translation/Providers/BaiduTranslationProvider.swift b/ScreenTranslate/Services/Translation/Providers/BaiduTranslationProvider.swift index 454ceec..df0acf0 100644 --- a/ScreenTranslate/Services/Translation/Providers/BaiduTranslationProvider.swift +++ b/ScreenTranslate/Services/Translation/Providers/BaiduTranslationProvider.swift @@ -121,15 +121,6 @@ actor BaiduTranslationProvider: TranslationProvider { return results } - func checkConnection() async -> Bool { - do { - _ = try await translate(text: "test", from: "en", to: "zh") - return true - } catch { - logger.error("Baidu connection check failed: \(error.localizedDescription)") - return false - } - } // MARK: - Private Methods diff --git a/ScreenTranslate/Services/Translation/Providers/CompatibleTranslationProvider.swift b/ScreenTranslate/Services/Translation/Providers/CompatibleTranslationProvider.swift index 0b501ab..722ac04 100644 --- a/ScreenTranslate/Services/Translation/Providers/CompatibleTranslationProvider.swift +++ b/ScreenTranslate/Services/Translation/Providers/CompatibleTranslationProvider.swift @@ -197,13 +197,58 @@ actor CompatibleTranslationProvider: TranslationProvider, TranslationPromptConfi return results } - func checkConnection() async -> Bool { - do { - _ = try await translate(text: "Hello", from: "en", to: "zh") - return true - } catch { - logger.error("Connection check failed: \(error.localizedDescription)") - return false + func verifyConnection() async throws { + let baseURL = compatibleConfig.baseURL.trimmingCharacters(in: CharacterSet(charactersIn: "/")) + guard let url = URL(string: baseURL) else { + throw TranslationProviderError.invalidConfiguration("Invalid base URL") + } + + let apiURL = url.resolvingLocalhost.appendingPathComponent("models") + + var request = URLRequest(url: apiURL) + request.httpMethod = "GET" + request.timeoutInterval = 10.0 + + if compatibleConfig.hasAPIKey { + let keychainId = compatibleConfig.keychainId + if let credentials = try await keychain.getCredentials(forCompatibleId: keychainId) { + // Security check + if let host = url.resolvingLocalhost.host, !Self.isLocalhost(host) && url.resolvingLocalhost.scheme != "https" { + throw TranslationProviderError.invalidConfiguration( + "Refusing to send API key over insecure connection (HTTP). Use HTTPS or a localhost URL." + ) + } + request.setValue("Bearer \(credentials.apiKey)", forHTTPHeaderField: "Authorization") + } + } + + if let headers = request.allHTTPHeaderFields { + var safeHeaders: [String: String] = [:] + for (key, value) in headers { + let lowerKey = key.lowercased() + if lowerKey == "authorization" { + if value.lowercased().hasPrefix("bearer ") { + safeHeaders[key] = "Bearer " + String(value.dropFirst(7).prefix(4)) + "..." + } else { + safeHeaders[key] = String(value.prefix(4)) + "..." + } + } else if lowerKey == "x-api-key" || lowerKey.contains("key") { + safeHeaders[key] = String(value.prefix(4)) + "..." + } else { + safeHeaders[key] = value + } + } + logger.info("Sending verifyConnection request to \(apiURL.absoluteString) with headers: \(safeHeaders)") + } + + let (_, response) = try await URLSession.shared.data(for: request) + guard let httpResponse = response as? HTTPURLResponse else { + throw TranslationProviderError.connectionFailed("Invalid response") + } + + guard httpResponse.statusCode == 200 else { + logger.error("Connection verify failed: status \(httpResponse.statusCode)") + throw TranslationProviderError.connectionFailed("Connection verify failed: HTTP \(httpResponse.statusCode)") } } @@ -248,7 +293,7 @@ actor CompatibleTranslationProvider: TranslationProvider, TranslationPromptConfi } // Build OpenAI-compatible endpoint: baseURL/chat/completions - let apiURL = url.appendingPathComponent("chat/completions") + let apiURL = url.resolvingLocalhost.appendingPathComponent("chat/completions") var request = URLRequest(url: apiURL) request.httpMethod = "POST" @@ -266,12 +311,31 @@ actor CompatibleTranslationProvider: TranslationProvider, TranslationPromptConfi "messages": [ ["role": "user", "content": prompt] ], + "stream": false, "temperature": config.options?.temperature ?? 0.3, "max_tokens": config.options?.maxTokens ?? 2048 ] request.httpBody = try JSONSerialization.data(withJSONObject: body) + if let headers = request.allHTTPHeaderFields { + var safeHeaders: [String: String] = [:] + for (key, value) in headers { + let lowerKey = key.lowercased() + if lowerKey == "authorization" { + if value.lowercased().hasPrefix("bearer ") { + safeHeaders[key] = "Bearer " + String(value.dropFirst(7).prefix(4)) + "..." + } else { + safeHeaders[key] = String(value.prefix(4)) + "..." + } + } else if lowerKey == "x-api-key" || lowerKey.contains("key") { + safeHeaders[key] = String(value.prefix(4)) + "..." + } else { + safeHeaders[key] = value + } + } + logger.info("Sending request to \(apiURL.absoluteString) with headers: \(safeHeaders)") + } let (data, response) = try await URLSession.shared.data(for: request) guard let httpResponse = response as? HTTPURLResponse else { @@ -295,17 +359,90 @@ actor CompatibleTranslationProvider: TranslationProvider, TranslationPromptConfi } private func parseResponse(_ data: Data) throws -> String { - guard let json = try JSONSerialization.jsonObject(with: data) as? [String: Any], - let choices = json["choices"] as? [[String: Any]], + guard let rawString = String(data: data, encoding: .utf8) else { + throw TranslationProviderError.translationFailed("Unable to decode data to UTF-8 string") + } + + let trimmedResponse = rawString.trimmingCharacters(in: .whitespacesAndNewlines) + + // Check if response is Server-Sent Events (SSE) stream format + if trimmedResponse.hasPrefix("data:") || trimmedResponse.contains("\ndata:") { + return try parseSSEStream(trimmedResponse) + } + + let jsonObject: Any + do { + jsonObject = try JSONSerialization.jsonObject(with: data) + } catch { + logger.error("JSON serialization failed: \(error.localizedDescription). Response size: \(data.count) bytes") + throw error + } + + guard let json = jsonObject as? [String: Any] else { + logger.error("Response JSON is not a dictionary object") + throw TranslationProviderError.translationFailed("Response JSON is not a dictionary") + } + + // Handle error responses from OpenAI compatible APIs + if let errorObj = json["error"] as? [String: Any], + let errorMessage = errorObj["message"] as? String { + logger.error("API returned error: \(errorMessage)") + throw TranslationProviderError.translationFailed("API error: \(errorMessage)") + } + + guard let choices = json["choices"] as? [[String: Any]], let firstChoice = choices.first, let message = firstChoice["message"] as? [String: Any], let content = message["content"] as? String else { - throw TranslationProviderError.translationFailed("Failed to parse response") + logger.error("Unexpected JSON response structure (missing choices or content)") + throw TranslationProviderError.translationFailed("Unexpected JSON response structure") } return content.trimmingCharacters(in: .whitespacesAndNewlines) } + private func parseSSEStream(_ streamText: String) throws -> String { + var resultText = "" + let lines = streamText.components(separatedBy: .newlines) + + for line in lines { + let trimmedLine = line.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmedLine.isEmpty else { continue } + + // Skip stream end marker + if trimmedLine == "data: [DONE]" { + continue + } + + if trimmedLine.hasPrefix("data:") { + let jsonText = trimmedLine.dropFirst(5).trimmingCharacters(in: .whitespaces) + guard !jsonText.isEmpty else { continue } + + guard let jsonData = jsonText.data(using: .utf8), + let json = try? JSONSerialization.jsonObject(with: jsonData) as? [String: Any] else { + continue + } + + if let choices = json["choices"] as? [[String: Any]], + let firstChoice = choices.first { + if let delta = firstChoice["delta"] as? [String: Any], + let content = delta["content"] as? String { + resultText += content + } else if let text = firstChoice["text"] as? String { + resultText += text + } + } + } + } + + guard !resultText.isEmpty else { + logger.error("Failed to extract any text from SSE stream (length: \(streamText.count))") + throw TranslationProviderError.translationFailed("Empty text from SSE stream") + } + + return resultText.trimmingCharacters(in: .whitespacesAndNewlines) + } + private func translate( text: String, from sourceLanguage: String?, @@ -347,4 +484,14 @@ actor CompatibleTranslationProvider: TranslationProvider, TranslationPromptConfi targetLanguage: targetLanguage ) } + + private nonisolated static func isLocalhost(_ host: String) -> Bool { + let lowered = host.lowercased() + return lowered == "localhost" + || lowered == "127.0.0.1" + || lowered == "::1" + || lowered == "0.0.0.0" + || lowered.hasSuffix(".local") + || lowered.contains(".local:") + } } diff --git a/ScreenTranslate/Services/Translation/Providers/DeepLTranslationProvider.swift b/ScreenTranslate/Services/Translation/Providers/DeepLTranslationProvider.swift index bc8d7c1..d68f6c3 100644 --- a/ScreenTranslate/Services/Translation/Providers/DeepLTranslationProvider.swift +++ b/ScreenTranslate/Services/Translation/Providers/DeepLTranslationProvider.swift @@ -174,15 +174,6 @@ actor DeepLTranslationProvider: TranslationProvider { } } - func checkConnection() async -> Bool { - do { - _ = try await translate(text: "test", from: "en", to: "zh") - return true - } catch { - logger.error("DeepL connection check failed: \(error.localizedDescription)") - return false - } - } // MARK: - Private Methods diff --git a/ScreenTranslate/Services/Translation/Providers/GoogleTranslationProvider.swift b/ScreenTranslate/Services/Translation/Providers/GoogleTranslationProvider.swift index d57d03f..1a2f4d2 100644 --- a/ScreenTranslate/Services/Translation/Providers/GoogleTranslationProvider.swift +++ b/ScreenTranslate/Services/Translation/Providers/GoogleTranslationProvider.swift @@ -183,15 +183,6 @@ actor GoogleTranslationProvider: TranslationProvider { } } - func checkConnection() async -> Bool { - do { - _ = try await translate(text: "test", from: "en", to: "zh") - return true - } catch { - logger.error("Google connection check failed: \(error.localizedDescription)") - return false - } - } // MARK: - Private Methods diff --git a/ScreenTranslate/Services/Translation/Providers/LLMTranslationProvider.swift b/ScreenTranslate/Services/Translation/Providers/LLMTranslationProvider.swift index b86941f..bd3a78a 100644 --- a/ScreenTranslate/Services/Translation/Providers/LLMTranslationProvider.swift +++ b/ScreenTranslate/Services/Translation/Providers/LLMTranslationProvider.swift @@ -166,17 +166,58 @@ actor LLMTranslationProvider: TranslationProvider, TranslationPromptConfigurable ) } - func checkConnection() async -> Bool { - do { - _ = try await translate( - text: "Hello", - from: "en", - to: "zh" - ) - return true - } catch { - logger.error("Connection check failed: \(error.localizedDescription)") - return false + func verifyConnection() async throws { + let credentials = try await getCredentials() + let baseURL = try getBaseURL() + + // For OpenAI and Ollama, we can perform a models GET check (non-billing) + if engineType == .openai || engineType == .ollama { + let endpoint = engineType == .ollama ? baseURL.appendingPathComponent("api/tags") : baseURL.appendingPathComponent("models") + + var request = URLRequest(url: endpoint) + request.httpMethod = "GET" + request.timeoutInterval = 10.0 + + if let apiKey = credentials?.apiKey { + // Security check + if let host = baseURL.host, !Self.isLocalhost(host) && baseURL.scheme != "https" { + throw TranslationProviderError.invalidConfiguration( + "Refusing to send API key over insecure connection (HTTP). Use HTTPS or a localhost URL." + ) + } + request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization") + } + + if let headers = request.allHTTPHeaderFields { + var safeHeaders: [String: String] = [:] + for (key, value) in headers { + let lowerKey = key.lowercased() + if lowerKey == "authorization" { + if value.lowercased().hasPrefix("bearer ") { + safeHeaders[key] = "Bearer " + String(value.dropFirst(7).prefix(4)) + "..." + } else { + safeHeaders[key] = String(value.prefix(4)) + "..." + } + } else if lowerKey == "x-api-key" || lowerKey.contains("key") { + safeHeaders[key] = String(value.prefix(4)) + "..." + } else { + safeHeaders[key] = value + } + } + logger.info("Sending verifyConnection request to \(endpoint.absoluteString) with headers: \(safeHeaders)") + } + + let (_, response) = try await URLSession.shared.data(for: request) + guard let httpResponse = response as? HTTPURLResponse else { + throw TranslationProviderError.connectionFailed("Invalid response") + } + guard httpResponse.statusCode == 200 else { + logger.error("Connection verify failed: status \(httpResponse.statusCode)") + throw TranslationProviderError.connectionFailed("Connection verify failed: HTTP \(httpResponse.statusCode)") + } + } else { + // Claude, Gemini, etc. fallback to translation of "1" + _ = try await translate(text: "1", from: "en", to: "zh") } } @@ -283,6 +324,7 @@ actor LLMTranslationProvider: TranslationProvider, TranslationPromptConfigurable "messages": [ ["role": "user", "content": prompt] ], + "stream": false, "temperature": config.options?.temperature ?? 0.3, "max_tokens": config.options?.maxTokens ?? 2048 ] @@ -291,6 +333,24 @@ actor LLMTranslationProvider: TranslationProvider, TranslationPromptConfigurable request.httpBody = try JSONSerialization.data(withJSONObject: body) // Execute request + if let headers = request.allHTTPHeaderFields { + var safeHeaders: [String: String] = [:] + for (key, value) in headers { + let lowerKey = key.lowercased() + if lowerKey == "authorization" { + if value.lowercased().hasPrefix("bearer ") { + safeHeaders[key] = "Bearer " + String(value.dropFirst(7).prefix(4)) + "..." + } else { + safeHeaders[key] = String(value.prefix(4)) + "..." + } + } else if lowerKey == "x-api-key" || lowerKey.contains("key") { + safeHeaders[key] = String(value.prefix(4)) + "..." + } else { + safeHeaders[key] = value + } + } + logger.info("Sending request to \(endpoint.absoluteString) with headers: \(safeHeaders)") + } let (data, response) = try await URLSession.shared.data(for: request) guard let httpResponse = response as? HTTPURLResponse else { @@ -315,8 +375,35 @@ actor LLMTranslationProvider: TranslationProvider, TranslationPromptConfigurable } private func parseResponse(_ data: Data, for engineType: TranslationEngineType) throws -> String { - guard let json = try JSONSerialization.jsonObject(with: data) as? [String: Any] else { - throw TranslationProviderError.translationFailed("Failed to parse response") + guard let rawString = String(data: data, encoding: .utf8) else { + throw TranslationProviderError.translationFailed("Unable to decode data to UTF-8 string") + } + + let trimmedResponse = rawString.trimmingCharacters(in: .whitespacesAndNewlines) + + // Check if response is Server-Sent Events (SSE) stream format + if trimmedResponse.hasPrefix("data:") || trimmedResponse.contains("\ndata:") { + return try parseSSEStream(trimmedResponse, for: engineType) + } + + let jsonObject: Any + do { + jsonObject = try JSONSerialization.jsonObject(with: data) + } catch { + logger.error("JSON serialization failed for \(engineType.rawValue): \(error.localizedDescription). Response size: \(data.count) bytes") + throw error + } + + guard let json = jsonObject as? [String: Any] else { + logger.error("Response JSON for \(engineType.rawValue) is not a dictionary object") + throw TranslationProviderError.translationFailed("Response JSON is not a dictionary") + } + + // Handle error responses from LLM APIs + if let errorObj = json["error"] as? [String: Any], + let errorMessage = errorObj["message"] as? String { + logger.error("API returned error for \(engineType.rawValue): \(errorMessage)") + throw TranslationProviderError.translationFailed("API error: \(errorMessage)") } let content: String? @@ -331,29 +418,83 @@ actor LLMTranslationProvider: TranslationProvider, TranslationPromptConfigurable } guard let text = content else { - throw TranslationProviderError.translationFailed("Failed to parse response") + logger.error("Unexpected JSON response structure for \(engineType.rawValue) (missing choices or content)") + throw TranslationProviderError.translationFailed("Unexpected JSON response structure") } return text.trimmingCharacters(in: .whitespacesAndNewlines) } + private func parseSSEStream(_ streamText: String, for engineType: TranslationEngineType) throws -> String { + var resultText = "" + let lines = streamText.components(separatedBy: .newlines) + + for line in lines { + let trimmedLine = line.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmedLine.isEmpty else { continue } + + // Skip stream end marker + if trimmedLine == "data: [DONE]" { + continue + } + + if trimmedLine.hasPrefix("data:") { + let jsonText = trimmedLine.dropFirst(5).trimmingCharacters(in: .whitespaces) + guard !jsonText.isEmpty else { continue } + + guard let jsonData = jsonText.data(using: .utf8), + let json = try? JSONSerialization.jsonObject(with: jsonData) as? [String: Any] else { + continue + } + + switch engineType { + case .claude: + // Claude streaming format (usually text delta in content_block_delta or message_delta) + if let type = json["type"] as? String, type == "content_block_delta", + let delta = json["delta"] as? [String: Any], + let text = delta["text"] as? String { + resultText += text + } + default: + // OpenAI/Gemini/Ollama streaming format + if let choices = json["choices"] as? [[String: Any]], + let firstChoice = choices.first { + if let delta = firstChoice["delta"] as? [String: Any], + let content = delta["content"] as? String { + resultText += content + } else if let text = firstChoice["text"] as? String { + resultText += text + } + } + } + } + } + + guard !resultText.isEmpty else { + logger.error("Failed to extract any text from SSE stream for \(engineType.rawValue) (length: \(streamText.count))") + throw TranslationProviderError.translationFailed("Empty text from SSE stream") + } + + return resultText.trimmingCharacters(in: .whitespacesAndNewlines) + } + private func getBaseURL() throws -> URL { if let customURL = config.options?.baseURL { guard let url = URL(string: customURL) else { throw TranslationProviderError.invalidConfiguration("Invalid custom baseURL: \(customURL)") } - return url + return url.resolvingLocalhost } if let defaultURL = engineType.defaultBaseURL, let url = URL(string: defaultURL) { - return url + return url.resolvingLocalhost } guard let url = URL(string: "https://api.openai.com/v1") else { throw TranslationProviderError.invalidConfiguration("Failed to create API URL") } - return url + return url.resolvingLocalhost } private func getModelName() -> String { diff --git a/ScreenTranslate/Services/TranslationEngine.swift b/ScreenTranslate/Services/TranslationEngine.swift index 5923bf5..8909924 100644 --- a/ScreenTranslate/Services/TranslationEngine.swift +++ b/ScreenTranslate/Services/TranslationEngine.swift @@ -1,5 +1,6 @@ import Foundation import Translation +import NaturalLanguage import os.signpost import os.log @@ -167,9 +168,6 @@ actor TranslationEngine { // MARK: - Internal Error Types private struct TranslationTimeout: Error {} - private struct AppleTranslationError: Error { - let nsError: NSError - } // MARK: - Configuration @@ -235,6 +233,21 @@ actor TranslationEngine { os_signpost(.begin, log: Self.performanceLog, name: "Translation", signpostID: Self.signpostID) let startTime = CFAbsoluteTimeGetCurrent() + let detectedSource = config.sourceLanguage ?? Self.detectLanguage(for: text) + if let detectedSource, detectedSource == effectiveTargetLanguage { + let duration = (CFAbsoluteTimeGetCurrent() - startTime) * 1000 + os_signpost(.end, log: Self.performanceLog, name: "Translation", signpostID: Self.signpostID) + #if DEBUG + os_log("Translation skipped (source == target) in %.1fms", log: OSLog.default, type: .info, duration) + #endif + return TranslationResult( + sourceText: text, + translatedText: text, + sourceLanguage: detectedSource.rawValue, + targetLanguage: effectiveTargetLanguage.rawValue + ) + } + do { let response = try await performTranslation( text: text, @@ -345,19 +358,13 @@ actor TranslationEngine { ) { group in group.addTask { [text, source, target] in do { - // The current TranslationSession initializer exposed by this SDK - // still requires an installed source language. + let detectedSource = source ?? Self.detectLanguage(for: text) ?? .english let session = TranslationSession( - installedSource: (source ?? .english).localeLanguage, + installedSource: detectedSource.localeLanguage, target: target.localeLanguage ) let result = try await session.translate(text) return .success(result) - } catch let error as NSError { - if error.domain == "TranslationErrorDomain" { - return .failure(AppleTranslationError(nsError: error)) - } - return .failure(error) } catch { return .failure(error) } @@ -382,8 +389,10 @@ actor TranslationEngine { return TranslationEngineError.timeout } - if let appleError = error as? AppleTranslationError { - if appleError.nsError.code == 16 { + let nsError = error as NSError + if nsError.domain == "TranslationErrorDomain" || nsError.domain.contains("Translation") { + let desc = nsError.localizedDescription.lowercased() + if nsError.code == 16 || nsError.code == 5 || desc.contains("offline models") || desc.contains("not installed") || desc.contains("not available") { return TranslationEngineError.languageNotInstalled( language: targetLanguage.localizedName, downloadInstructions: NSLocalizedString( @@ -392,12 +401,20 @@ actor TranslationEngine { ) ) } - return TranslationEngineError.translationFailed(underlying: appleError.nsError) + return TranslationEngineError.translationFailed(underlying: nsError) } return TranslationEngineError.translationFailed(underlying: error) } + /// Detects the language of the given text using NaturalLanguage framework. + private static func detectLanguage(for text: String) -> TranslationLanguage? { + guard let dominantLanguage = NLLanguageRecognizer.dominantLanguage(for: text) else { + return nil + } + return TranslationLanguage.fromTranslationCode(dominantLanguage.rawValue) + } + /// Returns the system's target language based on user preferences private static func systemTargetLanguage() -> TranslationLanguage { let systemLanguage = Locale.current.language.languageCode?.identifier ?? "en" @@ -535,8 +552,9 @@ enum TranslationEngineError: LocalizedError, Sendable { return String(format: NSLocalizedString("error.translation.unsupported.pair", comment: ""), source, target) case .languageNotInstalled(let language, _): return String(format: NSLocalizedString("error.translation.language.not.installed", comment: ""), language) - case .translationFailed: - return NSLocalizedString("error.translation.failed", comment: "") + case .translationFailed(let underlying): + let underlyingDesc = underlying.localizedDescription + return "\(NSLocalizedString("error.translation.failed", comment: "")): \(underlyingDesc)" } } diff --git a/ScreenTranslate/Services/TranslationProvider.swift b/ScreenTranslate/Services/TranslationProvider.swift index ed765bd..d2ae9a4 100644 --- a/ScreenTranslate/Services/TranslationProvider.swift +++ b/ScreenTranslate/Services/TranslationProvider.swift @@ -48,6 +48,10 @@ protocol TranslationProvider: Sendable { /// Check connection status to the translation service /// - Returns: true if the service is reachable and operational func checkConnection() async -> Bool + + /// Verify connection status to the translation service + /// - Throws: An error if the service is unreachable or misconfigured + func verifyConnection() async throws } /// Providers that can execute a translation request with a request-scoped prompt template. @@ -127,6 +131,23 @@ enum TranslationProviderError: LocalizedError, Sendable { // MARK: - Default Implementation extension TranslationProvider { + /// Verify connection status to the translation service + /// - Throws: An error if the service is unreachable or misconfigured + func verifyConnection() async throws { + _ = try await translate(text: "1", from: "en", to: "zh") + } + + /// Check connection status to the translation service + /// - Returns: true if the service is reachable and operational + func checkConnection() async -> Bool { + do { + try await verifyConnection() + return true + } catch { + return false + } + } + /// Default batch translation implementation that calls single translate sequentially /// Providers can override this with more efficient batch implementations func translate( diff --git a/ScreenTranslate/Services/TranslationService.swift b/ScreenTranslate/Services/TranslationService.swift index d8191b9..e8e500b 100644 --- a/ScreenTranslate/Services/TranslationService.swift +++ b/ScreenTranslate/Services/TranslationService.swift @@ -8,6 +8,7 @@ import Foundation import os.log +import NaturalLanguage /// Orchestrates multiple translation providers with various selection modes @available(macOS 13.0, *) @@ -350,6 +351,32 @@ actor TranslationService { return promptConfig } + /// Determines if a text contains translatable characters (letters or ideographs) + private func isTranslatable(_ text: String) -> Bool { + return text.unicodeScalars.contains { scalar in + if CharacterSet.letters.contains(scalar) { + return true + } + if (0x4E00...0x9FFF).contains(scalar.value) { + return true + } + return false + } + } + + /// Checks if the text contains any Chinese characters + private func containsHanCharacters(_ text: String) -> Bool { + return text.unicodeScalars.contains { (0x4E00...0x9FFF).contains($0.value) } + } + + /// Detects the language of the given text using NaturalLanguage framework. + private func detectLanguage(for text: String) -> TranslationLanguage? { + guard let dominantLanguage = NLLanguageRecognizer.dominantLanguage(for: text) else { + return nil + } + return TranslationLanguage.fromTranslationCode(dominantLanguage.rawValue) + } + private func translateWithResolvedPrompt( provider: any TranslationProvider, engine: TranslationEngineType, @@ -358,26 +385,179 @@ actor TranslationService { to targetLanguage: String, scene: TranslationScene? ) async throws -> [TranslationResult] { - guard let promptConfigurableProvider = provider as? TranslationPromptConfigurable else { - return try await provider.translate( - texts: texts, - from: sourceLanguage, - to: targetLanguage - ) + let targetLang = TranslationLanguage.fromTranslationCode(targetLanguage) + let explicitSourceLang = sourceLanguage.flatMap { TranslationLanguage.fromTranslationCode($0) } + + var finalResults = [TranslationResult?](repeating: nil, count: texts.count) + var pendingIndices: [Int] = [] + var pendingTexts: [String] = [] + + for (index, text) in texts.enumerated() { + let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines) + if trimmed.isEmpty { + finalResults[index] = TranslationResult( + sourceText: text, + translatedText: text, + sourceLanguage: sourceLanguage ?? "auto", + targetLanguage: targetLanguage + ) + continue + } + + // 1. Non-translatable text bypass (e.g. pure numbers, punctuation like "{""}") + if !isTranslatable(text) { + finalResults[index] = TranslationResult( + sourceText: text, + translatedText: text, + sourceLanguage: sourceLanguage ?? "auto", + targetLanguage: targetLanguage + ) + continue + } + + let detectedSourceLang = detectLanguage(for: text) + var resolvedSourceLang = explicitSourceLang ?? detectedSourceLang + + // 2. Handle cases where text clearly contains Chinese and target is Chinese + // Only force resolvedSourceLang to targetLang (Chinese) if: + // - The target is Chinese AND + // - The detected source is nil (NLP failed) or already detected as Chinese AND + // - The text actually contains Han characters + // This prevents misclassifying Japanese/Korean containing Kanji/Hanja as Chinese + if let targetLang, + (targetLang == .chineseSimplified || targetLang == .chineseTraditional), + containsHanCharacters(text) { + if detectedSourceLang == nil || + detectedSourceLang == .chineseSimplified || + detectedSourceLang == .chineseTraditional { + resolvedSourceLang = targetLang + } + } + + // 3. Self-translation bypass + if let resolvedSourceLang, let targetLang, resolvedSourceLang == targetLang { + finalResults[index] = TranslationResult( + sourceText: text, + translatedText: text, + sourceLanguage: resolvedSourceLang.rawValue, + targetLanguage: targetLang.rawValue + ) + } else { + pendingIndices.append(index) + pendingTexts.append(text) + } } - let promptTemplate = await resolvedPromptTemplate( - for: provider, - engine: engine, - scene: scene - ) + if !pendingTexts.isEmpty { + let translatedResults: [TranslationResult] + + if let promptConfigurableProvider = provider as? TranslationPromptConfigurable { + let promptTemplate = await resolvedPromptTemplate( + for: provider, + engine: engine, + scene: scene + ) + translatedResults = try await promptConfigurableProvider.translate( + texts: pendingTexts, + from: sourceLanguage, + to: targetLanguage, + promptTemplate: promptTemplate + ) + } else { + translatedResults = try await provider.translate( + texts: pendingTexts, + from: sourceLanguage, + to: targetLanguage + ) + } - return try await promptConfigurableProvider.translate( - texts: texts, - from: sourceLanguage, - to: targetLanguage, - promptTemplate: promptTemplate - ) + if translatedResults.count == pendingTexts.count { + for (offset, result) in translatedResults.enumerated() { + let originalIndex = pendingIndices[offset] + finalResults[originalIndex] = result + } + } else { + logger.error("Provider returned mismatch count. Expected: \(pendingTexts.count), got: \(translatedResults.count)") + for (offset, originalIndex) in pendingIndices.enumerated() { + if offset < translatedResults.count { + finalResults[originalIndex] = translatedResults[offset] + } else { + let originalText = texts[originalIndex] + finalResults[originalIndex] = TranslationResult( + sourceText: originalText, + translatedText: originalText, + sourceLanguage: sourceLanguage ?? "auto", + targetLanguage: targetLanguage + ) + } + } + } + } + + return finalResults.map { result in + let res = result ?? TranslationResult( + sourceText: "", + translatedText: "", + sourceLanguage: sourceLanguage ?? "auto", + targetLanguage: targetLanguage + ) + let sanitizedText = self.sanitizeTranslation(translated: res.translatedText, source: res.sourceText) + return TranslationResult( + sourceText: res.sourceText, + translatedText: sanitizedText, + sourceLanguage: res.sourceLanguage, + targetLanguage: res.targetLanguage + ) + } + } + + /// Sanitizes the translated text, reverting to original if it is empty, a broken JSON, or just empty curly braces like {""} + private func sanitizeTranslation(translated: String, source: String) -> String { + let trimmed = translated.trimmingCharacters(in: .whitespacesAndNewlines) + + if trimmed.isEmpty { + return source + } + + // 1. 如果原文不包含大括号,且译文以 { 开头、以 } 结尾,进行深度 JSON 解析与内容提取 + let sourceContainsBraces = source.contains("{") || source.contains("}") + if !sourceContainsBraces && trimmed.hasPrefix("{") && trimmed.hasSuffix("}") { + if let data = trimmed.data(using: .utf8), + let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any] { + // 常见大模型输出的翻译字段 + let translationKeys = ["translated_text", "translatedText", "translation", "result", "text"] + for key in translationKeys { + if let value = json[key] as? String { + let trimmedValue = value.trimmingCharacters(in: .whitespacesAndNewlines) + return trimmedValue.isEmpty ? source : value + } + } + // 空 JSON 字典或没有找到任何已知翻译字段的字典,安全回退到原文 + return source + } else { + // 损坏的以大括号包围的字符串,大概率也是泄露的大模型 JSON 结构,安全回退到原文 + return source + } + } + + // 2. 彻底的字符集排查防御:若原文不含大括号,且译文仅由大括号、冒号、空格、各种单双引号组成,直接回退到原文 + if !sourceContainsBraces { + let isOnlyBracesAndQuotes = trimmed.allSatisfy { char in + char == "{" || char == "}" || char == "\"" || char == "'" || char == "`" || + char == "“" || char == "”" || char == "‘" || char == "’" || char == ":" || + char.isWhitespace + } + if isOnlyBracesAndQuotes { + return source + } + } + + // 3. 兜底兼容变体 + if trimmed.hasPrefix("{") && trimmed.hasSuffix("}") && (trimmed.contains("\"\"") || trimmed.contains("“”")) { + return source + } + + return translated } private func resolvedPromptTemplate( @@ -474,6 +654,17 @@ actor TranslationService { // MARK: - Connection Testing + /// Verify connection and throw details on failure + func verifyConnection(for engine: TranslationEngineType) async throws { + let provider: any TranslationProvider + if let existing = await registry.provider(for: engine) { + provider = existing + } else { + provider = try await resolvedProvider(for: engine) + } + try await provider.verifyConnection() + } + /// Test connection to a specific engine func testConnection(for engine: TranslationEngineType) async -> Bool { // First try to get existing provider diff --git a/ScreenTranslateTests/GLMOCRVLMProviderTests.swift b/ScreenTranslateTests/GLMOCRVLMProviderTests.swift index d0ef579..6a209f8 100644 --- a/ScreenTranslateTests/GLMOCRVLMProviderTests.swift +++ b/ScreenTranslateTests/GLMOCRVLMProviderTests.swift @@ -117,3 +117,110 @@ final class GLMOCRVLMProviderTests: XCTestCase { XCTAssertEqual(actual.size.height, expected.size.height, accuracy: accuracy) } } + +final class OpenAIVLMProviderTests: XCTestCase { + var provider: OpenAIVLMProvider! + + override func setUp() { + super.setUp() + let config = VLMProviderConfiguration(apiKey: "test-key", baseURL: URL(string: "https://api.openai.com/v1")!, modelName: "gpt-4o") + provider = OpenAIVLMProvider(configuration: config) + } + + func testExtractContentAndStatusWithReasoningContentFallback() throws { + let json = """ + { + "choices": [ + { + "message": { + "role": "assistant", + "content": "", + "reasoning_content": "{\\"segments\\":[]}" + }, + "finish_reason": "stop" + } + ] + } + """ + let data = Data(json.utf8) + let (content, isTruncated, finishReason) = try provider.extractContentAndStatus(from: data) + XCTAssertEqual(content, "{\"segments\":[]}") + XCTAssertFalse(isTruncated) + XCTAssertEqual(finishReason, "stop") + } + + func testExtractContentAndStatusWithReasoningFallback() throws { + let json = """ + { + "choices": [ + { + "message": { + "role": "assistant", + "content": " ", + "reasoning": "{\\"segments\\":[]}" + }, + "finish_reason": "stop" + } + ] + } + """ + let data = Data(json.utf8) + let (content, isTruncated, finishReason) = try provider.extractContentAndStatus(from: data) + XCTAssertEqual(content, "{\"segments\":[]}") + XCTAssertFalse(isTruncated) + XCTAssertEqual(finishReason, "stop") + } + + func testExtractContentManuallyWithReasoningFallback() throws { + // Broken JSON structure but has reasoning_content + let brokenJSON = """ + { + "choices": [ + { + "message": { + "role": "assistant", + "content": null, + "reasoning_content": "{\\"segments\\":[]}" + } + """ + let extracted = provider.extractContentManually(from: brokenJSON) + XCTAssertEqual(extracted, "{\"segments\":[]}") + + // Broken JSON structure but has reasoning + let brokenJSON2 = """ + { + "choices": [ + { + "message": { + "role": "assistant", + "content": "", + "reasoning": "{\\"segments\\":[]}" + } + """ + let extracted2 = provider.extractContentManually(from: brokenJSON2) + XCTAssertEqual(extracted2, "{\"segments\":[]}") + } + + func testParseVLMContentWithEmptyResponseError() { + XCTAssertThrowsError(try provider.parseVLMContent(" ")) { error in + guard let providerError = error as? VLMProviderError else { + XCTFail("Expected VLMProviderError") + return + } + XCTAssertTrue(providerError.localizedDescription.contains("Received empty response from model")) + } + } + + func testParseVLMContentWithCleanedEmptyError() { + // A response that has code blocks but nothing inside + let content = "```json\n```" + XCTAssertThrowsError(try provider.parseVLMContent(content)) { error in + guard let providerError = error as? VLMProviderError else { + XCTFail("Expected VLMProviderError") + return + } + XCTAssertTrue(providerError.localizedDescription.contains("Cleaned content is empty")) + XCTAssertTrue(providerError.localizedDescription.contains("```json")) + } + } +} diff --git a/ScreenTranslateTests/ModelDiscoveryServiceTests.swift b/ScreenTranslateTests/ModelDiscoveryServiceTests.swift new file mode 100644 index 0000000..8aafb53 --- /dev/null +++ b/ScreenTranslateTests/ModelDiscoveryServiceTests.swift @@ -0,0 +1,107 @@ +import XCTest +@testable import ScreenTranslate + +final class ModelDiscoveryServiceTests: XCTestCase { + + // MARK: - JSON Parsing Tests + + func testParseOpenAIModelsResponse() { + let jsonString = """ + { + "object": "list", + "data": [ + { + "id": "gpt-4o", + "object": "model", + "created": 1686935002, + "owned_by": "organization" + }, + { + "id": "gpt-4o-mini", + "object": "model", + "created": 1686935003, + "owned_by": "organization" + } + ] + } + """ + + guard let data = jsonString.data(using: .utf8) else { + XCTFail("Failed to convert JSON string to data") + return + } + + struct OpenAIModel: Codable { + let id: String + } + + struct OpenAIModelsResponse: Codable { + let data: [OpenAIModel] + } + + do { + let decoded = try JSONDecoder().decode(OpenAIModelsResponse.self, from: data) + let models = decoded.data.map { $0.id }.sorted() + XCTAssertEqual(models.count, 2) + XCTAssertEqual(models[0], "gpt-4o") + XCTAssertEqual(models[1], "gpt-4o-mini") + } catch { + XCTFail("Decoding failed: \(error.localizedDescription)") + } + } + + func testParseOllamaTagsResponse() { + let jsonString = """ + { + "models": [ + { + "name": "llama3:latest", + "modified_at": "2024-06-19T12:00:00Z", + "size": 4700000000, + "digest": "sha256:12345" + }, + { + "name": "qwen2:7b", + "modified_at": "2024-06-19T12:00:00Z", + "size": 4700000000, + "digest": "sha256:67890" + } + ] + } + """ + + guard let data = jsonString.data(using: .utf8) else { + XCTFail("Failed to convert JSON string to data") + return + } + + struct OllamaModel: Codable { + let name: String + } + + struct OllamaTagsResponse: Codable { + let models: [OllamaModel] + } + + do { + let decoded = try JSONDecoder().decode(OllamaTagsResponse.self, from: data) + let models = decoded.models.map { $0.name }.sorted() + XCTAssertEqual(models.count, 2) + XCTAssertEqual(models[0], "llama3:latest") + XCTAssertEqual(models[1], "qwen2:7b") + } catch { + XCTFail("Decoding failed: \(error.localizedDescription)") + } + } + + // MARK: - Validation of URL Normalization + + func testEmptyURLThrowsError() async { + do { + _ = try await ModelDiscoveryService.fetchModels(baseURL: "", apiKey: nil, engineType: nil) + XCTFail("Expected fetchModels to throw error on empty URL") + } catch { + XCTAssertTrue(error.localizedDescription.contains("empty")) + } + } +} diff --git a/ScreenTranslateTests/TranslationServiceMocks.swift b/ScreenTranslateTests/TranslationServiceMocks.swift new file mode 100644 index 0000000..1d5653d --- /dev/null +++ b/ScreenTranslateTests/TranslationServiceMocks.swift @@ -0,0 +1,196 @@ +import XCTest +@testable import ScreenTranslate + +@available(macOS 13.0, *) +actor MockTranslationProvider: TranslationProvider, TranslationPromptConfigurable, TranslationPromptContextProviding { + struct Request: Sendable, Equatable { + let texts: [String] + let sourceLanguage: String? + let targetLanguage: String + } + + nonisolated let id: String + nonisolated let name: String + + private var available: Bool + private var translateError: Error? + private var batchResults: [TranslationResult] + private var checkConnectionResult: Bool + private var promptContextID: String? + private(set) var requests: [Request] = [] + private(set) var promptTemplates: [String?] = [] + + init( + id: String, + name: String, + available: Bool = true, + batchResults: [TranslationResult] = [], + translateError: Error? = nil, + checkConnectionResult: Bool = true, + promptContextID: String? = nil + ) { + self.id = id + self.name = name + self.available = available + self.batchResults = batchResults + self.translateError = translateError + self.checkConnectionResult = checkConnectionResult + self.promptContextID = promptContextID + } + + var isAvailable: Bool { + get async { available } + } + + func translate( + text: String, + from sourceLanguage: String?, + to targetLanguage: String + ) async throws -> TranslationResult { + let results = try await translate( + texts: [text], + from: sourceLanguage, + to: targetLanguage + ) + guard let result = results.first else { + XCTFail("MockTranslationProvider returned no results for a single-text request") + throw TranslationProviderError.translationFailed("MockTranslationProvider returned no results") + } + return result + } + + func translate( + texts: [String], + from sourceLanguage: String?, + to targetLanguage: String + ) async throws -> [TranslationResult] { + try await translate( + texts: texts, + from: sourceLanguage, + to: targetLanguage, + promptTemplate: nil + ) + } + + func translate( + texts: [String], + from sourceLanguage: String?, + to targetLanguage: String, + promptTemplate: String? + ) async throws -> [TranslationResult] { + requests.append( + Request(texts: texts, sourceLanguage: sourceLanguage, targetLanguage: targetLanguage) + ) + promptTemplates.append(promptTemplate) + + if let translateError { + throw translateError + } + + if batchResults.count == texts.count { + return batchResults + } + + if batchResults.count == 1, let first = batchResults.first { + return texts.map { text in + TranslationResult( + sourceText: text, + translatedText: first.translatedText, + sourceLanguage: first.sourceLanguage, + targetLanguage: first.targetLanguage + ) + } + } + + return texts.map { text in + TranslationResult( + sourceText: text, + translatedText: "\(text) -> \(targetLanguage)", + sourceLanguage: sourceLanguage ?? "Auto", + targetLanguage: targetLanguage + ) + } + } + + func checkConnection() async -> Bool { + checkConnectionResult + } + + func verifyConnection() async throws { + if let translateError { + throw translateError + } + } + + func requestCount() async -> Int { + requests.count + } + + func lastPromptTemplate() async -> String? { + promptTemplates.last.flatMap { $0 } + } + + func compatiblePromptIdentifier() async -> String? { + promptContextID + } +} + +@available(macOS 13.0, *) +actor MockTranslationServicing: TranslationServicing { + struct Request: Sendable, Equatable { + let segments: [String] + let targetLanguage: String + let preferredEngine: TranslationEngineType + let sourceLanguage: String? + let scene: TranslationScene? + let mode: EngineSelectionMode + let fallbackEnabled: Bool + let parallelEngines: [TranslationEngineType] + let sceneBindings: [TranslationScene: SceneEngineBinding] + } + + private var nextResult: [BilingualSegment] + private var nextError: Error? + private(set) var requests: [Request] = [] + + init(nextResult: [BilingualSegment] = [], nextError: Error? = nil) { + self.nextResult = nextResult + self.nextError = nextError + } + + func translate( + segments: [String], + to targetLanguage: String, + preferredEngine: TranslationEngineType, + from sourceLanguage: String?, + scene: TranslationScene?, + mode: EngineSelectionMode, + fallbackEnabled: Bool, + parallelEngines: [TranslationEngineType], + sceneBindings: [TranslationScene: SceneEngineBinding] + ) async throws -> [BilingualSegment] { + requests.append( + Request( + segments: segments, + targetLanguage: targetLanguage, + preferredEngine: preferredEngine, + sourceLanguage: sourceLanguage, + scene: scene, + mode: mode, + fallbackEnabled: fallbackEnabled, + parallelEngines: parallelEngines, + sceneBindings: sceneBindings + ) + ) + + if let nextError { + throw nextError + } + + return nextResult + } + + func requestCount() async -> Int { + requests.count + } +} diff --git a/ScreenTranslateTests/TranslationServicePipelineTests.swift b/ScreenTranslateTests/TranslationServicePipelineTests.swift index a8fe1ae..acde1d8 100644 --- a/ScreenTranslateTests/TranslationServicePipelineTests.swift +++ b/ScreenTranslateTests/TranslationServicePipelineTests.swift @@ -2,195 +2,31 @@ import XCTest @testable import ScreenTranslate @available(macOS 13.0, *) -actor MockTranslationProvider: TranslationProvider, TranslationPromptConfigurable, TranslationPromptContextProviding { - struct Request: Sendable, Equatable { - let texts: [String] - let sourceLanguage: String? - let targetLanguage: String - } - - nonisolated let id: String - nonisolated let name: String - - private var available: Bool - private var translateError: Error? - private var batchResults: [TranslationResult] - private var checkConnectionResult: Bool - private var promptContextID: String? - private(set) var requests: [Request] = [] - private(set) var promptTemplates: [String?] = [] - - init( - id: String, - name: String, - available: Bool = true, - batchResults: [TranslationResult] = [], - translateError: Error? = nil, - checkConnectionResult: Bool = true, - promptContextID: String? = nil - ) { - self.id = id - self.name = name - self.available = available - self.batchResults = batchResults - self.translateError = translateError - self.checkConnectionResult = checkConnectionResult - self.promptContextID = promptContextID - } - - var isAvailable: Bool { - get async { available } - } - - func translate( - text: String, - from sourceLanguage: String?, - to targetLanguage: String - ) async throws -> TranslationResult { - let results = try await translate( - texts: [text], - from: sourceLanguage, - to: targetLanguage - ) - guard let result = results.first else { - XCTFail("MockTranslationProvider returned no results for a single-text request") - throw TranslationProviderError.translationFailed("MockTranslationProvider returned no results") - } - return result - } - - func translate( - texts: [String], - from sourceLanguage: String?, - to targetLanguage: String - ) async throws -> [TranslationResult] { - try await translate( - texts: texts, - from: sourceLanguage, - to: targetLanguage, - promptTemplate: nil - ) - } - - func translate( - texts: [String], - from sourceLanguage: String?, - to targetLanguage: String, - promptTemplate: String? - ) async throws -> [TranslationResult] { - requests.append( - Request(texts: texts, sourceLanguage: sourceLanguage, targetLanguage: targetLanguage) - ) - promptTemplates.append(promptTemplate) - - if let translateError { - throw translateError - } +@MainActor +final class TranslationServicePipelineTests: XCTestCase { - if batchResults.count == texts.count { - return batchResults - } + private var originalEngineConfigs: [TranslationEngineType: TranslationEngineConfig] = [:] - if batchResults.count == 1, let first = batchResults.first { - return texts.map { text in - TranslationResult( - sourceText: text, - translatedText: first.translatedText, - sourceLanguage: first.sourceLanguage, - targetLanguage: first.targetLanguage - ) + override func setUp() async throws { + try await super.setUp() + let settings = AppSettings.shared + originalEngineConfigs = settings.engineConfigs + for engine in TranslationEngineType.allCases { + if var config = settings.engineConfigs[engine] { + config.isEnabled = true + settings.engineConfigs[engine] = config } } - - return texts.map { text in - TranslationResult( - sourceText: text, - translatedText: "\(text) -> \(targetLanguage)", - sourceLanguage: sourceLanguage ?? "Auto", - targetLanguage: targetLanguage - ) - } + settings.saveEngineConfigs() } - func checkConnection() async -> Bool { - checkConnectionResult + override func tearDown() async throws { + let settings = AppSettings.shared + settings.engineConfigs = originalEngineConfigs + settings.saveEngineConfigs() + try await super.tearDown() } - func requestCount() async -> Int { - requests.count - } - - func lastPromptTemplate() async -> String? { - promptTemplates.last.flatMap { $0 } - } - - func compatiblePromptIdentifier() async -> String? { - promptContextID - } -} - -@available(macOS 13.0, *) -actor MockTranslationServicing: TranslationServicing { - struct Request: Sendable, Equatable { - let segments: [String] - let targetLanguage: String - let preferredEngine: TranslationEngineType - let sourceLanguage: String? - let scene: TranslationScene? - let mode: EngineSelectionMode - let fallbackEnabled: Bool - let parallelEngines: [TranslationEngineType] - let sceneBindings: [TranslationScene: SceneEngineBinding] - } - - private var nextResult: [BilingualSegment] - private var nextError: Error? - private(set) var requests: [Request] = [] - - init(nextResult: [BilingualSegment] = [], nextError: Error? = nil) { - self.nextResult = nextResult - self.nextError = nextError - } - - func translate( - segments: [String], - to targetLanguage: String, - preferredEngine: TranslationEngineType, - from sourceLanguage: String?, - scene: TranslationScene?, - mode: EngineSelectionMode, - fallbackEnabled: Bool, - parallelEngines: [TranslationEngineType], - sceneBindings: [TranslationScene: SceneEngineBinding] - ) async throws -> [BilingualSegment] { - requests.append( - Request( - segments: segments, - targetLanguage: targetLanguage, - preferredEngine: preferredEngine, - sourceLanguage: sourceLanguage, - scene: scene, - mode: mode, - fallbackEnabled: fallbackEnabled, - parallelEngines: parallelEngines, - sceneBindings: sceneBindings - ) - ) - - if let nextError { - throw nextError - } - - return nextResult - } - - func requestCount() async -> Int { - requests.count - } -} - -@available(macOS 13.0, *) -final class TranslationServicePipelineTests: XCTestCase { private func makeResult( source: String, translated: String, @@ -713,4 +549,117 @@ final class TranslationServicePipelineTests: XCTestCase { } XCTAssertEqual(serviceRequestCount, 1) } + + func testSelfLanguageTranslationBypass() async throws { + let registry = TranslationEngineRegistry(registerBuiltInProviders: false) + let apple = MockTranslationProvider( + id: "apple", + name: "Apple", + batchResults: [] + ) + await registry.register(apple, for: .apple) + + let service = TranslationService(registry: registry) + + let bundle = try await service.translate( + segments: ["你好", "Hello"], + to: "zh-Hans", + from: nil, + mode: .primaryWithFallback, + preferredEngine: .apple, + fallbackEnabled: false + ) + + let appleRequests = await apple.requests + print("DEBUG_APPLE_REQUESTS: \(appleRequests)") + XCTAssertEqual(appleRequests.count, 1) + if appleRequests.count > 0 { + print("DEBUG_APPLE_REQUEST_0_TEXTS: \(appleRequests[0].texts)") + XCTAssertEqual(appleRequests[0].texts, ["Hello"]) + } + + let primaryResult = bundle.primaryResult + print("DEBUG_PRIMARY_RESULT_COUNT: \(primaryResult.count)") + for (i, res) in primaryResult.enumerated() { + print("DEBUG_PRIMARY_RESULT_\(i): source=\(res.sourceText), translated=\(res.translated)") + } + XCTAssertEqual(primaryResult.count, 2) + XCTAssertEqual(primaryResult[0].translated, "你好") + XCTAssertEqual(primaryResult[1].translated, "Hello -> zh-Hans") + } + + func testPrintRealUserSettings() async { + await MainActor.run { + print("--- USER SETTINGS PRINT ---") + let settings = AppSettings.shared + print("ocrEngine: \(settings.ocrEngine.rawValue)") + print("translationEngine: \(settings.translationEngine.rawValue)") + print("translationTargetLanguage: \(String(describing: settings.translationTargetLanguage?.rawValue))") + print("translationSourceLanguage: \(settings.translationSourceLanguage.rawValue)") + print("translationFallbackEnabled: \(settings.translationFallbackEnabled)") + print("engineSelectionMode: \(settings.engineSelectionMode.rawValue)") + print("vlmProvider: \(settings.vlmProvider.rawValue)") + print("---------------------------") + } + } + + func testSanitizeTranslationCurlyBraces() async throws { + let registry = TranslationEngineRegistry(registerBuiltInProviders: false) + let apple = MockTranslationProvider( + id: "apple", + name: "Apple", + batchResults: [ + makeResult(source: "Hello world", translated: #"{""}"#), + makeResult(source: "Good morning", translated: "{}"), + makeResult(source: "Screen Translate", translated: "屏幕翻译"), + makeResult(source: "Welcome", translated: ""), + makeResult(source: "Nice to meet you", translated: #"{ "" }"#), + makeResult(source: "How are you", translated: "{“”}"), + makeResult(source: "Goodbye", translated: #"{ "": "" }"#), + makeResult(source: "Apple", translated: #"{"translation": "苹果"}"#), + makeResult(source: "Orange", translated: #"{"translatedText": " "}"#), + makeResult(source: "Banana", translated: #"{"result":}"#) + ] + ) + await registry.register(apple, for: .apple) + + let service = TranslationService(registry: registry) + + let bundle = try await service.translate( + segments: [ + "Hello world", "Good morning", "Screen Translate", "Welcome", + "Nice to meet you", "How are you", "Goodbye", "Apple", "Orange", "Banana" + ], + to: "zh-Hans", + from: "en", + scene: .screenshot, + mode: .primaryWithFallback, + preferredEngine: .apple, + fallbackEnabled: false + ) + let results = bundle.primaryResult + + XCTAssertEqual(results.count, 10) + // {""} -> "Hello world" + XCTAssertEqual(results[0].translated, "Hello world") + // {} -> "Good morning" + XCTAssertEqual(results[1].translated, "Good morning") + // Normal -> "屏幕翻译" + XCTAssertEqual(results[2].translated, "屏幕翻译") + // Empty -> "Welcome" + XCTAssertEqual(results[3].translated, "Welcome") + // { "" } -> "Nice to meet you" + XCTAssertEqual(results[4].translated, "Nice to meet you") + // {“”} -> "How are you" + XCTAssertEqual(results[5].translated, "How are you") + // { "": "" } -> "Goodbye" + XCTAssertEqual(results[6].translated, "Goodbye") + // {"translation": "苹果"} -> "苹果" (深度提取成功) + XCTAssertEqual(results[7].translated, "苹果") + // {"translatedText": " "} -> "Orange" (空提取,安全回退) + XCTAssertEqual(results[8].translated, "Orange") + // {"result":} -> "Banana" (语法错误损坏JSON,安全回退) + XCTAssertEqual(results[9].translated, "Banana") + } } + diff --git a/create_dmg.sh b/create_dmg.sh index 5c46d50..391c882 100755 --- a/create_dmg.sh +++ b/create_dmg.sh @@ -25,14 +25,17 @@ echo "📁 准备 DMG 目录结构..." mkdir -p "$DMG_TEMP_DIR" # 复制 .app 到临时目录 -APP_PATH="$PROJECT_PATH/build/Products/Release/ScreenTranslate.app" +APP_PATH="$PROJECT_PATH/build_artifacts/dmg/Build/Products/Release/ScreenTranslate.app" +if [ ! -d "$APP_PATH" ]; then + APP_PATH="$PROJECT_PATH/build/Products/Release/ScreenTranslate.app" +fi if [ ! -d "$APP_PATH" ]; then APP_PATH="$PROJECT_PATH/Build/Products/Release/ScreenTranslate.app" fi if [ ! -d "$APP_PATH" ]; then # 模糊查找 - APP_PATH=$(find "$PROJECT_PATH/build" "$PROJECT_PATH/Build" -name "ScreenTranslate.app" -type d | head -n 1) + APP_PATH=$(find "$PROJECT_PATH/build" "$PROJECT_PATH/Build" "$PROJECT_PATH/build_artifacts" -name "ScreenTranslate.app" -type d | head -n 1) fi if [ -z "$APP_PATH" ] || [ ! -d "$APP_PATH" ]; then