Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 28 additions & 22 deletions ScreenTranslate/Features/Capture/CaptureManager.swift
Original file line number Diff line number Diff line change
Expand Up @@ -200,40 +200,46 @@ actor CaptureManager {

// Configure capture for the full display first
let filter = SCContentFilter(display: scDisplay, excludingWindows: [])
let config = createCaptureConfiguration(for: display)

// Set source rect for region capture
// sourceRect must be in PIXEL coordinates (not normalized!)
// The rect is in points from SelectionOverlayWindow, convert to pixels
// IMPORTANT: Round to integers to avoid fractional pixel boundaries
// which cause ScreenCaptureKit to apply anti-aliasing/interpolation
let pixelX = round(rect.origin.x * display.scaleFactor)
let pixelY = round(rect.origin.y * display.scaleFactor)
let pixelWidth = round(rect.width * display.scaleFactor)
let pixelHeight = round(rect.height * display.scaleFactor)
let config = SCStreamConfiguration()

// sourceRect is in POINTS (same coordinate system as display.frame)
// NOT in pixels! ScreenCaptureKit handles the scaling internally.
let clampedX = min(max(rect.origin.x, 0), display.frame.width - 1)
let clampedY = min(max(rect.origin.y, 0), display.frame.height - 1)
let clampedWidth = min(rect.width, display.frame.width - clampedX)
let clampedHeight = min(rect.height, display.frame.height - clampedY)

let sourceRect = CGRect(
x: pixelX,
y: pixelY,
width: pixelWidth,
height: pixelHeight
x: clampedX,
y: clampedY,
width: clampedWidth,
height: clampedHeight
)

config.sourceRect = sourceRect

// Output size should be in PIXELS for crisp capture
let outputWidth = Int(clampedWidth * display.scaleFactor)
let outputHeight = Int(clampedHeight * display.scaleFactor)
config.width = outputWidth
config.height = outputHeight

// High quality settings
config.minimumFrameInterval = CMTime(value: 1, timescale: 1)
config.pixelFormat = kCVPixelFormatType_32BGRA
config.showsCursor = false
config.colorSpaceName = CGColorSpace.sRGB

#if DEBUG
print("=== CAPTURE MANAGER DEBUG ===")
print("[CAP-1] Input rect (points): \(rect)")
print("[CAP-2] display.frame (points): \(display.frame)")
print("[CAP-3] display.scaleFactor: \(display.scaleFactor)")
print("[CAP-4] sourceRect (pixels, rounded): \(sourceRect)")
print("[CAP-4] sourceRect (points, clamped): \(sourceRect)")
print("[CAP-5] outputSize (pixels): \(outputWidth)x\(outputHeight)")
print("=== END CAPTURE MANAGER DEBUG ===")
#endif

config.sourceRect = sourceRect

// Adjust output size to match the region (use same rounded values)
config.width = Int(pixelWidth)
config.height = Int(pixelHeight)

// Perform capture with signpost for profiling
os_signpost(.begin, log: Self.performanceLog, name: "RegionCapture", signpostID: Self.signpostID)
let captureStartTime = CFAbsoluteTimeGetCurrent()
Expand Down
113 changes: 101 additions & 12 deletions ScreenTranslate/Features/Onboarding/OnboardingView.swift
Original file line number Diff line number Diff line change
Expand Up @@ -258,19 +258,12 @@ struct OnboardingView: View {
}

VStack(alignment: .leading, spacing: 16) {
VStack(alignment: .leading, spacing: 8) {
Text(NSLocalizedString("onboarding.configuration.paddleocr", comment: ""))
.font(.headline)
Text(NSLocalizedString("onboarding.configuration.paddleocr.hint", comment: ""))
.font(.caption)
.foregroundStyle(.secondary)
TextField(
NSLocalizedString("onboarding.configuration.placeholder", comment: ""),
text: $viewModel.paddleOCRServerAddress
)
.textFieldStyle(.roundedBorder)
}
// PaddleOCR Installation Section
paddleOCRConfigSection

Divider()

// MTran Server Section
VStack(alignment: .leading, spacing: 8) {
Text(NSLocalizedString("onboarding.configuration.mtran", comment: ""))
.font(.headline)
Expand All @@ -284,6 +277,7 @@ struct OnboardingView: View {
.textFieldStyle(.roundedBorder)
}

// Translation Test Section
VStack(alignment: .leading, spacing: 8) {
Text(NSLocalizedString("onboarding.configuration.test", comment: ""))
.font(.headline)
Expand Down Expand Up @@ -341,6 +335,101 @@ struct OnboardingView: View {
.padding(32)
}

// MARK: - PaddleOCR Configuration Section

/// Card describing the PaddleOCR installation state.
///
/// Shows a title row with an installed / not-installed badge and a short
/// description. Below that it shows either the installation controls (when
/// `viewModel.isPaddleOCRInstalled` is false) or the detected version string
/// (when it is true and `viewModel.paddleOCRVersion` is non-nil).
private var paddleOCRConfigSection: some View {
    VStack(alignment: .leading, spacing: 12) {
        // Title row with the status badge pushed to the trailing edge.
        HStack {
            Text(NSLocalizedString("onboarding.paddleocr.title", comment: ""))
                .font(.headline)

            Spacer()

            if viewModel.isPaddleOCRInstalled {
                paddleOCRStatusBadge(
                    symbolName: "checkmark.circle.fill",
                    tint: .green,
                    caption: NSLocalizedString("onboarding.paddleocr.installed", comment: "")
                )
            } else {
                paddleOCRStatusBadge(
                    symbolName: "xmark.circle.fill",
                    tint: .secondary,
                    caption: NSLocalizedString("onboarding.paddleocr.not.installed", comment: "")
                )
            }
        }

        Text(NSLocalizedString("onboarding.paddleocr.description", comment: ""))
            .font(.caption)
            .foregroundStyle(.secondary)

        if !viewModel.isPaddleOCRInstalled {
            paddleOCRInstallOptions
        } else {
            // Installed: surface the version when one was detected.
            if let version = viewModel.paddleOCRVersion {
                Text(String(format: NSLocalizedString("onboarding.paddleocr.version", comment: ""), version))
                    .font(.caption)
                    .foregroundStyle(.secondary)
            }
        }
    }
    .padding()
    .background(Color(nsColor: .controlBackgroundColor))
    .cornerRadius(8)
}

/// Small icon + caption pair used as the installation status indicator.
///
/// - Parameters:
///   - symbolName: SF Symbol name for the leading icon.
///   - tint: Foreground style applied to both the icon and the caption.
///   - caption: Already-localized text shown next to the icon.
private func paddleOCRStatusBadge<Tint: ShapeStyle>(
    symbolName: String,
    tint: Tint,
    caption: String
) -> some View {
    HStack(spacing: 4) {
        Image(systemName: symbolName)
            .foregroundStyle(tint)
        Text(caption)
            .font(.caption)
            .foregroundStyle(tint)
    }
}

/// Install hint, the install / copy-command / refresh buttons, and the last
/// installation error (when `viewModel.paddleOCRInstallError` is non-nil).
private var paddleOCRInstallOptions: some View {
    VStack(alignment: .leading, spacing: 8) {
        Text(NSLocalizedString("onboarding.paddleocr.install.hint", comment: ""))
            .font(.caption)
            .foregroundStyle(.secondary)

        HStack(spacing: 12) {
            // Primary action: disabled while an install is already running.
            Button {
                viewModel.installPaddleOCR()
            } label: {
                if viewModel.isInstallingPaddleOCR {
                    ProgressView()
                        .controlSize(.small)
                        .frame(width: 16, height: 16)
                    Text(NSLocalizedString("onboarding.paddleocr.installing", comment: ""))
                } else {
                    Image(systemName: "arrow.down.circle")
                    Text(NSLocalizedString("onboarding.paddleocr.install", comment: ""))
                }
            }
            .buttonStyle(.borderedProminent)
            .disabled(viewModel.isInstallingPaddleOCR)

            // Copies the manual install command to the pasteboard.
            Button {
                viewModel.copyInstallCommand()
            } label: {
                Image(systemName: "doc.on.doc")
                Text(NSLocalizedString("onboarding.paddleocr.copy.command", comment: ""))
            }
            .buttonStyle(.bordered)

            // Re-runs the installation check.
            Button {
                viewModel.refreshPaddleOCRStatus()
            } label: {
                Image(systemName: "arrow.clockwise")
            }
            .buttonStyle(.borderless)
            .help(NSLocalizedString("onboarding.paddleocr.refresh", comment: ""))
        }

        if let error = viewModel.paddleOCRInstallError {
            Text(error)
                .font(.caption)
                .foregroundStyle(.red)
        }
    }
}

// MARK: - Step 3: Complete

private var completeStep: some View {
Expand Down
81 changes: 81 additions & 0 deletions ScreenTranslate/Features/Onboarding/OnboardingViewModel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,18 @@ final class OnboardingViewModel {
/// Translation test success status
var translationTestSuccess = false

/// Whether PaddleOCR is installed
var isPaddleOCRInstalled = false

/// Whether PaddleOCR installation is in progress
var isInstallingPaddleOCR = false

/// PaddleOCR installation error message
var paddleOCRInstallError: String?

/// PaddleOCR version if installed
var paddleOCRVersion: String?

// MARK: - Computed Properties

/// Whether we can move to the next step
Expand Down Expand Up @@ -79,6 +91,7 @@ final class OnboardingViewModel {
Task {
await MainActor.run {
checkPermissions()
refreshPaddleOCRStatus()
}
}
}
Expand Down Expand Up @@ -239,6 +252,74 @@ final class OnboardingViewModel {
mtranServerURL = ""
completeOnboarding()
}

// MARK: - PaddleOCR Management

/// Re-checks whether PaddleOCR is available and publishes the result.
///
/// Calls `PaddleOCRChecker.resetCache()` and
/// `PaddleOCRChecker.checkAvailabilityAsync()`, then polls
/// `PaddleOCRChecker.checkCompleted` up to 20 times at 250 ms intervals
/// (roughly 5 seconds in total). Afterwards, whether or not the check was
/// reported complete, it copies the checker's `isAvailable` and `version`
/// into the view model on the main actor and clears any previous install error.
func refreshPaddleOCRStatus() {
    PaddleOCRChecker.resetCache()
    PaddleOCRChecker.checkAvailabilityAsync()

    Task {
        // Sleep first, then test, so the checker gets at least one interval to finish.
        var pollsRemaining = 20
        while pollsRemaining > 0 {
            pollsRemaining -= 1
            try? await Task.sleep(for: .milliseconds(250))
            if PaddleOCRChecker.checkCompleted { break }
        }

        await MainActor.run {
            paddleOCRInstallError = nil
            paddleOCRVersion = PaddleOCRChecker.version
            isPaddleOCRInstalled = PaddleOCRChecker.isAvailable
        }
    }
}

/// Starts a PaddleOCR installation in the background.
///
/// Marks the install as in progress and clears the previous error, then runs
/// `runPipInstall()` from a detached, user-initiated task. When that returns,
/// the in-progress flag is cleared on the main actor and either the returned
/// error message is stored or, on success, the installation status is refreshed.
func installPaddleOCR() {
    paddleOCRInstallError = nil
    isInstallingPaddleOCR = true

    Task.detached(priority: .userInitiated) {
        let failureMessage = await self.runPipInstall()

        await MainActor.run {
            self.isInstallingPaddleOCR = false

            switch failureMessage {
            case .some(let message):
                self.paddleOCRInstallError = message
            case .none:
                // No error text means pip exited cleanly; re-detect the install.
                self.refreshPaddleOCRStatus()
            }
        }
    }
}

/// Runs `pip3 install paddleocr paddlepaddle` through `/usr/bin/env` and
/// blocks until the process has finished.
///
/// - Returns: `nil` when the process exits with status 0. Otherwise the text
///   the process wrote to stderr (or a generic message containing the exit
///   code when stderr was empty), or the localized description of the error
///   thrown while launching the process.
private func runPipInstall() async -> String? {
    let task = Process()
    task.executableURL = URL(fileURLWithPath: "/usr/bin/env")
    task.arguments = ["pip3", "install", "paddleocr", "paddlepaddle"]

    let stderrPipe = Pipe()
    task.standardError = stderrPipe
    // stdout is never inspected. Discard it rather than attaching an unread
    // pipe: once an unread pipe's buffer fills, the child blocks on write
    // and never exits.
    task.standardOutput = FileHandle.nullDevice

    do {
        try task.run()

        // Drain stderr to EOF *before* waiting for exit. Waiting first would
        // deadlock as soon as the child writes more than the pipe buffer holds.
        let stderrData = stderrPipe.fileHandleForReading.readDataToEndOfFile()
        task.waitUntilExit()

        guard task.terminationStatus != 0 else {
            return nil
        }

        let stderr = String(data: stderrData, encoding: .utf8) ?? "Unknown error"
        return stderr.isEmpty ? "Installation failed with exit code \(task.terminationStatus)" : stderr
    } catch {
        return error.localizedDescription
    }
}

/// Replaces the general pasteboard's contents with the manual PaddleOCR
/// install command as a plain string.
func copyInstallCommand() {
    let pasteboard = NSPasteboard.general
    pasteboard.clearContents()
    pasteboard.setString("pip3 install paddleocr paddlepaddle", forType: .string)
}
}

// MARK: - Notification Names
Expand Down
4 changes: 2 additions & 2 deletions ScreenTranslate/Features/Preview/PreviewViewModel.swift
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ final class PreviewViewModel {
private let recentCapturesStore: RecentCapturesStore

@ObservationIgnored
private let ocrEngine = OCREngine.shared
private let ocrService = OCRService.shared

@ObservationIgnored
private let translationEngine = TranslationEngine.shared
Expand Down Expand Up @@ -970,7 +970,7 @@ final class PreviewViewModel {
defer { isPerformingOCR = false }

do {
let result = try await ocrEngine.recognize(
let result = try await ocrService.recognize(
image,
languages: [.english, .chineseSimplified]
)
Expand Down
Loading