From cf75348b3bd0a51cfdffbfaa7553df6a5b4e6bdf Mon Sep 17 00:00:00 2001
From: Zach Nagengast
Date: Tue, 19 Mar 2024 13:22:56 -0700
Subject: [PATCH] Use GPU for audio encoder on macOS 13 (#83)

* Use gpu on macos 13 for the audio encoder

* Use gpu on macos 13 for the audio encoder

* Update Sources/WhisperKitCLI/CLIArguments.swift
---
 Sources/WhisperKit/Core/Models.swift     | 12 +++++++++---
 Sources/WhisperKitCLI/CLIArguments.swift |  1 -
 Sources/WhisperKitCLI/Transcribe.swift   | 14 ++++++++++++--
 3 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/Sources/WhisperKit/Core/Models.swift b/Sources/WhisperKit/Core/Models.swift
index ff862a7..b440656 100644
--- a/Sources/WhisperKit/Core/Models.swift
+++ b/Sources/WhisperKit/Core/Models.swift
@@ -135,7 +135,7 @@ public struct ModelComputeOptions {
     public init(
         melCompute: MLComputeUnits = .cpuAndGPU,
-        audioEncoderCompute: MLComputeUnits = .cpuAndNeuralEngine,
+        audioEncoderCompute: MLComputeUnits? = nil,
         textDecoderCompute: MLComputeUnits = .cpuAndNeuralEngine,
         prefillCompute: MLComputeUnits = .cpuOnly
     ) {
@@ -146,10 +146,16 @@ public struct ModelComputeOptions {
             self.prefillCompute = .cpuOnly
             return
         }
+
         self.melCompute = melCompute
-        self.audioEncoderCompute = audioEncoderCompute
-        self.textDecoderCompute = textDecoderCompute
         self.prefillCompute = prefillCompute
+        self.textDecoderCompute = textDecoderCompute
+
+        if #available(macOS 14.0, iOS 17.0, watchOS 10, visionOS 1, *) {
+            self.audioEncoderCompute = audioEncoderCompute ?? .cpuAndNeuralEngine
+        } else {
+            self.audioEncoderCompute = audioEncoderCompute ?? .cpuAndGPU
+        }
     }
 }

diff --git a/Sources/WhisperKitCLI/CLIArguments.swift b/Sources/WhisperKitCLI/CLIArguments.swift
index aafb133..de1294d 100644
--- a/Sources/WhisperKitCLI/CLIArguments.swift
+++ b/Sources/WhisperKitCLI/CLIArguments.swift
@@ -21,7 +21,6 @@ struct CLIArguments: ParsableArguments {
     @Option(help: "Compute units for audio encoder model with {all,cpuOnly,cpuAndGPU,cpuAndNeuralEngine,random}")
     var audioEncoderComputeUnits: ComputeUnits = .cpuAndNeuralEngine
-
     @Option(help: "Compute units for text decoder model with {all,cpuOnly,cpuAndGPU,cpuAndNeuralEngine,random}")
     var textDecoderComputeUnits: ComputeUnits = .cpuAndNeuralEngine

diff --git a/Sources/WhisperKitCLI/Transcribe.swift b/Sources/WhisperKitCLI/Transcribe.swift
index fd60e11..581aeb0 100644
--- a/Sources/WhisperKitCLI/Transcribe.swift
+++ b/Sources/WhisperKitCLI/Transcribe.swift
@@ -32,9 +32,19 @@ struct Transcribe: AsyncParsableCommand {
             print("Transcribing audio at \(cliArguments.audioPath)")
         }

+        var audioEncoderComputeUnits = cliArguments.audioEncoderComputeUnits.asMLComputeUnits
+        let textDecoderComputeUnits = cliArguments.textDecoderComputeUnits.asMLComputeUnits
+
+        // Use gpu for audio encoder on macOS below 14
+        if audioEncoderComputeUnits == .cpuAndNeuralEngine {
+            if #unavailable(macOS 14.0) {
+                audioEncoderComputeUnits = .cpuAndGPU
+            }
+        }
+
        let computeOptions = ModelComputeOptions(
-            audioEncoderCompute: cliArguments.audioEncoderComputeUnits.asMLComputeUnits,
-            textDecoderCompute: cliArguments.textDecoderComputeUnits.asMLComputeUnits
+            audioEncoderCompute: audioEncoderComputeUnits,
+            textDecoderCompute: textDecoderComputeUnits
        )

        let downloadTokenizerFolder: URL? =
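
Reviewer note: the core of this patch is that the audio encoder's compute-unit
default is now availability-gated rather than hard-coded: on macOS 14 / iOS 17
and later the `nil` default resolves to `.cpuAndNeuralEngine` as before, while
on macOS 13 it falls back to `.cpuAndGPU`; an explicit argument always wins.
Below is a minimal, self-contained Swift sketch of that resolution logic,
pulled out of `ModelComputeOptions.init` for illustration. The free function
`resolveAudioEncoderCompute` is hypothetical (the real logic lives inline in
Sources/WhisperKit/Core/Models.swift); only `MLComputeUnits` and the
availability check are taken from the patch itself.

    import CoreML

    // Hypothetical helper mirroring the patched default resolution.
    func resolveAudioEncoderCompute(_ requested: MLComputeUnits?) -> MLComputeUnits {
        if #available(macOS 14.0, iOS 17.0, watchOS 10, visionOS 1, *) {
            // Newer OSes: keep the previous Neural Engine default.
            return requested ?? .cpuAndNeuralEngine
        } else {
            // macOS 13 and earlier deployment targets: default to the GPU.
            return requested ?? .cpuAndGPU
        }
    }

    // Passing nil gets the availability-gated default; an explicit value
    // is returned unchanged, so callers can still force any unit.
    let defaultUnits = resolveAudioEncoderCompute(nil)
    let explicitUnits = resolveAudioEncoderCompute(.cpuOnly) // always .cpuOnly

The CLI mirrors the same policy in Transcribe.swift: because its
`--audio-encoder-compute-units` option has a concrete default of
`.cpuAndNeuralEngine` (never nil), it rewrites that default to `.cpuAndGPU`
with an `#unavailable(macOS 14.0)` check before building `ModelComputeOptions`.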