Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 31 additions & 11 deletions crates/swift/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use swift_rs::{swift, Bool, Int, Int16, SRArray, SRObject};

swift!(fn _prepare_audio_capture() -> Bool);
swift!(fn _start_audio_capture() -> Bool);
swift!(fn _stop_audio_capture() -> Bool);
swift!(fn _read_audio_capture() -> SRObject<IntArray>);
swift!(fn _read_samples(max: Int) -> SRObject<IntArray>);
swift!(fn _available_samples() -> Int);
swift!(fn _audio_format() -> Option<SRObject<AudioFormat>>);
swift!(fn _count_taps() -> Int);

#[repr(C)]
#[derive(Debug)]
Expand All @@ -29,10 +30,13 @@ pub struct AudioCapture {}

impl AudioCapture {
pub fn new() -> Self {
unsafe { _prepare_audio_capture() };
Self {}
}

/// Returns the value produced by the Swift-side `_count_taps` function.
pub fn count_taps(&self) -> Int {
// NOTE(review): relies on the Swift symbol `_count_taps` being linked with a
// matching zero-argument signature — confirm against the Swift library.
unsafe { _count_taps() }
}

pub fn format(&self) -> Option<AudioFormat> {
let format = unsafe { _audio_format() };
match format {
Expand All @@ -53,10 +57,14 @@ impl AudioCapture {
unsafe { _stop_audio_capture() }
}

pub fn read(&self) -> Vec<Int16> {
let result = unsafe { _read_audio_capture() };
pub fn read_samples(&self, max: Int) -> Vec<Int16> {
let result = unsafe { _read_samples(max) };
result.buffer()
}

/// Returns the value produced by the Swift-side `_available_samples` function.
pub fn available_samples(&self) -> Int {
// NOTE(review): relies on the Swift symbol `_available_samples` being linked
// with a matching zero-argument signature — confirm against the Swift library.
unsafe { _available_samples() }
}
}

#[cfg(test)]
Expand All @@ -82,7 +90,7 @@ mod tests {
let source = source
.convert_samples()
.take_duration(Duration::from_secs(seconds))
.amplify(0.01);
.amplify(0.002);

stream_handle.play_raw(source).unwrap();
sleep(Duration::from_secs(seconds));
Expand All @@ -93,32 +101,44 @@ mod tests {
#[serial]
fn test_audio_format() {
    let capture = AudioCapture::new();

    // The format is queried between start and stop of the capture.
    let started = capture.start();
    assert!(started);

    let reported = capture.format().unwrap();
    assert_eq!(reported.channels, 1);
    assert_eq!(reported.sample_rate, 48000);
    assert_eq!(reported.bits_per_sample, 32);

    let stopped = capture.stop();
    assert!(stopped);
}

#[test]
#[serial]
fn test_start_and_stop() {
    let capture = AudioCapture::new();

    // After a successful start exactly one tap is expected.
    let started = capture.start();
    assert!(started);
    assert_eq!(capture.count_taps(), 1);

    // After a successful stop no tap is expected any more.
    let stopped = capture.stop();
    assert!(stopped);
    assert_eq!(capture.count_taps(), 0);
}

#[test]
#[serial]
fn test_read() {
let audio_capture = AudioCapture::new();
let numbers = audio_capture.read();
assert_eq!(numbers, vec![]);
let len = audio_capture.available_samples();
assert_eq!(len, 0);

assert!(audio_capture.start());
play_for_sec(1).join().unwrap();
assert!(audio_capture.stop());

let numbers = audio_capture.read();
assert_eq!(numbers, vec![512, 512, 512, 512]);
let samples = audio_capture.read_samples(16000 * 2);
assert!(samples.len() >= 16000 * 1);
assert!(samples.len() < 16000 * 2);
assert_eq!(samples.iter().sum::<Int16>(), 0);

assert!(audio_capture.stop());
let samples = audio_capture.read_samples(4);
assert_eq!(samples, vec![]);
}
}
29 changes: 15 additions & 14 deletions crates/swift/swift-lib/src/lib.swift
Original file line number Diff line number Diff line change
@@ -1,28 +1,29 @@
/// C entry point `_prepare_audio_capture`.
///
/// - Returns: `true` when `AudioCaptureState.shared.prepare()` completes
///   without throwing, `false` when it throws.
@_cdecl("_prepare_audio_capture")
public func prepare_audio_capture() -> Bool {
    // The thrown error is not surfaced across the C boundary; only success
    // or failure is reported.
    let outcome: Void? = try? AudioCaptureState.shared.prepare()
    return outcome != nil
}

/// C entry point `_start_audio_capture`.
///
/// - Returns: The result of `AudioCaptureState.shared.start()`.
@_cdecl("_start_audio_capture")
public func start_audio_capture() -> Bool {
    let state = AudioCaptureState.shared
    return state.start()
}

@_cdecl("_stop_audio_capture")
public func stop_audio_capture() {
public func stop_audio_capture() -> Bool {
return AudioCaptureState.shared.stop()
}

/// C entry point `_read_samples`.
///
/// - Parameter max: Forwarded unchanged to
///   `AudioCaptureState.shared.read_samples(max:)`.
/// - Returns: The `IntArray` produced by that call.
@_cdecl("_read_samples")
public func read_samples(max: Int) -> IntArray {
    let state = AudioCaptureState.shared
    return state.read_samples(max: max)
}

@_cdecl("_read_audio_capture")
public func read_audio_capture() -> IntArray {
return AudioCaptureState.shared.read()
@_cdecl("_available_samples")
public func available_samples() -> Int {
return AudioCaptureState.shared.available_samples()
}

/// C entry point `_audio_format`.
///
/// - Returns: The result of `AudioCaptureState.shared.format()`, which may
///   be `nil`.
@_cdecl("_audio_format")
public func audio_format() -> AudioFormat? {
    let state = AudioCaptureState.shared
    return state.format()
}

/// C entry point `_count_taps`.
///
/// - Returns: The result of `AudioCaptureState.shared.count_taps()`.
@_cdecl("_count_taps")
public func count_taps() -> Int {
    let state = AudioCaptureState.shared
    return state.count_taps()
}
22 changes: 21 additions & 1 deletion crates/swift/swift-lib/src/queue.swift
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,39 @@ import Foundation
/// A FIFO queue whose every operation runs while holding an `NSLock`.
///
/// Items are stored in a `Deque`; a separate element counter is maintained
/// alongside it and exposed through `length`.
public class AudioQueue<T> {
    private var queue: Deque<T> = Deque<T>()
    private let lock = NSLock()
    // Number of queued items; updated by `clear`, `push` and `pop` under `lock`.
    private var count: Int = 0

    public init() {}

    /// The number of items currently in the queue.
    public var length: Int {
        lock.lock()
        defer { lock.unlock() }
        return count
    }

    /// Removes every queued item and resets `length` to zero.
    public func clear() {
        lock.lock()
        defer { lock.unlock() }
        queue.removeAll()
        count = 0
    }

    /// Appends all of `items` to the back of the queue.
    ///
    /// - Parameter items: The elements to enqueue, in iteration order.
    public func push(_ items: any Collection<T>) {
        lock.lock()
        defer { lock.unlock() }

        queue.append(contentsOf: items)
        count += items.count
    }

    /// Removes and returns the item at the front of the queue.
    ///
    /// - Returns: The oldest queued item, or `nil` when the queue is empty.
    public func pop() -> T? {
        lock.lock()
        defer { lock.unlock() }

        // Pop exactly once and only decrement the counter when an item was
        // actually removed, so `length` stays in step with the deque.
        guard let item = queue.popFirst() else { return nil }
        count -= 1
        return item
    }
}
126 changes: 99 additions & 27 deletions crates/swift/swift-lib/src/state.swift
Original file line number Diff line number Diff line change
Expand Up @@ -6,36 +6,58 @@ import AudioToolbox
import CoreAudio

// https://github.com/insidegui/AudioCap/blob/93881a4201cba1ee1cee558744492660caeaa3f1/AudioCap/ProcessTap/ProcessTap.swift#L7
// https://github.com/tensorflow/examples/blob/master/lite/examples/speech_commands/ios/SpeechCommands/AudioInputManager/AudioInputManager.swift
public class AudioCaptureState {
public static let shared = AudioCaptureState()

private var audioQueue: AudioQueue<Int16> = AudioQueue()
private let dispatchQueue = DispatchQueue(label: "hypr-dispatch-queue", qos: .userInitiated)

private var deviceProcID: AudioDeviceIOProcID?
private var processTapID: AudioObjectID = kAudioObjectUnknown
private var aggregateDeviceID: AudioObjectID = kAudioObjectUnknown
private var deviceProcID: AudioDeviceIOProcID?

private var audioFormat: AudioFormat?
private var tapStreamDescription: AudioStreamBasicDescription?
private var outputAudioFormat = AVAudioFormat(
commonFormat: .pcmFormatInt16,
sampleRate: 16000,
channels: 1,
interleaved: false)

private init() {}

/// Returns the stored `audioFormat`, or `nil` when none has been set.
public func format() -> AudioFormat? {
    audioFormat
}

public func prepare() throws {
/// Returns the tap count that `countTapsFromAggregateDevice(id:)` reports
/// for the current `aggregateDeviceID`.
public func count_taps() -> Int {
    let deviceID = aggregateDeviceID
    return countTapsFromAggregateDevice(id: deviceID)
}

/// Runs `_start()` and reports whether it succeeded.
///
/// - Returns: `true` when `_start()` returns normally, `false` when it throws.
public func start() -> Bool {
    // The thrown error is intentionally reduced to a success flag.
    let outcome: Void? = try? _start()
    return outcome != nil
}

private func _start() throws {
let tapDescription = CATapDescription(monoGlobalTapButExcludeProcesses: [])
tapDescription.uuid = UUID()
tapDescription.isPrivate = true
tapDescription.muteBehavior = .unmuted

var tapID: AUAudioObjectID = kAudioObjectUnknown

var err = AudioHardwareCreateProcessTap(tapDescription, &tapID)
guard err == noErr else { throw AudioError.tapError }
guard tapID != kAudioObjectUnknown else { throw AudioError.tapError }
self.processTapID = tapID

tapStreamDescription = try getAudioTapStreamBasicDescription(tapID: tapID)
audioFormat = AudioFormat(from: tapStreamDescription!)
var tapStreamDescription = try getAudioTapStreamBasicDescription(tapID: tapID)
self.audioFormat = AudioFormat(from: tapStreamDescription)

let systemOutputDeviceUID = try getDefaultSystemOutputDeviceUID()
let aggregateDescription: [String: Any] = [
Expand All @@ -58,42 +80,92 @@ public class AudioCaptureState {
aggregateDescription as CFDictionary, &aggregateDeviceID)
guard err == noErr else { throw AudioError.deviceError }
guard aggregateDeviceID != kAudioObjectUnknown else { throw AudioError.deviceError }
}

public func start() -> Bool {
let format = AVAudioFormat(streamDescription: &tapStreamDescription!)
guard format != nil else { return false }
let inputAudioFormat = AVAudioFormat(streamDescription: &tapStreamDescription)
// we need to reuse single converter - https://stackoverflow.com/a/64572254
let converter = AVAudioConverter(from: inputAudioFormat!, to: outputAudioFormat!)

// https://developer.apple.com/documentation/coreaudio/audiodeviceioblock
// https://forums.swift.org/t/audiobuffer-syntax/40400/2
// https://github.com/insidegui/AudioCap/blob/93881a4201cba1ee1cee558744492660caeaa3f1/AudioCap/ProcessTap/ProcessTap.swift#L227C35-L227C39
try run(on: dispatchQueue) {
[weak self] inputTimestamp, inputBuffer, _outputTimestamp, _outputBuffer, _callbackTimestamp
in
guard let self = self else { return }

let rawBuffer = AVAudioPCMBuffer(
pcmFormat: inputAudioFormat!,
bufferListNoCopy: inputBuffer,
deallocator: nil)

let conversionRatio =
Float(outputAudioFormat!.sampleRate) / Float(inputAudioFormat!.sampleRate)
let newFrameCapacity = AVAudioFrameCount(Float(rawBuffer!.frameLength) * conversionRatio)

let convertedBuffer = AVAudioPCMBuffer(
pcmFormat: outputAudioFormat!,
frameCapacity: newFrameCapacity)

// convert(to:from:) can't convert sample rate - https://stackoverflow.com/a/60290534
var error: NSError?
converter!.convert(to: convertedBuffer!, error: &error) { inNumPackets, outStatus in
outStatus.pointee = .haveData
return rawBuffer
}

do {
// https://developer.apple.com/documentation/coreaudio/audiodeviceioblock
// https://forums.swift.org/t/audiobuffer-syntax/40400/2
// https://github.com/insidegui/AudioCap/blob/93881a4201cba1ee1cee558744492660caeaa3f1/AudioCap/ProcessTap/ProcessTap.swift#L227C35-L227C39
try run(on: dispatchQueue) {
[weak self] inputTimestamp, inputBuffer, _outputTimestamp, _outputBuffer, _callbackTimestamp
in
guard let self = self else { return }
let buffer = AVAudioPCMBuffer(
pcmFormat: format!, bufferListNoCopy: inputBuffer, deallocator: nil)
self.audioQueue.push([Int16(buffer!.frameLength)])
if let error = error {
self.audioQueue.push([Int16(-1)])
return
}

if let channelData = convertedBuffer?.int16ChannelData {
let channelDataValue = channelData.pointee
let samples = stride(
from: 0,
to: Int(convertedBuffer!.frameLength),
by: 1
).map { channelDataValue[$0] }
self.audioQueue.push(samples)
} else {
self.audioQueue.push([Int16(-1)])
}
}
}

/// Runs `_stop()` and reports whether it succeeded.
///
/// - Returns: `true` when `_stop()` returns normally, `false` when it throws.
public func stop() -> Bool {
    // The thrown error is intentionally reduced to a success flag.
    let outcome: Void? = try? _stop()
    return outcome != nil
}

public func stop() -> Bool {
if let deviceProcID = deviceProcID {
let err = AudioDeviceStop(aggregateDeviceID, deviceProcID)
return err == noErr
private func _stop() throws {
var err: OSStatus
if self.aggregateDeviceID != kAudioObjectUnknown && self.deviceProcID != nil {
err = AudioDeviceStop(self.aggregateDeviceID, self.deviceProcID!)
guard err == noErr else { throw AudioError.deviceError }
err = AudioDeviceDestroyIOProcID(self.aggregateDeviceID, self.deviceProcID!)
guard err == noErr else { throw AudioError.deviceError }
err = AudioHardwareDestroyAggregateDevice(self.aggregateDeviceID)
guard err == noErr else { throw AudioError.deviceError }
}
return false
if self.processTapID != kAudioObjectUnknown {
err = AudioHardwareDestroyProcessTap(self.processTapID)
guard err == noErr else { throw AudioError.deviceError }
}
self.audioQueue.clear()
}

/// Returns the current `length` of `audioQueue`.
public func available_samples() -> Int {
    audioQueue.length
}

public func read() -> IntArray {
public func read_samples(max: Int) -> IntArray {
var samples: [Int16] = []

for _ in 0..<4 {
for _ in 0..<max {
if let sample = audioQueue.pop() {
samples.append(sample)
} else {
Expand Down
17 changes: 17 additions & 0 deletions crates/swift/swift-lib/src/utils.swift
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,20 @@ public func getAudioTapStreamBasicDescription(tapID: AudioObjectID) throws

return description
}

/// Counts the entries in the tap list of an aggregate audio device.
///
/// Reads the `kAudioAggregateDevicePropertyTapList` property (global scope,
/// main element) of `id` and returns the length of the resulting array.
///
/// - Parameter id: The audio object to query.
/// - Returns: The number of entries in the tap list, or `0` when either
///   Core Audio query fails or no list is returned.
public func countTapsFromAggregateDevice(id: AudioDeviceID) -> Int {
    var address = AudioObjectPropertyAddress(
        mSelector: kAudioAggregateDevicePropertyTapList,
        mScope: kAudioObjectPropertyScopeGlobal,
        mElement: kAudioObjectPropertyElementMain
    )

    // Ask for the property size first; a failure here means there is nothing
    // to read, so report zero taps instead of continuing with a stale size.
    var propertySize: UInt32 = 0
    let sizeStatus = AudioObjectGetPropertyDataSize(id, &address, 0, nil, &propertySize)
    guard sizeStatus == noErr else { return 0 }

    var list: CFArray? = nil
    let dataStatus = withUnsafeMutablePointer(to: &list) { listPointer in
        AudioObjectGetPropertyData(id, &address, 0, nil, &propertySize, listPointer)
    }
    // Only trust `list` when the read itself reported success.
    guard dataStatus == noErr, let taps = list else { return 0 }

    return CFArrayGetCount(taps)
}
Loading