Skip to content

Commit

Permalink
Homebridge audio improvements (#1307)
Browse files Browse the repository at this point in the history
* Fix ffmpeg error when streaming over LTE/5G

* Fix audio over LTE/5G

* Opus and AAC-ELD audio improvements
  • Loading branch information
tsightler committed Sep 17, 2023
1 parent 7c5c2ca commit c197ad1
Show file tree
Hide file tree
Showing 3 changed files with 141 additions and 45 deletions.
6 changes: 6 additions & 0 deletions .changeset/poor-moles-cry.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
'homebridge-ring': patch
---

Update Opus processing to handle all possible bitrates and packet lengths, which should fix audio issues over LTE/5G.
Minor tweaks to AAC-ELD processing for slightly improved audio.
49 changes: 21 additions & 28 deletions packages/homebridge-ring/camera-source.ts
Original file line number Diff line number Diff line change
Expand Up @@ -157,11 +157,15 @@ class StreamingSessionWrapper {
audio: { port: audioPort },
} = this.prepareStreamRequest,
{
audio: { codec: audioCodec, sample_rate: audioSampleRate },
audio: {
codec: audioCodec,
sample_rate: audioSampleRate,
packet_time: audioPacketTime,
},
} = startStreamRequest,
// Repacketize the audio stream after it's been transcoded
opusRepacketizer = new OpusRepacketizer(1),
audioIntervalScale = audioSampleRate / 8,
opusRepacketizer = new OpusRepacketizer(audioPacketTime / 20),
audioIntervalScale = ((audioSampleRate / 8) * audioPacketTime) / 20,
audioSrtpSession = new SrtpSession(getSessionConfig(this.audioSrtp))

let firstTimestamp: number,
Expand All @@ -173,7 +177,7 @@ class StreamingSessionWrapper {
if (audioCodec === AudioStreamingCodecType.OPUS) {
// borrowed from scrypted
// Original source: https://github.com/koush/scrypted/blob/c13ba09889c3e0d9d3724cb7d49253c9d787fb97/plugins/homekit/src/types/camera/camera-streaming-srtp-sender.ts#L124-L143
rtp = opusRepacketizer.repacketize(RtpPacket.deSerialize(message))
rtp = opusRepacketizer.repacketize(rtp)

if (!rtp) {
return null
Expand All @@ -198,7 +202,7 @@ class StreamingSessionWrapper {
// HAP requests, and the packet time is respected,
// opus 48khz will work just fine.
rtp.header.timestamp =
(firstTimestamp + audioPacketCount * 180 * audioIntervalScale) %
(firstTimestamp + audioPacketCount * 160 * audioIntervalScale) %
0xffffffff
audioPacketCount++
}
Expand All @@ -222,7 +226,6 @@ class StreamingSessionWrapper {
let sentVideo = false
const {
targetAddress,
audio: { srtp_key: remoteAudioSrtpKey, srtp_salt: remoteAudioSrtpSalt },
video: { port: videoPort },
} = this.prepareStreamRequest,
// use to encrypt Ring video to HomeKit
Expand Down Expand Up @@ -273,16 +276,18 @@ class StreamingSessionWrapper {
'-frame_duration',
request.audio.packet_time,
'-application',
'voip',
'-vbr',
'off',
'lowdelay',
]
: [
// AAC-eld specific
'-acodec',
'libfdk_aac',
'-profile:a',
'aac_eld',
'-eld_sbr:a',
'1',
'-eld_v2',
'1',
]),

// Shared options
Expand All @@ -303,33 +308,23 @@ class StreamingSessionWrapper {
'-f',
'rtp',
`rtp://127.0.0.1:${await this.repacketizeAudioSplitter
.portPromise}?pkt_size=188`,
.portPromise}?pkt_size=376`,
],
video: false,
output: [],
})

let cameraSpeakerActive = false
// used to decrypt return audio from HomeKit to Ring
const remoteAudioSrtpOptions: SrtpOptions = {
srtpKey: remoteAudioSrtpKey,
srtpSalt: remoteAudioSrtpSalt,
},
audioSrtpSession = new SrtpSession(
getSessionConfig(remoteAudioSrtpOptions),
),
returnAudioTranscodedSplitter = new RtpSplitter(({ message }) => {
// used to send return audio from HomeKit to Ring
const returnAudioTranscodedSplitter = new RtpSplitter(({ message }) => {
if (!cameraSpeakerActive) {
cameraSpeakerActive = true
this.streamingSession.activateCameraSpeaker()
}

// decrypt the message
// deserialize and send to Ring - werift will handle encryption and other header params
try {
const rtp = RtpPacket.deSerialize(message)
rtp.payload = audioSrtpSession.decrypt(rtp.payload)

// send to Ring - werift will handle encryption and other header params
const rtp: RtpPacket | undefined = RtpPacket.deSerialize(message)
this.streamingSession.sendAudioPacket(rtp)
} catch (_) {
// deSerialize will sometimes fail, but the errors can be ignored
Expand All @@ -351,15 +346,13 @@ class StreamingSessionWrapper {
? [
'libopus',
'-ac',
1,
'1',
'-ar',
'24k',
'-vbr',
'off',
'-b:a',
'24k',
'-application',
'voip',
'lowdelay',
]
: ['pcm_mulaw', '-ac', 1, '-ar', '8k']),
'-flags',
Expand Down
131 changes: 114 additions & 17 deletions packages/homebridge-ring/opus-repacketizer.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
// OpusRepacketizer is borrowed from scrypted
// Original source: https://github.com/koush/scrypted/blob/c13ba09889c3e0d9d3724cb7d49253c9d787fb97/plugins/homekit/src/types/camera/opus-repacketizer.ts
// Original source: https://github.com/koush/scrypted/blob/3150a3033515a3886af1e6b35a0ba7432b63e02b/plugins/homekit/src/types/camera/opus-repacketizer.ts

import type { RtpPacket } from 'werift'

// https://datatracker.ietf.org/doc/html/rfc6716
// INPUT

// INPUT (code 0, a single-frame packet; see the RFC for the other 3 code values)

// 0 1 2 3
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Expand All @@ -13,7 +16,9 @@ import type { RtpPacket } from 'werift'
// : |
// | |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

// OUTPUT

// 0 1 2 3
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
Expand All @@ -39,7 +44,9 @@ import type { RtpPacket } from 'werift'
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// : Opus Padding (Optional)... |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+

// Figure 6: A CBR Code 3 Packet

// In the VBR case, the (optional) padding length is followed by M-1
// frame lengths (indicated by "N1" to "N[M-1]" in Figure 7), each
// encoded in a one- or two-byte sequence as described above. The
Expand All @@ -54,32 +61,122 @@ import type { RtpPacket } from 'werift'
// the signaled length of the first M-1 frames themselves, plus the
// signaled length of the padding MUST be no larger than N, the total
// size of the packet.

export class OpusRepacketizer {
packets: RtpPacket[] = []
depacketized: Buffer[] = []

constructor(public framesPerPacket: number) {}

// repacketize a packet with a single frame into a packet with multiple frames.
repacketize(packet: RtpPacket): RtpPacket | undefined {
if (this.framesPerPacket === 1) return packet
if (packet.payload[0] & 0b00000011) {
throw new Error('expected opus packet with a single frame.')
const code = packet.payload[0] & 0b00000011
let offset: number

// see Frame Length Coding in RFC
// Decodes the frame length field located at `offset` and advances `offset`
// past every byte it consumed, so on return `offset` points at whatever
// follows the length field (the next length field or the frame data).
//   first byte 0..251   -> the length is that value (1 byte consumed)
//   first byte 252..255 -> the length is first + second * 4 (2 bytes consumed)
// NOTE(review): a VBR code 3 packet with M frames carries only M-1 length
// fields (the last frame takes the remaining bytes) - confirm callers read
// no more than that.
const decodeFrameLength = () => {
  let frameLength = packet.payload.readUInt8(offset++)
  if (frameLength >= 252) {
    // two-byte form: the second byte counts in units of 4 bytes
    frameLength += packet.payload.readUInt8(offset++) * 4
  }
  return frameLength
}
this.packets.push(packet)
if (this.packets.length !== this.framesPerPacket) {
return
// code 0: 1 frame in the packet
// code 1: 2 frames in the packet, equal size (cbr)
// code 2: 2 frames in the packet, different sizes (vbr)
// code 3: signaled number of frames in the packet, cbr or vbr

if (code === 0) {
if (this.framesPerPacket === 1 && !this.depacketized.length) return packet
// depacketize by stripping off the config byte
this.depacketized.push(packet.payload.subarray(1))
} else if (code === 1) {
if (this.framesPerPacket === 2 && !this.depacketized.length) return packet
// depacketize by dividing the remaining payload into two equal sized frames
const remaining = packet.payload.length - 1
if (remaining % 2) {
throw new Error('expected equal sized opus packets (code 1)')
}
const frameLength = remaining / 2
this.depacketized.push(packet.payload.subarray(1, 1 + frameLength))
this.depacketized.push(packet.payload.subarray(1 + frameLength))
} else if (code === 2) {
if (this.framesPerPacket === 2 && !this.depacketized.length) return packet
offset = 1
// depacketize by dividing the remaining payload into two unequal-sized frames
const frameLength = decodeFrameLength()
this.depacketized.push(
packet.payload.subarray(offset, offset + frameLength),
)
this.depacketized.push(packet.payload.subarray(offset + frameLength))
} else if (code === 3) {
// a code 3 packet has a frame count byte that also carries the padding indicator
// and a flag for whether the frames are equal size (cbr) or not (vbr).
const frameCountByte = packet.payload[1],
packetFrameCount = frameCountByte & 0b00111111,
vbr = frameCountByte & 0b10000000
if (
this.framesPerPacket === packetFrameCount &&
!this.depacketized.length
) {
return packet
}
const paddingIndicator = frameCountByte & 0b01000000
offset = 2
let padding = 0
if (paddingIndicator) {
padding = packet.payload.readUInt8(offset)
offset++
if (padding === 255) {
padding = 254 + packet.payload.readUInt8(offset)
offset++
}
}

if (!vbr) {
const remaining = packet.payload.length - offset - padding
if (remaining % packetFrameCount) {
throw new Error('expected equal sized opus packets (code 3)')
}
const frameLength = remaining / packetFrameCount
for (let i = 0; i < packetFrameCount; i++) {
const start = offset + i * frameLength,
end = start + frameLength
this.depacketized.push(packet.payload.subarray(start, end))
}
} else {
const frameLengths: number[] = []
for (let i = 0; i < packetFrameCount; i++) {
const frameLength = decodeFrameLength()
frameLengths.push(frameLength)
}
for (let i = 0; i < packetFrameCount; i++) {
const frameLength = frameLengths[i],
start = offset
offset += frameLength
this.depacketized.push(packet.payload.subarray(start, offset))
}
}
}
const first = this.packets[0],
depacketized = this.packets.map((p) => p.payload.subarray(1))
this.packets = []
let config = first.payload[0]
config |= 0b00000011
const frameCount = 0b10000000 | this.framesPerPacket,
newHeader: number[] = [config, frameCount]
// depacketize by stripping off the config byte

if (this.depacketized.length < this.framesPerPacket) return

const depacketized = this.depacketized.slice(0, this.framesPerPacket)
this.depacketized = this.depacketized.slice(this.framesPerPacket)

// reuse the config and stereo indicator, but change the code to 3.
let toc = packet.payload[0]
toc |= 0b00000011
// vbr flag | padding indicator | frame count
const frameCountByte = 0b10000000 | this.framesPerPacket,
newHeader: number[] = [toc, frameCountByte]

// M-1 length bytes
newHeader.push(...depacketized.slice(0, -1).map((data) => data.length))

const headerBuffer = Buffer.from(newHeader),
payload = Buffer.concat([headerBuffer, ...depacketized])

packet.payload = payload
return packet
}
Expand Down

0 comments on commit c197ad1

Please sign in to comment.