Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 75 additions & 0 deletions .github/workflows/checks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# CI workflow: builds the SDK image described by Dockerfile.ci, then runs
# scan-build, clang-tidy and cppcheck on the module inside that container.
name: Build & Static Checks

on:
  push:
    branches: [ main, develop ]
  pull_request:
    branches: [ main, develop ]
  workflow_dispatch:

jobs:
  analyze:
    runs-on: ubuntu-22.04

    steps:
      - uses: actions/checkout@v4
        with:
          submodules: 'recursive'

      - name: Set up Buildx
        uses: docker/setup-buildx-action@v3

      # `load: true` makes the built image available to the local docker daemon
      # so the next step can run it; layers are cached in the GitHub Actions cache.
      - name: Build SDK image (cached)
        uses: docker/build-push-action@v6
        with:
          context: .
          file: Dockerfile.ci
          tags: freeswitch-sdk:ci
          load: true
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Run analysis inside container
        uses: addnab/docker-run-action@v3
        with:
          image: freeswitch-sdk:ci
          options: -v ${{ github.workspace }}:/work
          run: |
            set -eux
            cd /work
            # Mark the bind-mounted workspace as trusted so the git commands below work.
            git config --global --add safe.directory /work
            cmake -S . -B build \
              -DCMAKE_BUILD_TYPE=Release \
              -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
              -DCMAKE_C_COMPILER=clang \
              -DCMAKE_CXX_COMPILER=clang++ \
              -DCMAKE_C_COMPILER_LAUNCHER=ccache \
              -DCMAKE_CXX_COMPILER_LAUNCHER=ccache

            # NOTE(review): the build tree is configured outside scan-build with an
            # explicit compiler; scan-build usually has to wrap the configure step
            # too in order to inject its analyzer. Confirm this step analyzes code.
            scan-build --status-bugs cmake --build build -j"$(nproc)"

            # `|| true`: grep exits 1 when it filters out every path, which under
            # `set -e` would abort the script before the empty-list branch below.
            FILES="$(git ls-files '*.c' '*.cc' '*.cpp' '*.cxx' | grep -v '^buffer/' | grep -v '^libs/' || true)"
            if [ -n "$FILES" ]; then
              clang-tidy -p build $FILES \
                --warnings-as-errors='clang-analyzer-*,bugprone-*,performance-*'
            else
              echo "No source files found for clang-tidy analysis."
            fi

            # Gating cppcheck run. Without --error-exitcode cppcheck exits 0 even
            # when it reports findings, and the findings were written to a log that
            # was never shown; fail the job and print the log instead.
            if ! cppcheck --enable=warning,performance,portability --std=c++17 --force \
                --project=build/compile_commands.json \
                --suppress=missingIncludeSystem \
                --error-exitcode=1 \
                -i build -i buffer -i libs 2> cppcheck-warn.log; then
              echo "cppcheck reported warnings:"
              cat cppcheck-warn.log
              exit 1
            fi

            # Advisory style run: findings are printed but never fail the job.
            cppcheck --enable=style --std=c++17 --force \
              --project=build/compile_commands.json \
              --suppress=missingIncludeSystem \
              -i build -i buffer -i libs 2> cppcheck-style.log || true

            if [ -s cppcheck-style.log ]; then
              echo "Style issues found by cppcheck:"
              cat cppcheck-style.log
            else
              echo "No style issues found by cppcheck."
            fi
81 changes: 81 additions & 0 deletions Dockerfile.ci
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# syntax=docker/dockerfile:1.7
#
# CI image for static analysis of the module.
# Stage 1 builds FreeSWITCH and the libraries cloned alongside it from source;
# stage 2 installs the compiler/analysis tools and copies in only the
# FreeSWITCH headers, pkg-config file and shared library from stage 1.

############################
# Stage 1: Build dependencies + FreeSWITCH
############################
FROM debian:12 AS builder

# Keep apt from prompting during the image build.
ENV DEBIAN_FRONTEND=noninteractive


# Toolchain and -dev packages installed before the source builds below;
# the apt lists are removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates git curl wget \
build-essential cmake automake autoconf libtool libtool-bin libltdl-dev pkg-config \
libssl-dev zlib1g-dev libdb-dev unixodbc-dev libncurses5-dev libexpat1-dev \
libgdbm-dev bison erlang-dev libtpl-dev libtiff5-dev uuid-dev \
libpcre3-dev libpcre2-dev libedit-dev libsqlite3-dev libcurl4-openssl-dev nasm \
libogg-dev libspeex-dev libspeexdsp-dev libldns-dev python3-dev \
libavformat-dev libswscale-dev libswresample-dev \
liblua5.2-dev libopus-dev libpq-dev \
libsndfile1-dev libflac-dev libvorbis-dev \
&& rm -rf /var/lib/apt/lists/*


WORKDIR /src

# NOTE(review): no branch, tag or commit is specified, so each clone takes
# whatever the repository's default branch points at when the image is built.
RUN git clone https://github.com/signalwire/libks && \
git clone https://github.com/freeswitch/sofia-sip && \
git clone https://github.com/freeswitch/spandsp && \
git clone https://github.com/signalwire/signalwire-c && \
git clone https://github.com/signalwire/freeswitch

# libks: CMake build, installed under /usr
WORKDIR /src/libks
RUN cmake . -DCMAKE_INSTALL_PREFIX=/usr -DWITH_LIBBACKTRACE=1 && \
make -j"$(nproc)" && make install

# sofia-sip: autotools build, installed under /usr
WORKDIR /src/sofia-sip
RUN ./bootstrap.sh && \
./configure --with-pic --with-glib=no --without-doxygen --disable-stun --prefix=/usr && \
make -j"$(nproc)" && make install

# spandsp: autotools build, installed under /usr
WORKDIR /src/spandsp
RUN ./bootstrap.sh && \
./configure --with-pic --prefix=/usr && \
make -j"$(nproc)" && make install

# signalwire-c: CMake build; PKG_CONFIG_PATH points at /usr/lib/pkgconfig,
# the prefix the libraries above were installed into
WORKDIR /src/signalwire-c
RUN PKG_CONFIG_PATH=/usr/lib/pkgconfig cmake . -DCMAKE_INSTALL_PREFIX=/usr && \
make -j"$(nproc)" && make install

# FreeSWITCH SDK: built last, after the libraries above are installed
WORKDIR /src/freeswitch
RUN ./bootstrap.sh -j && \
./configure --prefix=/usr && \
make -j"$(nproc)" && make install

############################
# Stage 2: Slim SDK image (no FreeSWITCH runtime)
############################
FROM debian:12

# Keep apt from prompting during the image build.
ENV DEBIAN_FRONTEND=noninteractive

# Compilers and analysis tools (clang, clang-tidy, clang-tools, cppcheck,
# cmake, ccache) plus the -dev packages installed from Debian for this stage.
RUN apt-get update && apt-get install -y --no-install-recommends \
build-essential \
clang clang-tidy clang-tools \
cppcheck cmake pkg-config ccache \
libssl-dev zlib1g-dev \
libspeexdsp-dev libspandsp-dev \
git curl wget \
&& rm -rf /var/lib/apt/lists/*

# Copy only SDK bits: headers, pkg-config metadata and the shared library.
# Nothing else from the builder stage is carried over.
COPY --from=builder /usr/include/freeswitch/ /usr/include/freeswitch/
COPY --from=builder /usr/lib/pkgconfig/freeswitch.pc /usr/lib/pkgconfig/
COPY --from=builder /usr/lib/libfreeswitch.so* /usr/lib/

# Directory the CI workflow bind-mounts the repository into.
WORKDIR /work
13 changes: 10 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
# mod_openai_audio_stream

A fork of [mod_audio_stream](https://github.com/amigniter/mod_audio_stream) specifically designed for streaming audio to OpenAI's realtime API and playing the responses back to the user via FreeSWITCH and WebSocket.
**mod_openai_audio_stream** is a FreeSWITCH module that streams L16 audio from a channel to an OpenAI realtime websocket endpoint. The stream is adherent to OpenAI's Realtime API specification and allows for real-time audio playback directly in the channel.
![Build & Static Code Checks](https://github.com/VoiSmart/mod_openai_audio_stream/actions/workflows/checks.yml/badge.svg)

**mod_openai_audio_stream** is a FreeSWITCH module that streams L16 audio from a channel to an OpenAI Realtime WebSocket endpoint. The stream follows OpenAI's Realtime API specification and enables real-time audio playback directly in the channel.

It is a fork of [mod_audio_stream](https://github.com/amigniter/mod_audio_stream), specifically adapted for streaming audio to OpenAI's Realtime API and playing the responses back to the user via FreeSWITCH and WebSocket.

The goal of **mod_openai_audio_stream** is to provide a simple, lightweight, yet effective module for streaming audio and receiving responses directly from OpenAI’s Realtime WebSocket into the call through FreeSWITCH. It uses [ixwebsocket](https://machinezone.github.io/IXWebSocket/), a C++ WebSocket library compiled as a static library.

The purpose of **mod_openai_audio_stream** was to make a simple, less dependent but yet effective module to stream audio and receive responses directly from OpenAI realtime websocket into the call via switch. It uses [ixwebsocket](https://machinezone.github.io/IXWebSocket/), c++ library for websocket protocol which is compiled as a static library.

## Notes

Expand Down Expand Up @@ -72,6 +76,8 @@ The following channel variables can be used to fine tune websocket connection an
- `Buffer Size` represents the duration of each audio chunk sent to the websocket. For example, to send 100 ms audio packets to your ws endpoint,
set this variable to 100. If omitted, the default packet size of 20 ms is sent, as grabbed from the audio channel (this is the default FreeSWITCH frame size).
- Set `STREAM_OPENAI_API_KEY` to a valid OpenAI API key to authenticate with OpenAI's Realtime API. If it is not set, the module falls back to `STREAM_EXTRA_HEADERS` and assumes you have already placed the API key in a header defined in that channel variable. **NOTE**: An OpenAI API key is required for the module to function properly. If no key is supplied by either variable, the module will not be able to connect to the API.
- You can specify the OpenAI Realtime model in the URI, e.g. `uuid_openai_audio_stream ${uuid} start wss://api.openai.com/v1/realtime?model=gpt-4o-mini-realtime-preview-2024-12-17 mono 24k`

- Extra headers should be a JSON object with key-value pairs representing additional HTTP headers. Each key should be a header name, and its corresponding value should be a string.
```json
{
Expand Down Expand Up @@ -107,6 +113,7 @@ Attaches a media bug and starts streaming audio (in L16 format) to the websocket
- "8k" = 8000 Hz sample rate will be generated
- "16k" = 16000 Hz sample rate will be generated
- "24k" = 24000 Hz sample rate will be generated
- **IMPORTANT NOTE**: The OpenAI Realtime API, when using the PCM audio format, expects audio at a 24 kHz sample rate. Set the sampling-rate parameter to `24k` (or `24000`) and use mono to ensure that the audio is sent in the correct format. From the OpenAI Realtime API documentation: *input audio must be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian byte order.* Support for exchanging audio with OpenAI in other formats may be developed in the future, which would make the `<sampling-rate>` and `<mono>` parameters useful for controlling the output format dynamically.

```
uuid_openai_audio_stream <uuid> send_json
Expand Down
3 changes: 1 addition & 2 deletions mod_openai_audio_stream.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ static switch_bool_t capture_callback(switch_media_bug_t *bug, void *user_data,

switch (type) {
case SWITCH_ABC_TYPE_INIT:
const char *uuid = switch_core_session_get_uuid(session);
break;

case SWITCH_ABC_TYPE_CLOSE:
Expand Down Expand Up @@ -156,7 +155,7 @@ SWITCH_STANDARD_API(stream_function)
assert(cmd);

if (zstr(cmd) || argc < 2 || (0 == strcmp(argv[1], "start") && argc < 4)) {
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Error with command %s %s %s.\n", cmd, argv[0], argv[1]);
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Error with command %s.\n", cmd);
stream->write_function(stream, "-USAGE: %s\n", STREAM_API_SYNTAX);
goto done;
} else {
Expand Down
56 changes: 35 additions & 21 deletions openai_audio_streamer_glue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,36 +194,51 @@ class AudioStreamer {
}
}


std::vector<int16_t> resampleRawAudio(const std::string& input_raw) {

size_t in_samples = input_raw.size() / 2;
size_t out_samples = static_cast<size_t>(in_samples * out_sample_rate / static_cast<float>(in_sample_rate)) + 1;

double scaled = static_cast<double>(in_samples) * out_sample_rate / in_sample_rate;
size_t out_samples = static_cast<size_t>(scaled) + 1;

std::vector<int16_t> in_buffer(in_samples);
std::vector<int16_t> out_buffer(out_samples);

std::memcpy(in_buffer.data(), input_raw.data(), input_raw.size());

spx_uint32_t in_len = in_samples;
spx_uint32_t out_len = out_samples;
if (in_samples > UINT32_MAX || out_samples > UINT32_MAX) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR,
"Too many samples to resample: in=%zu, out=%zu\n",
in_samples, out_samples);
return {};
}

spx_uint32_t in_len = static_cast<spx_uint32_t>(in_samples);
spx_uint32_t out_len = static_cast<spx_uint32_t>(out_samples);

int err = speex_resampler_process_int(m_resampler, 0, in_buffer.data(), &in_len, out_buffer.data(), &out_len);
int err = speex_resampler_process_int(m_resampler, 0,
in_buffer.data(), &in_len,
out_buffer.data(), &out_len);

if (err != RESAMPLER_ERR_SUCCESS) {
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Resampling failed with error code: %d\n", err);
return std::vector<int16_t>(); // return empty vector on error
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR,
"Resampling failed with error code: %d\n", err);
return {}; // empty on error
}

out_buffer.resize(out_len); // resize to actual size used
out_buffer.resize(out_len); // resize to actual resampled size
return out_buffer;
}

std::string createWavFromRaw(std::string rawAudio) {
// create wav file from raw audio
// rawAudio passed as constant reference because it is never edited
std::string createWavFromRaw(const std::string& rawAudio) {

const int numChannels = 1; // mono
const int bitsPerSample = 16; // pcm16
int byteRate = in_sample_rate * numChannels * bitsPerSample / 8;
int blockAlign = numChannels * bitsPerSample / 8;
uint32_t dataSize = rawAudio.size();
uint32_t dataSize = static_cast<uint32_t>(rawAudio.size());
uint32_t chunkSize = 36 + dataSize;

std::ostringstream wavStream; // write in string like stream
Expand Down Expand Up @@ -449,8 +464,10 @@ namespace {

memset(tech_pvt, 0, sizeof(private_t));

strncpy(tech_pvt->sessionId, switch_core_session_get_uuid(session), MAX_SESSION_ID);
strncpy(tech_pvt->ws_uri, wsUri, MAX_WS_URI);
strncpy(tech_pvt->sessionId, switch_core_session_get_uuid(session), MAX_SESSION_ID - 1);
tech_pvt->sessionId[MAX_SESSION_ID - 1] = '\0';
strncpy(tech_pvt->ws_uri, wsUri, MAX_WS_URI - 1);
tech_pvt->ws_uri[MAX_WS_URI - 1] = '\0';
tech_pvt->sampling = desiredSampling;
tech_pvt->responseHandler = responseHandler;
tech_pvt->rtp_packets = rtp_packets;
Expand Down Expand Up @@ -480,7 +497,7 @@ namespace {

switch_mutex_init(&tech_pvt->mutex, SWITCH_MUTEX_NESTED, pool);

if (desiredSampling != sampling) {
if (static_cast<uint32_t>(desiredSampling) != sampling) {
if (switch_buffer_create(pool, &tech_pvt->sbuffer, buflen) != SWITCH_STATUS_SUCCESS) {
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR,
"%s: Error creating switch buffer.\n", tech_pvt->sessionId);
Expand Down Expand Up @@ -510,7 +527,7 @@ namespace {
ringBufferInit(tech_pvt->buffer, tech_pvt->data, adjSize);
}

if (desiredSampling != sampling) {
if (static_cast<uint32_t>(desiredSampling) != sampling) {
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "(%s) resampling from %u to %u\n", tech_pvt->sessionId, sampling, desiredSampling);
tech_pvt->resampler = speex_resampler_init(channels, sampling, desiredSampling, SWITCH_RESAMPLE_QUALITY, &err);
if (0 != err) {
Expand Down Expand Up @@ -559,17 +576,14 @@ namespace {

extern "C" {
int validate_ws_uri(const char* url, char* wsUri) {
const char* scheme = nullptr;
const char* hostStart = nullptr;
const char* hostEnd = nullptr;
const char* portStart = nullptr;

// Check scheme
if (strncmp(url, "ws://", 5) == 0) {
scheme = "ws";
hostStart = url + 5;
} else if (strncmp(url, "wss://", 6) == 0) {
scheme = "wss";
hostStart = url + 6;
} else {
return 0;
Expand Down Expand Up @@ -728,7 +742,7 @@ extern "C" {
int channels,
void **ppUserData)
{
int deflate, heart_beat;
int deflate = 0, heart_beat = 0;
bool suppressLog = false;
const char* buffer_size;
const char* extra_headers;
Expand Down Expand Up @@ -957,8 +971,6 @@ extern "C" {
return SWITCH_TRUE;
}

uint32_t available = switch_buffer_inuse(tech_pvt->playback_buffer);

uint32_t bytes_needed = frame->datalen;
uint32_t bytes_per_sample = frame->datalen / frame->samples;

Expand Down Expand Up @@ -1003,7 +1015,9 @@ extern "C" {
{
auto* tech_pvt = (private_t*) switch_core_media_bug_get_user_data(bug);
char sessionId[MAX_SESSION_ID];
strcpy(sessionId, tech_pvt->sessionId);

strncpy(sessionId, tech_pvt->sessionId, MAX_SESSION_ID - 1);
sessionId[MAX_SESSION_ID - 1] = '\0';

switch_mutex_lock(tech_pvt->mutex);
switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "(%s) stream_session_cleanup\n", sessionId);
Expand Down