From 29db390bdda45527ef74523890c125aa455d86ae Mon Sep 17 00:00:00 2001 From: Dario Pellegrino Date: Wed, 30 Jul 2025 15:27:44 +0200 Subject: [PATCH 1/2] Fix compile warnings Signed-off-by: Dario Pellegrino --- mod_openai_audio_stream.c | 3 +-- openai_audio_streamer_glue.cpp | 47 ++++++++++++++++++++-------------- 2 files changed, 29 insertions(+), 21 deletions(-) diff --git a/mod_openai_audio_stream.c b/mod_openai_audio_stream.c index b427649..fd6a246 100644 --- a/mod_openai_audio_stream.c +++ b/mod_openai_audio_stream.c @@ -27,7 +27,6 @@ static switch_bool_t capture_callback(switch_media_bug_t *bug, void *user_data, switch (type) { case SWITCH_ABC_TYPE_INIT: - const char *uuid = switch_core_session_get_uuid(session); break; case SWITCH_ABC_TYPE_CLOSE: @@ -156,7 +155,7 @@ SWITCH_STANDARD_API(stream_function) assert(cmd); if (zstr(cmd) || argc < 2 || (0 == strcmp(argv[1], "start") && argc < 4)) { - switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Error with command %s %s %s.\n", cmd, argv[0], argv[1]); + switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "Error with command %s.\n", cmd); stream->write_function(stream, "-USAGE: %s\n", STREAM_API_SYNTAX); goto done; } else { diff --git a/openai_audio_streamer_glue.cpp b/openai_audio_streamer_glue.cpp index a9340d1..98d7652 100644 --- a/openai_audio_streamer_glue.cpp +++ b/openai_audio_streamer_glue.cpp @@ -194,27 +194,39 @@ class AudioStreamer { } } + std::vector resampleRawAudio(const std::string& input_raw) { - size_t in_samples = input_raw.size() / 2; - size_t out_samples = static_cast(in_samples * out_sample_rate / static_cast(in_sample_rate)) + 1; + + double scaled = static_cast(in_samples) * out_sample_rate / in_sample_rate; + size_t out_samples = static_cast(scaled) + 1; std::vector in_buffer(in_samples); std::vector out_buffer(out_samples); std::memcpy(in_buffer.data(), input_raw.data(), input_raw.size()); - spx_uint32_t in_len = in_samples; - spx_uint32_t out_len 
= out_samples; + if (in_samples > UINT32_MAX || out_samples > UINT32_MAX) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, + "Too many samples to resample: in=%zu, out=%zu\n", + in_samples, out_samples); + return {}; + } + + spx_uint32_t in_len = static_cast(in_samples); + spx_uint32_t out_len = static_cast(out_samples); - int err = speex_resampler_process_int(m_resampler, 0, in_buffer.data(), &in_len, out_buffer.data(), &out_len); + int err = speex_resampler_process_int(m_resampler, 0, + in_buffer.data(), &in_len, + out_buffer.data(), &out_len); if (err != RESAMPLER_ERR_SUCCESS) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Resampling failed with error code: %d\n", err); - return std::vector(); // return empty vector on error + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, + "Resampling failed with error code: %d\n", err); + return {}; // empty on error } - out_buffer.resize(out_len); // resize to actual size used + out_buffer.resize(out_len); // resize to actual resampled size return out_buffer; } @@ -223,7 +235,7 @@ class AudioStreamer { const int bitsPerSample = 16; // pcm16 int byteRate = in_sample_rate * numChannels * bitsPerSample / 8; int blockAlign = numChannels * bitsPerSample / 8; - uint32_t dataSize = rawAudio.size(); + uint32_t dataSize = static_cast(rawAudio.size()); uint32_t chunkSize = 36 + dataSize; std::ostringstream wavStream; // write in string like stream @@ -449,8 +461,10 @@ namespace { memset(tech_pvt, 0, sizeof(private_t)); - strncpy(tech_pvt->sessionId, switch_core_session_get_uuid(session), MAX_SESSION_ID); - strncpy(tech_pvt->ws_uri, wsUri, MAX_WS_URI); + strncpy(tech_pvt->sessionId, switch_core_session_get_uuid(session), MAX_SESSION_ID - 1); + tech_pvt->sessionId[MAX_SESSION_ID - 1] = '\0'; + strncpy(tech_pvt->ws_uri, wsUri, MAX_WS_URI - 1); + tech_pvt->ws_uri[MAX_WS_URI - 1] = '\0'; tech_pvt->sampling = desiredSampling; tech_pvt->responseHandler = responseHandler; tech_pvt->rtp_packets = rtp_packets; 
@@ -480,7 +494,7 @@ namespace { switch_mutex_init(&tech_pvt->mutex, SWITCH_MUTEX_NESTED, pool); - if (desiredSampling != sampling) { + if (static_cast(desiredSampling) != sampling) { if (switch_buffer_create(pool, &tech_pvt->sbuffer, buflen) != SWITCH_STATUS_SUCCESS) { switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_ERROR, "%s: Error creating switch buffer.\n", tech_pvt->sessionId); @@ -510,7 +524,7 @@ namespace { ringBufferInit(tech_pvt->buffer, tech_pvt->data, adjSize); } - if (desiredSampling != sampling) { + if (static_cast(desiredSampling) != sampling) { switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "(%s) resampling from %u to %u\n", tech_pvt->sessionId, sampling, desiredSampling); tech_pvt->resampler = speex_resampler_init(channels, sampling, desiredSampling, SWITCH_RESAMPLE_QUALITY, &err); if (0 != err) { @@ -559,17 +573,14 @@ namespace { extern "C" { int validate_ws_uri(const char* url, char* wsUri) { - const char* scheme = nullptr; const char* hostStart = nullptr; const char* hostEnd = nullptr; const char* portStart = nullptr; // Check scheme if (strncmp(url, "ws://", 5) == 0) { - scheme = "ws"; hostStart = url + 5; } else if (strncmp(url, "wss://", 6) == 0) { - scheme = "wss"; hostStart = url + 6; } else { return 0; @@ -728,7 +739,7 @@ extern "C" { int channels, void **ppUserData) { - int deflate, heart_beat; + int deflate = 0, heart_beat = 0; bool suppressLog = false; const char* buffer_size; const char* extra_headers; @@ -957,8 +968,6 @@ extern "C" { return SWITCH_TRUE; } - uint32_t available = switch_buffer_inuse(tech_pvt->playback_buffer); - uint32_t bytes_needed = frame->datalen; uint32_t bytes_per_sample = frame->datalen / frame->samples; From 7af2c7bab2d8b9ac84ddbed197be301817280347 Mon Sep 17 00:00:00 2001 From: Dario Pellegrino Date: Wed, 30 Jul 2025 15:42:33 +0200 Subject: [PATCH 2/2] Update README Signed-off-by: Dario Pellegrino --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git 
a/README.md b/README.md index 183191f..9d033f4 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,8 @@ The following channel variables can be used to fine tune websocket connection an - `Buffer Size` actually represents a duration of audio chunk sent to websocket. If you want to send e.g. 100ms audio packets to your ws endpoint you would set this variable to 100. If ommited, default packet size of 20ms will be sent as grabbed from the audio channel (which is default FreeSWITCH frame size) - Set `STREAM_OPENAI_API_KEY` to have a valid OpenAI API key to authenticate with OpenAI's Realtime API. This is required for the module to function properly. If not set the module will use the `STREAM_EXTRA_HEADERS` to pass the OpenAI API key as a header assuming you prepared the headers in the channel variable. **NOTE**: An OpenAI API key is required for the module to function properly. If not set, the module will not be able to connect to the API. +- You can specify the OpenAI Realtime model in the URI, e.g. `uuid_openai_audio_stream ${uuid} start wss://api.openai.com/v1/realtime?model=gpt-4o-mini-realtime-preview-2024-12-17 mono 24k` + - Extra headers should be a JSON object with key-value pairs representing additional HTTP headers. Each key should be a header name, and its corresponding value should be a string. ```json { @@ -107,6 +109,7 @@ Attaches a media bug and starts streaming audio (in L16 format) to the websocket - "8k" = 8000 Hz sample rate will be generated - "16k" = 16000 Hz sample rate will be generated - "24k" = 24000 Hz sample rate will be generated +- **IMPORTANT NOTE**: The OpenAI Realtime API, when using PCM audio format, expects the audio to be at a 24 kHz sample rate. Set the sampling-rate parameter to `24k` (or `24000`) and use mono to ensure that the audio is sent in the correct format. 
From the OpenAI Realtime API documentation: *input audio must be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian byte order.* Support for exchanging audio with OpenAI in other formats may be developed in the future, which would make the `` and ` send_json