Skip to content

Commit

Permalink
feat: Implement sentence-based TTS streaming in interact mode
Browse files Browse the repository at this point in the history
- Add sentence-utils.js with naive sentence completion algorithm
- Modify readResponseStream to process sentences in real-time
- Integrate TTS fetching for completed sentences
- Utilize audio queue to prevent overlapping playback

This commit introduces a significant improvement to the interact mode,
enabling faster voice-based interactions by processing and sending
completed sentences to the OpenAI TTS endpoint as they become available
in the streamed response.

Details:
- Implement getCompleteSentences function in sentence-utils.js
- Update readResponseStream to use sentence-based processing
- Add logic to track and process only new sentences
- Integrate with existing audio queue for smooth playback
- Handle potential errors in TTS fetching

Performance impact:
- Reduces latency in voice responses during interact mode
  • Loading branch information
o-stahl committed Jun 23, 2024
1 parent 8f2d16c commit 28d3b69
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 18 deletions.
60 changes: 46 additions & 14 deletions src/libs/api-access/gpt-api-access.js
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import { showToast, sleep, parseStreamResponseChunk, handleTextStreamEnd } from '../utils/general-utils';
import { updateUI } from '../utils/general-utils';
import { playAudio } from '../utils/audio-utils';

import { whisperTemperature, audioSpeed, ttsModel, ttsVoice, messages, pushToTalkMode } from '../state-management/state';
import { getCompleteSentences } from '../utils/sentence-utils';
import { whisperTemperature, audioSpeed, ttsModel, ttsVoice, messages, isInteractModeOpen } from '../state-management/state';

import { addMessage } from '../conversation-management/message-processing';
const MAX_RETRY_ATTEMPTS = 5;
Expand Down Expand Up @@ -87,8 +87,6 @@ export async function fetchGPTResponseStream(
streamedMessageText,
autoScrollToBottom = true
) {
//const gptMessagesOnly = filterGPTMessages(conversation);

let tempMessages = conversation.map((message) => ({
role: message.role,
content: message.content,
Expand All @@ -108,13 +106,10 @@ export async function fetchGPTResponseStream(
}),
signal: abortController.signal,
};
let result;

try {
const response = await fetch('https://api.openai.com/v1/chat/completions', requestOptions);

result = await readResponseStream(response, updateUiFunction, autoScrollToBottom);

const result = await readResponseStream(response, updateUiFunction, autoScrollToBottom);
return result;
} catch (error) {
if (error.name === 'AbortError') {
Expand All @@ -132,6 +127,8 @@ async function readResponseStream(response, updateUiFunction, autoScrollToBottom
const reader = response.body.getReader();
const decoder = new TextDecoder('utf-8');
let decodedResult = '';
let buffer = '';
let processedSentences = new Set();

while (true) {
const { done, value } = await reader.read();
Expand All @@ -148,13 +145,44 @@ async function readResponseStream(response, updateUiFunction, autoScrollToBottom
],
} of parsedLines) {
if (content) {
buffer += content;
decodedResult += content;

updateUI(content, messages.value, addMessage, autoScrollToBottom);

if (isInteractModeOpen.value) {
const sentences = getCompleteSentences(buffer);

for (const sentence of sentences) {
if (!processedSentences.has(sentence) && sentence.length <= 4096) {
processedSentences.add(sentence);
try {
await fetchTTSResponse(sentence);
} catch (error) {
console.error('Error fetching TTS response:', error);
}
}
}
buffer = buffer.slice(sentences.join('').length);
}
}
}
}

handleTextStreamEnd(decodedResult);
// Process any remaining content in the buffer
if (buffer.length > 0 && isInteractModeOpen.value) {
const sentences = getCompleteSentences(buffer);
for (const sentence of sentences) {
if (!processedSentences.has(sentence) && sentence.length <= 4096) {
processedSentences.add(sentence);
try {
await fetchTTSResponse(sentence);
} catch (error) {
console.error('Error fetching TTS response:', error);
}
}
}
}

return decodedResult;
}
Expand All @@ -168,15 +196,20 @@ export async function fetchTTSResponse(text) {
}

try {
if (text.length > 4096) {
console.error(`[TTS]: Input text exceeds 4096 characters.`);
return;
}

const response = await fetch('https://api.openai.com/v1/audio/speech', {
method: 'POST',
headers: {
'Authorization': `Bearer ${apiKey}`,
'Content-Type': 'application/json'
},
body: JSON.stringify({
model: ttsModel.value, // Adding the model parameter as required
input: text, // Changing 'text' to 'input' as required
model: ttsModel.value,
input: text,
voice: ttsVoice.value,
speed: audioSpeed.value
})
Expand All @@ -187,9 +220,8 @@ export async function fetchTTSResponse(text) {
throw new Error(`Error from TTS API: ${errorText}`);
}

const audioBlob = await response.blob(); // Get the audio content as a blob
console.log(`[TTS]: Received audio blob, length: ${audioBlob.size}`);
playAudio(audioBlob); // Ensure this function is called
const audioBlob = await response.blob();
playAudio(audioBlob);
} catch (error) {
console.error(`[TTS]: Error fetching TTS response: ${error.message}`);
}
Expand Down
8 changes: 4 additions & 4 deletions src/libs/utils/general-utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -252,12 +252,12 @@ export function swipedRight(event) {
export async function handleTextStreamEnd(message) {
if (isInteractModeOpen.value) {
try {
// Call the fetchTTSResponse with "message" and play the result
await fetchTTSResponse(message);
if (message.length <= 4096) {
await fetchTTSResponse(message);
}
} catch (error) {
console.error('Error with TTS Response:', error);
}
finally {
} finally {
if (pushToTalkMode.value) {
isInteractModeOpen.value = false;
}
Expand Down
35 changes: 35 additions & 0 deletions src/libs/utils/sentence-utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/**
 * Extract the complete sentences from a streaming text buffer.
 *
 * A sentence is considered complete when a terminator (. ! ?) appears
 * outside of double quotes and outside any bracket pair, and is either
 * the last character of the buffer or followed by whitespace (so that
 * decimals like "3.14" are not split). Each extracted sentence is
 * whitespace-trimmed; any trailing partial sentence is not returned.
 *
 * @param {string} buffer - Accumulated streamed text.
 * @returns {string[]} Trimmed complete sentences, in order of appearance.
 */
export function getCompleteSentences(buffer) {
  const complete = [];
  const terminators = new Set(['.', '!', '?']);
  const openers = new Set(['(', '[', '{']);
  const closers = new Set([')', ']', '}']);

  let pending = '';
  let insideQuotes = false;
  let openBrackets = 0;

  for (let idx = 0; idx < buffer.length; idx++) {
    const ch = buffer[idx];
    pending += ch;

    // Track quoting and bracket nesting so terminators inside them are ignored.
    if (ch === '"') {
      insideQuotes = !insideQuotes;
    } else if (openers.has(ch)) {
      openBrackets += 1;
    } else if (closers.has(ch)) {
      // Clamp at zero so a stray closer cannot push the depth negative.
      openBrackets = Math.max(0, openBrackets - 1);
    }

    const atBufferEnd = idx === buffer.length - 1;
    const nextIsWhitespace = !atBufferEnd && /\s/.test(buffer[idx + 1]);

    if (
      terminators.has(ch) &&
      !insideQuotes &&
      openBrackets === 0 &&
      (atBufferEnd || nextIsWhitespace)
    ) {
      complete.push(pending.trim());
      pending = '';
    }
  }

  return complete;
}

0 comments on commit 28d3b69

Please sign in to comment.