Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions frontend/src/components/message/PromptInput.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -1241,7 +1241,7 @@ return (

</div>
<div className="flex items-center gap-1.5 md:gap-2 flex-shrink-0">
{isMobile && showScrollButton ? (
{isMobile && showScrollButton && !showVoiceFeedback ? (
<button
onClick={onScrollToBottom}
className="px-4 py-2 rounded-lg bg-black hover:bg-zinc-900 text-white transition-all duration-200 active:scale-95 shadow-md flex items-center justify-center min-w-[52px] border border-zinc-700"
Expand Down Expand Up @@ -1286,7 +1286,7 @@ return (
{sttEnabled && sttSupported && (
renderVoiceButton('desktop')
)}
{isMobile && !showScrollButton && sttEnabled && sttSupported && !hasPendingPermissionForSession && (
{isMobile && sttEnabled && sttSupported && !hasPendingPermissionForSession && (!showScrollButton || showVoiceFeedback) && (
renderVoiceButton('mobile')
)}
<button
Expand Down
58 changes: 30 additions & 28 deletions frontend/src/components/message/VoiceStatusOverlay.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -8,31 +8,36 @@ interface VoiceStatusOverlayProps {
state: VoiceStatusOverlayState | null
}

/**
 * Animated five-bar waveform indicator.
 *
 * Renders five white, rounded bars that each run the `waveBar` keyframe
 * animation (a vertical scale between 0.2 and 1). Every bar starts 0.12s
 * after the previous one, which produces the rippling effect. The keyframes
 * are emitted in an inline `<style>` element next to the bars, so the
 * component does not rely on any external stylesheet for the animation.
 */
function WaveformBars() {
  // Keyframe definition consumed by the `animation` property on each bar.
  const keyframesCss = `
@keyframes waveBar {
0%, 100% { transform: scaleY(0.2); }
50% { transform: scaleY(1); }
}
`

  const bars = Array.from({ length: 5 }, (_, barIndex) => {
    const barStyle = {
      height: '100%',
      animation: 'waveBar 0.9s ease-in-out infinite',
      // Stagger each bar so they do not pulse in unison.
      animationDelay: `${barIndex * 0.12}s`,
    }

    return (
      <div
        key={barIndex}
        className="w-1.5 rounded-full bg-white"
        style={barStyle}
      />
    )
  })

  return (
    <div className="flex items-end gap-[3px] h-8">
      {bars}
      <style>{keyframesCss}</style>
    </div>
  )
}

export function VoiceStatusOverlay({ show, label, state }: VoiceStatusOverlayProps) {
if (!show || !label || !state) {
return null
}

const isLoading = state === 'starting' || state === 'processing' || state === 'sending'
const showLoadingText = state !== 'starting'
const topLabel = state === 'readyToSend'
? 'Release'
: state === 'starting'
? 'Starting'
: state === 'processing'
? 'Transcribe'
: state === 'sending'
? 'Sending'
: 'Swipe'
const bottomLabel = state === 'starting'
? 'Mic'
: state === 'processing'
? 'Speech'
: state === 'sending'
? 'Prompt'
: state === 'readyToSend'
? 'Send'
: 'Send'
const actionWords = state === 'readyToSend'
? ['Release', 'To', 'Send']
: ['Swipe', 'To', 'Send']
Expand All @@ -47,12 +52,11 @@ export function VoiceStatusOverlay({ show, label, state }: VoiceStatusOverlayPro
<div className="absolute inset-x-1 top-1 h-10 rounded-full bg-white/20 blur-sm" />
<div className="relative flex flex-1 flex-col items-center justify-center gap-1">
{isLoading ? (
<>
state === 'processing' ? (
<WaveformBars />
) : (
<LoaderCircle className="h-6 w-6 animate-spin" />
{showLoadingText && (
<span className="text-[10px] font-bold uppercase leading-none tracking-wide">{topLabel}</span>
)}
</>
)
) : (
<>
<ArrowUp className="h-8 w-8 animate-bounce" />
Expand All @@ -64,11 +68,9 @@ export function VoiceStatusOverlay({ show, label, state }: VoiceStatusOverlayPro
</>
)}
</div>
{isLoading && showLoadingText ? (
<span className="relative text-[10px] font-bold uppercase leading-none tracking-wide">{bottomLabel}</span>
) : !isLoading ? (
{!isLoading && (
<X className="relative h-4 w-4" />
) : null}
)}
</div>
</div>
)
Expand Down
74 changes: 70 additions & 4 deletions frontend/src/hooks/useSTT.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ import { AudioRecorder } from '@/lib/audioRecorder'
import { sttApi } from '@/api/stt'
import { DEFAULT_STT_CONFIG } from '@/api/types/settings'

/**
 * Maximum time, in milliseconds, that `startRecording` waits for the
 * recorder/recognizer `start()` call to settle. When this elapses first, the
 * start attempt is rejected with the error "Microphone start timed out".
 */
const STT_START_TIMEOUT_MS = 10_000

export function useSTT(userId = 'default') {
const { preferences } = useSettings(userId)
const [isRecording, setIsRecording] = useState(false)
Expand All @@ -21,6 +23,8 @@ export function useSTT(userId = 'default') {
const userIdRef = useRef(userId)
const errorTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null)
const lastProcessedBlobRef = useRef<Blob | null>(null)
const startupTimeoutRef = useRef<ReturnType<typeof setTimeout> | null>(null)
const startOpIdRef = useRef(0)

useEffect(() => {
userIdRef.current = userId
Expand Down Expand Up @@ -183,6 +187,26 @@ export function useSTT(userId = 'default') {
}
}, [isEnabled, isExternalProvider, setupAudioRecorder])

// Cancels the pending microphone-startup timer, if one is armed, and clears
// the ref so a later call becomes a no-op. Has no dependencies, so the
// callback identity is stable across renders.
const clearStartupTimeout = useCallback(() => {
  const pendingTimer = startupTimeoutRef.current
  if (!pendingTimer) return

  clearTimeout(pendingTimer)
  startupTimeoutRef.current = null
}, [])

// Tears down a start attempt that exceeded the startup timeout.
// Aborts the audio recorder when the external provider is in use and a
// recorder instance exists; otherwise aborts the recognizer. Afterwards the
// hook is returned to the idle state and the timeout error is surfaced.
const abortAndResetOnTimeout = useCallback(() => {
  const externalRecorder = isExternalProvider ? audioRecorder.current : null

  if (externalRecorder) {
    externalRecorder.abort()
  } else {
    recognizer.current.abort()
  }

  // Reset the recording/processing flags before reporting the error.
  setIsRecording(false)
  setIsProcessing(false)
  setState('idle')
  setIsError(true)
  setError('Microphone start timed out')
}, [isExternalProvider])

const startRecording = useCallback(async (): Promise<boolean> => {
if (!isSupported) {
setError('Speech recognition is not supported in this browser')
Expand All @@ -202,6 +226,9 @@ export function useSTT(userId = 'default') {
setError(null)
lastProcessedBlobRef.current = null

const startOpId = ++startOpIdRef.current
clearStartupTimeout()

if (isExternalProvider) {
if (!audioRecorder.current) {
audioRecorder.current = new AudioRecorder()
Expand All @@ -210,11 +237,30 @@ export function useSTT(userId = 'default') {

try {
setIsProcessing(true)
await audioRecorder.current.start()

const startupPromise = audioRecorder.current.start()
const timeoutPromise = new Promise<never>((_, reject) => {
startupTimeoutRef.current = setTimeout(() => {
if (startOpIdRef.current !== startOpId) return
reject(new Error('Microphone start timed out'))
}, STT_START_TIMEOUT_MS)
})

await Promise.race([startupPromise, timeoutPromise])
clearStartupTimeout()

if (startOpIdRef.current !== startOpId) return false

setIsProcessing(false)
return true
} catch (err) {
clearStartupTimeout()
if (startOpIdRef.current !== startOpId) return false
setIsProcessing(false)
if (err instanceof Error && err.message === 'Microphone start timed out') {
abortAndResetOnTimeout()
return false
}
setIsError(true)
setError(err instanceof Error ? err.message : 'Failed to start recording')
return false
Expand All @@ -228,16 +274,35 @@ export function useSTT(userId = 'default') {

try {
setIsProcessing(true)
await recognizer.current.start(options)

const startupPromise = recognizer.current.start(options)
const timeoutPromise = new Promise<never>((_, reject) => {
startupTimeoutRef.current = setTimeout(() => {
if (startOpIdRef.current !== startOpId) return
reject(new Error('Microphone start timed out'))
}, STT_START_TIMEOUT_MS)
})

await Promise.race([startupPromise, timeoutPromise])
clearStartupTimeout()

if (startOpIdRef.current !== startOpId) return false

return true
} catch (err) {
clearStartupTimeout()
if (startOpIdRef.current !== startOpId) return false
setIsProcessing(false)
if (err instanceof Error && err.message === 'Microphone start timed out') {
abortAndResetOnTimeout()
return false
}
setIsError(true)
setError(err instanceof Error ? err.message : 'Failed to start recording')
return false
}
}
}, [isSupported, isEnabled, isExternalProvider, config.language, setupAudioRecorder])
}, [isSupported, isEnabled, isExternalProvider, config.language, setupAudioRecorder, clearStartupTimeout, abortAndResetOnTimeout])

const stopRecording = useCallback(() => {
if (isExternalProvider && audioRecorder.current) {
Expand Down Expand Up @@ -286,8 +351,9 @@ export function useSTT(userId = 'default') {
useEffect(() => {
return () => {
if (errorTimeoutRef.current) clearTimeout(errorTimeoutRef.current)
clearStartupTimeout()
}
}, [])
}, [clearStartupTimeout])

return {
isRecording,
Expand Down
Loading