Commit cdeffed

🤖 feat: sliding window audio waveform for voice recording (#862)
_Generated with `mux`_

Replaces the static animated bars with a custom sliding-window waveform that shows the last 10 seconds of audio amplitude, similar to ChatGPT's recording UI.

### Features

- **10-second sliding window**: shows audio history, with new samples appearing on the right and sliding left
- **Mode-colored**: visualization uses plan (blue) or exec (purple) based on the current mode
- **Full-width responsive**: bars dynamically fill the container at any viewport size
- **Smooth visualization**: RMS amplitude calculation with a 50ms sampling interval

### Technical

- Custom `SlidingWaveform` component using canvas and the Web Audio API
- `AnalyserNode` for real-time audio amplitude sampling
- `ResizeObserver` for responsive container measurement
- Exposed `MediaRecorder` from the `useVoiceInput` hook
1 parent 07ea67d commit cdeffed
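For context, a minimal sketch of how the overlay could be wired into the chat input, assuming the `useVoiceInput` hook now exposes the `MediaRecorder` alongside its state (per the Technical notes above). The hook's exact return shape and the `stop` handler name are assumptions, not code from this commit:

```tsx
import React from "react";
import { RecordingOverlay } from "./RecordingOverlay"; // path assumed
import { useVoiceInput } from "@/browser/hooks/useVoiceInput";
import type { UIMode } from "@/common/types/mode";

// Hypothetical wiring: render the overlay in place of the chat
// textarea whenever voice input is active.
const ChatInputVoiceArea: React.FC<{ mode: UIMode }> = ({ mode }) => {
  const { state, mediaRecorder, stop } = useVoiceInput(); // assumed shape
  if (state === "idle") return null; // normal textarea renders instead
  return (
    <RecordingOverlay
      state={state}
      mode={mode}
      mediaRecorder={mediaRecorder}
      onStop={stop}
    />
  );
};

export default ChatInputVoiceArea;
```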

File tree

5 files changed: +306 -77 lines
Lines changed: 262 additions & 0 deletions

```tsx
/**
 * Recording overlay - shows live audio visualization during voice recording.
 * Replaces the chat textarea when voice input is active.
 */

import React, { useRef, useState, useLayoutEffect, useEffect, useCallback } from "react";
import { Loader2 } from "lucide-react";
import { cn } from "@/common/lib/utils";
import { formatKeybind, KEYBINDS } from "@/browser/utils/ui/keybinds";
import type { UIMode } from "@/common/types/mode";
import type { VoiceInputState } from "@/browser/hooks/useVoiceInput";

/** Canvas fill colors for the waveform (slightly lighter than CSS vars for visibility) */
const MODE_COLORS: Record<UIMode, string> = {
  plan: "hsl(210, 70%, 55%)",
  exec: "hsl(268, 94%, 65%)",
};

/** Tailwind classes for recording state, keyed by mode */
const RECORDING_CLASSES: Record<UIMode, string> = {
  plan: "cursor-pointer border-plan-mode bg-plan-mode/10",
  exec: "cursor-pointer border-exec-mode bg-exec-mode/10",
};

const TEXT_CLASSES: Record<UIMode, string> = {
  plan: "text-plan-mode-light",
  exec: "text-exec-mode-light",
};

// Waveform shows last 10 seconds of audio, sampled every 50ms (200 samples)
const WINDOW_DURATION_MS = 10_000;
const SAMPLE_INTERVAL_MS = 50;
const NUM_SAMPLES = WINDOW_DURATION_MS / SAMPLE_INTERVAL_MS;

interface RecordingOverlayProps {
  state: VoiceInputState;
  mode: UIMode;
  mediaRecorder: MediaRecorder | null;
  onStop: () => void;
}

export const RecordingOverlay: React.FC<RecordingOverlayProps> = (props) => {
  const isRecording = props.state === "recording";
  const isTranscribing = props.state === "transcribing";

  const containerClasses = cn(
    "mb-1 flex w-full flex-col items-center justify-center gap-1 rounded-md border px-3 py-2 transition-all focus:outline-none",
    isRecording ? RECORDING_CLASSES[props.mode] : "cursor-wait border-amber-500 bg-amber-500/10"
  );

  return (
    <button
      type="button"
      onClick={isRecording ? props.onStop : undefined}
      disabled={isTranscribing}
      className={containerClasses}
      aria-label={isRecording ? "Stop recording" : "Transcribing..."}
    >
      <div className="flex h-8 w-full items-center justify-center">
        {isRecording && props.mediaRecorder ? (
          <SlidingWaveform
            mediaRecorder={props.mediaRecorder}
            color={MODE_COLORS[props.mode]}
            height={32}
          />
        ) : (
          <Loader2 className="h-5 w-5 animate-spin text-amber-500" />
        )}
      </div>

      <span
        className={cn(
          "text-xs font-medium",
          isRecording ? TEXT_CLASSES[props.mode] : "text-amber-500"
        )}
      >
        {isRecording ? <RecordingHints /> : "Transcribing..."}
      </span>
    </button>
  );
};

/** Keyboard hint display for recording state */
const RecordingHints: React.FC = () => (
  <>
    <span className="opacity-70">space</span> send ·{" "}
    <span className="opacity-70">{formatKeybind(KEYBINDS.TOGGLE_VOICE_INPUT)}</span> review ·{" "}
    <span className="opacity-70">esc</span> cancel
  </>
);

// =============================================================================
// SlidingWaveform - Canvas-based amplitude visualization
// =============================================================================

interface SlidingWaveformProps {
  mediaRecorder: MediaRecorder;
  color: string;
  height: number;
}

/**
 * Renders a sliding window of audio amplitude over time.
 * New samples appear on the right and scroll left as time passes.
 * Falls back to a simple pulsing indicator if Web Audio API fails.
 */
const SlidingWaveform: React.FC<SlidingWaveformProps> = (props) => {
  const canvasRef = useRef<HTMLCanvasElement>(null);
  const containerRef = useRef<HTMLDivElement>(null);
  const [containerWidth, setContainerWidth] = useState(600);
  const [audioError, setAudioError] = useState(false);

  // Audio analysis state (refs to avoid re-renders)
  const audioContextRef = useRef<AudioContext | null>(null);
  const analyserRef = useRef<AnalyserNode | null>(null);
  const samplesRef = useRef<number[]>(new Array<number>(NUM_SAMPLES).fill(0));
  const animationFrameRef = useRef<number>(0);
  const lastSampleTimeRef = useRef<number>(0);

  // Track container width for responsive canvas
  useLayoutEffect(() => {
    const container = containerRef.current;
    if (!container) return;

    const observer = new ResizeObserver((entries) => {
      for (const entry of entries) {
        setContainerWidth(entry.contentRect.width);
      }
    });
    observer.observe(container);
    setContainerWidth(container.offsetWidth);

    return () => observer.disconnect();
  }, []);

  // Initialize Web Audio API analyser
  useEffect(() => {
    const stream = props.mediaRecorder.stream;
    if (!stream) return;

    try {
      const audioContext = new AudioContext();
      const analyser = audioContext.createAnalyser();
      analyser.fftSize = 256;
      analyser.smoothingTimeConstant = 0.3;

      const source = audioContext.createMediaStreamSource(stream);
      source.connect(analyser);

      audioContextRef.current = audioContext;
      analyserRef.current = analyser;
      samplesRef.current = new Array<number>(NUM_SAMPLES).fill(0);
      lastSampleTimeRef.current = performance.now();

      return () => {
        void audioContext.close();
        audioContextRef.current = null;
        analyserRef.current = null;
      };
    } catch (err) {
      console.error("Failed to initialize audio visualization:", err);
      setAudioError(true);
    }
  }, [props.mediaRecorder]);

  // Animation loop: sample audio amplitude and render bars
  const draw = useCallback(() => {
    const canvas = canvasRef.current;
    const analyser = analyserRef.current;
    if (!canvas || !analyser) return;

    const ctx = canvas.getContext("2d");
    if (!ctx) return;

    // Sample audio at fixed intervals
    const now = performance.now();
    if (now - lastSampleTimeRef.current >= SAMPLE_INTERVAL_MS) {
      const dataArray = new Uint8Array(analyser.frequencyBinCount);
      analyser.getByteTimeDomainData(dataArray);

      // Calculate RMS (root mean square) amplitude
      let sum = 0;
      for (const sample of dataArray) {
        const normalized = (sample - 128) / 128;
        sum += normalized * normalized;
      }
      const rms = Math.sqrt(sum / dataArray.length);

      samplesRef.current.shift();
      samplesRef.current.push(rms);
      lastSampleTimeRef.current = now;
    }

    // Render bars
    ctx.clearRect(0, 0, canvas.width, canvas.height);

    const samples = samplesRef.current;
    const numBars = samples.length;
    // Bar sizing: bars fill full width with 40% gap ratio
    const barWidth = canvas.width / (1.4 * numBars - 0.4);
    const gap = barWidth * 0.4;
    const centerY = canvas.height / 2;

    ctx.fillStyle = props.color;

    for (let i = 0; i < numBars; i++) {
      const scaledAmplitude = Math.min(1, samples[i] * 3); // Boost for visibility
      const barHeight = Math.max(2, scaledAmplitude * canvas.height * 0.9);
      const x = i * (barWidth + gap);
      const y = centerY - barHeight / 2;

      ctx.beginPath();
      // roundRect fallback for older browsers (though Electron 38+ supports it)
      if (ctx.roundRect) {
        ctx.roundRect(x, y, barWidth, barHeight, 1);
      } else {
        ctx.rect(x, y, barWidth, barHeight);
      }
      ctx.fill();
    }

    animationFrameRef.current = requestAnimationFrame(draw);
  }, [props.color]);

  // Run animation loop
  useEffect(() => {
    if (audioError) return;
    animationFrameRef.current = requestAnimationFrame(draw);
    return () => cancelAnimationFrame(animationFrameRef.current);
  }, [draw, audioError]);

  // Fallback: simple pulsing indicator if Web Audio API unavailable
  if (audioError) {
    return (
      <div className="flex h-full w-full items-center justify-center gap-1">
        {[0, 1, 2, 3, 4].map((i) => (
          <div
            key={i}
            className="animate-pulse rounded-full"
            style={{
              width: 4,
              height: 12 + (i % 3) * 4,
              backgroundColor: props.color,
              animationDelay: `${i * 100}ms`,
            }}
          />
        ))}
      </div>
    );
  }

  return (
    <div ref={containerRef} className="flex h-full w-full items-center justify-center">
      <canvas
        ref={canvasRef}
        width={containerWidth}
        height={props.height}
        style={{ width: containerWidth, height: props.height }}
      />
    </div>
  );
};
```
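Two details of `draw` are worth spelling out. First, `getByteTimeDomainData` fills the array with unsigned bytes where 128 represents silence, so each sample is recentred and scaled to [-1, 1] before squaring; the RMS over the analyser's time-domain buffer (`frequencyBinCount` = `fftSize / 2` = 128 samples) is then a stable loudness estimate, which the renderer boosts 3x (clamped to 1) for visibility. Second, the bar-sizing formula comes from requiring `numBars` bars plus `numBars - 1` gaps of `0.4 * barWidth` to exactly fill the canvas: `w * numBars + 0.4 * w * (numBars - 1) = w * (1.4 * numBars - 0.4)`. A standalone sanity check of both (plain TypeScript; the sample bytes are made up):

```ts
// RMS of byte-domain audio samples: 128 = silence, 0 and 255 = full swing.
function rmsOfBytes(bytes: Uint8Array): number {
  let sum = 0;
  for (const b of bytes) {
    const normalized = (b - 128) / 128; // map [0, 255] -> [-1, 1]
    sum += normalized * normalized;
  }
  return Math.sqrt(sum / bytes.length);
}

// A full-swing square wave alternating 0/255 has RMS just under 1.
console.log(rmsOfBytes(new Uint8Array([0, 255, 0, 255])).toFixed(3)); // "0.996"
// Silence (all 128s) has RMS 0.
console.log(rmsOfBytes(new Uint8Array([128, 128, 128, 128]))); // 0

// Bar sizing: numBars bars plus (numBars - 1) gaps of 0.4 * barWidth
// should exactly fill the canvas width.
const width = 600;
const numBars = 200; // 10 s window / 50 ms interval
const barWidth = width / (1.4 * numBars - 0.4);
const total = numBars * barWidth + (numBars - 1) * 0.4 * barWidth;
console.log(total.toFixed(4)); // "600.0000"
```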

src/browser/components/ChatInput/VoiceInputButton.tsx

Lines changed: 29 additions & 14 deletions

```diff
@@ -9,6 +9,7 @@ import { TooltipWrapper, Tooltip } from "../Tooltip";
 import { formatKeybind, KEYBINDS } from "@/browser/utils/ui/keybinds";
 import { cn } from "@/common/lib/utils";
 import type { VoiceInputState } from "@/browser/hooks/useVoiceInput";
+import type { UIMode } from "@/common/types/mode";
 
 interface VoiceInputButtonProps {
   state: VoiceInputState;
@@ -17,29 +18,43 @@ interface VoiceInputButtonProps {
   requiresSecureContext: boolean;
   onToggle: () => void;
   disabled?: boolean;
+  mode: UIMode;
 }
 
-const STATE_CONFIG: Record<VoiceInputState, { label: string; colorClass: string }> = {
-  idle: { label: "Voice input", colorClass: "text-muted/50 hover:text-muted" },
-  recording: { label: "Stop recording", colorClass: "text-blue-500 animate-pulse" },
-  transcribing: { label: "Transcribing...", colorClass: "text-amber-500" },
+/** Color classes for each voice input state */
+const STATE_COLORS: Record<VoiceInputState, string> = {
+  idle: "text-muted/50 hover:text-muted",
+  recording: "", // Set dynamically based on mode
+  transcribing: "text-amber-500",
 };
 
+const RECORDING_COLORS: Record<UIMode, string> = {
+  plan: "text-plan-mode-light animate-pulse",
+  exec: "text-exec-mode-light animate-pulse",
+};
+
+function getColorClass(state: VoiceInputState, mode: UIMode): string {
+  return state === "recording" ? RECORDING_COLORS[mode] : STATE_COLORS[state];
+}
+
 export const VoiceInputButton: React.FC<VoiceInputButtonProps> = (props) => {
   if (!props.shouldShowUI) return null;
 
   const needsHttps = props.requiresSecureContext;
   const needsApiKey = !needsHttps && !props.isApiKeySet;
-  const isDisabledReason = needsHttps || needsApiKey;
+  const isDisabled = needsHttps || needsApiKey;
+
+  const label = isDisabled
+    ? needsHttps
+      ? "Voice input (requires HTTPS)"
+      : "Voice input (requires OpenAI API key)"
+    : props.state === "recording"
+      ? "Stop recording"
+      : props.state === "transcribing"
+        ? "Transcribing..."
+        : "Voice input";
 
-  const { label, colorClass } = isDisabledReason
-    ? {
-        label: needsHttps
-          ? "Voice input (requires HTTPS)"
-          : "Voice input (requires OpenAI API key)",
-        colorClass: "text-muted/50",
-      }
-    : STATE_CONFIG[props.state];
+  const colorClass = isDisabled ? "text-muted/50" : getColorClass(props.state, props.mode);
 
   const Icon = props.state === "transcribing" ? Loader2 : Mic;
   const isTranscribing = props.state === "transcribing";
@@ -49,7 +64,7 @@ export const VoiceInputButton: React.FC<VoiceInputButtonProps> = (props) => {
     <button
       type="button"
       onClick={props.onToggle}
-      disabled={(props.disabled ?? false) || isTranscribing || isDisabledReason}
+      disabled={(props.disabled ?? false) || isTranscribing || isDisabled}
       aria-label={label}
       aria-pressed={props.state === "recording"}
       className={cn(
```
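For reference, the refactored color lookup behaves as follows (a usage sketch assuming the definitions from the diff above are in scope):

```ts
// While recording, the color tracks the active mode; other states
// use their fixed per-state class.
getColorClass("recording", "plan");    // "text-plan-mode-light animate-pulse"
getColorClass("recording", "exec");    // "text-exec-mode-light animate-pulse"
getColorClass("idle", "plan");         // "text-muted/50 hover:text-muted"
getColorClass("transcribing", "exec"); // "text-amber-500"
```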

src/browser/components/ChatInput/WaveformBars.tsx

Lines changed: 0 additions & 32 deletions
This file was deleted.
