Ensure device selection works in Audio when streaming #7082

Merged
merged 15 commits on Jan 23, 2024
6 changes: 6 additions & 0 deletions .changeset/puny-meals-behave.md
@@ -0,0 +1,6 @@
---
"@gradio/audio": patch
"gradio": patch
---

fix:Ensure device selection works in Audio when streaming
6 changes: 6 additions & 0 deletions .changeset/seven-baths-dress.md
@@ -0,0 +1,6 @@
---
"@gradio/audio": patch
"gradio": patch
---

fix:Ensure microphone devices list updates
8 changes: 6 additions & 2 deletions .config/playwright.config.js
@@ -4,10 +4,14 @@ export default defineConfig({
use: {
screenshot: "only-on-failure",
trace: "retain-on-failure",
permissions: ["clipboard-read", "clipboard-write"],
permissions: ["clipboard-read", "clipboard-write", "microphone"],
bypassCSP: true,
launchOptions: {
args: ["--disable-web-security"]
args: [
"--disable-web-security",
"--use-fake-device-for-media-stream",
"--use-fake-ui-for-media-stream"
]
}
},
expect: { timeout: 60000 },
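For context, the two Chromium flags added to launchOptions make media-stream tests deterministic: --use-fake-device-for-media-stream registers synthetic microphones (labelled "Fake Default Audio Input", "Fake Audio Input 1", and so on) and --use-fake-ui-for-media-stream suppresses the permission prompt. A minimal sketch of how a test could confirm those fake devices are visible to the page; the URL and the test itself are assumptions, not part of this PR:

import { test, expect } from "@playwright/test";

// Sketch only: assumes the shared config above (microphone permission plus
// fake-media-stream flags) and a Gradio app already running locally.
test("fake microphones are enumerated", async ({ page }) => {
	await page.goto("http://localhost:7860"); // assumed local demo URL
	const labels = await page.evaluate(async () => {
		const devices = await navigator.mediaDevices.enumerateDevices();
		return devices
			.filter((device) => device.kind === "audioinput")
			.map((device) => device.label);
	});
	expect(labels.join(" ")).toContain("Fake Audio Input 1");
});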
2 changes: 1 addition & 1 deletion demo/audio_debugger/run.ipynb
@@ -1 +1 @@
{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: audio_debugger"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/audio_debugger/cantina.wav"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import subprocess\n", "import os\n", "\n", "audio_file = os.path.join(os.path.abspath(''), \"cantina.wav\")\n", "\n", "\n", "with gr.Blocks() as demo:\n", " with gr.Tab(\"Audio\"):\n", " gr.Audio(audio_file)\n", " with gr.Tab(\"Interface\"):\n", " gr.Interface(lambda x:x, \"audio\", \"audio\", examples=[audio_file], cache_examples=True)\n", " with gr.Tab(\"console\"):\n", " ip = gr.Textbox(label=\"User IP Address\")\n", " gr.Interface(lambda cmd:subprocess.run([cmd], capture_output=True, shell=True).stdout.decode('utf-8').strip(), \"text\", \"text\")\n", " \n", " def get_ip(request: gr.Request):\n", " return request.client.host\n", " \n", " demo.load(get_ip, None, ip)\n", " \n", "if __name__ == \"__main__\":\n", " demo.queue()\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: audio_debugger"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/audio_debugger/cantina.wav"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import subprocess\n", "import os\n", "\n", "audio_file = os.path.join(os.path.abspath(''), \"cantina.wav\")\n", "\n", "\n", "with gr.Blocks() as demo:\n", " with gr.Tab(\"Audio\"):\n", " gr.Audio(audio_file)\n", " with gr.Tab(\"Interface\"):\n", " gr.Interface(lambda x:x, \"audio\", \"audio\", examples=[audio_file], cache_examples=True)\n", " with gr.Tab(\"Streaming\"):\n", " gr.Interface(lambda x:x, gr.Audio(streaming=True), \"audio\", examples=[audio_file], cache_examples=True)\n", " with gr.Tab(\"console\"):\n", " ip = gr.Textbox(label=\"User IP Address\")\n", " gr.Interface(lambda cmd:subprocess.run([cmd], capture_output=True, shell=True).stdout.decode('utf-8').strip(), \"text\", \"text\")\n", " \n", " def get_ip(request: gr.Request):\n", " return request.client.host\n", " \n", " demo.load(get_ip, None, ip)\n", " \n", "if __name__ == \"__main__\":\n", " demo.queue()\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
2 changes: 2 additions & 0 deletions demo/audio_debugger/run.py
@@ -10,6 +10,8 @@
gr.Audio(audio_file)
with gr.Tab("Interface"):
gr.Interface(lambda x:x, "audio", "audio", examples=[audio_file], cache_examples=True)
with gr.Tab("Streaming"):
gr.Interface(lambda x:x, gr.Audio(streaming=True), "audio", examples=[audio_file], cache_examples=True)
with gr.Tab("console"):
ip = gr.Textbox(label="User IP Address")
gr.Interface(lambda cmd:subprocess.run([cmd], capture_output=True, shell=True).stdout.decode('utf-8').strip(), "text", "text")
47 changes: 46 additions & 1 deletion js/app/test/audio_debugger.spec.ts
@@ -1,5 +1,5 @@
import { test, expect } from "@gradio/tootils";

import { chromium } from "playwright";
// we cannot currently test the waveform canvas with playwright (https://github.com/microsoft/playwright/issues/23964)
// so this test covers the interactive elements around the waveform canvas

@@ -43,3 +43,48 @@ test("audio waveform", async ({ page }) => {
.getByLabel("Skip forward by 0.15 seconds")
.click();
});

test("audio streaming tab", async ({ page }) => {
const browser = await chromium.launch({
args: ["--use-fake-ui-for-media-stream"]
});

const context = await browser.newContext({
permissions: ["microphone"]
});

context.grantPermissions(["microphone"]);

await page.getByRole("tab", { name: "Streaming" }).click();

await expect(page.getByLabel("Select input device")).toContainText(
"Fake Default Audio InputFake Audio Input 1Fake Audio Input 2"
);
});

test("recording audio", async ({ page }) => {
const browser = await chromium.launch({
args: ["--use-fake-ui-for-media-stream"]
});

const context = await browser.newContext({
permissions: ["microphone"]
});

await page.getByText("Interface").click();
await page.getByLabel("Record audio").click();

context.grantPermissions(["microphone"]);

await expect(page.getByRole("combobox")).toContainText(
"Fake Default Audio InputFake Audio Input 1Fake Audio Input 2"
);

await page.getByRole("button", { name: "Record", exact: true }).click();

await page.waitForTimeout(1000);

await expect(page.getByText("0:01", { exact: true })).toBeAttached();

await page.getByText("Stop", { exact: true }).nth(0).click();
});
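A note on the two tests above: the shared .config/playwright.config.js already grants the "microphone" permission and launches Chromium with the fake-media-stream flags, and every assertion runs against the page fixture, so the per-test chromium.launch() / newContext() calls are not strictly required. A slimmer variant, offered as a sketch under that assumption rather than code from this PR:

import { test, expect } from "@gradio/tootils";

// Sketch: relies entirely on the shared Playwright config for permissions
// and fake devices; asserts only that the default fake input is listed.
test("streaming tab lists fake microphones", async ({ page }) => {
	await page.getByRole("tab", { name: "Streaming" }).click();
	await expect(page.getByLabel("Select input device")).toContainText(
		"Fake Default Audio Input"
	);
});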
69 changes: 69 additions & 0 deletions js/audio/shared/DeviceSelect.svelte
@@ -0,0 +1,69 @@
<script lang="ts">
import RecordPlugin from "wavesurfer.js/dist/plugins/record.js";
import type { I18nFormatter } from "@gradio/utils";
import { createEventDispatcher } from "svelte";

export let i18n: I18nFormatter;
export let micDevices: MediaDeviceInfo[] = [];

const dispatch = createEventDispatcher<{
error: string;
}>();

$: try {
let tempDevices: MediaDeviceInfo[] = [];
RecordPlugin.getAvailableAudioDevices().then(
(devices: MediaDeviceInfo[]) => {
micDevices = devices;
devices.forEach((device) => {
if (device.deviceId) {
tempDevices.push(device);
}
});
micDevices = tempDevices;
}
);
} catch (err) {
if (err instanceof DOMException && err.name == "NotAllowedError") {
dispatch("error", i18n("audio.allow_recording_access"));
}
throw err;
}
</script>

<select
class="mic-select"
aria-label="Select input device"
disabled={micDevices.length === 0}
>
{#if micDevices.length === 0}
<option value="">{i18n("audio.no_microphone")}</option>
{:else}
{#each micDevices as micDevice}
<option value={micDevice.deviceId}>{micDevice.label}</option>
{/each}
{/if}
</select>

<style>
.mic-select {
height: var(--size-8);
background: var(--block-background-fill);
padding: 0px var(--spacing-xxl);
border-radius: var(--radius-full);
font-size: var(--text-md);
border: 1px solid var(--neutral-400);
margin: var(--size-1) var(--size-1) 0 0;
}

select {
text-overflow: ellipsis;
max-width: var(--size-40);
}

@media (max-width: 375px) {
select {
width: 100%;
}
}
</style>
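DeviceSelect.svelte builds its options from the record plugin's static helper: RecordPlugin.getAvailableAudioDevices() resolves with MediaDeviceInfo objects, and entries with an empty deviceId (typically what browsers report before microphone access has been granted) are dropped so the select only lists usable inputs. The same filtering step, pulled out as a standalone TypeScript sketch for illustration (the helper call comes from the component above; the wrapper function is an assumption):

import RecordPlugin from "wavesurfer.js/dist/plugins/record.js";

// Sketch: fetch the available audio input devices and keep only those that
// expose a real deviceId, mirroring the reactive block in DeviceSelect.svelte.
async function listMicrophones(): Promise<MediaDeviceInfo[]> {
	const devices: MediaDeviceInfo[] =
		await RecordPlugin.getAvailableAudioDevices();
	return devices.filter((device) => device.deviceId !== "");
}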
61 changes: 2 additions & 59 deletions js/audio/shared/WaveformRecordControls.svelte
@@ -2,7 +2,7 @@
import { Pause } from "@gradio/icons";
import type { I18nFormatter } from "@gradio/utils";
import RecordPlugin from "wavesurfer.js/dist/plugins/record.js";
import { createEventDispatcher } from "svelte";
import DeviceSelect from "./DeviceSelect.svelte";

export let record: RecordPlugin;
export let i18n: I18nFormatter;
@@ -18,30 +18,6 @@
export let show_recording_waveform: boolean | undefined;
export let timing = false;

const dispatch = createEventDispatcher<{
error: string;
}>();

$: try {
let tempDevices: MediaDeviceInfo[] = [];
RecordPlugin.getAvailableAudioDevices().then(
(devices: MediaDeviceInfo[]) => {
micDevices = devices;
devices.forEach((device) => {
if (device.deviceId) {
tempDevices.push(device);
}
});
micDevices = tempDevices;
}
);
} catch (err) {
if (err instanceof DOMException && err.name == "NotAllowedError") {
dispatch("error", i18n("audio.allow_recording_access"));
}
throw err;
}

$: record.on("record-start", () => {
record.startMic();

@@ -128,32 +104,10 @@
<time class="duration-button duration">{record_time}</time>
{/if}
</div>

<select
class="mic-select"
aria-label="Select input device"
disabled={micDevices.length === 0}
>
{#if micDevices.length === 0}
<option value="">{i18n("audio.no_microphone")}</option>
{:else}
{#each micDevices as micDevice}
<option value={micDevice.deviceId}>{micDevice.label}</option>
{/each}
{/if}
</select>
<DeviceSelect bind:micDevices {i18n} />
</div>

<style>
.mic-select {
height: var(--size-8);
background: var(--block-background-fill);
padding: 0px var(--spacing-xxl);
border-radius: var(--radius-full);
font-size: var(--text-md);
border: 1px solid var(--neutral-400);
margin: var(--size-1) var(--size-1) 0 0;
}
.controls {
display: flex;
align-items: center;
@@ -162,17 +116,6 @@
overflow: hidden;
}

.controls select {
text-overflow: ellipsis;
max-width: var(--size-40);
}

@media (max-width: 375px) {
.controls select {
width: 100%;
}
}

.wrapper {
display: flex;
align-items: center;
71 changes: 43 additions & 28 deletions js/audio/streaming/StreamAudio.svelte
@@ -4,6 +4,7 @@
import WaveSurfer from "wavesurfer.js";
import RecordPlugin from "wavesurfer.js/dist/plugins/record.js";
import type { WaveformOptions } from "../shared/types";
import DeviceSelect from "../shared/DeviceSelect.svelte";

export let recording = false;
export let paused_recording = false;
@@ -20,6 +21,8 @@

let microphoneContainer: HTMLDivElement;

let micDevices: MediaDeviceInfo[] = [];

onMount(() => {
create_mic_waveform();
});
@@ -44,37 +47,49 @@
style:display={recording ? "block" : "none"}
/>
{/if}
{#if recording}
<button
class={paused_recording ? "stop-button-paused" : "stop-button"}
on:click={() => {
waveformRecord?.stopMic();
stop();
}}
>
<span class="record-icon">
<span class="pinger" />
<span class="dot" />
</span>
{paused_recording ? i18n("audio.pause") : i18n("audio.stop")}
</button>
{:else}
<button
class="record-button"
on:click={() => {
waveformRecord?.startMic();
record();
}}
>
<span class="record-icon">
<span class="dot" />
</span>
{i18n("audio.record")}
</button>
{/if}
<div class="controls">
{#if recording}
<button
class={paused_recording ? "stop-button-paused" : "stop-button"}
on:click={() => {
waveformRecord?.stopMic();
stop();
}}
>
<span class="record-icon">
<span class="pinger" />
<span class="dot" />
</span>
{paused_recording ? i18n("audio.pause") : i18n("audio.stop")}
</button>
{:else}
<button
class="record-button"
on:click={() => {
waveformRecord?.startMic();
record();
}}
>
<span class="record-icon">
<span class="dot" />
</span>
{i18n("audio.record")}
</button>
{/if}

<DeviceSelect bind:micDevices {i18n} />
</div>
</div>

<style>
.controls {
display: flex;
align-items: center;
justify-content: space-between;
flex-wrap: wrap;
overflow: hidden;
}

.mic-wrap {
display: block;
align-items: center;