Improve Audio Component (#5966)
* replace <audio> with wavesurfer: add recording, playback, and trimming

* add changeset

* merge cleanup

* improving recording styling

* add recording timer

* add trim region duration

* allow trimming recordings

* clean up playing logic

* add pause_recording event

* remove crop min/max

* add waveform options param

* remove trimmingmode and use mode

* streaming + cleanup

* add changeset

* clean up types

* mobile adjustments

* add min/max length + trim accessibility

* update pnpm lock

* amend source to a list and allow source switching

* fix no microphone found logic

* change undo logic to reset trims

* tweaks

* tweak reset logic

* ensure recording is sent to backend

* fix audio duration reactivity

* list tweak

* clean up

* change source -> sources + restore wasm changes

* formatting

* fix tests

* fix test

* add default sources value in fe + fix audio demos

* fix audio file name test

* add better sources typing

* ui test tweaks

* add default value in templates.py

* formatting

* remove unused prop

* add audio story

* add changeset

* revert sources changes

* remove story id

* fix be test

* fix be test

* fix notebooks

* formatting

* fix test

* fix test again

---------

Co-authored-by: gradio-pr-bot <gradio-pr-bot@users.noreply.github.com>
Co-authored-by: Abubakar Abid <abubakar@huggingface.co>
Co-authored-by: pngwn <hello@pngwn.io>
4 people committed Oct 27, 2023
1 parent 205f613 commit 9cad212
Showing 58 changed files with 5,045 additions and 3,792 deletions.
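
Read together, the bullets above amount to the following component-level API. A hedged sketch: `sources` is confirmed by the demo diffs below, while `waveform_options`, `min_length`, and `max_length` are assumed names inferred from the "add waveform options param" and "add min/max length" bullets, and are not shown in this diff.

import gradio as gr

# Confirmed by the demo diffs: `source` (a string) becomes `sources` (a list),
# which is what enables switching between recording and uploading.
audio = gr.Audio(sources=["microphone", "upload"])

# Assumed parameter names for the remaining bullets; illustrative only.
recorder = gr.Audio(
    sources=["microphone"],
    waveform_options={"waveform_color": "#01C6FF"},  # assumed option key
    min_length=1,   # assumed: shortest accepted recording, in seconds
    max_length=60,  # assumed: longest accepted recording, in seconds
)
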
10 changes: 10 additions & 0 deletions .changeset/great-moles-matter.md
@@ -0,0 +1,10 @@
---
"@gradio/app": minor
"@gradio/audio": minor
"@gradio/icons": minor
"@gradio/storybook": minor
"@gradio/utils": minor
"gradio": minor
---

feat:Improve Audio Component
2 changes: 1 addition & 1 deletion demo/asr/run.ipynb
@@ -1 +1 @@
{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: asr"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio torch torchaudio transformers"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from transformers import pipeline\n", "import numpy as np\n", "\n", "transcriber = pipeline(\"automatic-speech-recognition\", model=\"openai/whisper-base.en\")\n", "\n", "def transcribe(audio):\n", " sr, y = audio\n", " y = y.astype(np.float32)\n", " y /= np.max(np.abs(y))\n", "\n", " return transcriber({\"sampling_rate\": sr, \"raw\": y})[\"text\"]\n", "\n", "\n", "demo = gr.Interface(\n", " transcribe,\n", " gr.Audio(source=\"microphone\"),\n", " \"text\",\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: asr"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio torch torchaudio transformers"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "from transformers import pipeline\n", "import numpy as np\n", "\n", "transcriber = pipeline(\"automatic-speech-recognition\", model=\"openai/whisper-base.en\")\n", "\n", "def transcribe(audio):\n", " sr, y = audio\n", " y = y.astype(np.float32)\n", " y /= np.max(np.abs(y))\n", "\n", " return transcriber({\"sampling_rate\": sr, \"raw\": y})[\"text\"]\n", "\n", "\n", "demo = gr.Interface(\n", " transcribe,\n", " gr.Audio(sources=[\"microphone\"]),\n", " \"text\",\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
2 changes: 1 addition & 1 deletion demo/asr/run.py
@@ -14,7 +14,7 @@ def transcribe(audio):
 
 demo = gr.Interface(
     transcribe,
-    gr.Audio(source="microphone"),
+    gr.Audio(sources=["microphone"]),
     "text",
 )

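Because `sources` is now a list, a single input can expose both recording and upload, with in-place switching in the UI (the "allow source switching" bullet). A minimal, self-contained variant of the asr demo above — a sketch, not part of this diff:

import gradio as gr

# Stand-in for the transcribe function from demo/asr/run.py above;
# any (sample_rate, numpy_array) -> str handler works here.
def transcribe(audio):
    sr, y = audio
    return f"received {len(y)} samples at {sr} Hz"

# With a list-valued `sources`, the user can toggle between recording
# from the microphone and uploading a file.
demo = gr.Interface(transcribe, gr.Audio(sources=["microphone", "upload"]), "text")

if __name__ == "__main__":
    demo.launch()
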
2 changes: 1 addition & 1 deletion demo/blocks_kitchen_sink/run.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion demo/blocks_kitchen_sink/run.py
@@ -167,7 +167,7 @@ def clear():
     with gr.Tab("Audio"):
         with gr.Row():
             gr.Audio()
-            gr.Audio(source="microphone")
+            gr.Audio(sources=["microphone"])
             gr.Audio(join(KS_FILES, "cantina.wav"))
     with gr.Tab("Other"):
         # gr.Image(source="webcam")
2 changes: 1 addition & 1 deletion demo/kitchen_sink/run.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion demo/kitchen_sink/run.py
@@ -107,7 +107,7 @@ def fn(
         gr.Image(label="Webcam", source="webcam"),
         gr.Video(label="Video"),
         gr.Audio(label="Audio"),
-        gr.Audio(label="Microphone", source="microphone"),
+        gr.Audio(label="Microphone", sources=["microphone"]),
         gr.File(label="File"),
         gr.Dataframe(label="Dataframe", headers=["Name", "Age", "Gender"]),
     ],
2 changes: 1 addition & 1 deletion demo/main_note/run.ipynb
@@ -1 +1 @@
{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: main_note"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio scipy numpy matplotlib"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "os.mkdir('audio')\n", "!wget -q -O audio/cantina.wav https://github.com/gradio-app/gradio/raw/main/demo/main_note/audio/cantina.wav\n", "!wget -q -O audio/recording1.wav https://github.com/gradio-app/gradio/raw/main/demo/main_note/audio/recording1.wav"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["from math import log2, pow\n", "import os\n", "\n", "import numpy as np\n", "from scipy.fftpack import fft\n", "\n", "import gradio as gr\n", "\n", "A4 = 440\n", "C0 = A4 * pow(2, -4.75)\n", "name = [\"C\", \"C#\", \"D\", \"D#\", \"E\", \"F\", \"F#\", \"G\", \"G#\", \"A\", \"A#\", \"B\"]\n", "\n", "\n", "def get_pitch(freq):\n", " h = round(12 * log2(freq / C0))\n", " n = h % 12\n", " return name[n]\n", "\n", "\n", "def main_note(audio):\n", " rate, y = audio\n", " if len(y.shape) == 2:\n", " y = y.T[0]\n", " N = len(y)\n", " T = 1.0 / rate\n", " yf = fft(y)\n", " yf2 = 2.0 / N * np.abs(yf[0 : N // 2])\n", " xf = np.linspace(0.0, 1.0 / (2.0 * T), N // 2)\n", "\n", " volume_per_pitch = {}\n", " total_volume = np.sum(yf2)\n", " for freq, volume in zip(xf, yf2):\n", " if freq == 0:\n", " continue\n", " pitch = get_pitch(freq)\n", " if pitch not in volume_per_pitch:\n", " volume_per_pitch[pitch] = 0\n", " volume_per_pitch[pitch] += 1.0 * volume / total_volume\n", " volume_per_pitch = {k: float(v) for k, v in volume_per_pitch.items()}\n", " return volume_per_pitch\n", "\n", "\n", "demo = gr.Interface(\n", " main_note,\n", " gr.Audio(source=\"microphone\"),\n", " gr.Label(num_top_classes=4),\n", " examples=[\n", " [os.path.join(os.path.abspath(''),\"audio/recording1.wav\")],\n", " [os.path.join(os.path.abspath(''),\"audio/cantina.wav\")],\n", " ],\n", " interpretation=\"default\",\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: main_note"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio scipy numpy matplotlib"]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "os.mkdir('audio')\n", "!wget -q -O audio/cantina.wav https://github.com/gradio-app/gradio/raw/main/demo/main_note/audio/cantina.wav\n", "!wget -q -O audio/recording1.wav https://github.com/gradio-app/gradio/raw/main/demo/main_note/audio/recording1.wav"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["from math import log2, pow\n", "import os\n", "\n", "import numpy as np\n", "from scipy.fftpack import fft\n", "\n", "import gradio as gr\n", "\n", "A4 = 440\n", "C0 = A4 * pow(2, -4.75)\n", "name = [\"C\", \"C#\", \"D\", \"D#\", \"E\", \"F\", \"F#\", \"G\", \"G#\", \"A\", \"A#\", \"B\"]\n", "\n", "\n", "def get_pitch(freq):\n", " h = round(12 * log2(freq / C0))\n", " n = h % 12\n", " return name[n]\n", "\n", "\n", "def main_note(audio):\n", " rate, y = audio\n", " if len(y.shape) == 2:\n", " y = y.T[0]\n", " N = len(y)\n", " T = 1.0 / rate\n", " yf = fft(y)\n", " yf2 = 2.0 / N * np.abs(yf[0 : N // 2])\n", " xf = np.linspace(0.0, 1.0 / (2.0 * T), N // 2)\n", "\n", " volume_per_pitch = {}\n", " total_volume = np.sum(yf2)\n", " for freq, volume in zip(xf, yf2):\n", " if freq == 0:\n", " continue\n", " pitch = get_pitch(freq)\n", " if pitch not in volume_per_pitch:\n", " volume_per_pitch[pitch] = 0\n", " volume_per_pitch[pitch] += 1.0 * volume / total_volume\n", " volume_per_pitch = {k: float(v) for k, v in volume_per_pitch.items()}\n", " return volume_per_pitch\n", "\n", "\n", "demo = gr.Interface(\n", " main_note,\n", " gr.Audio(sources=[\"microphone\"]),\n", " gr.Label(num_top_classes=4),\n", " examples=[\n", " [os.path.join(os.path.abspath(''),\"audio/recording1.wav\")],\n", " [os.path.join(os.path.abspath(''),\"audio/cantina.wav\")],\n", " ],\n", " interpretation=\"default\",\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()\n"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}
2 changes: 1 addition & 1 deletion demo/main_note/run.py
@@ -42,7 +42,7 @@ def main_note(audio):
 
 demo = gr.Interface(
     main_note,
-    gr.Audio(source="microphone"),
+    gr.Audio(sources=["microphone"]),
     gr.Label(num_top_classes=4),
     examples=[
         [os.path.join(os.path.dirname(__file__),"audio/recording1.wav")],
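Note that every hunk above touches only the gr.Audio constructor call; transcribe and main_note themselves are unchanged, so the Python-side contract is still a (sample_rate, numpy_array) tuple. A small handler in that style — the 16-bit PCM assumption is mine, not something this commit guarantees:

import numpy as np

def peak_dbfs(audio):
    # Default gr.Audio value: (sample_rate, data) with integer PCM data;
    # stereo arrays have shape (samples, 2), which is what main_note's
    # y.T[0] channel selection assumes.
    sr, y = audio
    if y.ndim == 2:
        y = y.T[0]
    y = y.astype(np.float32) / np.iinfo(np.int16).max  # assumes 16-bit PCM
    return 20 * np.log10(np.max(np.abs(y)) + 1e-9)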
