diff --git a/README.md b/README.md
index bc1f473..c5a0115 100644
--- a/README.md
+++ b/README.md
@@ -43,6 +43,7 @@ You can also ask Bob about available tools:
Detailed examples of how to interact with Bob are given in these notebooks:
* [Basic usage](https://github.com/haesleinhuepf/bia-bob/blob/main/demo/basic_demo.ipynb)
+* [Speech recognition](https://github.com/haesleinhuepf/bia-bob/blob/main/demo/speech_recognition.ipynb)
* [Complete Bio-image Analysis Workflow](https://github.com/haesleinhuepf/bia-bob/blob/main/demo/complete_workflow.ipynb)
* [Accessing variables](https://github.com/haesleinhuepf/bia-bob/blob/main/demo/globals.ipynb)
* [Image Filtering](https://github.com/haesleinhuepf/bia-bob/blob/main/demo/image_filtering.ipynb)
diff --git a/demo/speech_recognition.ipynb b/demo/speech_recognition.ipynb
new file mode 100644
index 0000000..1353d4e
--- /dev/null
+++ b/demo/speech_recognition.ipynb
@@ -0,0 +1,178 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "a0e02d10-d5cb-4dc1-afb5-744209b1bc1e",
+ "metadata": {},
+ "source": [
+ "# Voice recognition\n",
+ "\n",
+ "Bob can also listen to commands and execute them. This is powered by the [SpeechRecognition](https://github.com/Uberi/speech_recognition#readme) Python library."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "6683e7c1-cfc6-4ad6-9155-75f268ab7303",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "from bia_bob import bob\n",
+ "\n",
+ "from skimage.io import imread"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "3da08d01-fabe-410f-baa5-3eb80e66849a",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "image = imread('blobs.tif')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "bda0f4dd-8104-44dd-a937-ba7867a6301a",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "bob.initialize(globals())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f9fbb587-d413-47da-be8a-45ab96c0f99f",
+ "metadata": {},
+ "source": [
+ "After executing the next cell, say to your microphone something like \"Segment the image and show the results.\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "63fd4ae0-0709-4ee1-8bf4-b09d5752dc3d",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Listening...\n",
+ "You said: segment the Image and show the results\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "\n",
+ "\n",
+ " | \n",
+ "\n",
+ "\n",
+ "\n",
+ "shape | (254, 256) | \n",
+ "dtype | int32 | \n",
+ "size | 254.0 kB | \n",
+ "min | 0 | max | 64 | \n",
+ " \n",
+ "\n",
+ " | \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "StackViewNDArray([[0, 0, 0, ..., 4, 4, 4],\n",
+ " [0, 0, 0, ..., 4, 4, 4],\n",
+ " [0, 0, 0, ..., 4, 4, 4],\n",
+ " ...,\n",
+ " [0, 0, 0, ..., 0, 0, 0],\n",
+ " [0, 0, 0, ..., 0, 0, 0],\n",
+ " [0, 0, 0, ..., 0, 0, 0]])"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "The image has been segmented and the result is shown as segmented_image.jpg."
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "bob.listen()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "d9daecfe-246c-4ec7-9fa1-f620d2f83414",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Listening...\n",
+ "You said: how many objects are there in the segmented image\n"
+ ]
+ },
+ {
+ "data": {
+ "text/markdown": [
+ "There are 64 objects in the segmented image."
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "bob.listen()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "90373f38-a66f-4c31-92dd-45179afff022",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.16"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/setup.cfg b/setup.cfg
index a95a731..749816e 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -39,6 +39,8 @@ install_requires =
napari-skimage-regionprops
pandas
seaborn
+ SpeechRecognition
+ PyAudio
python_requires = >=3.8
include_package_data = True
diff --git a/src/bia_bob/__init__.py b/src/bia_bob/__init__.py
index 7b2773a..d1011a0 100644
--- a/src/bia_bob/__init__.py
+++ b/src/bia_bob/__init__.py
@@ -4,7 +4,9 @@
)
from ._machinery import bob, init_assistant, add_function_tool
+from ._speech_recognition import _listen
bob.initialize = init_assistant
bob.add_function_tool = add_function_tool
bob.__version__ = __version__
+bob.listen = _listen
diff --git a/src/bia_bob/_speech_recognition.py b/src/bia_bob/_speech_recognition.py
new file mode 100644
index 0000000..422f8f7
--- /dev/null
+++ b/src/bia_bob/_speech_recognition.py
@@ -0,0 +1,38 @@
+def _listen():
+    """
+    Activate the microphone and listen to the user.
+    The spoken command is then passed to Bob and executed.
+    """
+    result = _listen_to_microphone()
+    if result:
+        print("You said:", result)
+
+        from ._machinery import bob
+        bob(result)
+
+
+def _listen_to_microphone():
+    """Recognize speech from the microphone and return it as a string."""
+    import speech_recognition as sr
+
+    # Initialize the recognizer
+    recognizer = sr.Recognizer()
+
+    with sr.Microphone() as source:
+        # Calibrate for ambient noise before recording
+        recognizer.adjust_for_ambient_noise(source)
+        print("Listening...")
+        audio = recognizer.listen(source)
+
+    try:
+        # Transcribe the recording using the Google Web Speech API
+        text = recognizer.recognize_google(audio)
+        return text
+    except sr.UnknownValueError:
+        print("Could not understand audio.")
+        return None
+    except sr.RequestError as e:
+        print("Error calling the speech recognition API: {0}".format(e))
+        return None
+
+
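
For context, a minimal usage sketch of the new `bob.listen()` entry point outside the demo notebook. It assumes the `SpeechRecognition` and `PyAudio` dependencies added to `setup.cfg` are installed, a working microphone is available, and there is internet access for the Google Web Speech API; `blobs.tif` is the same example image used in the notebook above.

```python
from skimage.io import imread
from bia_bob import bob

# Load an example image and make the notebook's variables visible to Bob,
# so generated code can refer to `image` by name.
image = imread('blobs.tif')
bob.initialize(globals())

# Opens the default microphone, transcribes the spoken command via the
# Google Web Speech API and hands the recognized text to bob(...),
# which then generates and executes the corresponding code.
bob.listen()
```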