# Building a Simple Local AI Voice Assistant

# Requirements

1. Be able to control the voice assistant by voice
2. Be able to talk to it as well as have it perform commands/actions
3. Tasks
   1. Create tasks in some task backlog db
   2. Create, read, edit and delete files locally
   3. Send emails
   4. Fully local setup (audio transcription and the AI should be fully local)
   5. Answer questions about personal notes and knowledge management stuff 

In [28]:
# 1 - Voice control
# We need an audio transcription (speech-to-text) model to convert audio to text.
# We use OpenAI's Whisper large-v3-turbo checkpoint from the Hugging Face Hub.
# source: https://huggingface.co/openai/whisper-large-v3-turbo
# Setup:
#   pip install --upgrade pip
#   pip install --upgrade transformers datasets[audio] accelerate

import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline


# Use the first CUDA GPU with half precision when one is available;
# otherwise fall back to the CPU with full precision.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

model_id = "openai/whisper-large-v3-turbo"

# Load the checkpoint weights and move the model to the selected device.
model = AutoModelForSpeechSeq2Seq.from_pretrained(
    model_id, torch_dtype=torch_dtype, low_cpu_mem_usage=True, use_safetensors=True
)
model.to(device)

# The processor supplies both the tokenizer and the feature extractor
# that the pipeline below is built from.
processor = AutoProcessor.from_pretrained(model_id)

# `pipe` is the speech-recognition callable reused by later cells.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    torch_dtype=torch_dtype,
    device=device,
)

# Smoke test: transcribe a sample file and print the recognised text.
result = pipe("audio-testfile.mp3")
print(result["text"])

Device set to use cpu
Using custom `forced_decoder_ids` from the (generation) config. This is deprecated in favor of the `task` and `language` flags/config options.
Transcription using a multilingual Whisper will default to language detection followed by transcription instead of translation to English. This might be a breaking change for your use case. If you want to instead always translate your audio to English, make sure to pass `language='en'`. See https://github.com/huggingface/transformers/pull/28687 for more details.


 This is the audio test file.


In [None]:
# 2 - Interaction using an LLM
# We use the llama3.2 model through Ollama: https://ollama.com/
# Setup:
#   pip install ollama
#   Python client: https://github.com/ollama/ollama-python
# NOTE: the model has to be pulled before first use, otherwise the call
# fails with a "model not found, try pulling it first" ResponseError.

import ollama
# Smoke test: send a single user message and print the reply text.
response = ollama.chat(model='llama3.2', messages=[
  {
    'role': 'user',
    'content': 'Why is the sky blue?',
  },
])
print(response['message']['content'])


ResponseError: model "llama3.3" not found, try pulling it first (status code: 404)

In [30]:
def get_response(prompt):
    """
    Ask the llama3.2 model a single question and return its answer.

    :param prompt: Text sent to the model as one user message
    :return: The content of the model's reply message
    """
    user_message = {'role': 'user', 'content': prompt}
    reply = ollama.chat(model='llama3.2', messages=[user_message])
    return reply['message']['content']

get_response("What is the country known for having the best weather in the world?")

'It\'s challenging to pinpoint a single country with the "best" weather, as opinions on ideal weather conditions vary greatly. However, some countries are often considered to have pleasant and consistent climates.\n\nAccording to various surveys, reviews, and climate data, countries like:\n\n1. **Hawaii (USA)**: Known for its tropical climate with warm temperatures (70-85°F/21-30°C) and low humidity throughout the year.\n2. **Barbados**: A small island nation in the Caribbean with a subtropical climate, featuring mild temperatures (75-82°F/24-28°C) and plenty of sunshine (over 300 days per year).\n3. **Maldives**: An island nation in the Indian Ocean with a tropical monsoon climate, characterized by warm temperatures (84-91°F/29-33°C) and high humidity.\n4. **Spain (Mediterranean coast)**: The southern region of Spain, particularly the Costa del Sol, enjoys a mild Mediterranean climate with pleasant temperatures (64-77°F/18-25°C) during the summer months.\n\nHowever, if I had to pick o

In [34]:
import pyaudio
import wave

def record_audio(filename="prompt.mp3", duration=5, sample_rate=44100, channels=2, chunk=1024):
    """
    Record audio from the microphone and save it as a 16-bit PCM WAV file.

    The output is always written as WAV: any ``.mp3`` in ``filename`` is
    replaced with ``.wav`` (so the default writes ``prompt.wav``).

    :param filename: Name of the output file (default: "prompt.mp3", saved as "prompt.wav")
    :param duration: Duration of the recording in seconds (default: 5)
    :param sample_rate: Sample rate of the recording (default: 44100 Hz)
    :param channels: Number of audio channels to capture and store
                     (default: 2 for stereo); the input device must support
                     this many channels
    :param chunk: Number of frames per buffer (default: 1024)
    """
    output_path = filename.replace('.mp3', '.wav')
    sample_format = pyaudio.paInt16

    p = pyaudio.PyAudio()
    try:
        # Query the sample width while the PortAudio session is still open.
        sample_width = p.get_sample_size(sample_format)

        # Capture with the same channel count that is later written to the
        # WAV header; a mismatch makes players misread the sample data.
        stream = p.open(format=sample_format,
                        channels=channels,
                        rate=sample_rate,
                        input=True,
                        frames_per_buffer=chunk)
        try:
            print("Recording...")

            # Whole buffers only: a trailing partial buffer is not recorded.
            buffer_count = int(sample_rate / chunk * duration)
            frames = [stream.read(chunk) for _ in range(buffer_count)]

            print("Recording finished.")
        finally:
            # Release the input stream even if a read fails.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    # Save the recorded data as a WAV file
    with wave.open(output_path, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(sample_rate)
        wf.writeframes(b''.join(frames))

    print(f"Audio saved as {output_path}")

# Example usage:            
record_audio()

Recording...
Recording finished.
Audio saved as prompt.wav


In [35]:
from transformers import pipeline

def transcribe(audio_filepath):
    """
    Transcribe an audio file with the module-level speech-recognition pipeline.

    :param audio_filepath: Path of the audio file to transcribe
    :return: The value stored under "text" in the pipeline result
    """
    return pipe(audio_filepath)["text"]

transcribe("./prompt.wav")

' Why is the sky blue?'

In [38]:
record_audio()
prompt = transcribe("./prompt.wav")
get_response(prompt)

Recording...
Recording finished.
Audio saved as prompt.wav


'I\'d be happy to try. However, I need more information from you about the type of song and lyrics you\'re looking for.\n\nHere\'s a sample verse with the phrase "Cutting knowledge date" written as a lyric:\n\n"In the digital age, we\'re living in time\nWhere knowledge is power, but it\'s hard to define\nA cutting knowledge date, that\'s what they say\nBut information is fleeting, and it\'s here to stay"\n\nPlease let me know if you\'d like me to modify this verse or write an entirely new song based on the phrase "Cutting Knowledge Date". What genre of music are you looking for (e.g. pop, rock, hip-hop)?'

**Tasks**
   1. Create tasks in some task backlog db
   2. Create, read, edit and delete files locally
   3. Send emails
   4. Fully local setup (audio transcription and the AI should be fully local)
   5. Answer questions about personal notes and knowledge management stuff 

   6. GPS Directions
   7. Local trivia
   8. 

In [41]:
# Create the task "database" first, before writing the tools the model will call.
# It is an in-memory pandas DataFrame held in the module-level name `tasks_df`.
import pandas as pd
from datetime import datetime

# Create an empty DataFrame for the tasks database.
# Columns: task description, status, creation date, completion date.
tasks_df = pd.DataFrame(columns=['task', 'status', 'creation_date', 'completed_date'])

# Display the (still empty) table in the notebook.
tasks_df

Unnamed: 0,task,status,creation_date,completed_date


In [42]:
# Tool: append one task to the backlog
def add_task(task_description):
    """
    Append a new task row to the global ``tasks_df`` table.

    The row gets the status 'Not Started', the current local time as its
    creation date and no completion date. ``tasks_df`` is rebound to the
    extended table.

    :param task_description: Text describing the task
    :return: The updated tasks DataFrame
    """
    global tasks_df

    created_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    task_row = pd.DataFrame({
        'task': [task_description],
        'status': ['Not Started'],
        'creation_date': [created_at],
        'completed_date': [None]
    })

    # ignore_index renumbers the rows so the index stays 0..n-1.
    tasks_df = pd.concat([tasks_df, task_row], ignore_index=True)
    return tasks_df

# Tool Calling

Tool calling is about giving LLMs the ability to perform actions.

```
{
    'type': 'function',
    'function': {
        'name': 'create_file',
        'description': 'Create a new file with given content',
        'parameters': {
            'type': 'object',
            'properties': {
                'filename': {
                    'type': 'string',
                    'description': 'The name of the file to create',
                },
                'content': {
                    'type': 'string',
                    'description': 'The content to write to the file',
                },
            },
            'required': ['filename', 'content'],
        },
    },
},
```

In [43]:
# Tool schema describing `add_task` to the model: a function named
# 'add_task' with one required string argument, 'task_description'.
tool_add_tasks_to_db = {
    'type': 'function',
    'function': {
        'name': 'add_task',
        'description': 'Add a task to the tasks database',
        'parameters': {
            'type': 'object',
            'properties': {
                'task_description': {
                    'type': 'string',
                    'description': 'The description of the task to add',
                },
            },
            'required': ['task_description'],
        },
    },
}

In [44]:
# Let the model create tasks in the backlog through the add_task tool
def get_response_with_tools(prompt):
    """
    Send a prompt to llama3.2 with the add_task tool available.

    Every 'add_task' tool call in the reply is executed and reported with a
    printed confirmation; in that case nothing is returned. When the reply
    contains no tool calls, its text content is returned instead.

    :param prompt: Text sent to the model as one user message
    :return: The reply text, or None when the reply contained tool calls
    """
    user_turn = {'role': 'user', 'content': prompt}
    reply = ollama.chat(model='llama3.2',
                        messages=[user_turn],
                        tools=[tool_add_tasks_to_db])
    message = reply['message']

    # Plain answer: no tool was requested.
    if 'tool_calls' not in message:
        return message['content']

    for call in message['tool_calls']:
        requested = call['function']
        if requested['name'] != 'add_task':
            continue
        task_description = requested['arguments']['task_description']
        add_task(task_description)
        print(f"Task added: {task_description}")

In [45]:
get_response_with_tools("Create a task to create a local voice AI assistant")

Task added: Create a local voice AI assistant


In [46]:
tasks_df

Unnamed: 0,task,status,creation_date,completed_date
0,Create a local voice AI assistant,Not Started,2025-07-16 14:16:46,


In [47]:
# Tool schemas describing the local file operations to the model.

# create_file: requires 'filename' and 'content' (both strings).
tool_create_file = {
            'type': 'function',
            'function': {
                'name': 'create_file',
                'description': 'Create a new file with given content',
                'parameters': {
                    'type': 'object',
                    'properties': {
                        'filename': {
                            'type': 'string',
                            'description': 'The name of the file to create',
                        },
                        'content': {
                            'type': 'string',
                            'description': 'The content to write to the file',
                        },
                    },
                    'required': ['filename', 'content'],
                },
            },
        }
# read_file: requires 'filename' (string).
tool_read_file = {
            'type': 'function',
            'function': {
                'name': 'read_file',
                'description': 'Read the content of a file',
                'parameters': {
                    'type': 'object',
                    'properties': {
                        'filename': {
                            'type': 'string',
                            'description': 'The name of the file to read',
                        },
                    },
                    'required': ['filename'],
                },
            },
        }
# delete_file: requires 'filename' (string).
tool_delete_file = {
            'type': 'function',
            'function': {
                'name': 'delete_file',
                'description': 'Delete a file',
                'parameters': {
                    'type': 'object',
                    'properties': {
                        'filename': {
                            'type': 'string',
                            'description': 'The name of the file to delete',
                        },
                    },
                    'required': ['filename'],
                },
            },
        }

# edit_file: requires 'filename' and 'content' (both strings).
tool_edit_file = {
            'type': 'function', 
            'function': {
                'name': 'edit_file',
                'description': 'Edit the content of a file',
                'parameters': {
                    'type': 'object',
                    'properties': {
                        'filename': {
                            'type': 'string',
                            'description': 'The name of the file to edit',
                        },
                        'content': {
                            'type': 'string',
                            'description': 'The content to write to the file',
                        },
                    },
                    'required': ['filename', 'content'],
                },
            },
        }
# Tool list passed to the model.
# NOTE(review): tool_edit_file is defined above but is not included in this
# list, so the model is never offered the edit_file tool - confirm whether
# that is intentional.
tools = [tool_create_file, tool_read_file, tool_delete_file, tool_add_tasks_to_db]

In [48]:
import os
# Functions to create, read, edit and delete local text files.
# All text I/O uses UTF-8 explicitly so that content round-trips identically
# on every platform instead of depending on the locale's default encoding.

def _write_text(filename, content):
    """Write ``content`` to ``filename`` as UTF-8, replacing any existing file."""
    with open(filename, 'w', encoding='utf-8') as file:
        file.write(content)


def create_file(filename, content):
    """
    Create a text file with the given content (an existing file is overwritten).

    :param filename: Path of the file to write
    :param content: Text to write to the file
    :return: Confirmation message
    :raises OSError: If the file cannot be opened or written
    """
    _write_text(filename, content)
    return f"File {filename} created successfully"


def read_file(filename):
    """
    Read a text file and return its full content.

    :param filename: Path of the file to read
    :return: The file content as a string
    :raises OSError: If the file does not exist or cannot be read
    """
    with open(filename, 'r', encoding='utf-8') as file:
        return file.read()


def edit_file(filename, content):
    """
    Replace the whole content of a text file with ``content``.

    The file is opened in write mode, so it is created when it does not exist.

    :param filename: Path of the file to edit
    :param content: New text for the file
    :return: Confirmation message
    :raises OSError: If the file cannot be opened or written
    """
    _write_text(filename, content)
    return f"File {filename} edited successfully"


def delete_file(filename):
    """
    Delete a file.

    :param filename: Path of the file to delete
    :return: Confirmation message
    :raises OSError: If the file does not exist or cannot be removed
    """
    os.remove(filename)
    return f"File {filename} deleted successfully"

# Send a prompt to the model and run the task / file tools it asks for
def get_response_with_tools(prompt):
    """
    Send a prompt to llama3.2 with the module-level ``tools`` available and
    execute every tool call in the reply.

    Supported tool names: add_task, create_file, read_file, edit_file and
    delete_file. Each executed call is reported with printed messages; an
    unrecognised tool name is reported instead of being silently ignored.

    :param prompt: Text sent to the model as one user message
    :return: The reply text when the reply contains no tool calls,
             otherwise None (tool results are printed, not returned)
    """
    response = ollama.chat(model='llama3.2',
                           messages=[{'role': 'user', 'content': prompt}],
                           tools=tools)
    message = response['message']

    # Plain answer: no tool was requested.
    if 'tool_calls' not in message:
        return message['content']

    # NOTE: the model can only request tools whose schema is in the `tools`
    # list passed above; edit_file is dispatched here so it works as soon as
    # its schema is offered.
    # SECURITY: filenames and content come straight from model output and are
    # used unvalidated, so the model can overwrite or delete any file this
    # process can access. Restrict paths before using this beyond a demo.
    for tool_call in message['tool_calls'] or []:  # tolerate a None value
        name = tool_call['function']['name']
        arguments = tool_call['function']['arguments']

        if name == 'add_task':
            task_description = arguments['task_description']
            add_task(task_description)
            print(f"Task added: {task_description}")
        elif name == 'create_file':
            print("Creating file...")
            filename = arguments['filename']
            content = arguments['content']
            create_file(filename, content)
            print(f"File created: {filename}")
        elif name == 'read_file':
            print("Reading file...")
            filename = arguments['filename']
            content = read_file(filename)
            print(f"File content: {content}")
        elif name == 'edit_file':
            print("Editing file...")
            filename = arguments['filename']
            content = arguments['content']
            edit_file(filename, content)
            print(f"File edited: {filename}")
        elif name == 'delete_file':
            print("Deleting file...")
            filename = arguments['filename']
            delete_file(filename)
            print(f"File deleted: {filename}")
        else:
            print(f"Unknown tool requested: {name}")

In [55]:
get_response_with_tools("Create a file called 'test.txt' with the content 'Hello, world!'")

Creating file...
File created: test.txt


In [57]:
tasks_df

Unnamed: 0,task,status,creation_date,completed_date
0,Create a local voice AI assistant,Not Started,2025-07-16 14:16:46,


In [58]:
record_audio(duration=5)
prompt = transcribe("./prompt.wav")
get_response_with_tools(prompt)

Recording...
Recording finished.
Audio saved as prompt.wav




Creating file...
File created: experience.txt


In [59]:
# Save tasks_df to CSV file
tasks_df.to_csv('tasks.csv', index=False)
print("Tasks saved to tasks.csv")

Tasks saved to tasks.csv
