In [1]:
# Livekit AI Voice Agent with MCP Server M-1

# https://docs.livekit.io/agents/start/voice-ai/

In [2]:
# Choose kernel "Python (.venv_livekit)"

# python -m venv .venv_livekit
# .venv_livekit\scripts\activate
# pip install ipykernel
# python -m ipykernel install --user --name=.venv_livekit --display-name "Python (.venv_livekit)"

# pip freeze > .req_venv_livekit

In [3]:
# Description

In [5]:
# Documentation
# https://learn.deeplearning.ai/courses/building-ai-voice-agents-for-production/lesson/idsit/voice-agent-overview
# https://docs.livekit.io/agents/start/voice-ai/

# Create a project AI_Voice_Assistant on livekit cloud
# https://cloud.livekit.io/projects/p_34qw70e3usd/overview

# Your agent strings together three specialized providers into a high-performance voice pipeline.
# You need accounts and API keys for each.

# Components
# STT
# Provider: OpenAI 
# https://docs.livekit.io/agents/integrations/stt/openai/
# https://pypi.org/project/livekit-plugins-openai/
# Parameter: model, language (model: gpt-4o-transcribe (default) or whisper-1)
# OPENAI_API_KEY in .env
# pip install livekit-plugins-openai

# LLM:
# Provider: OpenAI 
# https://docs.livekit.io/agents/integrations/llm/openai/
# https://pypi.org/project/livekit-plugins-openai/
# Parameter: model, temperature, tool_choice (model: gpt-4o-mini (default) or gpt-4o or o1)
# OPENAI_API_KEY in .env
# pip install livekit-plugins-openai

# TTS
# Provider: Hume
# https://docs.livekit.io/agents/integrations/tts/hume/
# https://pypi.org/project/livekit-plugins-hume/
# Parameter: voice, description, speed, context, instant_mode (voice and/or description: see documentation)
# HUME_API_KEY
# pip install livekit-plugins-hume

# Usage:
# See documentation (voice by name, id or even generated)

#from livekit.plugins import hume
#from hume.tts import PostedUtteranceVoiceWithName

#session = AgentSession(
#   tts=hume.TTS(
#      voice=PostedUtteranceVoiceWithName(name="Colton Rivers", provider="HUME_AI"),
#      description="The voice exudes calm, serene, and peaceful qualities, like a gentle stream flowing through a quiet forest.",
#   )
# ... llm, stt, etc.
#)


In [None]:
# Additional documentation (slightly different)

# Tech with Tim
# Python AI Voice Assistant & Agent - Full Tutorial
# https://www.youtube.com/watch?v=DNWLIAK4BUY

In [None]:
# MCP tool

# https://docs.livekit.io/agents/build/tools/#model-context-protocol-mcp-

In [None]:
# .env

#OPENAI_API_KEY=<Your OpenAI API Key>
#HUME_API_KEY=<Your Hume API Key>
#LIVEKIT_API_KEY=<your API Key>
#LIVEKIT_API_SECRET=<your API Secret>
#LIVEKIT_URL=wss://aivoiceassistant-yf8o74l4.livekit.cloud


In [None]:
# Python Packages

#!pip install python-dotenv

#!pip install livekit-plugins-openai
#!pip install livekit-plugins-hume

#!pip install livekit-agents[openai,hume,silero,turn-detector]~=1.0
#!pip install livekit-plugins-noise-cancellation~=0.2

#!pip install livekit-agents[mcp]~=1.0

In [8]:
#from dotenv import load_dotenv

#from livekit import agents
#from livekit.agents import AgentSession, Agent, RoomInputOptions
#from livekit.plugins import (
#    openai,
#    hume,
#    noise_cancellation,
#    silero,
#)
#from livekit.plugins.turn_detector.multilingual import MultilingualModel

#load_dotenv()

In [1]:
#from dotenv import load_dotenv
#import asyncio

#from livekit import agents
#from livekit.agents import AgentSession, Agent, RoomInputOptions
#from livekit.plugins import (
#    openai,
#    hume,
#    noise_cancellation,
#    silero,
#)
#from livekit.plugins.turn_detector.multilingual import MultilingualModel

#load_dotenv()

True

In [5]:
# from hume.tts import PostedUtteranceVoiceWithName

In [7]:
from dotenv import load_dotenv
import asyncio

from livekit import agents
from livekit.agents import AgentSession, Agent, RoomInputOptions
from livekit.agents import mcp
from livekit.plugins import (
    openai,
    hume,
    noise_cancellation,
    silero, # Silero VAD plugin
)
from livekit.plugins.turn_detector.multilingual import MultilingualModel # LiveKit turn detector plugin

from hume.tts import PostedUtteranceVoiceWithName
from hume.tts import PostedUtteranceVoiceWithId

# Load settings from a local .env file into the process environment before any
# plugin is constructed. Presumably this is where the OpenAI/Hume/LiveKit
# credentials come from -- confirm against your own .env file.
load_dotenv()


# Persistent system prompt of the agent. It is attached to the Agent itself
# (not to a single reply) so the persona and the tool guidance stay in effect
# for every turn of the conversation, not only for the opening greeting.
AGENT_INSTRUCTIONS = """
# Role
You are a friendly and helpful assistant called Anna.
Keep your answer short and to the point.

# Tools
You have access to the following tools via MCP:
## Gmail - Read eMails
Use this tool to read unread eMails
## Gmail - Send eMails
Use this tool to send eMails. This tool requires the recipient email address, the email subject and email body text.
If you have to retrieve an eMail address use the Pinecone Vector Store.
Please underwrite the eMail with 'Anna (Personal Assistant of Jochen)'
## Pinecone Vector Store
Use this tool to retrieve eMail addresses. If you do not find the eMail address, stop and inform the user by 'Sorry, I cannot find the requested email address'.
## Google Calendar - Get calendar events
Use this tool to retrieve events, their time and location from the calendar
## Google Calendar - Make calendar events
Use this tool to schedule events for the calendar
"""

# One-off instruction used only for the very first reply of a session.
GREETING_INSTRUCTIONS = "Greet the user and offer your assistance."

# Default MCP endpoint (see "N8N 2025-06-12 MCP Server M-1").
# Previous endpoint (see "N8N 2025-05-22 MCP server"):
#   https://jn2atbn3.rpcld.cc/mcp/9dd7b985-3694-40e5-a1d6-f9fe3c941dfe/sse
# NOTE(review): the path embeds a server-specific identifier; prefer setting
# MCP_SERVER_URL in the environment over committing it here.
DEFAULT_MCP_SERVER_URL = (
    "https://jn2atbn3.rpcld.cc/mcp/9bdf3e79-c503-4a5c-b055-17bca1b40242/sse"
)


class Assistant(Agent):
    """Voice assistant agent carrying the persistent system prompt.

    Args:
        instructions: System prompt passed to the base ``Agent``. Defaults to
            ``AGENT_INSTRUCTIONS`` (persona plus MCP tool guidance), so
            ``Assistant()`` keeps working without arguments.
    """

    def __init__(self, instructions: str = AGENT_INSTRUCTIONS) -> None:
        super().__init__(instructions=instructions)


async def entrypoint(ctx: agents.JobContext):
    """Job entrypoint: build the voice pipeline, start it and greet the user.

    The session combines OpenAI STT, LLM and TTS with Silero VAD, the
    multilingual turn detector and one HTTP MCP server. The MCP URL is read
    from the ``MCP_SERVER_URL`` environment variable and falls back to
    ``DEFAULT_MCP_SERVER_URL`` when the variable is unset or empty.

    Args:
        ctx: Job context supplied by the worker; its ``room`` is handed to the
            session and ``ctx.connect()`` is awaited after the session starts.
    """
    import os  # local import: the notebook's import cell does not provide `os`

    mcp_server_url = os.getenv("MCP_SERVER_URL") or DEFAULT_MCP_SERVER_URL

    session = AgentSession(
        stt=openai.STT(model="gpt-4o-transcribe"),
        llm=openai.LLM(model="gpt-4o-mini"),
        # Alternative TTS (Hume), kept for reference:
        #   tts=hume.TTS(
        #       voice=PostedUtteranceVoiceWithId(id="0bae3af7-1f3a-426e-9285-13015427577c"),
        #       # or: PostedUtteranceVoiceWithName(name="Ava Song", provider="HUME_AI")
        #       # or: PostedUtteranceVoiceWithName(name="Warm Irish Woman", provider="CUSTOM_VOICE")
        #   ),
        tts=openai.TTS(
            model="gpt-4o-mini-tts",
            voice="shimmer",
            instructions="Speak in a friendly and conversational tone.",
        ),
        vad=silero.VAD.load(),
        turn_detection=MultilingualModel(),
        mcp_servers=[
            mcp.MCPServerHTTP(
                url=mcp_server_url,
                timeout=5,
                client_session_timeout_seconds=5,
            )
        ],
    )

    await session.start(
        room=ctx.room,
        agent=Assistant(),
        room_input_options=RoomInputOptions(
            # LiveKit Cloud enhanced noise cancellation
            # - If self-hosting, omit this parameter
            # - For telephony applications, use `BVCTelephony` for best results
            noise_cancellation=noise_cancellation.BVC(),
        ),
    )

    await ctx.connect()

    # Per-reply instructions only: the persona and tool rules live on the agent.
    await session.generate_reply(instructions=GREETING_INSTRUCTIONS)


if __name__ == "__main__":
    # Hand the entrypoint to the LiveKit agents CLI; the sub-command given on
    # the command line (e.g. download-files, console, dev) selects the mode.
    worker_options = agents.WorkerOptions(entrypoint_fnc=entrypoint)
    agents.cli.run_app(worker_options)

In [None]:
# Download livekit_ai_voice_agent.py 

In [None]:
# Download model files

# To use the turn-detector, silero, or noise-cancellation plugins, you first need to download the model files:
# C:\Users\Gebruiker\.cache\huggingface\hub\models--livekit--turn-detector

In [None]:
# Run in directory jupyter_notebook and in your virtual environment .venv_livekit

# python livekit_ai_voice_agent_mcp.py download-files (already done by python livekit_ai_voice_agent.py download-files)

In [None]:
# Speak to your agent
# Run in directory jupyter_notebook and in your virtual environment .venv_livekit
# Start your agent in console mode to run inside your terminal:

In [None]:
# python livekit_ai_voice_agent_mcp.py console

In [None]:
# Connect to playground
# Run in directory jupyter_notebook and in your virtual environment .venv_livekit
# Start your agent in dev mode to connect it to LiveKit and make it available from anywhere on the internet:

In [None]:
# python livekit_ai_voice_agent_mcp.py dev

In [None]:
# Use the Agents playground to speak with your agent and explore its full range of multimodal capabilities.

# https://docs.livekit.io/agents/start/playground/
# https://agents-playground.livekit.io/

# Congratulations, your agent is up and running. Continue to use the playground or the console mode as you build and test your agent.

# Agent CLI modes
# In the console mode, the agent runs locally and is only available within your terminal.

# Run your agent in dev (development / debug) or start (production) mode to connect to LiveKit and join rooms.

In [None]:
# I can access the agent
#    via the Livekit playground: https://agents-playground.livekit.io/
#    via the Livekit Sandbox directly: https://synchronized-server-2h6j7i.sandbox.livekit.io/ 
#    via the Livekit Sandbox overview: https://cloud.livekit.io/projects/p_34qw70e3usd/sandbox


In [None]:
# NEXT STEPS
# https://docs.livekit.io/agents/start/voice-ai/#next-steps

# https://docs.livekit.io/agents/start/frontend/
# https://docs.livekit.io/agents/build/
...
