In [12]:
from dotenv import load_dotenv
from elevenlabs.client import ElevenLabs
from elevenlabs.play import play
import os

load_dotenv()
#tts
elevenlabs = ElevenLabs(
  api_key=os.getenv("ELEVENLABS_API_KEY"),
)

audio = elevenlabs.text_to_speech.convert(
    text="The first move is what sets everything in motion.",
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    model_id="eleven_multilingual_v2",
    output_format="mp3_44100_128",
)

play(audio)



In [13]:
import requests

r = requests.post("http://localhost:8000/call-me")
r.json()

{'status': 'calling', 'sid': 'CA355794db9ea01cc8c480a24061c3707b'}

In [None]:
import asyncio
import os
from openai import AsyncOpenAI


client = AsyncOpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

SYSTEM_PROMPT = """You are a voice assistant responsible for contacting vendors to call for maintenance requests.
For this current maintenance request, these are the details: f"{}
Let the vendor know the details of the maintenance request and ask for a time when they can come to perform the maintenance. 
When they have given you a time, confirm the time back to them.
"""


TURN_SILENCE_SEC = 0.7          # how long of "no new finals" before we commit a user turn
IGNORE_PARTIALS = True          # only use final/committed transcript chunks for turns
PRINT_RAW_NONTRANSCRIPT = False # set True if you want to debug other messages
SUMMARY_EVERY_TURNS = 10         # summarize after roughly this many turns
LAST_TURNS_TO_KEEP = 2           # keep 1-2 turns in context after summarization


class TurnState:
    def __init__(self):
        self.buffer = []                 # final transcript chunks for current user turn
        self.silence_task = None         # timer that commits a turn after silence
        self.gpt_task = None             # in-flight GPT task
        self.turn_index = 0              # counter
        self.history = []                # list of message dicts (role/content)
        self.summary = ""               # rolling summary


state = TurnState()



def is_final_transcript(msg: dict) -> bool:
    if msg.get("raw").get('message_type') == "committed_transcript":
        return True
    return False


async def call_gpt(messages: list[dict]) -> str:
    """
    Calls GPT and returns assistant text.
    """
    resp = await client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=messages,
        temperature=0.3,
    )
    return resp.choices[0].message.content.strip()


def build_context_messages(user_text: str) -> list[dict]:
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    if state.summary:
        messages.append({"role": "system", "content": f"Conversation summary:\n{state.summary}"})
    if state.history:
        tail_count = LAST_TURNS_TO_KEEP * 2
        messages.extend(state.history[-tail_count:])
    messages.append({"role": "user", "content": user_text})
    return messages


async def summarize_history() -> None:
    if not state.history:
        return

    summary_messages = [
        {
            "role": "system",
            "content": "Summarize the conversation so far for future context. Keep it concise.",
        },
    ]
    if state.summary:
        summary_messages.append({"role": "system", "content": f"Current summary:\n{state.summary}"})
    summary_messages.append({"role": "system", "content": "Subsequent 10 turns of conversation:"})
    summary_messages.extend(state.history)

    resp = await client.chat.completions.create(
        model="gpt-4.1-mini",
        messages=summary_messages,
        temperature=0.2,
    )
    state.summary = resp.choices[0].message.content.strip()

    tail_count = LAST_TURNS_TO_KEEP * 2
    state.history = state.history[-tail_count:]


async def commit_turn_after_silence():
    """
    Waits a short time after the last FINAL chunk.
    If no new final arrives, commits the turn and calls GPT.
    """
    await asyncio.sleep(TURN_SILENCE_SEC)

    user_turn = " ".join(state.buffer).strip()
    state.buffer.clear()

    if not user_turn:
        return

    state.turn_index += 1
    turn_id = state.turn_index

    print(f"\n=== USER TURN {turn_id} ===")
    print(user_turn)

    # Cancel any in-flight GPT task (barge-in style)
    if state.gpt_task and not state.gpt_task.done():
        state.gpt_task.cancel()

    async def run_gpt():
        try:
            messages = build_context_messages(user_turn)
            answer = await call_gpt(messages)
            print(f"\n--- ASSISTANT {turn_id} ---")
            print(answer)
            print()

            state.history.extend([
                {"role": "user", "content": user_turn},
                {"role": "assistant", "content": answer},
            ])

            if turn_id % SUMMARY_EVERY_TURNS == 0:
                await summarize_history()
        except asyncio.CancelledError:
            # Newer user speech arrived; we dropped this response
            print(f"\n--- ASSISTANT {turn_id} cancelled (barge-in) ---\n")
        except Exception as e:
            print(f"\n--- GPT error on turn {turn_id}: {e} ---\n")

    state.gpt_task = asyncio.create_task(run_gpt())



def reset_silence_timer():
    """
    Every time we receive a FINAL transcript chunk, we reset the "end turn" timer.
    """
    if state.silence_task and not state.silence_task.done():
        state.silence_task.cancel()
    state.silence_task = asyncio.create_task(commit_turn_after_silence())


async def handle_transcript_message(msg: dict):
    text = (msg.get("text") or "").strip()
    if not text:
        return

    final = is_final_transcript(msg)

    if not final and IGNORE_PARTIALS:
        # Optionally print partials for debugging
        # print(f"PARTIAL: {text}")
        return

    tag = "FINAL" if final else "PARTIAL"
    print(f"{tag}: {text}")

    if final:
        # Append final chunk to current turn and reset silence timer
        state.buffer.append(text)
        reset_silence_timer()


In [15]:
import json, websockets

async def listen():
    uri = "ws://localhost:8000/ws/client"
    async with websockets.connect(uri) as ws:
        print("Connected to /ws/client (transcript stream)")
        # keep-alive / unblock server's receive loop
        await ws.send("hi")

        while True:
            raw = await ws.recv()
            msg = json.loads(raw)

            src = msg.get("source")
            ev = msg.get("event")
            
  

            if src == "eleven" and ev == "transcript":
                await handle_transcript_message(msg)

            elif ev in {"error", "closed"}:
                print(msg)

            else:
                if PRINT_RAW_NONTRANSCRIPT:
                    print("OTHER:", msg)


await listen()


Connected to /ws/client (transcript stream)
FINAL: Alright, now that it has started beeping, we can now begin. Hello, I have an inquiry about...

=== USER TURN 1 ===
Alright, now that it has started beeping, we can now begin. Hello, I have an inquiry about...

--- ASSISTANT 1 ---
Hello! I have a maintenance request that needs your attention. The issue is that the equipment has started beeping. Could you please let me know a convenient time when you can come to perform the maintenance?

FINAL: What, is it not printing anything out? Okay, it is.

=== USER TURN 2 ===
What, is it not printing anything out? Okay, it is.

--- ASSISTANT 2 ---
Thank you for the update. Since the equipment is beeping and not printing anything out, could you please let me know when you would be available to come and perform the maintenance?

FINAL: Okay, so just to ensure that you do in fact have the history, can you please tell me about what problem I was facing earlier?

=== USER TURN 3 ===
Okay, so just to en

ConnectionClosedError: received 1012 (service restart); then sent 1012 (service restart)

{'status': 'calling', 'sid': 'CA69f9a8c7d5471eed6d56050a42eabf52'}