<a href="https://colab.research.google.com/github/ishupandey098/low-latency-voice-rag-pipeline/blob/main/rag_pipeline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
!pip uninstall -y whisper
!pip install -q openai-whisper faiss-cpu sentence-transformers rank-bm25 groq coqui-tts



[0m

In [57]:
import threading
import whisper
import importlib
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Reload the whisper module to ensure the newly installed package is used
importlib.reload(whisper)

<module 'whisper' from '/usr/local/lib/python3.12/dist-packages/whisper/__init__.py'>

In [58]:
def audio_to_query(audio_path):
    """Transcribe an audio file and return the recognised text.

    Args:
        audio_path: Path of the audio file handed to ``asr_model.transcribe``.

    Returns:
        The value stored under the ``"text"`` key of the transcription result.

    Note:
        ``asr_model`` is a notebook-level global that is not defined in the
        visible cells — presumably a loaded Whisper model; it must exist in the
        kernel before this function is called.
    """
    transcription = asr_model.transcribe(audio_path)
    recognised_text = transcription["text"]
    return recognised_text
# Sentence-embedding model shared by the notebook: it encodes the document
# chunks when the FAISS index is built and the query inside hybrid_search().
embed_model = SentenceTransformer("all-MiniLM-L6-v2")


In [59]:
def stream_asr(audio_path, callback):
    """Simulate streaming ASR by replaying a finished transcript word by word.

    The whole file is transcribed first; the transcript is then split on
    whitespace and ``callback`` is invoked once per word with the cumulative
    text so far (each partial ends with a single trailing space).

    Args:
        audio_path: Path of the audio file handed to ``asr_model.transcribe``.
        callback: Callable taking one string argument (the partial transcript).

    Note:
        ``asr_model`` is a notebook-level global not defined in the visible
        cells; it must exist in the kernel before this is called.
    """
    transcript = asr_model.transcribe(audio_path)["text"]

    heard_so_far = []
    for token in transcript.split():
        heard_so_far.append(token)
        # Same shape as incremental concatenation: words joined by one space
        # plus a trailing space.
        callback(" ".join(heard_so_far) + " ")


In [60]:
def prefetch_rag(query):
    """Placeholder prefetch hook: only logs the query it was asked to prefetch.

    Args:
        query: Partial or full transcript text to prefetch retrieval for.
    """
    log_prefix = "Prefetch RAG for:"
    print(log_prefix, query)


In [61]:
def on_partial_text(text, min_words=4):
    """Kick off a background RAG prefetch once a partial transcript is long enough.

    Args:
        text: Partial transcript produced so far.
        min_words: A prefetch is started only when ``text`` contains MORE than
            this many whitespace-separated words. Defaults to 4, the threshold
            that used to be hard-coded.

    Returns:
        None. The prefetch runs on its own thread and is not joined here.

    Note:
        Every qualifying partial starts a new thread; nothing de-duplicates
        successive partials of the same utterance.
    """
    if len(text.split()) > min_words:
        threading.Thread(target=prefetch_rag, args=(text,)).start()


In [62]:
# Demo: replay the transcript of sample.wav word by word; on_partial_text
# receives each growing partial and decides whether to start a prefetch.
stream_asr("sample.wav", on_partial_text)




Prefetch RAG for: What are the safety rules? 


In [63]:
!pip install -q rank-bm25
!pip install -q pypdf nltk
from pypdf import PdfReader
import nltk
nltk.download("punkt")
from nltk.tokenize import sent_tokenize
from rank_bm25 import BM25Okapi
from sentence_transformers import SentenceTransformer, CrossEncoder

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [64]:
# Cross-encoder reranker (accurate but slower).
# rerank() calls reranker.predict() on (query, chunk) pairs to order candidates.
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")


In [65]:
from pypdf import PdfReader

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, one newline after each page.

    Pages whose ``extract_text()`` result is empty or ``None`` are skipped.

    Args:
        pdf_path: Location of the PDF, passed straight to ``PdfReader``.

    Returns:
        A single string: each non-empty page's text followed by ``"\\n"``.
    """
    pdf = PdfReader(pdf_path)
    per_page = (page.extract_text() for page in pdf.pages)
    return "".join(page_text + "\n" for page_text in per_page if page_text)


In [66]:
# Load the manual and keep its full text in `raw_text` for the chunking step.
# NOTE(review): the absolute /content/... path presumably targets a Colab
# runtime — the file has to be uploaded there before this cell runs.
pdf_path = "/content/hardware_manual.pdf"  # change if name differs
raw_text = extract_text_from_pdf(pdf_path)

print(raw_text[:1000])  # sanity check: show only the first 1000 characters


www.ismacontrolli.com DMP224en | 1st Issue rev. 5 | 06/2025
RAC18-IP
User Manual
Hardware

RAC18-IP Hardware User Manual
www.ismacontrolli.com DMP224en | 1st Issue rev. 5 | 06/2025 page 2 of 62
Table of Contents
1 Introduction ............................................................................................................................. 4
1.1 Revision History................................................................................................................................4
2 Safety Rules.............................................................................................................................. 5
2.1 General...............................................................................................................................................5
2.2 UL ........................................................................................................................................................6
3 Technical Specification .........

In [67]:
def chunk_text_for_voice(text, max_words=80):
    """Group sentences into chunks of at most ``max_words`` words.

    The text is split into sentences with ``sent_tokenize``; sentences are
    never cut, so a single sentence longer than ``max_words`` becomes a chunk
    of its own that exceeds the limit.

    Args:
        text: Raw document text.
        max_words: Soft upper bound on whitespace-separated words per chunk.

    Returns:
        List of chunk strings (sentences joined by a single space). Never
        contains an empty string.
    """
    chunks = []
    current_chunk = []
    word_count = 0

    for sentence in sent_tokenize(text):
        sentence_len = len(sentence.split())

        # Flush only when there is something buffered; otherwise an oversized
        # first sentence would emit an empty "" chunk.
        if current_chunk and word_count + sentence_len > max_words:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            word_count = 0

        current_chunk.append(sentence)
        word_count += sentence_len

    if current_chunk:
        chunks.append(" ".join(current_chunk))

    return chunks


In [68]:
import nltk
nltk.download('punkt_tab')


[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [69]:
# Split the manual text into sentence-aligned chunks (default max_words=80).
# `documents` is the corpus that the later cells embed, index and rerank.
documents = chunk_text_for_voice(raw_text)

# Quick inspection: how many chunks were produced and what the first looks like.
print("Total chunks created:", len(documents))
print("\nSample chunk:\n")
print(documents[0])


Total chunks created: 104

Sample chunk:

www.ismacontrolli.com DMP224en | 1st Issue rev. 5 | 06/2025
RAC18-IP
User Manual
Hardware

RAC18-IP Hardware User Manual
www.ismacontrolli.com DMP224en | 1st Issue rev.


In [70]:
# Keep only chunks with more than 10 words.
# NOTE: this rebinds `documents`. The FAISS index and BM25 model are built from
# `documents` in later cells and the search functions return positions into it,
# so this cell has to run before those are built.
documents = [
    chunk for chunk in documents
    if len(chunk.split()) > 10
]


In [71]:
# Embed every chunk; row i of `embeddings` corresponds to documents[i].
embeddings = embed_model.encode(documents, convert_to_numpy=True)

# Build a FAISS IndexFlatL2 sized to the embedding width and add all chunk
# vectors, so ids returned by index.search() are positions in `documents`.
index = faiss.IndexFlatL2(embeddings.shape[1])
index.add(embeddings)


In [72]:
# Lexical side of the hybrid search: lower-case, whitespace-tokenised chunks.
# hybrid_search() tokenises the query the same way before scoring.
tokenized_docs = [doc.lower().split() for doc in documents]
bm25 = BM25Okapi(tokenized_docs)


In [73]:
def hybrid_search(query, top_k=5):
    """Return candidate chunk ids from vector search and BM25 combined.

    Args:
        query: Query text.
        top_k: Number of candidates requested from each retriever.

    Returns:
        Sorted list of unique plain-``int`` positions into ``documents``
        (at most ``2 * top_k`` entries).

    Relies on the notebook globals ``embed_model``, ``index`` and ``bm25``.
    """
    # Vector search
    q_emb = embed_model.encode([query], convert_to_numpy=True)
    _, vector_ids = index.search(q_emb, top_k)
    # FAISS pads the result with -1 when the index holds fewer than top_k
    # vectors; left in, -1 would later select documents[-1] by accident.
    vector_results = {int(i) for i in vector_ids[0] if i >= 0}

    # BM25 search: argsort is ascending, so the tail holds the best scores
    bm25_scores = bm25.get_scores(query.lower().split())
    bm25_ids = np.argsort(bm25_scores)[-top_k:]
    bm25_results = {int(i) for i in bm25_ids}

    # Union of both, in a deterministic order
    return sorted(vector_results | bm25_results)


In [74]:
def rerank(query, doc_ids):
    """Order candidate chunks by cross-encoder relevance to the query.

    Args:
        query: Query text.
        doc_ids: Positions into the notebook-level ``documents`` list.

    Returns:
        The chunk texts for ``doc_ids``, highest reranker score first.
    """
    candidate_pairs = []
    for doc_id in doc_ids:
        candidate_pairs.append((query, documents[doc_id]))

    relevance = reranker.predict(candidate_pairs)

    def _score(entry):
        return entry[1]

    best_first = sorted(zip(doc_ids, relevance), key=_score, reverse=True)
    return [documents[doc_id] for doc_id, _ in best_first]


In [75]:
def filler_response():
    """Print the agent's holding line, shown while retrieval is in progress."""
    holding_line = "Give me a second while i check that for you..."
    print("Voice Agent:", holding_line)



In [76]:
import time

In [77]:
def rewrite_query(query, history):
    """Expand a "second ..." follow-up using the most recent history entry.

    Args:
        query: The user's current query text.
        history: Sequence of earlier topics/queries; only the last one is used.

    Returns:
        ``"Explain the second item related to <last history entry>"`` when the
        query contains the standalone word "second" (any letter case) and
        ``history`` is non-empty; otherwise ``query`` unchanged.

    NOTE(review): this function is defined again in the next cell, and that
    later definition is the one callers get — one of the two can be removed.
    """
    import re  # local import so this cell does not depend on a later import cell

    # Match the whole word only: a plain substring test also fires on
    # "seconds" and "secondary", which are not follow-up references.
    if history and re.search(r"\bsecond\b", query.lower()):
        return f"Explain the second item related to {history[-1]}"
    return query


In [78]:
def rewrite_query(query, history):
    """Expand a "second ..." follow-up using the most recent history entry.

    Args:
        query: The user's current query text.
        history: Sequence of earlier topics/queries; only the last one is used.

    Returns:
        ``"Explain the second item related to <last history entry>"`` when the
        query contains the standalone word "second" (any letter case) and
        ``history`` is non-empty; otherwise ``query`` unchanged.

    NOTE(review): the preceding cell also defines ``rewrite_query``; this later
    definition shadows it, so one of the two can be removed.
    """
    import re  # local import so this cell does not depend on a later import cell

    # Match the whole word only: a plain substring test also fires on
    # "seconds" and "secondary", which are not follow-up references.
    if history and re.search(r"\bsecond\b", query.lower()):
        return f"Explain the second item related to {history[-1]}"
    return query

In [85]:
# Task 2 demo: transcribe the sample question and retrieve an answer chunk.
# NOTE(review): task2_pipeline is not defined in any visible cell (execution
# counts jump from 78 to 85), so this fails on a fresh kernel unless the
# missing cell is restored — TODO confirm where it lives.
query = audio_to_query("sample.wav")
answer_doc = task2_pipeline(query)

print("\nRetrieved Answer Chunk:")
print(answer_doc)




Voice Agent: Give me a second while i check that for you...

Retrieved Answer Chunk:
Revision history
RAC18-IP Hardware User Manual
www.ismacontrolli.com DMP224en | 1st Issue rev. 5 | 06/2025 page 5 of 62
•
•
•
•
•
•
•
•
•
•
•
•
•
•
•
•
2 Safety Rules
2.1 General
Improper wiring of the product can damage it and lead to other hazards. Make 
sure that the product has been correctly wired before turning the power on. Before wiring or removing/mounting the product, make sure to turn the power off.


In [97]:
import re
import nltk

def voice_optimized_response(text):
    """
    Converts technical, text-heavy RAG output
    into short, spoken-English sentences.

    Args:
        text: Retrieved chunk text. ``None`` or text that is empty after
            removing bullets and whitespace yields ``""``.

    Returns:
        One string of sentences separated by single spaces, each ending in
        terminal punctuation (``.``, ``!`` or ``?``).
    """
    if not text:
        return ""

    # 1. Remove bullet points and normalize whitespace
    text = text.replace("•", "")
    text = re.sub(r'\s+', ' ', text).strip()
    if not text:
        return ""

    # 2. Pre-process periods that are not sentence ends, so they are read
    #    naturally and do not confuse the sentence tokenizer.
    # Spell out the dots of any "www." host name,
    # e.g. 'www.ismacontrolli.com' -> 'www dot ismacontrolli dot com'.
    # Done first so digits inside a host name are not touched by the next rule.
    text = re.sub(
        r'\bwww(?:\.[A-Za-z0-9-]+)+',
        lambda match: match.group(0).replace(".", " dot "),
        text,
    )

    # Replace numerical section periods with ' point ' for natural speech
    text = re.sub(r'(\d)\.(\d)', r'\1 point \2', text)

    # Handle 'rev.' abbreviation for better speech
    text = text.replace("rev.", "revision")

    # 3. Complex words -> simpler spoken terms. Applied to whole words only,
    #    otherwise inflected forms get corrupted ("damaged" -> "destroyd").
    # NOTE(review): "damage" -> "destroy" strengthens the meaning of the
    # source text; confirm this wording is intended.
    word_swaps = {
        "indicates": "means",
        "occurs": "happens",
        "approximately": "nearly",
        "utilize": "use",
        "damage": "destroy",
        "hazards": "problems",
    }

    # 4. Phonetic hints for technical terms (upper-case, matched as-is)
    acronym_hints = {
        "SATA": "S A T A",
        "SMART": "S M A R T",
        "HDD": "hard disk",
    }

    simplified = []

    # Use NLTK for more robust sentence tokenization
    for s in nltk.sent_tokenize(text):
        s = s.strip()
        if not s:
            continue

        if s.endswith('.'):
            s = s.rstrip('.') + '.'  # Collapse a run of periods into one
        elif not s.endswith(('!', '?')):
            s = s + '.'  # Add a period only when no terminal mark is present

        for plain_word, spoken_word in word_swaps.items():
            s = re.sub(rf'\b{plain_word}\b', spoken_word, s)

        for acronym, spoken_form in acronym_hints.items():
            s = s.replace(acronym, spoken_form)

        simplified.append(s)

    # 5. Join as short spoken sentences (each already ends with punctuation)
    return " ".join(simplified)

In [98]:
def task3_pipeline(query):
    """Retrieve the best chunk for ``query`` and rewrite it for speech.

    Args:
        query: Query text, forwarded unchanged to ``task2_pipeline``.

    Returns:
        The result of ``voice_optimized_response`` applied to the chunk that
        ``task2_pipeline`` returned.

    Note:
        ``task2_pipeline`` is not defined in the visible cells; it must already
        exist in the kernel.
    """
    # Task 2 retrieval output is piped straight into the spoken-English rewriter.
    return voice_optimized_response(task2_pipeline(query))


In [99]:
# Task 3 demo: transcribe the sample question, retrieve a chunk and print its
# voice-optimised rewrite.
query = audio_to_query("sample.wav")
final_voice_text = task3_pipeline(query)

print("Final Voice Output:\n")
print(final_voice_text)



Voice Agent: Give me a second while i check that for you...
Final Voice Output:

Revision history RAC18-IP Hardware User Manual www dot ismacontrolli dot com DMP224en | 1st Issue revision 5 | 06/2025 page 5 of 62 2 Safety Rules 2 point 1 General Improper wiring of the product can destroy it and lead to other problems. Make sure that the product has been correctly wired before turning the power on. Before wiring or removing/mounting the product, make sure to turn the power off.


In [100]:
!pip install gtts
from gtts import gTTS
from IPython.display import Audio

def speak_text(text, lang='en', filename="output.mp3"):
    """Synthesise ``text`` with gTTS, save it as an MP3 and return a player.

    Args:
        text: Text to be spoken.
        lang: Language code passed to ``gTTS``.
        filename: Where the MP3 is written. Defaults to ``"output.mp3"``, the
            previously hard-coded name, so existing calls behave the same;
            pass another name to avoid overwriting an earlier recording.

    Returns:
        An ``IPython.display.Audio`` object for ``filename`` with autoplay on.
    """
    tts = gTTS(text=text, lang=lang)
    tts.save(filename)
    return Audio(filename, autoplay=True)

# End-to-end demo: transcribe the question, retrieve a chunk, then speak it.
query = audio_to_query("sample.wav")
answer_doc = task2_pipeline(query)  # call your existing pipeline

print("\nRetrieved Answer Chunk:")
print(answer_doc)

# NOTE(review): this speaks the raw retrieved chunk (bullets, URL, page footer
# included), not the voice_optimized_response() rewrite — confirm whether the
# optimised text was meant to be passed here instead.
speak_text(answer_doc)





Voice Agent: Give me a second while i check that for you...

Retrieved Answer Chunk:
Revision history
RAC18-IP Hardware User Manual
www.ismacontrolli.com DMP224en | 1st Issue rev. 5 | 06/2025 page 5 of 62
•
•
•
•
•
•
•
•
•
•
•
•
•
•
•
•
2 Safety Rules
2.1 General
Improper wiring of the product can damage it and lead to other hazards. Make 
sure that the product has been correctly wired before turning the power on. Before wiring or removing/mounting the product, make sure to turn the power off.
