In [1]:
!pip install faiss-gpu
!pip install transformers
!pip install langchain
!pip install gradio
!pip install elevenlabs
!pip install langchain-community
!pip install pypdf


Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-gpu
Successfully installed faiss-gpu-1.7.2
Collecting gradio
  Downloading gradio-5.9.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.5.2 (from gradio)
  Downloading gradio_client-1.5.2-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSa

In [3]:
import os

import faiss
import gradio as gr
import numpy as np
import requests
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from transformers import pipeline

# Load the PDF file and split it into chunks.
# NOTE(review): load_and_split() appears to already run LangChain's default text
# splitter, so the explicit splitter below is a second pass that only cuts pieces
# longer than chunk_size -- confirm whether both passes are intended.
file_path = "/content/final.pdf"
loader = PyPDFLoader(file_path)
documents = loader.load_and_split()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=5000,
    chunk_overlap=200
)
chunks = text_splitter.split_documents(documents)

# Fail early with a clear message instead of an IndexError on `shape[1]` below.
if not chunks:
    raise ValueError(f"No text could be extracted from {file_path!r}; cannot build the index.")

# Initialize HuggingFaceEmbeddings
hf = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": False},
)

# Generate embeddings for all chunks in one batched call (document API) rather
# than one embed_query() call per chunk.
embeddings = hf.embed_documents([chunk.page_content for chunk in chunks])
embedding_matrix = np.asarray(embeddings, dtype="float32")

# Create a FAISS index; row i of the index corresponds to chunks[i].
dimension = embedding_matrix.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embedding_matrix)

# Save and reload the FAISS index
faiss.write_index(index, "faiss_index.bin")
index = faiss.read_index("faiss_index.bin")

# ElevenLabs API for text-to-speech.
# The API key is read from the environment so that it is never stored in the
# notebook itself; set ELEVENLABS_API_KEY before running this cell.
# SECURITY: a key was previously hardcoded here -- it should be revoked.
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY", "")
if not ELEVENLABS_API_KEY:
    print("ELEVENLABS_API_KEY is not set; text-to-speech requests will be rejected.")
ELEVENLABS_VOICE_ID = "21m00Tcm4TlvDq8ikWAM"  # Replace with your desired voice ID

def text_to_speech_elevenlabs(text):
    """Convert text to speech using the ElevenLabs API.

    Args:
        text: The text to synthesize.

    Returns:
        "output_audio.mp3" (the file the audio was written to) on success;
        otherwise a string starting with "Error:" that describes the failure.
    """
    # Nothing to synthesize: report it without making a network call.
    if not text or not text.strip():
        return "Error: no text provided for speech synthesis"

    url = f"https://api.elevenlabs.io/v1/text-to-speech/{ELEVENLABS_VOICE_ID}"
    headers = {
        "Content-Type": "application/json",
        "xi-api-key": ELEVENLABS_API_KEY
    }
    payload = {
        "text": text,
        "voice_settings": {
            "stability": 0.75,
            "similarity_boost": 0.75
        }
    }
    try:
        # Without a timeout a stalled connection would block the request forever.
        response = requests.post(url, headers=headers, json=payload, timeout=30)
    except requests.RequestException as exc:
        # Keep the same "Error: ..." string contract for connection-level failures.
        return f"Error: request to ElevenLabs failed ({exc})"

    if response.status_code != 200:
        return f"Error: {response.status_code}, {response.text}"

    with open("output_audio.mp3", "wb") as f:
        f.write(response.content)
    return "output_audio.mp3"

# Query FAISS index for similar documents
def query_to_answer(query, k=1):
    """Return up to ``k`` indexed chunks that are closest to ``query``.

    Args:
        query: The query text to embed and search for.
        k: Number of nearest neighbours requested from the FAISS index.

    Returns:
        A list of dicts with keys "document" (first 500 characters of the
        matching chunk) and "similarity_score" (the distance reported by the
        index search, as a plain float). Hits whose id is outside the range of
        ``chunks`` are skipped, so the list may hold fewer than ``k`` items.
    """
    query_embedding = hf.embed_query(query)
    query_embedding = np.array(query_embedding).astype('float32').reshape(1, -1)
    distances, indices = index.search(query_embedding, k)

    results = []
    for distance, doc_idx in zip(distances[0], indices[0]):
        doc_idx = int(doc_idx)
        # The index was built from `chunks`, so ids must be resolved against
        # `chunks` (not `documents`). The bounds check also drops any
        # placeholder id such as -1.
        if 0 <= doc_idx < len(chunks):
            results.append({
                "document": chunks[doc_idx].page_content[:500],
                "similarity_score": float(distance),
            })
    return results

# Process voice input
_ASR_PIPELINE = None  # Whisper pipeline, created on first use and then reused


def _get_asr_pipeline():
    """Return the shared Whisper speech-to-text pipeline, building it on first use."""
    global _ASR_PIPELINE
    if _ASR_PIPELINE is None:
        _ASR_PIPELINE = pipeline("automatic-speech-recognition", model="openai/whisper-small")
    return _ASR_PIPELINE


def process_voice(audio, k=1):
    """Transcribe a spoken query, retrieve the best match and speak it back.

    Args:
        audio: Path to the recorded audio file (None when nothing was recorded).
        k: Number of search results to request from ``query_to_answer``.

    Returns:
        A ``(text, audio_path)`` tuple. ``audio_path`` is None whenever no
        audio response could be produced; ``text`` then explains why.
    """
    # Submitting without a recording gives no file to transcribe.
    if not audio:
        return "No audio was received. Please record a query and try again.", None

    try:
        # Speech-to-text using Whisper (the pipeline is built once, not per request)
        transcript = _get_asr_pipeline()(audio)["text"]
        if not transcript or not transcript.strip():
            return "Could not recognize any speech in the recording.", None

        # Query FAISS database
        search_results = query_to_answer(transcript, k)
        if not search_results:
            return "No relevant documents found.", None

        result_text = search_results[0]["document"]

        # Convert result to speech
        audio_path = text_to_speech_elevenlabs(result_text)

        # The TTS helper reports failures as an "Error: ..." string; that is a
        # message, not a file path, so it must not reach the audio output.
        if not audio_path or audio_path.startswith("Error:"):
            return f"{result_text}\n\n(Audio unavailable: {audio_path})", None

        return result_text, audio_path
    except Exception as e:
        return f"An error occurred: {e}", None

# Gradio Interface: microphone recording in, retrieved text plus spoken audio out.
microphone_input = gr.Audio(sources="microphone", type="filepath")
response_outputs = ["text", gr.Audio(type="filepath")]

iface = gr.Interface(
    fn=process_voice,
    inputs=microphone_input,
    outputs=response_outputs,
    title="Voice Query and Voice Response",
    description="Speak a query to retrieve relevant document information, and hear the response.",
)

# Start the web app.
iface.launch()


  hf = HuggingFaceEmbeddings(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://91a7d1193e741e7eb5.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


