In [None]:
!pip install -q gradio langchain langchain-nvidia-ai-endpoints faiss-cpu langchain-community "unstructured[pdf]" pdfminer.six transformers tavily-python requests beautifulsoup4 -q streamlit

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m70.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m71.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
# 🔧 1. Installing Dependencies
# !pip install -q gradio langchain langchain-nvidia-ai-endpoints faiss-cpu langchain-community "unstructured[pdf]" pdfminer.six transformers

# 🔐 2. Load NVIDIA API Key
from google.colab import userdata
api_key = userdata.get("NVIDIA_API_KEY")

# 🏗 3. Imports & Config
import os
import pickle
import gradio as gr

from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

from transformers import AutoTokenizer

# Folder that DirectoryLoader reads from and that uploaded files are moved into.
DOCS_DIR = "./uploaded_docs"
os.makedirs(DOCS_DIR, exist_ok=True)  # no-op when the folder already exists
# File the FAISS store is pickled to after every build.
VECTORSTORE_PATH = "vectorstore.pkl"
# Chunks whose token count exceeds this limit are dropped before embedding.
MAX_TOKENS = 512

# 🔧 Token counter backed by a Hugging Face tokenizer
# NOTE(review): counts come from the bert-base-uncased vocabulary; whether that
# matches the embedding model's own tokenizer is not shown here — confirm.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
def count_tokens(text):
    """Return the number of token ids the module-level tokenizer yields for *text*.

    Truncation is disabled, so long inputs are counted in full.
    """
    token_ids = tokenizer.encode(text, truncation=False)
    return len(token_ids)

# 📂 4. Load Documents
loader = DirectoryLoader(DOCS_DIR)
raw_documents = loader.load()
print(f"📄 Loaded {len(raw_documents)} documents")

# 📐 5. Split and Filter Chunks
splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=100, separator='\n')
documents = splitter.split_documents(raw_documents)
print(f"🔍 Created {len(documents)} chunks")

# Drop any chunk that is still longer than MAX_TOKENS after splitting.
safe_documents = [doc for doc in documents if count_tokens(doc.page_content) <= MAX_TOKENS]
print(f"✅ Filtered to {len(safe_documents)} safe chunks under {MAX_TOKENS} tokens")

# 🤖 6. Embedding Model
embedder = NVIDIAEmbeddings(model="nvidia/nv-embedqa-e5-v5", model_type="passage", api_key=api_key)

# 💾 7. Build FAISS Vectorstore Every Time
# FAISS.from_documents cannot build an index from an empty list, so when
# DOCS_DIR has no usable chunks (e.g. a fresh runtime) skip the build instead
# of aborting the cell before the upload UI can start.
if safe_documents:
    vectorstore = FAISS.from_documents(safe_documents, embedder)
    with open(VECTORSTORE_PATH, "wb") as f:
        pickle.dump(vectorstore, f)
    print("✅ Rebuilt vector store.")
else:
    vectorstore = None
    print("⚠️ No documents to index yet. Upload a PDF, then rebuild the vectorstore.")

# 🧠 8. RAG Chain Setup
# Chat model used to generate the final answer.
llm = ChatNVIDIA(model="meta/llama3-70b-instruct", api_key=api_key)

# Two-message prompt: a fixed system instruction plus the caller-supplied
# "{input}" text, which carries both the retrieved context and the question.
template = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI assistant named Envie. Use any provided context."),
    ("human", "{input}")
])

# prompt -> model -> plain string output
chain = template | llm | StrOutputParser()

# 🔍 9. RAG Query Function
def ask(question: str, k_retrieval: int = 4):
    """Answer *question* with the LLM chain, using retrieved chunks as context.

    Args:
        question: The user's question as typed into the UI.
        k_retrieval: Maximum number of chunks to retrieve as context.

    Returns:
        The model's complete answer as one string, or a short notice when
        the question is empty.
    """
    if not question or not question.strip():
        return "⚠️ Please enter a question."

    # The store may be absent or None when no index has been built yet; in
    # that case answer without document context instead of raising.
    store = globals().get("vectorstore")
    docs = []
    if store is not None:
        # The result count is configured through search_kwargs, the
        # documented retriever option, rather than as an extra invoke() kwarg.
        retriever = store.as_retriever(search_kwargs={"k": k_retrieval})
        docs = retriever.invoke(question)

    if docs:
        context = "\n\n".join(doc.page_content for doc in docs)
        prompt_input = f"Context:\n{context}\n\nQuestion: {question}"
    else:
        prompt_input = f"Question: {question}"

    # The UI needs the whole answer at once, so invoke() replaces the
    # stream-and-join round trip.
    return chain.invoke({"input": prompt_input})

# 📤 10. Upload + Rebuild Helpers
def upload_and_rebuild(file):
    """Move an uploaded file into DOCS_DIR and report the outcome.

    Despite the name, this only stores the file; the index is not rebuilt
    here.

    Args:
        file: The upload passed in by the UI callback: an object exposing the
            temp-file path as ``.name``, a plain path string, or a falsy
            value when nothing was selected.

    Returns:
        A status message for the "Upload Status" box.
    """
    import shutil

    if not file:
        # Previously a success message was shown even though nothing was stored.
        return "⚠️ No file selected. Choose a PDF before clicking 'Upload'."

    source_path = getattr(file, "name", file)
    shutil.move(source_path, os.path.join(DOCS_DIR, os.path.basename(source_path)))
    return "📁 Uploaded. Click 'Rebuild Vectorstore'."

def rebuild_vectorstore():
    """Re-index everything in DOCS_DIR and replace the global vectorstore.

    Returns:
        A status message for the "Rebuild Output" box. When DOCS_DIR yields
        no usable chunks the existing store is left untouched and a warning
        is returned instead.
    """
    global vectorstore

    raw_documents = DirectoryLoader(DOCS_DIR).load()
    # Use the same newline separator as the initial build so a rebuild
    # produces the same kind of chunks.
    splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=100, separator="\n")
    documents = splitter.split_documents(raw_documents)
    safe_documents = [doc for doc in documents if count_tokens(doc.page_content) <= MAX_TOKENS]

    if not safe_documents:
        # FAISS.from_documents cannot build an index from an empty list.
        return "⚠️ No documents to index. Upload a PDF first."

    vectorstore = FAISS.from_documents(safe_documents, embedder)
    with open(VECTORSTORE_PATH, "wb") as f:
        pickle.dump(vectorstore, f)
    return f"✅ Vectorstore rebuilt with {len(safe_documents)} chunks."

# 🖼️ 11. Gradio Interface
# Components are laid out in creation order, so the statement order below is
# also the on-screen order.
with gr.Blocks() as demo:
    gr.Markdown("## 🚀 RAG Assistant - Ask Questions About Your Documents")

    # Upload row: file picker next to a read-only status box.
    with gr.Row():
        file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_status = gr.Textbox(label="Upload Status", interactive=False)

    # "Upload" only stores the file; indexing is a separate step below.
    upload_btn = gr.Button("Upload")
    upload_btn.click(upload_and_rebuild, inputs=file_input, outputs=upload_status)

    # Re-index DOCS_DIR on demand and show the resulting status message.
    rebuild_btn = gr.Button("Rebuild Vectorstore")
    rebuild_output = gr.Textbox(label="Rebuild Output", interactive=False)
    rebuild_btn.click(rebuild_vectorstore, outputs=rebuild_output)

    # Question row: input box next to the plain-text answer box.
    with gr.Row():
        question_input = gr.Textbox(label="Enter your question")
        output_display = gr.Textbox(label="Answer", lines=10)

    # Only the question is wired in, so ask() runs with its default k_retrieval.
    ask_button = gr.Button("Submit")
    ask_button.click(fn=ask, inputs=question_input, outputs=output_display)

# Start the app from the notebook cell.
demo.launch()


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Token indices sequence length is longer than the specified maximum sequence length for this model (536 > 512). Running this sequence through the model will result in indexing errors


📄 Loaded 4 documents
🔍 Created 464 chunks
✅ Filtered to 462 safe chunks under 512 tokens
✅ Rebuilt vector store.
It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://3ededfa6b55dde44ac.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
# 🔧 1. Installing Dependencies
# !pip install -q gradio langchain langchain-nvidia-ai-endpoints faiss-cpu langchain-community "unstructured[pdf]" pdfminer.six transformers requests beautifulsoup4

# 🔐 2. Load API Keys
from google.colab import userdata
api_key = userdata.get("NVIDIA_API_KEY")

# 🏗 3. Imports & Config
import os
import pickle
import gradio as gr
import shutil
import requests
from bs4 import BeautifulSoup

from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from transformers import AutoTokenizer

# Folder scanned by DirectoryLoader; uploads are moved here.
DOCS_DIR = "./uploaded_docs"
os.makedirs(DOCS_DIR, exist_ok=True)  # no-op when the folder already exists
# Pickle file the FAISS store is written to after each build.
VECTORSTORE_PATH = "vectorstore.pkl"
# Upper bound on tokens per chunk; longer chunks are filtered out.
MAX_TOKENS = 512

# 🔧 4. Token Counter
# NOTE(review): uses the bert-base-uncased vocabulary; confirm it is a good
# proxy for the embedding model's tokenizer.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
def count_tokens(text):
    """Count the token ids produced for *text* by the module-level tokenizer.

    The text is encoded without truncation so the full length is measured.
    """
    encoded = tokenizer.encode(text, truncation=False)
    return len(encoded)

# 📂 5. Load and Split Docs
loader = DirectoryLoader(DOCS_DIR)
raw_documents = loader.load()
splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=100, separator="\n")
documents = splitter.split_documents(raw_documents)
# Drop any chunk that is still longer than MAX_TOKENS after splitting.
safe_documents = [doc for doc in documents if count_tokens(doc.page_content) <= MAX_TOKENS]

# 🤖 6. Embedding & Vectorstore
embedder = NVIDIAEmbeddings(model="nvidia/nv-embedqa-e5-v5", model_type="passage", api_key=api_key)
# FAISS.from_documents cannot build an index from an empty list, so when
# DOCS_DIR has no usable chunks skip the build instead of aborting the cell
# before the upload UI can start.
if safe_documents:
    vectorstore = FAISS.from_documents(safe_documents, embedder)
    with open(VECTORSTORE_PATH, "wb") as f:
        pickle.dump(vectorstore, f)
else:
    vectorstore = None
    print("⚠️ No documents to index yet. Upload a PDF, then rebuild the vectorstore.")

# 🧠 7. LLM Agent (Envie)
# Answers the user's question; "{input}" carries the retrieved context plus
# the question text.
llm_envie = ChatNVIDIA(model="meta/llama3-70b-instruct", api_key=api_key)
template_envie = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI assistant named Envie. Use the context to answer the user's question."),
    ("human", "{input}")
])
# prompt -> model -> plain string output
envie_chain = template_envie | llm_envie | StrOutputParser()

# 🤖 8. Book Recommender Agent
# Second chain on the same model; it only turns the raw question into a topic
# string that is later used as a web-search query.
llm_books = ChatNVIDIA(model="meta/llama3-70b-instruct", api_key=api_key)
template_books = ChatPromptTemplate.from_messages([
    ("system", "You are a book expert. Extract the main topic from the user's question."),
    ("human", "{question}")
])
book_topic_chain = template_books | llm_books | StrOutputParser()

def fetch_books_by_topic(topic):
    """Search DuckDuckGo's HTML endpoint for books on *topic*.

    Args:
        topic: Free-text topic that is inserted into the search query.

    Returns:
        A Markdown bullet list with up to five ``[title](link)`` entries, or
        a one-line notice when the search fails or yields nothing.
    """
    import urllib.parse

    search_query = f"best books on {topic}"
    query = urllib.parse.quote_plus(search_query)
    url = f"https://html.duckduckgo.com/html/?q={query}"
    headers = {"User-Agent": "Mozilla/5.0"}

    try:
        # A timeout keeps a stalled search from hanging the UI callback, and
        # HTTP error pages are not parsed as if they were result pages.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.RequestException:
        return "❌ Book search failed. Please try again later."

    soup = BeautifulSoup(response.text, "html.parser")
    results = [
        f"- [{link.get_text(strip=True)}]({link.get('href')})"
        for link in soup.select("a.result__a")[:5]
    ]
    return "\n".join(results) if results else "❌ No books found via search."

# 🔍 9. RAG + Agent Query Function (updated with markdown output)
def ask(question: str, k_retrieval: int = 4):
    """Answer *question* and append web-searched book recommendations.

    Args:
        question: The user's question as typed into the UI.
        k_retrieval: Maximum number of chunks to retrieve as context.

    Returns:
        A Markdown document with the answer, the extracted topic, the book
        list and a note on whether document context was used, or a short
        notice when the question is empty.
    """
    if not question or not question.strip():
        return "⚠️ Please enter a question."

    # The store may be absent or None when no index has been built yet; in
    # that case answer without document context instead of raising.
    store = globals().get("vectorstore")
    docs = []
    if store is not None:
        # The result count is configured through search_kwargs, the
        # documented retriever option, rather than as an extra invoke() kwarg.
        retriever = store.as_retriever(search_kwargs={"k": k_retrieval})
        docs = retriever.invoke(question)

    if docs:
        context = "\n\n".join(doc.page_content for doc in docs)
        prompt_input = f"Context:\n{context}\n\nQuestion: {question}"
        context_msg = "📚 Using local document context."
    else:
        prompt_input = f"Question: {question}"
        context_msg = "🌐 No local documents found, using general LLM response."

    # The UI needs the whole answer at once, so invoke() replaces the
    # stream-and-join round trip.
    answer = envie_chain.invoke({"input": prompt_input})

    # ✳️ Book Agent Call
    topic = book_topic_chain.invoke({"question": question}).strip()
    books = fetch_books_by_topic(topic)

    output_md = f"""
### ✨ AI Insight & Recommended Books 📚

{answer.strip()}

---

#### 🔍 **Main Topic:**
**{topic}**

---

#### 📚 **Top Book Recommendations:**

{books}

---

#### 🔗 **Tips:**
- Click on the book titles above to learn more.
- Combine reading with practice for best results.
- Ask more questions to explore deeper!

---

*{context_msg}*
"""
    return output_md

# 📤 10. Upload + Rebuild Vectorstore
def upload_and_rebuild(file):
    """Move an uploaded file into DOCS_DIR and report the outcome.

    Despite the name, this only stores the file; the index is not rebuilt
    here.

    Args:
        file: The upload passed in by the UI callback: an object exposing the
            temp-file path as ``.name``, a plain path string, or a falsy
            value when nothing was selected.

    Returns:
        A status message for the "Upload Status" box.
    """
    if not file:
        # Previously a success message was shown even though nothing was stored.
        return "⚠️ No file selected. Choose a PDF before clicking 'Upload'."

    source_path = getattr(file, "name", file)
    shutil.move(source_path, os.path.join(DOCS_DIR, os.path.basename(source_path)))
    return "📁 Uploaded. Click 'Rebuild Vectorstore'."

def rebuild_vectorstore():
    """Re-index everything in DOCS_DIR and replace the global vectorstore.

    Returns:
        A status message for the "Rebuild Output" box. When DOCS_DIR yields
        no usable chunks the existing store is left untouched and a warning
        is returned instead.
    """
    global vectorstore

    raw_documents = DirectoryLoader(DOCS_DIR).load()
    # Use the same newline separator as the initial build so a rebuild
    # produces the same kind of chunks.
    splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=100, separator="\n")
    documents = splitter.split_documents(raw_documents)
    safe_documents = [doc for doc in documents if count_tokens(doc.page_content) <= MAX_TOKENS]

    if not safe_documents:
        # FAISS.from_documents cannot build an index from an empty list.
        return "⚠️ No documents to index. Upload a PDF first."

    vectorstore = FAISS.from_documents(safe_documents, embedder)
    with open(VECTORSTORE_PATH, "wb") as f:
        pickle.dump(vectorstore, f)
    return f"✅ Vectorstore rebuilt with {len(safe_documents)} chunks."

# 🖼️ 11. Gradio Interface (updated output component to Markdown)
# Components are laid out in creation order, so the statement order below is
# also the on-screen order.
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Envie Multi-Agent RAG + 📚 Book Recommender")

    # Upload row: file picker next to a read-only status box.
    with gr.Row():
        file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_status = gr.Textbox(label="Upload Status", interactive=False)
    # "Upload" only stores the file; indexing is a separate step below.
    upload_btn = gr.Button("Upload")
    upload_btn.click(upload_and_rebuild, inputs=file_input, outputs=upload_status)

    # Re-index DOCS_DIR on demand and show the resulting status message.
    rebuild_btn = gr.Button("Rebuild Vectorstore")
    rebuild_output = gr.Textbox(label="Rebuild Output", interactive=False)
    rebuild_btn.click(rebuild_vectorstore, outputs=rebuild_output)

    # Question row: input box next to the rendered Markdown answer.
    with gr.Row():
        question_input = gr.Textbox(label="Ask a Question (any topic)")
        output_display = gr.Markdown(label="Answer + Books")  # changed to Markdown for rich output

    # Only the question is wired in, so ask() runs with its default k_retrieval.
    ask_button = gr.Button("Submit")
    ask_button.click(fn=ask, inputs=question_input, outputs=output_display)

# Start the app from the notebook cell.
demo.launch()




It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://ce80da13005b8177f2.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
# 🔧 Install dependencies
!pip install -q gradio langchain langchain-nvidia-ai-endpoints faiss-cpu langchain-community "unstructured[pdf]" pdfminer.six transformers requests beautifulsoup4

# 🔐 Load API Key
from google.colab import userdata
api_key = userdata.get("NVIDIA_API_KEY")

# 📚 Imports
import os, pickle, shutil, requests
from bs4 import BeautifulSoup
import gradio as gr
from langchain_nvidia_ai_endpoints import ChatNVIDIA, NVIDIAEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from transformers import AutoTokenizer

# 📁 Constants
# Folder scanned by DirectoryLoader; uploads are moved here.
DOCS_DIR = "./uploaded_docs"
# Pickle file the FAISS store is written to after each build.
VECTORSTORE_PATH = "vectorstore.pkl"
# Upper bound on tokens per chunk; longer chunks are filtered out.
MAX_TOKENS = 512
os.makedirs(DOCS_DIR, exist_ok=True)  # no-op when the folder already exists

# 🔢 Token Counter
# NOTE(review): uses the bert-base-uncased vocabulary; confirm it is a good
# proxy for the embedding model's tokenizer.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
def count_tokens(text):
    """Return the length of *text* in tokens, per the module-level tokenizer.

    Encoding runs with truncation off, so nothing is cut before counting.
    """
    ids = tokenizer.encode(text, truncation=False)
    return len(ids)

# 🤖 AI Agents
# Answering chain: "{input}" carries the retrieved context plus the question;
# the output parser reduces the model reply to a plain string.
llm_envie = ChatNVIDIA(model="meta/llama3-70b-instruct", api_key=api_key)
envie_chain = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful AI assistant named Envie. Use the context to answer the user's question."),
    ("human", "{input}")
]) | llm_envie | StrOutputParser()

# Topic-extraction chain on the same model; its output is used as the
# book-search query.
llm_books = ChatNVIDIA(model="meta/llama3-70b-instruct", api_key=api_key)
book_topic_chain = ChatPromptTemplate.from_messages([
    ("system", "You are a book expert. Extract the main topic from the user's question."),
    ("human", "{question}")
]) | llm_books | StrOutputParser()

# 🧠 Embeddings
# Shared embedder used whenever the FAISS store is built.
embedder = NVIDIAEmbeddings(model="nvidia/nv-embedqa-e5-v5", model_type="passage", api_key=api_key)

# 🗂️ Load & Process Docs
def load_docs():
    """Load everything in DOCS_DIR, split it, and keep chunks within MAX_TOKENS.

    Returns:
        The list of split documents whose token count does not exceed
        MAX_TOKENS.
    """
    raw_docs = DirectoryLoader(DOCS_DIR).load()
    chunks = CharacterTextSplitter(
        chunk_size=300,
        chunk_overlap=100,
        separator="\n",
    ).split_documents(raw_docs)

    within_limit = []
    for chunk in chunks:
        if count_tokens(chunk.page_content) <= MAX_TOKENS:
            within_limit.append(chunk)
    return within_limit

def build_vectorstore():
    """Build a FAISS store from the documents in DOCS_DIR and pickle it.

    Returns:
        ``(store, chunk_count)``. When there are no usable chunks nothing is
        built or written and ``(None, 0)`` is returned, because
        FAISS.from_documents cannot build an index from an empty list.
    """
    docs = load_docs()
    if not docs:
        return None, 0
    store = FAISS.from_documents(docs, embedder)
    with open(VECTORSTORE_PATH, "wb") as f:
        pickle.dump(store, f)
    return store, len(docs)

# 📥 Upload & Rebuild
def upload_and_rebuild(file):
    """Move an uploaded file into DOCS_DIR and report the outcome.

    Despite the name, this only stores the file; the index is not rebuilt
    here.

    Args:
        file: The upload passed in by the UI callback: an object exposing the
            temp-file path as ``.name``, a plain path string, or a falsy
            value when nothing was selected.

    Returns:
        A status message for the "Upload Status" box.
    """
    if not file:
        # Previously a success message was shown even though nothing was stored.
        return "⚠️ No file selected. Choose a PDF before clicking 'Upload'."

    source_path = getattr(file, "name", file)
    shutil.move(source_path, os.path.join(DOCS_DIR, os.path.basename(source_path)))
    return "📁 Uploaded. Click 'Rebuild Vectorstore'."

def rebuild():
    """Rebuild the index and publish it as the global ``vectorstore``.

    Returns:
        A status message for the "Rebuild Output" box. When there is nothing
        to index, any existing store is kept and a warning is returned.
    """
    global vectorstore
    store, chunk_count = build_vectorstore()
    if store is None:
        return "⚠️ No documents to index. Upload a PDF first."
    vectorstore = store
    return f"✅ Vectorstore rebuilt with {chunk_count} chunks."

# 🔍 Fetch Books
def fetch_books(topic):
    """Search DuckDuckGo's HTML endpoint for books on *topic*.

    Args:
        topic: Free-text topic that is inserted into the search query.

    Returns:
        A Markdown bullet list with up to five ``[title](link)`` entries, or
        a one-line notice when the search fails or yields nothing.
    """
    import urllib.parse

    query = urllib.parse.quote_plus(f"best books on {topic}")
    url = f"https://html.duckduckgo.com/html/?q={query}"
    headers = {"User-Agent": "Mozilla/5.0"}

    try:
        # A timeout keeps a stalled search from hanging the UI callback, and
        # HTTP error pages are not parsed as if they were result pages.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()
    except requests.RequestException:
        return "❌ Book search failed. Please try again later."

    soup = BeautifulSoup(response.text, "html.parser")
    return "\n".join(
        f"- [{link.get_text(strip=True)}]({link.get('href')})"
        for link in soup.select("a.result__a")[:5]
    ) or "❌ No books found."

# 💬 Ask Function
def ask(question, k_retrieval=4):
    """Answer *question* and append web-searched book recommendations.

    Args:
        question: The user's question as typed into the UI.
        k_retrieval: Maximum number of chunks to retrieve as context.

    Returns:
        A Markdown document with the answer, the extracted topic, the book
        list and a note on whether document context was used, or a short
        notice when the question is empty.
    """
    if not question or not question.strip():
        return "⚠️ Please enter a question."

    # ``vectorstore`` only exists after a rebuild has run, so look it up
    # defensively instead of raising NameError on the first question.
    store = globals().get("vectorstore")
    docs = []
    if store is not None:
        # The result count is configured through search_kwargs, the
        # documented retriever option, rather than as an extra invoke() kwarg.
        retriever = store.as_retriever(search_kwargs={"k": k_retrieval})
        docs = retriever.invoke(question)

    if docs:
        context = "\n\n".join(doc.page_content for doc in docs)
        prompt_input = f"Context:\n{context}\n\nQuestion: {question}"
        context_msg = "📚 Used document context."
    else:
        prompt_input = f"Question: {question}"
        context_msg = "🌐 No context found."

    # The UI needs the whole answer at once, so invoke() replaces the
    # stream-and-join round trip.
    answer = envie_chain.invoke({"input": prompt_input})
    topic = book_topic_chain.invoke({"question": question}).strip()
    books = fetch_books(topic)

    return f"""
### ✨ AI Insight & Recommendations

{answer.strip()}

---

**🔍 Main Topic:** {topic}

**📚 Book Recommendations:**

{books}

---

{context_msg}
"""

# 🌐 Gradio App
# Components are laid out in creation order, so the statement order below is
# also the on-screen order.
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 Envie Multi-Agent RAG + 📚 Book Recommender")

    # Upload row: file picker next to a read-only status box.
    with gr.Row():
        file_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_status = gr.Textbox(label="Upload Status", interactive=False)
    # "Upload" only stores the file; indexing is a separate step below.
    gr.Button("Upload").click(upload_and_rebuild, inputs=file_input, outputs=upload_status)

    # Re-index DOCS_DIR on demand; the status box is created before its button.
    rebuild_output = gr.Textbox(label="Rebuild Output", interactive=False)
    gr.Button("Rebuild Vectorstore").click(rebuild, outputs=rebuild_output)

    # Question row: input box next to the rendered Markdown answer.
    with gr.Row():
        question_input = gr.Textbox(label="Ask a question")
        output_display = gr.Markdown(label="Response")

    gr.Button("Submit").click(ask, inputs=question_input, outputs=output_display)

# 🚀 Launch in notebook
demo.launch()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://cc03db939ea9676ec6.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


