In [3]:
# Install essentials
!pip install langchain-groq faiss-cpu transformers sentence-transformers langchain_community

Collecting langchain_community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.10.1-py3-none-any.whl.metadata (3.4 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.1-py3-none-any.whl.metadata (9.4 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain_community)
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 k

In [36]:
# Core LangChain & embeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.retrievers import ParentDocumentRetriever
from langchain_core.documents import Document
# Groq LLM
from langchain_groq import ChatGroq
# LangChain core for chains and prompts
from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain_core.runnables import RunnableLambda, RunnableSequence, RunnableParallel
# Hugging Face local summarizer
from transformers import pipeline
# Other
import os
import requests
import numpy as np

In [6]:
# Provide your Groq API key only
# The key is fetched through google.colab's `userdata` helper under the name
# 'GROQ_API_KEY' instead of being hardcoded in the notebook.
from google.colab import userdata
GROQ_API_KEY = userdata.get('GROQ_API_KEY')
# Also export it as an environment variable for the rest of the session.
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

In [7]:
# Example using NewsAPI.org (mock if you want)
# Builds the top-headlines request URL (country=us) used by the fetch cell.
news_api_key = userdata.get('NEWS_API_KEY')
# NOTE: the API key is embedded in the query string, so avoid printing or
# logging `url`.
url = f"https://newsapi.org/v2/top-headlines?country=us&apiKey={news_api_key}"

In [48]:
# Fetch the headlines. The timeout keeps the cell from hanging on a stalled
# connection, and raise_for_status() reports HTTP failures (bad key, rate
# limit, ...) right here instead of as a KeyError on "articles" below.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()
articles = data["articles"]

# Turn each article into a parent Document
parent_docs = []
for i, article in enumerate(articles):
    # Any of these fields may be missing or null; formatting them directly
    # would put the literal text "None" into the indexed document.
    parts = [article.get(field) for field in ("title", "description", "content")]
    text = "\n".join(part for part in parts if part)
    if not text:
        continue  # nothing to index for this article
    parent_docs.append(
        # The id is the article's position in the API response.
        Document(page_content=text, metadata={"id": str(i)})
    )

print(f"Loaded {len(parent_docs)} parent articles.")

Loaded 20 parent articles.


In [49]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.storage import InMemoryStore
# Child-chunk settings for splitting parent docs (size 500, overlap 50).
CHILD_CHUNK_SIZE = 500
CHILD_CHUNK_OVERLAP = 50

# Splitter that chunks parent docs into children
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=CHILD_CHUNK_OVERLAP,
    chunk_size=CHILD_CHUNK_SIZE,
)

In [50]:
# Create embeddings
# Embedding wrapper for the "all-MiniLM-L6-v2" model; it is handed to FAISS
# below to embed the indexed texts.
embedder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [56]:
# Store in FAISS using from_texts
# A single placeholder text bootstraps the index (so it is created with the
# embedder's vector size). It is given a known id and deleted straight away;
# otherwise "dummy" stays in the index and can take up one of the top-k
# similarity hits at query time.
_BOOTSTRAP_ID = "__bootstrap__"
vectorstore = FAISS.from_texts(["dummy"], embedding=embedder, ids=[_BOOTSTRAP_ID])
vectorstore.delete([_BOOTSTRAP_ID])
docstore = InMemoryStore()  # Stores parent docs

In [57]:
# Wire the stores together: child chunks go to the vector store, full parent
# documents go to the docstore.
retriever = ParentDocumentRetriever(
    docstore=docstore,
    vectorstore=vectorstore,
    parent_splitter=None,  # parents are kept whole; only children are split
    child_splitter=text_splitter,
)
# Show the configured retriever as the cell output.
retriever

ParentDocumentRetriever(vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7bfe55518590>, docstore=<langchain_core.stores.InMemoryStore object at 0x7bfe55518d90>, search_kwargs={}, child_splitter=<langchain_text_splitters.character.RecursiveCharacterTextSplitter object at 0x7bfe55508050>)

In [None]:
import polars as pl

# NOTE(review): `all_vectors` is not defined in any cell visible here and this
# cell has no execution count (`In [None]`) — confirm where it is meant to come
# from before relying on this cell in a Run All.

# One row per vector: the metadata fields plus an "embedding" column.
vector_db_data = [
    {**vec["metadata"], "embedding": vec["embedding"]}
    for vec in all_vectors
]

# Build the Polars DataFrame and show it as the cell output.
vectordb_df = pl.DataFrame(vector_db_data)
vectordb_df

In [59]:
# Add parent docs: the retriever will chunk them and index child chunks
# NOTE(review): no ids are passed, so re-running this cell presumably indexes
# the same articles a second time — confirm, and rebuild `vectorstore` /
# `docstore` before re-adding.
retriever.add_documents(parent_docs)
print("ParentDocumentRetriever ready.")

ParentDocumentRetriever ready.


In [60]:
# Small LLM: BART summarizer for quick draft summaries
# Loaded once at module level; the draft functions below call it as
# `summarizer(text, ...)`.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def draft_summary(inputs):
    """
    Speculative RAG: fast draft stage.

    Joins the retrieved documents' text with spaces and runs the local
    summarizer over it to produce a rough summary.

    Args:
        inputs: dict with a "context" entry holding an iterable of objects
            that expose a ``page_content`` string.

    Returns:
        The summary text, or "" when the context contains no text (the
        summarizer is not called in that case).
    """
    context = inputs["context"]
    combined_text = " ".join(doc.page_content for doc in context)
    if not combined_text.strip():
        # Nothing was retrieved; there is nothing meaningful to summarize.
        return ""
    result = summarizer(
        combined_text,
        max_length=100,
        min_length=30,
        do_sample=False,
        # Several parent articles can exceed the model's input limit; truncate
        # instead of failing on over-long input.
        truncation=True,
    )
    return result[0]["summary_text"]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cuda:0


In [61]:
# Big LLM for final verification and refinement
big_llm = ChatGroq(
    groq_api_key=GROQ_API_KEY,
    model_name="llama-3.3-70b-versatile",
    # The verifier should be as repeatable as possible across Run All, so do
    # not rely on the library's default sampling temperature.
    temperature=0,
)

In [62]:
# Chat prompt for the verifying big LLM: a system message fixing the editor /
# fact-checker role, and a human message carrying the question, the retrieved
# context and the draft to review.
verify_prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        "You are a professional news editor and fact-checker. "
        "Ensure the final summary is accurate, clear, and aligns with the facts. "
        "No hallucinations. Keep it concise.",
    ),
    (
        "human",
        "Question: {question}\n\n"
        "Relevant News Context:\n{context}\n\n"
        "Draft Summary:\n{draft}\n\n"
        "Please rewrite or approve this draft, under 100 words.",
    ),
])

In [75]:
# === Speculative RAG Workflow ===

# Retrieve relevant parent docs via ParentDocumentRetriever
def retrieve_parent_docs(inputs):
    """
    Look up the parent documents for a question.

    Args:
        inputs: dict carrying the user's question under the "question" key.

    Returns:
        Whatever ``retriever.invoke`` returns for that question.
    """
    question = inputs["question"]
    return retriever.invoke(question)

# Fast draft: uses local summarizer
# draft_chain maps the input dict to {"context": <retrieved parent docs>};
# draft_summary_chain wraps draft_summary as a runnable.
# NOTE(review): neither runnable is referenced by the cells visible here —
# confirm they are still needed.
draft_chain = RunnableLambda(lambda x: {"context": retrieve_parent_docs(x)})
draft_summary_chain = RunnableLambda(draft_summary)

In [77]:
def speculative_draft(inputs):
    """
    Retrieve the parent articles for ``inputs["question"]`` and draft a summary.

    Delegates to ``retrieve_parent_docs`` and ``draft_summary`` rather than
    repeating their logic, so retrieval and summarization settings live in
    one place.

    Args:
        inputs: dict carrying the user's question under the "question" key.

    Returns:
        The draft summary text produced by ``draft_summary``.
    """
    return draft_summary({"context": retrieve_parent_docs(inputs)})

In [78]:
# Verify & Refine: uses Groq Llama with system/human prompt
def _prepare_verification_inputs(inputs):
    """Build the prompt variables for the verifier from a {"question": ...} dict.

    Retrieval runs once, and both the draft and the prompt context are derived
    from that single result, so the verifier checks the draft against exactly
    the documents it was written from.

    Returns:
        dict with "question", "context" (plain article text) and "draft".
    """
    docs = retrieve_parent_docs(inputs)
    return {
        "question": inputs["question"],
        # Pass article text only: formatting the Document list itself would
        # put Python object reprs (metadata included) into the prompt.
        "context": "\n\n".join(doc.page_content for doc in docs),
        "draft": draft_summary({"context": docs}),
    }

verify_chain = (
    RunnableLambda(_prepare_verification_inputs)
    | verify_prompt
    | big_llm
)

# Final Speculative RAG pipeline
speculative_rag = RunnableSequence(verify_chain)
print("Speculative RAG Chain with ParentDocumentRetriever ready!")

Speculative RAG Chain with ParentDocumentRetriever ready!


In [79]:
# Ask about US politics and print the verified answer.
query = dict(question="Summarize the latest updates in US politics.")
result = speculative_rag.invoke(query)

print("\nFinal Verified Summary:", result.content, sep="\n")


Final Verified Summary:
Approved summary: 
The foreign-born workforce has contracted for the third straight month. White House officials believe the megabill will encourage more Americans to enter the workforce, offsetting the decline. However, many economists disagree, predicting negative economic impacts from immigration changes in the bill.


In [88]:
# Ask about NVIDIA stocks and print the verified answer.
query = dict(question="Summarize the latest updates in NVIDIA Stocks.")
result = speculative_rag.invoke(query)

print("\nFinal Verified Summary:", result.content, sep="\n")


Final Verified Summary:
The provided news context does not mention NVIDIA stocks. The draft summary appears to be a mix of unrelated topics. 

Approved Summary: There is no information available on NVIDIA stocks in the provided context.


In [82]:
# Ask about business news and print the verified answer.
query = dict(question="Summarize the latest updates in Business.")
result = speculative_rag.invoke(query)

print("\nFinal Verified Summary:", result.content, sep="\n")


Final Verified Summary:
Approved summary: 
Microsoft has laid off 9,000 workers amid an $80 billion AI investment. An executive suggested laid-off colleagues talk to ChatGPT about their feelings. This comes as Big Tech's salary divide grows, with hundred-million-dollar pay packages being floated to top performers in Silicon Valley.
