# RAG with Groq

**Free Tier:** 14,400 requests/day, no credit card required!

RAG (Retrieval-Augmented Generation) = retrieve relevant documents, then generate an answer with an LLM using those documents as context.

In [None]:
%pip install groq python-dotenv faiss-cpu scikit-learn

In [None]:
import faiss
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer

# Toy knowledge base: one fact per string. The position of each string in this
# list is also its row number in the FAISS index built below.
documents = [
    "Python is a programming language known for its simplicity.",
    "The capital of France is Paris, known for the Eiffel Tower.",
    "Groq provides ultra-fast LLM inference using custom hardware.",
    "The Great Wall of China spans over 13,000 miles.",
    "Machine learning is a subset of AI that learns from data.",
    "DocuSign provides electronic signature technology.",
    "RAG combines retrieval and generation for better LLM responses.",
]

# Fit TF-IDF on the corpus and keep the dense document-term matrix.
vectorizer = TfidfVectorizer()
doc_vectors = vectorizer.fit_transform(documents).toarray()

# Flat L2 index with one dimension per TF-IDF feature; vectors are added as float32.
n_features = doc_vectors.shape[1]
index = faiss.IndexFlatL2(n_features)
index.add(doc_vectors.astype(np.float32))

def retrieve(query, top_n=1):
    """Return up to ``top_n`` documents nearest to ``query`` in TF-IDF space.

    The query is vectorized with the module-level ``vectorizer`` and searched
    against the module-level FAISS ``index``; hits are mapped back to their
    text through ``documents``.

    Args:
        query: Question or search text.
        top_n: Maximum number of documents to return. Must be at least 1.

    Returns:
        A list of document strings, nearest first. It holds fewer than
        ``top_n`` items when the index contains fewer vectors than requested.

    Raises:
        ValueError: If ``top_n`` is less than 1.
    """
    if top_n < 1:
        raise ValueError(f"top_n must be >= 1, got {top_n}")

    # Never ask FAISS for more neighbours than it stores: the surplus slots
    # come back labelled -1, and documents[-1] would silently return the last
    # document as if it were a real hit.
    k = min(top_n, index.ntotal)
    if k == 0:
        return []

    vec = vectorizer.transform([query]).toarray().astype(np.float32)
    _, indices = index.search(vec, k)
    # Defensive filter: drop any remaining "no result" (-1) labels.
    return [documents[i] for i in indices[0] if i >= 0]

# Report how many documents are in the knowledge base.
doc_count = len(documents)
print(f"Indexed {doc_count} documents")

In [None]:
import os
from dotenv import load_dotenv
from groq import Groq

# Load environment variables via python-dotenv (typically from a local .env
# file) before GROQ_API_KEY is read.
load_dotenv()

# Model id and Groq client shared by the query helpers in this notebook.
model = "llama-3.3-70b-versatile"
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

def rag_pipeline(query, top_n=1):
    """Answer ``query`` with the LLM, grounded in retrieved context.

    Retrieves the ``top_n`` nearest documents, prints them followed by a
    40-dash separator, then asks the chat model to answer using only that
    context.

    Args:
        query: The user's question.
        top_n: Number of documents to retrieve and pass as context. Multiple
            documents are joined with newlines. Defaults to 1.

    Returns:
        The content of the first choice in the chat completion response.
    """
    passages = retrieve(query, top_n=top_n)
    context = "\n".join(passages)
    print(f"Retrieved: {context}")
    print("-" * 40)

    response = client.chat.completions.create(
        model=model,
        max_tokens=200,
        temperature=0.3,
        messages=[
            # The system prompt restricts the model to the supplied context.
            {"role": "system", "content": "Answer based only on the context provided."},
            {"role": "user", "content": f"Context: {context}\n\nQuestion: {query}"},
        ],
    )
    return response.choices[0].message.content

In [None]:
# Demo: a question covered by the knowledge base.
question = "What is the capital of France?"
print(f"Q: {question}\n")
print(rag_pipeline(question))

In [None]:
# Demo: a question covered by the knowledge base.
question = "What is Python?"
print(f"Q: {question}\n")
print(rag_pipeline(question))

In [None]:
# Demo: a question covered by the knowledge base.
question = "What is Groq?"
print(f"Q: {question}\n")
print(rag_pipeline(question))

In [None]:
# Demo: a question the knowledge base does NOT cover. Retrieval still returns
# its nearest document, so the answer depends on the model respecting the
# "context only" system prompt.
question = "What is the population of Tokyo?"
print(f"Q: {question}\n")
print(rag_pipeline(question))

## Compare: RAG vs Direct

In [None]:
def direct_query(q):
    """Send ``q`` to the chat model with no retrieved context.

    Args:
        q: The user's question, sent as the only message.

    Returns:
        The content of the first choice in the chat completion response.
    """
    user_message = {"role": "user", "content": q}
    completion = client.chat.completions.create(
        model=model,
        max_tokens=200,
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# Ask the same question both ways: first the model alone, then the RAG pipeline.
q = "What does DocuSign do?"

direct_answer = direct_query(q)
print("DIRECT:", direct_answer)

print("\nRAG:")
rag_answer = rag_pipeline(q)
print(rag_answer)