In [1]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Load embedding model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Sample document store
documents = [
    "Machine learning is a core part of AI.",
    "Deep learning is used in image recognition.",
    "AI is transforming many industries."
]

# Precompute embeddings with a single batched encode() call instead of one
# model invocation per document. list() splits the returned 2-D array back
# into one 1-D vector per document, so doc_embeddings[i] still lines up with
# documents[i] and the variable remains a plain list.
doc_embeddings = list(model.encode(documents))

# Function: Corrective RAG
def corrective_rag(query, top_k=1):
    """Retrieve the documents most similar to *query* and return a corrected answer.

    The query is embedded with the module-level ``model`` and scored against
    the precomputed ``doc_embeddings`` by cosine similarity. The ``top_k``
    best-scoring entries of ``documents`` are joined with single spaces
    (highest score first), and one hard-coded rewrite rule is applied as the
    "correction" step.

    Args:
        query: Text used to search the document store.
        top_k: Maximum number of documents to retrieve. A value larger than
            the store size retrieves every document; a value <= 0 retrieves
            nothing.

    Returns:
        The corrected answer string, or ``""`` when nothing is retrieved.
    """
    # Guard: the slice ``[-0:]`` would select *every* document and a negative
    # top_k would silently drop only the lowest-ranked ones, so non-positive
    # requests are treated as "retrieve nothing". An empty store likewise
    # yields an empty answer.
    if top_k <= 0 or len(doc_embeddings) == 0:
        return ""

    # Step 1: Generate query embedding
    query_emb = model.encode(query)

    # Step 2: Score the query against all documents in one call; the result
    # has shape (1, n_docs), so row 0 holds one score per document.
    scores = cosine_similarity([query_emb], doc_embeddings)[0]

    # Step 3: argsort is ascending, so take the last top_k indices and
    # reverse them to get best-first order.
    top_indices = np.argsort(scores)[-top_k:][::-1]
    retrieved_docs = [documents[i] for i in top_indices]

    # Step 4: Generate initial answer (simplified as concatenation)
    initial_answer = " ".join(retrieved_docs)

    # Step 5: Apply correction (simplified rule-based example)
    corrected_answer = initial_answer.replace(
        "Deep learning is used", "Deep learning is commonly used"
    )

    return corrected_answer

# Demo: retrieve the two best-matching documents for a sample question
# and display the question alongside the corrected answer.
query = "What is a core part of AI?"
answer = corrective_rag(query=query, top_k=2)

print("Query:", query)
print("Corrective RAG Answer:", answer)


  from .autonotebook import tqdm as notebook_tqdm


Query: What is a core part of AI?
Corrective RAG Answer: Machine learning is a core part of AI. AI is transforming many industries.
