In [1]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

# 1. Load model (dense embedding model). It is created once at module level
#    and reused for both the documents below and every query in vanilla_rag().
model = SentenceTransformer('all-MiniLM-L6-v2')

# 2. Sample documents (the whole "knowledge base" searched by vanilla_rag()).
documents = [
    "Machine learning is a core part of AI.",
    "Deep learning is used in image and speech recognition.",
    "AI is transforming many industries.",
]

# 3. Compute embeddings for all documents with a single batched encode() call
#    rather than one call per document. list(...) splits the batch result into
#    one vector per document, so doc_embeddings[i] corresponds to documents[i]
#    and the variable stays a plain list of vectors.
doc_embeddings = list(model.encode(documents))

# 4. Simple Vanilla RAG function
def vanilla_rag(query: str) -> str:
    """Return the document whose embedding is most similar to *query*.

    The query is embedded with the module-level ``model`` and compared, by
    cosine similarity, against the precomputed ``doc_embeddings``. Only the
    retrieval step is performed: the best-matching entry of ``documents`` is
    returned verbatim as the answer.

    Args:
        query: The question or search text to look up.

    Returns:
        The entry of ``documents`` with the highest cosine similarity to the
        query. On ties, the earliest document wins.

    Raises:
        ValueError: If there are no document embeddings to search.
    """
    # len() rather than truthiness so this also works if doc_embeddings is a
    # NumPy array instead of a list.
    if len(doc_embeddings) == 0:
        raise ValueError("vanilla_rag() needs at least one document to search")

    # Convert query to embedding
    query_emb = model.encode(query)

    # One similarity call against every document at once; the result has a
    # single row (the query), so [0] is the vector of per-document scores.
    scores = cosine_similarity([query_emb], doc_embeddings)[0]

    # argmax returns the first index of the maximum, i.e. the earliest
    # document among equally similar ones.
    best_idx = int(scores.argmax())

    # Return that document as the answer
    return documents[best_idx]

# 5. Example usage: run one lookup and print the question next to the
#    document that was retrieved for it.
query = "What is a core part of AI?"
answer = vanilla_rag(query)

# Each (label, text) pair is printed on its own line, label first.
for label, text in (("Query:", query), ("Answer:", answer)):
    print(label, text)


  from .autonotebook import tqdm as notebook_tqdm


Query: What is a core part of AI?
Answer: Machine learning is a core part of AI.
