In [1]:
# Toy knowledge base for the retrieval demo: one short policy statement per entry.
# Each string is embedded as a single unit and returned verbatim by retrieve_context.
documents = [
    "Employees must wear safety helmets inside the production area.",
    "Machine maintenance should be performed every 6 months.",
    "Overtime requires manager approval.",
    "All visitors must sign in at the reception desk.",
    "Production reports must be submitted weekly.",
    "Emergency exits should never be blocked.",
    "Quality checks are mandatory before shipment.",
    "Data backups are performed every night.",
    "Unauthorized access to machines is prohibited.",
    "Training is required before operating heavy equipment."
]


In [2]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Sentence-embedding model shared by the corpus (here) and by queries (in retrieve_context),
# so both live in the same vector space.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
# Embed the whole corpus once up front; each query then needs only one encode call.
doc_embeddings = embedder.encode(documents)
# Shown as the cell result; the recorded run reports (10, 384): one row per document.
doc_embeddings.shape

Loading weights: 100%|██████████| 103/103 [00:00<00:00, 560.97it/s, Materializing param=pooler.dense.weight]                             
[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m


(10, 384)

In [4]:
def retrieve_context(query, top_k=3):
    """Return the ``top_k`` documents most similar to ``query``.

    The query is embedded with the notebook-level ``embedder`` and compared
    against the precomputed ``doc_embeddings`` using cosine similarity.

    Args:
        query: Query text to embed.
        top_k: Maximum number of documents to return. Values ``<= 0`` yield an
            empty list; values larger than the corpus return every document.

    Returns:
        A list of ``(document, score)`` tuples ordered from highest to lowest
        similarity, where ``score`` is a plain Python ``float``.
    """
    # Guard: the slice ``[-0:]`` selects the *whole* array, so without this
    # check top_k=0 would return every document instead of none (and a
    # negative top_k would silently drop leading entries).
    if top_k <= 0:
        return []

    query_embedding = embedder.encode(query)
    # cosine_similarity expects 2-D inputs; wrap the single query vector and
    # take row 0 of the (1, n_docs) result.
    scores = cosine_similarity([query_embedding], doc_embeddings)[0]
    # argsort is ascending: keep the last top_k indices, then reverse them so
    # the best match comes first.
    top_indices = np.argsort(scores)[-top_k:][::-1]
    return [(documents[i], float(scores[i])) for i in top_indices]

In [5]:
def build_prompt(query, retrieved):
    """Assemble the verbose, context-only prompt for the generator.

    NOTE(review): a second ``build_prompt`` is defined further down in this
    notebook; on a top-to-bottom run that later definition replaces this one.

    Args:
        query: The user's question, inserted verbatim.
        retrieved: Iterable of ``(document, score)`` pairs; only the document
            text is used, one bullet per document.

    Returns:
        The prompt string, ending with a newline after the answer cue.
    """
    bullets = "\n".join(f"- {passage}" for passage, _score in retrieved)
    return (
        "You are an assistant that answers using ONLY the context below.\n"
        "If the answer is not present in the context, say: "
        "\"I don't know based on the provided context.\"\n"
        "\n"
        "Context:\n"
        f"{bullets}\n"
        "\n"
        f"Question: {query}\n"
        "\n"
        "Answer in 2-4 sentences:\n"
    )


In [29]:
def build_prompt(query, retrieved):
    """Assemble the short, context-only prompt for the generator.

    This reuses the name of an earlier ``build_prompt`` in the notebook and
    therefore replaces it once this cell has run.

    Args:
        query: The user's question, inserted verbatim.
        retrieved: Iterable of ``(document, score)`` pairs; only the document
            text is used, one bullet per document.

    Returns:
        The prompt string, ending with ``Answer:`` and no trailing newline.
    """
    bullet_lines = [f"- {passage}" for passage, _score in retrieved]
    sections = (
        "Answer the question using ONLY the context.\n"
        "If the context does not contain the answer, say: I don't know.",
        "Context:\n" + "\n".join(bullet_lines),
        f"Question: {query}",
        "Answer:",
    )
    # Sections are separated by exactly one blank line.
    return "\n\n".join(sections)


In [30]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Checkpoint used for answer generation. The tokenizer and the seq2seq model are
# loaded from the same name so their vocabularies match.
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

def generate_answer(prompt, max_new_tokens=120):
    """Generate a text answer for ``prompt`` with the notebook-level model.

    Args:
        prompt: Full prompt text; it is tokenized with truncation enabled.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The first generated sequence decoded to a string, with special
        tokens stripped.
    """
    encoded = tokenizer(prompt, truncation=True, return_tensors="pt")
    # Sampling is disabled so repeated calls give the same answer.
    generated = model.generate(
        **encoded,
        do_sample=False,
        max_new_tokens=max_new_tokens,
    )
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)


Loading weights: 100%|██████████| 282/282 [00:00<00:00, 528.36it/s, Materializing param=shared.weight]                                                       


In [31]:
def rag_answer(query, top_k=3):
    """Answer ``query`` with retrieval-augmented generation.

    Retrieves the ``top_k`` best-matching documents, builds a prompt from
    them, and asks the generator for an answer.

    Args:
        query: The user's question.
        top_k: Number of documents to retrieve as context.

    Returns:
        A tuple ``(answer, retrieved)``: the generated text and the
        retrieval result that was used as context.
    """
    passages = retrieve_context(query, top_k=top_k)
    reply = generate_answer(build_prompt(query, passages))
    return reply, passages

In [32]:
# Smoke test: run the full retrieve -> prompt -> generate pipeline on a sample question
# and print only the generated answer.
answer, retrieved = rag_answer("What safety rules must employees follow?")
print(answer)


helmets inside the production area


In [28]:
# Same sample question, this time also listing the retrieved documents with their
# similarity scores so the retrieval step can be inspected.
# NOTE(review): this cell's execution count (28) is lower than the cells above it (29-32),
# so its recorded output was likely produced with earlier definitions — re-run after
# Restart & Run All to confirm.
answer, retrieved = rag_answer("What safety rules must employees follow?")
print("ANSWER:\n", answer)
print("\nRETRIEVED CONTEXT:")
for doc, score in retrieved:
    # Score first, formatted to three decimals, then the document text.
    print(f"{score:.3f} | {doc}")


ANSWER:
 [1]

RETRIEVED CONTEXT:
0.650 | Employees must wear safety helmets inside the production area.
0.361 | Overtime requires manager approval.
0.335 | Training is required before operating heavy equipment.
