# RAG with Native Python and SentenceTransformer


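This notebook demonstrates a simple Retrieval-Augmented Generation (RAG) pipeline in plain Python: the `sentence-transformers` library embeds a small set of policy documents and a user question, NumPy picks the best-matching document, and the OpenAI API generates an answer grounded in that document.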


In [12]:
# Step 1: Define Sample Documents
# Each policy snippet is a dict holding a "section" title and its "content" text.
documents = [
    {"section": "Pay Policies", "content": "Employees are paid bi-weekly via direct deposit."},
    {"section": "Leave of Absence", "content": "Employees must submit a leave request for approval."},
    {"section": "Internet Use", "content": "Company internet must be used for work-related tasks only."},
]
# Example question this small corpus should be able to answer:
# "What’s the internet usage policy?"

# The same extraction written two ways, to show that an explicit loop and a
# list comprehension produce identical lists of content strings.
texts = []
for doc in documents:
    texts.append(doc["content"])

out = [entry["content"] for entry in documents]

print(out)
print(texts)


['Employees are paid bi-weekly via direct deposit.', 'Employees must submit a leave request for approval.', 'Company internet must be used for work-related tasks only.']
['Employees are paid bi-weekly via direct deposit.', 'Employees must submit a leave request for approval.', 'Company internet must be used for work-related tasks only.']


In [14]:
# Show the list of content strings extracted from `documents`.
texts

['Employees are paid bi-weekly via direct deposit.',
 'Employees must submit a leave request for approval.',
 'Company internet must be used for work-related tasks only.']

In [16]:
# Step 2: Generate Embeddings using SentenceTransformers
# import os
# os.environ["TOKENIZERS_PARALLELISM"] = "false"
!pip install -q sentence-transformers


In [17]:
from sentence_transformers import SentenceTransformer

# Name of the pretrained embedding model to load.
embedding_model_name = "all-MiniLM-L6-v2"

# Load the model once; it is reused later to embed the user query.
model = SentenceTransformer(embedding_model_name)

# Embed every document text in a single call (one vector per entry of `texts`).
doc_vectors = model.encode(texts)

In [18]:
# Inspect the embeddings returned by `model.encode(texts)`.
doc_vectors

array([[ 0.02472513, -0.00908154,  0.0388713 , ...,  0.01965643,
         0.04260011, -0.0270714 ],
       [ 0.03315501,  0.04853382,  0.04736271, ...,  0.10182006,
         0.09159283,  0.00358372],
       [-0.07135905, -0.03066471,  0.03183768, ..., -0.04109796,
         0.06524777, -0.00688536]], dtype=float32)

In [19]:
# Step 3: User Query and Semantic Matching
import numpy as np

# The question to answer. It is embedded with the same model as the documents
# so that the query and document vectors can be compared directly.
query = "What’s the internet usage policy?"

# Encode a batch containing only the query, then take its single row.
query_batch = model.encode([query])
query_vec = query_batch[0]


In [21]:
# Inspect the embedding vector computed for `query`.
query_vec

array([ 2.39813561e-03, -4.11839783e-02, -2.52490658e-02, -4.67050374e-02,
        4.32102755e-03,  1.65876076e-02,  1.20891683e-01, -3.50352898e-02,
        2.16830079e-03, -1.62922734e-04,  2.62875743e-02,  9.05028209e-02,
       -2.66066529e-02, -1.82131957e-02,  3.06277610e-02,  1.67855043e-02,
        1.55614521e-02, -8.26497599e-02, -3.40456255e-02, -3.08671687e-02,
        7.89995119e-02, -3.16904001e-02,  1.35831600e-02,  9.12294374e-04,
       -1.05809420e-02,  3.91190834e-02, -3.48707661e-02,  8.64400427e-05,
       -3.52702551e-02,  3.56902480e-02,  9.55272932e-03, -3.57899293e-02,
        4.84948931e-03, -4.10226807e-02, -7.66861960e-02, -1.00646719e-01,
       -9.23561156e-02, -2.47272290e-03, -2.74321288e-02,  2.85045728e-02,
        2.86296159e-02, -7.78359026e-02, -2.46465160e-03,  9.98250395e-02,
        5.86106852e-02,  2.24836022e-02,  1.56629962e-04,  1.44218048e-02,
        5.55960927e-04,  3.22429538e-02,  1.03300080e-01,  3.42919901e-02,
        3.94683182e-02,  

In [22]:

# Step 3 (continued): Score every document against the query
import numpy as np


def cosine_similarity(matrix, vector):
    """Return the cosine similarity between each row of `matrix` and `vector`.

    A raw dot product equals cosine similarity only when both operands have
    unit length. Dividing by the norms makes the score correct regardless of
    whether the embedding model normalizes its output.

    Parameters
    ----------
    matrix : array-like, shape (n_rows, dim)
        One embedding per row.
    vector : array-like, shape (dim,)
        The embedding to compare every row against.

    Returns
    -------
    numpy.ndarray, shape (n_rows,)
        Cosine similarity per row; rows (or a `vector`) of zero length
        score 0 instead of producing NaN.
    """
    matrix = np.asarray(matrix)
    vector = np.asarray(vector)
    dots = matrix @ vector
    norms = np.linalg.norm(matrix, axis=1) * np.linalg.norm(vector)
    # Avoid division by zero: a zero-length vector has a zero dot product,
    # so dividing by 1 yields a similarity of 0.
    safe_norms = np.where(norms > 0, norms, 1.0)
    return dots / safe_norms


# Compute cosine similarity between each document embedding and the query.
sims = cosine_similarity(doc_vectors, query_vec)
sims

array([0.12655607, 0.09747529, 0.44755283], dtype=float32)

In [40]:
# Position of the highest similarity score, i.e. the best-matching document.
top_idx = sims.argmax()

# Display the scores next to the winning index.
(sims, top_idx)

(array([0.12655607, 0.09747529, 0.44755283], dtype=float32), 2)

In [42]:
# Look up the best-matching document by its index.
top_doc = documents[top_idx]

# Print each field with its label, then the whole record.
for label, field in (("Matched section:", "section"), ("Matched content:", "content")):
    print(label, top_doc[field])
print(top_doc)

Matched section: Internet Use
Matched content: Company internet must be used for work-related tasks only.
{'section': 'Internet Use', 'content': 'Company internet must be used for work-related tasks only.'}


In [52]:

# Step 4: Use OpenAI to generate response (if you have an API key)
# pip install openai --quiet

from openai import OpenAI
import os
import openai

# Client constructed with no explicit arguments.
client = OpenAI()

# The system message restricts the model to the supplied context (and asks for
# a poem); the user message carries the retrieved document plus the question.
# To compare against an un-augmented baseline, send only
# f"Question: {query}" as the user message and drop the poem instruction.
system_prompt = "You are an assistant who answers only based on the given context. Make it sound like a poem!"
user_prompt = f"Context: {top_doc['content']}\n\nQuestion: {query}"

chat_messages = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": user_prompt},
]

response = client.chat.completions.create(
    model="gpt-4.1-mini",
    messages=chat_messages,
)

# The generated text is the message content of the first choice.
answer_text = response.choices[0].message.content
print("LLM Answer:", answer_text)


LLM Answer: Within these walls, the net we weave,
For work alone, its threads conceived.
No wand’ring far on leisure’s shore,
Its realm is tasks that duty bore.

So mind the rule, keep purpose pure,
For work-related use, ensure.
The internet’s a tool, not play,
Respect its course, throughout the day.
