In [1]:
!uv pip install sentence_transformers

[2mUsing Python 3.12.12 environment at: /usr[0m
[2mAudited [1m1 package[0m [2min 304ms[0m[0m


In [2]:
import numpy as np
from sentence_transformers import SentenceTransformer
from openai import OpenAI
from google.colab import userdata

In [3]:
# Simple vector database
class VectorDatabase:
    """Minimal in-memory vector store with brute-force cosine-similarity search.

    Records are kept in insertion order in ``self.vectors``. Each record is a
    dict with the keys ``"id"``, ``"vector"`` (a float32 NumPy array) and
    ``"metadata"``.
    """

    def __init__(self):
        # Store all records in a plain list (insertion order is preserved)
        self.vectors = []

    # Add vector to database
    def add_vector(self, vec_id, vector, metadata=None):
        """Append one record to the database.

        Args:
            vec_id: Identifier stored under ``"id"``. Uniqueness is not enforced.
            vector: Array-like of numbers; stored as a float32 NumPy array.
            metadata: Optional payload returned unchanged by ``search``.
        """
        record = {
            "id": vec_id,
            "vector": np.array(vector, dtype=np.float32),
            "metadata": metadata
        }

        self.vectors.append(record)

    # Retrieve all vectors from database
    def get_all_vectors(self):
        """Return the internal list of records (the list itself, not a copy)."""
        return self.vectors

    # Calculate cosine similarity between vectors
    def _cosine_similarity(self, vec_a, vec_b):
        """Return the cosine similarity of ``vec_a`` and ``vec_b``.

        Each vector is normalised by its own magnitude before the dot product,
        so the score does not depend on the vectors' scale. (Adding an epsilon
        to the product of the norms, as a division-by-zero guard, would shrink
        the score of small-magnitude vectors and distort the ranking.)

        Returns:
            The similarity in [-1, 1] (up to rounding), or ``0.0`` when either
            vector has zero magnitude and the similarity is undefined.
        """
        vec_a = np.asarray(vec_a)
        vec_b = np.asarray(vec_b)

        # Calculate the magnitude of each vector
        norm_a = np.linalg.norm(vec_a)
        norm_b = np.linalg.norm(vec_b)

        # A zero vector has no direction; report "no similarity" instead of dividing by zero
        if norm_a == 0 or norm_b == 0:
            return 0.0

        return np.dot(vec_a / norm_a, vec_b / norm_b)

    # Search for similar vectors and return the top_k results
    def search(self, query_vector, top_k = 3):
        """Rank every stored record by cosine similarity to ``query_vector``.

        Args:
            query_vector: Array-like query; converted to a float32 NumPy array.
            top_k: Maximum number of results to return. ``None`` returns all
                records; ``0`` returns an empty list.

        Returns:
            A list of dicts with the keys ``"id"``, ``"similarity"`` and
            ``"metadata"``, sorted by descending similarity. Records with equal
            similarity keep their insertion order.

        Raises:
            ValueError: If ``top_k`` is negative (a negative slice bound would
                silently drop results from the end of the ranking).
        """
        if top_k is not None and top_k < 0:
            raise ValueError(f"top_k must be non-negative, got {top_k}")

        query_vector = np.array(query_vector, dtype = np.float32)

        results = [
            {
                "id": record["id"],
                "similarity": self._cosine_similarity(query_vector, record["vector"]),
                "metadata": record["metadata"]
            }
            for record in self.vectors
        ]

        # list.sort is stable, so ties stay in insertion order
        results.sort(key=lambda x: x["similarity"], reverse=True)

        return results[:top_k]

In [4]:
# Instantiate vector database.
# This notebook-level `db` is the store that later cells populate and search.
db = VectorDatabase()

In [5]:
# Embedding model: load the 'all-MiniLM-L6-v2' SentenceTransformer.
# Later cells call `model.encode(...)` on both the documents and the query.
model = SentenceTransformer('all-MiniLM-L6-v2')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [6]:
# Documents for the vector database.
# Each sentence is embedded and stored as one record by the populate cell below.
sentences = [
    "Kunal's cat is playing with wool.",
    "Ashish was born on 1st September 1700.",
    "Dogs are loyal animals.",
    "I love eating pizza but only on Fridays.",
    "Manika was born in Bhopal."
]

In [7]:
# Populate vector database
#
# IDs that are already stored are skipped, so re-running this cell does not
# append duplicate records (duplicates would crowd the top_k search results).
# To re-embed edited sentences, re-run the cell that creates `db` first.
stored_ids = {record["id"] for record in db.get_all_vectors()}

for idx, sentence in enumerate(sentences):
    vec_id = f"sent_{idx}"
    if vec_id in stored_ids:
        continue

    # Create sentence embedding
    embedding = model.encode(sentence)

    # Add sentence embedding to the database, keeping the text as metadata
    db.add_vector(vec_id=vec_id, vector=embedding, metadata={"sentence": sentence})

In [8]:
# Instantiate OpenAI client.
# The API key is read through Colab's `userdata` helper instead of being written in the notebook.
OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
client = OpenAI(api_key = OPENAI_API_KEY)

In [9]:
# Query: the question sent to the model both with and without retrieved context
query = "When is Ashish's birthday?"

In [10]:
# Helper function to generate response using RAG
def generate_rag_response(client, query, top_k=3, llm_model="gpt-4o-mini",
                          embedder=None, vector_db=None):
    """Answer ``query`` with retrieval-augmented generation.

    The query is embedded, the ``top_k`` most similar records are fetched from
    the vector database, and their sentences are sent to the chat model as
    context together with the question.

    Args:
        client: OpenAI-style client exposing ``chat.completions.create``.
        query: The user's question.
        top_k: Number of records to retrieve as context.
        llm_model: Name of the chat model to call.
        embedder: Object with an ``encode(text)`` method. Defaults to the
            notebook-level ``model``.
        vector_db: Object with a ``search(vector, top_k=...)`` method whose
            results carry ``metadata["sentence"]``. Defaults to the
            notebook-level ``db``.

    Returns:
        The content of the first choice in the chat completion.
    """
    # Fall back to the notebook-level objects so existing two-argument calls keep working
    if embedder is None:
        embedder = model
    if vector_db is None:
        vector_db = db

    # Obtain relevant context from vector database
    query_embedding = embedder.encode(query)
    results = vector_db.search(query_embedding, top_k=top_k)

    # One retrieved sentence per line, each terminated by a newline
    context = "".join(f"{res['metadata']['sentence']}\n" for res in results)

    completion = client.chat.completions.create(
        model=llm_model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant. Use the provided context to answer accurately."},
            {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {query}"}
        ],
    )

    return completion.choices[0].message.content

In [11]:
# Generate answer with RAG: context for `query` is retrieved from `db` inside the helper
rag_answer = generate_rag_response(client, query)
print(rag_answer)

Ashish's birthday is on 1st September.


In [14]:
# Helper function to generate answer without RAG
def generate_response_without_rag(
    client,
    query,
    llm_model="gpt-4o-mini",
    system_prompt="You are a helpful assistant. Use the provided context to answer accurately.",
):
    """Answer ``query`` with the chat model alone, without any retrieved context.

    Args:
        client: OpenAI-style client exposing ``chat.completions.create``.
        query: The user's question; sent as the whole user message.
        llm_model: Name of the chat model to call.
        system_prompt: System message sent ahead of the question.

    Returns:
        The content of the first choice in the chat completion.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"{query}"},
    ]

    completion = client.chat.completions.create(
        model=llm_model,
        messages=messages,
    )

    return completion.choices[0].message.content

In [15]:
# Generate answer without RAG: the same query is sent with no retrieved context
no_rag_answer = generate_response_without_rag(client, query)
print(no_rag_answer)

I'm sorry, but I don't have any information about Ashish's birthday or personal details. If you have more context or specific information to share, I might be able to help you with something else!
