In [1]:
%pip install transformers faiss-cpu numpy torch

Collecting transformers
  Downloading transformers-4.44.2-py3-none-any.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting faiss-cpu
  Downloading faiss_cpu-1.8.0.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.7 kB)
Collecting huggingface-hub<1.0,>=0.23.2 (from transformers)
  Downloading huggingface_hub-0.25.0-py3-none-any.whl.metadata (13 kB)
Collecting regex!=2019.12.17 (from transformers)
  Downloading regex-2024.9.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.5/40.5 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
Collecting safetensors>=0.4.1 (from transformers)
  Downloading safetensors-0.4.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Collecting tokenizers<0.20,>=0.19 (from transformers)
  Downloading tokenizers-0.19

In [1]:
import numpy as np
import faiss
import torch
from transformers import AutoTokenizer, AutoModel

In [2]:
# Step 1: Load the pretrained encoder and its matching tokenizer.
# Both are fetched by the same checkpoint name so they stay in sync; swap in
# another compatible checkpoint here if desired.
model_name = "distilbert-base-uncased"
model = AutoModel.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)



In [3]:
# Step 2: Prepare some documents for the vector database
# Toy corpus of five short sentences. The retrieval function looks results up
# by list position (documents[i]), so this order must match the order in which
# the vectors are added to the index.
documents = [
    "The cat sat on the mat.",
    "The dog chased the ball.",
    "Birds fly in the sky.",
    "Fish swim in the ocean.",
    "Tables have four legs."
]

In [5]:
# Step 3: Encode documents into vectors
def encode_documents(documents):
    """Embed a batch of texts as mask-aware mean-pooled encoder states.

    Args:
        documents: List of strings to embed. They are tokenized together and
            padded to the longest item in the batch (truncation is enabled).

    Returns:
        A NumPy array with one row per input text; the row width is the size
        of the last axis of the model's ``last_hidden_state``.
    """
    inputs = tokenizer(documents, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        token_states = model(**inputs).last_hidden_state
        # Padding positions also get hidden states. Averaging them in would make
        # a text's vector depend on what it happened to be batched with (the
        # corpus is encoded as a padded batch, a query on its own), so weight
        # every position by the attention mask before pooling.
        mask = inputs["attention_mask"].unsqueeze(-1).to(token_states.dtype)
        summed = (token_states * mask).sum(dim=1)
        token_counts = mask.sum(dim=1).clamp(min=1)  # never divide by zero
        embeddings = summed / token_counts
    return embeddings.numpy()

# Create the vector database
# Every document is embedded once up front. `dim` is the size of the second
# axis of the result (the embedding width) and is the value the FAISS index
# is constructed with.
document_vectors = encode_documents(documents)
dim = document_vectors.shape[1]

In [6]:
# Step 4: Build the FAISS index
# Vectors are added in the same order as `documents`, so a row position in the
# index doubles as a position in that list when results are looked up later.
index = faiss.IndexFlatL2(dim)  # Using L2 distance
index.add(document_vectors)  # Add document vectors to the index

In [7]:
# Step 5: Define a function for RAG
def retrieve_and_generate(query, k=1):
    """Return the text of the documents nearest to ``query``.

    The "generation" step is a placeholder for demonstration: the retrieved
    documents are joined with single spaces, nearest first.

    Args:
        query: The question or search text to embed and look up.
        k: Number of top results to retrieve. Defaults to 1.

    Returns:
        The retrieved documents joined by spaces. When the index holds fewer
        than ``k`` vectors, only the documents actually found are included.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError("k must be at least 1")

    # Encode the query with the same encoder used for the documents
    query_vector = encode_documents([query])

    # Retrieve top-k similar documents (distances are not used here)
    _distances, neighbor_ids = index.search(query_vector, k)

    # FAISS fills missing results with id -1 when it has fewer than k vectors.
    # Skip those: as a Python list index, -1 would silently return the last
    # document instead of signalling "no result".
    relevant_docs = [documents[i] for i in neighbor_ids[0] if i >= 0]

    # Simple "generation" (for demonstration, just concatenate)
    return " ".join(relevant_docs)

In [8]:
# Step 6: Use the RAG system
# Runs one query end to end and prints the retrieved text.
query = "What do animals do?"
response = retrieve_and_generate(query)
print("Response:", response)

Response: Fish swim in the ocean.


In [9]:
# Second example query. No document contains the word "barking", so whatever
# is returned was matched by embedding distance, not by a shared keyword.
query = "What do you know about barking "
response = retrieve_and_generate(query)
print("Response:", response)

Response: The dog chased the ball.
