In [None]:
!pip install transformers sentence-transformers faiss-cpu langchain

In [None]:
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Load our document
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale encoding.
# NOTE(review): assumes the file is UTF-8 encoded -- confirm.
with open("/content/myknowledge.txt", encoding="utf-8") as f:
    knowledge_text = f.read()

# 1. Initialize the Text Splitter
# The recursive splitter prefers to break on paragraphs ("\n\n"), then on
# newlines ("\n"), then on spaces (" "), so related text stays together
# as much as possible.
splitter_options = dict(
    chunk_size=150,       # upper bound on a chunk, measured by length_function
    chunk_overlap=20,     # amount shared between neighbouring chunks for context
    length_function=len,  # plain character count
)
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# 2. Create the chunks
chunks = text_splitter.split_text(knowledge_text)

# Show how many chunks were produced, then each chunk under a 1-based header.
chunk_count = len(chunks)
print(f"We have {chunk_count} chunks:")
for number, chunk in enumerate(chunks, start=1):
    print(f"--- Chunk {number} ---\n{chunk}\n")

In [None]:
from sentence_transformers import SentenceTransformer

# 1. Load the embedding model
# 'all-MiniLM-L6-v2' is a small, fast sentence-embedding model.
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

# 2. Embed all our chunks
# Encoding every chunk can take a moment.
chunk_embeddings = model.encode(chunks)

# Report the shape of the resulting embeddings.
embeddings_shape = chunk_embeddings.shape
print(f"Shape of our embeddings: {embeddings_shape}")

In [None]:
from transformers import pipeline

# 1. Load the generative model
# A small, instruction-tuned model from Google, wrapped in a
# text2text-generation pipeline (prompt in, generated text out).
GENERATION_TASK = 'text2text-generation'
GENERATION_MODEL_NAME = 'google/flan-t5-small'
generator = pipeline(task=GENERATION_TASK, model=GENERATION_MODEL_NAME)

# --- This is our RAG pipeline function ---
def answer_question(query, k=2):
    """Answer ``query`` using retrieval-augmented generation.

    The function reads four notebook-level globals, which must exist when the
    function is *called* (not when it is defined):

    * ``model``     -- embedding model used to encode the query,
    * ``index``     -- FAISS index searched for the nearest chunk embeddings,
    * ``chunks``    -- list of text chunks; positions must line up with the
                       vectors stored in ``index``,
    * ``generator`` -- text generation pipeline that produces the answer.

    Args:
        query: The user's question as a string.
        k: Number of nearest chunks to retrieve as context. Defaults to 2,
            the value that was previously hard-coded.

    Returns:
        The ``generated_text`` of the first result returned by ``generator``.

    Side effects:
        Prints the retrieved context to stdout.
    """
    # 1. RETRIEVE
    # Embed the user's query (cast to float32 before handing it to FAISS)
    query_embedding = model.encode([query]).astype('float32')

    # Search the FAISS index for the top k most similar chunks
    _, indices = index.search(query_embedding, k)

    # Get the actual text chunks from our original 'chunks' list.
    # FAISS pads the result with -1 when the index holds fewer than k vectors;
    # those must be skipped, otherwise chunks[-1] would silently pull in the
    # last chunk as if it were a real match.
    retrieved_chunks = [chunks[i] for i in indices[0] if i >= 0]
    context = "\n\n".join(retrieved_chunks)

    # 2. AUGMENT
    # Combine the retrieved context with the user's query into one prompt.
    prompt_template = f"""
    Answer the following question using *only* the provided context.
    If the answer is not in the context, say "I don't have that information."

    Context:
    {context}

    Question:
    {query}

    Answer:
    """

    # 3. GENERATE
    # Feed the augmented prompt to our generative model
    answer = generator(prompt_template, max_length=100)
    print(f"--- CONTEXT ---\n{context}\n")
    return answer[0]['generated_text']

In [None]:
from faiss import IndexFlatL2

# 3. Create a FAISS Index
# FAISS performs efficient similarity search. A flat L2 (Euclidean distance)
# index is used here; it is constructed with the embedding dimension, taken
# from the second axis of chunk_embeddings (384 for all-MiniLM-L6-v2).
embedding_dim = chunk_embeddings.shape[1]
index = IndexFlatL2(embedding_dim)

# Store every chunk embedding in the index
index.add(chunk_embeddings)

# Report how many vectors the index now holds.
vector_count = index.ntotal
print(f"Number of vectors in the index: {vector_count}")

In [None]:
# First sample query: echo the question, run the RAG pipeline (which prints
# the retrieved context), then show the generated answer.
query_1 = "What is the WFH policy?"
print(f"Query: {query_1}")
answer_1 = answer_question(query_1)
print(f"Answer: {answer_1}\n")

In [None]:
# Second sample query: echo the question, run the RAG pipeline (which prints
# the retrieved context), then show the generated answer.
query_2 = "What is the company's dental plan?"
print(f"Query: {query_2}")
answer_2 = answer_question(query_2)
print(f"Answer: {answer_2}\n")