Dynamic Query Builders – use an LLM to rephrase, enrich, or clarify the user's query before it is sent to the retriever.

In [1]:
# ===================== INSTALL DEPENDENCIES =====================
!pip install -q langchain sentence-transformers faiss-cpu pypdf groq langchain-community langchain-groq

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m44.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m313.2/313.2 kB[0m [31m22.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m131.1/131.1 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m53.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.2/45.2 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m113.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m89.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
# ================== IMPORTS ==================
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_groq import ChatGroq

In [4]:
# ================== LOAD & SPLIT PDF ==================
loader = PyPDFLoader("/content/solid-python.pdf")

# Load whole pages only. load_and_split() would already chunk the pages with
# LangChain's default text splitter, and the splitter below would then chunk
# those chunks a second time.
documents = loader.load()

# Single, explicit chunking pass that feeds the vector store.
# NOTE(review): CharacterTextSplitter only cuts on its separator (default "\n\n"),
# so chunks can exceed chunk_size when the extracted PDF text has few blank
# lines — consider RecursiveCharacterTextSplitter if oversized chunks appear.
splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = splitter.split_documents(documents)

In [5]:
# ================== EMBEDDINGS + VECTORSTORE ==================
# Sentence-transformers model used to embed every chunk (and, later, each query).
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# FAISS index built from the chunked documents.
vectorstore = FAISS.from_documents(documents=docs, embedding=embedding_model)

# Retriever view over the index: MMR search returning 5 documents per query.
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 5},
)

  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [6]:
# ================== DEFINE LLM ==================
from google.colab import userdata

# Groq-hosted chat model. The API key is read from the Colab secret store so it
# is never written into the notebook itself.
llm = ChatGroq(
    api_key=userdata.get("GROQ_API_KEY"),
    model_name="llama-3.3-70b-versatile",
)
llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x79db04211690>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x79daf4b74890>, model_name='llama-3.3-70b-versatile', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [7]:
# ================== DYNAMIC QUERY BUILDER ==================
# Prompt asking the LLM to restate the user's question in a form better suited
# to document retrieval; {question} is filled in at invoke time.
query_builder_prompt = PromptTemplate.from_template(
    "You are a smart assistant. "
    "Rewrite the following user question to make it clearer "
    "and more suitable for document retrieval:\n\n"
    "Original Question: {question}\n\n"
    "Rewritten Query:"
)

# prompt -> chat model -> plain string
query_rewriter = (
    query_builder_prompt
    | llm
    | StrOutputParser()
)

In [8]:
# ================== FINAL PROMPT TEMPLATE ==================
# Answer-generation prompt: the retrieved context first, then the user's question.
prompt = PromptTemplate.from_template(
    "Use the following context to answer the question:\n\n"
    "{context}\n\n"
    "Question: {question}"
)

In [9]:
# ================== FINAL DYNAMIC RAG FLOW ==================

def run_dynamic_rag(user_question):
    """Answer a question with a rewrite -> retrieve -> generate pipeline.

    The question is first rewritten by ``query_rewriter``. The rewritten text is
    used only to query ``retriever``; the final prompt still carries the user's
    original wording alongside the retrieved context.

    Args:
        user_question: The question as asked by the user.

    Returns:
        The ``content`` of the LLM response to the final prompt.
    """
    # Step 1: Dynamically rewrite the query using the LLM
    rewritten_query = query_rewriter.invoke({"question": user_question})

    # If the rewrite comes back blank, search with the original question instead.
    search_query = rewritten_query.strip() or user_question

    # Step 2: Retrieve documents based on the rewritten query.
    # invoke() replaces the deprecated get_relevant_documents().
    retrieved_docs = retriever.invoke(search_query)

    # Step 3: Build context for prompt
    context = "\n\n".join(doc.page_content for doc in retrieved_docs)

    # Step 4: Format final prompt
    final_prompt = prompt.format(context=context, question=user_question)

    # Step 5: Generate final answer from LLM
    response = llm.invoke(final_prompt)
    return response.content

In [10]:
# ================== ASK A QUESTION ==================
question = "Explain Open Close Principle?"

# Run the full pipeline: rewrite the query, retrieve context, generate the answer.
answer = run_dynamic_rag(question)

# Header, a blank line, then the answer.
print("Final Answer:\n", answer, sep="\n")

  retrieved_docs = retriever.get_relevant_documents(rewritten_query)


Final Answer:

The Open-Closed Principle (OCP) is one of the S.O.L.I.D. principles of object-oriented design. It states that a class should be:

**Open** for extension, meaning that you can add new functionality to the class without modifying its existing code.

**Closed** for modification, meaning that you should not have to modify the existing code of the class to add new functionality.

In other words, the OCP says that a class should be designed in such a way that you can add new features or behaviors to it without changing its underlying structure or code. This allows you to extend the class without breaking existing functionality or affecting other parts of the system that depend on it.

The OCP is often achieved by using abstraction, polymorphism, and inheritance. By depending on abstractions rather than concrete implementations, you can create classes that are more flexible and easier to extend.

For example, consider a `CarWash` class that has a method `washCar()` that perform