# Query Variation and Augmentation.

In [59]:
from llama_index.core import Settings
from llama_index.core.query_engine.multistep_query_engine import MultiStepQueryEngine
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.embeddings.google_genai import GoogleGenAIEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
from llama_index.core.indices.query.query_transform.base import (
    StepDecomposeQueryTransform,
)
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from typing import Any, List, Optional, Sequence, Union
from llama_index.core import VectorStoreIndex

import asyncio
import chromadb
import logging as log
import os
import pprint

In [60]:
# Layout of every log record: "<module>.<function>():<line> <timestamp>" on the
# first line, the level name (left-aligned, padded to 5 chars) and the message
# on the second, followed by a newline so records end with an empty separator.
FORMAT_STRING = "%(module)s.%(funcName)s():%(lineno)d %(asctime)s\n[%(levelname)-5s] %(message)s\n"

# Request ERROR as the root logger threshold. basicConfig() does nothing when
# the root logger already has handlers, so this only takes effect on first use.
log.basicConfig(
    format=FORMAT_STRING,
    level=log.ERROR,
)

In [61]:
# nest_asyncio is applied once, before any cell that issues queries.
# NOTE(review): presumably needed because the notebook kernel already runs an
# event loop and the query engines call into asyncio — confirm it is still
# required, and consider moving this import up to the main import cell.
import nest_asyncio
nest_asyncio.apply()

In [62]:
# Names of the environment variables this notebook depends on.
GOOGLE_API_KEY_NAME = 'GOOGLE_API_KEY'
OPEN_AI_KEY_NAME = 'OPENAI_API_KEY'
TAI_DATASET_ROOT_ENV_VAR = 'TAI_DATASET_ROOT'

# Fail fast (AssertionError) if any of them is missing from the environment,
# checked in the order Google key, OpenAI key, dataset root.
assert all(
    env_name in os.environ
    for env_name in (GOOGLE_API_KEY_NAME, OPEN_AI_KEY_NAME, TAI_DATASET_ROOT_ENV_VAR)
)

# Gemini model used for the direct GoogleGenAI calls in this notebook.
model_name = "gemini-2.0-flash-lite"

In [63]:
# Smoke test of the Gemini model: send one completion request and show only
# the first 500 characters of the answer, followed by a "[...]" marker line.
llm = GoogleGenAI(model=model_name)
response = llm.complete('Who is Richard Feynman')
print(f"{str(response)[:500]}\n[...]")

_client._send_single_request():1025 2025-04-06 17:16:35,321
[INFO ] HTTP Request: GET https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-lite "HTTP/1.1 200 OK"

models.generate_content():4934 2025-04-06 17:16:35,331
[INFO ] AFC is enabled with max remote calls: 10.

_client._send_single_request():1025 2025-04-06 17:16:40,441
[INFO ] HTTP Request: POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-lite:generateContent "HTTP/1.1 200 OK"

models.generate_content():4945 2025-04-06 17:16:40,447
[INFO ] AFC remote call 1 is done.



Richard Feynman (1918-1988) was a highly influential American theoretical physicist. He is widely regarded as one of the most brilliant and original minds of the 20th century. Here's a breakdown of his key contributions and characteristics:

**Key Contributions to Physics:**

*   **Quantum Electrodynamics (QED):** Feynman is best known for his work on QED, the theory describing the interaction of light and matter. He developed a groundbreaking approach using:
    *   **Feynman Diagrams:** These 
[...]


In [64]:
# The vector database was created using the OpenAI embedding, so (presumably)
# queries must be embedded with the same OpenAI model configured below.

# Disabled Gemini alternative.
# NOTE(review): the disabled embedding line pairs GoogleGenAIEmbedding with
# "text-embedding-3-small", the same model name passed to OpenAIEmbedding
# below — confirm the intended Google model name before re-enabling it.
# Settings.llm = GoogleGenAI(model=model_name)
# Settings.embed_model = GoogleGenAIEmbedding(model="text-embedding-3-small")

# Global LlamaIndex defaults: OpenAI chat model (temperature 0) and OpenAI embeddings.
Settings.llm = OpenAI(temperature=0, model="gpt-4o-mini")
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")

In [65]:
# Query transform handed to the multi-step query engine below. It uses the
# globally configured LLM and, being verbose, prints the current and the
# newly generated query at each step.
step_decompose_transform = StepDecomposeQueryTransform(
    llm=Settings.llm,
    verbose=True,
)

In [66]:
# Resolve the on-disk location of the Chroma database below the dataset root.
# The root directory is read from the environment variable whose name is held
# in TAI_DATASET_ROOT_ENV_VAR (defined in the configuration cell). The name
# TAI_DATASET_ROOT_NAME used previously is not defined in any visible cell and
# raised NameError on a fresh kernel.
assert TAI_DATASET_ROOT_ENV_VAR in os.environ, (
    f"environment variable {TAI_DATASET_ROOT_ENV_VAR} is not set"
)
vectorDbPath = os.path.join(os.environ[TAI_DATASET_ROOT_ENV_VAR], 'ai_tutor_knowledge_vectdb')
print(f'vectorDbPath: {vectorDbPath}')

# Load the vector store from the local storage. get_collection() is used, so
# the "ai_tutor_knowledge" collection is expected to exist already.
db = chromadb.PersistentClient(path=vectorDbPath)
chroma_collection = db.get_collection("ai_tutor_knowledge")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

vectorDbPath: /home/minguzzi/repo/towards_ai_course/dataset/ai_tutor_knowledge_vectdb


In [67]:
# Build the index object directly on top of the already-populated Chroma
# vector store loaded in the previous cell.
vector_index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

In [78]:
# Multi Step Query Engine
# A default query engine over the vector index is wrapped by MultiStepQueryEngine
# together with the step-decompose transform created in an earlier cell.
query_engine_from_vector_db = vector_index.as_query_engine()

# NOTE(review): the index summary text is kept exactly as written, including the
# "quesion" typo. The captured output shows the same sub-question generated
# twice, so the "don't repeat" instruction may not be effective — consider
# rewording it.
multi_step_query_engine = MultiStepQueryEngine(
    query_transform=step_decompose_transform,
    query_engine=query_engine_from_vector_db,
    index_summary=(
        "Used to answer the Questions about RAG, Machine Learning, Deep Learning, "
        "and Generative AI, Note: Don't absolutely repeat the Same quesion, "
        "for any reason."
    ),
)

# Run one compound question through the engine and show the final answer text.
response = multi_step_query_engine.query(
    "Write about Llama 3.1 Model, BERT and PEFT methods"
)
print(response.response)

_client._send_single_request():1025 2025-04-06 17:23:45,296
[INFO ] HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



[1;3;33m> Current query: Write about Llama 3.1 Model, BERT and PEFT methods
[0m[1;3;38;5;200m> New query: What are the key features of the Llama 3.1 Model?
[0m

_client._send_single_request():1025 2025-04-06 17:23:46,013
[INFO ] HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"

_client._send_single_request():1025 2025-04-06 17:23:50,212
[INFO ] HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"

_client._send_single_request():1025 2025-04-06 17:23:51,338
[INFO ] HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



[1;3;33m> Current query: Write about Llama 3.1 Model, BERT and PEFT methods
[0m[1;3;38;5;200m> New query: What are the key features of the Llama 3.1 Model?
[0m

_client._send_single_request():1025 2025-04-06 17:23:57,072
[INFO ] HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"

_client._send_single_request():1025 2025-04-06 17:24:01,475
[INFO ] HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"

_client._send_single_request():1025 2025-04-06 17:24:02,806
[INFO ] HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



[1;3;33m> Current query: Write about Llama 3.1 Model, BERT and PEFT methods
[0m[1;3;38;5;200m> New query: What are the key features of the BERT model?
[0m

_client._send_single_request():1025 2025-04-06 17:24:05,264
[INFO ] HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"

_client._send_single_request():1025 2025-04-06 17:24:08,950
[INFO ] HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"

_client._send_single_request():1025 2025-04-06 17:24:16,937
[INFO ] HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



The Llama 3.1 model is a state-of-the-art AI language model developed by Meta, featuring significant advancements in scale and capabilities. It is the largest model from Meta, trained on over 15 trillion tokens using more than 16,000 H100 GPUs. One of its standout features is the extended context length of 128K, which allows it to process longer text inputs effectively. The model excels in reasoning and coding tasks, demonstrating strong logical reasoning, problem-solving, and analytical skills. Additionally, it supports zero-shot tool use, enabling it to perform tasks without prior training on specific tools. With approximately 50% of its training data consisting of multilingual tokens, Llama 3.1 is adept at understanding and processing multiple languages. It also outperforms competing models in various benchmarks, particularly in mathematical reasoning and complex reasoning tasks.

BERT, on the other hand, is a foundational model in natural language processing characterized by its bi

_client._send_single_request():1025 2025-04-06 17:20:28,893
[INFO ] HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



[1;3;33m> Current query: Write about Llama 3.1 Model, BERT and PEFT methods
[0m[1;3;38;5;200m> New query: What are the key features of the Llama 3.1 Model?
[0m

_client._send_single_request():1025 2025-04-06 17:20:32,376
[INFO ] HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"

_client._send_single_request():1025 2025-04-06 17:20:36,472
[INFO ] HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"

_client._send_single_request():1025 2025-04-06 17:20:37,905
[INFO ] HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



[1;3;33m> Current query: Write about Llama 3.1 Model, BERT and PEFT methods
[0m[1;3;38;5;200m> New query: What are the key features of the Llama 3.1 Model?
[0m

_client._send_single_request():1025 2025-04-06 17:20:39,954
[INFO ] HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"

_client._send_single_request():1025 2025-04-06 17:20:44,357
[INFO ] HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"

_client._send_single_request():1025 2025-04-06 17:20:44,971
[INFO ] HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



[1;3;33m> Current query: Write about Llama 3.1 Model, BERT and PEFT methods
[0m[1;3;38;5;200m> New query: What are the key features of BERT?
[0m

_client._send_single_request():1025 2025-04-06 17:20:47,429
[INFO ] HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"

_client._send_single_request():1025 2025-04-06 17:20:51,217
[INFO ] HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"

_client._send_single_request():1025 2025-04-06 17:21:00,228
[INFO ] HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



The Llama 3.1 model is a state-of-the-art AI language model developed by Meta, featuring a large scale with training on over 15 trillion tokens using more than 16,000 H100 GPUs. It supports an extended context length of 128K, allowing it to process longer inputs effectively. The model excels in reasoning and coding capabilities, generating high-quality code and demonstrating strong problem-solving skills. It also supports zero-shot tool use, enabling it to perform tasks without prior training on specific tools. With approximately 50% of its training data being multilingual, Llama 3.1 is adept at understanding and processing multiple languages. Its benchmark performance surpasses that of competing models in areas such as mathematical reasoning and long text processing.

BERT, or Bidirectional Encoder Representations from Transformers, is another influential model characterized by its bidirectional processing, which captures the full context of sentences by analyzing text in both directi

In [76]:
# Show every intermediate (sub-question, answer) pair stored under the
# 'sub_qa' key of the response metadata, one labelled section per pair.
for query, resp in response.metadata['sub_qa']:
    print("QUERY:", query, "", "RESPONSE:", resp, "", sep="\n")

QUERY:
What are the key features of the Llama 3.1 Model?

RESPONSE:
The Llama 3.1 model boasts several key features, including:

1. **Model Scale and Training**: It is the largest model from Meta, trained on over 15 trillion tokens using more than 16,000 H100 GPUs.

2. **Extended Context Length**: The model supports a context length of 128K, enhancing its ability to handle longer inputs.

3. **Enhanced Capabilities**: It demonstrates improved reasoning and coding abilities compared to its predecessors.

4. **Tool Use and Agentic Behaviors**: The model supports zero-shot tool use, allowing it to perform tasks without prior training on specific tools.

5. **Multilingual Processing**: Approximately 50% of its training data consists of multilingual tokens, enabling effective understanding and processing of multiple languages.

6. **Programming and Reasoning Skills**: Llama 3.1 excels in generating high-quality code and exhibits strong logical reasoning, problem-solving, and analytical capa

In [77]:
# Dump the source nodes attached to the response: id, text and score of each,
# with a "-_" ruler line after every node.
for src in response.source_nodes:
    print(f"Node ID\t {src.node_id}")
    print(f"Text\t {src.text}")
    print(f"Score\t {src.score}")
    print("-_" * 20)

Node ID	 d3fdc971-e094-4ae2-b348-7400cc2f2fe5
Text	 
Question: What are the key features of the Llama 3.1 Model?
Answer: The Llama 3.1 model boasts several key features, including:

1. **Model Scale and Training**: It is the largest model from Meta, trained on over 15 trillion tokens using more than 16,000 H100 GPUs.

2. **Extended Context Length**: The model supports a context length of 128K, enhancing its ability to handle longer inputs.

3. **Enhanced Capabilities**: It demonstrates improved reasoning and coding abilities compared to its predecessors.

4. **Tool Use and Agentic Behaviors**: The model supports zero-shot tool use, allowing it to perform tasks without prior training on specific tools.

5. **Multilingual Processing**: Approximately 50% of its training data consists of multilingual tokens, enabling effective understanding and processing of multiple languages.

6. **Programming and Reasoning Skills**: Llama 3.1 excels in generating high-quality code and exhibits strong lo

In [83]:

# StepDecomposeQueryTransform with verbose output
# This second transform is built on `llm` (the GoogleGenAI instance created in
# an earlier cell) rather than on Settings.llm; its repr is printed for inspection.
other_decompose_transform = StepDecomposeQueryTransform(
    verbose=True,
    llm=llm,
)
pprint.pp(other_decompose_transform)



<llama_index.core.indices.query.query_transform.base.StepDecomposeQueryTransform object at 0x7fe085a3a5f0>


In [None]:
# Define a complex query
query = "How can artificial intelligence be used in healthcare and education?"

# Decompose the query using StepDecomposeQueryTransform.
# The transform is invoked through its own run() method: the object returned by
# as_query_component() is a query-pipeline component and exposes no .query()
# method, so the previous call chain could not execute (this cell had never
# been run). run() accepts the query string and returns the transformed query.
# NOTE(review): run() also takes an optional `metadata` dict; MultiStepQueryEngine
# supplies an index summary that way — pass one here if the decomposition
# should be grounded in the index description.
decomposed_result = other_decompose_transform.run(query)

# Print the decomposed results
print("Decomposed Query Result:", decomposed_result)
