# Tools

In [80]:
# Load environment variables before any LLM client is created.
# NOTE(review): presumably this supplies the OpenAI API key used by the
# OpenAI(...) cells further down — confirm against the project's .env file.
from dotenv import load_dotenv
load_dotenv()

True

In [81]:
from llama_index.core.tools import FunctionTool

def addition(x: int, y: int) -> int:
    """Add two integers and return their sum."""
    # The docstring matters here: this function is wrapped as an LLM tool
    # below, and a tool without any description gives the model nothing
    # but the bare signature to decide when to call it.
    return x + y

def mystery(x: int, y: int) -> int:
    """Mystery function on two integers: returns the square of their sum."""
    # Computed as total * total, which is (x + y) * (x + y) as before.
    total = x + y
    return total * total

# Wrap each plain Python function in a FunctionTool so it can be handed to
# the LLM as a callable tool (addition first, then mystery).
addition_tool, mystry_tool = (
    FunctionTool.from_defaults(fn=func) for func in (addition, mystery)
)

In [82]:
from llama_index.llms.openai import OpenAI

# Offer both tools to the model and let it pick one from the prompt;
# verbose=True prints which function was called and with which arguments.
llm = OpenAI(model="gpt-4o-mini")
response = llm.predict_and_call(
    [
        addition_tool,
        mystry_tool,
    ],
    "Tell me the output of the mystry function on 2 and 9",
    verbose=True,
)

# Bare expression so the notebook renders the full response object.
response

=== Calling Function ===
Calling function: mystery with args: {"x": 2, "y": 9}
=== Function Output ===
121


AgentChatResponse(response='121', sources=[ToolOutput(content='121', tool_name='mystery', raw_input={'args': (), 'kwargs': {'x': 2, 'y': 9}}, raw_output=121, is_error=False)], source_nodes=[], is_dummy_stream=False, metadata=None)

# Example

In [83]:
from llama_index.core import SimpleDirectoryReader

# Load the paper's PDF from disk into a list of documents.
documents = SimpleDirectoryReader(
    input_files=[
        "../src/The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf",
    ],
).load_data()

Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 20 0 (offset 0)
Ignoring wrong pointing object 22 0 (offset 0)
Ignoring wrong pointing object 24 0 (offset 0)
Ignoring wrong pointing object 26 0 (offset 0)
Ignoring wrong pointing object 32 0 (offset 0)
Ignoring wrong pointing object 34 0 (offset 0)
Ignoring wrong pointing object 36 0 (offset 0)
Ignoring wrong pointing object 38 0 (offset 0)
Ignoring wrong pointing object 43 0 (offset 0)
Ignoring wrong pointing object 45 0 (offset 0)


In [84]:
#splitting
from llama_index.core.node_parser import SentenceSplitter

# Split the loaded documents into nodes using a SentenceSplitter configured
# with chunk_size=1024.
# NOTE(review): the unit of chunk_size is presumably tokens — confirm.
splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents)

In [85]:
# Inspect the first node: its text together with all attached metadata
# (page_label, file_name, ...), as requested by metadata_mode='all'.
print(nodes[0].get_content(metadata_mode='all'))

page_label: 1
file_name: The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf
file_path: ../src/The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf
file_type: application/pdf
file_size: 682557
creation_date: 2024-11-05
last_modified_date: 2024-10-30

1  The 7th Sense For Data-Driven Decision Mastery Raja Brundha A  Assistant Professor Department of Artificial  Intelligence and Data Science,  Sri Sai Ram Engineering College, Chennai, Tamil Nadu, India. rajabrundha.ai@sairam.edu.in   Abubacker S  Student Department of Artificial  Intelligence and Data Science, Sri Sai Ram Engineering College, Chennai, Tamil Nadu, India. nav.abubacker@gmail.com    Deva Praveen K  Student Department of Artificial  Intelligence and Data Science,  Sri Sai Ram Engineering College, Chennai, Tamil Nadu, India. devapraveen20@gmail.com     Abstract— Our project automates the manual data analytics pipeline, transforming it into a seamless, AI-powered process designed to revolutionize decision-making

In [86]:
#basic Query engine
from llama_index.core import VectorStoreIndex

# Build a vector index over the nodes, then a query engine on top of it that
# is configured with similarity_top_k=2.
vector_index = VectorStoreIndex(nodes)
query_engine = vector_index.as_query_engine(similarity_top_k=2)

In [40]:
# Using metadata filters for specific page retrieval
from llama_index.core.vector_stores import MetadataFilters

# Rebuild the query engine so retrieval is restricted to nodes whose
# page_label metadata equals '3' (page labels are stored as strings).
query_engine = vector_index.as_query_engine(
    similarity_top_k=2,
    filters=MetadataFilters.from_dicts(
        [{"key": "page_label", "value": "3"}]
    ),
)
response = query_engine.query("What is 7th Sense")
print(str(response))

The 7th Sense in this context refers to the project's approach to automating the data analysis process from start to finish. It involves connecting to the client's database, fetching relevant data tables for analysis, performing automated preprocessing tasks like data cleaning and imputation, generating an abstracted text file with metadata and statistical summaries, identifying outliers, conducting advanced analyses such as RFM and customer behavior clustering, converting data into embeddings using generative AI, and engaging decision-makers through a question-answering chatbox to provide precise insights and strategic recommendations tailored to their needs.


In [41]:
# Print the metadata of every node the answer was built from, to check that
# the page filter was honoured.
for source_node in response.source_nodes:
    print(source_node.metadata)

{'page_label': '3', 'file_name': 'The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_path': '../src/The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_type': 'application/pdf', 'file_size': 682557, 'creation_date': '2024-11-05', 'last_modified_date': '2024-10-30'}


In [88]:
from typing import List, Optional

from llama_index.core.tools import FunctionTool
from llama_index.core.vector_stores import FilterCondition, MetadataFilters


def vector_query(
    query: str,
    page_numbers: Optional[List[str]] = None
) -> str:
    """Perform a vector search over an index.
    
    query (str): the string query to be embedded.
    page_numbers (List[str]): Filter by set of pages. Leave BLANK if we want to perform a vector search
        over all pages. Otherwise, filter by the set of specified pages.
    
    """
    # The docstring above is kept verbatim: it is wrapped into a tool below
    # and NOTE(review): presumably forms the tool description the LLM sees.

    # The docstring promises that page_numbers may be left blank, so treat a
    # missing/None/empty value as "no page filter" instead of crashing.
    page_numbers = page_numbers or []

    # page_label metadata is stored as a string (e.g. '3'); coerce so that an
    # LLM passing integers still matches.
    metadata_dicts = [
        {"key": "page_label", "value": str(p)} for p in page_numbers
    ]

    # OR condition: a node matches if it is on ANY of the requested pages.
    query_engine = vector_index.as_query_engine(
        similarity_top_k=2,
        filters=MetadataFilters.from_dicts(
            metadata_dicts,
            condition=FilterCondition.OR
        )
    )
    response = query_engine.query(query)
    # Returned as the query engine's response object rather than plain text,
    # despite the `-> str` annotation; callers stringify it with str(...).
    # NOTE(review): the source_nodes read by later cells presumably derive
    # from this object — confirm before converting the return value to str.
    return response
    

# Expose vector_query to the LLM under the tool name "vector_tool".
vector_query_tool = FunctionTool.from_defaults(
    fn=vector_query,
    name="vector_tool",
)

In [90]:
from llama_index.llms.openai import OpenAI

# temperature=0 is set on the client for this call; the model has to
# translate the request into a vector_tool call with a page filter.
llm = OpenAI(model="gpt-4o-mini", temperature=0)
response = llm.predict_and_call(
    [vector_query_tool],
    "Tell me the report of page numbers 1",
    verbose=True,
)
print(str(response))

=== Calling Function ===
Calling function: vector_tool with args: {"query": "report", "page_numbers": ["1"]}
=== Function Output ===
The project described in the context automates the manual data analytics pipeline, transforming it into a seamless, AI-powered process designed to revolutionize decision-making and drive strategic innovation. By connecting to a client’s database, it automates data preprocessing, cleaning, and imputation, ensuring accurate data types through sophisticated feature-template similarity calculations. The system generates a comprehensive abstracted text file, detailing metadata, feature descriptions, missing values, and statistical summaries, alongside advanced analyses such as RFM, customer behavior clustering, and churn prediction. Leveraging cutting-edge generative AI with retrieval-augmented generation (RAG) and few-shot prompting, the project converts text files into embeddings, indexed in a vector database for insightful, similarity-based content retrieva

In [55]:
# List the metadata of each source node behind the response to confirm which
# pages were actually retrieved.
for source_node in response.source_nodes:
    print(source_node.metadata)

{'page_label': '1', 'file_name': 'The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_path': '../src/The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_type': 'application/pdf', 'file_size': 682557, 'creation_date': '2024-11-05', 'last_modified_date': '2024-10-30'}
{'page_label': '1', 'file_name': 'The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_path': '../src/The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_type': 'application/pdf', 'file_size': 682557, 'creation_date': '2024-11-05', 'last_modified_date': '2024-10-30'}


### Adding Summary Tool

In [91]:
from llama_index.core import SummaryIndex
from llama_index.core.tools import QueryEngineTool

# Summary index over the same nodes, queried with the tree_summarize
# response mode and async execution enabled.
summary_index = SummaryIndex(nodes)
summary_query_engine = summary_index.as_query_engine(
    use_async=True,
    response_mode="tree_summarize",
)

# Wrap the summary engine as a tool; the description is what tells the LLM
# when to prefer it over the vector tool.
summary_tool = QueryEngineTool.from_defaults(
    description="Useful for summarization questions related to the Given Document",
    query_engine=summary_query_engine,
)

In [75]:
# Offer both tools and ask a summarization question; the recorded output
# below shows the model routing this request to the query-engine (summary) tool.
response = llm.predict_and_call(
    [vector_query_tool, summary_tool], 
    "give the summary?", 
    verbose=True
)

=== Calling Function ===
Calling function: query_engine_tool with args: {"input": "give the summary?"}
=== Function Output ===
The project described in the provided context focuses on automating the data analytics pipeline using advanced AI techniques to enhance decision-making processes. It involves connecting to client databases, automating data preprocessing, generating detailed abstracts of the data, and utilizing generative AI for content retrieval and analysis. The system also includes a question-answering chatbox for interactive clarification and refinement. By combining the Analytic Agent with Large Language Models, the project aims to provide comprehensive insights and reports tailored to specific business needs, ultimately empowering decision-makers with faster, more accurate, and more actionable data-driven insights for strategic planning and implementation in the retail industry.


In [76]:
# Print the metadata of every source node used for the summary.
for source_node in response.source_nodes:
    print(source_node.metadata)

{'page_label': '1', 'file_name': 'The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_path': '../src/The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_type': 'application/pdf', 'file_size': 682557, 'creation_date': '2024-11-05', 'last_modified_date': '2024-10-30'}
{'page_label': '1', 'file_name': 'The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_path': '../src/The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_type': 'application/pdf', 'file_size': 682557, 'creation_date': '2024-11-05', 'last_modified_date': '2024-10-30'}
{'page_label': '2', 'file_name': 'The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_path': '../src/The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_type': 'application/pdf', 'file_size': 682557, 'creation_date': '2024-11-05', 'last_modified_date': '2024-10-30'}
{'page_label': '2', 'file_name': 'The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_p

In [94]:
from llama_index.llms.openai import OpenAI

# Same two tools, but a page-specific question: the recorded output shows
# the model choosing vector_tool with page_numbers ["5"].
llm = OpenAI(model="gpt-4o-mini", temperature=0)
response = llm.predict_and_call(
    [
        vector_query_tool,
        summary_tool,
    ],
    "give the details of page no 5 ?",
    verbose=True,
)

=== Calling Function ===
Calling function: vector_tool with args: {"query": "details", "page_numbers": ["5"]}
=== Function Output ===
The dataset has undergone transformations to ensure data integrity and consistency, including identifying data types, correcting inconsistencies, and imputing missing values. Each feature in the dataset has been described meticulously, detailing its role, relevance, and distribution, along with statistical summaries like minimum, maximum, mean, median, and quantiles. Exploratory data analysis has been conducted to uncover patterns, trends, and anomalies, supported by visualizations such as histograms, box plots, and scatter plots to enhance understanding of variable distribution and relationships.


In [95]:
# Print only the response text (str(...) drops the surrounding response object repr).
print(str(response))

The dataset has undergone transformations to ensure data integrity and consistency, including identifying data types, correcting inconsistencies, and imputing missing values. Each feature in the dataset has been described meticulously, detailing its role, relevance, and distribution, along with statistical summaries like minimum, maximum, mean, median, and quantiles. Exploratory data analysis has been conducted to uncover patterns, trends, and anomalies, supported by visualizations such as histograms, box plots, and scatter plots to enhance understanding of variable distribution and relationships.
