# Tools

In [None]:
from dotenv import load_dotenv
load_dotenv()

In [None]:
from llama_index.core.tools import FunctionTool

def addition(x: int, y: int) -> int:
    return x + y

def mystery(x: int, y: int) -> int:
    return (x + y) * (x + y)

addition_tool = FunctionTool.from_defaults(fn=addition)
mystry_tool = FunctionTool.from_defaults(fn=mystery)

In [None]:
from llama_index.llms.openai import OpenAI

llm = OpenAI(model='gpt-4o-mini')
response = llm.predict_and_call(
    [addition_tool, mystry_tool],
    "Tell me the output of the mystry function on 2 and 9",
    verbose=True
)

response 

# Example

In [3]:
from llama_index.core import SimpleDirectoryReader

#loading
documents = SimpleDirectoryReader(input_files=['../src/The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf']).load_data()

Ignoring wrong pointing object 6 0 (offset 0)
Ignoring wrong pointing object 8 0 (offset 0)
Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 12 0 (offset 0)
Ignoring wrong pointing object 14 0 (offset 0)
Ignoring wrong pointing object 16 0 (offset 0)
Ignoring wrong pointing object 18 0 (offset 0)
Ignoring wrong pointing object 20 0 (offset 0)
Ignoring wrong pointing object 22 0 (offset 0)
Ignoring wrong pointing object 24 0 (offset 0)
Ignoring wrong pointing object 26 0 (offset 0)
Ignoring wrong pointing object 32 0 (offset 0)
Ignoring wrong pointing object 34 0 (offset 0)
Ignoring wrong pointing object 36 0 (offset 0)
Ignoring wrong pointing object 38 0 (offset 0)
Ignoring wrong pointing object 43 0 (offset 0)
Ignoring wrong pointing object 45 0 (offset 0)


In [4]:
#splitting
from llama_index.core.node_parser import SentenceSplitter

splitter = SentenceSplitter(chunk_size=1024)
nodes = splitter.get_nodes_from_documents(documents)

In [6]:
print(nodes[0].get_content(metadata_mode='all'))

page_label: 1
file_name: The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf
file_path: ../src/The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf
file_type: application/pdf
file_size: 682557
creation_date: 2024-11-05
last_modified_date: 2024-10-30

1  The 7th Sense For Data-Driven Decision Mastery Raja Brundha A  Assistant Professor Department of Artificial  Intelligence and Data Science,  Sri Sai Ram Engineering College, Chennai, Tamil Nadu, India. rajabrundha.ai@sairam.edu.in   Abubacker S  Student Department of Artificial  Intelligence and Data Science, Sri Sai Ram Engineering College, Chennai, Tamil Nadu, India. nav.abubacker@gmail.com    Deva Praveen K  Student Department of Artificial  Intelligence and Data Science,  Sri Sai Ram Engineering College, Chennai, Tamil Nadu, India. devapraveen20@gmail.com     Abstract— Our project automates the manual data analytics pipeline, transforming it into a seamless, AI-powered process designed to revolutionize decision-making

In [8]:
#basic Query engine
from llama_index.core import VectorStoreIndex

vector_index = VectorStoreIndex(nodes)
query_engine = vector_index.as_query_engine(similarity_top_k=2)

In [11]:
#Using filters for specific page retrival
from llama_index.core.vector_stores import MetadataFilters

query_engine = vector_index.as_query_engine(
    similarity_top_k=2,
    filters=MetadataFilters.from_dicts(
        [
            {'key': 'page_label', 'value': '3'} #based on the pageno retrival
        ]
    )
)
response = query_engine.query(
    "What is 7th Sense"
)
print(str(response))

The 7th Sense is a project that employs a multi-step approach to automate the data analysis process, starting from connecting to the client's database, performing automated preprocessing tasks like data cleaning and imputation, generating an abstracted text file with metadata and statistical summaries, identifying outliers, conducting advanced analyses such as RFM and customer behavior clustering, converting data into embeddings using generative AI, and providing precise insights and strategic recommendations through a question-answering chatbox for informed decision-making.


In [12]:
for x in response.source_nodes:
    print(x.metadata)

{'page_label': '3', 'file_name': 'The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_path': '../src/The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_type': 'application/pdf', 'file_size': 682557, 'creation_date': '2024-11-05', 'last_modified_date': '2024-10-30'}


In [15]:
from typing import List
from llama_index.core.vector_stores import FilterCondition
from llama_index.core.tools import FunctionTool


def vector_query(
    query: str, 
    page_numbers: List[str]
) -> str:
    """Perform a vector search over an index.
    
    query (str): the string query to be embedded.
    page_numbers (List[str]): Filter by set of pages. Leave BLANK if we want to perform a vector search
        over all pages. Otherwise, filter by the set of specified pages.
    
    """

    metadata_dicts = [
        {"key": "page_label", "value": p} for p in page_numbers
    ]
    
    query_engine = vector_index.as_query_engine(
        similarity_top_k=2,
        filters=MetadataFilters.from_dicts(
            metadata_dicts,
            condition=FilterCondition.OR
        )
    )
    response = query_engine.query(query)
    return response
    

vector_query_tool = FunctionTool.from_defaults(
    name="vector_tool",
    fn=vector_query
)

In [21]:
from llama_index.llms.openai import OpenAI

llm = OpenAI(model='gpt-4o-mini', temperature=0)
response = llm.predict_and_call(
    [vector_query_tool],
    "Tell me the summary of page numbers 2, 3, 4",
    verbose=True
)
print(str(response))

=== Calling Function ===
Calling function: vector_tool with args: {"query": "summary", "page_numbers": ["2"]}
=== Function Output ===
The papers discussed in the context cover a range of topics from retail sales forecasting to anomaly detection, customer segmentation, automated fashion analysis, and the intersection of large language models (LLMs) with data retrieval. Each paper contributes uniquely to the fields of retail analytics, fashion report generation, and data analysis, emphasizing the importance of advanced analytics and artificial intelligence in revolutionizing industries. The studies explore different methodologies and models, such as Random Forest Regression for sales forecasting, the RFM model for customer segmentation, GPT-FAR system for automated fashion analysis, and Retrieval-Augmented Generation (RAG) for enhancing LLM capabilities. Additionally, the research delves into the potential of LLMs like GPT-4 in data analysis tasks and raises questions about the role of t

In [22]:
for n in response.source_nodes:
    print(n.metadata)

{'page_label': '2', 'file_name': 'The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_path': '../src/The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_type': 'application/pdf', 'file_size': 682557, 'creation_date': '2024-11-05', 'last_modified_date': '2024-10-30'}
{'page_label': '2', 'file_name': 'The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_path': '../src/The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_type': 'application/pdf', 'file_size': 682557, 'creation_date': '2024-11-05', 'last_modified_date': '2024-10-30'}


### Adding Summary Tool

In [28]:
from llama_index.core import SummaryIndex
from llama_index.core.tools import QueryEngineTool

summary_index = SummaryIndex(nodes)
summary_query_engine = summary_index.as_query_engine(
    response_mode="tree_summarize",
    use_async=True,
)
summary_tool = QueryEngineTool.from_defaults(
    name="summary_tool",
    query_engine=summary_query_engine,
    description=(
        "Useful if you want to get a summary of the paper"
    ),
)

In [25]:
response = llm.predict_and_call(
    [vector_query_tool, summary_tool], 
    "give the summary of page 8?", 
    verbose=True
)

=== Calling Function ===
Calling function: summary_tool with args: {"input": "page 8"}
=== Function Output ===
Page 8 of the document is not provided in the context information.


In [26]:
for n in response.source_nodes:
    print(n.metadata)

{'page_label': '1', 'file_name': 'The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_path': '../src/The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_type': 'application/pdf', 'file_size': 682557, 'creation_date': '2024-11-05', 'last_modified_date': '2024-10-30'}
{'page_label': '1', 'file_name': 'The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_path': '../src/The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_type': 'application/pdf', 'file_size': 682557, 'creation_date': '2024-11-05', 'last_modified_date': '2024-10-30'}
{'page_label': '2', 'file_name': 'The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_path': '../src/The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_type': 'application/pdf', 'file_size': 682557, 'creation_date': '2024-11-05', 'last_modified_date': '2024-10-30'}
{'page_label': '2', 'file_name': 'The_7th_Sense_For_Data-Driven_Decision_Mastery_Paper_V2.pdf', 'file_p

In [31]:
from llama_index.llms.openai import OpenAI

llm = OpenAI(model='gpt-4o-mini', temperature=0)
response = llm.predict_and_call(
    [vector_query_tool, summary_tool], 
    "give is a summary of the paper?", 
    verbose=True
)

ValueError: Expected at least one tool call, but got 0 tool calls.