In [None]:
import warnings
# Suppress every warning category for the rest of the session.
warnings.filterwarnings('ignore')
import os, sys
# Add the sibling ``../src`` directory to the import search path — presumably
# so the project-local imports further down (data_loader, utils) resolve.
src_path = os.path.join(os.path.abspath('..'), 'src')
if src_path not in sys.path:  # skip when already present (e.g. cell re-run)
    sys.path.append(src_path)

# Show the resolved absolute path as a sanity check.
print(src_path)

#from src.local_models.llm import MyLLM, LlamaCPPLLM
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding

from data_loader.splitting import split_by_md_headers
from data_loader.parsing import get_html, extract_md_tables

from llama_index.core import Document
from data_loader.chunking import chunk_docs_standalone
from llama_index.core import VectorStoreIndex
from llama_index.core import PromptTemplate

from data_loader.load_from_dir import rebuild_index
from utils import load_prompt



from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.agent.openai import OpenAIAgent


from dotenv import load_dotenv
import os

In [None]:
# Load variables from a .env file into the process environment; override=True
# lets values from the file replace variables that are already set.
load_dotenv(override=True)

In [None]:
# Instantiate the OpenAI embedding model and LLM with default arguments.
# NOTE(review): neither object is referenced by any other cell visible here —
# confirm whether they are meant to be registered as global defaults.
embed_model = OpenAIEmbedding()
llm = OpenAI()

In [None]:
# First-time build: load the markdown source through split_by_md_headers,
# store get_html's result for every 'content' entry in a new 'text_html'
# column, then hand the frame to extract_md_tables.
df = split_by_md_headers('../data/RAG-Zelda-Tears-of-the-Kingdom(Fan-made).md')
df['text_html'] = df['content'].apply(get_html)
df = extract_md_tables(df)

In [None]:
# Display the frame produced by the build cell for visual inspection.
df

In [None]:
# One Document per row flagged as a table (is_table == 1), taken from 'table'.
table_docs = [
    Document(text=table_text, metadata={})
    for table_text in df.loc[df['is_table'] == 1, 'table'].values
]

# One Document per non-table row (is_table == 0), taken from 'text'.
text_docs = [
    Document(text=section_text, metadata={})
    for section_text in df.loc[df['is_table'] == 0, 'text'].values
]
# NOTE(review): only element [0] of each chunk_docs_standalone result is kept;
# if that helper returns several chunks per document the remainder is dropped
# — confirm this is intended.
text_nodes = [
    chunk_docs_standalone(doc, chunk_size=512, chunk_overlap=50)[0]
    for doc in text_docs
]

# Separate vector indices: one over the table documents, one over text nodes.
table_index = VectorStoreIndex.from_documents(table_docs)
text_index = VectorStoreIndex(text_nodes)

# Persist both indices to disk so later sessions can reload them.
table_index.storage_context.persist(persist_dir="../db_stores/table_index")
text_index.storage_context.persist(persist_dir='../db_stores/text_index')


In [None]:
# Rebuild both indices from the directories they were persisted to, replacing
# any in-memory table_index / text_index objects from the build cell.
table_index = rebuild_index("../db_stores/table_index")
text_index = rebuild_index("../db_stores/text_index")


In [None]:
# Expose each index as a query engine with default settings; these back the
# agent tools defined later in the notebook.
table_engine = table_index.as_query_engine()
text_engine = text_index.as_query_engine()


In [None]:
# Define the agent tools: one QueryEngineTool per query engine. Each spec is
# (tool name, backing query engine, description passed in the tool metadata).
query_engine_tools = [
    QueryEngineTool(
        query_engine=engine,
        metadata=ToolMetadata(name=tool_name, description=tool_description),
    )
    for tool_name, engine, tool_description in (
        (
            "table_tool",
            table_engine,
            "Useful for retrieving specific context from tables",
        ),
        (
            "text_tool",
            text_engine,
            "Useful for retrieving specific context from plain text",
        ),
    )
]

In [None]:
# Build the agent over both query-engine tools. It may make at most
# max_function_calls tool calls per query and logs its steps (verbose=True).
agent = OpenAIAgent.from_tools(
    query_engine_tools,
    max_function_calls=3,
    verbose=True,
    # Plain (non-f) string: the prompt has no placeholders to interpolate.
    # The opening backslash must be the last character on its line to act as a
    # line continuation; followed by spaces it is an invalid "\ " escape that
    # leaves a stray backslash at the start of the prompt.
    system_prompt="""\
        If you inquire general-purpose questions, refer to the text_tool as a priority.
        For questions involving numbers and figures, refer to the table_tool first.
        If you need to analyze problems demanding a higher degree of rigorous reasoning, such as step-by-step instructions, 
        please utilize both the text_tool and table_tool to synthesize your answer.
        
        You must ALWAYS use at least one of the tools provided when answering a question; do NOT rely on prior knowledge.\
        You should answer in English only.
        """,
)

In [None]:
# Ask the agent a general question and keep the response for inspection in
# the following cells.
response = agent.query('What is the background setting of Tears of the Kingdom?')

In [None]:
# Show the `response` attribute of the object returned by the agent query.
response.response

In [None]:
# Show the text of the first source node attached to the response; raises
# IndexError if source_nodes is empty.
response.source_nodes[0].text

In [None]:
# Second sample query; the returned response is displayed as the cell output.
# NOTE(review): the query literal contains a tab character and a full-width
# question mark before the closing quote — confirm these are intended.
agent.query('Tell me something about the Sautéed Warm Fruit	？')