In [None]:
# Standard-library modules used by this cell and by later cells.
import os
import sys

# Location of the RAG repository. The value below is a placeholder and must be
# replaced with a real path; a later cell joins the test-document folder onto it.
path_to_rag = "path/to/rag/repository"

# Make the repository's modules importable. The membership check keeps this cell
# idempotent: re-running it does not stack duplicate entries onto sys.path.
if path_to_rag not in sys.path:
    sys.path.append(path_to_rag)

In [None]:
# Project-local imports. NOTE(review): presumably these resolve only because the
# repository path was appended to sys.path in the first cell -- confirm that both
# modules live under path_to_rag.
from basic_rag import RAGgish
from config.setting import Settings
# Single configuration object for the notebook: later cells read the model names,
# temperature, input/index locations, chunk size and tool list from its attributes.
params = Settings()

## Initialize RAG class

In [14]:
# Build the RAG helper from the configured embedding model, LLM and sampling
# temperature; every later cell goes through this instance.
basic_rag = RAGgish(
    embed_name=params.embed_name,
    llm_name=params.llm_name,
    temperature=params.temperature,
)

## Load documents

In [15]:
# Folder holding the test documents, resolved relative to the repository root.
directory_test_document = os.path.join(path_to_rag, params.input_dir_test)

# Read the documents from that folder, restricted to PDF files.
documents = basic_rag.load_data(
    input_dir=directory_test_document,
    required_exts=[".pdf"],
)

## Parse documents

In [16]:
# Turn the loaded documents into nodes, using the chunk size from the config.
nodes = basic_rag.parse_documents(
    documents,
    chunk_size=params.sentence_splitter_chunk,
)

Parsing nodes: 100%|██████████| 8/8 [00:00<00:00, 568.94it/s]


## Define *Vector-Store-Index*

In [17]:
# Create the vector-store index over the parsed nodes, or load it; the storage
# directory and index name both come from the config.
vector_index = basic_rag.create_or_load_vector_idx(
    nodes=nodes,
    vec_store_path=params.vec_store_idx_dir,
    vec_store_idx=params.vec_store_idx_name,
)

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
INFO:sentence_transformers.SentenceTransformer:2 prompts are loaded, with the keys: ['query', 'text']
2 prompts are loaded, with the keys: ['query', 'text']


Generating embeddings:   0%|          | 0/19 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Generating embeddings: 100%|██████████| 19/19 [00:01<00:00, 11.10it/s]


## Define *Summary-Index*

In [18]:
# Create the summary index over the same nodes, or load it; the storage
# directory and index name both come from the config.
summary_index = basic_rag.create_or_load_summary_idx(
    nodes=nodes,
    summary_path=params.summ_idx_dir,
    summary_idx=params.summ_idx_name,
)

INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
INFO:sentence_transformers.SentenceTransformer:2 prompts are loaded, with the keys: ['query', 'text']
2 prompts are loaded, with the keys: ['query', 'text']


## Define *Query-Tools*

In [19]:
# Tool names accepted in the config, each mapped to a zero-argument factory.
# The factories are lazy, so a tool is only built when the config asks for it.
tool_factories = {
    'Base': lambda: basic_rag.create_base_query_tool(vector_index),
    'Meta': lambda: basic_rag.create_metadata_query_tool(vector_index),
    'Summary': lambda: basic_rag.create_summary_query_tool(summary_index),
}

# Build the query tools in the order given by the config; an unknown name
# aborts the cell so a typo in the config cannot pass silently.
list_tools = []
for tools in params.list_tools:
    build_tool = tool_factories.get(tools)
    if build_tool is None:
        raise ValueError(f"Tool {tools} not recognized. Please check the config file.")
    list_tools.append(build_tool())

Vector Tool Schema:
{'properties': {'query': {'title': 'Query', 'type': 'string'}}, 'required': ['query'], 'type': 'object'}
Metadata Tool Schema:
{'properties': {'query': {'title': 'Query', 'type': 'string'}, 'page_numbers': {'items': {'type': 'string'}, 'title': 'Page Numbers', 'type': 'array'}}, 'required': ['query', 'page_numbers'], 'type': 'object'}
Summary Tool Schema:
{'properties': {'input': {'title': 'Input', 'type': 'string'}}, 'required': ['input'], 'type': 'object'}


## Answer the following query

In [20]:
# Natural-language question to ask about the loaded document(s); edit freely.
query = "What is the main topic of the document?"

In [21]:
# Ask the question with the configured query tools available. In the recorded
# output below, the call prints the query, issues OpenAI chat-completions
# requests and invokes `vector_tool` before printing the function output.
response = basic_rag.answer(query, list_tools)

--------------------------------------
Query: What is the main topic of the document?
--------------------------------------
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
=== Calling Function ===
Calling function: vector_tool with args: {"query": "main topic"}
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
=== Function Output ===
{"response":"Anomaly Attribution Methods in XAI","confidence":0.9,"confidence_explanation":"The answer is derived directly from the provided context information, specifically focusing on the main contributions of the work related to anomaly attribution methods