In [1]:
import torch
# Report the installed PyTorch version, then whether torch.cuda reports an available device.
for environment_fact in (torch.__version__, torch.cuda.is_available()):
    print(environment_fact)

2.1.2+rocm5.6
True


In [2]:
# Clone each data repository only when its directory is missing, so that a real
# clone failure (network, authentication) is not reported as "already cloned".
! if [ -d elements ]; then echo "Elements already cloned"; else git clone https://github.com/aurorabuilder/elements; fi
! if [ -d CC-SRD ]; then echo "CC-SRD already cloned"; else git clone https://github.com/Tabyltop/CC-SRD; fi

fatal: destination path 'elements' already exists and is not an empty directory.
Elements already cloned
fatal: destination path 'CC-SRD' already exists and is not an empty directory.
CC-SRD already cloned


## LlamaIndex Vector Store
In this section we initialize the vector store used to retrieve rules and other information from the manuals.

Starter documentation [here](https://docs.llamaindex.ai/en/stable/getting_started/starter_example.html)

In [3]:
from llama_index import SimpleDirectoryReader

# Read the SRD text file; the printed summary below shows the reader returns a list.
srd_reader = SimpleDirectoryReader(input_files=["CC-SRD/SRD5.1-CCBY4.0License-TT.txt"])
documents = srd_reader.load_data()

# Summarise the container first, then inspect its first entry.
for container_fact in (type(documents), len(documents)):
    print(container_fact, "\n")
first_document = documents[0]
print(type(first_document))
print(first_document)

<class 'list'> 

1 

<class 'llama_index.schema.Document'>
Doc ID: 04252613-e8b7-478b-8476-168446b2fc93
Text: Legal Information    This work includes material taken from the
System Reference Document 5.1 (“SRD 5.1”) by Wizards of    the Coast
LLC and available at https://dnd.wizards.com/resources/systems-
reference-document. The    SRD 5.1 is licensed under the Creative
Commons Attribution 4.0 International License available at
https://creativecommons....


Merge all the loaded documents into a single document — this is the commonly recommended approach before splitting the text into nodes.

In [4]:
from llama_index import Document

# Concatenate the text of every loaded document, separated by a blank line,
# and wrap the result in a single Document.
merged_text = "\n\n".join(doc.text for doc in documents)
document = Document(text=merged_text)

Create the vector store starting from the OpenAI LLM and the documents we loaded previously

### Auto merging retrieval setup

In [12]:
from llama_index.node_parser import HierarchicalNodeParser

# Hierarchical node parser configured with three chunk sizes, largest first.
node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=[2048, 512, 128])

In [13]:
# Parse the merged document into nodes with the hierarchical node parser.
nodes = node_parser.get_nodes_from_documents([document])

In [16]:
# Print out a terminal node
from llama_index.node_parser import get_leaf_nodes

leaf_nodes = get_leaf_nodes(nodes)
sample_leaf = leaf_nodes[30]
print(sample_leaf.text)
print("---")

# Index every node by its id so the sample leaf's parent can be looked up and printed.
nodes_by_id = dict((node.node_id, node) for node in nodes)

parent_id = sample_leaf.parent_node.node_id
parent_node = nodes_by_id[parent_id]
print(parent_node.text)

Breath Weapon.You can use your action to exhale destructive energy. Your draconic ancestry determines the size, shape, and damage type of the exhalation.
---
Dragonborn

Dragonborn Traits
Your draconic heritage manifests in a variety of traits you share with other dragonborn.
Ability Score Increase.Your Strength score increases by 2, and your Charisma score increases by
1.
Age. Young dragonborn grow quickly. They walk hours after hatching, attain the size and development of a 10-year-old human child by the age of 3, and reach adulthood by 15. They live to be around 80.
Alignment.Dragonborn tend to extremes, making a conscious choice for one side or the other in the cosmic war between good and evil. Most dragonborn are good, but those who side with evil can be terrible villains.
Size.Dragonborn are taller and heavier than humans, standing well over 6 feet tall and averaging almost 250 pounds. Your size is Medium.
Speed.Your base walking speed is 30 feet.

Draconic Ancestry
Dragon  Damag

In [17]:
from llama_index import VectorStoreIndex
from llama_index import ServiceContext
from llama_index.llms import OpenAI

# LLM handed to the service context.
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)

# Service context bundling the LLM, a local embedding model and the hierarchical parser.
auto_merging_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model="local:BAAI/bge-large-en-v1.5",
    node_parser=node_parser,
)

#### Persist the index

In [19]:
from llama_index import VectorStoreIndex, StorageContext

# Register every parsed node in the docstore of a fresh storage context.
storage_context = StorageContext.from_defaults()
storage_context.docstore.add_documents(nodes)

# Build the vector index from the leaf nodes only, then write its storage to disk.
index_options = dict(storage_context=storage_context, service_context=auto_merging_context)
automerging_index = VectorStoreIndex(leaf_nodes, **index_options)
automerging_index.storage_context.persist(persist_dir="./merging_index")

#### Query the retrieval engine
This will:
- Embed the question
- Match similar embeddings in the vector store
- Add the corresponding text to the context
- Call the LLM with the retrieved context

In [20]:
# Convert the index to a query engine
# NOTE(review): as_query_engine() is called with no arguments here; if merging
# retrieved leaf nodes into their parent nodes is intended, confirm whether a
# dedicated auto-merging retriever should be wired in instead.
query_engine = automerging_index.as_query_engine()

In [22]:
# Ask one sample rules question and show the generated answer.
sample_question = "What are specific level 1 spells that a wizard can choose from?"
response = query_engine.query(sample_question)
print(response)

A wizard can choose any six 1st-level spells from their spellbook.


### Evaluation of the RAG system
We use `trulens` to run LLM-based evaluation of the context retrieval

In [23]:
from trulens_eval import Tru
from src.evaluation import get_prebuilt_trulens_recorder

# Questions sent to the query engine during the evaluation run.
eval_questions = [
    "What are level 1 spells that a wizard can choose from?",
    "What is the key ability of a wizard?",
    "Which abilities is a warrior proficient in?",
    "What is the hit die of a wizard?",
    "What are the available classes?",
    "What are the available races?",
    "Do humans see in the dark?",
    "Which are the race bonuses for an half-elf?",
    "Does a level 3 barbarian have access to 'Magic Missile'?"
]

# Create the tru database
tru = Tru()
# NOTE(review): presumably wipes previously stored records — confirm before
# running against a database that holds results worth keeping.
tru.reset_database()

# Create the recorder
# The app_id given here is the value shown in the app_id column of the records table.
tru_recorder = get_prebuilt_trulens_recorder(query_engine,
                                             app_id="Direct Query Engine")

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input statement will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


In [24]:
# Run every evaluation question inside the recorder context
# (presumably so that each call is captured for feedback — verify against the recorder helper).
with tru_recorder as recording:
    for question in eval_questions:
        # Only the recording matters here; `response` is overwritten on every iteration.
        response = query_engine.query(question)

# Fetch the records and feedback; an empty app_ids list is passed as the filter.
records, feedback = tru.get_records_and_feedback(app_ids=[])

In [25]:
# Preview the first rows of the evaluation records.
records.head()

Unnamed: 0,app_id,app_json,type,record_id,input,output,tags,record_json,cost_json,perf_json,ts,Context Relevance,Answer Relevance,Groundedness,Context Relevance_calls,Answer Relevance_calls,Groundedness_calls,latency,total_tokens,total_cost
0,Direct Query Engine,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_4de9b0158b801e2076edeb8c2dae4344,"""What are level 1 spells that a wizard can cho...","""A wizard can choose from six level 1 spells f...",-,"{""record_id"": ""record_hash_4de9b0158b801e2076e...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2024-01-17T19:44:30.960433"", ""...",2024-01-17T19:44:32.724342,0.9,1.0,1.0,[{'args': {'question': 'What are level 1 spell...,[{'args': {'prompt': 'What are level 1 spells ...,[{'args': {'source': '3 2 2 1 1 ...,1,384,0.000583
1,Direct Query Engine,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_f30a0d742cd026a8008da506258ebf59,"""What is the key ability of a wizard?""","""Intelligence is the key ability of a wizard.""",-,"{""record_id"": ""record_hash_f30a0d742cd026a8008...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2024-01-17T19:44:32.879647"", ""...",2024-01-17T19:44:34.144900,0.45,0.9,1.0,[{'args': {'question': 'What is the key abilit...,[{'args': {'prompt': 'What is the key ability ...,[{'args': {'source': 'You can change your list...,1,344,0.000521
2,Direct Query Engine,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_ee518612d726eb0f505848d66042cf3c,"""Which abilities is a warrior proficient in?""","""A warrior is proficient in using martial weap...",-,"{""record_id"": ""record_hash_ee518612d726eb0f505...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2024-01-17T19:44:34.295516"", ""...",2024-01-17T19:44:35.558999,0.5,0.9,1.0,[{'args': {'question': 'Which abilities is a w...,[{'args': {'prompt': 'Which abilities is a war...,[{'args': {'source': 'STR DEX CON ...,1,365,0.000553
3,Direct Query Engine,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_5587adc37fe0389cfcae4cba1e7d0084,"""What is the hit die of a wizard?""","""The hit die of a wizard is 1d6.""",-,"{""record_id"": ""record_hash_5587adc37fe0389cfca...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2024-01-17T19:44:35.709981"", ""...",2024-01-17T19:44:37.124520,0.6,1.0,1.0,[{'args': {'question': 'What is the hit die of...,[{'args': {'prompt': 'What is the hit die of a...,[{'args': {'source': 'Wizard Class Features A...,1,380,0.000576
4,Direct Query Engine,"{""tru_class_info"": {""name"": ""TruLlama"", ""modul...",RetrieverQueryEngine(llama_index.query_engine....,record_hash_6837203d5f5cf2aba4d584ff268e2577,"""What are the available classes?""","""The available classes are not explicitly ment...",-,"{""record_id"": ""record_hash_6837203d5f5cf2aba4d...","{""n_requests"": 1, ""n_successful_requests"": 1, ...","{""start_time"": ""2024-01-17T19:44:37.253258"", ""...",2024-01-17T19:44:38.972673,,1.0,,,[{'args': {'prompt': 'What are the available c...,,1,297,0.000452


In [26]:
# Start the evaluation dashboard (the output below shows it is served through streamlit).
tru.run_dashboard()

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.
Dashboard already running at path:   Network URL: http://192.168.1.96:8501



<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

#### LlamaIndex agent tool
To interoperate with LangChain, we create a tool from the query engine, as described in the official [documentation](https://docs.llamaindex.ai/en/stable/community/integrations/using_with_langchain.html)

In [None]:
from llama_index.langchain_helpers.agents import (
    IndexToolConfig,
    LlamaIndexTool,
)

# Wrap the query engine as a LangChain tool.
# The tool name avoids spaces because OpenAI tool/function names only accept
# letters, digits, underscores and dashes, and this tool is later handed to an
# OpenAI tools agent. The description tells the agent what the index contains.
tool_config = IndexToolConfig(
    query_engine=query_engine,
    name="vector_index",
    description=(
        "useful for when you want to answer queries about the rules in the "
        "System Reference Document 5.1 (SRD 5.1)"
    ),
    # return_direct=True: the tool output is returned as the final answer.
    tool_kwargs={"return_direct": True},
)

tool = LlamaIndexTool.from_tool_config(tool_config)


In [None]:
from langchain_community.vectorstores import FAISS
from langchain.prompts import ChatPromptTemplate, ChatMessagePromptTemplate
from langchain.schema import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.memory import ConversationBufferWindowMemory
from langchain.chains import LLMChain
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

from src.models import ModelFactory, ModelType

In [None]:
# Build the LangChain-side model through the project's ModelFactory.
# NOTE: this rebinds `llm`, which an earlier cell bound to the llama_index OpenAI LLM.
llm = ModelFactory.get_model(ModelType.GPT_35_TURBO)

In [None]:
from langchain import hub
# Get the prompt from the hub
prompt = hub.pull("hwchase17/openai-tools-agent")
# Display the pulled prompt as the cell output.
prompt

In [None]:
# Construct the OpenAI Tools agent
from langchain.agents import AgentExecutor, create_openai_tools_agent

# The agent needs the list of tools it is allowed to call; expose the
# LlamaIndex query-engine tool created earlier.
tools = [tool]
agent = create_openai_tools_agent(llm, tools, prompt)

## Embeddings model

In [None]:
# Sentence-transformers embedding model, loaded on the 'cuda' device,
# with embedding normalisation switched off.
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model_kwargs = dict(device='cuda')
encode_kwargs = dict(normalize_embeddings=False)
hf = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs)

In [None]:
from langchain.document_loaders import PyPDFLoader, UnstructuredXMLLoader
from langchain_community.document_loaders.unstructured import UnstructuredBaseLoader


import xml.etree.ElementTree as ET
from typing import List, Any

class XMLRPGClassLoader(UnstructuredBaseLoader):
    """Custom Loader for RPG class XML files, focusing on detailed features/abilities.

    Every ``<element>`` node found anywhere in the file becomes one entry built
    from the element's ``name`` attribute followed by the text of its
    ``description``, ``sheet`` and ``supports`` children (when present).
    """

    def __init__(
        self,
        file_path: str,
        mode: str = "elements",
        **unstructured_kwargs: Any,
    ):
        """Initialize with XML file path.

        Args:
            file_path: Path of the XML file to load.
            mode: Loading mode forwarded to ``UnstructuredBaseLoader``.
            **unstructured_kwargs: Extra keyword arguments forwarded to the base loader.
        """
        self.file_path = file_path
        super().__init__(mode=mode, **unstructured_kwargs)

    def _parse_xml(self) -> ET.Element:
        """Parse the XML file and return its root element."""
        return ET.parse(self.file_path).getroot()

    def _get_elements(self) -> List[dict]:
        """Extract one entry per ``<element>`` node of the XML file.

        Returns:
            A list of dicts, each with a ``type`` key (the element's ``type``
            attribute) and a ``text`` key (the element name and the non-empty
            texts of its description/sheet/supports children, joined by blank
            lines).
        """
        root = self._parse_xml()
        elements = []

        for element in root.findall('.//element'):
            # Element name and type, with placeholders when an attribute is absent
            element_texts = [element.get('name', 'Unnamed Feature')]
            element_data = {'type': element.get('type', 'Unknown Type')}

            # Extracting description, sheet, and supports text
            for tag in ('description', 'sheet', 'supports'):
                text_content = element.find(tag)
                if text_content is None:
                    continue
                text = ''.join(text_content.itertext()).strip()
                # Skip tags that are present but empty so the joined text
                # does not contain blank sections.
                if text:
                    element_texts.append(text)

            # Combine all texts into the entry
            element_data['text'] = "\n\n".join(element_texts)
            elements.append(element_data)

        return elements

    def _get_metadata(self) -> dict:
        """Get metadata for the XML file: its path and a fixed ``xml`` file type."""
        return {"source": self.file_path, "file_type": "xml"}

# Example usage
# loader = XMLRPGClassLoader(file_path="elements/core/players-handbook/classes/class-barbarian.xml")
# docs = loader.load()
# docs[0]

In [None]:
import os
import glob

# Directory holding the class definition XML files.
directory_path = 'elements/core/players-handbook/classes'

# Find XML files in the directory and all of its subfolders. The '**/' prefix is
# what makes recursive=True descend into subfolders; a bare '*.xml' only matches
# files directly inside directory_path. Sorting keeps the load order (and thus
# documents[0]) reproducible between runs.
xml_files = sorted(glob.glob('**/*.xml', root_dir=directory_path, recursive=True))

# Load every XML file through the custom loader and collect the resulting documents.
documents = []
for xml_file in xml_files:
    loader = XMLRPGClassLoader(os.path.join(directory_path, xml_file))
    documents.extend(loader.load())

In [None]:
# Inspect the first loaded document.
documents[0]

### Sentence splitting

from langchain.text_splitter import CharacterTextSplitter, SentenceTransformersTokenTextSplitter

# text_splitter = CharacterTextSplitter(chunk_size=128, chunk_overlap=30, separator="\n")
# Token-based splitter configured with no overlap between consecutive chunks.
text_splitter = SentenceTransformersTokenTextSplitter(chunk_overlap=0)
# `docs` holds the split output; `documents` itself is left untouched.
docs = text_splitter.split_documents(documents=documents)
# Display the second chunk as a sample.
docs[1]

### Vector store
Let's load the documents into the FAISS vector store. Note that the metadata will be discarded, so include everything that is relevant to the model in the text itself.

In [None]:
# Collect the raw text of each loaded document; only page_content is passed on.
# NOTE(review): this reads from `documents`, not from the `docs` produced by the
# sentence-splitting step — confirm which of the two is meant to be indexed.
document_texts = [doc.page_content for doc in documents]
# Embed the texts with the HuggingFace embeddings and index them in FAISS.
vectorstore = FAISS.from_texts(texts=document_texts, embedding=hf)
# The retriever is configured with k=100 in its search kwargs.
retriever = vectorstore.as_retriever(search_kwargs={"k": 100})

### Reranking
A vector store's retrieval quality can be improved by reranking the documents it returns with a dedicated reranking model. The output of such a model is an ordered subset of the input documents retrieved by the vector store.

In [None]:
from __future__ import annotations
from typing import Dict, Optional, Sequence
from langchain.schema import Document
from langchain.pydantic_v1 import Extra, root_validator

from langchain.callbacks.manager import Callbacks
from langchain.retrievers.document_compressors.base import BaseDocumentCompressor

from sentence_transformers import CrossEncoder
# from config import bge_reranker_large

class BgeRerank(BaseDocumentCompressor):
    """Document compressor that reorders documents with a BGE cross-encoder reranker.

    The cross-encoder is created when an instance is validated, from that
    instance's ``model_name`` and ``device``, unless a ready-made ``model`` is
    passed in. Building it per instance (instead of once in the class body)
    makes a ``model_name`` override actually take effect.
    """

    # Model name to use for reranking.
    model_name: str = 'BAAI/bge-reranker-large'
    # Number of documents to return.
    top_n: int = 10
    # Device the CrossEncoder is loaded on.
    device: str = "cuda"
    # CrossEncoder instance to use for reranking; built from model_name when omitted.
    model: Optional[CrossEncoder] = None

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.forbid
        arbitrary_types_allowed = True

    @root_validator(pre=True)
    def build_model(cls, values: Dict) -> Dict:
        """Create the CrossEncoder for this instance when none was supplied."""
        if values.get("model") is None:
            # pre=True validators see the raw keyword arguments, so fall back
            # to the declared field defaults for anything the caller omitted.
            fields = cls.__fields__
            name = values.get("model_name", fields["model_name"].default)
            device = values.get("device", fields["device"].default)
            values["model"] = CrossEncoder(name, device=device)
        return values

    def bge_rerank(self, query: str, docs: Sequence[str]) -> Sequence:
        """Score every text against the query and return the best ``top_n``.

        Args:
            query: The query each text is scored against.
            docs: The candidate texts.

        Returns:
            ``(index, score)`` pairs sorted by descending score, where ``index``
            is the position of the text in ``docs``; at most ``top_n`` pairs.
        """
        model_inputs = [[query, doc] for doc in docs]
        scores = self.model.predict(model_inputs)
        results = sorted(enumerate(scores), key=lambda x: x[1], reverse=True)
        return results[:self.top_n]

    def compress_documents(
        self,
        documents: Sequence[Document],
        query: str,
        callbacks: Optional[Callbacks] = None,
    ) -> Sequence[Document]:
        """
        Compress documents using BAAI/bge-reranker models.

        Args:
            documents: A sequence of documents to compress.
            query: The query to use for compressing the documents.
            callbacks: Callbacks to run during the compression process.

        Returns:
            A sequence of compressed documents, best match first. Each one is a
            new ``Document`` whose metadata carries a ``relevance_score`` entry;
            the input documents are left unmodified.
        """
        if len(documents) == 0:  # nothing to score, skip the model call
            return []
        doc_list = list(documents)
        ranked = self.bge_rerank(query, [d.page_content for d in doc_list])
        final_results = []
        for index, score in ranked:
            source = doc_list[index]
            final_results.append(
                Document(
                    page_content=source.page_content,
                    # Copy the metadata so the caller's documents are not mutated,
                    # and store the score as a plain Python float.
                    metadata={**source.metadata, "relevance_score": float(score)},
                )
            )
        return final_results

In [None]:
from langchain.retrievers import ContextualCompressionRetriever

# Reranker configured to return 5 documents.
compressor = BgeRerank(top_n=5)

# Combine the reranker (base_compressor) with the FAISS retriever (base_retriever).
compression_retriever = ContextualCompressionRetriever(
    base_compressor=compressor, base_retriever=retriever
)

In [None]:
# Retrieve once and reuse the result: every call runs retrieval and reranking again.
relevant_docs = compression_retriever.get_relevant_documents(query="Barbarian history proficiency")
context = str(relevant_docs)

# Sanitize context to be used in a formattable string
context = context.replace("{", "{{").replace("}", "}}")

In [None]:
# Prompt with two slots: the retrieved context and the user's question.
template = """### System:
Answer the user's question based only on the following context:
{context}

### User: Question: {question}

### Assistant:
"""
prompt = ChatPromptTemplate.from_template(template)

# The retriever fills {context} from the incoming question, which is also passed
# through unchanged as {question}. The chain is driven by `llm`, the model built
# through ModelFactory in an earlier cell; no name `model` is defined in this notebook.
chain = (
    {"context": compression_retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [23]:
# Run the full chain on a sample question; the returned string is the cell output.
chain.invoke("What's a bard's most important skill?")

"\nA bard's most important skill lies in their ability to weave magic through words and music. Their versatility, knowledge, and talent in various subjects make them masters of performance, speech, and the magic they contain. Bards are known for their skill in inspiring allies, demoralizing foes, manipulating minds, creating illusions, and even healing wounds through their spells and powers. Their strength lies in their sheer versatility and the depth of knowledge they possess, making them valuable assets in any adventuring party."