In [39]:
import json
import os
import logging
import traceback
from pathlib import Path
from datetime import datetime
from typing import Optional

import torch
import tomli as tomlib
import pandas as pd
from dotenv import load_dotenv
from tqdm import tqdm
from huggingface_hub import login
from llama_index.core import Settings, VectorStoreIndex, get_response_synthesizer, PromptTemplate
from llama_index.core.node_parser import TokenTextSplitter, SentenceSplitter
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.indices.vector_store.retrievers import VectorIndexRetriever
from llama_index.core.postprocessor import SimilarityPostprocessor, NERPIINodePostprocessor, PrevNextNodePostprocessor
from llama_index.core.tools import QueryEngineTool
from llama_index.core.agent import AgentRunner, ReActAgentWorker
from llama_index.core.base.llms.types import ChatMessage
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.vector_stores.neo4jvector import Neo4jVectorStore
from llama_index.agent.openai import OpenAIAgentWorker
from trulens.core import TruSession
from trulens.core.feedback.feedback import Feedback
from trulens.providers.openai import OpenAI as OpenAIProvider
from trulens.providers.huggingface import Huggingface
from trulens.apps.llamaindex import TruLlama
from trulens.dashboard import run_dashboard

In [2]:
load_dotenv(override=True)

True

In [34]:
# parse configuration
with open('../pyproject.toml', "rb") as file:
    CFG = tomlib.load(file)

In [4]:
# Use your token here
login(token=CFG['configuration']['models']['hf_token'])

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


## Data

In [5]:
# read dataset
with open(CFG['configuration']['data']['dataset_path'], 'rb') as file:
    dataset = json.load(file)

## Initialization

In [12]:
# connect to the existing neo4j vector index
vector_store = Neo4jVectorStore(
    username=CFG['configuration']['db']['username'],
    password=CFG['configuration']['db']['password'],
    url=CFG['configuration']['db']['url'],
    embedding_dimension=CFG['configuration']['embedding_dimension'],
    distance_strategy=CFG['configuration']['distance_strategy'],
    index_name=CFG['configuration']['db']['index_name'],
    text_node_property=CFG['configuration']['db']['text_node_property']
)

index = VectorStoreIndex.from_vector_store(vector_store)

### OpenAI

In [127]:
# initialize models
llm = OpenAI(
    api_key=os.getenv('AZURE_OPENAI_API_KEY'), 
    model=CFG['configuration']['models']['llm'],
    temperature=CFG['configuration']['models']['temperature']
)

embed_model = OpenAIEmbedding(
    model=CFG['configuration']['models']['embedding_model'],
    api_key=os.getenv('AZURE_OPENAI_API_KEY'),
    dimensions=CFG['configuration']['embedding_dimension']
)

Settings.llm = llm
Settings.embed_model = embed_model

### Opensource

In [71]:
SYSTEM_PROMPT = """You are an AI assistant that answers questions in a friendly manner, based on the given source documents. Here are some rules you always follow:
- Generate human readable output, avoid creating output with gibberish text.
- Generate only the requested output, don't include any other language before or after the requested output.
- Never say thank you, that you are happy to help, that you are an AI agent, etc. Just answer directly.
- Generate professional language typically used in business documents in North America.
- Never generate offensive or foul language.
"""

query_wrapper_prompt = PromptTemplate(
    "<|start_header_id|>system<|end_header_id|>\n" + SYSTEM_PROMPT + "<|eot_id|><|start_header_id|>user<|end_header_id|>{query_str}<|eot_id|><|start_header_id|>assistant<|end_header_id|>"
)

llm = HuggingFaceLLM(
    context_window=CFG['configuration']['models']['context_window'],
    max_new_tokens=CFG['configuration']['models']['max_new_tokens'],
    generate_kwargs={"temperature": CFG['configuration']['models']['temperature'], "do_sample": False},
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name=CFG['configuration']['models']['llm_hf'],
    model_name=CFG['configuration']['models']['llm_hf'],
    device_map="cuda:0",
    model_kwargs={"torch_dtype": torch.bfloat16}
)

Settings.llm = llm

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## QueryEngine

In [128]:
# create query engine
query_engine = index.as_query_engine()

In [129]:
# custom question
response = query_engine.query("What is the primary focus of Ukraine Boats Inc.?")

# get similarity scores
for node in response.source_nodes:
    print(f'{node.node.id_}, {node.score}')
    
# predicted answer
print(response.response)

ukraine-boats-3, 0.8536546230316162
ukraine-boats-4, 0.8363556861877441
The primary focus of Ukraine Boats Inc. is on designing and manufacturing boats that incorporate eco-friendly technologies, ensuring compliance with maritime safety standards, and expanding their market presence in Europe, North America, and Asia. They also emphasize customer relationships, quality control in production, and initiatives aimed at reducing their carbon footprint and making boating more accessible.


## Custom query engine

In [137]:
# custom retriever
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=CFG['configuration']['similarity_top_k'],
    vector_store_query_mode=CFG['configuration']['vector_store_query_mode']
)

# similarity threshold
similarity_postprocessor = SimilarityPostprocessor(similarity_cutoff=CFG['configuration']['similarity_cutoff'])

# custom response synthesizer
response_synthesizer = get_response_synthesizer(
    response_mode=CFG['configuration']['response_mode']
)

# combine custom query engine
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    node_postprocessors=[similarity_postprocessor],
    response_synthesizer=response_synthesizer
)

In [138]:
# custom questions
response = query_engine.query("What is the primary focus of Ukraine Boats Inc.?")

In [139]:
for node in response.source_nodes:
    print(f'{node.node.id_}, {node.score}')

ukraine-boats-3, 0.8536546230316162
ukraine-boats-4, 0.8363556861877441
ukraine-boats-2, 0.8338797092437744
ukraine-boats-0, 0.8337738513946533
ukraine-boats-5, 0.83036208152771
ukraine-boats-6, 0.8298962116241455
ukraine-boats-7, 0.8119900226593018
ukraine-boats-9, 0.7885396480560303
ukraine-boats-1, 0.7823765277862549
ukraine-boats-10, 0.7761285305023193


In [140]:
response.response

'The primary focus of Ukraine Boats Inc. is to manufacture and supply high-quality boats and maritime solutions, specializing in recreational, commercial, and luxury vessels. The company emphasizes blending traditional craftsmanship with modern technology while integrating eco-friendly practices and ensuring compliance with maritime safety standards.'

## Agent

In [130]:
AGENT_SYSTEM_PROMPT = "You are a helpful human assistant. You always call the retrieve_semantically_similar_data tool before answering any questions. If the answer to the questions couldn't be found using the tool, just respond with `Didn't find relevant information`."
TOOL_NAME = "retrieve_semantically_similar_data"
TOOL_DESCRIPTION = "Provides additional information about the companies. Input: string"

### OpenAI

In [141]:
# agent worker
agent_worker = OpenAIAgentWorker.from_tools(
    [
        QueryEngineTool.from_defaults(
            query_engine=query_engine,
            name=TOOL_NAME,
            description=TOOL_DESCRIPTION,
            return_direct=False,
        )
    ],
    system_prompt=AGENT_SYSTEM_PROMPT,
    llm=llm,
    verbose=True,
    max_function_calls=CFG['configuration']['max_function_calls']
)

# agent runner
agent = AgentRunner(agent_worker=agent_worker)

### Opensource

In [104]:
agent_worker = ReActAgentWorker.from_tools(
    [
        QueryEngineTool.from_defaults(
            query_engine=query_engine,
            name=TOOL_NAME,
            description=TOOL_DESCRIPTION,
            return_direct=False,
        )
    ],
    llm=llm,
    verbose=True,
    chat_history=[ChatMessage(content=AGENT_SYSTEM_PROMPT, role="system")]
)

# agent runner
agent = AgentRunner(agent_worker=agent_worker)

## Chat

In [32]:
while True:
    # get user input
    current_message = input('Insert your next message:')
    print(f'{datetime.now().strftime("%H:%M:%S.%f")[:-3]}|User: {current_message}')

    response = agent.chat(current_message)
    print(f'{datetime.now().strftime("%H:%M:%S.%f")[:-3]}|Agent: {response.response}')

Insert your next message: Hi


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


16:04:29.117|User: Hi
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: retrieve_semantically_similar_data
Action Input: {'input': 'hello world', 'num_beams': 5}
[0m

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[1;3;34mObservation: Empty Response
[0m[1;3;38;5;200mThought: I still need more information to answer the question. The tool did not provide any useful output. I'll try to gather more context.
Action: retrieve_semantically_similar_data
Action Input: {'input': 'hello world', 'num_beams': 5}
[0m

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[1;3;34mObservation: Empty Response
[0m[1;3;38;5;200mThought: I still don't have enough information to answer the question. The tool did not provide any useful output. I'll try to gather more context.
Action: retrieve_semantically_similar_data
Action Input: {'input': 'hello world', 'num_beams': 5}
[0m

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[1;3;34mObservation: Empty Response
[0m[1;3;38;5;200mThought: I'm starting to think that the tool is not the best approach to answer this question. The user's input "hello world" is very general and the tool is not providing any useful output. I'll try to think of a different approach.
Answer: Hello, how can I assist you today?
[0m16:04:37.764|Agent: Hello, how can I assist you today?


Insert your next message: Do you know anything about the city solve?


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


16:05:08.756|User: Do you know anything about the city solve?
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: retrieve_semantically_similar_data
Action Input: {'input': 'solve city'}
[0m

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[1;3;34mObservation: 

CitySolve Municipal Services is the lifeline of New Urbania, addressing a wide range of city-level concerns and providing prompt solutions to residents' everyday needs.
[0m[1;3;38;5;200mThought: I can answer without using any more tools. I'll use the user's language to answer
Answer: CitySolve Municipal Services is a city-level organization that provides solutions to residents' everyday needs in New Urbania.
[0m16:05:13.003|Agent: CitySolve Municipal Services is a city-level organization that provides solutions to residents' everyday needs in New Urbania.


Insert your next message: What is the primary focus of Ukraine Boats Inc.?


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


16:05:34.892|User: What is the primary focus of Ukraine Boats Inc.?
[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: retrieve_semantically_similar_data
Action Input: {'input': 'Ukraine Boats Inc.'}
[0m

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[1;3;34mObservation: 

Ukraine Boats Inc. is a premier manufacturer and supplier of high-quality boats and maritime solutions based in Odessa, Ukraine. The company prides itself on blending traditional craftsmanship with modern technology to serve clients worldwide. Founded in 2005, the company has grown to be a leader in the boating industry, specializing in recreational, commercial, and luxury vessels.

The company has successfully delivered a range of boats and solutions to various clients, including Blue Horizon Fisheries, Azure Seas Luxury Charters, Coastal Safety Patrol, EcoTrade Logistics, Team HydroBlitz Racing, and Paradise Resorts International. These clients have reported significant benefits from working with Ukraine Boats Inc., including increased efficiency, reduced costs, and enhanced customer satisfaction.

Ukraine Boats Inc. offers a range of products and services, including luxury yachts, commercial boats, and accessories. The company's products are designed to meet 

Insert your next message: Do you know anything about the government company city solve?


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


16:06:09.949|User: Do you know anything about the government company city solve?


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[1;3;38;5;200mThought: The current language of the user is English. I need to use a tool to help me answer the question.
Action: retrieve_semantically_similar_data
Action Input: {'input': AttributedDict([('title', 'CitySolve'), ('type', 'string')])}
[0m[1;3;34mObservation: Error: 2 validation errors for QueryStartEvent
query.str
  Input should be a valid string [type=string_type, input_value=AttributedDict([('title',...'), ('type', 'string')]), input_type=AttributedDict]
    For further information visit https://errors.pydantic.dev/2.9/v/string_type
query.QueryBundle.query_str
  Field required [type=missing, input_value=AttributedDict([('title',...'), ('type', 'string')]), input_type=AttributedDict]
    For further information visit https://errors.pydantic.dev/2.9/v/missing
[0m

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[1;3;34mObservation: Error: Could not parse output. Please follow the thought-action-input format. Try again.
[0m[1;3;38;5;200mThought: I understand that the tool retrieve_semantically_similar_data requires a specific input format. I will make sure to follow the correct format.
Action: retrieve_semantically_similar_data
Action Input: {'title': 'CitySolve', 'type': 'string'}
[0m

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


[1;3;34mObservation: 

CitySolve Municipal Services is a government-owned and operated company that serves as the backbone of New Urbania's civic infrastructure, addressing a wide range of city-level concerns.
[0m[1;3;38;5;200mThought: I can answer without using any more tools. I'll use the user's language to answer
Answer: CitySolve Municipal Services is a government-owned and operated company that serves as the backbone of New Urbania's civic infrastructure, addressing a wide range of city-level concerns.
[0m16:06:17.799|Agent: CitySolve Municipal Services is a government-owned and operated company that serves as the backbone of New Urbania's civic infrastructure, addressing a wide range of city-level concerns.


Insert your next message: Thanks


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


16:06:34.232|User: Thanks
[1;3;38;5;200mThought: I can answer without using any more tools. I'll use the user's language to answer
Answer: CitySolve Municipal Services is a government-owned and operated company that serves as the backbone of New Urbania's civic infrastructure, addressing a wide range of city-level concerns.
[0m16:06:35.734|Agent: CitySolve Municipal Services is a government-owned and operated company that serves as the backbone of New Urbania's civic infrastructure, addressing a wide range of city-level concerns.


KeyboardInterrupt: Interrupted by user

## Evaluation

In [44]:
tru = TruSession()
tru.reset_database()

Updating app_name and app_version in apps table: 0it [00:00, ?it/s]
Updating app_id in records table: 0it [00:00, ?it/s]
Updating app_json in apps table: 0it [00:00, ?it/s]


In [142]:
experiment_name = "gpt-4o-mini-custom-retriever-added-tool-inputs"

In [57]:
provider = OpenAIProvider(
    model_engine=CFG['configuration']['models']['llm_evaluation']
)

In [58]:
context_selection = TruLlama.select_source_nodes().node.text

# context relevance (for each of the context chunks)
f_context_relevance = (
    Feedback(
        provider.context_relevance, name="Context Relevance"
    )
    .on_input()
    .on(context_selection)
)

# groundedness
f_groundedness_cot = (
    Feedback(
        provider.groundedness_measure_with_cot_reasons, name="Groundedness"
    )
    .on(context_selection.collect())
    .on_output()
)

# answer relevance between overall question and answer
f_qa_relevance = (
    Feedback(
        provider.relevance_with_cot_reasons, name="Answer Relevance"
    )
    .on_input_output()
)

✅ In Context Relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input context will be set to __record__.calls[-1].rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.calls[-1].rets.source_nodes[:].node.text.collect() .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .


In [143]:
# Create TruLlama agent
tru_agent = TruLlama(
    agent,
    app_name=experiment_name,
    tags="agent testing",
    feedbacks=[f_qa_relevance, f_context_relevance, f_groundedness_cot],
)

In [144]:
tru_agent.print_instrumented()

Components:
	TruLlama (Custom) at 0x7f57b7bff700 with path __app__
	AgentRunner (Custom) at 0x7f57b6916090 with path __app__.app
	ChatMemoryBuffer (Custom) at 0x7f57b77ae4e0 with path __app__.app.memory
	SimpleChatStore (Custom) at 0x7f57b77ae170 with path __app__.app.memory.chat_store

Methods:
Object at 0x7f57b77ae4e0:
	<function BaseChatStoreMemory.put at 0x7f58a685ccc0> with path __app__.app.memory
	<function BaseMemory.put at 0x7f58a685c4a0> with path __app__.app.memory
Object at 0x7f57b6916090:
	<function AgentRunner.chat at 0x7f58a4fe8900> with path __app__.app
	<function AgentRunner.stream_chat at 0x7f58a4fe8680> with path __app__.app
	<function AgentRunner.achat at 0x7f58a4fe8f40> with path __app__.app
	<function AgentRunner.astream_chat at 0x7f58a4fe9300> with path __app__.app
	<function BaseQueryEngine.query at 0x7f58a6ce96c0> with path __app__.app
	<function BaseQueryEngine.aquery at 0x7f58a6ce9940> with path __app__.app
	<function BaseQueryEngine.synthesize at 0x7f58a6ce99

In [145]:
for item in tqdm(dataset):
    try:
        agent.reset()
        
        with tru_agent as recording:
            agent.query(item.get('question'))
        record_agent = recording.get()
        
        # wait until all the feedback function are finished
        for feedback, result in record_agent.wait_for_feedback_results().items():
            logging.info(f'{feedback.name}: {result.result}')
    except Exception as e:
        logging.error(e)
        traceback.format_exc()

  0% 0/10 [00:00<?, ?it/s]

Added user message to memory: What is the primary focus of Ukraine Boats Inc.?
=== Calling Function ===
Calling function: retrieve_semantically_similar_data with args: {"input":"Ukraine Boats Inc."}
Got output: Ukraine Boats Inc. is a premier manufacturer and supplier of high-quality boats and maritime solutions based in Odessa, Ukraine. Founded in 2005, the company specializes in recreational, commercial, and luxury vessels, blending traditional craftsmanship with modern technology. It has established a strong market presence in Europe, North America, and Asia, supported by partnerships with distribution companies like Baltic Marine Distributors in Germany, OceanCraft LLC in the USA, and Yokohama SeaTech in Japan.

The company is organized into several departments, including Engineering, Sales and Marketing, Production, and Customer Service, each with specific responsibilities to ensure efficient operations and customer satisfaction. Ukraine Boats Inc. is committed to sustainability t

 10% 1/10 [00:21<03:15, 21.76s/it]

Added user message to memory: What is the price range for recreational boats offered by Ukraine Boats Inc.?
=== Calling Function ===
Calling function: retrieve_semantically_similar_data with args: {"input":"price range for recreational boats offered by Ukraine Boats Inc."}
Got output: The price range for recreational boats offered is $32,000 to $55,000.



 20% 2/10 [00:39<02:33, 19.24s/it]

Added user message to memory: Which manufacturing facility focuses on bespoke yachts and customizations?
=== Calling Function ===
Calling function: retrieve_semantically_similar_data with args: {"input":"manufacturing facility bespoke yachts customizations"}
Got output: The manufacturing facility dedicated to bespoke yachts and high-end customizations is the Lviv Custom Craft Workshop. This workshop focuses on creating one-off designs tailored to client specifications, utilizing handcrafted woodwork and premium materials imported from Italy.



 30% 3/10 [00:59<02:18, 19.72s/it]

Added user message to memory: What is the warranty coverage offered for boats by Ukraine Boats Inc.?
=== Calling Function ===
Calling function: retrieve_semantically_similar_data with args: {"input":"warranty coverage for boats by Ukraine Boats Inc."}
Got output: All boats come with a 5-year warranty covering manufacturing defects. Additionally, engines are covered under a separate 3-year engine performance guarantee.



 40% 4/10 [01:08<01:33, 15.59s/it]

Added user message to memory: Which client used the Neptune Voyager catamaran, and what was the impact on their business?
=== Calling Function ===
Calling function: retrieve_semantically_similar_data with args: {"input":"Neptune Voyager catamaran client impact on business"}
Got output: The deployment of 10 Neptune Voyager catamarans significantly enhanced the business for Paradise Resorts International in the Bahamas. The impact included a 45% increase in resort bookings, attributed to the popularity of the boat tours. The catamarans also earned the "Best Tourism Experience" award at the Caribbean Hospitality Awards and improved guest satisfaction ratings by 35%. The Neptune Voyager became the flagship attraction for the resort, with guests expressing enthusiasm about the unforgettable marine views and luxurious comfort.



 50% 5/10 [01:50<02:04, 24.98s/it]

Added user message to memory: What award did the SolarGlide EcoBoat win at the Global Marine Design Challenge?
=== Calling Function ===
Calling function: retrieve_semantically_similar_data with args: {"input":"SolarGlide EcoBoat Global Marine Design Challenge award"}
Got output: The SolarGlide EcoBoat won the **Best Eco-Friendly Design** category at the Global Marine Design Challenge in 2022 for its innovative solar energy system and zero-emission operation.



 60% 6/10 [02:15<01:39, 24.97s/it]

Added user message to memory: How has the Arctic Research Consortium benefited from the Poseidon Explorer?
=== Calling Function ===
Calling function: retrieve_semantically_similar_data with args: {"input":"Arctic Research Consortium Poseidon Explorer benefits"}
Got output: The Poseidon Explorer provided significant benefits to the Arctic Research Consortium, including:

- Successful completion of 5 Arctic research missions, leading to the collection of over 2,000 unique ice core samples.
- A 60% improvement in data collection efficiency, facilitated by onboard autonomous navigation systems.
- Enhanced safety for researchers operating in extreme conditions, thanks to its durable design and advanced technology.



 70% 7/10 [02:24<00:59, 19.94s/it]

Added user message to memory: What is the price of the Odessa Opulence 5000 luxury yacht?
=== Calling Function ===
Calling function: retrieve_semantically_similar_data with args: {"input":"Odessa Opulence 5000 luxury yacht price"}
Got output: Starting at $1,500,000.



 80% 8/10 [02:36<00:34, 17.25s/it]

Added user message to memory: Which features make the WaveRunner X200 suitable for watersports?
=== Calling Function ===
Calling function: retrieve_semantically_similar_data with args: {"input":"WaveRunner X200 features for watersports"}
Got output: The WaveRunner X200 is designed for high-speed performance and durability in watersport racing. Key features include enhanced turbocharged engines and aerodynamic hull modifications, which contribute to its competitive edge on the water. These specifications enable teams to achieve superior speed and maneuverability, making it an ideal choice for dominating regional races.



 90% 9/10 [02:43<00:14, 14.20s/it]

Added user message to memory: What sustainability initiative is Ukraine Boats Inc. pursuing?
=== Calling Function ===
Calling function: retrieve_semantically_similar_data with args: {"input":"sustainability initiative Ukraine Boats Inc."}
Got output: The sustainability initiative at Ukraine Boats Inc. is the **Green Maritime Initiative (GMI)**, which aims to reduce the carbon footprint by incorporating renewable energy solutions in 50% of the fleet by 2030. Additionally, the company promotes the **SolarGlide EcoBoat**, a solar-powered vessel designed for environmentally conscious customers, featuring zero emissions and advanced solar technology.



100% 10/10 [02:58<00:00, 17.87s/it]


In [146]:
tru.get_leaderboard()

Unnamed: 0_level_0,Unnamed: 1_level_0,Answer Relevance,Context Relevance,Groundedness,latency,total_cost
app_name,app_version,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
gpt-4o-mini-custom-retriever,base,1.0,0.588889,1.0,6.702693,0.001112
gpt-4o-mini-custom-retriever-added-tool-inputs,base,1.0,0.548889,1.0,11.007236,0.001114
llama-3.2-3B-custom-retriever-added-tool-inputs,base,0.9,0.616111,0.9,6.842885,0.0
gpt-4o-mini-default-retriever,base,0.8,0.716667,0.793333,5.125279,0.000534
gpt-4o-mini-default-retriever-added-tool-inputs,base,0.8,0.716667,0.793333,7.523859,0.000534
llama-3.2-3B-default-retriever-added-tool-inputs,base,0.7,0.558333,0.833333,7.137055,0.0
llama-3.2-3B-custom-retriever,base,0.541667,0.510251,0.708333,8.730055,0.0
llama-3.2-3B-default-retriever,base,0.428571,0.52381,0.714286,8.83018,0.0


In [None]:
# tru.get_apps()
# tru.delete_app("app_hash_8f3f218c0136495a4835614dc1bbf1be")