In [None]:
import sys
import os

# Make the directory one level above the notebook's working directory
# importable (presumably where the `backend` package imported further down
# lives -- TODO confirm against the repository layout).
# Notebooks do not define `__file__`, so the working directory stands in for
# the notebook's own location.
notebook_dir = os.path.abspath(os.getcwd())
parent_dir = os.path.abspath(os.path.join(notebook_dir, ".."))
# Guard the append so that re-running this cell does not pile up duplicate
# sys.path entries.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [None]:
from dotenv import load_dotenv
# Populate os.environ from a .env file, looked up starting from the current
# working directory.
load_dotenv()

In [3]:
import time

import pandas as pd
import seaborn as sns
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import (
    answer_correctness,
)

In [4]:
# Load the benchmark set (a semicolon-separated CSV) and preview its first rows.
test_df = pd.read_csv("benchmark.csv", sep=";")
test_df.head()

Unnamed: 0,question,ground_truth
0,What are some contracts that ended after 1990-...,There are a total of 377 contracts that ended ...
1,What are some contracts that ended after 1990-...,"There are 48 contracts classified as ""Strategi..."
2,"What are some contracts with HC2 Holdings, Inc.?","There is one contract involving HC2 Holdings, ..."
3,Which year was the most contracts signed?,The year with the most contracts signed was 20...
4,Which company has the most active contracts?,The company with the most active contracts is ...


In [5]:
from backend.src.agent import get_agent

In [6]:
# Build the agent once; the cells below reuse this single instance.
agent = get_agent()

In [7]:
from langchain_core.messages import HumanMessage

# Single-turn example: send one question, then print every message in the
# state the agent returns.
# NOTE(review): in the recorded output the generated Cypher orders by year,
# not by count, so the printed answer is the latest year rather than the
# busiest one -- worth checking the agent's prompt/tool description.
input_messages = [HumanMessage(content="Which year was the most contracts signed?")]
messages = agent.invoke({"messages": input_messages})
for message in messages["messages"]:
    message.pretty_print()


Which year was the most contracts signed?
Tool Calls:
  ContractSearch (849649ec-8886-45c1-93c0-a2edd53938ec)
 Call ID: 849649ec-8886-45c1-93c0-a2edd53938ec
  Args:
    cypher_aggregation: RETURN effective_date.year AS year, count(*) AS count ORDER BY year DESC LIMIT 1
Name: ContractSearch

[{"year": 2024, "count": 1}]

In 2024, the most contracts were signed, with a total of 1 contract.


In [8]:
# History: the dict returned by the previous `agent.invoke` call; its
# "messages" list holds the conversation so far.
messages

{'messages': [HumanMessage(content='Which year was the most contracts signed?', additional_kwargs={}, response_metadata={}, id='90697b7d-fcf8-4b9f-acab-11f061715877'),
  AIMessage(content='', additional_kwargs={'function_call': {'name': 'ContractSearch', 'arguments': '{"cypher_aggregation": "RETURN effective_date.year AS year, count(*) AS count ORDER BY year DESC LIMIT 1"}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []}, id='run-d89ed9ac-331a-4b1f-9c39-a22abb2746f8-0', tool_calls=[{'name': 'ContractSearch', 'args': {'cypher_aggregation': 'RETURN effective_date.year AS year, count(*) AS count ORDER BY year DESC LIMIT 1'}, 'id': '849649ec-8886-45c1-93c0-a2edd53938ec', 'type': 'tool_call'}], usage_metadata={'input_tokens': 532, 'output_tokens': 26, 'total_tokens': 558, 'input_token_details': {'cache_read': 0}}),
  ToolMessage(content='[{"year": 2024, "count": 1}]', name='ContractSearch', id='2ab2fed0-7041-

In [9]:
# Chat with history: replay the earlier conversation and append a follow-up
# question so the agent receives the previous turns as context.
input_messages = [
    *messages["messages"],
    HumanMessage(content="Which party has the most active contracts?"),
]
messages = agent.invoke({"messages": input_messages})
for message in messages["messages"]:
    message.pretty_print()


Which year was the most contracts signed?
Tool Calls:
  ContractSearch (849649ec-8886-45c1-93c0-a2edd53938ec)
 Call ID: 849649ec-8886-45c1-93c0-a2edd53938ec
  Args:
    cypher_aggregation: RETURN effective_date.year AS year, count(*) AS count ORDER BY year DESC LIMIT 1
Name: ContractSearch

[{"year": 2024, "count": 1}]

In 2024, the most contracts were signed, with a total of 1 contract.

Which party has the most active contracts?
Tool Calls:
  ContractSearch (4c463873-1277-4fda-b626-c0107003162e)
 Call ID: 4c463873-1277-4fda-b626-c0107003162e
  Args:
    active: True
    cypher_aggregation: UNWIND parties AS party WITH party, active, count(*) AS contract_count WHERE active = true RETURN party, contract_count ORDER BY contract_count DESC LIMIT 1
Name: ContractSearch

[{"party": "HOF Village, LLC", "contract_count": 2}]

HOF Village, LLC has the most active contracts with a total of 2.


In [10]:
def get_answer(input: str) -> str:
    """Ask the agent one question, without prior history, and return its reply.

    Args:
        input: The user's question, sent as a single ``HumanMessage``.

    Returns:
        The ``content`` of the last message in the state returned by
        ``agent.invoke``.
    """
    reply_state = agent.invoke({"messages": [HumanMessage(content=input)]})
    return reply_state["messages"][-1].content

In [11]:
# Quick sanity check that the helper returns the agent's reply as text.
get_answer("What's your name?")

'I am Gemini, a large language model built by Google.'

In [12]:
from tqdm import tqdm

# Run every benchmark question through the agent, recording the final answer
# and the wall-clock latency of each call.
results = []
latencies = []
for question in tqdm(test_df["question"], total=len(test_df), desc="Processing questions"):
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments.
    start = time.perf_counter()
    try:
        data = get_answer(question)
    except Exception as exc:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt can still stop the run. Keep the placeholder a
        # string so the "response" column stays homogeneous for the
        # evaluation step, and report the failure instead of hiding it.
        data = f"error: {type(exc).__name__}: {exc}"
        tqdm.write(f"Question failed ({question!r}): {exc!r}")
    latencies.append(time.perf_counter() - start)
    results.append(data)

Processing questions: 100%|██████████| 6/6 [00:09<00:00,  1.54s/it]


In [13]:
# Attach the per-question timings and agent answers to the benchmark frame.
test_df["latencies"], test_df["response"] = latencies, results


In [14]:
# Preview the benchmark frame, now including the latency and response columns.
test_df.head()

Unnamed: 0,question,ground_truth,latencies,response
0,What are some contracts that ended after 1990-...,There are a total of 377 contracts that ended ...,1.896093,I found 377 contracts that ended after 1990-01...
1,What are some contracts that ended after 1990-...,"There are 48 contracts classified as ""Strategi...",2.076751,I found 48 strategic alliance contracts that e...
2,"What are some contracts with HC2 Holdings, Inc.?","There is one contract involving HC2 Holdings, ...",1.141005,I found one strategic alliance contract for HC...
3,Which year was the most contracts signed?,The year with the most contracts signed was 20...,1.031179,"In 2024, the most contracts were signed. The c..."
4,Which company has the most active contracts?,The company with the most active contracts is ...,1.549473,"HOF Village, LLC has the most active contracts..."


In [15]:
# Convert the frame to a `datasets.Dataset` and score the agent's answers
# against the ground truth with the answer-correctness metric.
dataset = Dataset.from_pandas(test_df)
result = evaluate(dataset, metrics=[answer_correctness])

Evaluating:   0%|          | 0/6 [00:00<?, ?it/s]

In [16]:
# Show the per-question evaluation scores as a DataFrame.
result.to_pandas()

Unnamed: 0,user_input,response,reference,answer_correctness
0,What are some contracts that ended after 1990-...,I found 377 contracts that ended after 1990-01...,There are a total of 377 contracts that ended ...,0.516704
1,What are some contracts that ended after 1990-...,I found 48 strategic alliance contracts that e...,"There are 48 contracts classified as ""Strategi...",0.44014
2,"What are some contracts with HC2 Holdings, Inc.?",I found one strategic alliance contract for HC...,"There is one contract involving HC2 Holdings, ...",0.99288
3,Which year was the most contracts signed?,"In 2024, the most contracts were signed. The c...",The year with the most contracts signed was 20...,0.227758
4,Which company has the most active contracts?,"HOF Village, LLC has the most active contracts...",The company with the most active contracts is ...,0.994188
5,Do we have any contracts about indemnification?,I looked for any contracts containing the word...,There are currently no contracts in our system...,0.727548
