In [1]:
import sys
import os

# Insert the parent directory of "app" into sys.path
# so that Python recognizes "app" as an importable package.
notebook_dir = os.path.dirname(os.path.abspath("__file__"))
parent_dir = os.path.abspath(os.path.join(notebook_dir, ".."))
sys.path.append(parent_dir)

In [2]:
from dotenv import load_dotenv
load_dotenv()  # This looks for .env in the current working directory

True

In [3]:
import time

import pandas as pd
import seaborn as sns
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import (
    answer_correctness,
)

In [4]:
# Benchmark data
test_df = pd.read_csv('benchmark.csv', delimiter=";")
test_df.head()

Unnamed: 0,question,ground_truth
0,What are some contracts that ended after 1990-...,There are a total of 377 contracts that ended ...
1,What are some contracts that ended after 1990-...,"There are 48 contracts classified as ""Strategi..."
2,"What are some contracts with HC2 Holdings, Inc.?","There is one contract involving HC2 Holdings, ..."
3,Which year was the most contracts signed?,The year with the most contracts signed was 20...
4,Which company has the most active contracts?,The company with the most active contracts is ...


In [5]:
from backend.src.agent import get_agent

In [6]:
agent = get_agent()

In [7]:
from langchain_core.messages import HumanMessage

input_messages = [HumanMessage(content="Which year was the most contracts signed?")]
messages = agent.invoke({"messages": input_messages})
for m in messages["messages"]:
    m.pretty_print()


Which year was the most contracts signed?
Tool Calls:
  ContractSearch (51f41440-29f8-4ecb-9583-a2ff518c5dff)
 Call ID: 51f41440-29f8-4ecb-9583-a2ff518c5dff
  Args:
    cypher_aggregation: RETURN effective_date.year AS year, count(*) AS count ORDER BY year DESC LIMIT 1
Name: ContractSearch

[{"year": 2024, "count": 1}]

The year in which the most contracts were signed was 2024, with a total of 1 contract.


In [8]:
# History
messages

{'messages': [HumanMessage(content='Which year was the most contracts signed?', additional_kwargs={}, response_metadata={}, id='6df6f881-8a98-4cfc-996a-5b4fb20b56d0'),
  AIMessage(content='', additional_kwargs={'function_call': {'name': 'ContractSearch', 'arguments': '{"cypher_aggregation": "RETURN effective_date.year AS year, count(*) AS count ORDER BY year DESC LIMIT 1"}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []}, id='run-cf6abb07-8430-47b1-a764-c8f637011228-0', tool_calls=[{'name': 'ContractSearch', 'args': {'cypher_aggregation': 'RETURN effective_date.year AS year, count(*) AS count ORDER BY year DESC LIMIT 1'}, 'id': '51f41440-29f8-4ecb-9583-a2ff518c5dff', 'type': 'tool_call'}], usage_metadata={'input_tokens': 551, 'output_tokens': 26, 'total_tokens': 577, 'input_token_details': {'cache_read': 0}}),
  ToolMessage(content='[{"year": 2024, "count": 1}]', name='ContractSearch', id='6fb756c2-08c9-

In [9]:
# Chat with history

input_messages = messages['messages'] + [HumanMessage(content="Which party has the most active contracts?")]
messages = agent.invoke({"messages": input_messages})
for m in messages["messages"]:
    m.pretty_print()


Which year was the most contracts signed?
Tool Calls:
  ContractSearch (51f41440-29f8-4ecb-9583-a2ff518c5dff)
 Call ID: 51f41440-29f8-4ecb-9583-a2ff518c5dff
  Args:
    cypher_aggregation: RETURN effective_date.year AS year, count(*) AS count ORDER BY year DESC LIMIT 1
Name: ContractSearch

[{"year": 2024, "count": 1}]

The year in which the most contracts were signed was 2024, with a total of 1 contract.

Which party has the most active contracts?
Tool Calls:
  ContractSearch (75f220ff-a014-4a4b-aca5-483520dca2e8)
 Call ID: 75f220ff-a014-4a4b-aca5-483520dca2e8
  Args:
    cypher_aggregation: UNWIND parties AS party WITH party, active, count(*) AS contract_count WHERE active = true RETURN party, contract_count ORDER BY contract_count DESC LIMIT 1
Name: ContractSearch

[{"party": "HOF Village, LLC", "contract_count": 2}]

The party with the most active contracts is HOF Village, LLC, with a total of 2 active contracts.


In [10]:
# Chat with history
messages = agent.invoke({"messages": [HumanMessage(content="List contracts with more than 100000 euros of value?")]})
for m in messages["messages"]:
    m.pretty_print()


List contracts with more than 100000 euros of value?
Tool Calls:
  ContractSearch (e79f7d3f-7c54-4aea-88e5-90852ca45d91)
 Call ID: e79f7d3f-7c54-4aea-88e5-90852ca45d91
  Args:
    monetary_value: {'value': 100000.0, 'operator': '>'}
Name: ContractSearch

[{"output": {"total_count_of_contracts": 65, "example_values": [{"summary": "Consulting agreement between GROUPE PARAMEUS CORP and Sphere 3D Corp. GROUPE PARAMEUS CORP will provide business development services to Sphere 3D Corp.", "end_date": "2021-5-31", "monetary_value": 150000.0, "contract_type": "Service", "contract_scope": "The Consultant agrees to provide business development services to the Company, including, but not limited to, the following: Advise and assist the company in developing and implementing appropriate plans and materials for presenting the Company and its business plan, strategy and personnel to the financial community. With the cooperation of the Company, maintain an awareness during the term of the agreement o

In [11]:
def get_answer(input: str) -> str:
    input_messages = [HumanMessage(content=input)]
    messages = agent.invoke({"messages": input_messages})
    return messages["messages"][-1].content

In [12]:
get_answer("What's your name?")

'I am Gemini, a large language model built by Google.'

In [13]:
from tqdm import tqdm

# Get workflows generated answers
results = []
latencies = []
for i, row in tqdm(test_df.iterrows(), total=len(test_df), desc="Processing questions"):
    question = row['question']
    start = time.time()
    try:
        data = get_answer(question)
    except:
        data = {"answer": "timeout", "question": question}
    end = time.time()
    latencies.append(end - start)
    results.append(data)

Processing questions: 100%|██████████| 45/45 [01:00<00:00,  1.34s/it]


In [14]:
test_df["latencies"] = latencies
test_df["response"] = results

In [15]:
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)  # Adjust width to prevent wrapping
pd.set_option('display.max_colwidth', None)  # Prevents truncation of long column values

In [16]:
test_df[['response','question']]

Unnamed: 0,response,question
0,"I found 377 contracts that ended after 1990-01-01. Here are five examples:\n* **NUVEEN - REMARKETING AGREEMENT:** A service agreement for remarketing Series [●] MuniFund Preferred Shares, effective from 2024-1-1 to 2024-3-31.\n* **LUCIDINC\_04\_15\_2011-EX-10.9-DISTRIBUTOR AGREEMENT:** A distributor agreement for Lucid products, effective from 2023-1-1 to 2026-1-1.\n* **MFAFINANCIAL,INC\_07\_06\_2020-EX-99.D-JOINT FILING AGREEMENT:** A joint venture agreement effective from 2020-7-6 to 2021-7-6.\n* **ADUROBIOTECH,INC\_06\_02\_2020-EX-10.7-CONSULTING AGREEMENT(1):** A service agreement for consulting services, effective from 2020-7-1 to 2020-12-31.\n* **ADUROBIOTECH,INC\_06\_02\_2020-EX-10.7-CONSULTING AGREEMENT:** Another service agreement for consulting, effective from 2020-7-1 to 2020-12-31.",What are some contracts that ended after 1990-01-01?
1,"I found 48 strategic alliance contracts that ended after 1990-01-01. Here are a few examples:\n* **ELFBEAUTY,INC\_07\_02\_2020-EX-10.1-COOPERATION AGREEMENT**: This cooperation agreement between e.l.f. Beauty, Inc. and Marathon Partners Equity Management, LLC covers board nomination, support period covenants, and standstill covenants. It was effective from 2020-7-1 to 2021-11-1.\n* **HC2HOLDINGS,INC\_05\_14\_2020-EX-10.1-COOPERATION AGREEMENT**: This agreement modifies the composition of the Company's board of directors and other matters. It was effective from 2020-5-13 to 2021-1-1\n* **ROCKYMOUNTAINCHOCOLATEFACTORY,INC\_12\_23\_2019-EX-10.2-STRATEGIC ALLIANCE AGREEMENT**: A strategic cooperation alliance that was effective from 2019-12-20 to 2021-12-20.",What are some contracts that ended after 1990-01-01 that fall under strategic alliance?
2,"I found one strategic alliance contract with HC2 Holdings, Inc. The contract involves an agreement to modify the composition of the company's board of directors and other matters. It was effective from May 13, 2020, to January 1, 2021, with a monetary value of $352,290.25.","What are some contracts with HC2 Holdings, Inc.?"
3,"The year with the most contracts signed was 2024, with a total of 1 contract.",Which year was the most contracts signed?
4,"The party with the most active contracts is HOF Village, LLC, with a total of 2 active contracts.",Which company has the most active contracts?
5,"I am sorry, I cannot fulfill this request. The available tools lack the functionality to search contracts by keywords within the contract text. Is there anything else I can help you with?",Do we have any contracts about indemnification?
6,"To assist you with this request, I need to know how you would like to define ""highest total value"". For example, do you want to specify a value to compare against, or do you want to see the contracts with the top 10 highest monetary values?\n\nAlso, please confirm the currency that the monetary value is in.",Which Hosting contracts have the highest total value?
7,Microgenics Corporation has 1 contract.,How many contracts does Microgenics Corporation have?
8,"I can search for contracts by country. Would you like me to search for contracts with the country set to ""US""?",What contracts are governed by US law?
9,"The party with the most Strategic Alliance contracts is VerticalNet, Inc. with 3 contracts.",Which party has the most Strategic Alliance contracts?


In [17]:
dataset = Dataset.from_pandas(test_df)

result = evaluate(
    dataset,
    metrics=[
        answer_correctness
    ],
)

Evaluating:   0%|          | 0/45 [00:00<?, ?it/s]

Exception raised in Job[7]: KeyError('reference')
Exception raised in Job[15]: KeyError('reference')
Exception raised in Job[8]: KeyError('reference')
Exception raised in Job[9]: KeyError('reference')
Exception raised in Job[12]: KeyError('reference')
Exception raised in Job[14]: KeyError('reference')
Exception raised in Job[6]: KeyError('reference')
Exception raised in Job[11]: KeyError('reference')
Exception raised in Job[10]: KeyError('reference')
Exception raised in Job[17]: KeyError('reference')
Exception raised in Job[20]: KeyError('reference')
Exception raised in Job[16]: KeyError('reference')
Exception raised in Job[18]: KeyError('reference')
Exception raised in Job[23]: KeyError('reference')
Exception raised in Job[21]: KeyError('reference')
Exception raised in Job[26]: KeyError('reference')
Exception raised in Job[25]: KeyError('reference')
Exception raised in Job[24]: KeyError('reference')
Exception raised in Job[22]: KeyError('reference')
Exception raised in Job[27]: KeyErr

In [18]:
result.to_pandas()

Unnamed: 0,user_input,response,reference,answer_correctness
0,What are some contracts that ended after 1990-01-01?,"I found 377 contracts that ended after 1990-01-01. Here are five examples:\n* **NUVEEN - REMARKETING AGREEMENT:** A service agreement for remarketing Series [●] MuniFund Preferred Shares, effective from 2024-1-1 to 2024-3-31.\n* **LUCIDINC\_04\_15\_2011-EX-10.9-DISTRIBUTOR AGREEMENT:** A distributor agreement for Lucid products, effective from 2023-1-1 to 2026-1-1.\n* **MFAFINANCIAL,INC\_07\_06\_2020-EX-99.D-JOINT FILING AGREEMENT:** A joint venture agreement effective from 2020-7-6 to 2021-7-6.\n* **ADUROBIOTECH,INC\_06\_02\_2020-EX-10.7-CONSULTING AGREEMENT(1):** A service agreement for consulting services, effective from 2020-7-1 to 2020-12-31.\n* **ADUROBIOTECH,INC\_06\_02\_2020-EX-10.7-CONSULTING AGREEMENT:** Another service agreement for consulting, effective from 2020-7-1 to 2020-12-31.","There are a total of 377 contracts that ended after January 1, 1990",0.432857
1,What are some contracts that ended after 1990-01-01 that fall under strategic alliance?,"I found 48 strategic alliance contracts that ended after 1990-01-01. Here are a few examples:\n* **ELFBEAUTY,INC\_07\_02\_2020-EX-10.1-COOPERATION AGREEMENT**: This cooperation agreement between e.l.f. Beauty, Inc. and Marathon Partners Equity Management, LLC covers board nomination, support period covenants, and standstill covenants. It was effective from 2020-7-1 to 2021-11-1.\n* **HC2HOLDINGS,INC\_05\_14\_2020-EX-10.1-COOPERATION AGREEMENT**: This agreement modifies the composition of the Company's board of directors and other matters. It was effective from 2020-5-13 to 2021-1-1\n* **ROCKYMOUNTAINCHOCOLATEFACTORY,INC\_12\_23\_2019-EX-10.2-STRATEGIC ALLIANCE AGREEMENT**: A strategic cooperation alliance that was effective from 2019-12-20 to 2021-12-20.","There are 48 contracts classified as ""Strategic Alliance"" that ended after January 1, 1990.",0.349037
2,"What are some contracts with HC2 Holdings, Inc.?","I found one strategic alliance contract with HC2 Holdings, Inc. The contract involves an agreement to modify the composition of the company's board of directors and other matters. It was effective from May 13, 2020, to January 1, 2021, with a monetary value of $352,290.25.","There is one contract involving HC2 Holdings, Inc. This contract is a ""Cooperation Agreement"" under the type ""Strategic Alliance."" It was effective from May 13, 2020, and ended on January 1, 2021. The contract's scope involved an agreement to modify the composition of the company's board of directors and related matters.",0.647371
3,Which year was the most contracts signed?,"The year with the most contracts signed was 2024, with a total of 1 contract.","The year with the most contracts signed was 2020, with a total of 50 contracts.",0.23214
4,Which company has the most active contracts?,"The party with the most active contracts is HOF Village, LLC, with a total of 2 active contracts.","The company with the most active contracts is HOF Village, LLC, with a total of 2 active contracts.",0.996484
5,Do we have any contracts about indemnification?,"I am sorry, I cannot fulfill this request. The available tools lack the functionality to search contracts by keywords within the contract text. Is there anything else I can help you with?","There are currently no contracts in our system that specifically mention ""indemnification"" in their summaries. If you have any more specific criteria or need assistance with something else, feel free to ask!\n",0.209469
6,Which Hosting contracts have the highest total value?,"To assist you with this request, I need to know how you would like to define ""highest total value"". For example, do you want to specify a value to compare against, or do you want to see the contracts with the top 10 highest monetary values?\n\nAlso, please confirm the currency that the monetary value is in.",,
7,How many contracts does Microgenics Corporation have?,Microgenics Corporation has 1 contract.,,
8,What contracts are governed by US law?,"I can search for contracts by country. Would you like me to search for contracts with the country set to ""US""?",,
9,Which party has the most Strategic Alliance contracts?,"The party with the most Strategic Alliance contracts is VerticalNet, Inc. with 3 contracts.",,
