In [1]:
import sys
import os

# Make the parent of the notebook's working directory importable, so that
# packages living next to the notebook folder can be imported
# (presumably the `backend` package imported further down -- confirm).
#
# A notebook has no `__file__`, and the previous code passed the *string*
# "__file__" to abspath, which simply resolved against the working directory.
# Use the working directory explicitly instead; the resulting path is the same.
notebook_dir = os.getcwd()
parent_dir = os.path.abspath(os.path.join(notebook_dir, os.pardir))

# Guard against piling up duplicate entries when the cell is re-run.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [2]:
# Load settings from a .env file into the process environment before the agent
# is built further down. The displayed return value (True in the saved output)
# is load_dotenv's own result.
# NOTE(review): presumably the agent needs credentials from this file -- confirm.
from dotenv import load_dotenv
load_dotenv()  # This looks for .env in the current working directory

True

In [3]:
import time

import pandas as pd
import seaborn as sns
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import (
    answer_correctness,
    context_recall,
    faithfulness
)

from langchain_core.messages import HumanMessage, ToolMessage, AIMessage, ToolMessage


In [4]:
# Benchmark set: one row per question with its reference answer
# (columns `question` and `ground_truth`), stored as a ';'-separated CSV.
test_df = pd.read_csv(
    "benchmark.csv",
    sep=";",
)
test_df.head()

Unnamed: 0,question,ground_truth
0,What are some contracts that ended after 1990-...,There are a total of 377 contracts that ended ...
1,What are some contracts that ended after 1990-...,"There are 48 contracts classified as ""Strategi..."
2,"What are some contracts with HC2 Holdings, Inc.?","There is one contract involving HC2 Holdings, ..."
3,Which year was the most contracts signed?,The year with the most contracts signed was 20...
4,Which company has the most active contracts?,The company with the most active contracts is ...


In [5]:
from backend.src.agent import get_agent

In [6]:
# Build the agent under evaluation; every later cell talks to it through
# agent.invoke({"messages": [...]}).
agent = get_agent()

In [7]:
# Smoke test: send a single question and print every message of the
# resulting conversation.
input_messages = [HumanMessage(content="Which day is today?")]
messages = agent.invoke(dict(messages=input_messages))
for message in messages["messages"]:
    message.pretty_print()


Which day is today?

Today is 2025-03-19.


In [8]:
# History
# Show the raw result of the previous invoke: a dict whose "messages" list
# holds the conversation so far (the human question and the AI reply).
messages

{'messages': [HumanMessage(content='Which day is today?', additional_kwargs={}, response_metadata={}, id='4b8afdd3-95a1-4594-9aed-3c87ca1732ba'),
  AIMessage(content='Today is 2025-03-19.', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []}, id='run-6ac03913-65df-4573-9f41-29fad6cb93a1-0', usage_metadata={'input_tokens': 683, 'output_tokens': 15, 'total_tokens': 698, 'input_token_details': {'cache_read': 0}})]}

In [9]:
# Chat with history: append a follow-up question to the previous conversation
# and invoke the agent on the whole list.
# Note: `messages` is overwritten with the new result, so re-running this cell
# appends the question once more.
input_messages = [
    *messages["messages"],
    HumanMessage(content="List all contracts where a party from Japan is involved."),
]
messages = agent.invoke(dict(messages=input_messages))
for message in messages["messages"]:
    message.pretty_print()


Which day is today?

Today is 2025-03-19.

List all contracts where a party from Japan is involved.
Tool Calls:
  ContractSearch (0d68bb74-0823-4f1a-84ad-84f091999dd1)
 Call ID: 0d68bb74-0823-4f1a-84ad-84f091999dd1
  Args:
    country: JP
Name: ContractSearch

[{"output": {"total_count_of_contracts": 0, "example_values": []}}]

There are no contracts involving a party from Japan.


In [10]:
def get_answer(input: str) -> tuple[str, list, list]:
    """Ask the agent a single question and summarise the resulting conversation.

    Args:
        input: The user question. (The name shadows the builtin ``input``; it
            is kept so existing keyword callers keep working.)

    Returns:
        A 3-tuple ``(answer, tools, context)``:
          * ``answer``  -- ``content`` of the last message in the conversation.
          * ``tools``   -- the ``function_call`` entries found in the
            ``additional_kwargs`` of the AI messages, in order.
          * ``context`` -- ``content`` of every tool message, in order.
    """
    conversation = agent.invoke({"messages": [HumanMessage(content=input)]})["messages"]

    tools = []
    context = []
    for message in conversation:
        if isinstance(message, AIMessage):
            # Look the call up once; AI messages without a function call are skipped.
            function_call = message.additional_kwargs.get("function_call")
            if function_call:
                tools.append(function_call)
        if isinstance(message, ToolMessage):
            context.append(message.content)

    answer = conversation[-1].content
    return answer, tools, context

In [22]:
from tqdm import tqdm

# Placeholder stored when the agent call fails. It has the same
# (answer, tools, context) shape as get_answer's return value so the column
# extraction below keeps working; the previous dict placeholder made
# `el[0]` raise KeyError after any failure.
FAILED_ANSWER = ("timeout", [], [])

# Run the agent on every benchmark question, recording answer and latency.
results = []
latencies = []
for question in tqdm(test_df["question"], total=len(test_df), desc="Processing questions"):
    start = time.perf_counter()  # monotonic clock, unaffected by system clock changes
    try:
        data = get_answer(question)
    except Exception as exc:  # not a bare except: Ctrl-C must still stop the run
        tqdm.write(f"get_answer failed for {question!r}: {exc!r}")
        data = FAILED_ANSWER
    latencies.append(time.perf_counter() - start)
    results.append(data)

# Attach the outcomes to the benchmark frame (columns are overwritten on re-run).
test_df["latencies"] = latencies
test_df["response"] = [answer for answer, _, _ in results]
test_df["tools"] = [tools for _, tools, _ in results]
# Each retrieved context is stringified before being stored.
test_df["retrieved_contexts"] = [[str(item) for item in context] for _, _, context in results]

Processing questions: 100%|██████████| 44/44 [01:08<00:00,  1.57s/it]


In [12]:
# Show frames in full: no column limit, no line-width wrapping, and no
# truncation of long cell values.
for option_name in ("display.max_columns", "display.width", "display.max_colwidth"):
    pd.set_option(option_name, None)

In [23]:
# Preview the agent's answers next to the questions and the tool calls made.
test_df.loc[:, ["response", "question", "tools"]].head()

Unnamed: 0,response,question,tools
0,"Here are some contracts with end dates after 1990-01-01. The tool found 323 contracts. I am including a few examples in this response:\n* **Strategic Alliance**: This contract is between PHL VARIABLE INSURANCE COMPANY, PHOENIX LIFE INSURANCE COMPANY, PHOENIX EQUITY PLANNING CORPORATION, and INVESTORS CAPITAL CORPORATION. It was effective on 2023-12-06 and ends on 2033-12-06.\n* **Service**: This contract is between Rise (Tianjin) Education Information Consulting Co., Ltd. and \[•]. It was effective on 2023-08-24 and ends on 2028-08-24.\n* **Service**: This contract is between Aduro Biotech, Inc. and IREYA B.V. It was effective on 2020-07-01 and ends on 2020-12-31.",What are some contracts that ended after 1990-01-01?,"[{'name': 'ContractSearch', 'arguments': '{""min_end_date"": ""1990-01-01""}'}]"
1,"Here are some strategic alliance contracts that ended after 1990-01-01. I'm including details on 5 of the 52 contracts found:\n\n1. **Parties**: PHL VARIABLE INSURANCE COMPANY, PHOENIX LIFE INSURANCE COMPANY, PHOENIX EQUITY PLANNING CORPORATION, and INVESTORS CAPITAL CORPORATION. **Summary**: Strategic Alliance Agreement by and among PHL VARIABLE INSURANCE COMPANY, PHOENIX LIFE INSURANCE COMPANY, PHOENIX EQUITY PLANNING CORPORATION, and INVESTORS CAPITAL CORPORATION. ICC is a broker dealer that is registered under the 1934 Act and with FINRA and an investment adviser that is registered with the SEC under the Advisers Act and doing business as an investment adviser as Investors Capital Advisory Services. ICAS has established certain asset allocation Models that are eligible for use with the GIE, and in the future may establish other Models that become eligible for use with the GIE. ICC will solicit sales of the GIE to ICC Customers. **End Date**: 2033-12-6\n2. **Parties**: Spôk Holdings, Inc., White Hat Strategic Partners LP, White Hat SP GP LLC, White Hat Capital Partners LP, and White Hat Capital Partners GP LLC. **Summary**: Cooperation Agreement between Spôk Holdings, Inc. and White Hat Strategic Partners LP, White Hat SP GP LLC, White Hat Capital Partners LP, and White Hat Capital Partners GP LLC. The White Hat Parties have beneficial ownership of 319,708 shares of the Company's Common Stock. Brett Shockley will be appointed to the Board of Directors. The White Hat Parties will withdraw their nomination of three directors for election to the Board at the Company's 2020 Annual Meeting of Stockholders. The White Hat Parties shall vote in favor of all persons nominated by the Board to serve as directors of the Company. **End Date**: 2021-5-18\n3. **Parties**: MG Capital Management Ltd., HC2 Holdings, Inc. **Summary**: Cooperation Agreement between HC2 Holdings, Inc. and MG Capital Management Ltd. regarding board composition and other matters. 
MG Capital Parties agreed to withdraw the nomination notice and terminate solicitation of proxies in connection with the 2020 Annual Meeting. Board of Directors will increase from six (6) to ten (10) Directors, and appoint each of Kenneth S. Courtis and Michael Gorzynski and Avram A. Glazer and Shelly Lombard to the Board. Mr. Glazer will be appointed as Chairman of the Board. The Company agrees that it shall hold the 2020 Annual Meeting no later than July 8, 2020. The Company will nominate each of the New Directors and Wayne Barr, Jr., Warren H. Gfeller and Philip A. Falcone for election to the Board at the 2020 Annual Meeting for a term expiring at the Company's 2021 Annual Meeting of Stockholders. The MG Capital Parties shall comply with the terms of this Agreement and shall be responsible for any breach of this Agreement by any such Affiliate or Associate. The standstill period begins on the date of this Agreement and shall extend until thirty (30) days prior to the deadline for the submission of stockholder nominations for directors for the 2021 Annual Meeting pursuant to the By-Laws. **End Date**: 2021-1-1\n4. **Parties**: LIQUIDMETAL TECHNOLOGIES, INC., EUTECTIX, LLC. **Summary**: Business Development Agreement between Liquidmetal Technologies, Inc. and Eutectix, LLC for collaboration on amorphous alloy materials and technologies. Liquidmetal will utilize Eutectix's capabilities for alloy development and manufacturing. Eutectix gains access to Liquidmetal's processing equipment and methodologies. The agreement includes equipment licensing, alloy purchase, and field of use restrictions. Eutectix will pay Liquidmetal a royalty based on a percentage of the invoice price of any Licensed Products. Liquidmetal may purchase Liquidmetal Products from Eutectix. **End Date**: 2025-1-1\n5. **Parties**: Farids & Co. LLC, Edible Arrangements, LLC, Rocky Mountain Chocolate Factory, Inc. 
**Summary**: Strategic Alliance Agreement dated as of December 20, 2019, by and among Farids & Co. LLC, Edible Arrangements, LLC, and Rocky Mountain Chocolate Factory, Inc. The agreement includes the issuance and purchase of 126,839 shares of the Company's common stock by Farids, the execution and delivery of a Warrant and an Exclusive Supplier Operating Agreement between the Company and EA. The parties aim to enhance their strategic cooperation alliance. **End Date**: 2019-12-20",What are some contracts that ended after 1990-01-01 that fall under strategic alliance?,"[{'name': 'ContractSearch', 'arguments': '{""contract_type"": ""Strategic Alliance"", ""min_end_date"": ""1990-01-01""}'}]"
2,"I found one contract with HC2 Holdings, Inc. It is a Strategic Alliance agreement with MG Capital Management Ltd. regarding board composition, it was effective from 2020-05-13 to 2021-01-01.","What are some contracts with HC2 Holdings, Inc.?","[{'name': 'ContractSearch', 'arguments': '{""parties"": [""HC2 Holdings, Inc.""]}'}]"
3,"Based on the query, the year with the most contracts signed is 'None' with 1 contract. This likely means the effective date is missing from most contracts. I can refine the search if you provide a date range to look within.",Which year was the most contracts signed?,"[{'name': 'ContractSearch', 'arguments': '{""cypher_aggregation"": ""RETURN effective_date.year AS year, count(*) AS count ORDER BY year DESC LIMIT 1""}'}]"
4,"Beijing Sun Seven Stars Culture Development Limited has the most active contracts, with a total of 2 active contracts.",Which company has the most active contracts?,"[{'name': 'ContractSearch', 'arguments': '{""active"": true, ""cypher_aggregation"": ""\nUNWIND parties AS party\nWITH party.name AS party_name, count(*) AS contract_count\nRETURN party_name, contract_count\nORDER BY contract_count DESC\nLIMIT 1\n""}'}]"


In [24]:
# Build the evaluation set with ragas' current column names spelled out
# (user_input / response / retrieved_contexts / reference) instead of relying
# on implicit renaming of the legacy `question` / `ground_truth` names, and
# pass only the columns the metrics consume (no latencies / tools).
eval_df = test_df.rename(
    columns={"question": "user_input", "ground_truth": "reference"}
)[["user_input", "response", "retrieved_contexts", "reference"]]

# answer_correctness and context_recall compare against the reference answer.
# NOTE(review): the KeyError('reference') jobs and NaN scores seen in earlier
# runs presumably come from benchmark rows without a ground truth -- confirm.
# Surface the count up front rather than only as NaNs in the result.
n_missing_reference = int(eval_df["reference"].isna().sum())
if n_missing_reference:
    print(
        f"Warning: {n_missing_reference} of {len(eval_df)} rows have no reference answer; "
        "reference-based metrics will be NaN for them."
    )

dataset = Dataset.from_pandas(eval_df, preserve_index=False)

result = evaluate(
    dataset,
    metrics=[
        answer_correctness,
        context_recall,
        faithfulness,
    ],
)

Evaluating:   0%|          | 0/132 [00:00<?, ?it/s]

Exception raised in Job[15]: KeyError('reference')
Exception raised in Job[16]: KeyError('reference')
Exception raised in Job[19]: KeyError('reference')
Exception raised in Job[18]: KeyError('reference')
Exception raised in Job[22]: KeyError('reference')
Exception raised in Job[21]: KeyError('reference')
Exception raised in Job[25]: KeyError('reference')
Exception raised in Job[24]: KeyError('reference')
Exception raised in Job[28]: KeyError('reference')
Exception raised in Job[31]: KeyError('reference')
Exception raised in Job[27]: KeyError('reference')
Exception raised in Job[34]: KeyError('reference')
Exception raised in Job[37]: KeyError('reference')
Exception raised in Job[30]: KeyError('reference')
Exception raised in Job[40]: KeyError('reference')
Exception raised in Job[36]: KeyError('reference')
Exception raised in Job[39]: KeyError('reference')
Exception raised in Job[43]: KeyError('reference')
Exception raised in Job[46]: KeyError('reference')
Exception raised in Job[48]: Ke

In [29]:
# Per-question metric scores shown next to the question and the agent's response.
result.to_pandas().loc[
    :,
    ["user_input", "answer_correctness", "context_recall", "faithfulness", "response"],
]

Unnamed: 0,user_input,answer_correctness,context_recall,faithfulness,response
0,What are some contracts that ended after 1990-01-01?,0.35098,0.0,1.0,"Here are some contracts with end dates after 1990-01-01. The tool found 323 contracts. I am including a few examples in this response:\n* **Strategic Alliance**: This contract is between PHL VARIABLE INSURANCE COMPANY, PHOENIX LIFE INSURANCE COMPANY, PHOENIX EQUITY PLANNING CORPORATION, and INVESTORS CAPITAL CORPORATION. It was effective on 2023-12-06 and ends on 2033-12-06.\n* **Service**: This contract is between Rise (Tianjin) Education Information Consulting Co., Ltd. and \[•]. It was effective on 2023-08-24 and ends on 2028-08-24.\n* **Service**: This contract is between Aduro Biotech, Inc. and IREYA B.V. It was effective on 2020-07-01 and ends on 2020-12-31."
1,What are some contracts that ended after 1990-01-01 that fall under strategic alliance?,0.254406,0.0,1.0,"Here are some strategic alliance contracts that ended after 1990-01-01. I'm including details on 5 of the 52 contracts found:\n\n1. **Parties**: PHL VARIABLE INSURANCE COMPANY, PHOENIX LIFE INSURANCE COMPANY, PHOENIX EQUITY PLANNING CORPORATION, and INVESTORS CAPITAL CORPORATION. **Summary**: Strategic Alliance Agreement by and among PHL VARIABLE INSURANCE COMPANY, PHOENIX LIFE INSURANCE COMPANY, PHOENIX EQUITY PLANNING CORPORATION, and INVESTORS CAPITAL CORPORATION. ICC is a broker dealer that is registered under the 1934 Act and with FINRA and an investment adviser that is registered with the SEC under the Advisers Act and doing business as an investment adviser as Investors Capital Advisory Services. ICAS has established certain asset allocation Models that are eligible for use with the GIE, and in the future may establish other Models that become eligible for use with the GIE. ICC will solicit sales of the GIE to ICC Customers. **End Date**: 2033-12-6\n2. **Parties**: Spôk Holdings, Inc., White Hat Strategic Partners LP, White Hat SP GP LLC, White Hat Capital Partners LP, and White Hat Capital Partners GP LLC. **Summary**: Cooperation Agreement between Spôk Holdings, Inc. and White Hat Strategic Partners LP, White Hat SP GP LLC, White Hat Capital Partners LP, and White Hat Capital Partners GP LLC. The White Hat Parties have beneficial ownership of 319,708 shares of the Company's Common Stock. Brett Shockley will be appointed to the Board of Directors. The White Hat Parties will withdraw their nomination of three directors for election to the Board at the Company's 2020 Annual Meeting of Stockholders. The White Hat Parties shall vote in favor of all persons nominated by the Board to serve as directors of the Company. **End Date**: 2021-5-18\n3. **Parties**: MG Capital Management Ltd., HC2 Holdings, Inc. 
**Summary**: Cooperation Agreement between HC2 Holdings, Inc. and MG Capital Management Ltd. regarding board composition and other matters. MG Capital Parties agreed to withdraw the nomination notice and terminate solicitation of proxies in connection with the 2020 Annual Meeting. Board of Directors will increase from six (6) to ten (10) Directors, and appoint each of Kenneth S. Courtis and Michael Gorzynski and Avram A. Glazer and Shelly Lombard to the Board. Mr. Glazer will be appointed as Chairman of the Board. The Company agrees that it shall hold the 2020 Annual Meeting no later than July 8, 2020. The Company will nominate each of the New Directors and Wayne Barr, Jr., Warren H. Gfeller and Philip A. Falcone for election to the Board at the 2020 Annual Meeting for a term expiring at the Company's 2021 Annual Meeting of Stockholders. The MG Capital Parties shall comply with the terms of this Agreement and shall be responsible for any breach of this Agreement by any such Affiliate or Associate. The standstill period begins on the date of this Agreement and shall extend until thirty (30) days prior to the deadline for the submission of stockholder nominations for directors for the 2021 Annual Meeting pursuant to the By-Laws. **End Date**: 2021-1-1\n4. **Parties**: LIQUIDMETAL TECHNOLOGIES, INC., EUTECTIX, LLC. **Summary**: Business Development Agreement between Liquidmetal Technologies, Inc. and Eutectix, LLC for collaboration on amorphous alloy materials and technologies. Liquidmetal will utilize Eutectix's capabilities for alloy development and manufacturing. Eutectix gains access to Liquidmetal's processing equipment and methodologies. The agreement includes equipment licensing, alloy purchase, and field of use restrictions. Eutectix will pay Liquidmetal a royalty based on a percentage of the invoice price of any Licensed Products. Liquidmetal may purchase Liquidmetal Products from Eutectix. **End Date**: 2025-1-1\n5. **Parties**: Farids & Co. 
LLC, Edible Arrangements, LLC, Rocky Mountain Chocolate Factory, Inc. **Summary**: Strategic Alliance Agreement dated as of December 20, 2019, by and among Farids & Co. LLC, Edible Arrangements, LLC, and Rocky Mountain Chocolate Factory, Inc. The agreement includes the issuance and purchase of 126,839 shares of the Company's common stock by Farids, the execution and delivery of a Warrant and an Exclusive Supplier Operating Agreement between the Company and EA. The parties aim to enhance their strategic cooperation alliance. **End Date**: 2019-12-20"
2,"What are some contracts with HC2 Holdings, Inc.?",0.611974,1.0,1.0,"I found one contract with HC2 Holdings, Inc. It is a Strategic Alliance agreement with MG Capital Management Ltd. regarding board composition, it was effective from 2020-05-13 to 2021-01-01."
3,Which year was the most contracts signed?,0.21148,0.0,0.0,"Based on the query, the year with the most contracts signed is 'None' with 1 contract. This likely means the effective date is missing from most contracts. I can refine the search if you provide a date range to look within."
4,Which company has the most active contracts?,0.216552,0.0,0.5,"Beijing Sun Seven Stars Culture Development Limited has the most active contracts, with a total of 2 active contracts."
5,Which Hosting contracts have the highest total value?,,,0.5,The Hosting contract with the highest total value is 'BEYONDCOMCORP_08_03_2000-EX-10.2-CO-HOSTING AGREEMENT' with a monetary value of 2500000.
6,How many contracts does Microgenics Corporation have?,,,0.0,Microgenics Corporation is listed as a party in 1 contract.
7,What contracts are governed by US law?,,,0.0,"I am sorry, I cannot fulfill this request. The available tools lack the functionality to filter contracts based on governing law."
8,Which party has the most Strategic Alliance contracts?,,,0.5,"VerticalNet, Inc. is the party with the most Strategic Alliance contracts, with a total of 4."
9,"What is the total monetary value of all contracts with WebMD, Inc.?",,,0.0,"I cannot provide the total monetary value of all contracts with WebMD, Inc. I do not have the ability to sum all monetary values. However, I can provide the monetary values of individual contracts. Would you like me to search for contracts involving WebMD, Inc. and list their individual monetary values?"
