In [1]:
from guardrails.prompt_repo import Prompt
from guardrails.schema import Schema, Field
from guardrails.validators import StringValidator, FloatValidator, Validator, FormValidator
from guardrails.exceptions import SchemaMismatchException

from copy import deepcopy

In [2]:
with open('openai_api_key.txt', 'r') as f:
    openai_api_key = f.read()

In [3]:
schema = {
    "fees": Field(
        validator=StringValidator,
        prompt="What fees and charges are associated with my account, including account maintenance fees, transaction fees, and overdraft fees?"),
    "interest_rates": Field(
        validator=FloatValidator,
        prompt="What are the interest rates offered by the bank on savings and checking accounts, loans, and credit products?"),
    "limitations": Field(
        validator=StringValidator,
        prompt="Are there any limitations on the number or frequency of transactions, transfers, or withdrawals from my account?"),
    "liability": Field(
        validator=FloatValidator,
        prompt="What is the maximum amount I am liable for in case of fraud, unauthorized transactions, or other security breaches?"),
    "privacy": Field(
        validator=StringValidator,
        prompt="What are the bank's policies on data collection, sharing, and protection?"),
    "disputes": Field(
        validator=StringValidator,
        prompt="What are the procedures for resolving disputes with the bank, including information on arbitration, mediation, and lawsuits?"),
    "account_termination": Field(
        validator=StringValidator,
        prompt="Under what circumstances can the bank terminate my account, and what notice is required for such termination?"),
    "regulatory_oversight": Field(
        validator=StringValidator,
        prompt="What is the bank's regulatory oversight, including its licensing and compliance status?")
}

base_prompt_template = Prompt("""Given the following document, answer the following questions. If the answer doesn't exist in the document, enter 'None'.

{document}""")

tos_schema = Schema.from_dict(schema, base_prompt=base_prompt_template)

In [7]:
print(tos_schema.base_prompt.format(document=content))

Given the following document, answer the following questions. If the answer doesn't exist in the document, enter 'None'.

Zelle® Service Agreement and Privacy
Notice
Last updated: 11/3/2022
This agreement with Chase is available in Spanish as a courtesy. If there is any difference in
meaning between the Spanish and English versions of this agreement or any related
documents we provide you, either now or in the future, the English version is the official
document and will prevail. Please consult with a translator if you have any questions.
We suggest you read this document carefully and print a copy for your reference.
This Zelle® Service Agreement and Privacy Notice (this "Agreement") states the terms and
conditions that govern your use of the Zelle® Service (sometimes referred to as Chase
QuickPay®, Chase QuickPay® with Zelle®, QuickPay, Send Money with Zelle®, Chase Personto-Person, QuickPay, Chase QuickPay for Small Business, Zelle, Zelle, or other trade name or
trademark as determ

In [4]:
with open("data/chunk_0.txt", "r") as f:
    content = f.read()

In [5]:
extracted_obj = tos_schema.validate(content)

APIError: Internal server error {
    "error": {
        "message": "Internal server error",
        "type": "auth_subrequest_error",
        "param": null,
        "code": "internal_error"
    }
}
 500 {'error': {'message': 'Internal server error', 'type': 'auth_subrequest_error', 'param': None, 'code': 'internal_error'}} {'Date': 'Tue, 21 Feb 2023 08:09:24 GMT', 'Content-Type': 'application/json; charset=utf-8', 'Content-Length': '166', 'Connection': 'keep-alive', 'Vary': 'Origin', 'X-Request-Id': '051f7c45a4ac46e23a6a6bba874d1435', 'Strict-Transport-Security': 'max-age=15724800; includeSubDomains'}

In [9]:
text = content

extracted_object = {}
prev_llm_output = None
format_attributes = tos_schema._schema
content_attributes = tos_schema._schema
attributes_to_extract = tos_schema._schema

format_prompts = tos_schema.get_format_prompts(
    format_attributes,
    llm_output=prev_llm_output
)
content_prompts = tos_schema.get_content_prompts(
    content_attributes,
    extracted_object=extracted_object,
)

final_prompt = tos_schema.get_merged_prompt(format_prompts, content_prompts)
# llm_output = tos_schema.llm_ask(final_prompt.format(document=text))
# Read llm_output from file
with open("data/llm_output.txt", "r") as f:
    llm_output = f.read()

format_attributes = []
content_attributes = []


for i, attr in enumerate(attributes_to_extract):
    field = attr['field']
    name = attr['name']
    try:
        extracted_object[name] = tos_schema.validate_form_for_field(i, name, field, llm_output)

        for content_validator in field.content_validators:
            if not content_validator.validate(extracted_object[name]):
                attr['debugging'] = True
                content_attributes.append(attr)
    except SchemaMismatchException:
        attr['debugging'] = True
        format_attributes.append(attr)
        continue

In [11]:
# print(final_prompt.format(document=text))
print(final_prompt.source)

Given the following document, answer the following questions. If the answer doesn't exist in the document, enter 'None'.

{document}
Questions:
fees: What fees and charges are associated with my account, including account maintenance fees, transaction fees, and overdraft fees?
interest_rates: What are the interest rates offered by the bank on savings and checking accounts, loans, and credit products?
limitations: Are there any limitations on the number or frequency of transactions, transfers, or withdrawals from my account?
liability: What is the maximum amount I am liable for in case of fraud, unauthorized transactions, or other security breaches?
privacy: What are the bank's policies on data collection, sharing, and protection?
disputes: What are the procedures for resolving disputes with the bank, including information on arbitration, mediation, and lawsuits?
account_termination: Under what circumstances can the bank terminate my account, and what notice is required for such termina

In [8]:
format_attributes

[{'name': 'fees',
  'field': <guardrails.schema.Field at 0x7f9fc0b26940>,
  'debugging': True},
 {'name': 'limitations',
  'field': <guardrails.schema.Field at 0x7f9fc0b26820>,
  'debugging': True},
 {'name': 'privacy',
  'field': <guardrails.schema.Field at 0x7f9fc0b23f10>,
  'debugging': True},
 {'name': 'disputes',
  'field': <guardrails.schema.Field at 0x7f9fc0b23880>,
  'debugging': True},
 {'name': 'account_termination',
  'field': <guardrails.schema.Field at 0x7f9fc0b23e50>,
  'debugging': True},
 {'name': 'regulatory_oversight',
  'field': <guardrails.schema.Field at 0x7f9fc0b23d90>,
  'debugging': True}]

In [9]:
content_attributes

[]

In [10]:
prev_llm_output = llm_output
old_content_attributes = deepcopy(content_attributes)
content_attributes = tos_schema.merge_form_content_debugging(
    content_attributes, format_attributes)

In [13]:
len(tos_schema.merge_form_content_debugging(
    old_content_attributes, format_attributes))

12

In [14]:
list_1 = deepcopy(old_content_attributes)
list_2 = deepcopy(format_attributes)

In [15]:
merged_list = []
i = 0
j = 0
while i < len(list_1) and j < len(list_2):

    print(f'Merged list: {merged_list}')
    print(f'i: {i}, j: {j}')

    if tos_schema.name2idx[list_1[i]['name']] < tos_schema.name2idx[list_2[j]['name']]:
        
        merged_list.append(list_1[i])
        i += 1
        print(f'\nAppending 1:')
        print(f'merged_list: {merged_list}')
        print(f'i: {i}, j: {j}')
    elif tos_schema.name2idx[list_1[i]['name']] > tos_schema.name2idx[list_2[j]['name']]:
        copied_item = deepcopy(list_2[j])
        copied_item['debugging'] = False
        merged_list.append(copied_item)
        j += 1
        print(f'\nAppending 2:')
        print(f'merged_list: {merged_list}')
        print(f'i: {i}, j: {j}')
    else:
        merged_list.append(list_1[i])
        i += 1
        j += 1
        print(f'\nAppending 3:')
        print(f'merged_list: {merged_list}')
        print(f'i: {i}, j: {j}')

In [17]:
while i < len(list_1):
    merged_list.append(list_1[i])
    i += 1

In [18]:
merged_list

[]

LLM OUTPUT:  

fees: None
interest_rates: None
limitations: Transfer Limits
liability: $0
privacy: Disclosure of Account Information to Third Parties
disputes: Disputes
account_termination: Termination
regulatory_oversight: None
FORMAT ATTRIBUTES:  [{'name': 'liability', 'field': <guardrails.schema.Field object at 0x7fd8807978b0>, 'debugging': True}]
CONTENT ATTRIBUTES:  []
NEW CONTENT ATTRIBUTES:  [{'name': 'liability', 'field': <guardrails.schema.Field object at 0x7fd880797b80>, 'debugging': False}, {'name': 'liability', 'field': <guardrails.schema.Field object at 0x7fd8807978b0>, 'debugging': True}]
NEW FORMAT ATTRIBUTES:  [{'name': 'liability', 'field': <guardrails.schema.Field object at 0x7fd8807978b0>, 'debugging': True}]
EXTRACTED OBJECT:  {'fees': 'None', 'interest_rates': 'None', 'limitations': 'Transfer Limits', 'privacy': 'Disclosure of Account Information to Third Parties', 'disputes': 'Disputes', 'account_termination': 'Termination', 'regulatory_oversight': 'None'}
NEW LOO

KeyError: 'liability'

In [12]:
import json

d = json.loads("""{
'Account Termination': 'None',
'Disputes': ['Termination', 'Disputes', 'Binding Arbitration'],
'Fees': {
'Zelle Fees': '17. Zelle Fees',
'Overdraft Fees': 'None',
'Transaction Fees': 'None',
'Account Maintenance Fees': 'None'
},
'Interest Rates': 'None',
'Definitions': {
'Bank': 'JPMorgan Chase Bank, National Association, or any affiliate, agent, independent contractor, designee, or assignee that we may, at our sole discretion, involve in the provision of the Service',
'Zelle Service': 'Zelle® Service or J.P. Morgan QuickPay'
}
}""")

JSONDecodeError: Expecting property name enclosed in double quotes: line 2 column 1 (char 2)

In [14]:
s = """
{
    "Account Termination": "None",
    "Disputes": ["Termination", "Disputes", "Binding Arbitration"],
    "Fees": {
        "Zelle Fees": "17. Zelle Fees",
        "Overdraft Fees": "None",
        "Transaction Fees": "None",
        "Account Maintenance Fees": "None"
},
"Interest Rates": "None",
"Definitions": {
    "Bank": "JPMorgan Chase Bank, National Association, or any affiliate, agent, independent contractor, designee, or assignee that we may, at our sole discretion, involve in the provision of the Service",
    "Zelle Service": "Zelle® Service or J.P. Morgan QuickPay"
}
}"""

# Load s into a json object avoiding "name enclosed in double quotes" errors
import json
import re

def load_json(s):
    s = re.sub(r"(\w+):", r'"\1":', s)
    return json.loads(s)

d = load_json(s)


{'Zelle Fees': '17. Zelle Fees',
 'Overdraft Fees': 'None',
 'Transaction Fees': 'None',
 'Account Maintenance Fees': 'None'}