In [1]:
from guardrails.prompt_repo import Prompt
from guardrails.schema import Schema, Field
from guardrails.validators import StringValidator, FloatValidator, Validator, FormValidator

from pyparsing import CaselessKeyword, Regex

In [2]:
# Load the OpenAI API key from a local text file instead of hardcoding it.
# strip() drops surrounding whitespace: a trailing newline (which most editors
# add when saving) would otherwise be kept as part of the key string.
with open('openai_api_key.txt', 'r') as f:
    openai_api_key = f.read().strip()

In [3]:
# Questions to ask about a bank's terms of service, one entry per field:
# (field name, validator class for the answer, question text).
# Entry order is preserved in the resulting dict.
schema = {
    field_name: Field(validator=answer_validator, prompt=question)
    for field_name, answer_validator, question in (
        ("fees", StringValidator,
         "What fees and charges are associated with my account, including account maintenance fees, transaction fees, and overdraft fees?"),
        ("interest_rates", FloatValidator,
         "What are the interest rates offered by the bank on savings and checking accounts, loans, and credit products?"),
        ("limitations", StringValidator,
         "Are there any limitations on the number or frequency of transactions, transfers, or withdrawals from my account?"),
        ("liability", FloatValidator,
         "What is the maximum amount I am liable for in case of fraud, unauthorized transactions, or other security breaches?"),
        ("privacy", StringValidator,
         "What are the bank's policies on data collection, sharing, and protection?"),
        ("disputes", StringValidator,
         "What are the procedures for resolving disputes with the bank, including information on arbitration, mediation, and lawsuits?"),
        ("account_termination", StringValidator,
         "Under what circumstances can the bank terminate my account, and what notice is required for such termination?"),
        ("regulatory_oversight", StringValidator,
         "What is the bank's regulatory oversight, including its licensing and compliance status?"),
    )
}

# Wrap the plain dict in the guardrails Schema object used by the cells below.
tos_schema = Schema.from_dict(schema)

In [4]:
def LLM(prompt):
    """Send ``prompt`` to the OpenAI completion endpoint and return its text.

    The request uses model ``text-davinci-003`` with ``temperature=0`` and
    ``max_tokens=2048``, and authenticates with the notebook-level
    ``openai_api_key``.

    Args:
        prompt: The full prompt string to complete.

    Returns:
        The ``'text'`` entry of the first item in the response's ``'choices'``.
    """
    from openai import Completion

    request_kwargs = dict(
        model="text-davinci-003",
        prompt=prompt,
        temperature=0,
        max_tokens=2048,
        api_key=openai_api_key,
    )
    response = Completion.create(**request_kwargs)
    first_choice = response['choices'][0]
    return first_choice['text']

In [5]:
# Shared preamble for every prompt in this notebook. `{document}` is a
# placeholder that later cells fill in with `.format(document=...)`.
base_prompt_template = Prompt(
    "Given the following document, answer the following questions. "
    "If the answer doesn't exist in the document, enter 'None'.\n"
    "\n"
    "{document}"
)

In [6]:
# Build the extraction prompt for the terms-of-service document: the base
# template plus a "Questions:" section listing each schema field with its
# index and question (see the printed output below).
updated_prompt = tos_schema.add_to_prompt(base_prompt_template)
print(updated_prompt)

Given the following document, answer the following questions. If the answer doesn't exist in the document, enter 'None'.

{document}

Questions:
0. fees: What fees and charges are associated with my account, including account maintenance fees, transaction fees, and overdraft fees?
1. interest_rates: What are the interest rates offered by the bank on savings and checking accounts, loans, and credit products?
2. limitations: Are there any limitations on the number or frequency of transactions, transfers, or withdrawals from my account?
3. liability: What is the maximum amount I am liable for in case of fraud, unauthorized transactions, or other security breaches?
4. privacy: What are the bank's policies on data collection, sharing, and protection?
5. disputes: What are the procedures for resolving disputes with the bank, including information on arbitration, mediation, and lawsuits?
6. account_termination: Under what circumstances can the bank terminate my account, and what notice is req

In [22]:
# Read content from data/chunk_0.txt
# The document contains non-ASCII characters (e.g. the "®" in "Zelle®"), so the
# encoding is given explicitly instead of relying on the platform default.
# The file is assumed to be UTF-8 -- TODO confirm.
with open("data/chunk_0.txt", "r", encoding="utf-8") as f:
    content = f.read()

# The live LLM call is left disabled here; a later cell reads
# data/llm_output.txt instead. Uncomment the lines below to call the model
# again and overwrite that file.
# llm_output = LLM(updated_prompt.format(document=content))
# with open("data/llm_output.txt", "w") as f:
#     f.write(llm_output)
# print(llm_output)

In [8]:
# Load the saved first-pass LLM answer from disk (written by the commented-out
# code in the previous cell) so this cell does not call the model.
with open("data/llm_output.txt", "r") as f:
    llm_output = f.read()
print(llm_output)



0. fees: None
1. interest_rates: None
2. limitations: Transfer Limits
3. liability: Your Liability for Unauthorized Transfers
4. privacy: Privacy Policy and Notice
5. disputes: Disputes
6. account_termination: Termination
7. regulatory_oversight: None


In [9]:
# deepcopy is still needed by the later cell that copies base_prompt_template.
from copy import deepcopy

# State for matching the LLM answer against the schema, field by field.
text = llm_output          # full answer; kept intact for the debug prompts later
extracted_object = {}      # field name -> parse result for answers in the expected form
invalid_forms = []         # {'name', 'field'} entries for answers that did not match
# Strings are immutable, so deep-copying one is redundant. The matching loop
# only ever rebinds text_copy to a slice, which leaves `text` untouched.
text_copy = text

In [10]:
# Walk the schema in order. For each field, search the not-yet-consumed text
# for its "<index>. <name>:" label followed by a value accepted by the field's
# form grammar.
for position, entry in enumerate(tos_schema._schema):
    field, name = entry['field'], entry['name']
    label = CaselessKeyword(f"{position}. {name}:")
    hits = list((label + field.form_validator.grammar).scan_string(text_copy, maxMatches=1))
    if not hits:
        # No answer in the expected form: remember the field for the re-ask prompt.
        invalid_forms.append({'name': name, 'field': field})
        continue
    tokens, _start, end = hits[0]
    extracted_object[name] = tokens
    # Continue searching after this match for the next field.
    text_copy = text_copy[end:]

In [11]:
# Fields whose answer matched the expected form; each value is the parse result
# holding the label and the answer text.
extracted_object

{'fees': ParseResults(['0. fees:', 'None'], {}),
 'limitations': ParseResults(['2. limitations:', 'Transfer Limits'], {}),
 'privacy': ParseResults(['4. privacy:', 'Privacy Policy and Notice'], {}),
 'disputes': ParseResults(['5. disputes:', 'Disputes'], {}),
 'account_termination': ParseResults(['6. account_termination:', 'Termination'], {}),
 'regulatory_oversight': ParseResults(['7. regulatory_oversight:', 'None'], {})}

In [12]:
# Fields whose answer did not match the expected form (here the two float fields).
invalid_forms

[{'name': 'interest_rates',
  'field': <guardrails.schema.Field at 0x7fb50898a160>},
 {'name': 'liability', 'field': <guardrails.schema.Field at 0x7fb50898ae80>}]

In [13]:
# Re-print the raw answer to compare it against the failed fields above.
print(llm_output)



0. fees: None
1. interest_rates: None
2. limitations: Transfer Limits
3. liability: Your Liability for Unauthorized Transfers
4. privacy: Privacy Policy and Notice
5. disputes: Disputes
6. account_termination: Termination
7. regulatory_oversight: None


In [14]:

# Work on a copy so the re-ask text is appended to the copy rather than to
# base_prompt_template itself.
prompt_copy = deepcopy(base_prompt_template)
# Section headers; the per-field lines are appended by the loop in the next cell.
prompt_str = "\n\nQuestions:\n"
form_str = "\nTo answer these questions, respond in this format:\n"

In [15]:
# For every field that failed form validation, add its question to the
# "Questions" section and its expected format plus a correction hint to the
# "respond in this format" section.
for failed in invalid_forms:
    failed_name = failed['name']
    failed_field = failed['field']
    question = failed_field.prompt
    expected_form = failed_field.form_validator.grammar
    idx = tos_schema.name2idx[failed_name]
    # debug() is given the original answer text and the field's label so it can
    # describe what was previously answered after that label.
    correction_hint = failed_field.form_validator.debug(
        text, placeholder=CaselessKeyword(f"{idx}. {failed_name}:"))
    prompt_str += f"{idx}. {failed_name}: {question}\n"
    form_str += f"{idx}. {failed_name}: {expected_form}. {correction_hint}\n"


In [16]:
# Assemble the re-ask prompt: the questions, then the expected answer format,
# then closing instructions.
prompt_copy.append_to_prompt(prompt_str)
prompt_copy.append_to_prompt(form_str)
prompt_copy.append_to_prompt("""
Try to be as correct and concise as possible. Find all relevant information in the document and answer the questions, even if the answer is 'None'.
If you are unsure of the answer, enter 'None'. If you answer incorrectly, you will be asked again until you get it right which is expensive.""")


In [17]:
# Inspect the manually assembled re-ask prompt (`{document}` is still unfilled).
print(prompt_copy.source)

Given the following document, answer the following questions. If the answer doesn't exist in the document, enter 'None'.

{document}

Questions:
1. interest_rates: What are the interest rates offered by the bank on savings and checking accounts, loans, and credit products?
3. liability: What is the maximum amount I am liable for in case of fraud, unauthorized transactions, or other security breaches?

To answer these questions, respond in this format:
1. interest_rates: Re:('[-+]?\d*\.\d+|\d+'). You previously responded with " None" after "1. interest_rates:", which is incorrect. The correct output should be of the format: a float after "1. interest_rates:".
3. liability: Re:('[-+]?\d*\.\d+|\d+'). You previously responded with " Your Liability for Unauthorized Transfers" after "3. liability:", which is incorrect. The correct output should be of the format: a float after "3. liability:".

Try to be as correct and concise as possible. Find all relevant information in the document and ans

In [18]:
# Build the re-ask prompt through the Schema helper instead of by hand.
# Judging by the printed outputs, it differs from the manual version above only
# in describing the expected format as "<< a float >>" rather than the raw regex.
form_debug_prompt = tos_schema.create_form_debug_prompt(
    llm_output,
    invalid_forms,
    base_prompt_template
)

In [21]:
# Inspect the helper-generated re-ask prompt (`{document}` is still unfilled).
print(form_debug_prompt.source)

Given the following document, answer the following questions. If the answer doesn't exist in the document, enter 'None'.

{document}

Questions:
1. interest_rates: What are the interest rates offered by the bank on savings and checking accounts, loans, and credit products?
3. liability: What is the maximum amount I am liable for in case of fraud, unauthorized transactions, or other security breaches?

To answer these questions, respond in this format:
1. interest_rates: << a float >>. You previously responded with " None" after "1. interest_rates:", which is incorrect. The correct output should be of the format: a float after "1. interest_rates:".
3. liability: << a float >>. You previously responded with " Your Liability for Unauthorized Transfers" after "3. liability:", which is incorrect. The correct output should be of the format: a float after "3. liability:".

Try to be as correct and concise as possible. Find all relevant information in the document and answer the questions, eve

In [25]:
# Re-ask the model about the failed fields only, with the document filled in.
# NOTE(review): this rebinds `llm_output`, which previously held the first-pass
# answer loaded from data/llm_output.txt; re-running the earlier
# `text = llm_output` cell after this one would parse the retry output instead.
llm_output = LLM(form_debug_prompt.format(document=content))

In [27]:
# Show the retry answer. In the recorded output both fields come back as
# "None.", which is still not a float.
print(llm_output)



1. interest_rates: None.
3. liability: None.


In [24]:
# Show the full re-ask prompt with the document text substituted for `{document}`.
print(form_debug_prompt.format(document=content))

Given the following document, answer the following questions. If the answer doesn't exist in the document, enter 'None'.

Zelle® Service Agreement and Privacy
Notice
Last updated: 11/3/2022
This agreement with Chase is available in Spanish as a courtesy. If there is any difference in
meaning between the Spanish and English versions of this agreement or any related
documents we provide you, either now or in the future, the English version is the official
document and will prevail. Please consult with a translator if you have any questions.
We suggest you read this document carefully and print a copy for your reference.
This Zelle® Service Agreement and Privacy Notice (this "Agreement") states the terms and
conditions that govern your use of the Zelle® Service (sometimes referred to as Chase
QuickPay®, Chase QuickPay® with Zelle®, QuickPay, Send Money with Zelle®, Chase Personto-Person, QuickPay, Chase QuickPay for Small Business, Zelle, Zelle, or other trade name or
trademark as determ

In [None]:
# Draft of a richer schema syntax (this cell was never executed): each Field
# takes a `validators` argument that may be a list of validators or, for nested
# objects, a dict of per-key validators. Several validators named here are not
# among the names imported at the top of the notebook.
# The first entry previously used `validator=`; it now uses `validators=` like
# every other entry in this draft.
{
    "interest_rate": Field(prompt="Interest rate of the lender", validators=[FloatValidator, RationalInterestRateValidator]),
    "url": Field(prompt="URL of the lender", validators=[UrlValidator, ReachableUrlValidator]),
    "list_of_lenders": Field(
        prompt="List of lenders",
        validators=[ListValidator(min_length=1, max_length=10, validator=StringValidator)]),
    "other_object": Field(prompt="Other object to extract", validators={
        "key_1": StringValidator,
        "key_2": [FloatValidator, PositiveFloatValidator],
    })
}

In [2]:
# Alternative draft: map each field name directly to a validator, a list of
# validators, or a nested dict, with no Field wrapper or prompt text.
# NOTE(review): the recorded output below is a NameError for StringValidator,
# so the import cell had presumably not been run in that kernel session.
# Several other names used here (e.g. ListValidator, UrlValidator) are not
# among the names imported at the top of the notebook.
# This also rebinds `schema`, replacing the terms-of-service dict defined earlier.
schema = {
    "name": StringValidator,
    "interest_rate": [FloatValidator, RationalInterestRateValidator],
    "url": [UrlValidator, ReachableUrlValidator],
    "list_of_lenders": ListValidator(min_length=1, max_length=10, element_validators=[StringValidator]),
    "other_object": {
        "key_1": StringValidator,
        "key_2": [FloatValidator, PositiveFloatValidator],
    }
}

NameError: name 'StringValidator' is not defined

In [None]:
# Never executed: would build a Schema from whichever `schema` dict was
# assigned most recently.
info_schema = Schema.from_dict(schema)

In [None]:
# Draft (never executed): Field-based schema where `validators` accepts a single
# validator, a list of validators, or a dict of per-key validators for a nested
# object.
# NOTE(review): the Field calls that did run at the top of the notebook use the
# keyword `validator=`, so `validators=` here appears to be a proposed API.
schema = {
    "name": Field(prompt="Name of the lender", validators=StringValidator),
    "interest_rate": Field(prompt="Interest rate of the lender", validators=[FloatValidator, RationalInterestRateValidator]),
    "url": Field(prompt="URL of the lender", validators=[UrlValidator, ReachableUrlValidator]),
    "list_of_lenders": Field(
        prompt="List of lenders",
        validators=[ListValidator(min_length=1, max_length=10, validator=StringValidator)]),
    "other_object": Field(prompt="Other object to extract", validators={
        "key_1": StringValidator,
        "key_2": [FloatValidator, PositiveFloatValidator],
    })
}

In [None]:
# Draft (never executed): plain-dict schema. Each field is described by four
# keys -- "prompt" (question text), "format" (a validator), "required" (bool)
# and "validators" (a list of further validators).
# "other_object" nests two such field specs under "key_1" and "key_2" and has
# no prompt/format/required/validators keys of its own.
schema = {
    "name": {
        "prompt": "Name of the lender",
        "format": StringValidator,
        "required": True,
        "validators": [VerboseStringValidator]
    },
    "interest_rate": {
        "prompt": "Interest rate of the lender",
        "format": FloatValidator,
        "required": True,
        "validators": [RationalInterestRateValidator]
    },
    "url": {
        "prompt": "URL of the lender",
        "format": UrlValidator,
        "required": True,
        "validators": [ReachableUrlValidator]
    },
    "list_of_lenders": {
        "prompt": "List of lenders",
        # The element format and element validators are passed to ListValidator
        # itself; the list-level "validators" entry is left empty.
        "format": ListValidator(min_length=1, max_length=10, format=StringValidator, validators=[VerboseStringValidator]),
        "required": True,
        "validators": []
    },
    "other_object": {
        "key_1": {
            "prompt": "Key 1",
            "format": StringValidator,
            "required": True,
            "validators": [VerboseStringValidator]
        },
        "key_2": {
            "prompt": "Key 2",
            "format": FloatValidator,
            "required": True,
            "validators": [PositiveFloatValidator]
        }
    }
}