In [1]:
import typing as t
from dataclasses import dataclass

from guardrails.prompt_repo import PromptRepo, Prompt
from guardrails.guardrails import Schema
from guardrails.types import DataType, String, URL, Email, Date, Time, Percentage, CodeSnippet, Float
from guardrails.utils import TextSplitter, read_pdf

In [3]:
prompt_repo = PromptRepo()
prompt_repo.add_prompt('Fees', 'What fees and charges are associated with my account, including account maintenance fees, transaction fees, and overdraft fees?')
prompt_repo.add_prompt('Interest Rates', 'What are the interest rates offered by the bank on savings and checking accounts, loans, and credit products?')
prompt_repo.add_prompt("Limitations", "Are there any limitations on the number or frequency of transactions, transfers, or withdrawals from my account?")
prompt_repo.add_prompt("Liability", "What is the maximum amount I am liable for in case of fraud, unauthorized transactions, or other security breaches?")
prompt_repo.add_prompt("Privacy", "What are the bank's policies on data collection, sharing, and protection?")
prompt_repo.add_prompt("Disputes", "What are the procedures for resolving disputes with the bank, including information on arbitration, mediation, and lawsuits?")
prompt_repo.add_prompt("Account Termination", "Under what circumstances can the bank terminate my account, and what notice is required for such termination?")
prompt_repo.add_prompt("Regulatory Oversight", "What is the bank's regulatory oversight, including its licensing and compliance status?")

In [4]:
@dataclass
class ToS(Schema):
    fees: DataType = String(name='Fees')
    interest_rates: DataType = Float(name='Interest Rates')
    limitations: DataType = String(name='Limitations')
    liability: DataType = String(name='Liability')
    privacy: DataType = String(name='Privacy')
    disputes: DataType = String(name='Disputes')
    account_termination: DataType = String(name='Account Termination')
    regulatory_oversight: DataType = String(name='Regulatory Oversight')
    prompt_repo: prompt_repo = prompt_repo

In [5]:
base_prompt_template = Prompt("""Given the following document, answer the following questions. If the answer doesn't exist in the document, enter 'None'.

{document}""")

In [67]:
class ToSReader:
    def __init__(self):
        self.splitter = TextSplitter()
        self.prompt_template = base_prompt_template
        self.schema = ToS()
        self.schema.add_to_prompt(prompt=self.prompt_template)

    def extract(self, path: str):
        content = read_pdf(path)
        content_chunks = self.splitter(content, prompt_template=self.prompt_template)

        # Write each chunk to a file in the data folder
        for i, chunk in enumerate(content_chunks):
            with open(f"data/chunk_{i}.txt", "w") as f:
                f.write(chunk)

        # for chunk in content_chunks:
        #     self.extract_from_chunk(chunk)
        #     break

    def extract_from_chunk(self, chunk: str) -> t.List[t.Dict[str, t.Any]]:
        """Extracts the info from the given chunk."""

        prompt = self.prompt_template.format(document=chunk)
        llm_output = Completion.create(
            model="text-davinci-003",
            prompt=prompt,
            temperature=0,
            max_tokens=2048,
            api_key=api_key
        )
        extracted_info = llm_output['choices'][0]['text']
        print(f"extracted_info: {extracted_info}")
        return extracted_info

In [68]:
extractor = ToSReader()
extractor.extract('chase_zelle.pdf')

In [60]:
extractor.add_to_prompt('Fees', 'What fees and charges are associated with my account, including account maintenance fees, transaction fees, and overdraft fees?')

<__main__.ToSReader at 0x7f9b51172e80>

In [8]:
content = read_pdf('chase_zelle.pdf')
content_chunks = extractor.splitter(content, prompt_template=extractor.prompt_template)

prompt = extractor.prompt_template.format(document=content_chunks[0])
llm_output = Completion.create(
    model="text-davinci-003",
    prompt=prompt,
    temperature=0,
    max_tokens=2048,
    api_key=api_key
)
extracted_info = llm_output['choices'][0]['text']

In [66]:
print(prompt[-2000:])

her agree not to use the Service to request, send or receive money
from anyone to whom you are obligated for tax payments, payments made pursuant to
court orders (including court-ordered amounts for alimony or child support), fines,

Questions:
0. Account Termination: Under what circumstances can the bank terminate my account, and what notice is required for such termination?
1. Disputes: What are the procedures for resolving disputes with the bank, including information on arbitration, mediation, and lawsuits?
2. Fees: What fees and charges are associated with my account, including account maintenance fees, transaction fees, and overdraft fees?
3. Interest Rates: What are the interest rates offered by the bank on savings and checking accounts, loans, and credit products?
4. Liability: What is the maximum amount I am liable for in case of fraud, unauthorized transactions, or other security breaches?
5. Limitations: Are there any limitations on the number or frequency of transactions, t

In [9]:
print(extracted_info)



0. Account Termination: None
1. Disputes: Please refer to Section 23 of this Agreement for information on disputes.
2. Fees: Please refer to Section 17 of this Agreement for information on fees.
3. Interest Rates: None
4. Liability: Please refer to Section 18 of this Agreement for information on liability.
5. Limitations: Please refer to Section 14 of this Agreement for information on transfer limits.
6. Privacy: Please refer to Section 24 of this Agreement for information on privacy.
7. Regulatory Oversight: None


In [11]:
grammar = extractor.schema.grammar_rules()

grammar.parseString(extracted_info)

AttributeError: 'list' object has no attribute 'parseString'

In [12]:
from copy import deepcopy

info_copy = deepcopy(extracted_info)
info_copy = '\n\n1--' + info_copy[3:]
print(info_copy)



1--. Account Termination: None
1. Disputes: Please refer to Section 23 of this Agreement for information on disputes.
2. Fees: Please refer to Section 17 of this Agreement for information on fees.
3. Interest Rates: None
4. Liability: Please refer to Section 18 of this Agreement for information on liability.
5. Limitations: Please refer to Section 14 of this Agreement for information on transfer limits.
6. Privacy: Please refer to Section 24 of this Agreement for information on privacy.
7. Regulatory Oversight: None


In [None]:
from copy import deepcopy

info_copy = deepcopy(extracted_info)
info_copy = '\n\n1--' + info_copy[3:]
print(info_copy)

In [25]:
exceptions[0]

Expected CaselessKeyword '0. Account Termination:', found '1'  (at char 2), (line:3, col:1)

In [28]:
dir(exceptions[0])

['__cause__',
 '__class__',
 '__context__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__setstate__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__suppress_context__',
 '__traceback__',
 '__weakref__',
 '_from_exception',
 'args',
 'col',
 'column',
 'explain',
 'explain_exception',
 'line',
 'lineno',
 'loc',
 'markInputline',
 'mark_input_line',
 'msg',
 'parserElement',
 'parser_element',
 'pstr',
 'with_traceback']

In [30]:
exceptions[0].parserElement

'0. Account Termination:'

In [31]:
exceptions[0].parser_element

'0. Account Termination:'

In [32]:
exceptions[0].parser_element

'DEFAULT_KEYWORD_CHARS'

In [29]:
exceptions[0].msg

"Expected CaselessKeyword '0. Account Termination:'"

In [33]:
exceptions[0].line

'1--. Account Termination: None'

In [22]:
explanation = []
exceptions = []
try:
    for g in grammar:
        g.parseString(info_copy)
except Exception as e:
    # print(e)
    # print(dir(e))
    # print(e.explain())
    explanation.append(e.explain())
    exceptions.append(e)

print(explanation[0])

1--. Account Termination: None
^
ParseException: Expected CaselessKeyword '0. Account Termination:', found '1'  (at char 2), (line:3, col:1)


In [39]:
info_copy

'\n\n1--. Account Termination: None\n1. Disputes: Please refer to Section 23 of this Agreement for information on disputes.\n2. Fees: Please refer to Section 17 of this Agreement for information on fees.\n3. Interest Rates: None\n4. Liability: Please refer to Section 18 of this Agreement for information on liability.\n5. Limitations: Please refer to Section 14 of this Agreement for information on transfer limits.\n6. Privacy: Please refer to Section 24 of this Agreement for information on privacy.\n7. Regulatory Oversight: None'

In [56]:
f'{str(grammar[0])[1: -1]}'

"'0. Account Termination:' Re:('.+')"

In [46]:
try:
    grammar[0].parseString('0. Account Termination:')
except Exception as e:
    other_exc = e

In [53]:
other_exc.parser_element

Re:('.+')

In [21]:
explanation[0]

"1--. Account Termination: None\n^\nParseException: Expected CaselessKeyword '0. Account Termination:', found '1'  (at char 2), (line:3, col:1)"

In [20]:
incorrect_output, exp = explanation[0].split("\n^\n")
print(incorrect_output)
print(exp)

# Write a prompt that corrects the incorrect output based on the explanation
# E.g. for the following incorrect output and explanation:
# incorrect_output = '1--. Account Termination: None'ArithmeticError
# exp = 'ParseException: Expected CaselessKeyword '0. Account Termination:', found '1'  (at char 2), (line:3, col:1)'
# debug_prompt should be 'The following is incorrect: "1--. Account Termination: None". The correct output is: "0. Account Termination:" followed by a string.'
debug_prompt = Prompt(f"""The following is incorrect: "{incorrect_output}". The correct output is: "{exp.parserElement}" followed by a string.""")

1--. Account Termination: None
ParseException: Expected CaselessKeyword '0. Account Termination:', found '1'  (at char 2), (line:3, col:1)


In [1]:
from guardrails.validators import get_rule

In [4]:
from pyparsing import CaselessKeyword, Regex

In [5]:
type(Regex)

abc.ABCMeta

In [58]:
# Write a string longer than 100 characters
long_string = 'a' * 101

# Write a string shorter than 100 characters
short_string = 'a' * 99


from guardrails.validators import VerboseStringValidator

VerboseStringValidator.grammar().parse_string(short_string)

ParseException: Expected Re:('.{100,}'), found 'aaaaaaaaaaaaaaaa'  (at char 0), (line:1, col:1)