In [1]:
from copy import deepcopy

from guardrails.prompt_repo import Prompt
from guardrails.schema import Schema, Field
from guardrails.validators import StringValidator, FloatValidator

# Load the OpenAI API key from a local text file (kept out of the notebook).
# Surrounding whitespace is stripped: text files normally end with a newline,
# and without the strip that newline would become part of the key.
with open('openai_api_key.txt', 'r') as f:
    openai_api_key = f.read().strip()

In [2]:
# Fields to extract from a bank's terms-of-service text.
# Each row is (field name, validator class, question used as the field's prompt);
# the rows are turned into `Field` objects keyed by field name, in this order.
schema = {
    field_name: Field(validator=validator_cls, prompt=question)
    for field_name, validator_cls, question in (
        (
            "fees",
            StringValidator,
            "What fees and charges are associated with my account, including account maintenance fees, transaction fees, and overdraft fees?",
        ),
        (
            "interest_rates",
            FloatValidator,
            "What are the interest rates offered by the bank on savings and checking accounts, loans, and credit products?",
        ),
        (
            "limitations",
            StringValidator,
            "Are there any limitations on the number or frequency of transactions, transfers, or withdrawals from my account?",
        ),
        (
            "liability",
            FloatValidator,
            "What is the maximum amount I am liable for in case of fraud, unauthorized transactions, or other security breaches?",
        ),
        (
            "privacy",
            StringValidator,
            "What are the bank's policies on data collection, sharing, and protection?",
        ),
        (
            "disputes",
            StringValidator,
            "What are the procedures for resolving disputes with the bank, including information on arbitration, mediation, and lawsuits?",
        ),
        (
            "account_termination",
            StringValidator,
            "Under what circumstances can the bank terminate my account, and what notice is required for such termination?",
        ),
        (
            "regulatory_oversight",
            StringValidator,
            "What is the bank's regulatory oversight, including its licensing and compliance status?",
        ),
    )
}

# Shared preamble for the extraction prompt. `{document}` is left as a literal
# placeholder in the template text; it is not filled in here.
base_prompt_template = Prompt(
    "Given the following document, answer the following questions. "
    "If the answer doesn't exist in the document, enter 'None'.\n\n"
    "{document}"
)

# Build the Schema object from the field definitions and the shared preamble.
tos_schema = Schema.from_dict(schema, base_prompt=base_prompt_template)

In [26]:
# Build the merged prompt by hand to inspect what the schema produces.
# Requires `deepcopy` from the standard-library `copy` module (imported in the
# notebook's import cell); without that import this cell raises NameError on a
# fresh kernel.
#
# Each consumer gets its own deep copy of the schema's private `_schema`
# attribute — presumably so the prompt builders can modify their argument
# without touching `tos_schema` itself (NOTE(review): confirm against the
# guardrails implementation).
format_attributes = deepcopy(tos_schema._schema)
content_attributes = deepcopy(tos_schema._schema)
# Not used in this cell.
attributes_to_extract = deepcopy(tos_schema._schema)

# No previous LLM output / extracted object is supplied, i.e. both prompt sets
# are requested with `None` for that argument.
format_prompts = tos_schema.get_format_prompts(
    format_attributes,
    llm_output=None
)
content_prompts = tos_schema.get_content_prompts(
    content_attributes,
    extracted_object=None,
)

# Combine the format and content prompts into one prompt.
final_prompt = tos_schema.get_merged_prompt(format_prompts, content_prompts)

"Given the following document, answer the following questions. If the answer doesn't exist in the document, enter 'None'.\n\n{document}"

In [3]:
# Read the first chunk of the terms-of-service text into `content`.
# The encoding is pinned because the chunk text contains non-ASCII characters
# (curly quotes, non-breaking spaces); without it, open() falls back to the
# platform's locale encoding and the result differs between machines.
# NOTE(review): assumes the chunk files are stored as UTF-8 — confirm.
with open("data/chunk_0.txt", "r", encoding="utf-8") as f:
    content = f.read()

In [4]:
# Disabled: one-off extraction on `content` (chunk 0 only).
# extracted_obj = tos_schema.extract_schemified_response(content)

In [5]:
# Paths of the 30 document chunks: data/chunk_0.txt ... data/chunk_29.txt.
chunks = [f'data/chunk_{i}.txt' for i in range(0, 30)]

# Load every chunk's text, in path order, so contents[k] is the text of chunks[k].
# The encoding is pinned because the chunk text contains non-ASCII characters
# (curly quotes, non-breaking spaces); without it, open() falls back to the
# platform's locale encoding and the result differs between machines.
# NOTE(review): assumes the chunk files are stored as UTF-8 — confirm.
contents = []
for chunk in chunks:
    with open(chunk, 'r', encoding='utf-8') as f:
        contents.append(f.read())

In [6]:
# Run the schema extraction over the first 10 chunks and keep every result,
# so extracted_objs[k] is the extraction for contents[k].
extracted_objs = []

for i, content in enumerate(contents[:10]):
    extracted_obj = tos_schema.extract_schemified_response(content)
    extracted_objs.append(extracted_obj)

    # Label each result with the index of the chunk it came from. This must be
    # the loop's own `i`; any other leftover notebook variable would print the
    # same value on every iteration.
    print(f"\n\ni: {i}")
    print(extracted_obj)





Iteration = 0
Prev llm output = None
HERE LLM Output: 

fees: None
interest_rates: None
limitations: Transfer Limits
liability: $0
privacy: Disclosure of Account Information to Third Parties
disputes: Disputes
account_termination: Termination
regulatory_oversight: None
Finished printing LLM Output
LLM Output: 

fees: None
interest_rates: None
limitations: Transfer Limits
liability: $0
privacy: Disclosure of Account Information to Third Parties
disputes: Disputes
account_termination: Termination
regulatory_oversight: None
Prev LLM Output: 

fees: None
interest_rates: None
limitations: Transfer Limits
liability: $0
privacy: Disclosure of Account Information to Third Parties
disputes: Disputes
account_termination: Termination
regulatory_oversight: None
Finished printing LLM Output




Iteration = 1
Prev llm output = 

fees: None
interest_rates: None
limitations: Transfer Limits
liability: $0
privacy: Disclosure of Account Information to Third Parties
disputes: Disputes
account_termina

In [24]:
# Display the extraction result for the third chunk (index 2 of `extracted_objs`).
extracted_objs[2]

{'fees': 'None',
 'interest_rates': 'None',
 'limitations': 'None',
 'liability': 'None',
 'privacy': 'We have the right to determine eligibility and to restrict categories of recipients to whom payments may be made using the Service in our sole discretion.',
 'disputes': 'What are the procedures for resolving disputes with the bank, including information on arbitration, mediation, and lawsuits?',
 'account_termination': 'Under what circumstances can the bank terminate my account, and what notice is required for such termination?',
 'regulatory_oversight': "What is the bank's regulatory oversight, including its licensing and compliance status?"}

In [23]:
# Display the raw text of the second chunk (index 1 of `contents`).
contents[1]

' Deposit Accounts Only\nErrors and Questions about Services for Chase Business Deposit Accounts Only\nMiscellaneous Terms for Business Accounts\n23. ADDITIONAL TERMS APPLICABLE TO ALL USERS OF THE SERVICE\nComputer Equipment; Browser Access and Internet Services\nPasswords\nNotices\nNew Features\nLimitation of Liability; No Warranties\nOther Agreements\nTermination\nDisputes\nBinding Arbitration\nIndemnity\nRecords; Communications\nSpecial Provisions for Business Customers\nChoice of Law/Successors; Waiver; Severability\nChase Account Information\nPrivacy Policy and Notice\n1. General Terms Applicable to the Service\nWe have partnered with Zelle to enable transfers of money between you and others who are\nenrolled directly with Zelle or enrolled with another financial institution that partners with\nZelle (each, a “User”) using aliases, such as email addresses or mobile phone numbers.\nWhen you use or access, or permit any other person(s) or entity\xa0to whom you have delegated\nto ac