In [34]:
import guardrails as gd
import openai
from rich import print as rprint
import tiktoken
import dotenv

# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI API key comes from, since no
# key is set anywhere else in the visible cells — confirm.
dotenv.load_dotenv()

True

## Defining the Specs of the output via **RAIL Spec**

In [3]:
# RAIL spec (Guardrails XML) describing the structured output requested from the LLM.
# <output>: a list `transaction_list` of `transaction_info` objects, each holding a
#   date (format %Y-%m-%d), a string description and float debit/credit/balance.
# <prompt>: the instruction text. `{{transaction_string}}` is the placeholder that is
#   filled through the `transaction_string` prompt parameter further down.
#   `@complete_json_suffix_v2` looks like a Guardrails-provided suffix: the printed
#   base prompt shows the schema and the "ONLY return a valid JSON object"
#   instructions in its place — confirm against the Guardrails docs.
rail_str = """
<rail version="0.1">

<output>
    <list name="transaction_list">
        <object name="transaction_info">
            <date name="transaction_date" date-format="%Y-%m-%d" />
            <string name="transaction_description"/>
            <float name="debit"/>
            <float name="credit"/>
            <float name="balance"/>
        </object>
    </list>
</output>

<prompt>

I will present you a bank statement that has the following elements:

- transaction_date: The date of the transaction
- transaction_description: The description of the transaction
- debit: The amount of money debited from the account
- credit: The amount of money credited to the account
- balance: The balance of the account after the transaction

I want to extract this information separately for each transaction in the bank statement below:

{{transaction_string}}
@complete_json_suffix_v2</prompt>


</rail>
"""


# Testing prompts

### Credit Card input example

In [4]:
# Credit card transaction input example

# TD
# Tab-separated statement text with Date / Transaction Description / Debit /
# Credit / Balance columns; the header cells are spread over several lines.
# NOTE(review): the `ateteu` row is out of date order and looks hand-inserted,
# presumably to probe the extraction — confirm. This sample is not referenced by
# any cell visible in this part of the notebook.

td_example = """
Date
	
Transaction Description

Debit
	
Credit
	Balance
May 13, 2023	REEFTECHNOLOGY.COM	$13.00		$1,448.32
May 13, 2023	ABC*ANYTIME FITNESS	$28.34		$1,435.32
May 8, 2023	GOOGLE*YOUTUBEPREMIUM	$12.59		$227.48
May 7, 2023	Spotify P22E958E88	$16.79		$214.89
May 4, 2023	KOODO MOBILE PAC	$112.82		$198.10
Apr 30, 2023	GITHUB, INC.	$14.01		$85.28
Apr 29, 2023	ABC*ANYTIME FITNESS	$28.34		$71.27
Apr 27, 2023	SQUARE ONE INSURANCE SERV	$21.95		$42.93
Apr 26, 2023	CRAVE	$20.98		$20.98
Apr 20, 2023	ateteu	$123.1		$0.00
Apr 25, 2023	PAYMENT - THANK YOU		$2,285.05	$0.00


"""

# Longer variant of the TD sample: the same column layout, with the block of
# transactions repeated several times so the prompt is considerably larger.
# NOTE(review): the `balcblha` row and the `$30` amount look hand-edited,
# presumably to check that repeated rows are not merged — confirm. This sample is
# not referenced by any cell visible in this part of the notebook.
td_example2 = """
Date
	
Transaction Description
	
Debit
	
Credit
	Balance
May 13, 2023	REEFTECHNOLOGY.COM	$13.00		$1,448.32
May 13, 2023	ABC*ANYTIME FITNESS	$28.34		$1,435.32
May 8, 2023	GOOGLE*YOUTUBEPREMIUM	$12.59		$227.48
May 7, 2023	Spotify P22E958E88	$16.79		$214.89
May 4, 2023	KOODO MOBILE PAC	$112.82		$198.10
Apr 30, 2023	GITHUB, INC.	$14.01		$85.28
Apr 29, 2023	ABC*ANYTIME FITNESS	$28.34		$71.27
Apr 27, 2023	SQUARE ONE INSURANCE SERV	$21.95		$42.93
Apr 26, 2023	CRAVE	$20.98		$20.98

May 1, 2023	balcblha	$11.00		$1,448.32
May 2, 2023	ABC*ANYTIME FITNESS	$30		$1,435.32
May 8, 2023	GOOGLE*YOUTUBEPREMIUM	$12.59		$227.48
May 7, 2023	Spotify P22E958E88	$16.79		$214.89
May 4, 2023	KOODO MOBILE PAC	$112.82		$198.10
Apr 30, 2023	GITHUB, INC.	$14.01		$85.28
Apr 29, 2023	ABC*ANYTIME FITNESS	$28.34		$71.27
Apr 27, 2023	SQUARE ONE INSURANCE SERV	$21.95		$42.93
Apr 26, 2023	CRAVE	$20.98		$20.98
Apr 25, 2023	PAYMENT - THANK YOU		$2,285.05	$0.00
May 13, 2023	REEFTECHNOLOGY.COM	$13.00		$1,448.32
May 13, 2023	ABC*ANYTIME FITNESS	$28.34		$1,435.32
May 8, 2023	GOOGLE*YOUTUBEPREMIUM	$12.59		$227.48
May 7, 2023	Spotify P22E958E88	$16.79		$214.89
May 4, 2023	KOODO MOBILE PAC	$112.82		$198.10
Apr 30, 2023	GITHUB, INC.	$14.01		$85.28
Apr 29, 2023	ABC*ANYTIME FITNESS	$28.34		$71.27
Apr 27, 2023	SQUARE ONE INSURANCE SERV	$21.95		$42.93
Apr 26, 2023	CRAVE	$20.98		$20.98
Apr 25, 2023	PAYMENT - THANK YOU		$2,285.05	$0.00
May 13, 2023	REEFTECHNOLOGY.COM	$13.00		$1,448.32
May 13, 2023	ABC*ANYTIME FITNESS	$28.34		$1,435.32
May 8, 2023	GOOGLE*YOUTUBEPREMIUM	$12.59		$227.48
May 7, 2023	Spotify P22E958E88	$16.79		$214.89
May 4, 2023	KOODO MOBILE PAC	$112.82		$198.10
Apr 30, 2023	GITHUB, INC.	$14.01		$85.28
Apr 29, 2023	ABC*ANYTIME FITNESS	$28.34		$71.27
Apr 27, 2023	SQUARE ONE INSURANCE SERV	$21.95		$42.93
Apr 26, 2023	CRAVE	$20.98		$20.98
Apr 25, 2023	PAYMENT - THANK YOU		$2,285.05	$0.00


"""

# RBC
# Statement text where every transaction spans three lines: the date, then the
# description, then the tab-indented amount.
# The header lists only Description / Debit / Credit: there is no balance column,
# although the RAIL schema asks for a `balance` float per transaction.
# The final payment row appears to carry a negative amount in the credit column.
# This is the sample passed as `transaction_string` in the prompt cells below.

rbc_example = """


	Description 	Debit 	Credit 	
May 14, 2023 	
PETROCAN, 
	$25.00		
May 14, 2023 	
SAVE ON FOODS #6622, 
	$21.20		
May 13, 2023 	
CDN TIRE STORE #00304, 
	$221.48		
May 13, 2023 	
SAVE ON FOODS #6622, 
	$21.26		
May 12, 2023 	
PC EXPRESS 1549, 
	$205.04		
May 12, 2023 	
CRUMBLCOOK* RABBITHILL, 
	$29.99		
May 11, 2023 	
PETROCAN, 
	$49.72		
May 11, 2023 	
SHOPPERS DRUG MART #03, 
	$27.39		
May 9, 2023 	
SAVE ON FOODS #6622, 
	$83.97		
May 8, 2023 	
CRUMBLCOOK* RABBITHILL, 
	$29.99		
May 7, 2023 	
GOOD BUDDY RESTAURANT, 
	$18.10		
May 7, 2023 	
SHOPPERS DRUG MART #03, 
	$419.99		
May 7, 2023 	
Nintendo CA979155961, 
	$83.99		
May 6, 2023 	
TICKETMASTER CANADA, TORONTO
	$102.80		
May 6, 2023 	
TICKETMASTER CANADA, TORONTO
	$245.80		
May 6, 2023 	
Subway 71142, 
	$14.47		
May 5, 2023 	
CRUMBLCOOK* RABBITHILL, 
	$31.99		
May 5, 2023 	
TICKET PROTECTOR-CUMIS, CAMBRIDGE
	$16.00		
May 5, 2023 	
PAYMENT - THANK YOU / PAIEMENT - MERCI
		-$2,328.67	
"""


### Prompt Testing

#### Base prompt coming from Guardrails-ai

In [5]:
# Build the Guard object from the RAIL spec string defined earlier.
guard = gd.Guard.from_rail_string(rail_str)

# Print the prompt template as Guardrails holds it, before the
# `transaction_string` placeholder is substituted.
base_prompt_text = guard.base_prompt
print(base_prompt_text)



I will present you a bank statement that has the following elements:

- transaction_date: The date of the transaction
- transaction_description: The description of the transaction
- debit: The amount of money debited from the account
- credit: The amount of money credited to the account
- balance: The balance of the account after the transaction

I want to extract this information separately for each transaction in the bank statement below:

{transaction_string}

Given below is XML that describes the information to extract from this document and the tags to extract it into.

<output>
    <list name="transaction_list">
        <object name="transaction_info">
            <date name="transaction_date" date-format="%Y-%m-%d"/>
            <string name="transaction_description"/>
            <float name="debit"/>
            <float name="credit"/>
            <float name="balance"/>
        </object>
    </list>
</output>


ONLY return a valid JSON object (no other text is necessary), wh

In [24]:
# Preview the fully substituted prompt for the RBC sample before sending it anywhere.
rbc_prompt = guard.prompt.format(transaction_string=rbc_example)
rprint(rbc_prompt.source)

#### Making LLM call through openAI

In [42]:
model = "text-davinci-003"
# Total token limit (prompt + completion) this notebook assumes for `model`.
context_window = 4097
enc = tiktoken.encoding_for_model(model)

# Render the Guardrails prompt with the RBC sample substituted in.
formatted_prompt = guard.prompt.format(**{'transaction_string': rbc_example}).source

# Tokenize once and reuse the count for both the log line and the budget.
prompt_token_count = len(enc.encode(formatted_prompt))
print(f"Numbers of tokens for prompt: {prompt_token_count}")

# Whatever the prompt does not use is left for the completion. Fail early with a
# clear message rather than sending a zero or negative max_tokens to the API.
completion_budget = context_window - prompt_token_count
if completion_budget <= 0:
    raise ValueError(
        f"Prompt uses {prompt_token_count} tokens, which leaves no room for a "
        f"completion within the {context_window}-token limit of {model}"
    )

response = openai.Completion.create(
    model=model,
    prompt=formatted_prompt,
    temperature=0,  # no sampling randomness, so repeated runs are comparable
    max_tokens=completion_budget,
)


response


Numbers of tokens for prompt: 1001


<OpenAIObject text_completion id=cmpl-7IkiEvNsd9ou3WNmMN0vbyoEduPA2 at 0x11175b7c0> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": "\nAnswer:\n{\"transaction_list\": [\n    {\"transaction_info\": {\n        \"transaction_date\": \"2023-05-14\",\n        \"transaction_description\": \"PETROCAN\",\n        \"debit\": 25.00,\n        \"credit\": 0.00,\n        \"balance\": 0.00\n    }},\n    {\"transaction_info\": {\n        \"transaction_date\": \"2023-05-14\",\n        \"transaction_description\": \"SAVE ON FOODS #6622\",\n        \"debit\": 21.20,\n        \"credit\": 0.00,\n        \"balance\": 0.00\n    }},\n    {\"transaction_info\": {\n        \"transaction_date\": \"2023-05-13\",\n        \"transaction_description\": \"CDN TIRE STORE #00304\",\n        \"debit\": 221.48,\n        \"credit\": 0.00,\n        \"balance\": 0.00\n    }},\n    {\"transaction_info\": {\n        \"transaction_date\": \"2023-05-13\",\n   

In [38]:
# Show just the generated text of the first choice instead of the whole response object.
first_choice = response.choices[0]
rprint(first_choice.text)

#### Making LLM calls using Guardrails-ai

In [27]:


# Same extraction, but routed through the Guard: it is handed the OpenAI
# completion function plus the prompt parameter, and the call is unpacked into
# the raw LLM output and the validated output.
completion_options = {
    "engine": "text-davinci-003",
    "max_tokens": 3000,
    "temperature": 0.0,
}
raw_llm_response, validated_response = guard(
    openai.Completion.create,
    prompt_params={"transaction_string": rbc_example},
    **completion_options,
)


In [28]:
# Pretty-print the second value returned by the guard call (the validated output).
rprint(validated_response)