In [1]:
import guardrails as gd
import openai
from rich import print as rprint
import tiktoken
import dotenv

dotenv.load_dotenv()

True

## Defining the Specs of the output via **RAIL Spec**

In [2]:
rail_str = """
<rail version="0.1">

<output>
    <list name="transaction_list">
        <object name="transaction_info">
            <date name="transaction_date" date-format="%Y-%m-%d" />
            <string name="transaction_description"/>
            <float name="debit"/>
            <float name="credit"/>
            <float name="balance"/>
        </object>
    </list>
</output>

<prompt>

I will present you a bank statement that has the following elements:

- transaction_date: The date of the transaction
- transaction_description: The description of the transaction
- debit: The amount of money debited from the account
- credit: The amount of money credited to the account
- balance: The balance of the account after the transaction

I want to extract this information separately for each transaction in the bank statement below:

{{transaction_string}}
@complete_json_suffix_v2</prompt>


</rail>
"""


# Testing prompts

### Credit Card input example

In [3]:
#  Credit card transation input example

# TD

td_example = """
Date
	
Transaction Description

Debit
	
Credit
	Balance
May 13, 2023	REEFTECHNOLOGY.COM	$13.00		$1,448.32
May 13, 2023	ABC*ANYTIME FITNESS	$28.34		$1,435.32
May 8, 2023	GOOGLE*YOUTUBEPREMIUM	$12.59		$227.48
May 7, 2023	Spotify P22E958E88	$16.79		$214.89
May 4, 2023	KOODO MOBILE PAC	$112.82		$198.10
Apr 30, 2023	GITHUB, INC.	$14.01		$85.28
Apr 29, 2023	ABC*ANYTIME FITNESS	$28.34		$71.27
Apr 27, 2023	SQUARE ONE INSURANCE SERV	$21.95		$42.93
Apr 26, 2023	CRAVE	$20.98		$20.98
Apr 20, 2023	ateteu	$123.1		$0.00
Apr 25, 2023	PAYMENT - THANK YOU		$2,285.05	$0.00


"""

td_example_big = """
Date
	
Transaction Description
	
Debit
	
Credit
	Balance
May 13, 2023	REEFTECHNOLOGY.COM	$13.00		$1,448.32
May 13, 2023	ABC*ANYTIME FITNESS	$28.34		$1,435.32
May 8, 2023	GOOGLE*YOUTUBEPREMIUM	$12.59		$227.48
May 7, 2023	Spotify P22E958E88	$16.79		$214.89
May 4, 2023	KOODO MOBILE PAC	$112.82		$198.10
Apr 30, 2023	GITHUB, INC.	$14.01		$85.28
Apr 29, 2023	ABC*ANYTIME FITNESS	$28.34		$71.27
Apr 27, 2023	SQUARE ONE INSURANCE SERV	$21.95		$42.93
Apr 26, 2023	CRAVE	$20.98		$20.98
May 1, 2023	balcblha	$11.00		$1,448.32
May 2, 2023	ABC*ANYTIME FITNESS	$30		$1,435.32
May 8, 2023	GOOGLE*YOUTUBEPREMIUM	$12.59		$227.48
May 7, 2023	Spotify P22E958E88	$16.79		$214.89
May 4, 2023	KOODO MOBILE PAC	$112.82		$198.10
Apr 30, 2023	GITHUB, INC.	$14.01		$85.28
Apr 29, 2023	ABC*ANYTIME FITNESS	$28.34		$71.27
Apr 27, 2023	SQUARE ONE INSURANCE SERV	$21.95		$42.93
Apr 26, 2023	CRAVE	$20.98		$20.98
Apr 25, 2023	PAYMENT - THANK YOU		$2,285.05	$0.00
May 13, 2023	REEFTECHNOLOGY.COM	$13.00		$1,448.32
May 13, 2023	ABC*ANYTIME FITNESS	$28.34		$1,435.32
May 8, 2023	GOOGLE*YOUTUBEPREMIUM	$12.59		$227.48
May 7, 2023	Spotify P22E958E88	$16.79		$214.89
May 4, 2023	KOODO MOBILE PAC	$112.82		$198.10
Apr 30, 2023	GITHUB, INC.	$14.01		$85.28
Apr 29, 2023	ABC*ANYTIME FITNESS	$28.34		$71.27
Apr 27, 2023	SQUARE ONE INSURANCE SERV	$21.95		$42.93
Apr 26, 2023	CRAVE	$20.98		$20.98
Apr 25, 2023	PAYMENT - THANK YOU		$2,285.05	$0.00
May 13, 2023	REEFTECHNOLOGY.COM	$13.00		$1,448.32
May 13, 2023	ABC*ANYTIME FITNESS	$28.34		$1,435.32
May 8, 2023	GOOGLE*YOUTUBEPREMIUM	$12.59		$227.48
May 7, 2023	Spotify P22E958E88	$16.79		$214.89
May 4, 2023	KOODO MOBILE PAC	$112.82		$198.10
Apr 30, 2023	GITHUB, INC.	$14.01		$85.28
Apr 29, 2023	ABC*ANYTIME FITNESS	$28.34		$71.27
Apr 27, 2023	SQUARE ONE INSURANCE SERV	$21.95		$42.93
Apr 26, 2023	CRAVE	$20.98		$20.98
Apr 25, 2023	PAYMENT - THANK YOU		$2,285.05	$0.00
May 13, 2023	REEFTECHNOLOGY.COM	$13.00		$1,448.32
May 13, 2023	ABC*ANYTIME FITNESS	$28.34		$1,435.32
May 8, 2023	GOOGLE*YOUTUBEPREMIUM	$12.59		$227.48
May 7, 2023	Spotify P22E958E88	$16.79		$214.89
May 4, 2023	KOODO MOBILE PAC	$112.82		$198.10
Apr 30, 2023	GITHUB, INC.	$14.01		$85.28
Apr 29, 2023	ABC*ANYTIME FITNESS	$28.34		$71.27
Apr 27, 2023	SQUARE ONE INSURANCE SERV	$21.95		$42.93
Apr 26, 2023	CRAVE	$20.98		$20.98
May 1, 2023	balcblha	$11.00		$1,448.32
May 2, 2023	ABC*ANYTIME FITNESS	$30		$1,435.32
May 8, 2023	GOOGLE*YOUTUBEPREMIUM	$12.59		$227.48
May 7, 2023	Spotify P22E958E88	$16.79		$214.89
May 4, 2023	KOODO MOBILE PAC	$112.82		$198.10
Apr 30, 2023	GITHUB, INC.	$14.01		$85.28
Apr 29, 2023	ABC*ANYTIME FITNESS	$28.34		$71.27
Apr 27, 2023	SQUARE ONE INSURANCE SERV	$21.95		$42.93
Apr 26, 2023	CRAVE	$20.98		$20.98
Apr 25, 2023	PAYMENT - THANK YOU		$2,285.05	$0.00
May 13, 2023	REEFTECHNOLOGY.COM	$13.00		$1,448.32
May 13, 2023	ABC*ANYTIME FITNESS	$28.34		$1,435.32
May 8, 2023	GOOGLE*YOUTUBEPREMIUM	$12.59		$227.48
May 7, 2023	Spotify P22E958E88	$16.79		$214.89
May 4, 2023	KOODO MOBILE PAC	$112.82		$198.10
Apr 30, 2023	GITHUB, INC.	$14.01		$85.28
Apr 29, 2023	ABC*ANYTIME FITNESS	$28.34		$71.27
Apr 27, 2023	SQUARE ONE INSURANCE SERV	$21.95		$42.93
Apr 26, 2023	CRAVE	$20.98		$20.98
Apr 25, 2023	PAYMENT - THANK YOU		$2,285.05	$0.00
May 13, 2023	REEFTECHNOLOGY.COM	$13.00		$1,448.32
May 13, 2023	ABC*ANYTIME FITNESS	$28.34		$1,435.32
May 8, 2023	GOOGLE*YOUTUBEPREMIUM	$12.59		$227.48
May 7, 2023	Spotify P22E958E88	$16.79		$214.89
May 4, 2023	KOODO MOBILE PAC	$112.82		$198.10
Apr 30, 2023	GITHUB, INC.	$14.01		$85.28
Apr 29, 2023	ABC*ANYTIME FITNESS	$28.34		$71.27
Apr 27, 2023	SQUARE ONE INSURANCE SERV	$21.95		$42.93
Apr 26, 2023	CRAVE	$20.98		$20.98
Apr 25, 2023	PAYMENT - THANK YOU		$2,285.05	$0.00
May 13, 2023	REEFTECHNOLOGY.COM	$13.00		$1,448.32
May 13, 2023	ABC*ANYTIME FITNESS	$28.34		$1,435.32
May 8, 2023	GOOGLE*YOUTUBEPREMIUM	$12.59		$227.48
May 7, 2023	Spotify P22E958E88	$16.79		$214.89
May 4, 2023	KOODO MOBILE PAC	$112.82		$198.10
Apr 30, 2023	GITHUB, INC.	$14.01		$85.28
Apr 29, 2023	ABC*ANYTIME FITNESS	$28.34		$71.27
Apr 27, 2023	SQUARE ONE INSURANCE SERV	$21.95		$42.93
Apr 26, 2023	CRAVE	$20.98		$20.98
May 1, 2023	balcblha	$11.00		$1,448.32
May 2, 2023	ABC*ANYTIME FITNESS	$30		$1,435.32
May 8, 2023	GOOGLE*YOUTUBEPREMIUM	$12.59		$227.48
May 7, 2023	Spotify P22E958E88	$16.79		$214.89
May 4, 2023	KOODO MOBILE PAC	$112.82		$198.10
Apr 30, 2023	GITHUB, INC.	$14.01		$85.28
Apr 29, 2023	ABC*ANYTIME FITNESS	$28.34		$71.27
Apr 27, 2023	SQUARE ONE INSURANCE SERV	$21.95		$42.93
Apr 26, 2023	CRAVE	$20.98		$20.98
Apr 25, 2023	PAYMENT - THANK YOU		$2,285.05	$0.00
May 13, 2023	REEFTECHNOLOGY.COM	$13.00		$1,448.32
May 13, 2023	ABC*ANYTIME FITNESS	$28.34		$1,435.32
May 8, 2023	GOOGLE*YOUTUBEPREMIUM	$12.59		$227.48
May 7, 2023	Spotify P22E958E88	$16.79		$214.89
May 4, 2023	KOODO MOBILE PAC	$112.82		$198.10
Apr 30, 2023	GITHUB, INC.	$14.01		$85.28
Apr 29, 2023	ABC*ANYTIME FITNESS	$28.34		$71.27
Apr 27, 2023	SQUARE ONE INSURANCE SERV	$21.95		$42.93
Apr 26, 2023	CRAVE	$20.98		$20.98
Apr 25, 2023	PAYMENT - THANK YOU		$2,285.05	$0.00
May 13, 2023	REEFTECHNOLOGY.COM	$13.00		$1,448.32
May 13, 2023	ABC*ANYTIME FITNESS	$28.34		$1,435.32
May 8, 2023	GOOGLE*YOUTUBEPREMIUM	$12.59		$227.48
May 7, 2023	Spotify P22E958E88	$16.79		$214.89
May 4, 2023	KOODO MOBILE PAC	$112.82		$198.10
Apr 30, 2023	GITHUB, INC.	$14.01		$85.28
Apr 29, 2023	ABC*ANYTIME FITNESS	$28.34		$71.27
Apr 27, 2023	SQUARE ONE INSURANCE SERV	$21.95		$42.93
Apr 26, 2023	CRAVE	$20.98		$20.98
Apr 25, 2023	PAYMENT - THANK YOU		$2,285.05	$0.00
May 13, 2023	REEFTECHNOLOGY.COM	$13.00		$1,448.32
May 13, 2023	ABC*ANYTIME FITNESS	$28.34		$1,435.32
May 8, 2023	GOOGLE*YOUTUBEPREMIUM	$12.59		$227.48
May 7, 2023	Spotify P22E958E88	$16.79		$214.89
May 4, 2023	KOODO MOBILE PAC	$112.82		$198.10
Apr 30, 2023	GITHUB, INC.	$14.01		$85.28
Apr 29, 2023	ABC*ANYTIME FITNESS	$28.34		$71.27
Apr 27, 2023	SQUARE ONE INSURANCE SERV	$21.95		$42.93
Apr 26, 2023	CRAVE	$20.98		$20.98
May 1, 2023	balcblha	$11.00		$1,448.32
May 2, 2023	ABC*ANYTIME FITNESS	$30		$1,435.32
May 8, 2023	GOOGLE*YOUTUBEPREMIUM	$12.59		$227.48
May 7, 2023	Spotify P22E958E88	$16.79		$214.89
May 4, 2023	KOODO MOBILE PAC	$112.82		$198.10
Apr 30, 2023	GITHUB, INC.	$14.01		$85.28
Apr 29, 2023	ABC*ANYTIME FITNESS	$28.34		$71.27
Apr 27, 2023	SQUARE ONE INSURANCE SERV	$21.95		$42.93
Apr 26, 2023	CRAVE	$20.98		$20.98
Apr 25, 2023	PAYMENT - THANK YOU		$2,285.05	$0.00
May 13, 2023	REEFTECHNOLOGY.COM	$13.00		$1,448.32
May 13, 2023	ABC*ANYTIME FITNESS	$28.34		$1,435.32
May 8, 2023	GOOGLE*YOUTUBEPREMIUM	$12.59		$227.48
May 7, 2023	Spotify P22E958E88	$16.79		$214.89
May 4, 2023	KOODO MOBILE PAC	$112.82		$198.10
Apr 30, 2023	GITHUB, INC.	$14.01		$85.28
Apr 29, 2023	ABC*ANYTIME FITNESS	$28.34		$71.27
Apr 27, 2023	SQUARE ONE INSURANCE SERV	$21.95		$42.93
Apr 26, 2023	CRAVE	$20.98		$20.98
Apr 25, 2023	PAYMENT - THANK YOU		$2,285.05	$0.00
May 13, 2023	REEFTECHNOLOGY.COM	$13.00		$1,448.32
May 13, 2023	ABC*ANYTIME FITNESS	$28.34		$1,435.32
May 8, 2023	GOOGLE*YOUTUBEPREMIUM	$12.59		$227.48
May 7, 2023	Spotify P22E958E88	$16.79		$214.89
May 4, 2023	KOODO MOBILE PAC	$112.82		$198.10
Apr 30, 2023	GITHUB, INC.	$14.01		$85.28
Apr 29, 2023	ABC*ANYTIME FITNESS	$28.34		$71.27
Apr 27, 2023	SQUARE ONE INSURANCE SERV	$21.95		$42.93
Apr 26, 2023	CRAVE	$20.98		$20.98
Apr 25, 2023	PAYMENT - THANK YOU		$2,285.05	$0.00
"""

# RBC

rbc_example = """


	Description 	Debit 	Credit 	
May 14, 2023 	
PETROCAN, 
	$25.00		
May 14, 2023 	
SAVE ON FOODS #6622, 
	$21.20		
May 13, 2023 	
CDN TIRE STORE #00304, 
	$221.48		
May 13, 2023 	
SAVE ON FOODS #6622, 
	$21.26		
May 12, 2023 	
PC EXPRESS 1549, 
	$205.04		
May 12, 2023 	
CRUMBLCOOK* RABBITHILL, 
	$29.99		
May 11, 2023 	
PETROCAN, 
	$49.72		
May 11, 2023 	
SHOPPERS DRUG MART #03, 
	$27.39		
May 9, 2023 	
SAVE ON FOODS #6622, 
	$83.97		
May 8, 2023 	
CRUMBLCOOK* RABBITHILL, 
	$29.99		
May 7, 2023 	
GOOD BUDDY RESTAURANT, 
	$18.10		
May 7, 2023 	
SHOPPERS DRUG MART #03, 
	$419.99		
May 7, 2023 	
Nintendo CA979155961, 
	$83.99		
May 6, 2023 	
TICKETMASTER CANADA, TORONTO
	$102.80		
May 6, 2023 	
TICKETMASTER CANADA, TORONTO
	$245.80		
May 6, 2023 	
Subway 71142, 
	$14.47		
May 5, 2023 	
CRUMBLCOOK* RABBITHILL, 
	$31.99		
May 5, 2023 	
TICKET PROTECTOR-CUMIS, CAMBRIDGE
	$16.00		
May 5, 2023 	
PAYMENT - THANK YOU / PAIEMENT - MERCI
		-$2,328.67	
"""

rbc_example_big = """


	Description 	Debit 	Credit 	
May 14, 2023 	
PETROCAN, 
	$25.00		
May 14, 2023 	
SAVE ON FOODS #6622, 
	$21.20		
May 13, 2023 	
CDN TIRE STORE #00304, 
	$221.48		
May 13, 2023 	
SAVE ON FOODS #6622, 
	$21.26		
May 12, 2023 	
PC EXPRESS 1549, 
	$205.04		
May 12, 2023 	
CRUMBLCOOK* RABBITHILL, 
	$29.99		
May 11, 2023 	
PETROCAN, 
	$49.72		
May 11, 2023 	
SHOPPERS DRUG MART #03, 
	$27.39		
May 9, 2023 	
SAVE ON FOODS #6622, 
	$83.97		
May 8, 2023 	
CRUMBLCOOK* RABBITHILL, 
	$29.99		
May 7, 2023 	
GOOD BUDDY RESTAURANT, 
	$18.10		
May 7, 2023 	
SHOPPERS DRUG MART #03, 
	$419.99		
May 7, 2023 	
Nintendo CA979155961, 
	$83.99		
May 6, 2023 	
TICKETMASTER CANADA, TORONTO
	$102.80		
May 6, 2023 	
TICKETMASTER CANADA, TORONTO
	$245.80		
May 6, 2023 	
Subway 71142, 
	$14.47		
May 5, 2023 	
CRUMBLCOOK* RABBITHILL, 
	$31.99		
May 5, 2023 	
TICKET PROTECTOR-CUMIS, CAMBRIDGE
	$16.00		
May 5, 2023 	
PAYMENT - THANK YOU / PAIEMENT - MERCI
		-$2,328.67	
CRUMBLCOOK* RABBITHILL, 
	$29.99		
May 11, 2023 	
PETROCAN, 
	$49.72		
May 11, 2023 	
SHOPPERS DRUG MART #03, 
	$27.39		
May 9, 2023 	
SAVE ON FOODS #6622, 
	$83.97		
May 8, 2023 	
CRUMBLCOOK* RABBITHILL, 
	$29.99		
May 7, 2023 	
GOOD BUDDY RESTAURANT, 
	$18.10		
May 7, 2023 	
SHOPPERS DRUG MART #03, 
	$419.99		
May 7, 2023 	
Nintendo CA979155961, 
	$83.99		
May 6, 2023 	
TICKETMASTER CANADA, TORONTO
	$102.80		
May 6, 2023 	
TICKETMASTER CANADA, TORONTO
	$245.80		
May 6, 2023 	
Subway 71142, 
	$14.47		
May 5, 2023 	
CRUMBLCOOK* RABBITHILL, 
	$31.99		
May 5, 2023 	
TICKET PROTECTOR-CUMIS, CAMBRIDGE
	$16.00		
May 5, 2023 	
PAYMENT - THANK YOU / PAIEMENT - MERCI
		-$2,328.67	
CRUMBLCOOK* RABBITHILL, 
	$29.99		
May 11, 2023 	
PETROCAN, 
	$49.72		
May 11, 2023 	
SHOPPERS DRUG MART #03, 
	$27.39		
May 9, 2023 	
SAVE ON FOODS #6622, 
	$83.97		
May 8, 2023 	
CRUMBLCOOK* RABBITHILL, 
	$29.99		
May 7, 2023 	
GOOD BUDDY RESTAURANT, 
	$18.10		
May 7, 2023 	
SHOPPERS DRUG MART #03, 
	$419.99		
May 7, 2023 	
Nintendo CA979155961, 
	$83.99		
May 6, 2023 	
TICKETMASTER CANADA, TORONTO
	$102.80		
May 6, 2023 	
TICKETMASTER CANADA, TORONTO
	$245.80		
May 6, 2023 	
Subway 71142, 
	$14.47		
May 5, 2023 	
CRUMBLCOOK* RABBITHILL, 
	$31.99		
May 5, 2023 	
TICKET PROTECTOR-CUMIS, CAMBRIDGE
	$16.00		
May 5, 2023 	
PAYMENT - THANK YOU / PAIEMENT - MERCI
		-$2,328.67	
"""



### Prompt Testing

#### Base prompt comming from Guardrails-ai

In [4]:
guard = gd.Guard.from_rail_string(rail_str)


print(guard.base_prompt)



I will present you a bank statement that has the following elements:

- transaction_date: The date of the transaction
- transaction_description: The description of the transaction
- debit: The amount of money debited from the account
- credit: The amount of money credited to the account
- balance: The balance of the account after the transaction

I want to extract this information separately for each transaction in the bank statement below:

{transaction_string}

Given below is XML that describes the information to extract from this document and the tags to extract it into.

<output>
    <list name="transaction_list">
        <object name="transaction_info">
            <date name="transaction_date" date-format="%Y-%m-%d"/>
            <string name="transaction_description"/>
            <float name="debit"/>
            <float name="credit"/>
            <float name="balance"/>
        </object>
    </list>
</output>


ONLY return a valid JSON object (no other text is necessary), wh

In [5]:
rprint(guard.prompt.format(**{'transaction_string': rbc_example}).source)

#### Making LLM call through openAI

In [6]:
model = "text-davinci-003"
enc = tiktoken.encoding_for_model(model)

#getting the formatted prompt from guardrails
formatted_prompt = guard.prompt.format(**{'transaction_string': rbc_example}).source

print(f"Numbers of tokens for prompt: {len(enc.encode(formatted_prompt))}")

response = openai.Completion.create(
  model=model,
  prompt=formatted_prompt,
  temperature=0,
  max_tokens=4097 - len(enc.encode(formatted_prompt)),
)


response


Numbers of tokens for prompt: 1001


<OpenAIObject text_completion id=cmpl-7OcSgrmU460X8lKDw7NX9ygKRjKop at 0x115d82f90> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": "\nAnswer:\n{\"transaction_list\": [\n    {\"transaction_info\": {\n        \"transaction_date\": \"2023-05-14\",\n        \"transaction_description\": \"PETROCAN\",\n        \"debit\": 25.00,\n        \"credit\": 0.00,\n        \"balance\": 0.00\n    }},\n    {\"transaction_info\": {\n        \"transaction_date\": \"2023-05-14\",\n        \"transaction_description\": \"SAVE ON FOODS #6622\",\n        \"debit\": 21.20,\n        \"credit\": 0.00,\n        \"balance\": 0.00\n    }},\n    {\"transaction_info\": {\n        \"transaction_date\": \"2023-05-13\",\n        \"transaction_description\": \"CDN TIRE STORE #00304\",\n        \"debit\": 221.48,\n        \"credit\": 0.00,\n        \"balance\": 0.00\n    }},\n    {\"transaction_info\": {\n        \"transaction_date\": \"2023-05-13\",\n   

In [7]:
rprint(response.choices[0].text)

#### Making LLM calls using Guardrails-ai

In [8]:


raw_llm_response, validated_response = guard(
    openai.Completion.create,
    prompt_params={'transaction_string': rbc_example},
    engine="text-davinci-003",
    max_tokens=3000,
    temperature=0.0,
)

rprint(validated_response)

#### Creating the re-query for big input queries

##### - Token split class

In [9]:
model = "text-davinci-003"
enc = tiktoken.encoding_for_model(model)

#getting the formatted prompt from guardrails
base_prompt = guard.base_prompt
formatted_prompt = guard.prompt.format(**{'transaction_string': rbc_example}).source

raw_llm_response, _ = guard(
    openai.Completion.create,
    prompt_params={'transaction_string': rbc_example},
    engine="text-davinci-003",
    max_tokens=3000,
    temperature=0.0,
)

base_prompt_tokens = len(enc.encode(base_prompt))
transaction_input_tokens = len(enc.encode(rbc_example))
final_prompt_tokens = len(enc.encode(formatted_prompt))
response_tokens = len(enc.encode(raw_llm_response))

print(f"Numbers of tokens for baseprompt: {len(enc.encode(base_prompt))}")
print(f"Numbers of tokens for the transaction input: {len(enc.encode(rbc_example))}")
print(f"Numbers of tokens for prompt: {len(enc.encode(formatted_prompt))}")
print(f"Numbers of tokens for response: {len(enc.encode(raw_llm_response))}")

Numbers of tokens for baseprompt: 468
Numbers of tokens for the transaction input: 543
Numbers of tokens for prompt: 1001
Numbers of tokens for response: 1311


In [27]:
model = "text-davinci-003"
enc = tiktoken.encoding_for_model(model)

#getting the formatted prompt from guardrails
base_prompt = guard.base_prompt
formatted_prompt = guard.prompt.format(**{'transaction_string': td_example}).source

raw_llm_response, _ = guard(
    openai.Completion.create,
    prompt_params={'transaction_string': td_example},
    engine="text-davinci-003",
    max_tokens=3000,
    temperature=0.0,
)

base_prompt_tokens = len(enc.encode(base_prompt))
transaction_input_tokens = len(enc.encode(td_example))
final_prompt_tokens = len(enc.encode(formatted_prompt))
response_tokens = len(enc.encode(raw_llm_response))

print(f"Numbers of tokens for baseprompt: {len(enc.encode(base_prompt))}")
print(f"Numbers of tokens for the transaction input: {len(enc.encode(td_example))}")
print(f"Numbers of tokens for prompt: {len(enc.encode(formatted_prompt))}")
print(f"Numbers of tokens for response: {len(enc.encode(raw_llm_response))}")

Numbers of tokens for baseprompt: 468
Numbers of tokens for the transaction input: 297
Numbers of tokens for prompt: 755
Numbers of tokens for response: 871


Outlining the plan:

Formula >  Final_max_tokens = (2 * final_prompt) + transaction_inp

- If: Final_max_tokens > 4090
    - split transaction_inp

How to split transaction_inp:

- Try n>=2, for each number se of the split meet the requirements above, if not, n++ until all the splits are good


In [37]:
class TokenValidator:
    def __init__(self, model:str):
        self.model = model
        
    def get_token_count(self, input:str):
        enc = tiktoken.encoding_for_model(self.model)
        return len(enc.encode(input))

    def is_valid_count(self, base_prompt:str, transaction_input:str, max_tokens_threshold:int=4090):
        base_prompt_tokens = self.get_token_count(base_prompt) # need to add a way to inject the prompt template with transaction_input (might add guardrails obj as class attribute)
        transaction_input_tokens = self.get_token_count(transaction_input)
        final_expected_tokens = ((base_prompt_tokens + transaction_input_tokens) * 2) + transaction_input_tokens # * 2 here because we are adding the prompt and the response
        self.expected_tokens = final_expected_tokens
        return final_expected_tokens <= max_tokens_threshold

    def split_tokens(self, input:str, str_sep:str="\n", split_factor:int=2, max_tokens_threshold:int=4090):
        
        split_input = input.split(str_sep)
        split_divider = len(split_input) // split_factor

        splitted_inputs = [str_sep.join(split_input[i:i+split_divider]) for i in range(0, len(split_input), split_divider)]

        return splitted_inputs

    def validate_input(self, inputs:list[str], base_prompt:str, max_tokens_threshold:int=4090):
        final_inputs = []
        for input in inputs:

            if self.is_valid_count(base_prompt=base_prompt, transaction_input=input, max_tokens_threshold=max_tokens_threshold):
                final_inputs.append((input, self.expected_tokens))  # when inputs are invalid the return value will be a list, so just making this standard
            else:
                new_input = self.split_tokens(input=input)
                return self.validate_input(inputs = new_input, base_prompt=base_prompt, max_tokens_threshold=max_tokens_threshold)
        
        return final_inputs

In [38]:
model = "text-davinci-003"

tv = TokenValidator(model=model)

x = tv.validate_input(inputs=[td_example_big], base_prompt=base_prompt, max_tokens_threshold=2000)

rprint(x)

##### - Re-query rountine

In [39]:
# Loop through validated and splited inputs
# get output for each one

model = "text-davinci-003"

tv = TokenValidator(model=model)

x = tv.validate_input(inputs=[td_example_big], base_prompt=base_prompt, max_tokens_threshold=2000)

final_values = []
for entry in x:
    rprint(entry[1])
    raw_llm_response, validated_response = guard(
        openai.Completion.create,
        prompt_params={'transaction_string': entry[0]},
        engine="text-davinci-003",
        max_tokens=2000,
        temperature=0.0,
    )
    final_values.append(validated_response)

rprint(final_values)



#### Creating an adaptive categorization query

##### - Level 1 = random assigner

In [58]:
from langchain.llms import OpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Base will be a langchain Chain object

# Base model will be a sequence chain with transaction_parse > categorization

template = """
You are an expert in assigning categories to a transaction. 
Be very brief and try to use the least amount of words to the categories you create as possible.
It's very important that if you can't identify a category you assign "N/A" to it.

An example of how important it's to be brief:

example_description: Netflix Super PRemium ultra mega plan
category: Entertainment

You will be given the transaction description within triple backticks.


Your transaction is: ```{transaction_description}```

Return your answer using the following json format.
"""

prompt_template = PromptTemplate(input_variables=["transaction_description"], template=template)

llm = OpenAI(temperature=0)
llm_chain = LLMChain(
    llm=llm,
    prompt=PromptTemplate.from_template(template)
)

print(llm_chain("Spotify P22E958E88")['text'])




{"category": "Entertainment"}


In [63]:
# creating the loop to get all the categories from all transactions
for item in final_values:
    for transaction in item['transaction_list']:
        print(transaction['transaction_description'], llm_chain(transaction['transaction_description'])['text'])

GOOGLE*YOUTUBEPREMIUM
Spotify P22E958E88
KOODO MOBILE PAC
GITHUB, INC.
ABC*ANYTIME FITNESS
SQUARE ONE INSURANCE SERV
CRAVE
PAYMENT - THANK YOU
REEFTECHNOLOGY.COM
ABC*ANYTIME FITNESS
GOOGLE*YOUTUBEPREMIUM
Spotify P22E958E88
KOODO MOBILE PAC
GITHUB, INC.
ABC*ANYTIME FITNESS
SQUARE ONE INSURANCE SERV
CRAVE
PAYMENT - THANK YOU
REEFTECHNOLOGY.COM
ABC*ANYTIME FITNESS


##### - Level 2 = consistent assigner - uses database entries for the user