In [15]:
import pandas as pd
import numpy as np
import torch
from sentence_transformers import util, SentenceTransformer
import matplotlib.pyplot as plt
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
import random

# Pick the best available accelerator instead of hard-coding Apple's MPS backend,
# so the notebook also runs on CUDA machines and on CPU-only machines.
if torch.backends.mps.is_available():
    device = 'mps'
elif torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [16]:
# Load the pre-computed chunk embeddings. The first CSV column is the saved
# DataFrame index; reading it as the index keeps it from showing up as a stray
# "Unnamed: 0" data column (and from leaking into every record built from the frame).
emb_chunks_df = pd.read_csv('emb_chunks_df.csv', index_col=0)
emb_chunks_df.head()

Unnamed: 0,page_n,sentence_chunk,embedding
0,0,RELATIONSHIP \nTERMS & CONDITIONS\nCORE BANKIN...,"[-0.28527048230171204, -0.501066267490387, -0...."
1,2,Contents\n \nImportant information \n \n1\n1 G...,"[-0.0739240050315857, -0.6000300049781799, -0...."
2,2,21. Information about us and our regulators \...,"[-0.5118199586868286, -1.0858474969863892, -0...."
3,2,36. Your obligations relating to the security...,"[-0.3614851236343384, -0.8171429634094238, -0...."
4,3,Core Banking Agreement\n(“The Agreement”) cont...,"[-0.3948034644126892, -0.8225910663604736, -0...."


In [17]:
def _parse_embedding(raw):
    """Turn a stringified embedding ('[0.1, 0.2, ...]') into a 1-D float array.

    Values that are not strings (e.g. arrays produced by a previous run of this
    cell) are passed through np.asarray, so re-running the cell does not raise.
    """
    if isinstance(raw, str):
        return np.fromstring(raw.strip('[]'), sep=', ')
    return np.asarray(raw, dtype=float)

# convert embeddings back to np.array
emb_chunks_df['embedding'] = emb_chunks_df['embedding'].apply(_parse_embedding)
# stack the per-chunk vectors into one 2-D float32 tensor (one row per chunk) on the target device
embs = torch.tensor(np.stack(emb_chunks_df['embedding'].tolist(), axis=0), dtype=torch.float32).to(device)

# one dict per chunk, keyed by column name; indexed by row position further down
pages_n_chunks = emb_chunks_df.to_dict(orient='records')

emb_chunks_df.head()

Unnamed: 0,page_n,sentence_chunk,embedding
0,0,RELATIONSHIP \nTERMS & CONDITIONS\nCORE BANKIN...,"[-0.28527048230171204, -0.501066267490387, -0...."
1,2,Contents\n \nImportant information \n \n1\n1 G...,"[-0.0739240050315857, -0.6000300049781799, -0...."
2,2,21. Information about us and our regulators \...,"[-0.5118199586868286, -1.0858474969863892, -0...."
3,2,36. Your obligations relating to the security...,"[-0.3614851236343384, -0.8171429634094238, -0...."
4,3,Core Banking Agreement\n(“The Agreement”) cont...,"[-0.3948034644126892, -0.8225910663604736, -0...."


In [18]:
# Sanity check: display the embedding matrix's dimensions together with a preview of its values.
(embs.shape, embs)

(torch.Size([176, 1024]),
 tensor([[-0.2853, -0.5011, -0.1342,  ..., -0.5641, -0.0289,  0.3733],
         [-0.0739, -0.6000, -0.7002,  ..., -0.3746,  0.3071,  0.2757],
         [-0.5118, -1.0858, -0.3794,  ..., -0.3095, -0.1772,  0.3729],
         ...,
         [-0.7722, -0.6666, -0.9311,  ..., -0.2837, -0.4857,  0.9072],
         [-0.4814, -0.1554, -0.4521,  ..., -0.4333,  0.0816,  0.3699],
         [ 0.1392, -0.1616,  0.5991,  ...,  0.2702,  0.2376, -0.7100]],
        device='mps:0'))

In [19]:
# Sentence-embedding model used to encode queries, loaded onto `device`.
# Model ids noted for this step: mixedbread-ai/mxbai-embed-large-v1, all-mpnet-base-v2
emb_model = SentenceTransformer(
    model_name_or_path='mixedbread-ai/mxbai-embed-large-v1',
    device=device,
)

In [20]:
query = 'foreign currency exchange'

# Encode the query and place it on the same device as the chunk embeddings.
query_emb = emb_model.encode(query, convert_to_tensor=True).to(device)

# Similarity of the query against every chunk; [0] selects the single query's row.
# (util.dot_score is the alternative scoring function.)
scores = util.cos_sim(a=query_emb, b=embs)[0]

# torch.topk returns a (values, indices) named tuple.
top_results = torch.topk(scores, k=5)

print(f'*** Query: {query} ***\n')
for score, idx in zip(top_results.values, top_results.indices):
    chunk = pages_n_chunks[idx]
    print(f'Score: {score:.4f}')
    print(f'Text: {chunk["sentence_chunk"]}')
    print(f'Page number: {chunk["page_n"]}\n')


*** Query: foreign currency exchange ***

Score: 0.7228
Text: The exchange rate  
applied to your payments  
will appear  
on your statement
FOREIGN 
CURRENCY
C
26.  Payments involving a  
foreign currency exchange
Foreign currency exchange rate information
26.1 The exchange rates we use are variable exchange rates 
which are changing constantly throughout the day (for 
example, to reflect movements in foreign exchange 
markets). The exchange rate applied to your payments 
will appear on your statement. Unless otherwise agreed 
with you, the exchange rate we will apply to payments 
you make involving a currency exchange (including 
any future dated payments) and payments you receive 
which are in a different currency to the denomination of 
your account will be the Lloyds Bank Foreign Exchange 
Rate applicable at the time that your payment is
Page number: 40

Score: 0.7093
Text: end of the second Business Day 
if your instructions were initiated 
in paper form)
Payments in euro to a fi

In [21]:
def retrieve_relevant_info(query: str, embeddings: torch.Tensor, model: SentenceTransformer=emb_model, n_to_retrieve: int=5, verbose: bool=True) -> tuple[torch.Tensor, torch.Tensor]:
    """Find the stored embeddings most similar to `query`.

    Args:
        query: Free-text query to embed.
        embeddings: 2-D tensor of chunk embeddings, one row per chunk.
        model: Model used to embed the query (bound to `emb_model` when the
            function is defined).
        n_to_retrieve: Maximum number of hits to return; capped at the number
            of rows in `embeddings` so `torch.topk` cannot raise on a small corpus.
        verbose: When True (the default, matching the previous behaviour),
            print the top scores.

    Returns:
        A `(scores, indices)` tuple of 1-D tensors, ordered from most to least
        similar. `indices` are row positions into `embeddings`.
    """
    query_emb = model.encode(query, convert_to_tensor=True)
    # Keep the query on the same device as the corpus so the similarity op
    # never mixes devices (the ad-hoc query cell does the same with `.to(device)`).
    query_emb = query_emb.to(embeddings.device)

    # Cosine similarity against every row; [0] selects the single query's scores.
    cos_scores = util.cos_sim(query_emb, embeddings)[0]

    k = min(n_to_retrieve, cos_scores.shape[0])
    scores, indices = torch.topk(cos_scores, k)
    if verbose:
        print(scores)
    return scores, indices

def print_topk(query: str, embeddings: torch.Tensor, pages_n_chunks: list[dict]=pages_n_chunks, n_to_retrieve: int=5):
    """Print the best-matching chunks for `query` with their score and page number.

    Args:
        query: Free-text query.
        embeddings: Chunk embeddings to search, one row per chunk.
        pages_n_chunks: Chunk records, positionally aligned with `embeddings`;
            each record must have 'sentence_chunk' and 'page_n' keys.
        n_to_retrieve: Number of hits to print. Previously this argument was
            accepted but never forwarded, so the retrieval default was always used.
    """
    scores, indices = retrieve_relevant_info(query, embeddings, n_to_retrieve=n_to_retrieve)

    print(f'--- Query: {query} ---')
    for score, idx in zip(scores, indices):
        # idx is a 0-d tensor; convert to a plain int before indexing the list
        chunk = pages_n_chunks[int(idx)]
        print(f'Score: {score:.4f}')
        print(f'Text: {chunk["sentence_chunk"]}')
        print(f'Page number: {chunk["page_n"]}\n')

In [22]:
# Try a differently worded query against the same chunk embeddings.
query = 'exchange rates abroad'
print_topk(query, embeddings=embs)

tensor([0.6921, 0.6424, 0.6267, 0.6259, 0.6217], device='mps:0')
--- Query: exchange rates abroad ---
Score: 0.6921
Text: The exchange rate  
applied to your payments  
will appear  
on your statement
FOREIGN 
CURRENCY
C
26.  Payments involving a  
foreign currency exchange
Foreign currency exchange rate information
26.1 The exchange rates we use are variable exchange rates 
which are changing constantly throughout the day (for 
example, to reflect movements in foreign exchange 
markets). The exchange rate applied to your payments 
will appear on your statement. Unless otherwise agreed 
with you, the exchange rate we will apply to payments 
you make involving a currency exchange (including 
any future dated payments) and payments you receive 
which are in a different currency to the denomination of 
your account will be the Lloyds Bank Foreign Exchange 
Rate applicable at the time that your payment is
Page number: 40

Score: 0.6424
Text: other than sterling, we will use the Lloyds Bank

In [23]:
model_id = 'google/gemma-2b-it'

# Tokenizer and causal LM are loaded from the same checkpoint id.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load the weights in float16 with the 'sdpa' attention implementation,
# then move the model to the target device.
llm_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=False,
    attn_implementation='sdpa',
)
llm_model = llm_model.to(device)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [24]:
# Sample customer questions used to exercise the pipeline end to end.
queries = [
    "In what cases may I close my account?",
    "How do you pay interest?",
    "Can the terms and conditions be changed?",
    "What is meant by Business Day?",
    "How can I reach you?",
    "How do you receive my payment instructions?",
    "In which cases can you terminate my account?",
    "Who is authorised to give you instructions?",
]

In [34]:
def prompt_formatter(query: str, context_items: list[dict]) -> str:
    """Build the chat-formatted prompt for the LLM from a query and retrieved chunks.

    Args:
        query: The user's question.
        context_items: Retrieved chunk records; each must have a 'sentence_chunk' key.

    Returns:
        The prompt as a string, produced by the tokenizer's chat template with the
        generation prompt appended.
    """
    # One bullet per retrieved chunk.
    context = '- ' + '\n- '.join([item['sentence_chunk'] for item in context_items])

    # NOTE: no turn markers (<start_of_turn>/<end_of_turn>) are written here by hand.
    # apply_chat_template below wraps the whole message in the user turn and appends
    # the model turn; hand-written markers would be tokenized as extra, nested turns.
    base_prompt = """You are a helpful assistant to customers about a bank's terms and conditions. 
Give yourself room to think by extracting relevant passages from the context before answering the query.
Don't return the thinking, only return the answer.
Make sure your answers are as clear and concise.
Use the following examples as reference for the ideal answer style, but don't use the below example answers as answers to the query.
\nExample 1:
User query: Who can provide instructions to the bank according to the terms and conditions?
AI answer: According to the terms and conditions, only authorized individuals can give instructions to the bank.
\nExample 2:
User query: What are your rights regarding the termination of services as outlined in the terms and conditions?
AI answer: The terms and conditions specify the rights granted to you in the event of termination, including any associated procedures or obligations.
\nExample 3:
User query: How does the bank handle refunds for incorrectly executed payment instructions, as per the terms and conditions?
AI answer: The terms and conditions detail the process for obtaining refunds in the case of payment instructions being incorrectly executed by the bank.
\nExample 4:
User query: What measures are outlined in the terms and conditions to ensure the security of your accounts and payment instruments?
AI answer: The terms and conditions lay out your obligations regarding the security of your accounts, payments, and payment instruments, along with any corresponding measures implemented by the bank.
\nIn case you do not know the answer, answer with \'Apologies, but I am unable to answer your query.\'.
\nNow use the following context items:
{context}
\n And answer the user's query:
User query: {query}
AI answer:"""

    base_prompt = base_prompt.format(context=context, query=query)

    # make sure the inputs to the model are in the same way that they have been trained
    dialogue_template = [
        {
            'role': 'user',
            'content': base_prompt
        }
    ]
    prompt = tokenizer.apply_chat_template(conversation=dialogue_template, tokenize=False, add_generation_prompt=True)

    return prompt

In [35]:
def ask(query: str, temperature: float=0.2, max_new_tokens: int=256, format_answer_text: bool=True, return_context: bool=False):
    """Answer `query` with retrieval-augmented generation.

    Retrieves the 5 most similar chunks, builds a prompt from them, and generates
    an answer with `llm_model`. The generated text is also streamed to stdout
    while it is produced.

    Args:
        query: The user's question.
        temperature: Sampling temperature. Values <= 0 fall back to greedy
            decoding, since sampling requires a strictly positive temperature.
        max_new_tokens: Upper bound on the number of generated tokens.
        format_answer_text: When True, return only the generated answer with
            special tokens removed; when False, return the raw decoded sequence
            (prompt included).
        return_context: When True, also return the retrieved context items.

    Returns:
        The answer text, or `(answer_text, context_items)` when `return_context`
        is True. Each context item is a copy of the chunk record with an added
        'score' entry.
    """
    # -------- RETRIEVAL --------
    scores, indices = retrieve_relevant_info(query, embs, n_to_retrieve=5)
    # Build copies so that attaching the score does not mutate the shared
    # `pages_n_chunks` records (which would otherwise carry stale scores between calls).
    context_items = [
        {**pages_n_chunks[int(idx)], 'score': score.cpu()}
        for score, idx in zip(scores, indices)
    ]

    # -------- AUGMENTATION --------
    prompt = prompt_formatter(query, context_items)

    # -------- GENERATION --------
    # If the templated prompt already starts with the BOS token, do not let the
    # tokenizer prepend a second one.
    bos_token = tokenizer.bos_token
    prompt_has_bos = bool(bos_token) and prompt.startswith(bos_token)
    model_inputs = tokenizer(prompt, return_tensors='pt', add_special_tokens=not prompt_has_bos).to(device)

    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    generation_kwargs = {'streamer': streamer, 'max_new_tokens': max_new_tokens}
    if temperature > 0:
        generation_kwargs.update(do_sample=True, temperature=temperature)
    else:
        generation_kwargs.update(do_sample=False)
    outputs = llm_model.generate(**model_inputs, **generation_kwargs)

    if format_answer_text:
        # The returned sequence starts with the prompt tokens; slice them off rather
        # than string-replacing the prompt, which breaks if decoding does not
        # round-trip the prompt text exactly.
        prompt_len = model_inputs['input_ids'].shape[1]
        output_text = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    else:
        output_text = tokenizer.decode(outputs[0])

    if not return_context:
        return output_text

    return output_text, context_items

In [36]:
# Run every sample question through the pipeline. `ask` streams the answer to
# stdout as it is generated, so its return value is not used here.
# (To try a single question instead: query = random.choice(queries))
for query in queries:
    print(f'Query: {query}')
    ask(query=query, temperature=0.7, return_context=False)

Query: In what cases may I close my account?
tensor([0.7036, 0.6865, 0.6694, 0.6637, 0.6634], device='mps:0')
The passage that mentions that the account can be closed if the user stops receiving or we stop providing them with a Product and such Product is required in order for us to provide other Product(s) to them suggests that the account can be closed if the user stops receiving or the bank stops providing them with the required Product in order to maintain their access to other products and services.
Query: How do you pay interest?
tensor([0.7453, 0.6905, 0.6700, 0.6598, 0.6518], device='mps:0')
The passage does not provide information about how the bank pays interest, so I cannot answer this question from the context.
Query: Can the terms and conditions be changed?
tensor([0.7554, 0.7355, 0.7136, 0.7108, 0.6858], device='mps:0')
No, according to the context, changes to the terms and conditions require at least two months' written notice from the bank before the changes come into e

In [14]:
# Release cached accelerator memory; only call the MPS API where MPS actually exists,
# so this cell does not fail on machines without it.
if torch.backends.mps.is_available():
    torch.mps.empty_cache()