In [1]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
from sentence_transformers import util, SentenceTransformer
import pandas as pd
import numpy as np
import torch

# Pick the best available accelerator instead of assuming Apple-silicon MPS,
# so every later `.to(device)` call also works on CUDA or CPU-only machines.
if torch.backends.mps.is_available():
    device = 'mps'
elif torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [2]:
# Load the pre-computed chunk table from disk; the `embedding` column is still
# a serialized string at this point and is parsed in the next cell.
emb_chunks_df = pd.read_csv(filepath_or_buffer='emb_chunks_df.csv')
emb_chunks_df.head()

Unnamed: 0,page_n,sentence_chunk,embedding
0,0,CORE BANKING AGREEMENT RELATIONSHIP TERMS & CO...,"[-0.3656424283981323, -0.6950072050094604, -0...."
1,1,|Payments involving a foreign currency exchang...,"[-0.45282310247421265, -0.8091470003128052, -0..."
2,2,"(""The Agreement"") contains terms, conditions a...","[-0.35574114322662354, -0.7597120404243469, -0..."
3,3,You need to read Product & Services Terms & Co...,"[-0.11259084939956665, -0.4700072407722473, -0..."
4,4,"The Agreement contains terms, conditions and i...","[-0.09853293001651764, -0.5731832981109619, -0..."


In [3]:
# convert embeddings back to np.array
def _parse_embedding(raw):
    """Parse one CSV-serialized embedding ('[v1, v2, ...]') into a 1-D float array.

    Values that are already ndarrays are returned unchanged, so re-running this
    cell after the column has been converted does not raise on `.strip`.
    """
    if isinstance(raw, np.ndarray):
        return raw
    return np.fromstring(raw.strip('[]'), sep=', ')


emb_chunks_df['embedding'] = emb_chunks_df['embedding'].apply(_parse_embedding)

# Stack the per-row vectors into a single float32 matrix (one row per chunk)
# and move it to the compute device used for similarity search.
embs = torch.tensor(np.stack(emb_chunks_df['embedding'].tolist(), axis=0), dtype=torch.float32).to(device)

# One dict per row (column name -> value); list position matches the row
# position in `embs`, so top-k indices can be used to look chunks up.
pages_n_chunks = emb_chunks_df.to_dict(orient='records')

emb_chunks_df.head()

Unnamed: 0,page_n,sentence_chunk,embedding
0,0,CORE BANKING AGREEMENT RELATIONSHIP TERMS & CO...,"[-0.3656424283981323, -0.6950072050094604, -0...."
1,1,|Payments involving a foreign currency exchang...,"[-0.45282310247421265, -0.8091470003128052, -0..."
2,2,"(""The Agreement"") contains terms, conditions a...","[-0.35574114322662354, -0.7597120404243469, -0..."
3,3,You need to read Product & Services Terms & Co...,"[-0.11259084939956665, -0.4700072407722473, -0..."
4,4,"The Agreement contains terms, conditions and i...","[-0.09853293001651764, -0.5731832981109619, -0..."


In [4]:
# Sanity check: show the embedding matrix shape (one row per chunk) and a
# truncated preview of its values and device.
embs.shape, embs

(torch.Size([148, 1024]),
 tensor([[-3.6564e-01, -6.9501e-01, -3.4750e-01,  ..., -3.3676e-01,
           9.8287e-02,  3.7684e-01],
         [-4.5282e-01, -8.0915e-01, -4.4601e-01,  ..., -5.7205e-01,
          -7.6268e-01,  6.9607e-01],
         [-3.5574e-01, -7.5971e-01, -3.9172e-01,  ...,  1.7896e-04,
          -3.0669e-01,  3.0704e-01],
         ...,
         [ 1.6842e-02, -8.7632e-02,  4.0924e-02,  ..., -1.3430e-01,
          -3.5314e-01,  3.4196e-01],
         [-8.3761e-01, -6.4777e-01, -3.3409e-01,  ..., -5.6327e-01,
          -1.8565e-01,  5.6231e-01],
         [-7.4132e-01, -1.0874e+00, -4.3583e-01,  ..., -5.0362e-01,
          -4.0053e-01,  6.6981e-01]], device='mps:0'))

In [5]:
# Sentence-embedding model used to encode queries at search time.
emb_model = SentenceTransformer('mixedbread-ai/mxbai-embed-large-v1', device=device)
# Model names tried: mixedbread-ai/mxbai-embed-large-v1, all-mpnet-base-v2.
# NOTE(review): presumably this must be the same model that produced the stored
# chunk embeddings (query and chunk vectors are compared directly) - confirm
# before switching.

### Retrieve function

In [6]:
def retrieve_relevant_info(query: str, embeddings: torch.Tensor, model: SentenceTransformer=emb_model, n_to_retrieve: int=5) -> tuple[torch.Tensor, torch.Tensor]:
    """Return the chunks most similar to `query` by cosine similarity.

    Args:
        query: Free-text question to search for.
        embeddings: Matrix of chunk embeddings, one row per chunk.
        model: Embedding model used to encode the query.
        n_to_retrieve: Maximum number of results; clamped to the number of
            available embeddings so small corpora do not raise in `torch.topk`.

    Returns:
        `(scores, indices)`: similarity scores in descending order and the
        matching row indices into `embeddings`.
    """
    # Keep the query on the same device as the corpus; cos_sim does not move tensors.
    query_emb = model.encode(query, convert_to_tensor=True).to(embeddings.device)
    # cos_sim returns a (1, n_chunks) matrix for a single query; take its only row.
    cos_scores = util.cos_sim(query_emb, embeddings)[0]
    k = min(n_to_retrieve, cos_scores.shape[0])
    scores, indices = torch.topk(cos_scores, k)

    return scores, indices

### Sample usage

In [7]:
def print_topk(query: str, embeddings: torch.Tensor, pages_n_chunks: list[dict]=pages_n_chunks, n_to_retrieve: int=5):
    """Print the top matches for `query` with score, chunk text and page number.

    Args:
        query: Free-text question to search for.
        embeddings: Matrix of chunk embeddings, one row per chunk.
        pages_n_chunks: Chunk records aligned with the rows of `embeddings`;
            each must provide 'sentence_chunk' and 'page_n'.
        n_to_retrieve: Number of results to print.
    """
    # Forward n_to_retrieve; previously it was accepted but ignored, so the
    # function always printed the retrieval default of 5 results.
    scores, indices = retrieve_relevant_info(query, embeddings, n_to_retrieve=n_to_retrieve)

    print(f'--- Query: {query} ---')
    # Convert to plain Python numbers so list indexing does not depend on
    # tensor-to-index coercion.
    for score, idx in zip(scores.tolist(), indices.tolist()):
        chunk = pages_n_chunks[idx]
        print(f'Score: {score:.4f}')
        print(f'Text: {chunk["sentence_chunk"]}')
        print(f'Page number: {chunk["page_n"]}\n')

# Smoke-test retrieval with a short keyword-style query.
query = 'exchange rates abroad'
print_topk(query=query, embeddings=embs)

--- Query: exchange rates abroad ---
Score: 0.7140
Text: beneficiary’s bank, it will be converted using that bank’s applicable exchange rate at the time.
Page number: 101

Score: 0.6870
Text: The exchange rates we use are variable exchange rates which are changing constantly throughout the day (for example, to reflect movements in foreign exchange markets). The exchange rate applied to your payments will appear on your statement. Unless otherwise agreed with you, the exchange rate we will apply to payments you make involving a currency exchange (including any future dated payments) and payments you receive which are in a different currency to the denomination of your account will be the Lloyds Bank Foreign Exchange Rate applicable at the time that your payment is processed. You can contact us to find out the rate which will apply and you can find details of how to contact us in the General Information On Payments, Charges & Contacts or by contacting your relationship team.

## Unavaila

### Download tokenizer and LLM

In [8]:
# Hugging Face checkpoint used for answer generation.
model_id = 'google/gemma-2b-it'

tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load the weights in half precision, then move the model to the compute device.
llm_model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=False,
    attn_implementation='sdpa',
)
llm_model = llm_model.to(device)

Gemma's activation function should be approximate GeLU and not exact GeLU.
Changing the activation function to `gelu_pytorch_tanh`.if you want to use the legacy `gelu`, edit the `model.config` to set `hidden_activation=gelu`   instead of `hidden_act`. See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

### Queries list

In [15]:
# Sample customer questions; each one is passed to ask() in the Q&A loop below.
queries = [
    'Do you pay interest or charge interest?',
    'What exchange rates do you apply to payments?',
    'Can the terms and conditions be changed?',
    'What is meant by business day?',
    'What is a PIN?',
    'To whom can you disclose my confidential information?',
    'How can I contact you?',
    'How can I give you payment instructions?',
    'In which cases can you terminate my account?',
    'Who is authorised to give you instructions?',
]

### Base prompt

In [16]:
def prompt_formatter(query: str, context_items: list[dict]) -> str:
    """Build the chat-formatted prompt for the LLM from a query and retrieved chunks.

    Args:
        query: The user's question.
        context_items: Retrieved chunk records; only 'sentence_chunk' is read.

    Returns:
        The prompt string produced by the tokenizer's chat template, ending
        with the generation prompt for the model turn.
    """
    # Render the retrieved chunks as a bulleted list.
    context = '- ' + '\n- '.join([item['sentence_chunk'] for item in context_items])
    # The template contains no turn-control tokens (<start_of_turn>/<end_of_turn>):
    # apply_chat_template below wraps the whole text in a single user turn and
    # appends the model generation prompt. Writing them here as well produced
    # nested turn markers inside the user message.
    # NOTE(review): the instruction text has typos (e.g. 'assisstant'); they are
    # left untouched here so the model input changes only where required.
    base_prompt = """You are a helpful assisstant to customers about a bank's terms and conditions. 
Give yourself room to think by extracting relevant passages from the context before answering the query.
Don't return the thinking, only return the answer.
Make sure your answers are as clear and concise.
Use the following couple of examples as reference for the ideal answer style, but don't use the below example answers as answers to the query.
\nExample 1:
User query: I'm considering opening a new savings account with a competitive interest rate. However, I noticed a clause regarding minimum balance requirements. Could you elaborate on the potential implications of not maintaining this minimum balance?
AI answer: That's a prudent inquiry!  Many banks offer attractive interest rates on savings accounts, but they may stipulate a minimum balance requirement.  Failing to maintain this minimum can trigger various consequences, including incurring fees or forfeiting the advertised interest rate. Carefully review the minimum balance stipulation within the T&Cs to ensure it aligns with your financial situation.
\nExample 2:
User query: My bank has been sending frequent notifications regarding mobile banking security. While I appreciate the reminder, is utilizing mobile banking inherently risky?
AI answer: Mobile banking offers undeniable convenience but does necessitate vigilance. While not inherently risky, online transactions always carry a certain level of risk.  To mitigate these risks, ensure your mobile device is equipped with a strong password and avoid using public Wi-Fi networks for banking activities. Your bank's security notifications serve as a valuable reminder to prioritize online safety measures.
\nNow based on the following context items:
{context};
\n And answer the user's query:
User query: {query}
AI answer:"""

    base_prompt = base_prompt.format(context=context, query=query)
    
    # make sure the inputs to the model are in the same way that they have been trained
    dialogue_template = [
        {
            'role': 'user',
            'content': base_prompt
        }
    ]
    prompt = tokenizer.apply_chat_template(conversation=dialogue_template, tokenize=False, add_generation_prompt=True)

    return prompt

### Full RAG function

In [17]:
def ask(query: str, temperature: float=0.2, max_new_tokens: int=256, format_answer_text: bool=True, return_context: bool=False):
    """Answer `query` with retrieval-augmented generation over the chunk corpus.

    Args:
        query: The user's question.
        temperature: Sampling temperature passed to `generate`.
        max_new_tokens: Upper bound on the number of generated tokens.
        format_answer_text: If True, return only the generated answer with
            special tokens removed; if False, return the full decoded sequence
            (prompt included).
        return_context: If True, also return the retrieved context items.

    Returns:
        The answer text, or `(answer_text, context_items)` when
        `return_context` is True. Each context item is a copy of the chunk
        record with an added 'score' entry.
    """
    # -------- RETRIEVAL --------
    scores, indices = retrieve_relevant_info(query, embs, n_to_retrieve=5)
    # Build fresh dicts so that attaching the score does not mutate the shared
    # `pages_n_chunks` records (scores would otherwise leak between queries).
    context_items = [
        {**pages_n_chunks[idx], 'score': score.cpu()}
        for score, idx in zip(scores, indices.tolist())
    ]

    # -------- AUGMENTATION --------
    prompt = prompt_formatter(query, context_items)

    # -------- GENERATION --------
    # The prompt was rendered by the chat template, which is expected to emit
    # its own special tokens (the original code stripped '<bos>' from the
    # decoded prompt); skip the tokenizer's automatic ones to avoid a duplicate.
    inputs = tokenizer(prompt, return_tensors='pt', add_special_tokens=False).to(device)
    # The streamer prints the answer as it is generated.
    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    outputs = llm_model.generate(**inputs, streamer=streamer, temperature=temperature, do_sample=True, max_new_tokens=max_new_tokens)

    if format_answer_text:
        # Decode only the newly generated tokens instead of string-replacing
        # the prompt, which silently fails if decoding does not reproduce the
        # prompt text exactly. Matches what the streamer printed.
        prompt_len = inputs['input_ids'].shape[1]
        output_text = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)
    else:
        output_text = tokenizer.decode(outputs[0])

    if not return_context:
        return output_text
    
    return output_text, context_items

### PDF Q&A

In [18]:
# Run every sample question through the RAG pipeline. The value returned by
# ask() is not used here: ask() passes a TextStreamer to generate(), so the
# answer is printed while it is being produced.
for query in queries:
    print(f'Query: {query}')
    ask(query, temperature=0.7, return_context=False)
    print('\n')

Query: Do you pay interest or charge interest?
According to the Core Banking Agreement, the interest rate applicable to your account(s) will track the Bank of England bank rate at all times. If the Bank of England bank rate falls to -1%, the interest rate applicable to your account(s) falls to -1%. Similarly, if the Bank of England bank rate rises to 0.5%, the interest rate applicable to your account(s) rises to 0.5%.


Query: What exchange rates do you apply to payments?
The passage does not specify the exchange rates used by the bank, only that they apply variable exchange rates that fluctuate constantly throughout the day.


Query: Can the terms and conditions be changed?
Yes, the terms and conditions can be changed according to the following sections of the document:

- 8.2 If we intend to make changes to the Terms and Conditions, we will give you at least two months’ written notice before the changes come into effect.

- 20.4 Each of the terms within the Terms And Conditions opera