# Lab

```text
Retrieval-Augmented Generation
------------------------------
- Create a chatbot using RAG (Retrieval-Augmented Generation) or Generative Question Answering (GQA).
- RAG is a language generation approach that combines pre-trained parametric memory (the model weights) with non-parametric (source) memory.
- In this case, source knowledge is used to augment the knowledge of the LLM at query time.
- Examples of source knowledge include data from external sources like PDFs, URLs, CSVs, etc.
```

In [1]:
# Built-in library
import re
import json
from typing import Any, Dict, List, Optional, Union
import logging
import warnings

# Standard imports
import numpy as np
from pprint import pprint
import pandas as pd

# Visualization
import matplotlib.pyplot as plt


# pandas display settings: raise the row, column and column-width limits so
# that wide frames and long text cells are shown instead of being truncated.
pd.options.display.max_rows = 1_000
pd.options.display.max_columns = 1_000
pd.options.display.max_colwidth = 600

# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# used further down.
warnings.filterwarnings("ignore")

# Black code formatter (Optional)
%load_ext lab_black
# auto reload imports
%load_ext autoreload
%autoreload 2

In [3]:
# Path to the source JSON document (resolved against the current working directory).
fp = "../../data/res.json"

# JSON text is UTF-8 by specification, so decode it explicitly rather than
# relying on the platform's default encoding.
with open(fp, "r", encoding="utf-8") as f:
    json_data = json.load(f)

In [None]:
# Pretty-print the parsed JSON to inspect its structure.
pprint(json_data)

## Retrieval-Augmented Generation

- [Example notebook](https://github.com/pinecone-io/examples/blob/master/generation/langchain/handbook/05-langchain-retrieval-augmentation.ipynb)

<br>

[![image.png](https://i.postimg.cc/7PWxf0dh/image.png)](https://postimg.cc/YjQcPGpB)

<br>

```text
1. Using the embedding model create vector embeddings for the context to be indexed.
2. Insert the vector embeddings into the vector DB with some reference to the original context the embeddings were created from.
3. When a query is made using the application, embed the query using the same embedding model and query the vector DB for similar vector embeddings.
```

In [4]:
import tiktoken


# Look up the tokenizer used by the chat model.
# Despite the variable name, the value is the encoding object itself (the cell
# output is its repr, `<Encoding 'cl100k_base'>`), not a plain string.
model_name = "gpt-3.5-turbo"
encoding_name = tiktoken.encoding_for_model(model_name=model_name)
encoding_name

<Encoding 'cl100k_base'>

In [5]:
from langchain.callbacks import get_openai_callback


def count_tokens(*, chain: Any, query: str) -> Any:
    """Run ``chain`` on ``query`` and print how many tokens the call consumed.

    Args:
        chain: Any object exposing a ``run`` method (e.g. a LangChain chain).
        query: The input forwarded unchanged to ``chain.run``.

    Returns:
        Whatever ``chain.run(query)`` returns.
    """
    # The callback handler accumulates usage for calls made inside the block.
    with get_openai_callback() as usage:
        answer = chain.run(query)
        total = usage.total_tokens
        print(f"Spent a total of {total!r} tokens")

    return answer


def calculate_number_of_tokens(*, text: str, model_name: str = "gpt-3.5-turbo") -> int:
    """Return the number of tokens ``text`` occupies for ``model_name``.

    Args:
        text: The text to tokenize.
        model_name: Name of the OpenAI model whose tokenizer should be used.

    Returns:
        The length of the token sequence produced by encoding ``text``.

    Raises:
        Propagates whatever ``tiktoken.encoding_for_model`` raises for a model
        name it does not recognise.
    """
    import tiktoken

    # `encoding_for_model` already returns a ready-to-use encoding object, so
    # use it directly instead of parsing its repr to look the encoding up again.
    tokenizer = tiktoken.encoding_for_model(model_name=model_name)

    # `disallowed_special=()` lets text that looks like a special token be
    # encoded as ordinary text instead of raising an error.
    return len(tokenizer.encode(text, disallowed_special=()))

In [6]:
# Sample sentence used as a quick sanity check of the token counter.
text = (
    "hello I am a chunk of text and using the calculate_number_of_tokens function "
    "we can find the length of this chunk of text in tokens"
)
calculate_number_of_tokens(text=text)

27

### Steps

```text
- Load data
- Split data
  * To improve the accuracy of the embeddings.
  * To improve the efficiency of the embeddings. Embedding a large piece of text can be computationally expensive. By splitting the text into smaller chunks, we can reduce the amount of computation required to embed the text.
  * To make the embeddings more interpretable. 

- Embed the data. i.e. convert to numerical representations w/o losing info about the data.
- Store the embeddings in a vector store.
- Build the chatbot.

Using the chatbot
-----------------
- Send a query.
- Embed the query using the same embedding model.
- Compare the embeddings of the query with the embeddings of the original source document.
- Retrieve the docs that are similar to the query.
- Summarize the retrieved doc using an LLM.
- Return the summarized result as the query result.
```

<br>


```sh
pip install jq
pip install tiktoken
```

<br><hr>

#### 1. Load Source Data

In [7]:
from langchain.document_loaders import JSONLoader
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import (
    CharacterTextSplitter,
    RecursiveCharacterTextSplitter,
)


# Load the JSON source file as LangChain documents.
# NOTE(review): `jq_schema=".data"` presumably selects the top-level "data"
# key, and `text_content=False` is presumably required because that value is
# not a plain string — confirm against the JSONLoader documentation.
loader = JSONLoader(
    file_path=fp,
    jq_schema=".data",
    text_content=False,
)

data = loader.load()

# Embedding model and the on-disk location used for the vector store below.
embedding = OpenAIEmbeddings()
persist_directory = "../../data/doc_db"


# Count the number of tokens in the first loaded document.
# LLMs struggle when the number of tokens is too large.
calculate_number_of_tokens(text=data[0].page_content)

1425

#### 2. Preprocess

```text
- Split the data into chunks (docs).
- Embed the docs.
```

In [8]:
# NOTE(review): the splitter measures chunk_size with its length function —
# presumably characters by default, not the tokens counted above — confirm.
CHUNK_SIZE = 2_000
CHUNK_OVERLAP = 50

# Split the doc(s)
# NOTE(review): r"\{\}" looks like a regex-escaped "{}"; whether separators
# are interpreted as regexes or as literal strings depends on the splitter
# version/settings — verify that it matches as intended.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    separators=["\n\n", "\n", r"\{\}", ""],
)
splits = text_splitter.split_documents(documents=data)
# Number of chunks produced.
len(splits)

2

In [9]:
# Run once!
# Embed the chunks and create the vector store, saving it to `persist_directory`.
# NOTE(review): re-running this cell presumably adds the same chunks to the
# persisted collection again — verify before re-executing.
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=embedding,
    persist_directory=persist_directory,
)

In [10]:
# Re-open the vector store from `persist_directory`, using the same embedding
# model that was used when it was created.
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)

# Number of docs
# NOTE(review): `_collection` is a private attribute of the Chroma wrapper and
# may change between library versions.
print(vectordb._collection.count())

2


#### 3. Create AI Bot

In [12]:
from langchain.memory import (
    ConversationBufferWindowMemory,
    ConversationTokenBufferMemory,
)
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

# Init LLM
llm = ChatOpenAI(temperature=0)

# Create memory
# The keys must line up with the chain: "question" is read as the human input
# and the history is exposed under "chat_history".
memory = ConversationBufferWindowMemory(
    k=5,  # window size
    input_key="question",
    memory_key="chat_history",
)
# Default retriever settings. The "Number of requested results 4 is greater
# than number of elements in index 2" notice in the outputs below comes from
# the store holding only 2 chunks.
retriever = vectordb.as_retriever()

# Init Chatbot
# chain_type="stuff": NOTE(review) presumably all retrieved chunks are placed
# into a single prompt, joined by `document_separator` — confirm.
decide_bot = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,  # retrieve data from data source
    verbose=False,
    chain_type_kwargs={
        "document_separator": "<<<<>>>>>",
        "memory": memory,
    },
)

#### 4. Send Queries To The Chatbot

In [14]:
# Route the query through `count_tokens` so the token usage is printed with
# the answer. Note that a dict is passed here although `count_tokens`
# annotates `query` as `str`.
question = "What is the maxEMIEligibility of the customer?"
result = count_tokens(chain=decide_bot, query={"query": question})

pprint(result)

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


Spent a total of 1521 tokens
'The maximum EMI eligibility of the customer is 2898.'


In [16]:
# Natural-language phrasing of the field name; compare with the exact-key
# query in the next cell. In the recorded outputs this phrasing reports
# February as 3,065.48, whereas the exact-key query returns 383.19 for 2022-02.
question = "What is the average monthly debit?"
result = decide_bot({"query": question})

pprint(result)

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


{'query': 'What is the average monthly debit?',
 'result': 'The average monthly debit is as follows:\n'
           '\n'
           '- February: ₦3,065.48\n'
           '- March: ₦1,599.84\n'
           '- April: ₦5,705.34\n'
           '- May: ₦5,401.95\n'
           '\n'
           'Please note that these values are in Nigerian Naira (NGN).'}


In [17]:
# It's better to use the exact variable name(s): with the key spelled exactly
# as in the data, the recorded answer is the raw month/amount list.
question = "What is the averageMonthlyDebit?"
result = decide_bot({"query": question})

pprint(result)

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


{'query': 'What is the averageMonthlyDebit?',
 'result': 'The averageMonthlyDebit is [{"month": "2022-04", "amount": '
           '5705.34}, {"month": "2022-05", "amount": 5401.95}, {"month": '
           '"2022-02", "amount": 383.19}, {"month": "2022-03", "amount": '
           '1599.84}].'}


In [24]:
# It's better to use the exact variable name(s)
# NOTE(review): the mean is produced by the LLM, not computed in Python —
# verify the figure independently before relying on it.
question = "What is the mean of the averageMonthlyCredit in the data?"
result = decide_bot({"query": question})

pprint(result)

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


{'query': 'What is the mean of the averageMonthlyCredit in the data?',
 'result': 'The mean of the averageMonthlyCredit in the data is 8,070.71.'}


In [18]:
# Two questions in one query; the recorded answer addresses both parts.
question = "What is the customer ID and is the customer a salary earner?"
result = decide_bot({"query": question})

pprint(result)

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


{'query': 'What is the customer ID and is the customer a salary earner?',
 'result': 'The customer ID is not provided in the given context. However, '
           'based on the information provided, the customer is a salary '
           'earner.'}


In [19]:
# Yes/no question followed by a request to list the matching entries.
question = "Are there any recurringExpenses in the transaction data? If yes, list them"
result = decide_bot({"query": question})

pprint(result)

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


{'query': 'Are there any recurringExpenses in the transaction data? If yes, '
          'list them',
 'result': 'Yes, there are recurring expenses in the transaction data. The '
           'recurring expenses are as follows:\n'
           '\n'
           '1. Amount: 10026.88, Description: NEXTGENUSR TRF/Payment of '
           'money/FRM FAITH OLUWASEYI OLODU TO AMEENAH OYINKANSOLA OYESOJI- '
           '033\n'
           '2. Amount: 2010.75, Description: NEXTGENUSR TRF/From Benson/FRM '
           'FAITH OLUWASEYI OLODU TO CHINENYE JENNIFER MADU- 057\n'
           '3. Amount: 2500, Description: FLEXSWITCH WT|KENNETH UGOCHI UNACHI '
           'ABAKALIKI NG'}


In [20]:
# Asks for a maximum over the monthly values.
# NOTE(review): the comparison is made by the LLM, not computed in Python —
# verify the answer against the source data.
question = "Which month had the highest totalMonthlyDebit and what was the amount?"
result = decide_bot({"query": question})

pprint(result)

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


{'query': 'Which month had the highest totalMonthlyDebit and what was the '
          'amount?',
 'result': 'The month with the highest totalMonthlyDebit was April, and the '
           'amount was 114106.87 NGN.'}


In [21]:
# Most recent chat history, as held by the windowed memory configured with
# k=5 above.
pprint(memory.load_memory_variables({}))

{'chat_history': 'Human: What is the average monthly debit?\n'
                 'AI: The average monthly debit is as follows:\n'
                 '\n'
                 '- February: ₦3,065.48\n'
                 '- March: ₦1,599.84\n'
                 '- April: ₦5,705.34\n'
                 '- May: ₦5,401.95\n'
                 '\n'
                 'Please note that these values are in Nigerian Naira (NGN).\n'
                 'Human: What is the averageMonthlyDebit?\n'
                 'AI: The averageMonthlyDebit is [{"month": "2022-04", '
                 '"amount": 5705.34}, {"month": "2022-05", "amount": 5401.95}, '
                 '{"month": "2022-02", "amount": 383.19}, {"month": "2022-03", '
                 '"amount": 1599.84}].\n'
                 'Human: What is the customer ID and is the customer a salary '
                 'earner?\n'
                 'AI: The customer ID is not provided in the given context. '
                 'However, based on the information provided,

## Requirements

```text
1. Data source

2. Vector store/database

3. Create an API and expose the endpoints
   - Upload source data
   - Create and use chatbot
     - Preprocess the source data
     - Send queries to the bot
```

# Feedback

- Break it down into pieces.
  - Start with a specific use case.


- MVP and 1st use case
  - MVP that allows first time users to interact with the Decide UI easily.
  - How can conversational chatbots be useful to the users?
  - Interpretability of the Decide variables, e.g. what is the gamblingRate and what insights can be gotten from the variable.
  - Users should be able to interact with Decide docs using the chatbot.
- What is the best way for the LLM to retrieve the info? text, json, csv??
  - Transform the source data?
- How do you improve the performance of the chatbot?
  - Latency
  - reduce hallucination
  - reduce token usage

- Use cases:
  1. Connect to the Decide Docs.
     1. What are the Decide variables?
     2. How are they calculated?
     3. What is a good value for this variable?
  2. Add validation to the query response using an LLM to make the response very succinct.

In [None]:
from pprint import pprint
from typing import Any, Dict, List, Optional

import click
from langchain.document_loaders import JSONLoader
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.memory import ConversationBufferWindowMemory
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from pydantic import BaseModel, PrivateAttr


class DecideBot(BaseModel):
    """Question-answering assistant over a Decide JSON export.

    The source file is loaded, split into chunks, embedded and stored in a
    Chroma vector store; a ``RetrievalQA`` chain then answers questions using
    the retrieved chunks.

    The chain is built lazily on the first call to ``generate_response`` and
    reused afterwards, so the source data is embedded once per instance and the
    conversation memory is kept between questions instead of being recreated
    (and therefore emptied) on every call.
    """

    # Constants
    # Directory in which the Chroma vector store is persisted.
    PERSIST_DIRECTORY: str = "./data/doc_db"
    # Chunk size and overlap passed to the text splitter.
    CHUNK_SIZE: int = 1_000
    CHUNK_OVERLAP: int = 50
    # Sampling temperature passed to the chat model.
    TEMPERATURE: float = 0.0

    # Variables
    # Path to the JSON source file.
    filepath: str

    # Cached chain; created on first use by `generate_response`. Declared as a
    # private attribute so that it is not treated as a model field.
    _rag_chain: Optional[RetrievalQA] = PrivateAttr(default=None)

    def _load_data(self) -> List[Any]:
        """Load the JSON source file and return the resulting documents."""
        # NOTE(review): `jq_schema=".data"` presumably selects the top-level
        # "data" key of the file — confirm against the JSONLoader docs.
        loader = JSONLoader(
            file_path=self.filepath,
            jq_schema=".data",
            text_content=False,
        )
        return loader.load()

    def _preprocess_data(self) -> Chroma:
        """Split the source documents, embed them and store them in Chroma.

        Returns:
            The vector store created from the chunks, persisted under
            ``PERSIST_DIRECTORY``.
        """
        data = self._load_data()

        # Embedding model
        embedding = OpenAIEmbeddings()

        # Split the doc(s)
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.CHUNK_SIZE,
            chunk_overlap=self.CHUNK_OVERLAP,
            separators=["\n\n", "\n", r"\{\}", ""],
        )
        splits = text_splitter.split_documents(documents=data)

        # Create and save vector store to disk
        # NOTE(review): every call embeds the chunks again and presumably adds
        # them to whatever already exists in PERSIST_DIRECTORY — verify.
        vector_db = Chroma.from_documents(
            documents=splits,
            embedding=embedding,
            persist_directory=self.PERSIST_DIRECTORY,
        )
        return vector_db

    # Create bot
    def _create_RAG_model(self) -> RetrievalQA:
        """Build a new retrieval question-answering chain with its own memory.

        This runs the full preprocessing pipeline (load, split, embed, store),
        so callers should reuse the returned chain rather than rebuild it.
        """
        llm = ChatOpenAI(temperature=self.TEMPERATURE)
        # Create memory
        # NOTE(review): the memory records each exchange under "chat_history";
        # whether the chain's prompt actually includes that history depends on
        # the prompt template in use — confirm.
        memory = ConversationBufferWindowMemory(
            k=5,  # window size
            input_key="question",
            memory_key="chat_history",
        )
        vector_db = self._preprocess_data()

        # Init Chatbot
        decide_bot = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vector_db.as_retriever(),  # retrieve data from data source
            verbose=False,
            chain_type_kwargs={
                "document_separator": "<<<<>>>>>",
                "memory": memory,
            },
        )
        return decide_bot

    # Generate answers
    def generate_response(self, *, question: str) -> Dict[str, Any]:
        """Answer ``question`` using the retrieval chain.

        Args:
            question: The user's question.

        Returns:
            The chain's result for the input ``{"query": question}``.
        """
        # Build the chain only once: rebuilding it per question would re-embed
        # the source data and discard the conversation memory each time.
        if self._rag_chain is None:
            self._rag_chain = self._create_RAG_model()
        return self._rag_chain({"query": question})