# Lab

```text
Retrieval-Augmented Generation
------------------------------
- Create a chatbot using RAG (Retrieval-Augmented Generation).
- RAG is a language generation approach that combines pre-trained parametric memory (the model's weights) with non-parametric (source) memory.
- In this case, source knowledge is used to augment the knowledge of the LLM.
- Examples of source knowledge include data from external sources like PDFs, URLs, CSVs, etc.
```

In [1]:
# Built-in library
import itertools
import re
import json
from typing import Any, Dict, List, Optional, Union
import logging
import warnings

# Standard imports
import numpy as np
from pprint import pprint
import pandas as pd

# Visualization
import matplotlib.pyplot as plt


# pandas display settings: raise the row / column / column-width limits so
# large frames and long cell values are shown with less truncation.
pd.options.display.max_rows = 1_000
pd.options.display.max_columns = 1_000
pd.options.display.max_colwidth = 600

# Suppress every warning for the rest of the session.
warnings.filterwarnings("ignore")

# Black code formatter (Optional)
%load_ext lab_black
# auto reload imports
%load_ext autoreload
%autoreload 2

### Steps

```text
- Load data
- Split data
- Store the data (i.e. embeddings).
- Retrieve the data.
- Add memory
- Engineer prompt template
```

<br>


```sh
pip install jq
pip install tiktoken
```

In [3]:
# Path to the JSON file that serves as the source knowledge for the chatbot.
fp = "../../data/res.json"

# State the encoding explicitly so the read does not depend on the platform's
# default locale encoding (JSON interchange files are UTF-8).
with open(fp, "r", encoding="utf-8") as f:
    json_data = json.load(f)

In [None]:
# Pretty-print the parsed JSON to inspect its structure.
pprint(json_data)

In [8]:
import tiktoken


# Look up the tokenizer (encoding) that tiktoken associates with the model.
# NOTE: despite its name, `encoding_name` holds the object returned by
# `encoding_for_model` (displayed as <Encoding '...'>), not a plain string.
model_name = "gpt-3.5-turbo"
encoding_name = tiktoken.encoding_for_model(model_name=model_name)
encoding_name

<Encoding 'cl100k_base'>

In [21]:
from langchain.callbacks import get_openai_callback


def count_tokens(*, chain: Any, query: str) -> Any:
    """Run `query` through `chain` and print how many tokens were spent.

    The call is executed inside the OpenAI callback context so that the
    token usage of this single run is captured. Returns whatever
    `chain.run(query)` returns.
    """
    with get_openai_callback() as usage:
        answer = chain.run(query)
        # Report while the callback context is still open.
        print(f"Spent a total of {usage.total_tokens!r} tokens")
    return answer


def calculate_number_of_tokens(*, text: str, model_name: str = "gpt-3.5-turbo") -> int:
    """Return the number of tokens `text` is encoded into for `model_name`.

    Params:
        text: The text to tokenize.
        model_name: Model whose tokenizer is used; it is passed to
            `tiktoken.encoding_for_model`.

    Returns:
        The length of the token sequence produced by encoding `text`.
    """
    import tiktoken

    # `encoding_for_model` already returns a ready-to-use encoding object, so
    # encode with it directly instead of parsing its repr to recover a name
    # and looking the same encoding up a second time.
    tokenizer = tiktoken.encoding_for_model(model_name=model_name)
    # `disallowed_special=()` lets text that looks like a special token be
    # encoded as ordinary text instead of raising an error.
    tokens = tokenizer.encode(text, disallowed_special=())
    return len(tokens)

In [22]:
# Sample sentence used to sanity-check the token counter (default model).
text = (
    "hello I am a chunk of text and using the calculate_number_of_tokens function "
    "we can find the length of this chunk of text in tokens"
)
calculate_number_of_tokens(text=text)

27

In [28]:
from langchain.document_loaders import JSONLoader
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import (
    CharacterTextSplitter,
    RecursiveCharacterTextSplitter,
)


# Load the JSON file as LangChain documents.
# `jq_schema=".data"` selects the value under the top-level "data" key.
# `text_content=False` tells the loader that the selected content does not
# have to be a plain string.
loader = JSONLoader(
    file_path=fp,
    jq_schema=".data",
    text_content=False,
)

data = loader.load()

# Embedding model and the directory the vector store is persisted to
embedding = OpenAIEmbeddings()
persist_directory = "../../data/doc_db"


# Count the number of tokens in the first loaded document.
# LLMs struggle when a prompt contains too many tokens, which is why the
# document is split into smaller chunks before it is embedded.
calculate_number_of_tokens(text=data[0].page_content)

1425

In [29]:
# Maximum size of each chunk and how much neighbouring chunks overlap.
# NOTE(review): the splitter measures size with its length function, which is
# presumably the default character count rather than tokens — confirm.
CHUNK_SIZE = 1_000
CHUNK_OVERLAP = 50

# Split the doc(s)
# Separators are tried in the listed order; the final "" is the fallback.
# NOTE(review): whether r"\{\}" is treated as a regex or as a literal string
# depends on the installed langchain version — confirm it matches as intended.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    separators=["\n\n", "\n", r"\{\}", ""],
)
splits = text_splitter.split_documents(documents=data)
# Number of chunks produced
len(splits)

5

In [36]:
# Run once!
# Embed the chunks and create the vector store, saving it to disk under
# `persist_directory`.
# NOTE(review): presumably "run once" because re-running would embed and
# store the same chunks again — confirm.
vectordb = Chroma.from_documents(
    documents=splits,
    embedding=embedding,
    persist_directory=persist_directory,
)

In [39]:
# Re-open the vector store from `persist_directory`, using the same embedding
# model for queries.
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embedding)

# Number of stored docs (read through the private `_collection` attribute)
print(vectordb._collection.count())

5


In [40]:
from langchain.memory import (
    ConversationBufferWindowMemory,
    ConversationTokenBufferMemory,
)
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI


# Chat model; temperature 0 to keep answers as deterministic as possible.
llm = ChatOpenAI(temperature=0)

# Create memory: a sliding window over the most recent exchanges.
# `input_key` names the chain input that is recorded as the user's turn and
# `memory_key` names the variable under which the history is exposed.
memory = ConversationBufferWindowMemory(
    k=5,  # window size
    input_key="question",
    memory_key="chat_history",
)

# Expose the vector store through the retriever interface used by the chain.
retriever = vectordb.as_retriever()

In [41]:
# Init Chatbot
# "stuff" chain type: the retrieved documents are placed together into a
# single prompt for the LLM.
# `chain_type_kwargs` are forwarded to the underlying documents chain:
#   - "document_separator": string placed between the retrieved documents.
#   - "memory": the window memory created earlier.
decide_bot = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,  # retrieve data from data source
    verbose=False,
    chain_type_kwargs={
        "document_separator": "<<<<>>>>>",
        "memory": memory,
    },
)

In [42]:
question = "What is the maxEMIEligibility of the customer?"
# To also print the token usage, run the chain through the helper instead
# (keyword-only arguments; it returns the output of `chain.run`):
# result = count_tokens(chain=decide_bot, query=question)
result = decide_bot({"query": question})

# Display the returned dict (contains the "query" and the "result").
result

{'query': 'What is the maxEMIEligibility of the customer?',
 'result': 'The maxEMIEligibility of the customer is 2898.'}

In [43]:
# Ask using a natural-language description of the field.
question = "What is the average monthly debit?"
result = decide_bot({"query": question})

# Display the returned dict (contains the "query" and the "result").
result

{'query': 'What is the average monthly debit?',
 'result': 'The average monthly debit is as follows:\n- February 2022: ₦3,065.48\n- March 2022: ₦60,793.96\n- April 2022: ₦114,106.87\n- May 2022: ₦86,431.17'}

In [44]:
# Ask again, this time using the exact field name.
question = (
    "What is the averageMonthlyDebit?"  # It's better to use the exact variable names
)
result = decide_bot({"query": question})

# Display the returned dict (contains the "query" and the "result").
result

{'query': 'What is the averageMonthlyDebit?',
 'result': 'The averageMonthlyDebit is [{"month": "2022-04", "amount": 5705.34}, {"month": "2022-05", "amount": 5401.95}, {"month": "2022-02", "amount": 383.19}, {"month": "2022-03", "amount": 1599.84}].'}

In [None]:
# Two facts requested in a single question.
question = "What is the customer ID and is the customer a salary earner?"
result = decide_bot({"query": question})

# Display the chain's response.
result

In [None]:
# Yes/no question with a follow-up request to list the matches.
question = "Are there any recurringExpenses in the transaction data? If yes, list them"
result = decide_bot({"query": question})

# Display the chain's response.
result

In [None]:
# Question that requires comparing the monthly values.
question = "Which month had the highest totalMonthlyDebit and what was the amount?"
result = decide_bot({"query": question})

# Display the chain's response.
result

In [None]:
# Most recent chat history: show what the window memory currently holds
# (exposed under its `memory_key`, "chat_history").
pprint(memory.load_memory_variables({}))

In [45]:
from pprint import pprint
from typing import Any, Dict, List, Optional

import click
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import JSONLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.memory import ConversationBufferWindowMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from pydantic import BaseModel, PrivateAttr


class DecideBot(BaseModel):
    """This is used to create the Decide AI Assistant.

    The assistant loads a JSON file, splits it into chunks, embeds the chunks
    into a Chroma vector store and answers questions through a RetrievalQA
    chain. The chain is built on the first question and reused afterwards, so
    the data is not re-embedded and the conversation memory is not recreated
    on every call.
    """

    # Constants
    PERSIST_DIRECTORY: str = "./data/doc_db"
    CHUNK_SIZE: int = 1_000
    CHUNK_OVERLAP: int = 50
    TEMPERATURE: float = 0.0

    # Variables
    filepath: str

    # Cached RetrievalQA chain. Declared as a private attribute so it is not a
    # model field: it is not a constructor argument and is not part of the repr.
    _rag_model: Optional[RetrievalQA] = PrivateAttr(default=None)

    def _load_data(self) -> List[Any]:
        """This is used to load the JSON data.

        Returns the documents produced by `JSONLoader` for the value under the
        top-level "data" key of `self.filepath`.
        """
        loader = JSONLoader(
            file_path=self.filepath,
            jq_schema=".data",
            text_content=False,
        )
        data = loader.load()
        return data

    def _preprocess_data(self) -> Chroma:
        """This is used to split and store the embedded data in a vector store.

        Returns the Chroma vector store built from the split documents.
        """
        data = self._load_data()

        # Embedding model
        embedding = OpenAIEmbeddings()

        # Split the doc(s)
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.CHUNK_SIZE,
            chunk_overlap=self.CHUNK_OVERLAP,
            separators=["\n\n", "\n", r"\{\}", ""],
        )
        splits = text_splitter.split_documents(documents=data)

        # Create and save vector store to disk
        # NOTE(review): this embeds and writes to PERSIST_DIRECTORY every time
        # it runs; presumably a directory left over from an earlier run gets
        # the same chunks added again — confirm.
        vector_db = Chroma.from_documents(
            documents=splits,
            embedding=embedding,
            persist_directory=self.PERSIST_DIRECTORY,
        )
        return vector_db

    # Create bot
    def _create_RAG_model(self) -> RetrievalQA:
        """This is used to create a RAG model for question and answering.

        Every call builds a new chain: new LLM client, new window memory and a
        freshly built vector store. Use `generate_response` to reuse one chain
        across questions.
        """
        llm = ChatOpenAI(temperature=self.TEMPERATURE)
        # Create memory
        memory = ConversationBufferWindowMemory(
            k=5,  # window size
            input_key="question",
            memory_key="chat_history",
        )
        vector_db = self._preprocess_data()

        # Init Chatbot
        decide_bot = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=vector_db.as_retriever(),  # retrieve data from data source
            verbose=False,
            chain_type_kwargs={
                "document_separator": "<<<<>>>>>",
                "memory": memory,
            },
        )
        return decide_bot

    # Generate answers
    def generate_response(self, *, question: str) -> Dict[str, Any]:
        """This is a chat bot used for question and answering.

        Params:
            question: The question to ask about the loaded data.

        Returns:
            The dict returned by the RetrievalQA chain for the query.
        """
        # Build the chain only once per instance; rebuilding it per question
        # would re-embed the data and throw away the conversation memory.
        if self._rag_model is None:
            self._rag_model = self._create_RAG_model()
        result = self._rag_model({"query": question})
        return result

In [48]:
# Instantiate the assistant for the same JSON file. Construction only stores
# the settings; no data is loaded or embedded until a question is asked.
d_ai = DecideBot(filepath=fp)

# Display the instance (its repr lists the field values).
d_ai

DecideBot(PERSIST_DIRECTORY='./data/doc_db', CHUNK_SIZE=1000, CHUNK_OVERLAP=50, TEMPERATURE=0.0, filepath='../../data/res.json')