[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/jmalbornoz/SimpleRAG/blob/main/2_simple_rag_example.ipynb)

# Simple RAG workflow
## Dr José M Albornoz
### April 2024

In [None]:
a = []
while(1):
    a.append('1')

# 0.- Install dependencies

In [1]:
import sys
import os
!pip install torch==2.2.0 --no-warn-script-location > /dev/null
!pip install langchain==0.0.335 --no-warn-script-location > /dev/null
!pip install pygpt4all==1.1.0 --no-warn-script-location > /dev/null
!pip install gpt4all==1.0.12 --no-warn-script-location > /dev/null
!pip install transformers==4.35.1 --no-warn-script-location > /dev/null
!pip install datasets==2.14.6 --no-warn-script-location > /dev/null
!pip install tiktoken==0.4.0 --no-warn-script-location > /dev/null
!pip install chromadb==0.4.15 --no-warn-script-location > /dev/null
!pip install sentence_transformers==2.2.2 --no-warn-script-location > /dev/null

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torchaudio 2.2.1+cu121 requires torch==2.2.1, but you have torch 2.2.0 which is incompatible.
torchtext 0.17.1 requires torch==2.2.1, but you have torch 2.2.0 which is incompatible.
torchvision 0.17.1+cu121 requires torch==2.2.1, but you have torch 2.2.0 which is incompatible.[0m[31m
[0m

# 1.- Imports

In [2]:
import requests
import contextlib
import pandas as pd
import time
import io

from tqdm import tqdm
from langchain.llms import GPT4All
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.indexes import VectorstoreIndexCreator
from langchain.embeddings import HuggingFaceEmbeddings
from datasets import load_dataset

# 2.- Define model URL and model folder

In [3]:
! mkdir models

mkdir: cannot create directory ‘models’: File exists


In [4]:
url = 'https://huggingface.co/nomic-ai/gpt4all-falcon-ggml/resolve/main/ggml-model-gpt4all-falcon-q4_0.bin'

# 3.- Define RagBot class

In [5]:
class RAGBot:
    """
    A class to handle model downloading, dataset management, model loading, vector database
    creation, retrieval mechanisms, and inference for a response generation bot.

    Attributes
    ----------
    model_path : str
        The file path where the model is stored.
    data_path : str
        The file path where the dataset is stored.
    user_input : str
        The input provided by the user for generating a response.
    model : str
        The name of the model being used.
    """

    def __init__(self):
        """
        Initializes the RAGBot with default values for model path, data path,
        user input, and model.
        """
        self.model_path = ""
        self.data_path = ""
        self.user_input = ""
        self.model = ""

    def get_model(self, model, chunk_size: int = 10000):
        """
        Downloads the specified model to the model path. Supports downloading of large
        models in chunks.

        Additional download tooling is reserved for users to add their own models. Currently hardcoded to load Falcon from

        Parameters
        ----------
        model : str
            The name of the model to be downloaded.
        chunk_size : int, optional
            The size of each chunk of data to download at a time, by default 10000.
        """

        self.model = model
        self.model_path = "/content/models/ggml-model-gpt4all-falcon-q4_0.bin"

        if not os.path.isfile(self.model_path):

            print('Downloading ggml model')

            # send a GET request to the URL to download the file. Stream since it's large
            response = requests.get(url, stream=True)

            # open the file in binary mode and write the contents of the response to it in chunks
            # This is a large file, so be prepared to wait.
            with open(self.model_path, 'wb') as f:
                for chunk in tqdm(response.iter_content(chunk_size=10000)):
                    if chunk:
                        f.write(chunk)
        else:
            print('model already exists in path.')

    def download_dataset(self, dataset):
        """
        Downloads the specified dataset and saves it to the data path.

        Parameters
        ----------
        dataset : str
            The name of the dataset to be downloaded.
        """
        self.data_path = dataset + '_dialogues.txt'

        if not os.path.isfile(self.data_path):

            datasets = {"robot maintenance": "FunDialogues/customer-service-robot-support",
                        "basketball coach": "FunDialogues/sports-basketball-coach",
                        "physics professor": "FunDialogues/academia-physics-office-hours",
                        "grocery cashier" : "FunDialogues/customer-service-grocery-cashier"}

            # Download the dialogue from hugging face
            print('downloading dialog dataset')
            dataset = load_dataset(f"{datasets[dataset]}")
            # Convert the dataset to a pandas dataframe
            dialogues = dataset['train']
            df = pd.DataFrame(dialogues, columns=['id', 'description', 'dialogue'])
            # Print the first 5 rows of the dataframe
            df.head()
            # only keep the dialogue column
            dialog_df = df['dialogue']

            # save the data to txt file
            dialog_df.to_csv(self.data_path, sep=' ', index=False)
        else:
            print('data already exists in path.')

    def load_model(self, n_threads, max_tokens, repeat_penalty, n_batch, top_k, temp):
        """
        Loads the model with specified parameters for parallel processing.

        Parameters
        ----------
        n_threads : int
            The number of threads for parallel processing.
        max_tokens : int
            The maximum number of tokens for model prediction.
        repeat_penalty : float
            The penalty for repeated tokens in generation.
        n_batch : int
            The number of batches for processing.
        top_k : int
            The number of top k tokens to be considered in sampling.
        """
        # Callbacks support token-wise streaming
        callbacks = [StreamingStdOutCallbackHandler()]

        # Verbose is required to pass to the callback manager
        print('Loading model...')

        self.llm = GPT4All(model=self.model_path, callbacks=callbacks, verbose=False,
                           n_threads=n_threads, n_predict=max_tokens, repeat_penalty=repeat_penalty,
                           n_batch=n_batch, top_k=top_k, temp=temp)

    def build_vectordb(self, chunk_size, overlap):
        """
        Builds a vector database from the dataset for retrieval purposes.

        Parameters
        ----------
        chunk_size : int
            The size of text chunks for vectorization.
        overlap : int
            The overlap size between chunks.
        """
        loader = TextLoader(self.data_path)

        # Text Splitter
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=overlap)

        # Embed the document and store into chroma DB
        self.index = VectorstoreIndexCreator(embedding= HuggingFaceEmbeddings(), text_splitter=text_splitter).from_loaders([loader])

    def retrieval_mechanism(self, user_input, top_k=1, context_verbosity = False, rag_off= False):
        """
        Retrieves relevant document snippets based on the user's query.

        Parameters
        ----------
        user_input : str
            The user's input or query.
        top_k : int, optional
            The number of top results to return, by default 1.
        context_verbosity : bool, optional
            If True, additional context information is printed, by default False.
        rag_off : bool, optional
            If True, disables the retrieval-augmented generation, by default False.
        """

        self.user_input = user_input
        self.context_verbosity = context_verbosity

        # perform a similarity search and retrieve the context from our documents
        results = self.index.vectorstore.similarity_search(self.user_input, k=top_k)

        # join all context information into one string
        context = "\n".join([document.page_content for document in results])
        if self.context_verbosity:
            print(f"Retrieving information related to your question...")
            print(f"Found this content which is most similar to your question: {context}")

        if rag_off:
            template = """Question: {question}
            Answer: This is the response: """
            self.prompt = PromptTemplate(template=template, input_variables=["question"])
        else:
            template = """ Don't just repeat the following context, use it in combination with your knowledge to improve your answer to the question:{context}

            Question: {question}
            """
            self.prompt = PromptTemplate(template=template, input_variables=["context", "question"]).partial(context=context)


    def inference(self):
        """
        Performs inference to generate a response based on the user's query.

        Returns
        -------
        str
            The generated response.
        """

        if self.context_verbosity:
            print(f"Your Query: {self.prompt}")

        llm_chain = LLMChain(prompt=self.prompt, llm=self.llm)

        print("Processing the information with gpt4all...\n")
        response = llm_chain.run(self.user_input)

        return response


# 3.- Implement RAGBot

In [6]:
bot = RAGBot()

In [7]:
n_threads = 64

In [8]:
max_tokens = 50

In [9]:
repeat_penalty = 1.5

In [10]:
n_batch = n_threads

In [11]:
top_k = 2

In [12]:
temp = 0.7

In [13]:
bot.get_model(model = 'Falcon')

model already exists in path.


In [14]:
bot.load_model(n_threads=n_threads, max_tokens=max_tokens, repeat_penalty=repeat_penalty, n_batch=n_threads, top_k=top_k, temp=temp)

Loading model...
Found model file at  /content/models/ggml-model-gpt4all-falcon-q4_0.bin


In [15]:
bot.download_dataset(dataset = 'grocery cashier')

data already exists in path.


In [16]:
bot.build_vectordb(chunk_size = 500, overlap = 50)

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(


In [20]:
query = "do you sell oat milk?"

In [21]:
bot.retrieval_mechanism(user_input = query, rag_off = False)

In [22]:
response = bot.inference()

Processing the information with gpt4all...


Answer: No, we only offer almond milk at our store.

In [23]:
import psutil
psutil.virtual_memory()

svmem(total=13609451520, available=7155441664, percent=47.4, used=6115717120, free=4698497024, active=1174745088, inactive=7248003072, buffers=350019584, cached=2445217792, shared=5074944, slab=360239104)