<a target="_blank" href="https://colab.research.google.com/github/castillosebastian/genai0/blob/main/exp/Ragbot_template.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# Experiment Plan

Evaluate results on accuracy and speed.
Comparison:
- Basic table retriever.
- Advanced table retriever.

# RAGbot Parameters

In [None]:
# --- Document the bot will chat with ---
url_doc_to_chat = (
    "https://ir.tesla.com/_flysystem/s3/sec/000095017023001409/tsla-20221231-gen.pdf"
)

# --- Settings for building the Chroma vector DB ---
embeddings_model = "thenlper/gte-large"   # name of the embedding model
chunk_size, chunk_overlap = 1024, 100     # text-splitter chunk length and overlap
k_docs_context = 3                        # how many chunks the retriever returns per query

# Set-up LLM Mistral 7b-Q4

In [None]:
# Inspired by, Nour Eddine Zekaoui, in his post 'Your Web Pages Using Mistral-7b & LangChain', 
# [github](https://github.com/zekaouinoureddine/Adding-Private-Data-to-LLMs/tree/master)

!pip install gradio --quiet
!pip install xformer --quiet
!pip install chromadb --quiet
!pip install langchain --quiet
!pip install accelerate --quiet
!pip install transformers --quiet
!pip install bitsandbytes --quiet
!pip install unstructured --quiet
!pip install sentence-transformers --quiet

import locale
locale.getpreferredencoding = lambda: "UTF-8"

!pip install pypdfium2

Mistral-7b LLM

In [None]:
import torch
import gradio as gr

from textwrap import fill
from IPython.display import Markdown, display

from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
    )

from langchain import PromptTemplate
from langchain import HuggingFacePipeline

from langchain.vectorstores import Chroma
from langchain.schema import AIMessage, HumanMessage
from langchain.memory import ConversationBufferMemory
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredMarkdownLoader, UnstructuredURLLoader
from langchain.chains import LLMChain, SimpleSequentialChain, RetrievalQA, ConversationalRetrievalChain

from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline

import warnings
warnings.filterwarnings('ignore')


# Set up a local text-generation pipeline around Mistral-7B-Instruct (Mistral AI):
# load the pre-trained model with 4-bit quantization, load its tokenizer, configure the
# generation parameters, and wrap everything in a LangChain-compatible `llm` object.

MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"

# 4-bit NF4 weights with double quantization; computations run in float16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=torch.float16,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=quantization_config
)

# Start from the model's published generation defaults, then override them.
generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
generation_config.max_new_tokens = 1024
generation_config.temperature = 0.0001
generation_config.top_p = 0.95
generation_config.do_sample = True
generation_config.repetition_penalty = 1.15

# Fixed: the instance is bound to its own name instead of `pipeline`. The original
# `pipeline = pipeline(...)` overwrote the factory imported from transformers, so
# re-running this cell would call the pipeline object rather than the factory.
text_generation_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,
    generation_config=generation_config,
)

# The star!
llm = HuggingFacePipeline(
    pipeline=text_generation_pipeline,  # HuggingFacePipeline is a class that allows you to run Hugging Face models locally
)

# Retriever with Tables

In [3]:
import time
from typing import Any
from pydantic import BaseModel
from unstructured.partition.pdf import partition_pdf
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain.vectorstores import DocArrayInMemorySearch, Chroma
import uuid
from langchain.retrievers.multi_vector import MultiVectorRetriever
from langchain.storage import InMemoryStore
from langchain_core.documents import Document
from langchain_core.runnables import RunnablePassthrough

# References
#   - Install tesseract first!
#   - https://python.langchain.com/docs/integrations/providers/unstructured
#   - https://github.com/Unstructured-IO/unstructured

# Partition the PDF ----------------------------------------------------------------------
# Parameter reference: https://unstructured-io.github.io/unstructured/core/partition.html#partition-pdf
partition_options = dict(
    filename='/home/sebacastillo/genai0/bd/Apple_2023.pdf',
    # Embedded image blocks are not extracted in this run
    extract_images_in_pdf=False,
    image_output_dir_path='bd/image',
    # Layout model (YOLOX, per the original author's note) yields bounding boxes for
    # tables and detects titles; a title marks any sub-section of the document
    infer_table_structure=True,
    # After titles are known, text is aggregated into chunks per title
    chunking_strategy="by_title",
    # Chunk sizing: hard cap at 4000 chars, try to start a new chunk after 3800 chars,
    # and try to keep chunks above 2000 chars
    max_characters=4000,
    new_after_n_chars=3800,
    combine_text_under_n_chars=2000,
)

# Time the partitioning step with a monotonic high-resolution clock
start_time_partitionpdf = time.perf_counter()
raw_pdf_elements = partition_pdf(**partition_options)
end_time_partitionpdf = time.perf_counter()
duration_partition_pdf = end_time_partitionpdf - start_time_partitionpdf

In [5]:
# Lightweight record for one partitioned element
class Element(BaseModel):
    """A PDF element reduced to a category label (`type`) and its content (`text`)."""
    type: str
    text: Any

# (class-path fragment, label) pairs, checked in order against str(type(element));
# elements matching neither fragment are dropped
_TYPE_MARKERS = (
    ("unstructured.documents.elements.Table", "table"),
    ("unstructured.documents.elements.CompositeElement", "text"),
)

# Label every raw element by its class path
categorized_elements = []
for element in raw_pdf_elements:
    class_path = str(type(element))
    for marker, label in _TYPE_MARKERS:
        if marker in class_path:
            categorized_elements.append(Element(type=label, text=str(element)))
            break

# Split by label, then report how many tables and text chunks were found
table_elements = [item for item in categorized_elements if item.type == "table"]
text_elements = [item for item in categorized_elements if item.type == "text"]
print(len(table_elements))
print(len(text_elements))

57
125


In [10]:
# Print one categorized table element (index 20) to inspect the extracted table text.
print(table_elements[20])

type='table' text='2023 Adjusted Cost Unrealized Gains Unrealized Losses Fair Value Cash and Cash Equivalents Current Marketable Securities Cash $ 28,359 $ — $ — $ 28,359 $ 28,359 $ — $ Level 1: Money market funds Mutual funds and equity securities 481 442 — 12 — (26) 481 428 481 — — 428 Subtotal (1) Level 2 : 923 12 (26) 909 481 428 U.S. Treasury securities U.S. agency securities Non-U.S. government securities Certificates of deposit and time deposits Commercial paper Corporate debt securities Municipal securities Mortgage- and asset-backed securities 19,406 5,736 17,533 1,354 608 76,840 628 22,365 — — 6 — — 6 — 6 (1,292) (600) (1,048) — — (5,956) (26) (2,735) 18,114 5,136 16,491 1,354 608 70,890 602 19,636 35 36 — 1,034 — 20 — — 5,468 271 11,332 320 608 12,627 192 344 Subtotal 144,470 18 (11,657) 132,831 1,125 31,162 Total (2) $ 173,752 $ 30 $ (11,683) $ 162,099 $ 29,965 $ 31,590 $ Non-Current Marketable Securities — — — — 12,611 4,829 5,159 — — 58,243 410 19,292 100,544 100,544'


# RAGbot SetUp

In [None]:
# Embedding model used to vectorize document chunks for the vector DB.
# Fixed: the device was hard-coded to "cuda", which fails on hosts without a GPU;
# use CUDA when torch reports it as available and fall back to the CPU otherwise.
embedding_device = "cuda" if torch.cuda.is_available() else "cpu"

embeddings = HuggingFaceEmbeddings(
    model_name=embeddings_model,
    model_kwargs={"device": embedding_device},
    # Normalize the produced embedding vectors
    encode_kwargs={"normalize_embeddings": True},
)

In [None]:
from langchain.document_loaders import PyPDFium2Loader
# Load the target document into LangChain documents
loader = PyPDFium2Loader(url_doc_to_chat)
docs = loader.load()

# Vector DB: split into overlapping chunks, embed them, and store them in Chroma
# under the local "db" directory
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=chunk_overlap,
    chunk_size=chunk_size,
)
texts_chunks = text_splitter.split_documents(docs)
db = Chroma.from_documents(
    documents=texts_chunks,
    embedding=embeddings,
    persist_directory="db",
)

# Prompt
# Template asking the LLM to rewrite a follow-up question as a standalone question.
# It exposes two placeholders, {chat_history} and {question}, and is handed to the
# retrieval chain as its condense_question_prompt.
custom_template = """You are a Financial AI Assistant. Given the
following conversation and a follow up question, rephrase the follow up question
to be a standalone question. At the end of standalone question add this
'Answer the question.' If you do not know the answer reply with 'I do not have enough information'.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:
"""
# Compile the raw template string into a PromptTemplate object.
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)

# RAG function

def querying(query, history):
    """Answer ``query`` from the vector DB, taking earlier chat turns into account.

    Args:
        query: The user's latest message.
        history: Earlier turns supplied by the chat UI; may be empty or ``None``.
            Each turn is either a ``[user_message, bot_message]`` pair or a
            ``{"role": ..., "content": ...}`` dict.
            NOTE(review): which shape arrives depends on the installed Gradio
            version/configuration — confirm.

    Returns:
        The chain's answer with leading/trailing whitespace removed.
    """
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    # Fixed: the memory used to be created empty on every call and `history` was
    # ignored, so the chain never saw earlier turns and follow-up questions could not
    # be condensed against the conversation. Seed the memory from `history` instead.
    for turn in history or []:
        if isinstance(turn, dict):
            role, content = turn.get("role"), turn.get("content")
            if content is None:
                continue
            if role == "user":
                memory.chat_memory.add_user_message(content)
            elif role == "assistant":
                memory.chat_memory.add_ai_message(content)
        else:
            user_message, bot_message = turn
            if user_message is not None:
                memory.chat_memory.add_user_message(user_message)
            if bot_message is not None:
                memory.chat_memory.add_ai_message(bot_message)

    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=db.as_retriever(search_kwargs={"k": k_docs_context}),  # Top n docs of db
        memory=memory,
        condense_question_prompt=CUSTOM_QUESTION_PROMPT,
    )

    result = qa_chain({"question": query})

    return result["answer"].strip()

# Chat UI around the RAG function `querying`

# Example prompts shown in the interface
example_questions = [
    "What is the Tesla revenue in 2022",
    "Summarize the balance sheet of Tesla",
]

# Captions for the interface buttons
button_labels = {
    "retry_btn": "Retry",
    "undo_btn": "Undo",
    "clear_btn": "Clear",
    "submit_btn": "Submit",
}

iface = gr.ChatInterface(
    fn=querying,
    title="POC: RAGgbot",
    theme="soft",
    chatbot=gr.Chatbot(height=600),
    textbox=gr.Textbox(placeholder="Message", container=False, scale=7),
    examples=example_questions,
    cache_examples=True,
    **button_labels,
)

# Launch app

In [None]:
# Start the chat interface built above.
# NOTE(review): share=True presumably requests a publicly reachable Gradio link —
# confirm that exposing the bot outside this machine is intended.
iface.launch(share=True)