<a target="_blank" href="https://colab.research.google.com/github/castillosebastian/genai0/blob/main/exp/exp2_rag_mistral/exp.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# Add financial document for the RAGbot

In [None]:
# URL of the PDF that the RAG bot indexes (file name: tsla-20221231-gen.pdf,
# presumably Tesla's filing for fiscal year 2022 — confirm).
# NOTE(review): the variable name is a typo of "pdf_url"; it is kept as-is
# because the PDF loader cell further down reads `pdf_ulr`.
pdf_ulr = "https://ir.tesla.com/_flysystem/s3/sec/000095017023001409/tsla-20221231-gen.pdf"

# Set up the Mistral-7B LLM (4-bit quantized)

In [None]:
# Inspired by Nour Eddine Zekaoui's post 'Your Web Pages Using Mistral-7b & LangChain';
# see [github](https://github.com/zekaouinoureddine/Adding-Private-Data-to-LLMs/tree/master)

# Install the notebook's dependencies using IPython `!` shell escapes.
!pip install gradio --quiet
# NOTE(review): "xformer" may be a typo for "xformers"; nothing visible in this
# notebook imports either package — confirm which one (if any) is intended.
!pip install xformer --quiet
!pip install chromadb --quiet
!pip install langchain --quiet
!pip install accelerate --quiet
!pip install transformers --quiet
!pip install bitsandbytes --quiet
!pip install unstructured --quiet
!pip install sentence-transformers --quiet

import locale
# Make locale.getpreferredencoding() always report UTF-8.
# NOTE(review): presumably a workaround so that later `!` shell commands do not
# fail on a non-UTF-8 locale — confirm before removing or reordering.
locale.getpreferredencoding = lambda: "UTF-8"

# Presumably the PDF backend needed by PyPDFium2Loader, which is used later on.
!pip install pypdfium2

Mistral-7b LLM

In [None]:
import torch
import gradio as gr

from textwrap import fill
from IPython.display import Markdown, display

from langchain.prompts.chat import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
    )

from langchain import PromptTemplate
from langchain import HuggingFacePipeline

from langchain.vectorstores import Chroma
from langchain.schema import AIMessage, HumanMessage
from langchain.memory import ConversationBufferMemory
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredMarkdownLoader, UnstructuredURLLoader
from langchain.chains import LLMChain, SimpleSequentialChain, RetrievalQA, ConversationalRetrievalChain

from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline

import warnings
warnings.filterwarnings('ignore')


# Build the text-generation stack around Mistral-7B-Instruct (by Mistral AI):
# load the tokenizer and the pre-trained model with 4-bit quantization, set the
# generation parameters, and wrap the resulting Hugging Face pipeline in a
# LangChain LLM object (`llm`) that the retrieval chain uses.

MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.1"

# bitsandbytes settings: 4-bit NF4 weights with double quantization and a
# float16 compute dtype.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
# Use the end-of-sequence token for padding.
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=quantization_config,
)

# Start from the model's published generation defaults and override a few.
generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
generation_config.max_new_tokens = 1024
# Sampling stays enabled, but the near-zero temperature makes the output
# almost deterministic.
generation_config.temperature = 0.0001
generation_config.top_p = 0.95
generation_config.do_sample = True
generation_config.repetition_penalty = 1.15

# Use a distinct name for the pipeline object so the imported `pipeline`
# factory is not shadowed; otherwise re-running this cell would call the
# pipeline instance instead of the factory and fail.
text_generation_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,
    generation_config=generation_config,
)

# The star! HuggingFacePipeline lets LangChain run a local Hugging Face model.
llm = HuggingFacePipeline(
    pipeline=text_generation_pipeline,
)

# FinSight RAGbot

In [None]:
# Embedding model handed to the Chroma vector store: gte-large, loaded on the
# GPU, with normalization of the produced embeddings switched on.
embeddings = HuggingFaceEmbeddings(
    encode_kwargs=dict(normalize_embeddings=True),
    model_kwargs=dict(device="cuda"),
    model_name="thenlper/gte-large",
)

In [None]:
from langchain.document_loaders import PyPDFium2Loader

# Load the PDF referenced by `pdf_ulr` into LangChain documents.
loader = PyPDFium2Loader(pdf_ulr)
docs = loader.load()

# Vector DB: split the documents (chunk_size=1024, chunk_overlap=64), embed the
# chunks with `embeddings`, and store them in a Chroma collection persisted
# under the "db" directory.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=64,
    chunk_size=1024,
)
texts_chunks = text_splitter.split_documents(docs)
db = Chroma.from_documents(
    documents=texts_chunks,
    embedding=embeddings,
    persist_directory="db",
)


# Prompt used to condense a follow-up question into a standalone one.
# The template has two placeholders, {chat_history} and {question}; it is
# handed to ConversationalRetrievalChain as `condense_question_prompt` in
# `querying`.
custom_template = """You are a Financial AI Assistant. Given the
following conversation and a follow up question, rephrase the follow up question
to be a standalone question. At the end of standalone question add this
'Answer the question in English language.' If you do not know the answer reply with 'I am sorry, I do not have enough information'.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:
"""
# Build the LangChain prompt object from the raw template string.
CUSTOM_QUESTION_PROMPT = PromptTemplate.from_template(custom_template)

# RAG function

def querying(query, history):
    """Answer ``query`` with the retrieval chain, using earlier turns as context.

    Args:
        query: The user's latest message.
        history: Earlier turns of the conversation as passed in by
            ``gr.ChatInterface``; expected to be a sequence of
            (user message, assistant message) pairs. May be empty or None.

    Returns:
        The chain's ``"answer"`` with leading/trailing whitespace removed.
    """
    # The chain takes its history as (human, ai) tuples. Convert the pairs
    # received from the UI and skip anything that is not a complete text turn.
    chat_history = [
        (turn[0], turn[1])
        for turn in (history or [])
        if isinstance(turn, (list, tuple))
        and len(turn) == 2
        and isinstance(turn[0], str)
        and isinstance(turn[1], str)
    ]

    # No memory object is attached: a memory created inside this function would
    # start empty on every call, so the condense-question step would never see
    # previous turns. The history is passed explicitly instead.
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=db.as_retriever(search_kwargs={"k": 3}),  # top 3 chunks from the DB
        condense_question_prompt=CUSTOM_QUESTION_PROMPT,
    )

    result = qa_chain({"question": query, "chat_history": chat_history})

    return result["answer"].strip()

# Gradio chat UI: every submitted message is routed to `querying`.
iface = gr.ChatInterface(
    fn=querying,
    title="FinSight Ragbot",
    theme="soft",
    # Chat window and input box.
    chatbot=gr.Chatbot(height=600),
    textbox=gr.Textbox(placeholder="Message FinSight", container=False, scale=7),
    # Example prompts offered to the user.
    examples=[
        "What is the Tesla revenue in 2022",
        "Summarize the balance sheet of Tesla",
    ],
    cache_examples=True,
    # Button labels.
    submit_btn="Submit",
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
)

In [None]:
# Start the Gradio app. NOTE(review): share=True presumably requests a public
# share link in addition to the local server — confirm this exposure is intended.
iface.launch(share=True)