In [None]:
# Advanced RAG Implementation on Custom Data Using Hybrid Search, Embed Caching And Mistral-AI
# https://medium.aiplanet.com/advanced-rag-implementation-on-custom-data-using-hybrid-search-embed-caching-and-mistral-ai-ce78fdae4ef6

In [None]:
# RUN: chainlit run model.py

# 01: CONFIGURE
# Checkpoint location handed to AutoModelForCausalLM / AutoTokenizer in load_llm().
MODEL_ID = '/work/u00cjz00/slurm_jobs/github/models/Llama-2-7B-Chat-GPTQ'
# Directory that qa_bot() passes to FAISS.load_local() to open the saved index.
DB_FAISS_PATH = "vectorstore/db_faiss"

# 02: Load LIBRARY
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain import PromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import CTransformers
from langchain.chains import RetrievalQA
import chainlit as cl
import transformers
import torch
from langchain.llms import HuggingFacePipeline
#from transformers import AutoTokenizer
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

#warnings.filterwarnings('ignore')

# 03: custom_prompt_template
# Prompt text consumed by set_custom_prompt(); it exposes the two placeholders
# {context} and {question}. Written one source line per output line so that every
# newline (including the trailing one) is explicit.
custom_prompt_template = (
    "Use the following pieces of information to answer the user's question.\n"
    "If you don't know the answer, just say that you don't know, don't try to make up an answer.\n"
    "\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "\n"
    "Only return the helpful answer below and nothing else.\n"
    "Helpful answer:\n"
)
# 03 tmp: alternative prompt template with a financial-advisor persona.
# It exposes the same {context} / {question} placeholders as custom_prompt_template,
# but nothing in the code visible here references it: set_custom_prompt() builds its
# prompt from custom_prompt_template. The string starts with a newline and contains
# significant whitespace, so edit it with care.

custom_prompt_template_temp = '''
You are my financial advisor. You are great at providing tips on investments, savings and on financial markets with your knowledge in finances.
With the information being provided try to answer the question. 
If you cant answer the question based on the information either say you cant find an answer or unable to find an answer.
So try to understand in depth about the context and answer only based on the information provided. Dont generate irrelevant answers

Context: {context}
Question: {question}
Do provide only helpful answers

Helpful answer:
'''


def set_custom_prompt():
    """Build the PromptTemplate used by the retrieval QA chain.

    The template text is the module-level ``custom_prompt_template``; the two
    variables it declares are ``context`` and ``question``.

    Returns:
        A PromptTemplate constructed from that text and those variables.
    """
    return PromptTemplate(
        input_variables=['context', 'question'],
        template=custom_prompt_template,
    )

# 04: Retrieval QA Chain
def retrieval_qa_chain(llm, prompt, db, k=5):
    """Assemble a "stuff" RetrievalQA chain on top of a vector store.

    Args:
        llm: Language model handed to ``RetrievalQA.from_chain_type``.
        prompt: Prompt forwarded to the chain as ``chain_type_kwargs['prompt']``.
        db: Vector store; it must provide ``as_retriever(search_kwargs=...)``.
        k: Value placed in the retriever's ``search_kwargs['k']`` (how many
            documents to fetch per query). Defaults to 5, which was previously
            hard-coded, so existing three-argument callers are unaffected.

    Returns:
        The chain returned by ``RetrievalQA.from_chain_type``, configured with
        ``return_source_documents=True``.
    """
    retriever = db.as_retriever(search_kwargs={'k': k})
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={'prompt': prompt},
    )

# 05: QA Model Function
def load_llm():
    """Load the model at ``MODEL_ID`` and wrap it as a LangChain LLM.

    The model is loaded with ``device_map="auto"`` and the tokenizer from the same
    path; both are combined into a transformers ``text-generation`` pipeline whose
    generation settings (sampling enabled, temperature 0.7, top_p 0.95, top_k 40,
    repetition_penalty 1.1, up to 2048 new tokens) are fixed here.

    Returns:
        A ``HuggingFacePipeline`` wrapping that pipeline.
    """
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    # Deliberately not named `pipeline`: that would shadow the `pipeline` function
    # imported from transformers at module level.
    text_generator = transformers.pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=2048,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        top_k=40,
        repetition_penalty=1.1,
    )
    # The sampling configuration lives on the pipeline above. No `model_kwargs`
    # temperature is passed to the wrapper: a value of 0 there contradicted the
    # sampling settings, and HuggingFacePipeline does not forward `model_kwargs`
    # to a pipeline that is already built.
    return HuggingFacePipeline(pipeline=text_generator)

def qa_bot():
    """Wire embeddings, the saved FAISS index, the LLM and the prompt into one chain.

    Steps, in order: create the sentence-transformers embedding wrapper, open the
    index stored at ``DB_FAISS_PATH`` with it, load the LLM, build the prompt, and
    pass all of them to ``retrieval_qa_chain``.

    Returns:
        Whatever ``retrieval_qa_chain`` returns for those components.
    """
    encoder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vector_store = FAISS.load_local(DB_FAISS_PATH, encoder)
    # Arguments are evaluated left to right, so the LLM is still loaded before the
    # prompt is built.
    return retrieval_qa_chain(load_llm(), set_custom_prompt(), vector_store)

# 06: output function (raw chain response)
def simple_result(qa_bot_cahin, query):
    """Run one query through the chain and hand back its response untouched.

    Args:
        qa_bot_cahin: Callable chain; it is invoked with ``{'query': query}``.
            (The parameter name keeps its original spelling.)
        query: The question to ask.

    Returns:
        The object returned by the chain call, unmodified.
    """
    return qa_bot_cahin({'query': query})

# 07: output function (answer text with sources appended)
def final_result(qa_bot_cahin, query):
    """Run one query and return the answer followed by its sources as one string.

    Args:
        qa_bot_cahin: Callable chain; it is invoked with ``{'query': query}`` and
            its response must offer the keys ``"result"`` and
            ``"source_documents"``. (The parameter name keeps its original
            spelling.)
        query: The question to ask.

    Returns:
        The ``"result"`` text, then three newlines, then either the sources
        label followed by ``str(source_documents)`` or, when the sources are
        empty/falsy, the text "No sources found".
    """
    outcome = qa_bot_cahin({'query': query})
    text = outcome["result"]
    docs = outcome["source_documents"]
    if not docs:
        return text + "\n\n\nNo sources found"
    return text + "\n\n\n資料來源, Sources:" + str(docs)

In [None]:
# load model
# Build the whole QA chain once via qa_bot() (embeddings, FAISS index, LLM, prompt).
# The query cells below reuse this object; note the variable name is spelled "cahin"
# consistently throughout the notebook.
qa_bot_cahin = qa_bot()

In [None]:
# 06: Retrieve passages from the vector store that are similar to the query.
# Uses simple_result, so the raw chain response is available: the answer text is
# printed first, then the retrieved source documents.
query = "What is Acupuncture?"
response=simple_result(qa_bot_cahin,query)
print(f"Response generated : \n {response['result']}")
print(f"Source Documents : \n {response['source_documents']}")

In [None]:
# 07: Retrieve passages from the vector store that are similar to the query.
# Same question as the previous cell, but through final_result, which returns the
# answer and its sources combined into a single string.
query = "What is Acupuncture?"
answer=final_result(qa_bot_cahin,query)
print(answer)