# MEDICAL CHATBOT
CHAINLIT+PDF+FAISS_DB+LLAMA2

## 初始環境設定

In [None]:
# Initial environment setup: extend PATH with the user-local bin directory and
# /usr/ubuntu_bin so that later shell commands in this notebook can find
# executables located there.
import os
from pathlib import Path

HOME = str(Path.home())
Add_Binarry_Path = HOME + '/.local/bin:/usr/ubuntu_bin'

# Append only the entries that are not on PATH yet, so re-running this cell
# does not keep growing the variable. An unset PATH is treated as empty
# instead of raising KeyError.
_current_path = os.environ.get('PATH', '')
_present_entries = _current_path.split(':')
_missing_entries = [
    entry for entry in Add_Binarry_Path.split(':')
    if entry not in _present_entries
]
if _missing_entries:
    os.environ['PATH'] = ':'.join(
        ([_current_path] if _current_path else []) + _missing_entries
    )

## 確認CUDA版本, 以及是否能使用GPU
若無GPU，請點選右側->已連線->變更執行階段類型->T4 GPU

In [None]:
# Print the GPU / driver / CUDA status reported by the NVIDIA tooling.
!nvidia-smi
import torch
# Evaluates to True when PyTorch can use a CUDA device; being the last
# expression of the cell, its value is shown as the cell output.
torch.cuda.is_available()

## 安裝套件

In [None]:
# Install the Python dependencies used by ingest.py / model.py and by the
# notebook itself (gdown for the download, pyngrok for the public tunnel).
# NOTE(review): only gdown and chainlit are pinned. ingest.py and model.py
# import from the legacy `langchain.*` module paths (e.g. langchain.embeddings,
# langchain.vectorstores), so an unpinned `langchain` may resolve to a release
# where those paths no longer exist — consider pinning a known-good version.
!pip install cohere gdown==4.7.3 kaleido langchain openai pyngrok pypdf python-dotenv sentence-transformers tiktoken -q
!pip install accelerate bitsandbytes chainlit==0.7.700 faiss-cpu hf_transfer huggingface_hub optimum transformers -q
# auto-gptq is needed to load the GPTQ-quantized model referenced by MODEL_ID in model.py.
!pip install auto-gptq -q
#!pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/  -q # Use cu117 if on CUDA 11.7

### 文件下載

In [None]:
# Create the folder that ingest.py reads PDFs from (its DATA_PATH is 'data/book/').
!mkdir -p data/book/
# Download the source document with gdown (by file id) into that folder.
!gdown 1pUDgs3YMnlr8See8Rld3L1ZRiTeeOlMM -O data/book/

## Step1: create ingest.py

In [None]:
%%bash
cat << \EOF >  ingest.py

# RUN: python3 ingest.py

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Directory scanned for input PDF files.
DATA_PATH = 'data/book/'
# Directory the FAISS index is saved to; model.py loads it from the same path.
DB_FAISS_PATH = 'vectorstore/db_faiss'

# Create vector database
def create_vector_db():
    """Build a FAISS index from the PDFs in DATA_PATH and save it to DB_FAISS_PATH.

    Every ``*.pdf`` directly inside DATA_PATH is loaded with PyPDFLoader, split
    into chunks of 500 characters with a 50-character overlap, embedded on the
    CPU with ``sentence-transformers/all-MiniLM-L6-v2`` and stored in a FAISS
    index that is written to DB_FAISS_PATH.

    Raises:
        ValueError: if no text chunks could be produced, i.e. DATA_PATH holds
            no PDF files or none of them yielded any text.
    """
    loader = DirectoryLoader(DATA_PATH, glob='*.pdf', loader_cls=PyPDFLoader)
    documents = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500,
                                                   chunk_overlap=50)
    texts = text_splitter.split_documents(documents)

    # Fail early with an actionable message instead of letting the index
    # construction fail on an empty input.
    if not texts:
        raise ValueError(
            f"No text could be loaded from PDF files in {DATA_PATH!r}; "
            "nothing to index."
        )

    # The same embedding model must be used when the index is loaded for
    # querying (see qa_bot in model.py).
    embeddings = HuggingFaceEmbeddings(
        model_name='sentence-transformers/all-MiniLM-L6-v2',
        model_kwargs={'device': 'cpu'},
    )

    db = FAISS.from_documents(texts, embeddings)
    db.save_local(DB_FAISS_PATH)

# Build the vector store only when this file is run as a script
# (python3 ingest.py), not when it is imported.
if __name__ == "__main__":
    create_vector_db()

EOF

### 資料入庫

In [None]:
# Ingest the documents into the FAISS vector store
!python3 ingest.py

## Step2: create model.py

In [None]:
%%bash
cat << \EOF >  model.py
# RUN: chainlit run model.py

# 01: CONFIGURE MODEL_ID and DB PATHs
# Model identifier passed to from_pretrained() in load_llm().
MODEL_ID = "TheBloke/Llama-2-7b-Chat-GPTQ"
# Location of the FAISS index loaded in qa_bot(); ingest.py saves to the same path.
DB_FAISS_PATH = 'vectorstore/db_faiss'

# 02: Load LIBRARY
import os
from functools import lru_cache

import chainlit as cl
import torch
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.vectorstores import FAISS
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
#warnings.filterwarnings('ignore')

# 03: custom_prompt_template
# Plain-text variant of the QA prompt. It is not referenced by
# set_custom_prompt(), which uses custom_prompt_template below.
custom_prompt_template3 = """Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""


# Prompt used by set_custom_prompt(). It wraps the same instructions in
# <|im_start|>/<|im_end|> role markers; {context} and {question} are the two
# placeholders declared as input variables of the PromptTemplate.
# NOTE(review): these look like ChatML-style markers, while MODEL_ID points at
# a Llama-2 chat model, which presumably expects the [INST] / <<SYS>> prompt
# format — confirm against the model card.
custom_prompt_template="""<|im_start|>system
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

<|im_end|>
<|im_start|>user
Question: {question}
<|im_end|>
<|im_start|>assistant
"""

def set_custom_prompt():
    """Return the PromptTemplate used for QA retrieval.

    The template text is ``custom_prompt_template``; it declares the two
    input variables ``context`` and ``question``.
    """
    placeholders = ['context', 'question']
    return PromptTemplate(template=custom_prompt_template,
                          input_variables=placeholders)

# 04: Retrieval QA Chain
def retrieval_qa_chain(llm, prompt, db):
    """Assemble a 'stuff'-type RetrievalQA chain.

    Args:
        llm: language model the chain generates answers with.
        prompt: prompt template handed to the chain via ``chain_type_kwargs``.
        db: vector store; its retriever is configured with ``k=5``.

    Returns:
        The RetrievalQA chain, configured to also return source documents.
    """
    top_k_retriever = db.as_retriever(search_kwargs={'k': 5})
    chain_options = {'prompt': prompt}
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=top_k_retriever,
        return_source_documents=True,
        chain_type_kwargs=chain_options,
    )

def load_llm():
    """Load the MODEL_ID model and wrap it as a LangChain HuggingFacePipeline.

    The model is loaded with ``device_map="auto"`` and driven through a
    transformers "text-generation" pipeline that samples with the settings
    listed below.
    """
    # 04: GPTQ LLM model
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

    # Generation settings forwarded to the pipeline.
    generation_settings = {
        'max_new_tokens': 1024,
        'do_sample': True,
        'temperature': 0.7,
        'top_p': 0.95,
        'top_k': 40,
        'repetition_penalty': 1.1,
    }
    text_generator = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        **generation_settings,
    )

    return HuggingFacePipeline(pipeline=text_generator,
                               model_kwargs={'temperature': 0.7})

# 05: QA Model Function
@lru_cache(maxsize=1)
def qa_bot():
    """Return the retrieval QA chain, building it on the first call only.

    Building the chain loads the sentence-transformers embedding model, the
    FAISS index at DB_FAISS_PATH and the full LLM via load_llm(). This
    function is called for every new chat session and by final_result(), so
    the result is cached: without the cache each call would load another copy
    of the model. The chain is created without any conversation memory, and
    callbacks are passed per call, so one instance is shared by all callers.

    Returns:
        The RetrievalQA chain produced by retrieval_qa_chain().
    """
    # Must be the same embedding model that ingest.py used to build the index.
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={'device': 'cpu'})
    db = FAISS.load_local(DB_FAISS_PATH, embeddings)
    llm = load_llm()
    qa_prompt = set_custom_prompt()
    qa = retrieval_qa_chain(llm, qa_prompt, db)

    return qa

# 06: output function
def final_result(query):
    """Answer ``query`` with the chain from qa_bot() and return its response.

    The chain is invoked with ``{'query': query}``; whatever it returns is
    passed back unchanged.
    """
    chain = qa_bot()
    return chain({'query': query})

# 07: chainlit code
@cl.on_chat_start
async def start():
    """Prepare a new chat session: show a status message, load the QA chain.

    The "Starting the bot..." message is sent before the chain is built, so
    the user sees it while the model loads rather than only afterwards. The
    finished chain is stored in the user session under the key "chain",
    where the on_message handler reads it.
    """
    msg = cl.Message(content="Starting the bot...")
    await msg.send()

    # qa_bot() is synchronous and slow (it loads the models). Running it in a
    # worker thread keeps the event loop free, so the status message above is
    # actually delivered while loading is in progress.
    chain = await cl.make_async(qa_bot)()

    # Store the chain before announcing readiness so a query sent right after
    # the welcome text always finds it.
    cl.user_session.set("chain", chain)

    msg.content = "Hi, Welcome to Medical Bot. What is your query?"
    await msg.update()

@cl.on_message
async def main(message: cl.Message):
    """Answer an incoming chat message with the session's QA chain.

    The chain stored under "chain" in the user session is called with the
    message text. The reply is the chain's "result", followed by a line that
    names the attached sources; each returned source document is attached as
    a cl.Text element named ``source_<i>`` holding its file name, page and
    content.
    """
    chain = cl.user_session.get("chain")
    if chain is None:
        # The session has no chain yet (on_chat_start has not finished).
        await cl.Message(
            content="The bot is still starting, please try again in a moment."
        ).send()
        return

    cb = cl.AsyncLangchainCallbackHandler(
        stream_final_answer=True, answer_prefix_tokens=["FINAL", "ANSWER"]
    )
    # NOTE(review): presumably forces streaming from the first token, because
    # the prompt never makes the model emit the "FINAL ANSWER" prefix — confirm.
    cb.answer_reached = True
    res = await chain.acall(message.content, callbacks=[cb])
    answer = res["result"]
    sources = res["source_documents"]

    found_sources = []
    source_elements = []
    for i, doc in enumerate(sources):
        # Tolerate documents whose loader did not record source/page metadata.
        file = os.path.basename(doc.metadata.get("source", "unknown"))
        page = doc.metadata.get("page", "unknown")
        source_name = f"source_{i}"

        text = f"[Document: {file}, Page: {page}]\n\nContent: {doc.page_content}"
        found_sources.append(source_name)
        source_elements.append(cl.Text(content=text, name=source_name))

    if found_sources:
        # The names listed here match the names of the attached elements.
        answer += f"\nSources: {', '.join(found_sources)}"
    else:
        answer += "\n\n\nNo sources found"

    await cl.Message(content=answer, elements=source_elements).send()


EOF

## 執行模型

In [None]:
# CHAINLIT
# Start the Chainlit app (model.py) as a background job with the -w flag;
# stdout and stderr are redirected to /content/logs.txt.
!chainlit run model.py -w &> /content/logs.txt &

In [None]:
# Register the ngrok auth token; replace the placeholder with a real token.
!ngrok config add-authtoken xxxxxxxxxxxxxxxx

from pyngrok import ngrok
# Open a tunnel to local port 8000 and print its public URL.
# NOTE(review): presumably 8000 is the port the Chainlit app listens on — confirm.
ngrok_tunnel = ngrok.connect(8000)
print('Public URL:', ngrok_tunnel.public_url)

## DELETE JOB, 結束前再執行

In [None]:
# Shut down ngrok via pyngrok (run this before ending the session).
ngrok.kill()

In [None]:
!ps -ef |grep chainlit | awk '{print $2}' | xargs kill -9
!ps -ef |grep ngrok | awk '{print $2}' | xargs kill -9
