In [None]:
## INSTALL PACKAGE
!pip install -q markdown pinecone-client openai gdown xformers ctransformers tokenizers transformers accelerate langchain chainlit sentence_transformers chromadb unstructured PyPDF2 pypdf bitsandbytes faiss_cpu faiss_gpu huggingface_hub hf_transfer optimum -q
!pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/  -q # Use cu117 if on CUDA 11.7

In [None]:
# 01: Configure
import os
import warnings

# PDF file name for the knowledge base (not referenced by the other cells shown here).
pdf_file = 'Medical_Chatbot.pdf'

# Credentials are read from the environment so that no secret is stored in the
# notebook. NOTE(review): a literal API key used to be committed on this line;
# treat that key as leaked and rotate it in the Pinecone console.
PINECONE_API_KEY = os.environ.get('PINECONE_API_KEY', '')
if not PINECONE_API_KEY:
    warnings.warn(
        "PINECONE_API_KEY is not set; export it before running the Pinecone cells.",
        stacklevel=2,
    )

# The Pinecone environment keeps its previous value as the default and can be
# overridden without editing the notebook.
PINECONE_API_ENV = os.environ.get('PINECONE_API_ENV', 'gcp-starter')

# Name of the existing Pinecone index to query.
index_name = "cjz-medical"

# Hugging Face model id of the sentence-embedding model.
Embeddings_ID = "sentence-transformers/all-MiniLM-L6-v2"

In [None]:
### 02.1: LOAD LIBRARY
from torch import cuda, bfloat16, float16
from langchain.chains import LLMChain, SimpleSequentialChain, RetrievalQA
from langchain.llms.huggingface_pipeline import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, GenerationConfig, pipeline, TextStreamer

In [None]:
# 02.2: Load LIBRARY
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.vectorstores import Pinecone
from langchain.chains.question_answering import load_qa_chain
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import transformers
import torch
import pinecone

In [None]:
%%bash
# 3. Download model
# Fetch the TheBloke/Llama-2-7B-Chat-GPTQ repository into a local directory.
MODEL_DIR=Llama-7B-Chat-GPTQ
mkdir -p "$MODEL_DIR"
# HF_HUB_ENABLE_HF_TRANSFER=1 is set for this single command only.
HF_HUB_ENABLE_HF_TRANSFER=1 huggingface-cli download TheBloke/Llama-2-7B-Chat-GPTQ \
    --local-dir "$MODEL_DIR" \
    --local-dir-use-symlinks False


In [None]:
# 04: Embeddings model (384 dimensions)
# The model id comes from the Embeddings_ID constant in the configuration cell.
embeddings = HuggingFaceEmbeddings(
    model_name=Embeddings_ID,
)

In [None]:
# 05: Connect to the Pinecone vector database
pinecone.init(
    api_key=PINECONE_API_KEY,
    environment=PINECONE_API_ENV,
)

# Attach to the index that already exists, using the embedding model from step 04.
docsearch = Pinecone.from_existing_index(
    index_name,
    embeddings,
)

In [None]:
# 06: Search the Pinecone vector database and list the top three matches
query = "What are Allergies"

# k=3 limits the result to the three best-matching documents.
docs = docsearch.similarity_search(
    query,
    k=3,
)

# A bare last expression lets the notebook render the result.
docs

In [None]:
# 07: LLM model
# Load the locally downloaded checkpoint, build a text-generation pipeline
# around it, and wrap that pipeline as a LangChain LLM.

from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Directory populated by the download cell.
MODEL_ID = "./Llama-7B-Chat-GPTQ"
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)

# All generation settings live on the pipeline. Sampling is enabled
# (do_sample=True), so answers can differ from run to run.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    repetition_penalty=1.1
)

# The previous model_kwargs={"temperature": 0} was removed: it contradicted the
# sampling settings above (temperature=0.7 with do_sample=True).
# NOTE(review): HuggingFacePipeline appears to use model_kwargs only when it
# builds the model itself, not with a pre-built pipeline - confirm for the
# installed LangChain version.
llm = HuggingFacePipeline(pipeline=pipe)

In [None]:
## 08. Set up the QA search module
from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain

# The retriever asks the vector store for the top 3 matches per question.
retriever = docsearch.as_retriever(search_kwargs={"k": 3})

# chain_type="stuff" and return_source_documents=True are passed through
# unchanged; the latter makes 'source_documents' available in the response.
retrieval_qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

In [None]:
## 09. Run a QA search
query = "What are Allergies"
llm_response = retrieval_qa_chain(query)

# Print the question, the generated answer, and the retrieved source documents, in that order.
for field in ('query', 'result', 'source_documents'):
    print(llm_response[field])