In [1]:
import os
from pathlib import Path
# Extend PATH with two extra executable directories for this kernel process
# (and anything it spawns).
HOME = str(Path.home())
Add_Binarry_Path=HOME+'/.local/bin:/usr/ubuntu_bin'

# Append only the directories that are not on PATH yet, so re-running this cell
# does not keep growing the variable; an unset PATH is treated as empty instead
# of raising KeyError.
_current_path = os.environ.get('PATH', '')
_present_dirs = _current_path.split(':')
_missing_dirs = [d for d in Add_Binarry_Path.split(':') if d not in _present_dirs]
if _missing_dirs:
    os.environ['PATH'] = ':'.join(([_current_path] if _current_path else []) + _missing_dirs)

In [2]:
pip install -r requirements.txt -q

Note: you may need to restart the kernel to use updated packages.


In [3]:
# 01: Configure
pdf_file='Medical_Chatbot.pdf'

# Pinecone credentials are read from the environment so that no secret lives in
# the notebook. Export PINECONE_API_KEY (and optionally PINECONE_API_ENV)
# before starting the kernel.
# SECURITY: an API key used to be committed here in plain text; treat that key
# as leaked and revoke it in the Pinecone console.
PINECONE_API_KEY=os.environ.get('PINECONE_API_KEY', '')
PINECONE_API_ENV=os.environ.get('PINECONE_API_ENV', 'gcp-starter')
if not PINECONE_API_KEY:
    import warnings
    warnings.warn(
        "PINECONE_API_KEY is not set; connecting to Pinecone will fail until "
        "it is exported in the environment."
    )

# Name of the existing Pinecone index that is queried later in the notebook.
index_name="cjz-medical"

# Local paths of the sentence-embedding model and the chat LLM.
Embeddings_ID="/work/u00cjz00/nvidia/all-MiniLM-L6-v2"
MODEL_ID = "/work/u00cjz00/slurm_jobs/github/models/Llama-2-7B-Chat-GPTQ"

In [4]:
# 02: Load LIBRARY
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.vectorstores import Pinecone
from langchain.chains.question_answering import load_qa_chain
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import transformers
import torch
import pinecone

In [5]:
# 03: Embeddings model (384 dimensions)
# Build the embedding wrapper from the model stored at Embeddings_ID.
embeddings = HuggingFaceEmbeddings(
    model_name=Embeddings_ID,
)

In [6]:
# 04: LLM model
# Load the tokenizer and the causal-LM weights from MODEL_ID.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto")

# Text-generation settings forwarded to the pipeline.
_generation_settings = dict(
    max_length=1024,
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)

# NOTE: binding the result to `pipeline` shadows the `pipeline` function
# imported from transformers, which is why the factory is reached through the
# `transformers` module here.
pipeline = transformers.pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    trust_remote_code=True,
    device_map="auto",
    **_generation_settings,
)

# Wrap the pipeline so it can be used as a LangChain LLM.
# NOTE(review): temperature is handed to the wrapper via model_kwargs rather
# than to the pipeline itself -- confirm it is actually applied at generation
# time with the installed langchain version.
llm = HuggingFacePipeline(
    pipeline=pipeline,
    model_kwargs={'temperature': 0.7},
)

In [7]:
# 05: Connect to the Pinecone vector database
pinecone.init(
    environment=PINECONE_API_ENV,
    api_key=PINECONE_API_KEY,
)
# Open the already-populated index, using `embeddings` for queries.
docsearch = Pinecone.from_existing_index(index_name, embeddings)

In [8]:
# 06: Search the Pinecone vector database and list the top three matches
query = "What are Allergies"
docs = docsearch.similarity_search(
    query,
    k=3,
)
# Bare expression so the notebook displays the retrieved documents.
docs

[Document(page_content='allergy develops against the offending sub-stance (an allergen.)'),
 Document(page_content="GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies\nAllergic rhinitis is commonly triggered by\nexposure to household dust, animal fur,or pollen. The foreign substance thattriggers an allergic reaction is calledan allergen.\nThe presence of an allergen causes the\nbody's lymphocytes to begin producingIgE antibodies. The lymphocytes of an allergy sufferer produce an unusuallylarge amount of IgE.\nIgE molecules attach to mast\ncells, which contain histamine.HistaminePollen grains\nLymphocyte\nFIRST EXPOSURE"),
 Document(page_content='allergens are the following:\n• plant pollens\n• animal fur and dander\n• body parts from house mites (microscopic creatures\nfound in all houses)\n• house dust• mold spores• cigarette smoke• solvents• cleaners\nCommon food allergens include the following:\n• nuts, especially peanuts, walnuts, and brazil nuts\n• fish, mollusks, and shellfish• eggs• w

In [9]:
# 07: Let the LLM compose an answer from the retrieved documents
# A "stuff" QA chain is built around `llm` and run on the documents found above.
chain = load_qa_chain(llm, chain_type="stuff")
result = chain.run(
    question=query,
    input_documents=docs,
)
# Bare expression so the notebook displays the generated answer.
result



' Allergy is an exaggerated immune system response to a harmless substance (an allergen). The immune system mistakes the allergen as harmful and tries to fight it with various chemicals and symptoms.'