In [None]:
# Install the packages listed in requirements.txt; %pip targets the running kernel's environment.
%pip install -r requirements.txt

In [2]:
# Authenticate against the Hugging Face Hub through the CLI's interactive token prompt.
# NOTE(review): presumably required because the Gemma checkpoint loaded later is access-gated — confirm.
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) n
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [3]:
import os
import glob
import pypdf

from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain_google_vertexai import gemma

from langchain_community.vectorstores import Chroma
from langchain_community.llms import HuggingFacePipeline

from transformers import pipeline
from transformers import AutoTokenizer
from transformers import BitsAndBytesConfig
from transformers import AutoModelForCausalLM

# Opt in to the accelerated `hf_transfer` download backend for Hub downloads.
# NOTE(review): this is set after the Hugging Face libraries are imported; if the
# flag is read at import time it may come too late — consider moving it above the
# imports. TODO confirm.
os.environ.update({'HF_HUB_ENABLE_HF_TRANSFER': '1'})

In [4]:
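# Optional GPU override (currently disabled). `torch` is not imported in the
# imports cell above, so add `import torch` before re-enabling these lines.
# device_name = 'cuda'
# torch.set_default_device(device_name)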

In [5]:
def pdfload(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory that is searched with the ``*.pdf`` glob.

    Returns:
        The result of ``DirectoryLoader.load()``, with every matching file
        read through ``PyPDFLoader``.
    """
    loader = DirectoryLoader(data, glob='*.pdf', loader_cls=PyPDFLoader)
    return loader.load()
def chunkify(data):
    """Split loaded documents into smaller, slightly overlapping chunks.

    Args:
        data: Documents to split, e.g. the output of ``pdfload``.

    Returns:
        The result of ``RecursiveCharacterTextSplitter.split_documents`` using
        ``chunk_size=500`` and ``chunk_overlap=20``.
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    return splitter.split_documents(data)

In [None]:
# Directory name handed to Chroma as its on-disk persistence location.
persist_directory = 'db'

# Read the source PDFs and split them into chunks ready for embedding.
# NOTE(review): the path looks like a mounted Google Drive folder; the mount step
# is not visible in this notebook — confirm it happens before this cell runs.
chunks = chunkify(
    pdfload('/content/drive/MyDrive/Chatbot Data')
)

# Sentence-transformers model used to embed the chunks.
# (To pin it to a device, pass e.g. model_kwargs={'device': ...}.)
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-MiniLM-L6-v2',
)

In [7]:
# Build the Chroma index once and reuse it on later runs.
# Calling `Chroma.from_documents` against an already-populated persist directory
# adds every chunk again, so re-running this cell would fill the index with
# duplicates and the top-k retrieval below could return the same chunk several times.
# Delete the `persist_directory` folder to force a rebuild after the PDFs change.
if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    # An index is already on disk: open it instead of re-embedding the chunks.
    vectordb = Chroma(embedding_function=embeddings, persist_directory=persist_directory)
else:
    # First run: embed all chunks and write the index to disk.
    vectordb = Chroma.from_documents(chunks, embeddings, persist_directory=persist_directory)
    vectordb.persist()

# Retriever that returns the 3 best-matching chunks for a query.
retriever = vectordb.as_retriever(search_kwargs={'k': 3})

In [8]:
# Sanity-check retrieval with a sample question before wiring up the QA chain.
query = 'What are allergies'
docs = retriever.get_relevant_documents(query=query)
docs  # bare last expression so the notebook displays the retrieved documents

[Document(page_content="GALE ENCYCLOPEDIA OF MEDICINE 2 117Allergies\nAllergic rhinitis is commonly triggered by\nexposure to household dust, animal fur,or pollen. The foreign substance thattriggers an allergic reaction is calledan allergen.\nThe presence of an allergen causes the\nbody's lymphocytes to begin producingIgE antibodies. The lymphocytes of an allergy sufferer produce an unusuallylarge amount of IgE.\nIgE molecules attach to mast\ncells, which contain histamine.HistaminePollen grains\nLymphocyte\nFIRST EXPOSURE", metadata={'page': 130, 'source': '/content/drive/MyDrive/Chatbot Data/Medical_book.pdf'}),
 Document(page_content='allergens are the following:\n• plant pollens\n• animal fur and dander\n• body parts from house mites (microscopic creatures\nfound in all houses)\n• house dust• mold spores• cigarette smoke• solvents• cleaners\nCommon food allergens include the following:\n• nuts, especially peanuts, walnuts, and brazil nuts\n• fish, mollusks, and shellfish• eggs• whe

In [9]:
# Prompt text for the QA chain. `{context}` and `{question}` are the two
# placeholders filled in by the PromptTemplate built from this string.
template = (
    "\n"
    "Use the following pieces of information to answer the user's question.\n"
    "If you don't know the answer, just say that you don't know. Do not make up an answer.\n"
    "\n"
    "Context: {context}\n"
    "Question: {question}\n"
    "\n"
    "Only return the helpful answer as below and nothing else.\n"
    "Helpful answer:\n"
)

In [10]:
# Wrap the raw template text in a PromptTemplate that declares its two placeholders.
prompt = PromptTemplate(
    input_variables=['context', 'question'],
    template=template,
)

# Extra keyword arguments forwarded to the chain so it uses the custom prompt.
chain_type_kwargs = dict(prompt=prompt)

In [None]:
# Checkpoint shared by the model and its tokenizer so the two cannot drift apart.
model_id = 'google/gemma-7b-it'

# Request 4-bit quantized loading through bitsandbytes.
# The keyword is `load_in_4bit`; the previous spelling `load_in_4_bit` is not a
# recognised option, so the model was being loaded without quantization.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    low_cpu_mem_usage=True,
    torch_dtype='auto',
    device_map='auto'  # let the library decide where to place the weights
)
tokenizer = AutoTokenizer.from_pretrained(model_id)

config.json:   0%|          | 0.00/694 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/2.11G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [None]:
# LangChain chains expect a LangChain language model, not a raw `transformers`
# model object. Wrap the model and tokenizer in a text-generation pipeline and
# expose that pipeline to LangChain through `HuggingFacePipeline`.
text_generation = pipeline(
    'text-generation',
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,  # explicit budget for the generated answer
)
llm = HuggingFacePipeline(pipeline=text_generation)

# "stuff" chain: the retrieved chunks are inserted into the prompt's {context} slot.
# `return_source_documents=True` makes the chain return the retrieved chunks as well.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs
)