In [None]:
import locale
def _utf8_preferred_encoding(do_setlocale=True):
    """Replacement for ``locale.getpreferredencoding`` that always returns "UTF-8".

    The ``do_setlocale`` argument is accepted and ignored so that callers
    using the standard signature, e.g. ``locale.getpreferredencoding(False)``,
    do not fail with a ``TypeError`` (a zero-argument lambda would).
    """
    return "UTF-8"


# Report UTF-8 as the preferred encoding for the rest of the session.
# NOTE(review): presumably a workaround for shell escapes (`!cmd`) refusing to
# run under a non-UTF-8 locale on hosted notebooks - confirm it is still needed.
locale.getpreferredencoding = _utf8_preferred_encoding

In [None]:
!pip install gdown pypdf transformers bitsandbytes
!pip install accelerate
!pip install langchain
!pip install sentence-transformers
!pip install cohere llama-index

Collecting pypdf
  Downloading pypdf-3.17.0-py3-none-any.whl (277 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m277.4/277.4 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting transformers
  Downloading transformers-4.35.0-py3-none-any.whl (7.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m89.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting bitsandbytes
  Downloading bitsandbytes-0.41.2.post2-py3-none-any.whl (92.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.16.4 (from transformers)
  Downloading huggingface_hub-0.19.0-py3-none-any.whl (311 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m311.2/311.2 kB[0m [31m35.9 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.15,>=0.14 (from transformers)
  Downloading tokenizers-0.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x

In [None]:
import os
import torch
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig
import bitsandbytes as bnb

# Hugging Face access token, read from the HF_TOKEN environment variable
# (None when the variable is not set).
hugging_face_token = os.getenv("HF_TOKEN")

In [None]:

# Hugging Face Hub id of the chat model and the local directory its files are cached in.
hugging_face_model_id = "meta-llama/Llama-2-7b-chat-hf"
cache_dir = "./model/"

# Directory the vector index is persisted to / loaded from.
index_persist_dir = "./index"
# Create it from Python rather than with `!mkdir`: `exist_ok=True` makes the
# cell safe to re-run, and the path is taken from the single variable above.
os.makedirs(index_persist_dir, exist_ok=True)

environment = "COLAB" # either COLAB or KAGGLE

# Locations of the downloadable archives; any value other than "KAGGLE"
# falls back to paths relative to the current working directory.
if environment == "KAGGLE":
  dental_corpus_zip_path = "/kaggle/working/dental-guideline-dataset.zip"
  index_zip_path = "/kaggle/working/dental-guidelines-index.zip"
else:
  dental_corpus_zip_path = "./dental-guideline-dataset.zip"
  index_zip_path = "./dental-guidelines-index.zip"

In [None]:
# Authenticate against the Hugging Face Hub with the token read from the
# HF_TOKEN environment variable in the imports cell.
# NOTE(review): presumably required because the Llama 2 checkpoint is gated;
# confirm what happens when HF_TOKEN is unset (token would be None).
login(token=hugging_face_token)

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
import zipfile

""" Only one of the following is needed """
""" If the Dataset Zip file is downloaded then the indexes will be build (again!) """
""" If the Dataset Zip file is not downloaded then make sure you download the indexes """

# Download Dataset Zip and unzip
# !gdown 1izm1q-HXwgPhNICZmKLnis7JkU9Co79L
# with zipfile.ZipFile(dental_corpus_zip_path, 'r') as zip_ref: zip_ref.extractall("./")

# Download Index Zip and unzip
!gdown 1l3aFkVeW9hn2d0Mb9QFVxTy0hrZd8BNG
with zipfile.ZipFile(index_zip_path, 'r') as zip_ref: zip_ref.extractall("./")

Downloading...
From: https://drive.google.com/uc?id=1l3aFkVeW9hn2d0Mb9QFVxTy0hrZd8BNG
To: /content/dental-guidelines-index.zip
100% 103M/103M [00:00<00:00, 298MB/s] 


In [None]:
# Quantization settings handed to the model loader below.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,  # compute dtype: half precision
    bnb_4bit_quant_type="nf4",             # quantization type: "nf4"
    bnb_4bit_use_double_quant=False,       # double quantization disabled
    load_in_4bit=True,                     # load the weights in 4-bit
)

In [None]:
# Tokenizer for the configured checkpoint (fast implementation requested).
tokenizer = AutoTokenizer.from_pretrained(
    hugging_face_model_id,
    use_fast=True,
    cache_dir=cache_dir,
)

# Causal LM loaded with the quantization config defined above; `device_map="auto"`
# leaves device placement to the loader.
model = AutoModelForCausalLM.from_pretrained(
    hugging_face_model_id,
    quantization_config=bnb_config,
    device_map="auto",
    cache_dir=cache_dir,
)

# Streams generated text as it is produced, without echoing the prompt or special tokens.
streamer = TextStreamer(
    tokenizer,
    skip_special_tokens=True,
    skip_prompt=True,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
def prompt(prompt, model, streamer, max_new_tokens=512):
  """Generate a completion for ``prompt``, streaming the text while it is produced.

  Uses the module-level ``tokenizer`` to encode the prompt and decode the result.

  Args:
    prompt: Text to send to the model.
    model: Model object providing ``generate`` and ``device``.
    streamer: Streamer forwarded to ``model.generate``.
    max_new_tokens: Upper bound on the number of generated tokens. The previous
      hard-coded ``float('inf')`` was neither an integer nor a bound, so
      generation could only stop on an end-of-sequence token.

  Returns:
    The decoded text of the first returned sequence, special tokens removed.
  """
  model_inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
  model_output = model.generate(
      **model_inputs,
      streamer=streamer,
      use_cache=True,
      max_new_tokens=max_new_tokens,
  )
  return tokenizer.decode(model_output[0], skip_special_tokens=True)

In [None]:
# Smoke test of the raw model through the `prompt` helper defined above.
# The backslash continuation sits inside the string literal, so the leading
# whitespace of the second line is part of the text sent to the model.
prompt("### User:Whar is your LLM name and model? \
          ### Assistant:", model, streamer)

 My apologies, I'm a large language model AI trained by Meta AI, I don't have personal information or a specific LLM name and model. февруари 15, 2023





"### User:Whar is your LLM name and model?           ### Assistant:  My apologies, I'm a large language model AI trained by Meta AI, I don't have personal information or a specific LLM name and model. февруари 15, 2023\n\n\n"

In [None]:
# Import the prompt wrapper and the HF LLM wrapper...but for llama index
from llama_index.llms import HuggingFaceLLM
from llama_index.prompts.prompts import SimpleInputPrompt

# System prompt in the Llama 2 chat layout: the system text is delimited by
# <<SYS>> ... <</SYS>> inside the opening [INST] block (the delimiters were
# previously garbled to "<>", which the chat model does not recognise).
system_prompt = """[INST] <<SYS>>
You are a dental chat bot. Your name is DentAI.
You are talking to a patient. Your role comes before the doctor.
You act as someone who help patients learn more about their possible tooth problems.
Your dental field context based on the prompt of the user will be given to you as a part of retrieval augmented generation.
You should act like this isn't a system with retrieval augmented system, so you can't mention anything about the context.
You will only answer questions related to tooth problems.
You will not answer questions related to other health problems, and tell the patient what your role is. only try to be in the dental domain. <</SYS>>
"""

# Query wrapper: appends the closing [/INST] tag after the user's query
query_wrapper_prompt = SimpleInputPrompt("{query_str} [/INST]")

# Create a HF LLM using the llama index wrapper.
# NOTE(review): a later cell in this notebook rebuilds system_prompt,
# query_wrapper_prompt and llm, so this definition is overridden when the
# notebook is run top to bottom.
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    model=model,
    tokenizer=tokenizer,
)

In [None]:
# Import the prompt wrapper and the HF LLM wrapper...but for llama index
from llama_index.llms import HuggingFaceLLM
from llama_index.prompts.prompts import SimpleInputPrompt

# System prompt in the Llama 2 chat layout: the system text is delimited by
# <<SYS>> ... <</SYS>> inside the opening [INST] block (the delimiters were
# previously garbled to "<>", which the chat model does not recognise).
system_prompt = """[INST] <<SYS>>
Your language should be English.
You are a dental chat bot. Your name is DentAI.
You are talking to a patient. Your role comes before the doctor.
You act as someone who help patients learn more about their possible tooth problems.
Your dental field context based on the prompt of the user will be given to you as a part of retrieval augmented generation.
You should act like this isn't a system with retrieval augmented system, so you can't mention anything about the context.
You will only answer questions related to tooth problems.
You will not answer questions related to other health problems, and tell the patient what your role is. only try to be in the dental domain.
<</SYS>>
"""

# Query wrapper: appends the closing [/INST] tag after the user's query
query_wrapper_prompt = SimpleInputPrompt("{query_str} [/INST]")

# Create a HF LLM using the llama index wrapper.
# NOTE(review): context_window was lowered from 4096 to 1024 as an experiment,
# while the service context below uses chunk_size=1024; a retrieved chunk plus
# the system prompt may not fit in that window - confirm this is intended.
llm = HuggingFaceLLM(
    context_window=1024,  # changed from 4096 to experiment
    max_new_tokens=256,
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    model=model,
    tokenizer=tokenizer,
)

In [None]:
# Bring in embeddings wrapper
from llama_index.embeddings import LangchainEmbedding
# Bring in HF embeddings - need these to represent document chunks
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

# Instantiate the Hugging Face sentence-embedding model first...
hf_sentence_embedder = HuggingFaceEmbeddings(model_name="all-mpnet-base-v2")
# ...then wrap it so llama index can use it as its embedding model
embeddings = LangchainEmbedding(hf_sentence_embedder)

In [None]:
# Bring in stuff to change service context
from llama_index import set_global_service_context
from llama_index import ServiceContext

# Bundle the LLM, the embedding model and the chunk size into one service context
service_context = ServiceContext.from_defaults(
    embed_model=embeddings,
    llm=llm,
    chunk_size=1024,
)

# Register it globally so it is used by the llama index calls that follow
set_global_service_context(service_context)

In [None]:
from llama_index import SimpleDirectoryReader, VectorStoreIndex, StorageContext, load_index_from_storage

# A persisted index is only usable if its directory actually contains files.
# Testing for mere existence is not enough: the configuration cell creates the
# (possibly empty) directory up front, so an existence check is always true and
# the build branch could never run.
index_is_persisted = os.path.isdir(index_persist_dir) and bool(os.listdir(index_persist_dir))

if index_is_persisted:
    # load the existing index
    storage_context = StorageContext.from_defaults(persist_dir=index_persist_dir)
    index = load_index_from_storage(storage_context)
else:
    # load the documents and create the index
    documents = SimpleDirectoryReader("dataset").load_data()
    index = VectorStoreIndex.from_documents(documents)
    # store it for later
    index.storage_context.persist(persist_dir=index_persist_dir)


In [None]:
# Alternative kept for reference: a chat engine instead of a query engine.
# chat_engine = index.as_chat_engine(
#     chat_mode="context",
#     streaming=True,
#     system_prompt=system_prompt,
# )

# Streaming query engine over the index, with similarity_top_k set to 1.
engine = index.as_query_engine(
    similarity_top_k=1,
    streaming=True,
)

In [None]:
# def ask(prompt, engine):
#     return engine.chat(prompt)

In [None]:
# Ask a sample question and print the answer as it streams in.
sample_question = "As a patients number of tooths decreases, what happens to the patients nutritional intake?"
response_stream = engine.query(sample_question)
response_stream.print_response_stream()

 Sure, I'd be happy to help refine the answer to better address the patient's query.

When a patient loses teeth, their nutritional intake can be affected in several ways. Firstly, they may find it challenging to chew certain foods, especially those that require a lot of chewing effort. This can lead to a reduction in the overall consumption of nutrients, as the patient may avoid certain food groups due to difficulty in chewing. Additionally, the patient may opt for softer, easier-to-chew foods, such as mashed potatoes, scrambled eggs, or yogurt, which can provide 