https://huggingface.co/blog/not-lain/rag-chatbot-using-llama3

In [None]:
!pip install datasets sentence-transformers faiss-cpu accelerate

In [45]:
from sentence_transformers import SentenceTransformer
from datasets import load_dataset

ST = SentenceTransformer("all-MiniLM-L6-v2", cache_folder="models")
dataset = load_dataset("JLK-ptbk/faq", cache_dir="datasets", split="train")

In [50]:
# Add embeddings to the dataset
dataset = dataset.map(
    lambda example: {"embeddings": ST.encode(example["data"])},
    remove_columns=["index", "Unnamed: 0"],
)

# Add the index to the data
dataset.add_faiss_index("embeddings")

Map:   0%|          | 0/182 [00:00<?, ? examples/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Dataset({
    features: ['data', 'embeddings'],
    num_rows: 182
})

In [56]:
def search(query: str, k: int = 3):
    """a function that embeds a new query and returns the most probable results"""
    embedded_query = ST.encode(query)  # embed new query
    scores, retrieved_examples = dataset.get_nearest_examples(  # retrieve results
        "embeddings",
        embedded_query,  # compare our new embedded query with the dataset embeddings
        k=k,  # get only top k results
    )
    return scores, retrieved_examples
search("rebook")[1]['data']

['[\'How do I rebook/extend booking with a sitter again?\', \'<p>Follow the steps to <strong>rebook/extend your ongoing booking</strong>:</p>\\r\\n<p>1) Navigate to the chat room with the booked sitter</p>\\r\\n<p>3) Tap on "Rebook/Extend Booking"</p><br />\\r\\n<p style=" text-align: center; "><img width="300px" src="https://storage.googleapis.com/petbacker/images/faq/en/how-to-extend-booking/part-1.gif" alt="rebook 1" /></p>\\r\\n<br />\\r\\n<p>To <strong>rebook again</strong> your completed booking:</p>\\r\\n<p>1) Navigate to the chat room with the booked sitter</p>\\r\\n<p>3) Tap on "Book Again"</p><br />\\r\\n<p style=" text-align: center; "><img width="300px" src="https://storage.googleapis.com/petbacker/images/faq/en/how-to-extend-booking/part-2.gif" alt="rebook 2" /></p>\\r\\n<br />\\r\\n<p>4) Fill in all required fields and submit your Request!</p><br />\\r\\n<p style=" text-align: center; "><img width="300px" src="https://storage.googleapis.com/petbacker/images/faq/en/how-to-

In [57]:
SYS_PROMPT = """You are an assistant for answering questions.
You are given the extracted parts of a long document and a question. Provide a conversational answer.
If you don't know the answer, just say "I do not know." Don't make up an answer."""

In [58]:
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained(
    "HuggingFaceTB/SmolLM-135M-Instruct", cache_dir="models"
)
model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceTB/SmolLM-135M-Instruct", cache_dir="models"
).to("cuda")
terminators = [tokenizer.eos_token_id, tokenizer.convert_tokens_to_ids("<|eot_id|>")]

In [59]:
def format_prompt(prompt, retrieved_documents, k):
    """using the retrieved documents we will prompt the model to generate our responses"""
    PROMPT = f"Question:{prompt}\nContext:"
    for idx in range(k):
        PROMPT += f"{retrieved_documents['data'][idx]}\n"
    return PROMPT


def generate(formatted_prompt):
    formatted_prompt = formatted_prompt[:2000]  # to avoid GPU OOM
    messages = [
        {"role": "system", "content": SYS_PROMPT},
        {"role": "user", "content": formatted_prompt},
    ]
    input_text = tokenizer.apply_chat_template(messages, tokenize=False)
    inputs = tokenizer.encode(input_text, return_tensors="pt").to("cuda")
    outputs = model.generate(
        inputs, max_new_tokens=50, temperature=0.5, top_p=0.9, do_sample=True, eos_token_id=terminators
    )
    response = outputs[0][inputs.shape[-1] :]
    return tokenizer.decode(response, skip_special_tokens=True)

def rag_chatbot(prompt: str, k: int = 2):
    scores, retrieved_documents = search(prompt, k)
    formatted_prompt = format_prompt(prompt, retrieved_documents, k)
    return generate(formatted_prompt)

In [60]:
rag_chatbot("What is Pet Backers?", k=2)

'assistant\nHere\'s an example of how you could answer the question:\n\n"What is PetBackers?'