In [1]:
# !pip install transformers
# !pip install bitsandbytes

In [2]:
from typing import List
from transformers import LlamaForCausalLM, LlamaTokenizer, BitsAndBytesConfig
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import torch

In [3]:
HF_TOKEN = "<your-hf-token>"

retriever_model = SentenceTransformer('all-MiniLM-L6-v2')
llm_model_name = "meta-llama/Llama-2-7b-chat-hf"  # Replace with the desired local model

use_4bit = True
bnb_4bit_compute_dtype = "float16"
bnb_4bit_quant_type = "nf4"
use_nested_quant = False
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)


llm_tokenizer = LlamaTokenizer.from_pretrained(llm_model_name, token=HF_TOKEN)
llm_model = LlamaForCausalLM.from_pretrained(llm_model_name, device_map = {"": 0},token=HF_TOKEN, quantization_config=bnb_config)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
# Example knowledge base
documents = [
    "Lila: Lila spent years crafting the perfect sculpture, but when she finally finished, it stood as a silent reminder of the dreams she had abandoned. In the end, she realized the real art was in the journey, not the finished piece.",
    "Ethan: Ethan was always the skeptic, questioning everything, until one day, he stumbled upon a mystery that could not be explained. For the first time, he felt the thrill of believing in something beyond reason.",
    "Zara: Zara had always dreamed of adventure, and when the opportunity finally came, she found herself in the middle of a city she could never have imagined. But as the days passed, the unfamiliarity began to feel like home.",
    "Noah: Noah's heart longed for peace, yet his restless mind led him to constantly chase after new challenges. One quiet afternoon, he sat still and realized that peace had always been within him, waiting to be discovered."
]

# Encode the documents into embeddings
doc_embeddings = retriever_model.encode(documents)

# Function to retrieve relevant documents based on query
def retrieve(query: str, top_k: int = 1) -> List[str]:
    query_embedding = retriever_model.encode([query])
    similarities = cosine_similarity(query_embedding, doc_embeddings)
    top_indices = np.argsort(similarities[0])[::-1][:top_k]
    return [documents[i] for i in top_indices]


In [5]:
# Function to generate a response using local AutoModel
def generate(query: str, context: List[str]) -> str:
    prompt = """
    Context:
    {context}
    Using the context above answer the following question. Do not add anything else.
    Question: {query}
    Answer:
    """.format(context="\n".join(context), query=query)

    inputs = llm_tokenizer(prompt, return_tensors="pt").to(llm_model.device)
    outputs = llm_model.generate(**inputs, max_length=200, num_return_sequences=1)
    return llm_tokenizer.decode(outputs[0], skip_special_tokens=True)


In [6]:

# Main function to combine retrieval and generation
def rag_pipeline(query: str, top_k: int = 1):
    retrieved_docs = retrieve(query, top_k)
    response = generate(query, retrieved_docs)
    return response

user_query = "Tell me something about Ethan's life in 20 words?"
answer = rag_pipeline(user_query)
print("Query:", user_query)
print("Answer:", answer)

Query: Tell me something about Ethan's life in 20 words?
Answer: 
    Context:
    Ethan: Ethan was always the skeptic, questioning everything, until one day, he stumbled upon a mystery that could not be explained. For the first time, he felt the thrill of believing in something beyond reason.
    Using the context above answer the following question. Do not add anything else.
    Question: Tell me something about Ethan's life in 20 words?
    Answer:
     Ethan was a logical, analytical thinker until he encountered an unsolvable mystery that awakened his curiosity.
