# I

In [1]:
import torch
import numpy as np
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM

#1.1
dataset = load_dataset("tcltcl/small-simple-wikipedia")
texts = dataset['train']['text']

#1.2
chunk_size = 200
overlap = 50

#1.3
chunks = []
for text in texts:
    words = text.split()
    for i in range(0, len(words), chunk_size - overlap):
        chunk = ' '.join(words[i:i + chunk_size])
        if chunk.strip():
            chunks.append(chunk)

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
#2.1 
from sentence_transformers import SentenceTransformer

#2.2 
retriever_model = SentenceTransformer("all-MiniLM-L6-v2")

#2.3
embeddings = retriever_model.encode(chunks)

In [3]:
#3.1 
def retriever(question):
    #3.2
    question_embedding = retriever_model.encode([question])
    
    #3.3 
    similarities = retriever_model.similarity(question_embedding, embeddings)
    
    #3.4 
    best_chunk_idx = np.argmax(similarities)
    return chunks[best_chunk_idx]


In [4]:
#4.1
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceH4/zephyr-7b-beta",
    torch_dtype=torch.float16,
    device_map="auto"
)

#4.2
def create_prompt(question, retrieved_context):
    return f"<|user|>\nUse the following context to answer the question.\n\nContext: {retrieved_context}\n\nQuestion: {question}\n<|assistant|>"

def rag_answer(question):
    retrieved_context = retriever(question)
    
    prompt = create_prompt(question, retrieved_context)
    
    #4.3
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    
    #4.4
    output = model.generate(**inputs)
    
    #4.5
    answer = tokenizer.decode(output[0], skip_special_tokens=True)
    
    #4.6
    #answer = answer.split("<|assistant|>")[-1].strip()
    
    return answer

Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:01<00:00,  4.47it/s]
Some parameters are on the meta device because they were offloaded to the cpu.


In [5]:
question = "What is compil?"
answer = rag_answer(question)
print(f"Question: {question}")
print(f"Answer: {answer}")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Question: What is AI?
Answer: <|user|>
Use the following context to answer the question.

Context: control large and small machines which in the past were controlled by humans. Most people have used a personal computer in their home or at work. They are used for things such as calculation, listening to music, reading an article, writing, playing games etc Hardware Modern computers are electronic computer hardware. They do mathematical arithmetic very quickly but computers do not really "think". They only follow the instructions in their software programs. The software uses the hardware when the user gives it instructions, and gives useful outputs. Controls Computers are controlled with user interfaces. Input devices which include keyboards, computer mice, buttons, and touch screens, etc. Programs Computer programs are designed or written by computer programmers. A few programmers write programs in the computer's own language called machine code. Most programs are written using a progra