If you're not running in Saturn Cloud, you need to install these libraries:

Make sure you use the latest versions

```
pip install -U transformers accelerate bitsandbytes
```

## 第一章的內容

!rm -f minsearch.py
!wget https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py -O minsearch.py  # powershell的指令寫法

In [1]:
import requests 
import minsearch

docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
docs_response = requests.get(docs_url)
documents_raw = docs_response.json()

In [2]:
documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

index.fit(documents)

<minsearch.Index at 0x26db303b1c0>

In [3]:
def search(query):
    boost = {'question': 3.0, 'section': 0.5}

    results = index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict=boost,
        num_results=3
    )

    return results

```python
def build_prompt(query, search_results):
    prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT: 
{context}
""".strip()

    context = ""
    
    for doc in search_results:
        context = context + f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
    
    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

def llm(prompt):
    response = client.chat.completions.create(
        model='gpt-4o',
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

def rag(query):
    search_results = search(query)
    prompt = build_prompt(query, search_results)
    answer = llm(prompt)
    return answer
```

## 嘗試新模型

In [36]:
# 先設置模型下載位置
import os
os.environ['HF_HOME'] = '../../models'


In [None]:
# pip install accelerate
from transformers import T5Tokenizer, T5ForConditionalGeneration

tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large", device_map="auto")

In [6]:
input_text = "translate English to German: How old are you?"
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to("cuda")

input_ids

tensor([[9348,   25,  482,    3,    9, 3714,   81,  140,    6,   82,  564,   19,
            3,   15, 2234,   58,    1]], device='cuda:0')

In [7]:
outputs = model.generate(input_ids,max_length=100)
print(outputs)
print(tokenizer.decode(outputs[0]))

tensor([[    0,  6964,    19,     3,     9,  1021,  4940,   113,  1342,    28,
           112,  1362,    16,     3,     9,   422,  1511,    16,     8,  2214,
            13, 16715,     5,   555,   239,     6,  6964,  7864,     3,     9,
          3202,  2650,  8077,    11,    79,  1590,    16,   333,     5,  6964,
            19,   182,  1095,    11,  2764,    13,   112,   126,  1675,     5,
          8077,    19,   182,  1095,    11,  2764,    13,  6964,     5,  6964,
            11,  8077,   129,  4464,    11,    43,     3,     9,  1871,  3202,
             5,  6964,    19,   182,  1095,    11,  2764,    13,   112,   126,
          1675,     5,  8077,    19,   182,  1095,    11,  2764,    13,  6964,
             5,  8077,    11,  6964,    33,   182,  1095,    11,  2764,    13]],
       device='cuda:0')
<pad>Eric is a young boy who lives with his parents in a small town in the middle of nowhere. One day, Eric meets a girl named Sarah and they fall in love. Eric is very happy and proud

## 結合在RAG中

In [25]:
def build_prompt(query, search_results):
    prompt_template = """
You're a course assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT: 
{context}
""".strip()

    context = ""
    
    for doc in search_results:
        context = context + f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
    
    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

def llm(prompt):
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to("cuda")
    outputs = model.generate(input_ids,max_length=100)
    result = tokenizer.decode(outputs[0])
    return result

def rag(query):
    search_results = search(query)
    prompt = build_prompt(query, search_results)
    answer = llm(prompt)
    return answer

In [27]:
rag('I missed the class. What should I do?')

'<pad> CONTEXT: section: Module 2: Workflow Orchestration</s>'

In [39]:
# 看起來在長文的情況下，會報錯，可能是因為flan-t5-large模型的問題，先嘗試其他模型