If you're not running in Saturn Cloud, you need to install the libraries listed below.

Make sure you use the latest versions (the `-U` flag upgrades any that are already installed):

```
pip install -U transformers accelerate bitsandbytes
```

In [15]:
!pip install -U transformers accelerate bitsandbytes

Collecting flash-attn
  Downloading flash_attn-2.5.9.post1.tar.gz (2.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.6/2.6 MB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting einops (from flash-attn)
  Downloading einops-0.8.0-py3-none-any.whl (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.2/43.2 kB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: flash-attn
  Building wheel for flash-attn (setup.py) ... [?25l[?25hdone
  Created wheel for flash-attn: filename=flash_attn-2.5.9.post1-cp310-cp310-linux_x86_64.whl size=120889689 sha256=5022ba11d48bf74926da9c16260f4ea2b9bb7f4e29bdb4bd6e1383ad1c55d16f
  Stored in directory: /root/.cache/pip/wheels/cc/ad/f6/7ccf0238790d6346e9fe622923a76ec218e890d356b9a2754a
Successfully built flash-attn
Installing collected packages: einops, flash-attn
Successfully installed einops-0.8.0 flash-attn-2.5.9.post

In [2]:
# saturn cloud only
# import os
# os.environ['HF_HOME'] = '/run/cache/'

In [3]:
# Fetch the single-file minsearch module into the working directory so it can be imported below.
# Any existing copy is removed first — presumably so the download always ends up as minsearch.py
# rather than next to a stale copy (verify against wget's naming behaviour).
!rm -f minsearch.py
!wget https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py

--2024-07-03 14:15:56--  https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3832 (3.7K) [text/plain]
Saving to: ‘minsearch.py’


2024-07-03 14:15:56 (63.4 MB/s) - ‘minsearch.py’ saved [3832/3832]



In [1]:
import requests
import minsearch

# Download the course FAQ: a JSON list of courses, each carrying its own list of documents.
docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
# A timeout keeps the cell from hanging forever on a stalled connection.
docs_response = requests.get(docs_url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course structure into one list, tagging every document with its course name
# so it can later be used as a keyword filter.
documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

# question/text/section are indexed as text fields; course is indexed as a keyword field.
index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

# Kept as the last expression so the cell displays the fitted index object.
index.fit(documents)

<minsearch.Index at 0x7a2037d76470>

In [2]:
def search(query, course='data-engineering-zoomcamp', num_results=5):
    """Look up the FAQ documents in the global ``index`` that best match ``query``.

    Args:
        query: Free-text question to search for.
        course: Value of the ``course`` keyword field that results are restricted to.
            Defaults to the data engineering zoomcamp, as before.
        num_results: Number of results requested from the index (default 5).

    Returns:
        Whatever ``index.search`` returns; ``build_prompt`` iterates over it and
        reads the ``section``, ``question`` and ``text`` keys of each item.
    """
    # Per-field boost weights handed to the index: 3.0 for question, 0.5 for section.
    boost = {'question': 3.0, 'section': 0.5}

    return index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results,
    )

In [3]:
def rag(query):
    """Answer ``query`` with retrieval-augmented generation.

    The query is passed to ``search``; the retrieved documents and the query
    are turned into a prompt by ``build_prompt``; the value returned by
    ``llm`` for that prompt is the answer.
    """
    retrieved_docs = search(query)
    return llm(build_prompt(query, retrieved_docs))

In [4]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Seed torch's random number generator with a fixed value so runs start from the same RNG state.
torch.random.manual_seed(0)

<torch._C.Generator at 0x7a2037582a90>

In [5]:
# Single source of truth for the checkpoint, shared by the model and the tokenizer.
MODEL_NAME = "microsoft/Phi-3-mini-128k-instruct"

# Pick the best available backend instead of hard-coding one: CUDA GPU first,
# then Apple-silicon MPS, then plain CPU. Assign DEVICE manually to force a backend.
if torch.cuda.is_available():
    DEVICE = "cuda"
elif torch.backends.mps.is_available():
    DEVICE = "mps"
else:
    DEVICE = "cpu"

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map=DEVICE,
    torch_dtype="auto",
    # NOTE(review): trust_remote_code=True allows Python code shipped in the model
    # repository to run locally; keep it only for repositories you trust.
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [6]:
# Text-generation pipeline built from the `model` and `tokenizer` objects; used by llm().
pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

In [7]:
def build_prompt(query, search_results):
    """Render the teaching-assistant prompt for ``query``.

    Args:
        query: The user's question; inserted after ``QUESTION:``.
        search_results: Iterable of dicts with ``section``, ``question`` and
            ``text`` keys; each one becomes a three-line entry under ``CONTEXT:``.

    Returns:
        The filled-in prompt with leading and trailing whitespace removed.
    """
    template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

    # One entry per document, each followed by a blank line.
    entries = (
        f"section: {item['section']}\nquestion: {item['question']}\nanswer: {item['text']}\n\n"
        for item in search_results
    )
    rendered_context = "".join(entries)

    # The final strip drops the blank line left after the last entry.
    return template.format(question=query, context=rendered_context).strip()

def llm(prompt, max_new_tokens=500):
    """Generate a reply to ``prompt`` with the global text-generation ``pipe``.

    Args:
        prompt: Text sent as the content of a single ``user`` chat message.
        max_new_tokens: Upper bound on the number of generated tokens (default 500).

    Returns:
        The ``generated_text`` of the first pipeline output, stripped of
        surrounding whitespace.
    """
    messages = [{"role": "user", "content": prompt}]

    generation_args = {
        "max_new_tokens": max_new_tokens,
        # Return only the newly generated text, not the prompt plus the completion.
        "return_full_text": False,
        # Greedy decoding. No temperature is passed: it only applies when sampling,
        # and combining it with do_sample=False triggers a generation-config warning.
        "do_sample": False,
    }

    output = pipe(messages, **generation_args)
    return output[0]['generated_text'].strip()

In [None]:
%%time
# Timing run on CPU (MacBook Pro, M1 Pro); %%time reports the CPU and wall-clock time of the cell.
rag("I just discovered the course. Can I still join it?")

CPU times: user 21min 19s, sys: 6min 19s, total: 27min 38s
Wall time: 13min 15s


"I'm sorry, but the provided context does not directly answer the question about joining the course after the start date. However, based on the information given, you can still submit homework even if you don't register before the course starts. Just remember that there will be deadlines for final projects, so it's best to register as soon as possible."

In [None]:
%%time
# Timing run on the MPS backend (MacBook Pro, M1 Pro); same question as the CPU run for comparison.
rag("I just discovered the course. Can I still join it?")

CPU times: user 13.1 s, sys: 6.68 s, total: 19.7 s
Wall time: 45.8 s


"I'm sorry, but the provided context does not contain information about joining a course after the start date. The context only mentions that you can still submit homeworks even if you don't register, but it doesn't explicitly state whether you can join the course after the start date."

In [14]:
%%time
# Timing run on a GPU in Google Colab; same question as the other timing runs for comparison.
rag("I just discovered the course. Can I still join it?")

CPU times: user 6.57 s, sys: 526 µs, total: 6.57 s
Wall time: 6.67 s


'You can still join the course even if you discover it after the start date. You are eligible to submit the homeworks, but remember to meet the deadlines for the final projects. You can also follow the course materials at your own pace after it finishes, and start working on your final capstone project.'