If you're not running in Saturn Cloud, you need to install these libraries:

Make sure you use the latest versions (the `-U` flag upgrades packages that are already installed):

```
pip install -U transformers accelerate bitsandbytes
```

In [1]:
import os
# Store Hugging Face downloads under /run/cache/ (a later cell notes that /run
# has lots of space in Saturn Cloud).
# NOTE(review): presumably this has to run before transformers is imported — confirm.
os.environ['HF_HOME'] = '/run/cache/'


In [2]:
# Remove any existing local copy, then download minsearch.py from the main
# branch of the minsearch repository so it can be imported below.
!rm -f minsearch.py
!wget https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py

--2024-07-08 14:47:42--  https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 2606:50c0:8000::154, 2606:50c0:8003::154, 2606:50c0:8002::154, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|2606:50c0:8000::154|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3832 (3,7K) [text/plain]
Saving to: ‘minsearch.py’


2024-07-08 14:47:42 (58,9 MB/s) - ‘minsearch.py’ saved [3832/3832]



In [3]:
import requests 
import minsearch

# Download the course FAQ as JSON: a list of courses, each with a 'course'
# name and a list of 'documents'.
docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
docs_response = requests.get(docs_url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten to one record per FAQ entry, tagging each with its course name.
# Each record is a copy, so documents_raw stays exactly as downloaded.
documents = [
    {**doc, 'course': course['course']}
    for course in documents_raw
    for doc in course['documents']
]

# "question", "text" and "section" are indexed as text fields;
# "course" is a keyword field (search() filters on it).
index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

index.fit(documents)

<minsearch.Index at 0x7a5d3111b700>

In [4]:
def search(query):
    """Look up FAQ entries matching ``query`` in the module-level ``index``.

    The lookup is restricted to the 'data-engineering-zoomcamp' course,
    boosts the 'question' field by 3.0 and the 'section' field by 0.5,
    and asks for 5 results.

    Returns:
        Whatever ``index.search`` returns for these arguments.
    """
    field_weights = {'question': 3.0, 'section': 0.5}
    course_filter = {'course': 'data-engineering-zoomcamp'}

    return index.search(
        query=query,
        filter_dict=course_filter,
        boost_dict=field_weights,
        num_results=5,
    )

In [10]:
def build_prompt(query, search_results):
    """Assemble the LLM prompt for a question from retrieved FAQ entries.

    Args:
        query: The user's question; substituted into the QUESTION slot.
        search_results: Iterable of dicts, each read through the keys
            'section', 'question' and 'text'.

    Returns:
        The filled-in prompt with leading and trailing whitespace removed.
    """
    # The template is spelled out line by line so the trailing space after
    # "CONTEXT:" stays visible and cannot be lost by an editor.
    template_lines = (
        "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.",
        "Use only the facts from the CONTEXT when answering the QUESTION.",
        "",
        "QUESTION: {question}",
        "",
        "CONTEXT: ",
        "{context}",
    )
    prompt_template = "\n".join(template_lines)

    # One formatted entry per retrieved document, each followed by a blank line.
    entries = [
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
        for doc in search_results
    ]
    context = "".join(entries)

    return prompt_template.format(question=query, context=context).strip()

def llm(prompt, model='phi3'):
    """Send ``prompt`` as a single user message and return the reply text.

    Relies on the module-level ``client`` (created in a later cell), so that
    cell must have been run before this function is called.

    Args:
        prompt: Text sent as the content of one "user" message.
        model: Name of the model to query. Defaults to 'phi3', the value
            that used to be hard-coded here.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )

    return response.choices[0].message.content

In [6]:
def rag(query):
    """Answer ``query`` by chaining retrieval, prompt building and the LLM call.

    The query is passed to ``search``; its results and the query go to
    ``build_prompt``; the resulting prompt goes to ``llm``.

    Returns:
        The value returned by ``llm`` for the built prompt.
    """
    retrieved = search(query)
    return llm(build_prompt(query, retrieved))

In [8]:
# Install the OpenAI client into the kernel's environment (explicit %pip magic).
%pip install openai

Collecting openai
  Downloading openai-1.35.10-py3-none-any.whl (328 kB)
[K     |████████████████████████████████| 328 kB 2.7 MB/s eta 0:00:01
Collecting distro<2,>=1.7.0
  Using cached distro-1.9.0-py3-none-any.whl (20 kB)
Installing collected packages: distro, openai
Successfully installed distro-1.9.0 openai-1.35.10
Note: you may need to restart the kernel to use updated packages.


In [14]:
from openai import OpenAI

# OpenAI client pointed at the server listening on localhost:11434.
# NOTE(review): 'ollama' looks like a placeholder key for a local Ollama server — confirm.
client = OpenAI(base_url='http://localhost:11434/v1/', api_key='ollama')


In [16]:
# Smoke test: send a trivial prompt through llm() to check that the client and model respond.
llm("write that this is a test")

' This is a test.\n\n\nThe sentence provided succinctly conveys the purpose of an action—to verify functionality or readiness in English, using clear and straightforward language appropriate for such scenarios as testing computer programs, systems, or processes without any ambiguity. It fits well into contexts where automated tests are performed to ensure everything is working correctly before a product goes live.'

In [12]:
# Print a reply so its newlines are rendered. The call is made explicitly
# rather than through `_`, which only holds the previous cell's output and
# breaks when cells are run out of order or on a fresh kernel.
print(llm("write that this is a test"))

 This is a test.


In this straightforward sentence, we are simply conveying the message to run or execute some form of examination without any additional details—it's concise and clear. The intent behind such instructions could be for systems programming where automated tests need to confirm proper functioning of code in various scenarios, enscurating confidence before deployment.


In [7]:
!df -h  # How much space do we have? Hugging Face is going to download a lot of data, so we will tell it to use another place for its downloads.
# We will use /run because it has lots of space in Saturn Cloud.

Filesystem      Size  Used Avail Use% Mounted on
tmpfs           3,2G  2,7M  3,2G   1% /run
/dev/sdb5       480G  451G  4,2G 100% /
tmpfs            16G  663M   15G   5% /dev/shm
tmpfs           5,0M   16K  5,0M   1% /run/lock
efivarfs        128K   21K  103K  17% /sys/firmware/efi/efivars
tmpfs            16G     0   16G   0% /run/qemu
/dev/sdb2        95M   34M   62M  36% /boot/efi
tmpfs           3,2G  2,6M  3,2G   1% /run/user/1000


In [None]:
# Show the configured Hugging Face cache location. HF_HOME is an environment
# variable, not a Python name, so it is read through os.environ
# (displays None if it has not been set).
os.environ.get('HF_HOME')

In [6]:
from transformers import T5Tokenizer, T5ForConditionalGeneration


ModuleNotFoundError: No module named 'transformers'

In [None]:
# Single source of truth for the checkpoint so tokenizer and model cannot drift apart.
T5_CHECKPOINT = "google/flan-t5-xl"

tokenizer = T5Tokenizer.from_pretrained(T5_CHECKPOINT)
# device_map="auto" leaves device placement to the library.
model = T5ForConditionalGeneration.from_pretrained(T5_CHECKPOINT, device_map="auto")

In [None]:
input_text = "translate English to German: How old are you?"
# Move the token ids to whatever device the model was placed on instead of
# assuming "cuda" is available (the model is loaded with device_map="auto").
input_ids = tokenizer(input_text, return_tensors="pt").input_ids.to(model.device)
input_ids

In [None]:
# Generate from the prepared input ids and print the first sequence as text.
outputs = model.generate(input_ids)
# skip_special_tokens=True drops markers such as padding / end-of-sequence
# tokens so only the generated text is printed.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))