# Q1. Running Ollama with Docker

#### [Q1 Answer] 
ollama version is 0.1.48

In [None]:
# run ollama
$ docker run -it \
    --rm \
    -v ollama:/root/.ollama \
    -p 11434:11434 \
    --name ollama \
    ollama/ollama

# get container id
$ docker ps

# enter container
$ docker exec -it 7a30839483c3 bash

# get ollama version
7a30839483c3$ ollama -v
ollama version is 0.1.48

# Q2. Downloading an LLM

#### [Q2 Answer] 
{"schemaVersion":2,"mediaType":"application/vnd.docker.distribution.manifest.v2+json","config":{"mediaType":"application/vnd.docker.container.image.v1+json","digest":"sha256:887433b89a901c156f7e6944442f3c9e57f3c55d6ed52042cbb7303aea994290","size":483},"layers":[{"mediaType":"application/vnd.ollama.image.model","digest":"sha256:c1864a5eb19305c40519da12cc543519e48a0697ecd30e15d5ac228644957d12","size":1678447520},{"mediaType":"application/vnd.ollama.image.license","digest":"sha256:097a36493f718248845233af1d3fefe7a303f864fae13bc31a3a9704229378ca","size":8433},{"mediaType":"application/vnd.ollama.image.template","digest":"sha256:109037bec39c0becc8221222ae23557559bc594290945a2c4221ab4f303b8871","size":136},{"mediaType":"application/vnd.ollama.image.params","digest":"sha256:22a838ceb7fb22755a3b0ae9b4eadde629d19be1f651f73efb8c6b4e2cd0eea0","size":84}]}

In [None]:
# pull gemma in container
7a30839483c3$ ollama pull gemma:2b

# get gemma metadata
7a30839483c3$ cat /root/.ollama/models/manifests/registry.ollama.ai/library/gemma/2b
{
	"schemaVersion": 2,
	"mediaType": "application/vnd.docker.distribution.manifest.v2+json",
	"config": {
		"mediaType": "application/vnd.docker.container.image.v1+json",
		"digest": "sha256:887433b89a901c156f7e6944442f3c9e57f3c55d6ed52042cbb7303aea994290",
		"size": 483
	},
	"layers": [
		{
			"mediaType": "application/vnd.ollama.image.model",
			"digest": "sha256:c1864a5eb19305c40519da12cc543519e48a0697ecd30e15d5ac228644957d12",
			"size": 1678447520
		},
		{
			"mediaType": "application/vnd.ollama.image.license",
			"digest": "sha256:097a36493f718248845233af1d3fefe7a303f864fae13bc31a3a9704229378ca",
			"size": 8433
		},
		{
			"mediaType": "application/vnd.ollama.image.template",
			"digest": "sha256:109037bec39c0becc8221222ae23557559bc594290945a2c4221ab4f303b8871",
			"size": 136
		},
		{
			"mediaType": "application/vnd.ollama.image.params",
			"digest": "sha256:22a838ceb7fb22755a3b0ae9b4eadde629d19be1f651f73efb8c6b4e2cd0eea0",
			"size": 84
		}
	]
}

# Q3. Running the LLM

#### [Q3 Answer] 
 To solve this, we simply perform the multiplication of the two numbers provided:


```plaintext

  10

x 10

———

  100

```


Therefore, `1 startMultiplication` equals `100`.

In [None]:
# run llm
7a30839483c3$ ollama run phi3

# given prompt
>>> 10 * 10
 To solve this, we simply perform the multiplication of the two numbers provided:


```plaintext

  10

x 10

———

  100

```


Therefore, `1 startMultiplication` equals `100`.

# Q4. Downloading the weights

#### [Q4 Answer] 
1.7G

In [None]:
# create local ollama folder
$ mkdir /home/jupyter/llm/ollama_files

# run ollama container again with mapping to local folder
$ docker run -it \
    --rm \
    -v /home/jupyter/llm/ollama_files:/root/.ollama \
    -p 11434:11434 \
    --name ollama \
    ollama/ollama

# pull gemma model in new container
$ docker exec -it ollama ollama pull gemma:2b 

# enter container
$ docker exec -it ollama bash

# get models folder size (from container)
da2271fff893$ du -h /root/.ollama/models
8.0K    models/manifests/registry.ollama.ai/library/gemma
12K     models/manifests/registry.ollama.ai/library
16K     models/manifests/registry.ollama.ai
20K     models/manifests
1.6G    models/blobs
1.6G    models

# get models folder size (from local)
$ du -h /home/jupyter/llm/ollama_files
8.0K    /home/jupyter/llm/ollama_files/models/manifests/registry.ollama.ai/library/gemma
12K     /home/jupyter/llm/ollama_files/models/manifests/registry.ollama.ai/library
16K     /home/jupyter/llm/ollama_files/models/manifests/registry.ollama.ai
20K     /home/jupyter/llm/ollama_files/models/manifests
1.6G    /home/jupyter/llm/ollama_files/models/blobs
1.6G    /home/jupyter/llm/ollama_files/models
1.6G    /home/jupyter/llm/ollama_files

# Q5. Adding the weights

#### [Q5 Answer] 
./ollama_files /root/.ollama

In [None]:
# Create Dockerfile
'''
FROM ollama/ollama
COPY ./ollama_files /root/.ollama
'''

# Q6. Serving it

#### [Q6 Answer] 


In [None]:
# build it
$ cd /home/jupyter/llm/
$ docker build -t ollama-gemma2b .

# run it
$ docker run -it --rm -p 11434:11434 --name ollama ollama-gemma2b

# enter container
$ docker exec -it ollama bash

# check models
b7d183c5b0d7$ du -h /root/.ollama/models
8.0K    /root/.ollama/models/manifests/registry.ollama.ai/library/gemma
12K     /root/.ollama/models/manifests/registry.ollama.ai/library
16K     /root/.ollama/models/manifests/registry.ollama.ai
20K     /root/.ollama/models/manifests
1.6G    /root/.ollama/models/blobs
1.6G    /root/.ollama/models

'''
curl http://localhost:11434/api/generate -d '{
        "model": "phi3",
        "prompt": "10*10",
        "stream": true,
        "options": {
            "seed": 123,
            "top_k": 20,
            "top_p": 0.9,
            "temperature": 0
        }
    }'
'''

In [2]:
from openai import OpenAI
import os
 
# Path to a local file holding the API key, so the secret itself never
# appears in the notebook.
key_filepath = "/home/jupyter/llm-camp-homework/key"

# Read through a context manager so the file handle is closed, and strip
# surrounding whitespace: a trailing newline in the key file would otherwise
# become part of the key value.
with open(key_filepath, 'r', encoding='utf-8') as key_file:
    key = key_file.read().strip()

# The OpenAI client reads its API key from this environment variable.
os.environ["OPENAI_API_KEY"] = key

# Point the client at the OpenAI-compatible endpoint on localhost:11434.
client = OpenAI(
    base_url = "http://localhost:11434/v1/"
)

def llm(prompt):
    """Send `prompt` as a single user message to the 'phi3' model.

    Uses the module-level `client` with temperature 0.0 and returns the
    text content of the first choice in the response.
    """
    chat_messages = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(
        model='phi3',
        messages=chat_messages,
        temperature=0.0,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# Smoke test: send a question straight to the model, with no retrieved context.
prompt = "What's the formula for energy?"
response = llm(prompt)
print(response)

 The basic formula to calculate mechanical energy (which is a combination of potential and kinetic energy) in classical mechanics, assuming no non-conservative forces like friction are at work, can be expressed as:

E = PE + KE

where E represents the total mechanical energy, PE stands for gravitational potential energy, and KE is kinetic energy. The formulas for each of these components in a system where only gravity acts on an object near Earth's surface are given by:

PE = m * g * h
KE = 0.5 * m * v^2

Here, 'm' represents the mass of the object, 'g' is the acceleration due to gravity (approximately 9.81 m/s^2 near Earth's surface), 'h' stands for height above a reference point, and 'v' is velocity. So if you want just one formula that encompasses both potential and kinetic energy in such scenarios:

E = m * g * h + 0.5 * m * v^2


In [10]:
def build_prompt(query, search_results):
    """Assemble the question-answering prompt for `query`.

    Each item of `search_results` is indexed with the keys 'section',
    'question' and 'text'; the items are rendered one after another into
    the CONTEXT part of the template. The filled-in template is returned
    with leading and trailing whitespace stripped.
    """
    prompt_template = """
        You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
        Use only the facts from the CONTEXT when answering the QUESTION.

        QUESTION: {question}

        CONTEXT:
        {context}
    """.strip()

    # One text chunk per retrieved document, each terminated by a blank line.
    rendered_docs = [
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
        for doc in search_results
    ]
    context = "".join(rendered_docs)

    filled = prompt_template.format(question=query, context=context)
    return filled.strip()

#prompt = build_prompt(query, search_results)
#print(prompt)

In [13]:
from openai import OpenAI
import os

# Path to a local file holding the API key, so the secret itself never
# appears in the notebook.
key_filepath = "/home/jupyter/llm-camp-homework/key"

# Read through a context manager so the file handle is closed, and strip
# surrounding whitespace: a trailing newline in the key file would otherwise
# become part of the key value.
with open(key_filepath, 'r', encoding='utf-8') as key_file:
    key = key_file.read().strip()

# The OpenAI client reads its API key from this environment variable.
os.environ["OPENAI_API_KEY"] = key

# Point the client at the OpenAI-compatible endpoint on localhost:11434.
client = OpenAI(
    base_url = "http://localhost:11434/v1/"
)

def llm(prompt):
    """Return the 'phi3' model's reply to `prompt`.

    The prompt is sent as one user-role message through the module-level
    `client`, with temperature 0.0; the content of the first returned
    choice is the result.
    """
    request_kwargs = {
        "model": 'phi3',
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.0,
    }
    completion = client.chat.completions.create(**request_kwargs)
    return completion.choices[0].message.content

# NOTE(review): `prompt` is not defined in this cell; it relies on a value
# left in the kernel by another cell — confirm the intended run order.
response = llm(prompt)
print(response)

 I'm sorry, but it seems like there is no context provided regarding a formula for energy in this FAQ database. The questions mentioned are related to machine learning concepts such as linear regression, matrix multiplication and target variables used within specific projects or reports. If you have any other question about these topics, feel free to ask!


In [1]:
# Remove any previously downloaded copy, then fetch minsearch.py into the
# working directory so the `import minsearch` further down can find it.
!rm -f minsearch.py
!wget https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py

--2024-07-09 13:56:36--  https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3832 (3.7K) [text/plain]
Saving to: ‘minsearch.py’


2024-07-09 13:56:36 (19.9 MB/s) - ‘minsearch.py’ saved [3832/3832]



In [2]:
import requests 
import minsearch

# FAQ documents, served as raw JSON from the course repository.
docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'

# A timeout keeps the cell from hanging indefinitely on a stalled connection.
docs_response = requests.get(docs_url, timeout=30)
# Fail loudly on an HTTP error instead of trying to JSON-decode an error page.
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course structure into one list, tagging every document
# with the name of the course it belongs to.
documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

# Kept as the cell's last expression so its return value is displayed.
index.fit(documents)

<minsearch.Index at 0x7f7ccf0c5330>

In [13]:
def search(query):
    """Query the module-level `index` and return its results for `query`.

    The search passes a boost of 3.0 for the 'question' field and 0.5 for
    'section', requests 5 results, and applies no course filter.
    """
    field_boosts = {'question': 3.0, 'section': 0.5}
    return index.search(
        query=query,
        boost_dict=field_boosts,
        num_results=5,
    )

In [5]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
# Load the tokenizer and the model weights for the "google/flan-t5-xl" checkpoint.
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-xl")
# NOTE(review): device_map="auto" presumably delegates device placement to the library — confirm.
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xl", device_map="auto")

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [17]:
# Retrieval-only check: run one query through `search` and print the raw hits.
query = "I just discovered the course. Can I still join it?"
search_results = search(query)
print(search_results)

[{'text': "Yes, even if you don't register, you're still eligible to submit the homeworks.\nBe aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.", 'section': 'General course-related questions', 'question': 'Course - Can I still join the course after the start date?', 'course': 'data-engineering-zoomcamp'}, {'text': 'Yes, we will keep all the materials after the course finishes, so you can follow the course at your own pace after it finishes.\nYou can also continue looking at the homeworks and continue preparing for the next cohort. I guess you can also start working on your final capstone project.', 'section': 'General course-related questions', 'question': 'Course - Can I follow the course after it finishes?', 'course': 'data-engineering-zoomcamp'}, {'text': "The purpose of this document is to capture frequently asked technical questions\nThe exact day and hour of the course will be 15th Jan 2024 at 17h00. Th

In [23]:
from openai import OpenAI
import os

# Path to a local file holding the API key, so the secret itself never
# appears in the notebook.
key_filepath = "/home/jupyter/llm-camp-homework/key"

# Read through a context manager so the file handle is closed, and strip
# surrounding whitespace: a trailing newline in the key file would otherwise
# become part of the key value.
with open(key_filepath, 'r', encoding='utf-8') as key_file:
    key = key_file.read().strip()

# The OpenAI client reads its API key from this environment variable.
os.environ["OPENAI_API_KEY"] = key

# Point the client at the OpenAI-compatible endpoint on localhost:11434.
client = OpenAI(
    base_url = "http://localhost:11434/v1/"
)

def llm(prompt):
    """Return the 'phi3' chat completion for `prompt`.

    The prompt is sent as a single user-role message through the
    module-level `client` with temperature 0.0, and the text content of the
    first choice is returned.
    """
    response = client.chat.completions.create(
        model       = 'phi3',
        messages    = [{"role": "user", "content": prompt}],
        temperature = 0.0
    )
    return response.choices[0].message.content

In [24]:
# End-to-end run: retrieve documents for the query, build the prompt from
# them, send it to the model and print the answer.
query = "What's the formula for energy?"
search_results = search(query)
prompt = build_prompt(query, search_results)
answer = llm(prompt)
print(answer)

 The homework URL in the context provided is not explicitly mentioned as being for energy calculations specifically; however, based on general knowledge about course-related questions regarding repositories:

The formula for calculating energy (E) can be given by E = mgh or E = 1/2 mv^2 + mgh depending on whether you're considering gravitational potential energy in a vertical system and kinetic energy. Here, 'm' stands for mass, 'g' is the acceleration due to gravity (approximately 9.81 m/s² near Earth’s surface), 'h' represents height or displacement within a gravitational field, and 'v' denotes velocity of an object with respect to its surr0672345_energy-formula
tion in the vertical direction (ignoring air resistance). These formulas are fundamental concepts that might be covered as part of coursework related to physics.


In [25]:
# Character count of a model answer pasted in verbatim; the literal also
# includes the newline right after the opening quotes and the one before the
# closing quotes.
s = """
 The homework URL in the context provided is not explicitly mentioned as being for energy calculations specifically; however, based on general knowledge about course-related questions regarding repositories:

The formula for calculating energy (E) can be given by E = mgh or E = 1/2 mv^2 + mgh depending on whether you're considering gravitational potential energy in a vertical system and kinetic energy. Here, 'm' stands for mass, 'g' is the acceleration due to gravity (approximately 9.81 m/s² near Earth’s surface), 'h' represents height or displacement within a gravitational field, and 'v' denotes velocity of an object with respect to its surr0672345_energy-formula
tion in the vertical direction (ignoring air resistance). These formulas are fundamental concepts that might be covered as part of coursework related to physics.
"""
print(len(s))

836
