In [12]:
!curl http://localhost:9200

{
  "name" : "59a3e847cf2e",
  "cluster_name" : "docker-cluster",
  "cluster_uuid" : "i7aYvniKSO65jzSJC_v-TA",
  "version" : {
    "number" : "9.0.1",
    "build_flavor" : "default",
    "build_type" : "docker",
    "build_hash" : "73f7594ea00db50aa7e941e151a5b3985f01e364",
    "build_date" : "2025-04-30T10:07:41.393025990Z",
    "build_snapshot" : false,
    "lucene_version" : "10.1.0",
    "minimum_wire_compatibility_version" : "8.18.0",
    "minimum_index_compatibility_version" : "8.0.0"
  },
  "tagline" : "You Know, for Search"
}


The build hash for version `9.0.1` is `73f7594ea00db50aa7e941e151a5b3985f01e364`

In [13]:
import requests

docs_url = "https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1"
docs_response = requests.get(docs_url)
documents_raw = docs_response.json()

documents = []

for course in documents_raw:
    course_name = course["course"]

    for doc in course["documents"]:
        doc["course"] = course_name
        documents.append(doc)


In [14]:
len(documents)


948

In [15]:
import time
from elasticsearch import Elasticsearch, ConnectionError

es = Elasticsearch("http://localhost:9200")
index_name = "course-questions"

for _ in range(30):
    try:
        if es.ping():
            print("✔ ES reachable")
            break
    except ConnectionError:
        pass
    print("…waiting for ping()…")
    time.sleep(1)
else:
    raise RuntimeError("ES did not respond to ping in 30s")


✔ ES reachable


In [16]:
if es.indices.exists(index="course-questions"):
    print("Deleting existing 'course-questions' index...")
    es.indices.delete(index="course-questions")
    print("Deleted old index.")

print("Creating 'course-questions' index...")
index_settings = {
    "settings": {"number_of_shards": 1, "number_of_replicas": 0},
    "mappings": {
        "properties": {
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            "course": {"type": "keyword"},
        }
    },
}
es.indices.create(index="course-questions", body=index_settings)
print("Created index 'course-questions'")


Deleting existing 'course-questions' index...
Deleted old index.
Creating 'course-questions' index...
Created index 'course-questions'


In [17]:
settings = es.indices.get_settings(index="course-questions")
readonly_flag = settings["course-questions"]["settings"]["index"].get(
    "blocks.read_only_allow_delete"
)
print("Current blocks.read_only_allow_delete flag:", readonly_flag)


Current blocks.read_only_allow_delete flag: None


In [7]:
from elasticsearch.helpers import bulk, BulkIndexError

actions = [
    {"_index": "course-questions", "_id": i, "_source": doc}
    for i, doc in enumerate(documents)
]
es_with_timeout = es.options(request_timeout=600)

try:
    success_count, errors = bulk(
        client=es_with_timeout,
        actions=actions,
        chunk_size=500,
    )
    print(f"Successfully indexed {success_count} documents into 'course-questions'.")
    if errors:
        print("Bulk returned errors (unexpected):")
        for err in errors[:5]:  # show up to the first 5
            print(err)
except BulkIndexError as bulk_err:
    print(f"Bulk indexing failed: {bulk_err.args[0]}")
    for i, item_err in enumerate(bulk_err.errors[:10], start=1):
        print(f"\nError #{i}:")
        print(item_err)
    raise


Successfully indexed 948 documents into 'course-questions'.


In [19]:
def retrieve_documents(query, index_name="course-questions", max_results=3, course="machine-learning-zoomcamp"):
    search_query = {
        "size": max_results,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^4", "text"],
                        "type": "best_fields",
                    }
                },
                "filter": {"term": {"course": course}},
            }
        },
    }

    response = es.search(index=index_name, body=search_query)
    documents = [
        {
            "score": hit["_score"],
            "question": hit["_source"]["question"],
            "section": hit["_source"]["section"],
            "text": hit["_source"]["text"],
        }
        for hit in response["hits"]["hits"]
    ]
    return documents


In [20]:
query = "How do execute a command on a Kubernetes pod?"
results = retrieve_documents(query)

for i, doc in enumerate(results, 1):
    print(f"{i}. Score: {doc['score']:.2f}")
    print(f"   Question: {doc['question']}")
    print(f"   Section: {doc['section']}")
    print(f"   Text: {doc['text'][:100]}...\n")


In [21]:
query = "How do copy a file to a Docker container?"
results = retrieve_documents(query)

for i, doc in enumerate(results, 1):
    print(f"{i}. Score: {doc['score']:.2f}")
    print(f"   Question: {doc['question']}")
    print(f"   Section: {doc['section']}")
    print(f"   Text: {doc['text'][:100]}...\n")

In [23]:
context_template = """
Q: {question}
A: {text}
""".strip()

context = "\n\n".join(
    context_template.format(question=doc["question"], text=doc["text"])
    for doc in results
)

target_question = "How do I execute a command in a running docker container?"

prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

prompt = prompt_template.format(question=target_question, context=context)

print("Prompt length:", len(prompt))

Prompt length: 244


In [24]:
import tiktoken

encoding = tiktoken.encoding_for_model("gpt-4o")
len(encoding.encode(prompt))

49

In [26]:
prompt2 = """
Improve this code:

from elasticsearch import Elasticsearch
from openai import OpenAI

from llm_rag_workshop.settings import OPENAI_API_KEY

es = Elasticsearch("http://localhost:9200")
index_name = "course-questions"

client = OpenAI(api_key=OPENAI_API_KEY)


def ask_openai(prompt, model="gpt-4o"):
    print(f"Asking OpenAI with {model} model...")
    completion = client.chat.completions.create(
        model=model, messages=[{"role": "user", "content": prompt}]
    )
    print(completion)
    content = completion.choices[0].message.content.strip()
    return content


def build_prompt(question, context):
    prompt = fAnswer the following question based on the provided context:
    
    Question: {question}
    Context: {context}
    
    Answer:
    

    return ask_openai(prompt)


def retrieve_documents(
    query,
    index_name="course-questions",
    max_results=5,
    course="data-engineering-zoomcamp",
):
    search_query = {
        "size": max_results,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^3", "text", "section"],
                        "type": "best_fields",
                    }
                },
                "filter": {"term": {"course": course}},
            }
        },
    }

    response = es.search(index=index_name, body=search_query)
    documents = [hit["_source"] for hit in response["hits"]["hits"]]
    return documents


def qa_bot(user_question, course="data-engineering-zoomcamp"):
    context_docs = retrieve_documents(user_question, course=course)
    prompt = build_prompt(user_question, context_docs)
    answer = ask_openai(prompt)
    return answer
"""


In [31]:
from transformers import AutoTokenizer

# Load DeepSeek's tokenizer
tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-R1-0528")


def count_deepseek_tokens(text: str) -> int:
    tokens = tokenizer.tokenize(text)
    print("→ Subword tokens:", tokens)
    token_ids = tokenizer.convert_tokens_to_ids(tokens)
    print("\n→ Token IDs:", token_ids)
    print("\n→ tokenizer.encode(...) result:", tokenizer.encode(text))
    print("\n→ Decode IDs back to text:", tokenizer.decode(token_ids))
    return len(tokens)

count_deepseek_tokens("How do I execute a command in a running docker container?")

→ Subword tokens: ['How', 'Ġdo', 'ĠI', 'Ġexecute', 'Ġa', 'Ġcommand', 'Ġin', 'Ġa', 'Ġrunning', 'Ġdocker', 'Ġcontainer', '?']

→ Token IDs: [4117, 696, 342, 22218, 260, 6107, 295, 260, 6934, 63751, 15012, 33]

→ tokenizer.encode(...) result: [0, 4117, 696, 342, 22218, 260, 6107, 295, 260, 6934, 63751, 15012, 33]

→ Decode IDs back to text: How do I execute a command in a running docker container?


12

In [28]:
from openai import OpenAI
from llm_rag_workshop.settings import OPENAI_API_KEY

client = OpenAI(api_key=OPENAI_API_KEY)


def ask_openai(prompt, model="gpt-4o"):
    print(f"Asking OpenAI with {model} model...")
    completion = client.chat.completions.create(
        model=model, messages=[{"role": "user", "content": prompt}]
    )
    print(completion)
    content = completion.choices[0].message.content.strip()
    return content


In [29]:
answer = ask_openai(prompt)


Asking OpenAI with gpt-4o model...
ChatCompletion(id='chatcmpl-Bfl23jN3OtMFOo28PLwPpMTsI1Fyf', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='To execute a command in a running Docker container, you can use the `docker exec` command. The general syntax is: \n\n```\ndocker exec [OPTIONS] <container_name_or_id> <command>\n```\n\nFor example, if you want to open a bash shell in a running container named `my_container`, you would use:\n\n```\ndocker exec -it my_container /bin/bash\n```\n\nHere, `-it` allows you to interactively run commands inside the container. You can replace `/bin/bash` with any other command you wish to execute inside the container.', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1749291939, model='gpt-4o-2024-08-06', object='chat.completion', service_tier='default', system_fingerprint='fp_07871e2ad8', usage=CompletionUsage(completion_tokens=118, prompt_to

I'll use the above query actual token numbers to calculate cost of 1000 requests:

```bash
usage=CompletionUsage(
    completion_tokens=118,
    prompt_tokens=56,
    total_tokens=174,
)
```

Input tokens (prompt_tokens): `118`
Output tokens (completion_tokens): `56`
Total: `174`

Input (118 tokens @ $0.005/1K):
118 ÷ 1000 × 0.005 = 

Output (72 tokens @ $0.015/1K):
56 ÷ 1000 × 0.015 = 

Input + Output = Total cost

So this single call cost approximately {Total cost}.

`{Total cost}` for 1000 requests