In [1]:
# Query the service on localhost:9200; the JSON reply includes the node name and version details.
!curl http://localhost:9200

{
  "name" : "59a3e847cf2e",
  "cluster_name" : "docker-cluster",
  "cluster_uuid" : "i7aYvniKSO65jzSJC_v-TA",
  "version" : {
    "number" : "9.0.1",
    "build_flavor" : "default",
    "build_type" : "docker",
    "build_hash" : "73f7594ea00db50aa7e941e151a5b3985f01e364",
    "build_date" : "2025-04-30T10:07:41.393025990Z",
    "build_snapshot" : false,
    "lucene_version" : "10.1.0",
    "minimum_wire_compatibility_version" : "8.18.0",
    "minimum_index_compatibility_version" : "8.0.0"
  },
  "tagline" : "You Know, for Search"
}


The build hash for version `9.0.1` is `73f7594ea00db50aa7e941e151a5b3985f01e364`

In [5]:
import requests

# Raw-content URL of the FAQ dataset: a JSON list with one entry per course,
# each entry holding that course's name and its list of documents.
docs_url = "https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1"

# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() reports an HTTP error here instead of letting it surface
# later as a confusing JSON decoding failure.
docs_response = requests.get(docs_url, timeout=30)
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course structure into one list, tagging every document with
# the name of the course it came from. New dicts are built rather than
# mutating the downloaded ones, so documents_raw stays exactly as fetched.
documents = [
    {**doc, "course": course["course"]}
    for course in documents_raw
    for doc in course["documents"]
]


In [12]:
# Number of documents in the flattened list (all courses combined).
len(documents)


948

In [6]:
import time
from elasticsearch import Elasticsearch, ConnectionError

es = Elasticsearch("http://localhost:9200")
index_name = "course-questions"

# Poll the node until it answers a ping, giving up after 30 attempts that are
# each followed by a one-second pause.
attempts_left = 30
reachable = False
while attempts_left > 0 and not reachable:
    attempts_left -= 1
    try:
        reachable = bool(es.ping())
    except ConnectionError:
        # Treated the same as an unanswered ping: wait and try again.
        reachable = False

    if reachable:
        print("✔ ES reachable")
    else:
        print("…waiting for ping()…")
        time.sleep(1)

if not reachable:
    raise RuntimeError("ES did not respond to ping in 30s")


✔ ES reachable


In [7]:
# Start from a clean slate: drop any index left over from a previous run so
# the mapping defined below is always the one in effect.
if es.indices.exists(index=index_name):
    print(f"Deleting existing '{index_name}' index...")
    es.indices.delete(index=index_name)
    print("Deleted old index.")

print(f"Creating '{index_name}' index...")
index_settings = {
    "settings": {"number_of_shards": 1, "number_of_replicas": 0},
    "mappings": {
        "properties": {
            # Free-text fields are analyzed for full-text search.
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            # `course` is a keyword so it can be matched exactly (e.g. by a term filter).
            "course": {"type": "keyword"},
        }
    },
}
# Settings and mappings are passed as their own keyword arguments rather than
# wrapped in a single `body` dict.
es.indices.create(
    index=index_name,
    settings=index_settings["settings"],
    mappings=index_settings["mappings"],
)
print(f"Created index '{index_name}'")


Deleting existing 'course-questions' index...
Deleted old index.
Creating 'course-questions' index...
Created index 'course-questions'


In [8]:
# Inspect the index settings for a read_only_allow_delete block.
settings = es.indices.get_settings(index=index_name)
index_level_settings = settings[index_name]["settings"]["index"]

# The settings come back nested by default ({"blocks": {"read_only_allow_delete": ...}}),
# so a lookup with the dotted key "blocks.read_only_allow_delete" would always
# return None. Walk the nesting instead; None now really means "no block set".
readonly_flag = index_level_settings.get("blocks", {}).get("read_only_allow_delete")
print("Current blocks.read_only_allow_delete flag:", readonly_flag)


Current blocks.read_only_allow_delete flag: None


In [9]:
from elasticsearch.helpers import bulk, BulkIndexError

# One bulk action per document; the position in `documents` doubles as the
# document id, so re-running the cell overwrites rather than duplicates.
actions = [
    {"_index": index_name, "_id": i, "_source": doc}
    for i, doc in enumerate(documents)
]
# Client view with a generous per-request timeout for the bulk upload.
es_with_timeout = es.options(request_timeout=600)

try:
    success_count, errors = bulk(
        client=es_with_timeout,
        actions=actions,
        chunk_size=500,
    )
    print(f"Successfully indexed {success_count} documents into '{index_name}'.")
    if errors:
        print("Bulk returned errors (unexpected):")
        for err in errors[:5]:  # show up to the first 5
            print(err)
except BulkIndexError as bulk_err:
    # Print a short summary plus the first few per-item errors, then re-raise
    # so the failure is not silently ignored by later cells.
    print(f"Bulk indexing failed: {bulk_err.args[0]}")
    for i, item_err in enumerate(bulk_err.errors[:10], start=1):
        print(f"\nError #{i}:")
        print(item_err)
    raise


Successfully indexed 948 documents into 'course-questions'.


In [13]:
def retrieve_documents(query, index_name="course-questions", max_results=3, course="machine-learning-zoomcamp"):
    """Search the FAQ index and return the best matches for a single course.

    Args:
        query: Free-text question to search for.
        index_name: Name of the index to search.
        max_results: Maximum number of hits to return.
        course: Exact value the `course` field of a hit must have.

    Returns:
        A list of dicts with the keys "score", "question", "section" and
        "text", in the order the hits were returned by the search.
    """
    # Full-text part: match on `question` (listed with a ^4 boost) and `text`.
    text_match = {
        "multi_match": {
            "query": query,
            "fields": ["question^4", "text"],
            "type": "best_fields",
        }
    }
    # Filter part: restrict the hits to the requested course.
    course_filter = {"term": {"course": course}}
    request_body = {
        "size": max_results,
        "query": {"bool": {"must": text_match, "filter": course_filter}},
    }

    search_result = es.search(index=index_name, body=request_body)

    matches = []
    for hit in search_result["hits"]["hits"]:
        source = hit["_source"]
        matches.append(
            {
                "score": hit["_score"],
                "question": source["question"],
                "section": source["section"],
                "text": source["text"],
            }
        )
    return matches


In [14]:
query = "How do execute a command on a Kubernetes pod?"
results = retrieve_documents(query)

# List every hit with its 1-based rank, its relevance score and the first
# 100 characters of the answer text.
for rank, hit in enumerate(results, start=1):
    preview = hit["text"][:100]
    print(
        f"{rank}. Score: {hit['score']:.2f}",
        f"   Question: {hit['question']}",
        f"   Section: {hit['section']}",
        f"   Text: {preview}...\n",
        sep="\n",
    )


1. Score: 44.51
   Question: How do I debug a docker container?
   Section: 5. Deploying Machine Learning Models
   Text: Launch the container image in interactive mode and overriding the entrypoint, so that it starts a ba...

2. Score: 35.43
   Question: Kubernetes-dashboard
   Section: 10. Kubernetes and TensorFlow Serving
   Text: Deploy and Access the Kubernetes Dashboard
Luke...

3. Score: 33.71
   Question: How do I copy files from a different folder into docker container’s working directory?
   Section: 5. Deploying Machine Learning Models
   Text: You can copy files from your local machine into a Docker container using the docker cp command. Here...



In [16]:
query = "How do copy a file to a Docker container?"
results = retrieve_documents(query)

# List every hit with its 1-based rank, its relevance score and the first
# 100 characters of the answer text.
for rank, hit in enumerate(results, start=1):
    preview = hit["text"][:100]
    print(
        f"{rank}. Score: {hit['score']:.2f}",
        f"   Question: {hit['question']}",
        f"   Section: {hit['section']}",
        f"   Text: {preview}...\n",
        sep="\n",
    )

1. Score: 73.39
   Question: How do I debug a docker container?
   Section: 5. Deploying Machine Learning Models
   Text: Launch the container image in interactive mode and overriding the entrypoint, so that it starts a ba...

2. Score: 66.69
   Question: How do I copy files from my local machine to docker container?
   Section: 5. Deploying Machine Learning Models
   Text: You can copy files from your local machine into a Docker container using the docker cp command. Here...

3. Score: 59.81
   Question: How do I copy files from a different folder into docker container’s working directory?
   Section: 5. Deploying Machine Learning Models
   Text: You can copy files from your local machine into a Docker container using the docker cp command. Here...



In [19]:
# Layout of a single FAQ entry inside the prompt's CONTEXT section.
context_template = """
Q: {question}
A: {text}
""".strip()

# Overall prompt layout; the placeholders are filled in further down.
prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

target_question = "How do I execute a command in a running docker container?"

# One formatted entry per retrieved document, separated by blank lines.
context_entries = [
    context_template.format(question=doc["question"], text=doc["text"])
    for doc in results
]
context = "\n\n".join(context_entries)

prompt = prompt_template.format(context=context, question=target_question)

print("Prompt length:", len(prompt))

Prompt length: 1462


In [20]:
import tiktoken

# Look up the tokenizer tiktoken associates with gpt-4o, encode the prompt with
# it, and display how many tokens the prompt produces.
encoding = tiktoken.encoding_for_model("gpt-4o")
prompt_token_ids = encoding.encode(prompt)
len(prompt_token_ids)

322

In [21]:
from openai import OpenAI
from llm_rag_workshop.settings import OPENAI_API_KEY

# OpenAI API client, configured with the key imported from the project's settings module.
client = OpenAI(api_key=OPENAI_API_KEY)


def ask_openai(prompt, model="gpt-4o"):
    """Send `prompt` as a single user message and return the reply text.

    Args:
        prompt: Full prompt text, sent as the content of one "user" message.
        model: Name of the chat model to query.

    Returns:
        The first choice's message content with surrounding whitespace
        removed, or an empty string when the reply carries no text content.
    """
    print(f"Asking OpenAI with {model} model...")
    completion = client.chat.completions.create(
        model=model, messages=[{"role": "user", "content": prompt}]
    )
    # The raw response is echoed so details such as token usage can be read
    # from the cell output.
    print(completion)
    # `content` is optional on the response message; guard against None so the
    # function always returns a string instead of raising AttributeError.
    content = completion.choices[0].message.content
    return content.strip() if content else ""


In [22]:
# Send the assembled prompt (question plus retrieved FAQ context) to the model and keep its reply.
answer = ask_openai(prompt)


Asking OpenAI with gpt-4o model...
ChatCompletion(id='chatcmpl-Bdizcj4WxOteyZgXYtrahVLMRNUXe', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='To execute a command in a running Docker container, you can use the `docker exec` command. First, identify the container ID with `docker ps`, then execute the command using the following syntax:\n\n```\ndocker exec -it <container-id> bash\n```\n\nThis will open a bash shell inside the running container, allowing you to execute further commands.', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None))], created=1748807444, model='gpt-4o-2024-08-06', object='chat.completion', service_tier='default', system_fingerprint='fp_07871e2ad8', usage=CompletionUsage(completion_tokens=72, prompt_tokens=329, total_tokens=401, completion_tokens_details=CompletionTokensDetails(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_token

I'll use the actual token counts reported for the query above to estimate the cost of 1000 requests:

```python
usage=CompletionUsage(
    completion_tokens=72,
    prompt_tokens=329,
    total_tokens=401,
)
```

- Input tokens (`prompt_tokens`): `329`
- Output tokens (`completion_tokens`): `72`
- Total (`total_tokens`): `401`

Input (329 tokens @ \$0.005 per 1K tokens): 329 ÷ 1000 × 0.005 = \$0.001645

Output (72 tokens @ \$0.015 per 1K tokens): 72 ÷ 1000 × 0.015 = \$0.00108

Total per request: \$0.001645 + \$0.00108 = \$0.002725

So this single call cost approximately \$0.002725.

At that rate, 1000 requests cost about `$2.73` (1000 × \$0.002725 = \$2.725).