In [1]:
import requests 

# Location of the course FAQ documents (JSON); `?raw=1` asks GitHub for the raw file.
docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
# Bound the wait, and fail loudly on an HTTP error instead of trying to parse
# an error page as JSON.
docs_response = requests.get(docs_url, timeout=30)
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course groups into one list, tagging every document with the
# name of the course it belongs to.
documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

In [8]:
!pip install elasticsearch


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3 -m pip install --upgrade pip[0m


In [9]:
from elasticsearch import Elasticsearch

In [12]:
# Client for the Elasticsearch node listening on localhost:9200.
es = Elasticsearch("http://localhost:9200")
# Name of the index that holds the FAQ documents.
index_name = 'courses'

In [24]:
# Index every FAQ document, using its position in `documents` as the document id.
for i, doc in enumerate(documents):
    es.index(index=index_name, id=i, document=doc)

In [25]:
# Free-text question used as the search input.
q = "How do execute a command on a Kubernetes pod?"

In [62]:
# Top-5 search: match `q` against `question` (boost ^4) and `text`,
# scoring each document by its best-matching field.
search_query = dict(
    size=5,
    query={
        "bool": {
            "must": {
                "multi_match": dict(
                    query=q,
                    fields=["question^4", "text"],
                    type="best_fields",
                ),
            },
        },
    },
)


In [58]:
# Execute the search request against the index.
response = es.search(index=index_name, body=search_query)

In [59]:
# The individual hits returned by the search.
hits = response['hits']['hits']

In [60]:
# Highest score reported for this search.
response['hits']['max_score']

44.50556

In [64]:
# Free-text question for the course-filtered search.
query = "How do copy a file to a Docker container?"

In [120]:
# Multi-field search over `question` (boost ^4) and `text`, restricted to a
# single course and limited to the top 3 hits.
search_query = {
    "size": 3,
    "query": {
        "bool": {
            "must": {
                "multi_match": {
                    "query": query,
                    "fields": ["question^4", "text"],
                    "type": "best_fields"
                }
            },
            # "operator": "and" requires every term of the course name to match.
            # With the default "or", an analyzed `course` field would also accept
            # any other course that shares a single term (e.g. "zoomcamp").
            # NOTE(review): the index appears to rely on dynamic mapping - confirm.
            "filter": {
                "match": {
                    "course": {
                        "query": "machine-learning-zoomcamp",
                        "operator": "and"
                    }
                }
            }
        }
    }
}

In [121]:
# Execute the course-filtered search request against the index.
response = es.search(index=index_name, body=search_query)

In [122]:
# The individual hits returned by the filtered search.
hits = response['hits']['hits']

In [123]:
# Will map a hit's score to the hit itself.
hit_dics = {}

In [124]:
# My approach to finding the third result: key every hit by its score.
# Caveat: hits that share the same score overwrite one another in this dict.
for hit in hits:
    hit_dics[hit["_score"]] = hit

In [148]:
# Order the (score, hit) pairs by descending score and take the last one,
# i.e. the lowest-scoring hit.
sorted(hit_dics.items(), key=lambda x: -x[0])[-1]

(59.812744,
 {'_index': 'courses',
  '_id': '636',
  '_score': 59.812744,
  '_ignored': ['text.keyword'],
  '_source': {'text': 'You can copy files from your local machine into a Docker container using the docker cp command. Here\'s how to do it:\nIn the Dockerfile, you can provide the folder containing the files that you want to copy over. The basic syntax is as follows:\nCOPY ["src/predict.py", "models/xgb_model.bin", "./"]\t\t\t\t\t\t\t\t\t\t\tGopakumar Gopinathan',
   'section': '5. Deploying Machine Learning Models',
   'question': 'How do I copy files from a different folder into docker container’s working directory?',
   'course': 'machine-learning-zoomcamp'}})

In [146]:
# What I should have done :D - read the question of the last hit directly.
hits[-1]['_source']['question']

'How do I copy files from a different folder into docker container’s working directory?'

In [170]:
# Prompt skeleton with {question} and {context} placeholders.
prompt_template = "\n".join([
    "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.",
    "Use only the facts from the CONTEXT when answering the QUESTION.",
    "",
    "QUESTION: {question}",
    "",
    "CONTEXT:",
    "{context}",
])

# One "section / question / answer" entry per search hit, concatenated in hit order.
context = ""
for hit in response['hits']['hits']:
    doc = hit['_source']
    context = context + (
        f"section: {doc['section']}\n\n"
        f"question: {doc['question']}\n\n"
        f"answer: {doc['text']}\n\n"
    )
context

'section: 5. Deploying Machine Learning Models\n\nquestion: How do I debug a docker container?\n\nanswer: Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.\ndocker run -it --entrypoint bash <image>\nIf the container is already running, execute a command in the specific container:\ndocker ps (find the container-id)\ndocker exec -it <container-id> bash\n(Marcos MJD)\n\nsection: 5. Deploying Machine Learning Models\n\nquestion: How do I copy files from my local machine to docker container?\n\nanswer: You can copy files from your local machine into a Docker container using the docker cp command. Here\'s how to do it:\nTo copy a file or directory from your local machine into a running Docker container, you can use the `docker cp command`. The basic syntax is as follows:\ndocker cp /path/to/local/file_or_directory container_id:/path/in/container\nHrithik Kumar Advani\n\nsection: 5. Deploying Machine Learning Models\n\nquestion: Ho

In [171]:
# Fill the template with a new question and the context built from the search hits.
q = "How do I execute a command in a running docker container?"
prompt = prompt_template.format(question=q, context=context).strip()

In [172]:
# Prompt length in characters.
len(prompt)

1642

In [179]:
from google import genai
import os


In [181]:
# Read the API key from the environment so it never appears in the notebook.
api_key = os.getenv("GEMINI_API_KEY")
client = genai.Client(api_key=api_key)
# Ask the API for the token count of the prompt under the gemini-2.0-flash model.
client.models.count_tokens(
    model="gemini-2.0-flash", contents=prompt
)

CountTokensResponse(total_tokens=390, cached_content_token_count=None)

In [239]:
def search(query, index_name='courses', size=5):
    """Return the Elasticsearch response for the FAQ entries best matching `query`.

    This function only queries: the documents must already be in the index.
    It no longer re-indexes the whole corpus on every call, which re-sent
    every document for each question and was not needed to run the search.

    Args:
        query: Free-text question to search for.
        index_name: Index to search; defaults to 'courses'.
        size: Maximum number of hits to return; defaults to 5.

    Returns:
        The response of `es.search`, unchanged.
    """
    search_query = {
        "size": size,
        "query": {
            "bool": {
                "must": {
                    # Match against both fields; `question` carries a ^4 boost.
                    "multi_match": {
                        "query": query,
                        "fields": ["question^4", "text"],
                        "type": "best_fields"
                    }
                }
            }
        }
    }
    return es.search(index=index_name, body=search_query)


In [240]:
def build_prompt(query, search_results):
    """Build the LLM prompt from a question and the search hits that answer it.

    The template is assembled line by line so that the function's own
    indentation does not end up inside the prompt text.

    Args:
        query: The user's question, inserted after "QUESTION:".
        search_results: Search response; each entry of
            search_results['hits']['hits'] must carry a '_source' dict with
            the keys 'section', 'question' and 'text'.

    Returns:
        The prompt string with leading and trailing whitespace stripped.
    """
    prompt_template = "\n".join([
        "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.",
        "Use only the facts from the CONTEXT when answering the QUESTION.",
        "",
        "QUESTION: {question}",
        "",
        "CONTEXT:",
        "{context}",
    ])

    # One "section / question / answer" entry per hit, in hit order.
    entries = []
    for hit in search_results['hits']['hits']:
        doc = hit['_source']
        entries.append(
            f"section: {doc['section']}\n\nquestion: {doc['question']}\n\nanswer: {doc['text']}\n\n"
        )
    context = "".join(entries)

    return prompt_template.format(question=query, context=context).strip()

In [241]:
def llm(prompt):
    """Send `prompt` to the gemini-2.0-flash model and return the reply text.

    A new client is created on every call, using the key stored in the
    GEMINI_API_KEY environment variable.
    """
    gemini = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
    reply = gemini.models.generate_content(model="gemini-2.0-flash", contents=prompt)
    return reply.text
    

In [242]:
def rag(query):
    """Answer `query` by chaining the three steps: search, build_prompt, llm."""
    return llm(build_prompt(query, search(query)))

In [252]:
# Run the whole pipeline (search -> prompt -> LLM) for one question.
query = "How do I debug a docker container?"
output = rag(query)

In [261]:
# Show the generated answer.
output

'To debug a docker container, launch the container image in interactive mode and override the entrypoint so that it starts a bash command: `docker run -it --entrypoint bash <image>`. If the container is already running, find the container ID using `docker ps` and then execute a command in the container: `docker exec -it <container-id> bash`.\n'