In [112]:
import textwrap
from typing import Any, Optional

import requests
import tiktoken
from dotenv import load_dotenv
from elasticsearch import Elasticsearch
from openai import OpenAI
from tqdm.auto import tqdm

# Load variables from a local .env file into the process environment
# (presumably where the OpenAI API key used by OpenAI() comes from — confirm).
load_dotenv()

class HomeworkClass:
    """Minimal retrieval-augmented generation (RAG) helper over the course FAQ.

    On construction the FAQ documents are downloaded and flattened into a list
    of dicts (each tagged with its course name). The documents can then be
    indexed into Elasticsearch, searched, turned into an LLM prompt, and
    answered with an OpenAI chat completion.
    """

    # Model used for chat completions and for selecting the tiktoken encoding.
    MODEL_NAME = "gpt-4o"
    # Location of the raw FAQ documents (a list of courses, each with 'documents').
    DOCS_URL = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
    # Seconds to wait for the documents download before giving up.
    DOCS_TIMEOUT = 30

    # Annotations are quoted so that merely defining the class does not
    # evaluate them at class-creation time.
    documents: "list[dict[str, Any]]"
    open_ai: "OpenAI"
    es_client: "Elasticsearch"
    index_settings: "dict[str, Any]"
    index_name: str

    def __init__(self, index_name: str):
        """Create the API clients and download the FAQ documents.

        Args:
            index_name: Name of the Elasticsearch index this instance manages.

        Raises:
            requests.HTTPError: If the documents download returns an error status.
            requests.Timeout: If the download exceeds ``DOCS_TIMEOUT`` seconds.
        """
        self.open_ai = OpenAI()
        self.es_client = Elasticsearch('http://localhost:9200')
        self.index_name = index_name

        docs_response = requests.get(self.DOCS_URL, timeout=self.DOCS_TIMEOUT)
        # Fail loudly on HTTP errors instead of trying to parse an error page as JSON.
        docs_response.raise_for_status()
        documents_raw = docs_response.json()

        # Flatten course -> documents into one list, tagging each doc with its course.
        self.documents = [
            {**doc, 'course': course['course']}
            for course in documents_raw
            for doc in course['documents']
        ]

        self.index_settings = {
            "settings": {
                "number_of_shards": 1,
                "number_of_replicas": 0
            },
            "mappings": {
                "properties": {
                    "text": {"type": "text"},
                    "section": {"type": "text"},
                    "question": {"type": "text"},
                    # keyword (not analyzed) so the course can be used in term filters
                    "course": {"type": "keyword"}
                }
            }
        }

    def _create_index(self):
        """Create the index named ``index_name`` using ``index_settings``."""
        self.es_client.indices.create(
            index=self.index_name, body=self.index_settings)

    def _destroy_index(self):
        """Delete the index; a missing index is not treated as an error."""
        # ignore_unavailable lets reinitialize_index() work on a fresh cluster.
        self.es_client.indices.delete(
            index=self.index_name, ignore_unavailable=True)

    def _index_data(self):
        """Index every downloaded document, one request per document."""
        for doc in tqdm(self.documents):
            self.es_client.index(index=self.index_name, document=doc)

    def init_index(self):
        """Create the index and load all documents into it."""
        self._create_index()
        self._index_data()

    def reinitialize_index(self):
        """Drop the index (if it exists), then recreate and repopulate it."""
        self._destroy_index()
        self.init_index()

    def elastic_search(
        self,
        query: str,
        course: Optional[str] = "machine-learning-zoomcamp",
        size: int = 3,
    ):
        """Search the index for documents matching ``query``.

        The query is matched against ``question`` (boosted 4x) and ``text``
        using a ``best_fields`` multi-match.

        Args:
            query: Free-text search query.
            course: Restrict hits to this course; pass ``None`` to search
                across all courses.
            size: Maximum number of hits to return.

        Returns:
            A list of ``{"score": <_score>, "source": <_source>}`` dicts, in
            the order returned by Elasticsearch.
        """
        bool_query = {
            "must": {
                "multi_match": {
                    "query": query,
                    "fields": ["question^4", "text"],
                    "type": "best_fields"
                }
            }
        }
        if course is not None:
            bool_query["filter"] = {"term": {"course": course}}

        search_query = {"size": size, "query": {"bool": bool_query}}
        response = self.es_client.search(
            index=self.index_name, body=search_query)
        return [
            {"score": hit['_score'], "source": hit['_source']}
            for hit in response['hits']['hits']
        ]

    def _build_prompt(self, query: str, search_results: list):
        """Render the LLM prompt for ``query`` from search results.

        Args:
            query: The user's question.
            search_results: Items shaped like the output of ``elastic_search``;
                each ``source`` must contain ``question`` and ``text``.

        Returns:
            The prompt string: instructions, the question, and one ``Q:/A:``
            entry per search result separated by blank lines.
        """
        context_template = textwrap.dedent("""
        Q: {question}
        A: {text}
        """).strip()

        context = "\n\n".join(
            context_template.format(
                question=doc['source']['question'], text=doc['source']['text'])
            for doc in search_results
        )

        prompt_template = textwrap.dedent("""
        You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
        Use only the facts from the CONTEXT when answering the QUESTION.

        QUESTION: {question}

        CONTEXT:
        {context}
        """).strip()

        return prompt_template.format(question=query, context=context).strip()

    def _llm(self, prompt: str):
        """Send ``prompt`` as a single user message and return the reply text."""
        # The OpenAI client lives on self.open_ai (there is no self.client).
        response = self.open_ai.chat.completions.create(
            model=self.MODEL_NAME,
            messages=[{"role": "user", "content": prompt}]
        )
        return response.choices[0].message.content

    def get_prompt(self, query: str) -> str:
        """Search for ``query`` and build the corresponding LLM prompt."""
        results = self.elastic_search(query=query)
        return self._build_prompt(query=query, search_results=results)

    def encode_prompt(self, query: str):
        """Return the prompt for ``query`` encoded as a list of token ids.

        The encoding is the tiktoken encoding for ``MODEL_NAME``.
        """
        prompt = self.get_prompt(query)
        encoding = tiktoken.encoding_for_model(self.MODEL_NAME)
        return encoding.encode(text=prompt)

    def rag(self, query: str):
        """Answer ``query``: retrieve context, build the prompt, ask the LLM."""
        return self._llm(self.get_prompt(query))

In [113]:
# Build the helper bound to the 'homework-collection' index. The constructor
# downloads the FAQ documents and creates the OpenAI and Elasticsearch clients,
# so this cell needs network access.
homework = HomeworkClass('homework-collection')

In [16]:
# Create the index and load every downloaded document into it.
# NOTE(review): init_index() issues an index-create call each time, so re-running
# this cell presumably fails once the index exists — use reinitialize_index() to rebuild.
homework.init_index()

100%|██████████| 948/948 [00:02<00:00, 332.58it/s]


In [None]:
# Question 3: show the scored search hits for the Kubernetes query.
# NOTE(review): this call returns at most 3 hits, restricted to the
# 'machine-learning-zoomcamp' course — confirm that is what question 3 expects.
homework.elastic_search(query='How do execute a command on a Kubernetes pod?')

[{'score': 44.50556,
  'source': {'text': 'Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.\ndocker run -it --entrypoint bash <image>\nIf the container is already running, execute a command in the specific container:\ndocker ps (find the container-id)\ndocker exec -it <container-id> bash\n(Marcos MJD)',
   'section': '5. Deploying Machine Learning Models',
   'question': 'How do I debug a docker container?',
   'course': 'machine-learning-zoomcamp'}},
 {'score': 35.433445,
  'source': {'text': 'Deploy and Access the Kubernetes Dashboard\nLuke',
   'section': '10. Kubernetes and TensorFlow Serving',
   'question': 'Kubernetes-dashboard',
   'course': 'machine-learning-zoomcamp'}},
 {'score': 33.70974,
  'source': {'text': 'You can copy files from your local machine into a Docker container using the docker cp command. Here\'s how to do it:\nIn the Dockerfile, you can provide the folder containing the files that you want to co

In [50]:
# Question 4: top 3 hits for the Docker query, restricted to the
# 'machine-learning-zoomcamp' course.
homework.elastic_search(query='How do copy a file to a Docker container?')

[{'score': 73.38676,
  'source': {'text': 'Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.\ndocker run -it --entrypoint bash <image>\nIf the container is already running, execute a command in the specific container:\ndocker ps (find the container-id)\ndocker exec -it <container-id> bash\n(Marcos MJD)',
   'section': '5. Deploying Machine Learning Models',
   'question': 'How do I debug a docker container?',
   'course': 'machine-learning-zoomcamp'}},
 {'score': 66.688705,
  'source': {'text': "You can copy files from your local machine into a Docker container using the docker cp command. Here's how to do it:\nTo copy a file or directory from your local machine into a running Docker container, you can use the `docker cp command`. The basic syntax is as follows:\ndocker cp /path/to/local/file_or_directory container_id:/path/in/container\nHrithik Kumar Advani",
   'section': '5. Deploying Machine Learning Models',
   'questio

In [101]:
# Build the LLM prompt from the search hits for the Docker query, print it,
# and display its length in characters (the cell's last expression).
prompt = homework.get_prompt(query='How do copy a file to a Docker container?')
print(prompt)
len(prompt)

You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: How do copy a file to a Docker container?

CONTEXT:
Q: How do I debug a docker container?
A: Launch the container image in interactive mode and overriding the entrypoint, so that it starts a bash command.
docker run -it --entrypoint bash <image>
If the container is already running, execute a command in the specific container:
docker ps (find the container-id)
docker exec -it <container-id> bash
(Marcos MJD)

Q: How do I copy files from my local machine to docker container?
A: You can copy files from your local machine into a Docker container using the docker cp command. Here's how to do it:
To copy a file or directory from your local machine into a running Docker container, you can use the `docker cp command`. The basic syntax is as follows:
docker cp /path/to/local/file_or_directory container_id:/path/in/contain

1446

In [114]:
# Encode the prompt for the same query with the tiktoken encoding for 'gpt-4o'
# and display how many tokens it contains.
encoded = homework.encode_prompt(query='How do copy a file to a Docker container?')
num_token = len(encoded)
num_token

320