## Generation with Ollama (OpenAI alternatives)

In [22]:
from openai import OpenAI
from elasticsearch import Elasticsearch
from tqdm.auto import tqdm

In [None]:
# OpenAI-compatible client pointed at a server on localhost rather than a hosted API.
# NOTE(review): port 11434 and the 'ollama' key suggest a local Ollama server,
# where the key is presumably a placeholder rather than a real secret — confirm.
client = OpenAI(
    base_url='http://localhost:11434/v1/',
    api_key='ollama',
)

In [None]:
# Elasticsearch client for the instance reachable at localhost:9200.
es_client = Elasticsearch("http://localhost:9200")

In [None]:
# Index definition: one shard, no replicas.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            # Analyzed full-text fields; these are the ones searched by multi_match.
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            # `keyword` stores the value unanalyzed so it can be matched exactly
            # by a `term` filter.
            "course": {"type": "keyword"}
        }
    }
}

index_name = "course-questions"

# Creating an index that already exists raises an error, so an unconditional
# create made this cell fail on every re-run. Create it only when it is missing.
if not es_client.indices.exists(index=index_name):
    es_client.indices.create(index=index_name, body=index_settings)

In [None]:
# Index the documents one request at a time; tqdm renders a progress bar.
# NOTE(review): `documents` is not defined in the cells shown here — it must be
# loaded by another cell for this to run on a fresh kernel.
for doc in tqdm(documents):
    es_client.index(index=index_name, document=doc)

In [None]:
def elastic_search(query, course="data-engineering-zoomcamp", size=5):
    """Return the indexed documents that best match ``query`` within one course.

    Args:
        query: Free-text question to search for.
        course: Value the ``course`` field must equal exactly. Defaults to
            ``"data-engineering-zoomcamp"``, the value that used to be
            hard-coded in the query.
        size: Maximum number of hits to return. Defaults to 5, the value that
            used to be hard-coded.

    Returns:
        A list holding the ``_source`` dict of every hit, in the order
        Elasticsearch returned them.
    """
    search_query = {
        "size": size,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        # `^3` boosts matches in `question` over `text` and `section`.
                        "fields": ["question^3", "text", "section"],
                        "type": "best_fields"
                    }
                },
                # Restrict results to a single course via an exact-match filter.
                "filter": {
                    "term": {
                        "course": course
                    }
                }
            }
        }
    }

    response = es_client.search(index=index_name, body=search_query)

    return [hit['_source'] for hit in response['hits']['hits']]

In [27]:
def build_prompt(query, search_results):
    """Assemble the LLM prompt from the question and the retrieved entries.

    Every item of ``search_results`` must provide the keys ``section``,
    ``question`` and ``text``; the items are rendered one after another under
    the CONTEXT heading. The filled-in prompt is returned with leading and
    trailing whitespace removed.
    """
    # Note the trailing space after "CONTEXT:" — it is part of the template.
    template = (
        "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.\n"
        "Use only the facts from the CONTEXT when answering the QUESTION.\n"
        "\n"
        "QUESTION: {question}\n"
        "\n"
        "CONTEXT: \n"
        "{context}"
    )

    entries = [
        f"section: {entry['section']}\nquestion: {entry['question']}\nanswer: {entry['text']}\n\n"
        for entry in search_results
    ]

    filled = template.format(question=query, context="".join(entries))
    return filled.strip()


In [28]:
def llm(prompt, model='phi3'):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Full prompt text for the chat completion.
        model: Name of the model to query. Defaults to ``'phi3'``, the value
            that used to be hard-coded in the call.

    Returns:
        The ``content`` of the first choice in the completion response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}]
    )

    return response.choices[0].message.content

In [29]:
def rag(query):
    """Answer ``query`` by retrieving documents, building a prompt and asking the LLM."""
    retrieved_docs = elastic_search(query)
    return llm(build_prompt(query, retrieved_docs))

In [30]:
# Sample question passed to rag() further down.
query = 'how do I run kafka?'

"To run Apache Kafka, you need to have Kafka installed and running on your machine. However, the FAQ database seems to provide information on how to run Kafka-related projects or resolve errors in Python or Java code, but it doesn't include instructions on how to install and start the actual Kafka broker.\n\nTo install and run Apache Kafka, you can follow the official Apache Kafka documentation: <https://kafka.apache.org/quickstart>\n\nAfter setting up Kafka, you can run the provided Java or Python code in the FAQ database by following the appropriate instructions based on the context. For example, if you want to run a Java producer, you can use the command provided in the first context snippet:\n\n```bash\njava -cp build/libs/<jar_name>-1.0-SNAPSHOT.jar:out src/main/java/org/example/JsonProducer.java\n```\n\nReplace `<jar_name>` with the name of your Java project's JAR file.\n\nSimilarly, for Python-related questions and issues, you can refer to the respective context sections in the 

In [41]:
# NOTE(review): the saved output of this cell talks about joining the course, not
# about Kafka, and the execution count jumps from 30 to 41 — `query` was likely
# reassigned in a cell that is not shown. Re-run from a fresh kernel to confirm.
rag(query)

"Yes, you can still join the course even if it has already started. You are still eligible to submit homeworks, but be aware that there will be deadlines for turning in the final projects. If you want to follow the course at your own pace, you can do so after it finishes, as all materials will be kept. You can also start installing and setting up the dependencies and requirements, and look over the prerequisites and syllabus before starting the course. Additionally, you can get support in the Slack channel even if you take the course in the self-paced mode. You don't need a confirmation email after registering for the course, you can just start learning and submitting homework without registering."