In [385]:
import minsearch

In [386]:
import json

In [387]:
# Load the raw FAQ dump. The text contains non-ASCII characters (e.g. curly
# quotes), so decode explicitly as UTF-8 rather than relying on the platform's
# default encoding.
with open('documents-llm.json', 'rt', encoding='utf-8') as f_in:
    docs_raw = json.load(f_in)

In [388]:
# (docs_raw)

In [389]:
# Flatten the per-course FAQ lists into a single list of documents, tagging
# each document with the course it came from. Each document is copied, so the
# dicts inside `docs_raw` are left untouched and this cell can be re-run safely.
documents = [
    {**doc, 'course': course_dict['course']}
    for course_dict in docs_raw
    for doc in course_dict['documents']
]

In [390]:
documents[0]

{'text': 'Yes, but if you want to receive a certificate, you need to submit your project while we’re still accepting submissions.',
 'section': 'General course-related questions',
 'question': 'I just discovered the course. Can I still join?',
 'course': 'llm-zoomcamp'}

In [391]:
# Create the minsearch index: "question", "text" and "section" are registered
# as text fields, and "course" as a keyword field (the search calls in this
# notebook pass it through filter_dict).
index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

In [392]:
q = 'The course has already started, can I still enroll?'

In [393]:
index.fit(documents)

<minsearch.Index at 0x771a8311efb0>

In [394]:
# Per-field weights passed to minsearch as boost_dict.
boost = dict(question=3.0, section=0.5)

# Up to five matches for `q`, restricted to the llm-zoomcamp course.
results = index.search(
    query=q,
    num_results=5,
    boost_dict=boost,
    filter_dict=dict(course='llm-zoomcamp'),
)

# results

In [395]:
import os
from groq import Groq
# from dotenv import load_dotenv

In [396]:

# load_dotenv()  # This loads from .env file
# api_key = os.getenv("GROQ_API_KEY")
# if not api_key:
#     raise ValueError("GROQ_API_KEY environment variable not set")

# client = Groq(api_key=api_key)


In [397]:
client = Groq()

In [398]:
# Send the bare question `q` (no retrieved FAQ context) as a single user message.
response = client.chat.completions.create(
    messages=[dict(role="user", content=q)],
    model="llama-3.3-70b-versatile",
)

In [399]:
response.choices[0].message.content

"It depends on the course and institution. Some courses may allow late enrollment, while others may not. Here are a few possible scenarios:\n\n1. **Check with the institution**: Reach out to the course administrator, instructor, or admissions office to inquire about late enrollment. They can tell you if it's still possible to join the course and what the process would be.\n2. **Online courses**: If it's an online course, you might be able to enroll at any time, depending on the platform and course design. Some online courses are self-paced, so you can start whenever you're ready.\n3. **Prerequisites and catch-up work**: If you're enrolling late, you might need to complete any missed assignments or catch up on course material before you can join the current session.\n4. **Faculty approval**: In some cases, you may need to get approval from the instructor or faculty member to enroll late. They may consider factors like the course format, your background, and your ability to catch up on m

In [400]:
# Prompt skeleton with {question} and {context} placeholders, filled in later
# with str.format; .strip() drops the newlines that follow/precede the triple quotes.
prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT:
{context}
""".strip()

In [401]:
# One "section / question / answer" entry per search hit, each followed by a
# blank line, joined into a single string.
context = "".join(
    f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
    for doc in results
)

In [402]:
prompt = prompt_template.format(question=q, context=context).strip()

In [403]:
# print(prompt)

In [404]:
def search(query, course='llm-zoomcamp', num_results=5):
    """Look up FAQ entries in the minsearch ``index``.

    Args:
        query: Free-text question to search for.
        course: Value the ``course`` keyword field must match. Defaults to
            ``'llm-zoomcamp'``, the value this function previously hard-coded.
        num_results: Maximum number of documents requested from the index.
            Defaults to 5, the value this function previously hard-coded.

    Returns:
        The value returned by ``index.search`` (iterated elsewhere in this
        notebook as a sequence of document dicts).
    """
    # The question field is weighted up and the section field weighted down.
    boost = {'question': 3.0, 'section': 0.5}

    return index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results,
    )

<!-- # If the CONTEXT doesn't contain the answer, output NONE. -->

In [None]:
def build_prompt(query, search_results):
    """Render the RAG prompt for ``query`` from the retrieved FAQ documents.

    Args:
        query: The user's question; substituted for ``{question}``.
        search_results: Iterable of dicts, each providing the keys
            ``'section'``, ``'question'`` and ``'text'``.

    Returns:
        The prompt string, stripped of leading/trailing whitespace. With no
        search results the prompt ends right after ``CONTEXT:``.

    Raises:
        KeyError: If a document lacks one of the required keys.
    """
    # The template is assembled from flush-left pieces so that the function
    # body's indentation does not end up inside the prompt text.
    prompt_template = (
        "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.\n"
        "Use only the facts from the CONTEXT when answering the QUESTION.\n"
        "\n"
        "QUESTION: {question}\n"
        "\n"
        "CONTEXT:\n"
        "{context}"
    )

    # One entry per document, each followed by a blank line.
    context = "".join(
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
        for doc in search_results
    )

    return prompt_template.format(question=query, context=context).strip()

In [406]:
def llm(prompt):
    """Send ``prompt`` as a single user message via ``client`` and return the reply text."""
    user_message = {"role": "user", "content": prompt}
    completion = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[user_message],
    )
    # Only the first choice's message content is returned.
    first_choice = completion.choices[0]
    return first_choice.message.content

In [409]:
def rag(query):
    """Answer ``query``: retrieve with ``search``, build the prompt, then call ``llm``."""
    return llm(build_prompt(query, search(query)))

In [None]:
# query = "How to configure apache spark?"
query = "When does the course  will start?"
rag(query)

In [413]:
rag("What is saturn cloud?")

'Saturn Cloud is not explicitly defined in the provided context. However, based on the information given, Saturn Cloud appears to be a cloud-based platform or service, possibly related to notebook environments, where tasks such as cleaning out the Hugging Face model cache can be performed. It is mentioned in the context of Open-Source LLMs and as an alternative to other cloud services like Google Colab and Kaggle.'

In [414]:
documents[0]

{'text': 'Yes, but if you want to receive a certificate, you need to submit your project while we’re still accepting submissions.',
 'section': 'General course-related questions',
 'question': 'I just discovered the course. Can I still join?',
 'course': 'llm-zoomcamp'}

In [415]:
from elasticsearch import Elasticsearch

In [416]:
es_client = Elasticsearch('http://localhost:9200') 

In [417]:
# Single-shard, zero-replica index; the three free-text fields are mapped as
# "text" and "course" as "keyword" so it can be used in an exact term filter.
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            "course": {"type": "keyword"}
        }
    }
}

index_name = "course-questions"

# Drop any index left over from a previous run so this cell can be re-executed:
# creating an index that already exists raises an error, and indexing the
# documents again into an old index would store every document twice.
# NOTE: this deletes all data currently stored under `index_name`.
es_client.indices.delete(index=index_name, ignore_unavailable=True)
es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'course-questions'})

In [419]:
documents[0]

{'text': 'Yes, but if you want to receive a certificate, you need to submit your project while we’re still accepting submissions.',
 'section': 'General course-related questions',
 'question': 'I just discovered the course. Can I still join?',
 'course': 'llm-zoomcamp'}

In [418]:
from tqdm.auto import tqdm

In [420]:
# Send each document to the Elasticsearch index with its own index() call,
# wrapping the iteration in tqdm to show progress.
for doc in tqdm(documents):
    es_client.index(index=index_name, document=doc)

  0%|          | 0/86 [00:00<?, ?it/s]

In [421]:
# query = "When does the course  will start?"
query

'When does the course  will start?'

In [428]:
def elastic_search(query, course="llm-zoomcamp", size=5):
    """Search the Elasticsearch FAQ index and return the matching documents.

    Args:
        query: Free-text question, matched against the ``question`` (weighted
            ^3), ``text`` and ``section`` fields with a ``best_fields``
            ``multi_match`` query.
        course: Value used in the ``term`` filter on the ``course`` field.
            Defaults to ``"llm-zoomcamp"``, the value previously hard-coded.
        size: Value of the request's ``size`` field. Defaults to 5, the value
            previously hard-coded.

    Returns:
        A list with the ``_source`` of every hit in the response, in the order
        the hits appear in the response.
    """
    search_query = {
        "size": size,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^3", "text", "section"],
                        "type": "best_fields",
                    }
                },
                "filter": {"term": {"course": course}},
            }
        },
    }

    response = es_client.search(index=index_name, body=search_query)

    # Keep only the stored documents, dropping the per-hit metadata.
    return [hit['_source'] for hit in response['hits']['hits']]

In [429]:
def rag(query):
    """Answer ``query`` using ``elastic_search`` for retrieval.

    This redefinition replaces the earlier ``rag`` in this notebook, which
    retrieved with ``search`` instead.
    """
    hits = elastic_search(query)
    return llm(build_prompt(query, hits))

In [432]:
elastic_search(query)

[{'text': 'Summer 2025 (via Alexey).',
  'section': 'General course-related questions',
  'question': 'When will the course be offered next?',
  'course': 'llm-zoomcamp'},
 {'text': "You don't need it. You're accepted. You can also just start learning and submitting homework (while the form is Open) without registering. It is not checked against any registered list. Registration is just to gauge interest before the start date.",
  'section': 'General course-related questions',
  'question': 'Course - I have registered for the [insert-zoomcamp-name]. When can I expect to receive the confirmation email?',
  'course': 'llm-zoomcamp'},
 {'text': 'No, it does not (answered in office hours Jul 1st, 2024). You can participate in the math-kaggle-llm-competition as a group if you want to form teams; but capstone is an individual attempt.',
  'section': 'Capstone Project',
  'question': 'Does the competition count as the capstone?',
  'course': 'llm-zoomcamp'},
 {'text': 'Using the Openai API do

In [433]:

rag("LLM")

'LLM stands for Large Language Model. Unfortunately, the provided context does not offer a detailed explanation of what LLM is, but rather mentions its usage in various sections of the course, such as the "LLM-as-judge" section and the use of open-source LLM models like Llama3, Phi3, Mistral, and Mixtral through the Ollama platform.'