In [1]:
import requests

In [4]:
# Root of the DataTalks.Club FAQ site; the JSON exports live under <root>/json/.
base_faq_url = 'https://datatalks.club/faq'
courses_index_url = f'{base_faq_url}/json/courses.json'

# Download the course index. A timeout keeps the cell from hanging forever on a
# stalled connection, and raise_for_status() turns an HTTP error into an explicit
# exception instead of a confusing JSON decoding failure.
courses_index_response = requests.get(courses_index_url, timeout=30)
courses_index_response.raise_for_status()
courses_index = courses_index_response.json()

In [8]:
# Inspect the index: one entry per course with its slug, display name, JSON path and question count.
courses_index

[{'course': 'llm-zoomcamp',
  'course_name': 'LLM Zoomcamp',
  'path': '/json/llm-zoomcamp.json',
  'questions_count': 91},
 {'course': 'data-engineering-zoomcamp',
  'course_name': 'Data Engineering Zoomcamp',
  'path': '/json/data-engineering-zoomcamp.json',
  'questions_count': 455},
 {'course': 'mlops-zoomcamp',
  'course_name': 'MLOps Zoomcamp',
  'path': '/json/mlops-zoomcamp.json',
  'questions_count': 249},
 {'course': 'machine-learning-zoomcamp',
  'course_name': 'ML Zoomcamp',
  'path': '/json/machine-learning-zoomcamp.json',
  'questions_count': 440}]

In [5]:
# Download the FAQ records of a single course (LLM Zoomcamp) to look at their shape.
course_url = f'{base_faq_url}/json/llm-zoomcamp.json'
# Bound the wait and fail loudly on HTTP errors before trying to decode JSON.
course_response = requests.get(course_url, timeout=30)
course_response.raise_for_status()
course_data = course_response.json()

In [7]:
# Peek at the first three records to see which fields each FAQ entry carries.
course_data[:3]

[{'id': '74eb249bbf',
  'course': 'llm-zoomcamp',
  'section': 'General Course-Related Questions',
  'question': 'I just discovered the course. Can I still join?',
  'answer': 'Yes, but if you want to receive a certificate, you need to submit your project while we’re still accepting submissions.'},
 {'id': '977bf7786c',
  'course': 'llm-zoomcamp',
  'section': 'General Course-Related Questions',
  'question': 'Course: I have registered for the LLM Zoomcamp. When can I expect to receive the confirmation email?',
  'answer': "You don't need it. You're accepted. You can also just start learning and submitting homework (while the form is open) without registering. It is not checked against any registered list. Registration is just to gauge interest before the start date."},
 {'id': '489dd1c9d9',
  'course': 'llm-zoomcamp',
  'section': 'General Course-Related Questions',
  'question': 'What is the video/zoom link to the stream for the “Office Hours” or live/workshop sessions?',
  'answer':

In [9]:
# Collect the FAQ records of every course listed in the index into one flat list.
documents = []

for course in courses_index:
    course_path = course['path']
    # The index stores paths with a leading '/', so trim the separators on both
    # sides before joining; otherwise the URL ends up with '//' in the middle.
    course_url = f"{base_faq_url.rstrip('/')}/{course_path.lstrip('/')}"
    # Bound the wait and surface HTTP errors instead of failing inside .json().
    course_response = requests.get(course_url, timeout=30)
    course_response.raise_for_status()
    course_data = course_response.json()
    documents.extend(course_data)

In [14]:
# Total number of FAQ records gathered across all courses.
len(documents)

1235

In [18]:
# Spot-check a single record from the combined list.
documents[5]

{'id': '69d122f12e',
 'course': 'llm-zoomcamp',
 'section': 'General Course-Related Questions',
 'question': 'Certificate: Can I follow the course in a self-paced mode and get a certificate?',
 'answer': 'No, you can only get a certificate if you finish the course with a "live" cohort.\n\nWe don\'t award certificates for the self-paced mode. The reason is you need to peer-review 3 capstone(s) after submitting your project.\n\nYou can only peer-review projects at the time the course is running; after the form is closed and the peer-review list is compiled.'}

In [16]:
from minsearch import Index

In [19]:
# Build the search index: 'section', 'question' and 'answer' are registered as
# text fields, 'course' as a keyword field (it is the key used for filtering
# in the search calls).
index = Index(
    text_fields=['section', 'question', 'answer'],
    keyword_fields=['course']
)
# fit() is the last expression of the cell, so the notebook displays its return value.
index.fit(documents)

<minsearch.minsearch.Index at 0x1e8a6163cb0>

In [22]:
# Sample user question; worded slightly differently from the matching FAQ entry.
question = 'I just discovered the course. Can I join now?'

# Only consider records whose 'course' keyword field equals this slug.
filter_dict = {
    'course': 'llm-zoomcamp'
}

# Per-field weights handed to the index: 'question' is weighted up, 'section' down.
boost_dict = {
    'question': 3,
    'section': 0.5,
    # 'answer' is deliberately not listed (presumably it keeps the index's default weight — TODO confirm).
}

# Ask the index for five results for the sample question.
search_results = index.search(
    question,
    filter_dict=filter_dict,
    boost_dict=boost_dict,
    num_results=5
)

In [25]:
def search(question, course='llm-zoomcamp', num_results=5):
    """Search the notebook-level ``index`` for FAQ records matching a question.

    Args:
        question: Free-text query passed to ``index.search``.
        course: Value the 'course' keyword field must equal. Defaults to
            'llm-zoomcamp', the previously hard-coded filter.
        num_results: Value forwarded as ``num_results`` to ``index.search``.
            Defaults to 5, the previously hard-coded count.

    Returns:
        Whatever ``index.search`` returns (in this notebook, a list of FAQ
        record dicts).
    """
    return index.search(
        question,
        filter_dict={'course': course},
        # Weight matches in 'question' up and matches in 'section' down.
        boost_dict={'question': 3, 'section': 0.5},
        num_results=num_results,
    )

In [26]:
# Run the helper on the sample question and display the hits.
search(question)

[{'id': '74eb249bbf',
  'course': 'llm-zoomcamp',
  'section': 'General Course-Related Questions',
  'question': 'I just discovered the course. Can I still join?',
  'answer': 'Yes, but if you want to receive a certificate, you need to submit your project while we’re still accepting submissions.'},
 {'id': '977bf7786c',
  'course': 'llm-zoomcamp',
  'section': 'General Course-Related Questions',
  'question': 'Course: I have registered for the LLM Zoomcamp. When can I expect to receive the confirmation email?',
  'answer': "You don't need it. You're accepted. You can also just start learning and submitting homework (while the form is open) without registering. It is not checked against any registered list. Registration is just to gauge interest before the start date."},
 {'id': '69d122f12e',
  'course': 'llm-zoomcamp',
  'section': 'General Course-Related Questions',
  'question': 'Certificate: Can I follow the course in a self-paced mode and get a certificate?',
  'answer': 'No, you c

In [29]:
import sys
# Put the parent directory on the import path (presumably where the local `rag` module lives — confirm).
sys.path.append('..')

In [31]:
import rag
from openai import OpenAI

# No credentials are passed explicitly; the client is expected to pick them up
# from its environment (e.g. OPENAI_API_KEY) — confirm against the openai docs.
openai_client = OpenAI()

In [32]:
# Wire the search index and the OpenAI client into the stock RAG class from the local `rag` module.
faq_rag = rag.RAG(
    index=index,
    llm_client=openai_client
)

In [33]:
# Show the sample question that is about to go through the RAG pipeline.
question

'I just discovered the course. Can I join now?'

In [35]:
# Run the pipeline on the sample question; the result exposes the generated text as `.answer`.
answer = faq_rag.rag(question)
print(answer.answer)

Yes, you can still join the course. However, if you want to receive a certificate, you need to submit your project while submissions are still being accepted.


In [46]:
class LLMZoomcampFAQRAG(rag.RAG):
    """RAG pipeline whose retrieval step is limited to the LLM Zoomcamp FAQ."""

    def search(self, query):
        """Retrieve FAQ records for ``query`` from ``self.index``.

        Overrides the base-class search so that only records with
        course == 'llm-zoomcamp' are considered, with matches in 'question'
        weighted up and matches in 'section' weighted down.

        Args:
            query: Free-text query to search for.

        Returns:
            Whatever ``self.index.search`` returns for the query.
        """
        # Banner that makes it visible in the cell output that this override ran.
        print('using search from LLMZoomcampFAQRAG...')

        filter_dict = {
            'course': 'llm-zoomcamp'
        }

        boost_dict = {
            'question': 3,
            'section': 0.5,
        }

        # Search with the caller's `query`; reading the notebook-global
        # `question` here would make every call return hits for that one string.
        search_results = self.index.search(
            query,
            filter_dict=filter_dict,
            boost_dict=boost_dict,
            num_results=5
        )

        return search_results

In [47]:
# Rebind `faq_rag` to the subclass so the following calls go through its overridden search().
faq_rag = LLMZoomcampFAQRAG(
    index=index,
    llm_client=openai_client
)

In [48]:
# Same question through the subclass; the banner printed by its search() shows the override is in use.
answer = faq_rag.rag(question)
print(answer.answer)

using search from LLMZoomcampFAQRAG...
Yes, you can still join the course. However, if you want to receive a certificate, you need to submit your project while submissions are still being accepted.


In [52]:
from typing import Literal
from pydantic import BaseModel, Field


# Pointer to one FAQ record that backs an answer.
# NOTE(review): documented with comments on purpose — a class docstring on a
# pydantic model presumably ends up in the generated schema; confirm before adding one.
class Reference(BaseModel):
    # Holds the 'id' value of the FAQ record (e.g. '74eb249bbf').
    document_id: str = Field(description="Reference to FAQ 'id' field that contains the answer")


# Structured response type passed to the RAG pipeline as `output_type`.
# The Field descriptions are runtime strings (presumably forwarded to the LLM
# with the schema — confirm in rag.RAG), so treat them as behavior, not comments.
class FAQRAGResponse(BaseModel):
    """
    This model provides a structured answer with metadata about the response,
    including confidence, categorization, and follow-up suggestions.
    """

    # Answer text; the description asks for markdown.
    answer: str = Field(description="The main answer to the user's question in markdown")
    # Whether the retrieved documentation contained relevant information.
    found_answer: bool = Field(description="True if relevant information was found in the documentation")
    # NOTE(review): the 0.0-1.0 range is stated only in the description; nothing here enforces it.
    confidence: float = Field(description="Confidence score from 0.0 to 1.0 indicating how certain the answer is")
    confidence_explanation: str = Field(description="Explanation about the confidence level")
    # Restricted to exactly these five literal categories.
    answer_type: Literal["how-to", "explanation", "troubleshooting", "comparison", "reference"] = Field(description="The category of the answer")
    followup_questions: list[str] = Field(description="Suggested follow-up questions the user might want to ask")
    # Unlike the other fields, this one carries no Field description of its own.
    references: list[Reference]

In [53]:
# Recreate the pipeline with `output_type=FAQRAGResponse`; the next cell reads
# `.answer` and `.references` from the result (presumably an instance of that model — confirm in rag.RAG).
faq_rag = LLMZoomcampFAQRAG(
    index=index,
    llm_client=openai_client,
    output_type=FAQRAGResponse
)

In [54]:
# Ask again and print both the answer text and the FAQ references attached to it.
answer = faq_rag.rag(question)
print(answer.answer)
print(answer.references)

using search from LLMZoomcampFAQRAG...
Yes, you can still join the course. However, if you want to receive a certificate, you need to submit your project while submissions are still being accepted.
[Reference(document_id='74eb249bbf')]


In [55]:
# Template for a link to one FAQ entry: the record id is used as the page anchor.
# NOTE: despite its name, the `course_name` placeholder takes the course slug
# (the records' 'course' field, e.g. 'llm-zoomcamp'), not the display name
# stored under 'course_name' in the course index (e.g. 'LLM Zoomcamp').
reference_url_template = 'https://datatalks.club/faq/{course_name}.html#{faq_id}'
reference_url_template.format(course_name='llm-zoomcamp', faq_id='74eb249bbf')

'https://datatalks.club/faq/llm-zoomcamp.html#74eb249bbf'