In [None]:
from together import Together

client = Together()

# Smoke test: send one single-turn user message and print the text of the
# first returned choice.
user_message = {
    "role": "user",
    "content": "What are some fun things to do in New York?",
}
response = client.chat.completions.create(
    model="deepseek-ai/DeepSeek-R1-Distill-Llama-70B-free",
    messages=[user_message],
)
print(response.choices[0].message.content)

In [3]:
import requests 

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
# Bound the wait and fail loudly on a bad HTTP status; otherwise an error page
# would only surface later as a confusing JSON decoding error.
docs_response = requests.get(docs_url, timeout=30)
docs_response.raise_for_status()
documents_raw = docs_response.json()

# Flatten the per-course payload into one list of FAQ entries, tagging each
# entry with the name of the course it came from. New dicts are built so the
# downloaded `documents_raw` structure is left unmodified.
documents = [
    {**doc, 'course': course['course']}
    for course in documents_raw
    for doc in course['documents']
]

In [4]:
from minsearch import AppendableIndex

# Index the flattened FAQ entries: "course" is registered as a keyword field,
# while the question, answer text and section are registered as text fields.
index = AppendableIndex(
    keyword_fields=["course"],
    text_fields=["question", "text", "section"],
)

index.fit(documents)

<minsearch.append.AppendableIndex at 0x774efb59d400>

In [5]:
def search(query, course='data-engineering-zoomcamp', num_results=5):
    """Query the global FAQ ``index`` for entries matching ``query``.

    Args:
        query: Free-text question to look up.
        course: Value the ``course`` field is filtered on. Defaults to the
            course that was previously hard-coded.
        num_results: Maximum number of results requested from the index.

    Returns:
        Whatever ``index.search`` returns for these arguments.
    """
    # Boost weights handed to the index: "question" at 3.0, "section" at 0.5.
    boost = {'question': 3.0, 'section': 0.5}

    return index.search(
        query=query,
        filter_dict={'course': course},
        boost_dict=boost,
        num_results=num_results,
        # NOTE(review): presumably makes each result carry its document id — confirm against minsearch.
        output_ids=True,
    )

In [32]:
# Prompt for the plain (non-agentic) RAG flow. It has its own name so that a
# later cell rebinding `prompt_template` cannot change what build_prompt()
# renders.
rag_prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

<QUESTION>
{question}
</QUESTION>

<CONTEXT>
{context}
</CONTEXT>
""".strip()

# Backward-compatible alias for code that still reads the old global name.
prompt_template = rag_prompt_template


def build_prompt(query, search_results):
    """Render the plain RAG prompt for ``query`` and the retrieved entries.

    Args:
        query: The student's question, inserted into the QUESTION section.
        search_results: Iterable of dicts with ``section``, ``question`` and
            ``text`` keys; each becomes one entry in the CONTEXT section.

    Returns:
        The formatted prompt string.

    Raises:
        KeyError: If a result lacks one of the three expected keys.
    """
    context = "".join(
        f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
        for doc in search_results
    )
    return rag_prompt_template.format(question=query, context=context).strip()

In [51]:
from together import Together

# Module-level Together client that llm() uses for its chat-completion calls.
# NOTE(review): no API key is passed here, so it presumably comes from the environment — confirm.
client = Together()
def llm(prompt, model='meta-llama/Llama-3.3-70B-Instruct-Turbo-Free'):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Full prompt text, sent as the content of one "user" message.
        model: Model identifier passed to the chat-completions endpoint.
            Defaults to the model that was previously hard-coded.

    Returns:
        The message content of the first choice in the response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.choices[0].message.content

In [35]:
def rag(query):
    """Answer ``query`` by retrieving FAQ entries and prompting the LLM.

    The steps are: search() for matching entries, build_prompt() to render
    the prompt from the query and those entries, then llm() for the reply.

    Returns:
        Whatever llm() returns for the rendered prompt.
    """
    retrieved = search(query)
    return llm(build_prompt(query, retrieved))

In [37]:
# Run one sample FAQ question through the search -> prompt -> LLM pipeline and print the reply.
print(rag(query="Can i get certificate if I join the course now?"))

Yes, you can still join the course after the start date and you are eligible to submit homework. However, to get a certificate, you need to finish the course with a "live" cohort, which implies that you should join the course while it is still running, as certificates are not awarded for the self-paced mode.


## Agentic RAG

In [52]:
# Prompt for the agentic flow. It is rendered with str.format(), which fills
# {question} and {context}; the doubled braces ({{ and }}) are format escapes
# that come out as single literal braces around the three JSON reply templates.
# Note that this assignment rebinds the name `prompt_template`, which an
# earlier cell also assigns.
prompt_template = """
You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.
At the beginning the context is EMPTY.

You MUST respond in one of the three EXACT JSON templates shown below.

Do NOT include any explanations, thoughts, or text outside the JSON block.

<QUESTION>
{question}
</QUESTION>

<CONTEXT> 
{context}
</CONTEXT>

If CONTEXT is EMPTY, you can use our FAQ database.
In this case, use the following output template:

{{
"action": "SEARCH",
"reasoning": "<add your reasoning here>"
}}

If you can answer the QUESTION using CONTEXT, use this template:

{{
"action": "ANSWER",
"answer": "<your answer>",
"source": "CONTEXT"
}}

If the context doesn't contain the answer, use your own knowledge to answer the question

{{
"action": "ANSWER",
"answer": "<your answer>",
"source": "OWN_KNOWLEDGE"
}}
""".strip()

In [54]:
# First agent step: render the prompt with the literal "EMPTY" placeholder
# that the template describes as the initial context, then show the raw reply.
question = "how do I join the course now?"
context = "EMPTY"

prompt = prompt_template.format(question=question, context=context)
# Uncomment to inspect the fully rendered prompt:
# print(prompt)

answer = llm(prompt)
print(answer)

{
"action": "SEARCH",
"reasoning": "The context is empty, so I need to search the FAQ database to find the steps to join the course."
}


In [55]:
def build_context(search_results):
    """Format retrieved FAQ entries into one CONTEXT string.

    Each entry is rendered as three lines (section, question, answer) followed
    by a blank line; the combined text has its leading and trailing whitespace
    stripped. An empty ``search_results`` gives an empty string.
    """
    entries = [
        f"section: {hit['section']}\nquestion: {hit['question']}\nanswer: {hit['text']}\n\n"
        for hit in search_results
    ]
    return "".join(entries).strip()

# Second agent step: the reply above asked for a SEARCH, so retrieve FAQ
# entries for the same question, format them as the context, and re-render
# the prompt with that context in place of "EMPTY".
search_results = search(question)
context = build_context(search_results)
prompt = prompt_template.format(question=question, context=context)
print(prompt)

You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.
At the beginning the context is EMPTY.

You MUST respond in one of the three EXACT JSON templates shown below.

Do NOT include any explanations, thoughts, or text outside the JSON block.

<QUESTION>
how do I join the course now?
</QUESTION>

<CONTEXT> 
section: General course-related questions
question: Course - Can I still join the course after the start date?
answer: Yes, even if you don't register, you're still eligible to submit the homeworks.
Be aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.

section: General course-related questions
question: Course - When will the course start?
answer: The purpose of this document is to capture frequently asked technical questions
The exact day and hour of the course will be 15th Jan 2024 at 17h00. The course wil

In [56]:
# Send the prompt currently held in `prompt` to the model and print its raw reply.
answer = llm(prompt)
print(answer)

{
"action": "ANSWER",
"answer": "To join the course, you should register before the course starts using the provided link, subscribe to the course public Google Calendar, and join the course Telegram channel with announcements. Don't forget to register in DataTalks.Club's Slack and join the channel.",
"source": "CONTEXT"
}


In [57]:
import json
def _parse_llm_json(raw):
    """Decode the JSON object in an LLM reply, tolerating text around it."""
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        # A reply may wrap the JSON in Markdown code fences or add a sentence
        # around it; retry on the outermost {...} span before giving up.
        start, end = raw.find("{"), raw.rfind("}")
        if start == -1 or end < start:
            raise
        return json.loads(raw[start:end + 1])


def _ask_agent(question, context):
    """Render the agent prompt, query the LLM, then print and return the parsed reply."""
    prompt = prompt_template.format(question=question, context=context)
    answer = _parse_llm_json(llm(prompt))
    print(answer)
    return answer


def agentic_rag_v1(question):
    """Answer ``question``, letting the model decide whether to search first.

    The model is first asked with the context set to ``"EMPTY"``. If its reply
    has ``action == "SEARCH"``, the FAQ index is searched for the question and
    the model is asked a second time with the retrieved context. Each parsed
    reply is printed as it arrives.

    Args:
        question: The student's question.

    Returns:
        The last parsed reply from the model.

    Raises:
        json.JSONDecodeError: If a reply contains no decodable JSON.
        KeyError: If the first reply has no ``"action"`` key.
    """
    answer = _ask_agent(question, "EMPTY")

    if answer['action'] == 'SEARCH':
        print('need to perform search...')
        context = build_context(search(question))
        answer = _ask_agent(question, context)

    return answer

In [60]:
# Try the agent on two questions. Each call prints its intermediate replies;
# only the second call is the cell's last expression, so only its return
# value is displayed as the cell output.
agentic_rag_v1(question="How can i run terraform in GCP?")
agentic_rag_v1(question="How can i join the course")

{'action': 'ANSWER', 'answer': 'To run Terraform in GCP, you can use the Google Cloud Shell, install Terraform on a GCP VM, or use the Terraform CLI locally with GCP credentials configured. First, ensure you have a GCP project and enable the necessary APIs. Then, install Terraform and configure your GCP credentials. You can then initialize Terraform, create a configuration file, and apply it to create resources in GCP.', 'source': 'OWN_KNOWLEDGE'}
{'action': 'SEARCH', 'reasoning': 'The context is empty, so I will search the FAQ database to find information on how to join the course.'}
need to perform search...
{'action': 'ANSWER', 'answer': 'You can join the course by registering before the course starts using the provided link, and then join the course Telegram channel with announcements. You can also start learning and submitting homework without registering, as registration is just to gauge interest before the start date.', 'source': 'CONTEXT'}


{'action': 'ANSWER',
 'answer': 'You can join the course by registering before the course starts using the provided link, and then join the course Telegram channel with announcements. You can also start learning and submitting homework without registering, as registration is just to gauge interest before the start date.',
 'source': 'CONTEXT'}