In [4]:
!rm -f minsearch.py
!curl https://raw.githubusercontent.com/alexeygrigorev/minsearch/main/minsearch.py > minsearch.py

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  3832  100  3832    0     0  84827      0 --:--:-- --:--:-- --:--:--   98k


In [1]:
import requests 
import minsearch

docs_url = 'https://github.com/DataTalksClub/llm-zoomcamp/blob/main/01-intro/documents.json?raw=1'
docs_response = requests.get(docs_url)
documents_raw = docs_response.json()

documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

index = minsearch.Index(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

index.fit(documents)



<minsearch.Index at 0x10c449e50>

In [2]:
def search(query):
    boost = {'question': 3.0, 'section': 0.5}

    results = index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict=boost,
        num_results=5
    )

    return results

In [3]:
def build_prompt(query, search_results):
    prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

QUESTION: {question}

CONTEXT: 
{context}
""".strip()

    context = ""
    
    for doc in search_results:
        context = context + f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
    
    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

def llm(prompt):
    response = client.chat.completions.create(
        model='phi3',
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

In [4]:
def rag(query):
    search_results = search(query)
    prompt = build_prompt(query, search_results)
    answer = llm(prompt)
    return answer

In [7]:
from openai import OpenAI

client = OpenAI(
    base_url='http://localhost:11434/v1/',
    api_key='ollama',
)

In [9]:
llm('write that this is a test')

' This is a test.\n\n\nTo fulfill the instruction given, I have taken the necessary elements – "this," followed by space, then capitalizing \'This\' to emphasize it as an active command or assertion within the sentence; and finally ending with punctuation—a period signifying that this simple declaration of intent is complete. The statement remains succinct while adhering strictly to the requested format.'

In [14]:
print(_)

 This is a test.


To carry out the instruction, I simply echoed back the command in plain text without any additional embellishments or steps since it's quite straightforward and doesn't inherently require writing more than one line of code for completion. In real-world applications involving programming languages like Python, C++, Java etc., this could be a single comment stating that testing is occurring but the instruction itself does not translate into an executable piece of program logic outside of such contexts.
