In [24]:
import json
import os

import minsearch
import requests
from dotenv import find_dotenv, load_dotenv


# Locate the nearest `.env` by walking up from the current working directory
# instead of hardcoding a user-specific absolute path, so the notebook also
# runs on other machines. `find_dotenv` returns '' when no file is found, in
# which case `load_dotenv` loads nothing and the keys below stay None.
# NOTE(review): assumes the notebook is launched from inside the project tree
# that contains the `.env` file -- confirm.
dotenv_path = find_dotenv(usecwd=True)
load_dotenv(dotenv_path)

# Credentials are read from the environment only; both are None when unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
DATASAUR_API_KEY = os.environ.get("DATASAUR_API_KEY")

In [25]:
# Read the raw FAQ dump. The encoding is pinned to UTF-8 so decoding does not
# depend on the platform's default locale encoding.
with open('../01-intro/documents.json', 'rt', encoding='utf-8') as f_in:
    docs_raw = json.load(f_in)

# Flatten the per-course structure into one list of documents, tagging each
# document with the course it came from.
documents = []

for course_dict in docs_raw:
    for doc in course_dict['documents']:
        doc['course'] = course_dict['course']
        documents.append(doc)


In [26]:
from openai import OpenAI

# OpenAI-compatible chat client pointed at the endpoint served on
# localhost:11434. The api_key is the literal string 'ollama' -- presumably a
# placeholder rather than a real credential; confirm.
client = OpenAI(api_key='ollama', base_url='http://localhost:11434/v1/')

In [27]:
def build_prompt(query, search_results):
    """Assemble the LLM prompt for ``query`` from the retrieved entries.

    Args:
        query: The user's question, substituted into the QUESTION slot.
        search_results: Iterable of mappings, each providing the keys
            ``section``, ``question`` and ``text``.

    Returns:
        The filled-in prompt with leading and trailing whitespace stripped.

    Raises:
        KeyError: If an entry lacks one of the three required keys.
    """
    entries = [
        f"section: {hit['section']}\nquestion: {hit['question']}\nanswer: {hit['text']}\n\n"
        for hit in search_results
    ]
    # The context always opens with a newline so the first entry starts on
    # its own line below the "CONTEXT:" label.
    context_block = "\n" + "".join(entries)

    # Written as explicit pieces so the trailing spaces at the end of the
    # first two instruction lines stay visible.
    template = (
        "\n"
        "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT. \n"
        "Use only the facts from the CONTEXT when answering the QUESTION. \n"
        "If the CONTEXT doesn't have answer, output NONE.\n"
        "\n"
        "QUESTION: {question}\n"
        "\n"
        "CONTEXT: {context}\n"
        "\n"
        "    "
    )

    return template.format(question=query, context=context_block).strip()

In [28]:
def llm(prompt, model="phi3"):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Full prompt text, sent as the only message with role "user".
        model: Name of the model to query. Defaults to "phi3", the value
            that was previously hard-coded, so existing calls are unchanged.

    Returns:
        The ``content`` of the first choice in the chat completion response.
    """
    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
    )

    return response.choices[0].message.content

In [29]:
def rag(query):
    """Answer ``query`` by retrieving context, building a prompt and asking the LLM.

    NOTE(review): ``search`` is not defined in the cells visible here; it is
    presumably defined elsewhere in the notebook -- confirm, otherwise this
    raises NameError on a fresh kernel.
    """
    return llm(build_prompt(query, search(query)))

In [30]:
from elasticsearch import Elasticsearch

In [31]:
# Elasticsearch client for the node at http://localhost:9200; used below to
# create the index, index the documents and run searches.
es_client = Elasticsearch("http://localhost:9200")

In [32]:
index_settings = {
    "settings": {
        "number_of_shards": 1,
        "number_of_replicas": 0
    },
    "mappings": {
        "properties": {
            # Free-text fields.
            "text": {"type": "text"},
            "section": {"type": "text"},
            "question": {"type": "text"},
            # Keyword field: matched exactly by the `term` filter in elastic_search.
            "course": {"type": "keyword"}
        }
    }
}

index_name = "course-questions"

# Drop any index left over from a previous run before creating it. Without
# this, re-running the cell fails because the index already exists, and
# re-running the indexing cell would add the same documents a second time.
# WARNING: this deletes all data currently stored in `index_name`.
es_client.indices.delete(index=index_name, ignore_unavailable=True)
es_client.indices.create(index=index_name, body=index_settings)

ObjectApiResponse({'acknowledged': True, 'shards_acknowledged': True, 'index': 'course-questions'})

In [33]:
from tqdm.auto import tqdm

In [34]:
# Index every document into `index_name`, one request per document, with a
# tqdm progress bar.
for doc in tqdm(documents):
    es_client.index(document=doc, index=index_name)

100%|██████████| 948/948 [00:02<00:00, 329.24it/s]


In [35]:
# Sample question passed to rag_elastic in a later cell.
query = "I just discovered the course. Can I still join it?"

In [36]:
def elastic_search(query, course="data-engineering-zoomcamp", size=5):
    """Search the ``index_name`` index for documents matching ``query``.

    Args:
        query: Free-text question matched against the ``question``, ``text``
            and ``section`` fields; ``question`` is boosted by a factor of 3.
        course: Exact value the ``course`` field must have. Defaults to
            "data-engineering-zoomcamp", the value that was previously
            hard-coded, so existing calls are unchanged.
        size: Maximum number of hits to return. Defaults to 5, the value
            that was previously hard-coded.

    Returns:
        A list with the ``_source`` dict of each hit, in the order the hits
        appear in the response.
    """
    search_query = {
        "size": size,
        "query": {
            "bool": {
                "must": {
                    "multi_match": {
                        "query": query,
                        "fields": ["question^3", "text", "section"],
                        "type": "best_fields"
                    }
                },
                # Restrict results to a single course.
                "filter": {
                    "term": {
                        "course": course
                    }
                }
            }
        }
    }

    response = es_client.search(index=index_name, body=search_query)

    return [hit['_source'] for hit in response['hits']['hits']]

In [37]:
def rag_elastic(query):
    """Answer ``query`` using Elasticsearch retrieval followed by the LLM.

    The documents returned by ``elastic_search`` are turned into a prompt by
    ``build_prompt``, and the text returned by ``llm`` for that prompt is the
    result.
    """
    retrieved = elastic_search(query)
    return llm(build_prompt(query, retrieved))

In [43]:
# Run the Elasticsearch-backed pipeline on the sample question; the bare
# expression makes the returned answer the cell's output.
rag_elastic(query)

" As a course teaching assistant, I would like you to know that even though there are specific dates for turning in projects mentioned later on (which implies deadlines), generally speaking, if you're interested and ready to participate, yes, as long as it hasn't reached its last possible registration time before the start date. Please ensure to manage your submission times appropriately to avoid any rush at the end. However, this isn’t directly answered in provided contexts but can be inferred from different answers given throughout various sections of course-related questions."

In [44]:
# Bind the answer to a name instead of relying on IPython's implicit `_`
# (last cell output), which silently changes meaning if cells are re-run or
# reordered. Note that this issues its own LLM request.
answer = rag_elastic(query)
print(answer)

 As a course teaching assistant, I would like you to know that even though there are specific dates for turning in projects mentioned later on (which implies deadlines), generally speaking, if you're interested and ready to participate, yes, as long as it hasn't reached its last possible registration time before the start date. Please ensure to manage your submission times appropriately to avoid any rush at the end. However, this isn’t directly answered in provided contexts but can be inferred from different answers given throughout various sections of course-related questions.
