# RAG app with Elasticsearch & OpenAI
1. Elasticsearch is used to index FAQ documents from 3 DataTalks.Club courses
1. queries to Elasticsearch retrieve documents
1. retrieved documents are used to build a prompt
1. OpenAI API is used to generate a response to the user's question

#### imports

In [None]:
import json
from elasticsearch import Elasticsearch
from tqdm.auto import tqdm
from openai import OpenAI

from dotenv import load_dotenv
load_dotenv('.envrc')

## Retrieval

#### created a volume with 
`> docker volume create faq_elasticsearch_data`  

#### now I can start the container with the attached volume to load up the indexed documents
```bash
docker run -it --rm --name elasticsearch \
    -p 9200:9200 -p 9300:9300 \
    -e "discovery.type=single-node" \
    -e "xpack.security.enabled=false" \
    -v faq_elasticsearch_data:/usr/share/faq/elasticsearch/data \
    docker.elastic.co/elasticsearch/elasticsearch:8.4.3
```
with the volume attached, any previously indexed docs are persisted & loaded each time the container is started  
the following cell is commented out because the documents have already been indexed

run shell script in terminal to start container:  
`> llm-zoomcamp/elasticsearch/scripts/run_elasticsearch_w_volume.sh`

### load docs

In [None]:
# doc_file_path = './data/documents.json'

# with open(doc_file_path, 'rt') as f_in:
#     documents_file = json.load(f_in)

# documents = []

# for course in documents_file:
#     course_name = course['course']

#     for doc in course['documents']:
#         doc['course'] = course_name
#         documents.append(doc)

# print(documents[0])

# index_settings = {
#     "settings": {
#         "number_of_shards": 1,
#         "number_of_replicas": 0
#     },
#     "mappings": {
#         "properties": {
#             "text": {"type": "text"},
#             "section": {"type": "text"},
#             "question": {"type": "text"},
#             "course": {"type": "keyword"} 
#         }
#     }
# }

# es = Elasticsearch("http://localhost:9200")
# es.info()

# index_name = "faq_elasticsearch_data"
# response = es.indices.create(index=index_name, body=index_settings)

# print(response)

# for doc in tqdm(documents):
#     es.index(index="faq_elasticsearch_data", document=doc)

### initiate Elasticsearch connection

In [None]:
# Client for the local Elasticsearch node; the docker command above publishes port 9200.
es = Elasticsearch("http://localhost:9200")
# Ask the node for its basic info -- presumably used here as a connectivity check
# (the call is expected to raise if the node cannot be reached).
es.info()

### retrieve docs

In [None]:
# def retrieve_documents(query, index_name="faq_elasticsearch_data", max_results=5, filter: dict = None):
#     es = Elasticsearch("http://localhost:9200")
    
#     search_query = {
#         "size": max_results,
#         "query": {
#             "bool": {
#                 "must": {
#                     "multi_match": {
#                         "query": query,
#                         "fields": ["question^4", "text"],
#                         "type": "best_fields"
#                     }
#                 },
#                 "filter": filter
#             }
#         }
#     }
#     # print(search_query)
#     response = es.search(index=index_name, body=search_query)
#     documents = [hit['_source'] for hit in response['hits']['hits']]
#     scores = [hit['_score'] for hit in response['hits']['hits']]
#     return documents, scores

In [None]:
# sample_question = "How do I execute a command in a running docker container?"
# filter = {
#     "term": {
#         "course": "machine-learning-zoomcamp"
#     }
# }

# # response = retrieve_documents(query=sample_question, max_results=3)#, filter=filter)
# # print(response)
# response, scores = retrieve_documents(query=sample_question, max_results=3, filter=filter)
# # response, scores = retrieve_documents(query=sample_question, max_results=5)#, filter=filter)
# print(f"scores: {scores}\n")
# for doc in response:
#     print(f"course: {doc['course']}")
#     print(f"section: {doc['section']}")
#     print(f"question: {doc['question']}")
#     print(f"answer: {doc['text'][:60]}...\n")

In [None]:
def retrieve_documents_v2(query=None, index_name="faq_elasticsearch_data", max_results=5, filter=None):
    """Search an Elasticsearch index and return the hits with their scores.

    Args:
        query: Free-text question matched against the ``question`` (boosted x4)
            and ``text`` fields. When falsy, a ``match_all`` query is used.
        index_name: Name of the index to search.
        max_results: Maximum number of hits to return.
        filter: Optional mapping of ``field -> exact value``. Every pair becomes
            a ``term`` clause in the bool query's filter context, so all pairs
            must match. Intended for ``keyword`` fields such as ``course``.

    Returns:
        A ``(documents, scores)`` tuple: the ``_source`` of every hit and the
        corresponding ``_score`` values, in the order returned by Elasticsearch.
    """
    es = Elasticsearch("http://localhost:9200")

    if query:
        must_clause = {
            "multi_match": {
                "query": query,
                "fields": ["question^4", "text"],
                "type": "best_fields"
            }
        }
    else:
        must_clause = {"match_all": {}}

    # Filter on the field itself. The index mapping used in this notebook
    # declares `course` directly as a `keyword` field, so there is no
    # `course.keyword` sub-field; targeting `<field>.keyword` matched nothing.
    filter_clauses = [
        {"term": {field: value}}
        for field, value in (filter or {}).items()
    ]

    search_query = {
        "size": max_results,
        "query": {
            "bool": {
                "must": [must_clause],
                "filter": filter_clauses
            }
        }
    }

    response = es.search(index=index_name, body=search_query)
    hits = response['hits']['hits']
    documents = [hit['_source'] for hit in hits]
    scores = [hit['_score'] for hit in hits]
    return documents, scores


In [None]:
# Sample question, restricted to a single course via an exact-match filter.
sample_question = "How do I execute a command in a running docker container?"
filter = {"course": "machine-learning-zoomcamp"}

response, scores = retrieve_documents_v2(
    query=sample_question,
    max_results=3,
    filter=filter,
)

# Show the relevance scores, then a short preview of every retrieved document.
print(f"scores: {scores}\n")
for doc in response:
    print(
        f"course: {doc['course']}",
        f"section: {doc['section']}",
        f"question: {doc['question']}",
        f"answer: {doc['text'][:60]}...\n",
        sep="\n",
    )

## Generate

### build prompt
* Q5: length of prompt
* Q6: number of tokens

In [None]:
def build_prompt(question: str, context_docs: list) -> str:
    """Build the LLM prompt for a question and its retrieved FAQ documents.

    Args:
        question: The user's question, inserted after ``QUESTION:``.
        context_docs: Retrieved documents; they are rendered by
            ``build_context`` and inserted after ``CONTEXT:``.

    Returns:
        The prompt text, with leading/trailing whitespace removed.
    """
    context = build_context(context_docs)

    # Assemble the prompt from explicit lines so that the indentation of this
    # source file does not leak into the text sent to the model.
    prompt = (
        "You're a course teaching assistant. Answer the user QUESTION based on CONTEXT - "
        "the documents retrieved from our FAQ database.\n"
        "Only use the facts from the CONTEXT. If the CONTEXT doesn't contain the answer, return \"NONE\"\n"
        "\n"
        f"QUESTION: {question}\n"
        "\n"
        "CONTEXT:\n"
        f"{context}"
    )

    return prompt.strip()

def build_context(context_docs: list) -> str:
    """Render retrieved documents as the CONTEXT section of a prompt.

    Args:
        context_docs: Dicts that each provide at least the keys ``section``,
            ``question`` and ``text``; any other keys are ignored.

    Returns:
        One ``Section / Question / Answer`` entry per document, entries
        separated by a blank line, with surrounding whitespace removed.
        An empty input yields an empty string.

    Raises:
        KeyError: If a document lacks one of the required keys.
    """
    # Written as a single-line literal so every rendered line starts at
    # column 0; a triple-quoted, indented template leaked four spaces of
    # source indentation into the second and third line of every entry.
    context_template = "Section: {section}\nQuestion: {question}\nAnswer: {text}"

    entries = (context_template.format(**doc) for doc in context_docs)
    return "\n\n".join(entries).strip()

In [None]:
# Q5: build the prompt for the sample question from the documents retrieved
# above and report its length in characters.
prompt = build_prompt(sample_question, response)
print(f"length of prompt: {len(prompt)}")

In [None]:
# Q6: count the tokens of the prompt using the encoding that tiktoken
# associates with the "gpt-4o" model.
import tiktoken
encoding = tiktoken.encoding_for_model("gpt-4o")
print(f"number of tokens: {len(encoding.encode(prompt))}")

### generate response

In [None]:
def get_openai_answer(prompt: str) -> str:
    """Send ``prompt`` as a single user message to "gpt-4o" and return the reply.

    Args:
        prompt: Full prompt text, sent as the content of one ``user`` message.

    Returns:
        The message content of the first choice in the chat completion.
    """
    messages = [{"role": "user", "content": prompt}]
    completion = OpenAI().chat.completions.create(model="gpt-4o", messages=messages)
    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
def qa_bot(question: str, course: str = None) -> str:
    """Answer a question with retrieval-augmented generation over the FAQ index.

    Retrieves up to three documents for ``question`` (optionally restricted to
    one course), builds a prompt from them and returns the model's answer.

    Args:
        question: The user's question.
        course: Optional course identifier; when given, only documents whose
            ``course`` field equals it are retrieved.

    Returns:
        The answer produced by ``get_openai_answer`` for the built prompt.
    """
    # retrieve_documents_v2 expects a plain ``field -> value`` mapping and
    # builds the Elasticsearch ``term`` clauses itself.
    course_filter = {"course": course} if course else None

    # The retriever returns (documents, scores); only the documents go into
    # the prompt.
    documents, _scores = retrieve_documents_v2(
        query=question,
        max_results=3,
        filter=course_filter,
    )

    prompt = build_prompt(question, documents)
    return get_openai_answer(prompt)

## demo
* bonus 1: generating the answer
* bonus 2: calculate costs

In [None]:
# Bonus 1: run the whole pipeline (retrieve -> build prompt -> generate) for a
# sample question, restricted to one course.
sample_question = "How do I execute a command in a running docker container?"
course = 'machine-learning-zoomcamp'

answer = qa_bot(question=sample_question, course=course)

print(answer)

In [None]:
# Bonus 2: rough cost estimate for a batch of requests.

# Workload: number of requests and the token counts assumed for each one.
num_of_requests = 1000
input_tokens_count = 150
output_tokens_count = 250

# Prices per 1,000 tokens (printed below as dollars).
# NOTE(review): presumably the gpt-4o rates at the time of writing -- confirm.
input_cost_per_1k_tokens = 0.005
output_cost_per_1k_tokens = 0.015

# Cost of a single request, split by direction.
input_cost = input_tokens_count * (input_cost_per_1k_tokens / 1000)
output_cost = output_tokens_count * (output_cost_per_1k_tokens / 1000)

# Scale the per-request cost to the whole batch.
total_cost = (input_cost + output_cost) * num_of_requests
print(f"total cost: ${total_cost:.2f}")

In [None]:
from 