In [4]:
## Install the required packages
%pip install -q elasticsearch openai dotenv

Note: you may need to restart the kernel to use updated packages.


In [5]:
import os
from elasticsearch import Elasticsearch
from openai import OpenAI

In [7]:
import dotenv
# Load variables from a local .env file into the process environment so the
# credentials read via os.environ are available without being hard-coded here.
dotenv.load_dotenv()

True

In [9]:
# Elasticsearch client: endpoint URL and API key are taken from the environment.
es_client = Elasticsearch(
    os.getenv("ES_ENDPOINT"),
    api_key=os.getenv("ES_API_KEY"),
)

# OpenAI client: the API key is taken from the environment as well.
openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Maps an index name to the list of _source fields that carry its text content.
index_source_fields = {"search-wiki": ["body_content"]}

In [10]:
def get_elasticsearch_results(query=None, size=3):
    """Run a ``multi_match`` search against the ``search-wiki`` index.

    Args:
        query: Text to search for in the ``body_content`` and ``title`` fields.
            When omitted (``None``), the function falls back to a notebook-level
            variable named ``query``, which is what the original zero-argument
            version relied on.
        size: Maximum number of hits requested from Elasticsearch. Defaults to 3.

    Returns:
        The value stored under ``["hits"]["hits"]`` in the search response.

    Raises:
        NameError: If ``query`` is not passed and no global ``query`` exists.
    """
    if query is None:
        # Legacy path: earlier revisions read the search text from a global
        # variable. Keep that working, but fail with an actionable message
        # instead of an opaque NameError when the global was never defined.
        try:
            query = globals()["query"]
        except KeyError:
            raise NameError(
                "name 'query' is not defined; pass the search text as an argument, "
                "e.g. get_elasticsearch_results('my question')"
            ) from None

    es_query = {
        "retriever": {
            "standard": {
                "query": {
                    "multi_match": {
                        "query": query,
                        "fields": [
                            "body_content",
                            "title"
                        ]
                    }
                }
            }
        },
        "size": size
    }
    result = es_client.search(index="search-wiki", body=es_query)
    return result["hits"]["hits"]

In [11]:
def create_openai_prompt(results):
    """Build the system prompt that carries the retrieved context.

    For every hit, the text is taken from its ``inner_hits`` entry keyed by
    ``"<index>.<source field>"`` when that entry exists (chunks are joined with
    a ``---`` separator line); otherwise it is read from ``_source`` using the
    first field configured for the hit's index in ``index_source_fields``.

    Args:
        results: Iterable of Elasticsearch hit dicts, each with an ``_index``
            key and either a matching ``inner_hits`` entry or a ``_source`` dict.

    Returns:
        The instruction prompt string with the collected context embedded.
    """
    context_blocks = []
    for hit in results:
        # Resolve the configured text field once; it is needed by both branches.
        source_field = index_source_fields.get(hit["_index"])[0]
        inner_hit_path = f"{hit['_index']}.{source_field}"
        ## For semantic_text matches, we need to extract the text from the inner_hits
        if "inner_hits" in hit and inner_hit_path in hit["inner_hits"]:
            chunks = hit["inner_hits"][inner_hit_path]["hits"]["hits"]
            hit_context = "\n --- \n".join(chunk["_source"]["text"] for chunk in chunks)
        else:
            hit_context = hit["_source"][source_field]
        # Terminate every hit with a newline so the text of consecutive hits
        # does not run together (the inner_hits branch used to omit it).
        context_blocks.append(f"{hit_context}\n")
    context = "".join(context_blocks)
    prompt = f"""
  Instructions:
  
  - You are an assistant for question-answering tasks.
  - Answer questions truthfully and factually using only the context presented.
  - If you don't know the answer, just say that you don't know, don't make up an answer.
  - You must always cite the document where the answer was extracted using inline academic citation style [], using the position.
  - Use markdown format for code examples.
  - You are correct, factual, precise, and reliable.
  
  Context:
  {context}
  
  """
    return prompt


In [15]:
def generate_openai_completion(user_prompt, question, model="gpt-4o-mini"):
    """Ask the chat model to answer ``question`` under the given instructions.

    Args:
        user_prompt: Instruction/context text, sent as the ``system`` message.
        question: The question to answer, sent as the ``user`` message.
        model: Name of the chat model to request. Defaults to ``"gpt-4o-mini"``.

    Returns:
        The ``content`` of the first choice's message in the response.
    """
    messages = [
        {"role": "system", "content": user_prompt},
        {"role": "user", "content": question},
    ]
    response = openai_client.chat.completions.create(
        model=model,
        messages=messages,
    )
    return response.choices[0].message.content