"Notebook magic" commands to install packages that we will need.

In [1]:
!pip install dotenv

Collecting dotenv
  Downloading dotenv-0.9.9-py2.py3-none-any.whl.metadata (279 bytes)
Downloading dotenv-0.9.9-py2.py3-none-any.whl (1.9 kB)
Installing collected packages: dotenv
Successfully installed dotenv-0.9.9


Some imports and environment variables we will make use of to connect to Elasticsearch and OpenAI's LLM.

In [None]:
import requests
from dotenv import load_dotenv
import os

# Fetch the lab's environment settings once and cache them in .env.instruqt
if not os.path.exists('.env.instruqt'):
    # Bound the wait and fail on HTTP errors: otherwise an error page would be
    # cached as the env file, and the exists() check would block any re-fetch.
    env_response = requests.get('http://kubernetes-vm:9000/env', timeout=30)
    env_response.raise_for_status()
    env_text = env_response.text
    with open('.env.instruqt', 'w') as f:
        f.write(env_text)
load_dotenv('.env.instruqt')

openai_api_key = os.environ.get("LLM_APIKEY")
url = os.environ.get("LLM_PROXY_URL")

# os.environ only accepts strings, so assigning a missing key would raise an
# opaque TypeError, and formatting a missing URL would yield "https://None".
# Report the actual missing setting instead.
if not openai_api_key:
    raise ValueError("The LLM_APIKEY environment variable is not set.")
if not url:
    raise ValueError("The LLM_PROXY_URL environment variable is not set.")

openai_api_base = f"https://{url}"

# Export the API key and proxy base URL under the OPENAI_* variable names
os.environ["OPENAI_API_KEY"] = openai_api_key
os.environ["OPENAI_BASE_URL"] = openai_api_base

# Elasticsearch connection settings; both are None when unset
es_host = os.getenv("ELASTICSEARCH_URL", None)
es_api_key = os.getenv("ELASTICSEARCH_APIKEY", None)

In [None]:
# Fail fast when the key is missing or blank: an empty string (e.g. a bare
# `LLM_APIKEY=` line in the env file) is just as unusable as None.
if not openai_api_key:
    raise ValueError("The openai_api_key environment variable is not set.")

In [None]:
#Small helper that pretty-prints a mapping as JSON
def jsn(x):
    """Print `x` as JSON with two-space indentation and sorted keys.

    `x` is first converted with `dict(x)`, so it may be a mapping or any
    iterable of key/value pairs.
    """
    import json
    as_mapping = dict(x)
    rendered = json.dumps(as_mapping, sort_keys=True, indent=2)
    print(rendered)

In [None]:
#Helps to suppress spurious warnings
import warnings
warnings.filterwarnings('ignore')

Import the Elasticsearch module for Python.

In [None]:
from elasticsearch import Elasticsearch

Connect to Elasticsearch and verify

In [None]:
# Interpolating a missing URL into an f-string would produce the literal host
# "None" and a confusing connection error, so report the real problem instead.
if not es_host:
    raise ValueError("The ELASTICSEARCH_URL environment variable is not set.")

es = Elasticsearch(
    hosts=[es_host],
    api_key=es_api_key,
)
# Query the cluster info endpoint to verify the connection
jsn(es.info())

<br>

# Run searches on Elasticsearch #

In [None]:
#function that runs a simple match query
def retrieve_documents(query, top_n=2):
    """Print the `body` text of the best hits for a simple match query.

    Args:
        query: Text matched against the `body` field.
        top_n: Maximum number of documents to print.

    Returns:
        None. The document bodies are printed, separated by blank lines.
    """
    search_query = {
        # Request top_n hits explicitly: without `size` the search returns at
        # most 10 hits, which silently caps any larger top_n.
        "size": top_n,
        "query": {
            "match": {
                "body": query
            }
        }
    }
    response = es.search(index="elastic_blogs-full-embeddings_e5", body=search_query)
    top_docs = [hit["_source"]["body"] for hit in response["hits"]["hits"][:top_n]]
    line_separated = "\n\n".join(top_docs)
    print(line_separated)

In [None]:
# Exercise the match-query helper, asking for the three best hits
retrieve_documents("Kibana for data analytics", top_n=3)

<br>

That was a simple match query, but we want to run a more sophisticated search on Elasticsearch so that we can pass more relevant documents to the LLM for retrieval-augmented generation (RAG).

The cells below run searches by calling a search template (now known as a search application) and store the result in `create_response`.
The search application runs a hybrid search (lexical and semantic) whose results are combined using Reciprocal Rank Fusion (RRF).

In [None]:
# Dry run with `render_query`: inspect the hybrid search and confirm that the
# template parameters get assigned values.

app_name = "RAG_application"  # search_application built in Kibana Console
params1 = {
    "query_string": "My first query",
    "size": 2,
}

create_response = es.search_application.render_query(
    name=app_name,
    params=params1,
)

print("The render_query shows the search code is a bool and semantic search combined by RRF: \n")
jsn(create_response)

In [None]:
# Now call `search` so the search application actually runs on Elasticsearch

app_name = "RAG_application"
# Template parameters as a dictionary of key: value pairs
params1 = {
    "query_string": "My first query",
    "size": 3,
}

create_response = es.search_application.search(
    name=app_name,
    params=params1,
)

print("Documents from running the query: ")
jsn(create_response)

In [None]:
#retrieve_documents runs a search template/application and returns the hit bodies
def retrieve_documents(query, top_n=2, search_template="RAG_application"):
    """Run a search application and return the `body` text of its top hits.

    Args:
        query: Value passed to the application as `query_string`.
        top_n: Passed as `size` and also used to cap the hits that are kept.
        search_template: Name of the search application to call.

    Returns:
        The `body` field of each kept hit, joined with newlines.
    """
    response = es.search_application.search(
        name=search_template,
        params={"query_string": query, "size": top_n},
    )
    bodies = []
    for hit in response["hits"]["hits"][:top_n]:
        bodies.append(hit["_source"]["body"])
    return "\n".join(bodies)

In [None]:
# Smoke test: retrieve documents for a sample question and show the result
query = "How can I secure my networks between elasticsearch nodes?"
retrieved_documents = retrieve_documents(query=query)
print("Retrieved Documents:", retrieved_documents)