# Interact with LLM

In [None]:
!pip install openai
!pip install dotenv

In [None]:
from dotenv import load_dotenv
import os

In [None]:
# Read the LLM credentials from the lab's dotenv file.
load_dotenv('.env.instruqt')
# Index os.environ directly so that a missing variable fails right here with a
# KeyError naming it, instead of flowing on as None (which would yield the
# base URL "https://None").
openai_api_key = os.environ["LLM_APIKEY"]
url = os.environ["LLM_PROXY_URL"]
openai_api_base = f"https://{url}"

In [None]:
# Publish the credentials as process environment variables. The OpenAI clients
# created later in this notebook are given no base URL argument, so presumably
# the SDK picks the proxy endpoint up from OPENAI_BASE_URL -- confirm against
# the SDK documentation.
os.environ.update(
    {
        "OPENAI_API_KEY": openai_api_key,
        "OPENAI_BASE_URL": openai_api_base,
    }
)

In [None]:
from elasticsearch import Elasticsearch

In [None]:
# Elasticsearch connection settings; each one is None when its variable is unset.
es_host = os.environ.get("ELASTICSEARCH_URL")
es_api_key = os.environ.get("ELASTICSEARCH_APIKEY")

In [None]:
# Shared Elasticsearch client, built from the host and API key read from the environment.
es = Elasticsearch(hosts=[str(es_host)], api_key=es_api_key)

In [None]:
# LLM is from OpenAI 
from openai import OpenAI

In [None]:
# Start with a simple, one-pass interaction with the LLM.

def call2llm(systems_prompt, users_prompt, model="gpt-4.1", temperature=0.000001):
    """Send one system/user prompt pair to the LLM and return its reply message.

    Each call builds its message list from scratch, so nothing is carried
    over from one call to the next.

    Args:
        systems_prompt: The persona the system assumes in the interaction.
        users_prompt: The input from the user chatting with the LLM.
        model: Name of the chat model to query.
        temperature: Sampling temperature. Low means consistent LLM
            responses (high means more creative).

    Returns:
        The message object of the first choice in the completion (not just
        its text content).
    """
    client = OpenAI(api_key=openai_api_key)
    completion = client.chat.completions.create(
        messages=[
            {"role": "system", "content": systems_prompt},
            {"role": "user", "content": users_prompt},
        ],
        model=model,
        temperature=temperature,
    )
    return completion.choices[0].message

In [None]:
# Smoke test: send one stand-alone question through call2llm and print the
# returned message object.
llm_answer = call2llm("You're a helpful assistant", "What is 2+2?")
print(llm_answer)

In [None]:
# Second, separate call. call2llm builds its message list from scratch each
# time, so this request contains nothing from the previous question.
llm_answer2 = call2llm("You're a helpful assistant", "What did we just sum?")
print(llm_answer2)

Note that `call2llm` has no memory of what happened previously: each call sends only the current system and user prompts.

<br>

#### Implement it instead as a Python class, which makes it possible to add conversational memory.

In [None]:
class ChatWithLlm:
    """Chat session with an LLM that keeps the conversation so far.

    The running message list is stored in ``history`` and sent in full with
    every request, which is what lets the model refer back to earlier turns.

    Attributes are plain and public:
        systems_prompt: The system prompt the session was created with.
        model: Name of the chat model queried by ``call2llm``.
        history: List of ``{"role": ..., "content": ...}`` messages, starting
            with the system prompt.
    """

    def __init__(self, systems_prompt="assistant", model="gpt-4.1"):
        self.systems_prompt = systems_prompt
        self.model = model
        # history helps us "keep memory" of what happened before
        self.history = [{"role": "system", "content": systems_prompt}]

    def call2llm(self, users_prompt, temperature=0.00001):
        """Send the next user message and return the assistant's reply text.

        Args:
            users_prompt: The user's message for this turn.
            temperature: Sampling temperature. Low means consistent LLM
                responses (high means more creative).

        Returns:
            The reply content of the first choice, converted to ``str``.
        """
        client = OpenAI(api_key=openai_api_key)
        user_turn = {"role": "user", "content": users_prompt}  # user role prompts the LLM
        response = client.chat.completions.create(
            messages=self.history + [user_turn],
            model=self.model,
            temperature=temperature,
        )
        response_llm = str(response.choices[0].message.content)
        # Record the turn only after the request succeeded. If the call raises,
        # history is left untouched, so a retry does not send the same question twice.
        self.history.extend(
            [user_turn, {"role": "assistant", "content": response_llm}]
        )
        return response_llm

In [None]:
# Test with an instance of the ChatWithLlm class: the first turn of a
# conversation whose messages are kept in chat.history.
chat = ChatWithLlm("You're a helpful assistant")
llm_answer =  chat.call2llm("What is 2 + 2?")
print(llm_answer)

In [None]:
# Second turn on the same instance: chat.history already holds the previous
# user/assistant pair, and call2llm sends the whole list again.
llm_answer =  chat.call2llm("What did I just ask you?")
print(llm_answer)

In [None]:
# Third turn on the same instance; the accumulated history is sent once more.
llm_answer =  chat.call2llm("How did you remember what was asked?")
print(llm_answer)

 <br>
 <br>

## RAG solution

Finally, here is the Python class that implements our RAG solution.

`Elasticsearch_rag` both queries Elasticsearch and feeds the retrieved documents to the LLM in a prompt.

In [None]:
class Elasticsearch_rag:
    """Retrieval-augmented chat backed by Elasticsearch and an OpenAI chat model.

    For each user query, ``augment`` fetches supporting documents with
    ``retrieve``, puts them into the prompt together with the query, and sends
    that prompt along with the conversation so far. The conversation is kept
    in ``history`` as a list of ``{"role": ..., "content": ...}`` messages,
    starting with the system prompt.
    """

    def __init__(self, systems_prompt="You are a helpful assistant.", model="gpt-4.1"):
        self.systems_prompt = systems_prompt
        self.model = model
        self.history = [{"role": "system", "content": systems_prompt}]

    # retrieve documents from Elasticsearch
    def retrieve(self, query, top_n=2, search_template="RAG_application"):
        """Return the bodies of the top matching documents, joined by newlines.

        Args:
            query: Text passed to the search application as ``query_string``.
            top_n: Maximum number of documents to return; also sent as the
                ``size`` parameter.
            search_template: Name of the Elasticsearch search application to query.

        Returns:
            The ``_source.body`` field of each hit, one per line. Each hit is
            expected to carry that field.
        """
        params = {"query_string": query, "size": top_n}
        response = es.search_application.search(name=search_template, params=params)
        top_docs = [hit["_source"]["body"] for hit in response["hits"]["hits"][:top_n]]
        return "\n".join(top_docs)

    # combine user's query, conversation history, and docs from Elasticsearch to send to LLM
    def augment(self, query, temperature=0.00001):
        """Answer ``query`` using retrieved documents and the conversation history.

        Args:
            query: The user's question for this turn.
            temperature: Sampling temperature. Low means consistent LLM
                responses (high means more creative).

        Returns:
            The reply content of the first choice, converted to ``str``.
        """
        client = OpenAI(api_key=openai_api_key)
        # Retrieve through this instance; no second object is needed.
        retrieved = self.retrieve(query)
        prompt = "This is the query: " + query + " Here are supporting documents. " + retrieved
        # Send the augmented prompt, not the bare query, so the model actually
        # receives the retrieved documents.
        user_turn = {"role": "user", "content": prompt}
        response = client.chat.completions.create(
            messages=self.history + [user_turn],
            model=self.model,
            temperature=temperature,
        )
        response_llm = str(response.choices[0].message.content)
        # Record the turn exactly once, and only after the request succeeded,
        # so a failed call leaves the history unchanged.
        self.history.extend(
            [user_turn, {"role": "assistant", "content": response_llm}]
        )
        return response_llm

In [None]:
# Start a RAG conversation with the default system prompt and model, then ask
# the first question.
conversation = Elasticsearch_rag()   # an instance of a conversation
print(conversation.augment("What is Kibana good for?"))

In [None]:
# Follow-up question on the same instance, which keeps its message history between calls.
print(conversation.augment("Can I run Kibana in a Docker container?"))

In [None]:
# Memory check: ask about an earlier turn of this same conversation.
print(conversation.augment("What was the first question I asked?"))

Congratulations!  We have examined how to create a RAG application that feeds documents from Elasticsearch to OpenAI's GPT LLM.