In [None]:
!pip install -q einops==0.7.0 langchain==0.1.9 sentence-transformers==2.4.0 openai==1.13.3 langchain_elasticsearch langchain-community tf-keras jq tiktoken


# Store data into ElasticSearch

In [38]:
import json
import openai
import os
import warnings

warnings.filterwarnings('ignore')

from langchain_elasticsearch import ElasticsearchStore
#from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
#from getpass import getpass
#from urllib.request import urlopen
#from langchain.llms import OpenAI

from langchain.document_loaders import JSONLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

#from langchain.callbacks.base import BaseCallbackHandler
from langchain.chains import RetrievalQA
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import VLLMOpenAI
from langchain.prompts import PromptTemplate
from elasticsearch import Elasticsearch

## Step 1: Initialize connection to Elasticsearch


In [14]:
client = Elasticsearch(['https://elasticsearch-sample-elasticsearch.apps.rosa-t59w8.oufo.p1.openshiftapps.com'], basic_auth=('elastic', '1rdzVXU8i8F9T8hs1p6c78d6'), verify_certs=False)

print(client.info())


{'name': 'elasticsearch-sample-es-default-1', 'cluster_name': 'elasticsearch-sample', 'cluster_uuid': 'nr3Sh4ArQcqFm90rHd5Smw', 'version': {'number': '8.17.0', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '2b6a7fed44faa321997703718f07ee0420804b41', 'build_date': '2024-12-11T12:08:05.663969764Z', 'build_snapshot': False, 'lucene_version': '9.12.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}


In [16]:
!pwd

/opt/app-root/src/rag-with-elasticsearch


## Step 2: Initialise embeddings 

In [25]:
# use granite embeddings
embeddings = HuggingFaceEmbeddings(
    model_name='sentence-transformers/all-mpnet-base-v2'

)


## Step 3: Initialise vector store 


In [26]:
vector_store = ElasticsearchStore(es_connection= client,
    index_name="rhoai_index",
    embedding=embeddings,
)
 


## Step 4: Load the data and tokenise to be used in ElasticSearchDB

In [27]:

def metadata_func(record: dict, metadata: dict) -> dict:
    metadata["content"] = record.get("content")
    metadata["title"] = record.get("title")
    return metadata


loader = JSONLoader(
    file_path="rhoai.json",
    jq_schema=".[]",
    content_key="content",
    metadata_func=metadata_func,
)

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=128, chunk_overlap=64
)
docs = loader.load_and_split(text_splitter=text_splitter) # Please load the right doc




In [28]:

documents = vector_store.from_documents(
    docs,
    embeddings,
    index_name="rhoai_index",
    es_connection= client
)


results = vector_store.similarity_search("can you list OpenShift AI offerings")
print(results)

[Document(page_content='OpenShift AI integrates the following components and services \n At the service layer \n OpenShift AI dashboard \n A customer-facing dashboard that shows available and installed applications for the OpenShift AI environment as well as learning resources such as tutorials, quick starts, and documentation. Administrative users can access functionality to manage users, clusters, notebook images, accelerator profiles, and model-serving runtimes. Data scientists can use the dashboard to create projects to organize their data science work. \n Model serving', metadata={'source': '/opt/app-root/src/rag-with-elasticsearch/rhoai.json', 'seq_num': 3, 'content': 'OpenShift AI integrates the following components and services \n At the service layer \n OpenShift AI dashboard \n A customer-facing dashboard that shows available and installed applications for the OpenShift AI environment as well as learning resources such as tutorials, quick starts, and documentation. Administra

# Serve the LLM in OpenShift AI


In [31]:

#######################################################################
#      INITIALIZE ELASTICSEARCH CONNECTION AND VECTOR STORE           #
#######################################################################

client = Elasticsearch(['https://elasticsearch-sample-elasticsearch.apps.rosa-t59w8.oufo.p1.openshiftapps.com'], basic_auth=('elastic', '1rdzVXU8i8F9T8hs1p6c78d6'), verify_certs=False)

print(client.info())



{'name': 'elasticsearch-sample-es-default-1', 'cluster_name': 'elasticsearch-sample', 'cluster_uuid': 'nr3Sh4ArQcqFm90rHd5Smw', 'version': {'number': '8.17.0', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': '2b6a7fed44faa321997703718f07ee0420804b41', 'build_date': '2024-12-11T12:08:05.663969764Z', 'build_snapshot': False, 'lucene_version': '9.12.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'}


In [35]:

embeddings = HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2') #Try using granite embedding next time

store = ElasticsearchStore(es_connection= client,
    index_name="rhoai_index",
    embedding=embeddings,
)

#######################################################################
#      INITIALIZE LLM DEPLOYMENT PARAMETERS                           #
#######################################################################

INFERENCE_SERVER_URL = f"http://llm-predictor.myrag.svc.cluster.local:8080/v1" # Must change to the correct url
MODEL_NAME = "llm"
MAX_TOKENS=1024
TOP_P=0.95
TEMPERATURE=0.01
PRESENCE_PENALTY=1.03



In [39]:
template = """
### [INST] 
Instruction: Answer the question based on your 
OpenShift AI knowledge. Here is context to help:

{context}

### QUESTION:
{question} 

[/INST]
 """

os.environ["TOKENIZERS_PARALLELISM"] = "false"

QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

llm = VLLMOpenAI(
    openai_api_key="EMPTY",
    openai_api_base=INFERENCE_SERVER_URL,
    model_name=MODEL_NAME,
    top_p=TOP_P,
    temperature=TEMPERATURE,
    max_tokens=MAX_TOKENS,
    presence_penalty=PRESENCE_PENALTY,
    streaming=True,
    verbose=False,
    callbacks=[StreamingStdOutCallbackHandler()]
)

qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=store.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 4}
            ),
        chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
        return_source_documents=True
        )
os.environ["TOKENIZERS_PARALLELISM"] = "false"


In [40]:
question = "What accelerators are supported on OpenShift AI?"
result = qa_chain.invoke({"query": question})


APIConnectionError: Connection error.