# ElasticsearchStore Integrations (Recommended)

## Install Packages

In [None]:
#!pip3 install langchain openai elasticsearch python-dotenv tiktoken

## Import packages

In [None]:
from dotenv import load_dotenv
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import ElasticsearchStore
from langchain.text_splitter import CharacterTextSplitter
from urllib.request import urlopen
import os, json

# Load variables from a local .env file (if present) into the environment.
load_dotenv()

# Credentials and connection settings; each is None when its variable is unset.
openai_api_key = os.environ.get("OPENAI_API_KEY")
elastic_cloud_id = os.environ.get("ES_CLOUD_ID")
elastic_user = os.environ.get("ES_USER")
elastic_password = os.environ.get("ES_PASSWORD")
elastic_endpoint = os.environ.get("ES_ENDPOINT")

# Index that the dense-vector examples in this notebook write to and query.
elastic_index_name = "byte-discuss-elasticsearch-store"


## Add documents

### Let's download the sample dataset and deserialize the document.

In [None]:
# Location of the sample dataset (a JSON file hosted on GitHub).
url = "https://raw.githubusercontent.com/ashishtiwari1993/langchain-elasticsearch-RAG/main/data.json"

# Use the response as a context manager so the HTTP connection is closed
# even when reading or parsing fails.
with urlopen(url) as response:
    # json.load reads the response body and deserializes it in one step.
    workplace_docs = json.load(response)

### Split Documents into Passages

In [None]:
# Body text of every downloaded document, in dataset order.
content = [entry["content"] for entry in workplace_docs]

# One metadata record per document, index-aligned with `content`.
metadata = [
    {
        "name": entry["name"],
        "summary": entry["summary"],
        "rolePermissions": entry["rolePermissions"],
    }
    for entry in workplace_docs
]

# Split the texts into passages; `metadatas` pairs each text with its record.
text_splitter = CharacterTextSplitter(chunk_size=50, chunk_overlap=0)
docs = text_splitter.create_documents(content, metadatas=metadata)

### Index data into Elasticsearch

In [None]:
# Embedding model used to vectorize passages at index time and queries at
# search time.
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

# Elastic Cloud connection settings, grouped so the call below stays short.
cloud_credentials = {
    "es_cloud_id": elastic_cloud_id,
    "es_user": elastic_user,
    "es_password": elastic_password,
}

# Embed the passages and write them to the index, returning a store bound to it.
es = ElasticsearchStore.from_documents(
    docs,
    embedding=embeddings,
    index_name=elastic_index_name,
    **cloud_credentials,
)

# Last expression of the cell: display the store object.
es

## Show Results

In [None]:
def showResults(output):
    """Print how many results were returned, then each result on its own line.

    Args:
        output: A sized iterable of results (anything that supports ``len()``
            and iteration, e.g. the list returned by ``similarity_search``).
    """
    print("Total results: ", len(output))
    # Iterate directly instead of indexing by position, so any sized
    # iterable works, not only subscriptable sequences.
    for item in output:
        print(item)

## Similarity / Vector Search (Approximate KNN Search) - ApproxRetrievalStrategy()

In [None]:
# Natural-language question to look up among the indexed passages.
query = "work from home policy"
# Ask the store for the passages most similar to the query; the retrieval
# strategy is whatever `es` was constructed with.
result = es.similarity_search(query=query)

# Print the hit count followed by each returned document.
showResults(result)

## Hybrid Search (Approximate KNN + Keyword Search) - ApproxRetrievalStrategy()

In [None]:
# Embedding model used to vectorize the query text.
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

# The passages were already embedded and written to `elastic_index_name` by
# the indexing cell. Calling `from_documents` again on the same index would
# store every passage a second time (and pay for the embeddings again), so
# attach to the existing index and only change how queries are executed.
es = ElasticsearchStore(
    index_name=elastic_index_name,
    embedding=embeddings,
    es_cloud_id=elastic_cloud_id,
    es_user=elastic_user,
    es_password=elastic_password,
    # hybrid=True combines the approximate kNN query with a keyword query.
    strategy=ElasticsearchStore.ApproxRetrievalStrategy(
        hybrid=True
    ),
)

# Last expression of the cell: display the matching documents.
es.similarity_search("work from home policy")

## Exact KNN Search (Brute Force) - ExactRetrievalStrategy()

In [None]:
# Embedding model used to vectorize the query text.
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

# Attach to the index populated by the indexing cell instead of calling
# `from_documents` again, which would add a duplicate copy of every passage
# to the same index.
# NOTE(review): the index keeps the mapping it was created with; exact
# scoring should only need the stored vectors - confirm on your cluster.
es = ElasticsearchStore(
    index_name=elastic_index_name,
    embedding=embeddings,
    es_cloud_id=elastic_cloud_id,
    es_user=elastic_user,
    es_password=elastic_password,
    # Brute-force scoring of the query vector against the stored vectors.
    strategy=ElasticsearchStore.ExactRetrievalStrategy(),
)

# Last expression of the cell: display the matching documents.
es.similarity_search("work from home policy")

## Index / Search Documents using ELSER - SparseVectorRetrievalStrategy()

In [None]:
# No embedding model is created or passed here: the original cell built an
# OpenAIEmbeddings instance but never handed it to the store.
# NOTE(review): presumably the sparse vectors are produced by an ELSER model
# deployed in the Elasticsearch cluster - confirm it is available.
es = ElasticsearchStore.from_documents(
    docs,
    es_cloud_id=elastic_cloud_id,
    es_user=elastic_user,
    es_password=elastic_password,
    # Separate index, so ELSER documents do not mix with the dense-vector ones.
    index_name=f"{elastic_index_name}-elser",
    strategy=ElasticsearchStore.SparseVectorRetrievalStrategy(),
)

# Last expression of the cell: display the matching documents.
es.similarity_search("work from home policy")