In [29]:
import json
import os
from urllib.request import urlopen

# Example data file from the elasticsearch-labs repository; parsed into
# workplace_docs, which the later cells iterate over.
url = "https://raw.githubusercontent.com/elastic/elasticsearch-labs/main/esre-examples/python-flask-example/example-data/data.json"

# The context manager closes the HTTP connection as soon as the payload has
# been parsed instead of leaving the socket open for the life of the kernel.
with urlopen(url) as response:
    workplace_docs = json.load(response)


In [None]:
!pip install -qU langchain jq openai elasticsearch tiktoken

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import ElasticVectorSearch
from langchain.embeddings import OpenAIEmbeddings
from elasticsearch import Elasticsearch

# Collect the passage text and the matching metadata as two parallel lists,
# both in the same order as workplace_docs.
content = [entry["content"] for entry in workplace_docs]
metadata = [
    {"name": entry["name"], "summary": entry["summary"]}
    for entry in workplace_docs
]

# Split the texts into chunks (chunk_size=500, no overlap); the metadata list
# is handed over alongside the texts so it can be attached to the chunks.
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
docs = text_splitter.create_documents(content, metadatas=metadata)

# Credentials are taken from environment variables when they are set, so real
# secrets do not have to be pasted into (and saved with) the notebook. The
# original placeholder values remain as fallbacks, so behavior is unchanged
# when the variables are absent.
embeddings = OpenAIEmbeddings(
    openai_api_key=os.environ.get("OPENAI_API_KEY", "<openai-key>")
)

# Elastic Cloud connection: ELASTIC_CLOUD_ID selects the deployment and
# ELASTIC_PASSWORD is the password of the built-in "elastic" user.
client = Elasticsearch(
    cloud_id=os.environ.get(
        "ELASTIC_CLOUD_ID",
        "My_deployment:dXMtY2VudHJhbDEuZ2NwLmNsb3VkLmVzLmlvOjQ0MyQxYTU2YWQyMTU4N2M0NGQzOTMwOTMyZWI5ZmExZDhlOCRiNGZkMDBhYTNlZjI0ODdiYmU5OGQ5N2YyNTBlYWUyYw==",
    ),
    basic_auth=("elastic", os.environ.get("ELASTIC_PASSWORD", "<password>")),
)


In [None]:
# Define the mapping: "text" stores the passage and "vector" stores its
# embedding for kNN search.
mapping = {
    "mappings": {
        "properties": {
            "text": {"type": "keyword"},
            "vector": {
                "type": "dense_vector",
                # Presumably the length of the OpenAI embedding vectors stored
                # in this field -- confirm if the embedding model changes.
                "dims": 1536,
                # A real boolean rather than the string "true".
                "index": True,
                "similarity": "dot_product",
            },
        }
    }
}

# Create the index. The mapping is passed through the typed `mappings`
# keyword, matching the keyword-style client calls used in the other cells
# (`basic_auth=`, `operations=`), instead of the legacy catch-all `body=`.
client.indices.create(index="workplace_index", mappings=mapping["mappings"])

In [132]:
# Get the embeddings from OpenAI: gather the text of every chunk, then embed
# the whole list in a single embed_documents call.
texts = [chunk.page_content for chunk in docs]
textEmbeddings = embeddings.embed_documents(texts)


In [None]:
# Persist the passage documents into Elasticsearch.

# The bulk request body alternates between an action entry ("index" into
# workplace_index) and the document source belonging to that action.
actions = []
for passage, passageEmbedding in zip(docs, textEmbeddings):
    actions.append({"index": {"_index": "workplace_index"}})
    actions.append({
        "text": passage.page_content,
        "vector": passageEmbedding,
        "metadata": passage.metadata,
    })

# refresh=True makes the new documents visible to search when the call returns,
# so the query cell does not depend on the timing of a background refresh.
bulk_response = client.bulk(operations=actions, refresh=True)

# Per-item failures are reported in the response body rather than raised, so
# check them here instead of letting later cells run against a partial index.
if bulk_response["errors"]:
    failures = [
        item["index"]["error"]
        for item in bulk_response["items"]
        if "error" in item["index"]
    ]
    raise RuntimeError(
        f"{len(failures)} of {len(docs)} passages failed to index: {failures[:3]}"
    )

print(f"Indexed {len(docs)} passages into workplace_index")



Querying

In [134]:
from langchain.vectorstores.elastic_vector_search import ElasticKnnSearch
from langchain.llms import OpenAI
from langchain.chains import ConversationalRetrievalChain, RetrievalQA

# Wrap the populated index as a retriever, reusing the Elasticsearch client
# and the embedding object created earlier in the notebook.
db = ElasticKnnSearch(
    es_connection=client, index_name="workplace_index", embedding=embeddings
)

retriever = db.as_retriever()

# The API key comes from the OPENAI_API_KEY environment variable when it is
# set, so the secret does not have to be written into the notebook; the
# original placeholder is kept as the fallback.
llm = OpenAI(openai_api_key=os.environ.get("OPENAI_API_KEY", "<openai-key>"))

# return_source_documents=True is what makes "source_documents" available in
# the result that is printed below.
qa = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True
)

ans = qa({"query": "what is the nasa sales team?"})

print("answer")
print(ans["result"])

# Show the name and text of each passage returned with the answer.
print("sources")
for doc in ans["source_documents"]:
    print(doc.metadata["name"])
    print(doc.page_content)

answer
 The NASA Sales team is the North America South America regional sales team. It is composed of dedicated account managers, sales representatives, and support staff, and is led by Laura Martinez (Area Vice-President of North America) and Gary Johnson (Area Vice-President of South America).
sources
Sales Organization Overview
Our sales organization is structured to effectively serve our customers and achieve our business objectives across multiple regions. The organization is divided into the following main regions:

The Americas: This region includes the United States, Canada, Mexico, as well as Central and South America. The North America South America region (NASA) has two Area Vice-Presidents: Laura Martinez is the Area Vice-President of North America, and Gary Johnson is the Area Vice-President of South America.
Sales Organization Overview
Each regional sales team consists of dedicated account managers, sales representatives, and support staff, led by their respective Area Vi

In [None]:
# Clean up: remove the demo index. ignore_unavailable=True keeps this cell
# re-runnable when the index has already been deleted.
client.indices.delete(index="workplace_index", ignore_unavailable=True)