# Elasticsearch Vector Store

In [1]:
import logging
import sys
import os

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

import getpass

os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API Key:")
import openai

openai.api_key = os.environ["OPENAI_API_KEY"]

#### Load documents, build VectorStoreIndex with Elasticsearch

In [3]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores import ElasticsearchStore

In [4]:
# load documents
documents = SimpleDirectoryReader("../data/paul_graham").load_data()

In [5]:
# initialize without metadata filter
from llama_index.storage.storage_context import StorageContext

vector_store = ElasticsearchStore(
    es_url="http://localhost:9200", index_name="paul_graham"
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

INFO:elastic_transport.transport:GET http://localhost:9200/ [status:200 duration:0.024s]
GET http://localhost:9200/ [status:200 duration:0.024s]
INFO:elastic_transport.transport:HEAD http://localhost:9200/paul_graham [status:200 duration:0.011s]
HEAD http://localhost:9200/paul_graham [status:200 duration:0.011s]
INFO:elastic_transport.transport:PUT http://localhost:9200/_bulk?refresh=true [status:200 duration:0.115s]
PUT http://localhost:9200/_bulk?refresh=true [status:200 duration:0.115s]
INFO:elastic_transport.transport:PUT http://localhost:9200/_bulk?refresh=true [status:200 duration:0.083s]
PUT http://localhost:9200/_bulk?refresh=true [status:200 duration:0.083s]


#### Query Index

In [8]:
# set Logging to DEBUG for more detailed outputs
query_engine = index.as_query_engine()
response = query_engine.query("what did he do growing up?")
print(response)

INFO:elastic_transport.transport:POST http://localhost:9200/paul_graham/_search [status:200 duration:0.036s]
POST http://localhost:9200/paul_graham/_search [status:200 duration:0.036s]
The context information does not provide any information about what he did growing up.


## Custom Filters and overriding Query 
llama-index supports ExactMatchFilters only at the moment. Elasticsearch supports a wide range of filters, including range filters, geo filters, and more. To use these filters, you can pass them in as a list of dictionaries to the `es_filter` parameter.

In [11]:
def custom_query(query, query_str):
    print("custom query", query)
    return query


query_engine = index.as_query_engine(
    vector_store_kwargs={
        "es_filter": [{"match": {"content": "growing up"}}],
        "custom_query": custom_query,
    }
)
response = query_engine.query("what did he do growing up?")
print(response)

custom query {'knn': {'filter': [{'match': {'content': 'growing up'}}], 'field': 'embedding', 'query_vector': [-0.0005294774891808629, -0.016782043501734734, 0.02605314739048481, -0.017400115728378296, -0.03385968506336212, 0.013476692140102386, 0.003145793220028281, 0.004205587785691023, -0.02345992624759674, -0.033268485218286514, 0.030742445960640907, -0.008639594539999962, 0.00025130235007964075, 0.008545540273189545, 0.016687987372279167, 0.018380971625447273, 0.03179048374295235, -0.012898928485810757, 0.008337276056408882, -0.018985610455274582, -0.008404457941651344, 0.002705751685425639, 0.0031105228699743748, -0.017883826047182083, 0.0013503564987331629, 0.01637895219027996, 0.018958736211061478, -0.020665157586336136, 0.007840129546821117, -0.01864970102906227, 0.012045717798173428, -0.01003697793930769, -0.0031911409460008144, 0.00022799863654654473, -0.034155286848545074, -0.003493459662422538, -0.003909987397491932, -0.018286917358636856, -0.013409510254859924, 0.01028555