# Elasticsearch playground

Jupyther notebook to play with Elasticsearch in general.
Connect to the cluster, play with queries, play with LLMChains.



In [6]:
from langchain import LLMChain, OpenAI, Cohere, HuggingFaceHub, PromptTemplate
#from langchain.model_laboratory import ModelLaboratory

from elasticsearch import Elasticsearch

    

## Cluster credentials and URL

In [7]:

ES_URL = "https://test1.es.europe-north1.gcp.elastic-cloud.com:443"
KB_URL = "https://test1.kb.europe-north1.gcp.elastic-cloud.com:9243"

username = "elastic"


# Connect to the cluster in Elastic Cloud

In [8]:
client = Elasticsearch(ES_URL,
                       basic_auth=(username, password),
                       http_compress=True
                       )

client.info()

ObjectApiResponse({'name': 'instance-0000000000', 'cluster_name': '24ee0a66ae1544e3958cd6069dd0e626', 'cluster_uuid': 'rCAnFuk1QZ27xyXaBlvouQ', 'version': {'number': '8.7.1', 'build_flavor': 'default', 'build_type': 'docker', 'build_hash': 'f229ed3f893a515d590d0f39b05f68913e2d9b53', 'build_date': '2023-04-27T04:33:42.127815583Z', 'build_snapshot': False, 'lucene_version': '9.5.0', 'minimum_wire_compatibility_version': '7.17.0', 'minimum_index_compatibility_version': '7.0.0'}, 'tagline': 'You Know, for Search'})

# Use Elasticsearch API

In [9]:
my_index = "test-123"

index_metadata = {
    "settings" : { "number_of_shards": 1 },
    "mappings": {
        "properties": {
            "name": { "type": "text"},
            "foo": { "type": "keyword" }
            }
            }
}
client.indices.create(index=my_index, body=index_metadata, ignore=400)

# client.indices.refresh(index=my_index)

client.count(index="*")


  client.indices.create(index=my_index, body=index_metadata, ignore=400)
  client.indices.create(index=my_index, body=index_metadata, ignore=400)


ObjectApiResponse({'count': 31808, '_shards': {'total': 6, 'successful': 6, 'skipped': 0, 'failed': 0}})

In [11]:
client.indices.get_mapping(index=my_index)


ObjectApiResponse({'test-123': {'mappings': {'properties': {'foo': {'type': 'keyword'}, 'name': {'type': 'text'}}}}})

In [46]:
info = client.indices.get(index=my_index)

def _format_index(index_info):
    result = ""
    for index, info in index_info.items():
        result += f"Index: {index}\n"
        mappings = info.get("mappings", {})
        properties = mappings.get("properties", {})
        result += "Fields:\n"
        for field, details in properties.items():
            result += f"  {field}: {details.get('type', 'N/A')}\n"
        result += "\n"
    return result


_format_index(info)






'Index: test-123\nFields:\n  foo: keyword\n  name: text\n\n'

In [23]:

client.indices.stats(index=my_index)


ObjectApiResponse({'_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_all': {'primaries': {'docs': {'count': 0, 'deleted': 0}, 'shard_stats': {'total_count': 1}, 'store': {'size_in_bytes': 225, 'total_data_set_size_in_bytes': 225, 'reserved_in_bytes': 0}, 'indexing': {'index_total': 0, 'index_time_in_millis': 0, 'index_current': 0, 'index_failed': 0, 'delete_total': 0, 'delete_time_in_millis': 0, 'delete_current': 0, 'noop_update_total': 0, 'is_throttled': False, 'throttle_time_in_millis': 0, 'write_load': 0.0}, 'get': {'total': 0, 'time_in_millis': 0, 'exists_total': 0, 'exists_time_in_millis': 0, 'missing_total': 0, 'missing_time_in_millis': 0, 'current': 0}, 'search': {'open_contexts': 0, 'query_total': 7, 'query_time_in_millis': 1, 'query_current': 0, 'fetch_total': 0, 'fetch_time_in_millis': 0, 'fetch_current': 0, 'scroll_total': 0, 'scroll_time_in_millis': 0, 'scroll_current': 0, 'suggest_total': 0, 'suggest_time_in_millis': 0, 'suggest_current': 0}, 'merges': {'current': 0

In [37]:

aliases = client.indices.get_alias(index="*", ignore_unavailable=True, )

aliases.keys()

  aliases = client.indices.get_alias(index="*", ignore_unavailable=True, )


dict_keys(['.kibana-event-log-8.7.1-000001', 'test-123', '.fleet-file-data-agent-000001', '.apm-agent-configuration', '.apm-source-map', '.fleet-files-agent-000001', '.kibana_security_session_1', 'kibana_sample_data_flights', '.security-tokens-7', '.security-7', '.apm-custom-link', 'kibana_sample_data_ecommerce', '.security-profile-8', '.kibana_8.7.1_001', '.kibana_task_manager_8.7.1_001'])

In [61]:
datastreams = client.indices.get_data_stream(name="*")

datastreams.get("data_streams")

streams = client.indices.get_data_stream(name="*").get("data_streams")
streams
mylist = map(lambda info: info['name'], streams)
list(mylist)

['kibana_sample_data_logs']

# From here I test ESEngine class 

In [1]:
from langchain import ESEngine
ES_URL = "https://test1.es.europe-north1.gcp.elastic-cloud.com:443"
KB_URL = "https://test1.kb.europe-north1.gcp.elastic-cloud.com:9243"

username = "elastic"

db = ESEngine.from_uri(ES_URL, 
                       basic_auth=(username, password),
                       http_compress=True
                       )


#db.get_usable_index_names()
#db._get_datastream_names()
#db.get_index_fields(index_name="kibana_sample_data_ecommerce")
#db._get_sample_docs(index_name="kibana_sample_data_ecommerce")
#print(db.get_index_info(index_names=["kibana_sample_data_ecommerce"]))
print(db.get_index_info())

Index: .fleet-file-data-agent-000001
Fields: {'bid': 'keyword', 'data': 'binary', 'last': 'boolean', 'sha2': 'keyword'}


Index: .fleet-files-agent-000001
Fields: {'action_id': 'keyword', 'agent_id': 'keyword', 'file.ChunkSize': 'integer', 'file.Compression': 'keyword', 'file.Status': 'keyword', 'file.name': 'keyword', 'source': 'keyword', 'upload_id': 'keyword', 'upload_start': 'date'}


Index: test-123
Fields: {'foo': 'keyword', 'name': 'text'}


Index: kibana_sample_data_logs
Fields: {}

/*
Sample documents:
{'agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322)', 'bytes': 484, 'clientip': '203.9.7.202', 'extension': '', 'geo': {'srcdest': 'US:US', 'src': 'US', 'dest': 'US', 'coordinates': {'lat': 39.32566333, 'lon': -76.41376556}}, 'host': 'www.elastic.co', 'index': 'kibana_sample_data_logs', 'ip': '203.9.7.202', 'machine': {'ram': 5368709120, 'os': 'win xp'}, 'memory': None, 'message': '203.9.7.202 - - [2018-09-14T09:57:02.180Z] "GET /enterprise-sear

In [2]:
db.get_index_mapping(index_name="kibana_sample_data_ecommerce")

ObjectApiResponse({'kibana_sample_data_ecommerce': {'mappings': {'properties': {'category': {'type': 'text', 'fields': {'keyword': {'type': 'keyword'}}}, 'currency': {'type': 'keyword'}, 'customer_birth_date': {'type': 'date'}, 'customer_first_name': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'customer_full_name': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'customer_gender': {'type': 'keyword'}, 'customer_id': {'type': 'keyword'}, 'customer_last_name': {'type': 'text', 'fields': {'keyword': {'type': 'keyword', 'ignore_above': 256}}}, 'customer_phone': {'type': 'keyword'}, 'day_of_week': {'type': 'keyword'}, 'day_of_week_i': {'type': 'integer'}, 'email': {'type': 'keyword'}, 'event': {'properties': {'dataset': {'type': 'keyword'}}}, 'geoip': {'properties': {'city_name': {'type': 'keyword'}, 'continent_name': {'type': 'keyword'}, 'country_iso_code': {'type': 'keyword'}, 'location': {'type': 'geo_point'}, 'reg

In [6]:
print(db.get_index_info(index_names=["kibana_sample_data_ecommerce"]))

Index: kibana_sample_data_ecommerce
Fields: {'category': 'text', 'category.keyword': 'keyword', 'currency': 'keyword', 'customer_birth_date': 'date', 'customer_first_name': 'text', 'customer_first_name.keyword': 'keyword', 'customer_full_name': 'text', 'customer_full_name.keyword': 'keyword', 'customer_gender': 'keyword', 'customer_id': 'keyword', 'customer_last_name': 'text', 'customer_last_name.keyword': 'keyword', 'customer_phone': 'keyword', 'day_of_week': 'keyword', 'day_of_week_i': 'integer', 'email': 'keyword', 'event.dataset': 'keyword', 'geoip.city_name': 'keyword', 'geoip.continent_name': 'keyword', 'geoip.country_iso_code': 'keyword', 'geoip.location': 'geo_point', 'geoip.region_name': 'keyword', 'manufacturer': 'text', 'manufacturer.keyword': 'keyword', 'order_date': 'date', 'order_id': 'keyword', 'products._id': 'text', 'products._id.keyword': 'keyword', 'products.base_price': 'half_float', 'products.base_unit_price': 'half_float', 'products.category': 'text', 'products.ca

# END OF MY TEST. Following code to be reused later

In [5]:
prompt = PromptTemplate(template="What is the capital of {state}?", input_variables=["state"])
model_lab_with_prompt = ModelLaboratory.from_llms(llms, prompt=prompt)

In [6]:
model_lab_with_prompt.compare("New York")

[1mInput:[0m
New York

[1mOpenAI[0m
Params: {'model': 'text-davinci-002', 'temperature': 0.0, 'max_tokens': 256, 'top_p': 1, 'frequency_penalty': 0, 'presence_penalty': 0, 'n': 1, 'best_of': 1}
[36;1m[1;3m

The capital of New York is Albany.[0m

[1mCohere[0m
Params: {'model': 'command-xlarge-20221108', 'max_tokens': 20, 'temperature': 0.0, 'k': 0, 'p': 1, 'frequency_penalty': 0, 'presence_penalty': 0}
[33;1m[1;3m

The capital of New York is Albany.[0m

[1mHuggingFaceHub[0m
Params: {'repo_id': 'google/flan-t5-xl', 'temperature': 1}
[38;5;200m[1;3mst john s[0m



In [7]:
from langchain import SelfAskWithSearchChain, SerpAPIWrapper

open_ai_llm = OpenAI(temperature=0)
search = SerpAPIWrapper()
self_ask_with_search_openai = SelfAskWithSearchChain(llm=open_ai_llm, search_chain=search, verbose=True)

cohere_llm = Cohere(temperature=0, model="command-xlarge-20221108")
search = SerpAPIWrapper()
self_ask_with_search_cohere = SelfAskWithSearchChain(llm=cohere_llm, search_chain=search, verbose=True)

In [8]:
chains = [self_ask_with_search_openai, self_ask_with_search_cohere]
names = [str(open_ai_llm), str(cohere_llm)]

In [9]:
model_lab = ModelLaboratory(chains, names=names)

In [10]:
model_lab.compare("What is the hometown of the reigning men's U.S. Open champion?")

[1mInput:[0m
What is the hometown of the reigning men's U.S. Open champion?

[1mOpenAI[0m
Params: {'model': 'text-davinci-002', 'temperature': 0.0, 'max_tokens': 256, 'top_p': 1, 'frequency_penalty': 0, 'presence_penalty': 0, 'n': 1, 'best_of': 1}


[1m> Entering new chain...[0m
What is the hometown of the reigning men's U.S. Open champion?
Are follow up questions needed here:[32;1m[1;3m Yes.
Follow up: Who is the reigning men's U.S. Open champion?[0m
Intermediate answer: [33;1m[1;3mCarlos Alcaraz.[0m[32;1m[1;3m
Follow up: Where is Carlos Alcaraz from?[0m
Intermediate answer: [33;1m[1;3mEl Palmar, Spain.[0m[32;1m[1;3m
So the final answer is: El Palmar, Spain[0m
[1m> Finished chain.[0m
[36;1m[1;3m
So the final answer is: El Palmar, Spain[0m

[1mCohere[0m
Params: {'model': 'command-xlarge-20221108', 'max_tokens': 256, 'temperature': 0.0, 'k': 0, 'p': 1, 'frequency_penalty': 0, 'presence_penalty': 0}


[1m> Entering new chain...[0m
What is the hometown of the