In [1]:
import os
import sys 
import logging 
import numpy as np 
# Load environment variables
from dotenv import load_dotenv
load_dotenv()

from elastic_helpers import ESBulkIndexer, ESQueryMaker
from embedding_model import EmbeddingModel
from llamaindex_processor import LlamaIndexProcessor
from nltk_processor import NLTKProcessor
from chunker import Chunker
from elastic_config import BASIC_CONFIG
from llm import LLMProcessor
from elastic_helpers import ESBulkIndexer
from elastic_config import BASIC_CONFIG
from document_enricher import DocumentEnricher
from entity_extractor import EntityExtractor

  _torch_pytree._register_pytree_node(
  _torch_pytree._register_pytree_node(
[nltk_data] Downloading package punkt to /Users/han/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/han/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package stopwords to /Users/han/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/han/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


# Improvements
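1. Vector enrichment: recombine the metadata embeddings (keyphrases, generated questions, entities) with the chunk embedding via a weighted sum
2. Sentence-wise, token-level chunking with window overlaps
3. Match query built as an OR string of search terms
4. HyDE: embed an LLM-generated hypothetical document instead of the raw query
5. Reverse packing: the retrieved context is passed to the LLM in reversed hit order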

<!-- ![](./assets/rag_pipe.png) -->

# Step One: Initial Setup — Initialize Services

In [2]:


# --- Logging ----------------------------------------------------------------
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)
logger = logging.getLogger(__name__)

# --- Embedding model (model name is read from the environment) ---------------
HUGGINGFACE_EMBEDDING_MODEL = os.getenv('HUGGINGFACE_EMBEDDING_MODEL')
embedder = EmbeddingModel(model_name=HUGGINGFACE_EMBEDDING_MODEL)

# --- Document processors -----------------------------------------------------
llamaindex_processor = LlamaIndexProcessor()
nltkprocessor = NLTKProcessor()
chunker = Chunker(embedder.tokenizer)  # the chunker is given the embedder's tokenizer
documentenricher = DocumentEnricher()
entity_extractor = EntityExtractor()

# --- Elasticsearch clients (host and credentials are read from the environment)
ELASTIC_CLOUD_HOST = os.getenv('ELASTIC_CLOUD_HOST')
ELASTIC_USERNAME = os.getenv('ELASTIC_USERNAME')
ELASTIC_PASSWORD = os.getenv('ELASTIC_PASSWORD')
ELASTIC_CLOUD_AUTH = (ELASTIC_USERNAME, ELASTIC_PASSWORD)
es_bulk_indexer = ESBulkIndexer(
    cloud_id=[ELASTIC_CLOUD_HOST],
    credentials=ELASTIC_CLOUD_AUTH,
)
es_query_maker = ESQueryMaker(
    cloud_id=[ELASTIC_CLOUD_HOST],
    credentials=ELASTIC_CLOUD_AUTH,
)

# --- Target index ------------------------------------------------------------
index_name = os.getenv('ELASTIC_INDEX_NAME')

# --- LLM ---------------------------------------------------------------------
gpt4o = LLMProcessor(model='gpt-4o')

Using MPS


  _torch_pytree._register_pytree_node(
INFO:elastic_helpers:Connection created for hosts: ['https://training-9b4cc2.es.asia-southeast1.gcp.elastic-cloud.com:9243/']
INFO:elastic_helpers:Connection created for hosts: ['https://training-9b4cc2.es.asia-southeast1.gcp.elastic-cloud.com:9243/']
INFO:llm:LLMProcessor initialized with model: gpt-4o


In [3]:
# Check that the Elasticsearch cluster is reachable before doing any indexing work.
es_bulk_indexer.ping()

INFO:elastic_transport.transport:HEAD https://training-9b4cc2.es.asia-southeast1.gcp.elastic-cloud.com:9243/ [status:200 duration:0.042s]


Ping successful: Connected to Elasticsearch!


# Step Two: Process some documents

In [35]:

# Load everything under ./documents/ and turn each returned object into a
# plain dict via dict().
documents = [
    dict(loaded_doc)
    for loaded_doc in llamaindex_processor.load_documents('./documents/')
]

In [34]:
# Inspect the loaded documents.
# NOTE(review): this echoes the entire list. The saved output shows raw
# Document objects and a lower execution count than the loading cell, so it
# appears to have been captured before the dict() conversion ran; re-run the
# notebook in order to refresh it.
documents

[Document(id_='9b373033-1af4-4278-9d00-07fa3a340497', embedding=None, metadata={'page_label': '1', 'file_name': 'Elastic_NV_Annual-Report-Fiscal-Year-2023.pdf', 'file_path': '/Users/han/Desktop/Projects/truckasaurus/documents/Elastic_NV_Annual-Report-Fiscal-Year-2023.pdf', 'file_type': 'application/pdf', 'file_size': 3724426, 'creation_date': '2024-07-27', 'last_modified_date': '2024-07-27'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='', mimetype='text/plain', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_template='{key}: {value}', metadata_seperator='\n'),
 Document(id_='253288ca-8fb1-402b-bf66-e9391188ccb2', embedding=None, metadata={'page_label': '2', 'file_name': 'Elastic_NV_Annual-Report-Fiscal-Ye

In [37]:
# Split the documents into chunks with the chunker's sentence-wise tokenized
# strategy (chunk_size=512 is presumably the per-chunk token budget; confirm
# in chunker.py).
# NOTE(review): the saved output shows a tokenizer warning for a 647-token
# sequence (> 512), so at least one piece of text exceeded the model limit
# while being tokenized; verify that the final chunks stay within the limit.
chunked_documents=chunker.sentence_wise_tokenized_chunk_documents(documents, chunk_size=512)

Token indices sequence length is longer than the specified maximum sequence length for this model (647 > 512). Running this sequence through the model will result in indexing errors


In [44]:
# Spot-check the text of one chunk.
# NOTE(review): the saved output shows literal [CLS] / [SEP] markers around
# every sentence, so 'original_text' still carries tokenizer special tokens.
print(chunked_documents[4]['original_text'])


[CLS] the aggregate market value of the ordinary shares held by non - affiliates of the registrant, based on the closing price of the shares of ordinary shares on the new york stock exchange on october 31, 2022 ( the last business day of the registrant ’ s second fiscal quarter ), was approximately $ 6. 1 billion. [SEP] [CLS] as of may 31, 2023, the registrant had 97, 390, 886 ordinary shares, par value €0. 01 per share, outstanding. [SEP] [CLS] documents incorporated by reference portions of the registrant ’ s definitive proxy statement relating to the registrant ’ s 2023 annual general meeting of shareholders are incorporated by reference into part iii of this annual report on form 10 - k where indicated. [SEP] [CLS] such definitive proxy statement will be filed with the u. s. [SEP] [CLS] securities and exchange commission within 120 days after the end of the registrant ’ s fiscal year ended april 30, 2023. [SEP] [CLS] 2 [SEP]


In [12]:
# embedder.get_embeddings_from_tokens([chunked_documents[0]['chunk']])

# Step Three: Enrich your documents

In [3]:
import pickle

# Replace `chunked_documents` with the contents of a local backup file.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load backups that you created yourself.
# NOTE(review): this same path is written by a later cell in this notebook,
# so a fresh "Restart & Run All" depends on a backup from an earlier session
# already existing; confirm that this is intended.
with open('./backups/embedding_generated_questions_0.pkl', 'rb') as f:
    chunked_documents=pickle.load(f)



In [4]:
# Each step pairs an enrichment callable with the name it is registered under.
# The LLM question generator is currently switched off:
#   (gpt4o.generate_questions, "potential_questions")
keyphrase_step = (nltkprocessor.textrank_phrases, "keyphrases")
entity_step = (entity_extractor.extract_entities, "entities")
processors = [keyphrase_step, entity_step]

documentenricher.enrich_document(
    chunked_documents,
    text_col='original_text',
    processors=processors,
)

Enriching documents using processors: [(<bound method NLTKProcessor.textrank_phrases of <nltk_processor.NLTKProcessor object at 0x32fc4f5f0>>, 'keyphrases'), (<bound method EntityExtractor.extract_entities of <entity_extractor.EntityExtractor object at 0x32fcd4680>>, 'entities')]: 100%|██████████| 224/224 [00:09<00:00, 22.92it/s]


In [21]:
# Spot-check the keyphrases extracted for one chunk.
# NOTE(review): the saved output contains fragments such as "sep cl ...",
# which look like remnants of [SEP] / [CLS] markers in the chunk text leaking
# into the keyphrases; verify and consider stripping special tokens first.
print(chunked_documents[29]['keyphrases'])

sep cl principal
sep cl elastic
cl elastic community
elastic community code
community code conduct
sep cl community
cl community u
community u ability
u ability candid
ability candid feedback


# Step Four: Weighted Embeddings

In [6]:
# Text metadata fields are embedded from their text; the chunk itself is
# embedded from its 'chunk' token field. Each embed call's return value is
# collected (the list is inspected in a later cell).
# NOTE(review): 'potential_questions' is embedded here although its generator
# is commented out in the enrichment step, so the field presumably has to
# come from the restored backup; confirm.
cols_to_embed = ['keyphrases', 'potential_questions', 'entities']

embedding_cols = [
    embedder.embed_documents_text_wise(chunked_documents, text_field=text_col)
    for text_col in cols_to_embed
]
embedding_col = embedder.embed_documents_token_wise(chunked_documents, token_field="chunk")
embedding_cols += [embedding_col]

Embedding documents: 100%|██████████| 7/7 [00:03<00:00,  2.15it/s]
Embedding documents: 100%|██████████| 7/7 [00:12<00:00,  1.82s/it]
Embedding documents: 100%|██████████| 7/7 [00:13<00:00,  1.99s/it]
Embedding documents: 100%|██████████| 7/7 [00:04<00:00,  1.44it/s]


In [7]:
import pickle

# Persist the current `chunked_documents` to the local backup file.
# NOTE(review): this overwrites the same file that an earlier cell loads.
with open('./backups/embedding_generated_questions_0.pkl', 'wb') as f:
    pickle.dump(chunked_documents, f)

In [52]:
# Show the values collected from the embedding calls.
embedding_cols

['keyphrases_embedding',
 'potential_questions_embedding',
 'entities_embedding',
 'chunk_embedding']

In [8]:
# Relative weight of every embedding field in the blended vector. Keeping
# field and weight side by side makes the positional pairing explicit.
embedding_weighting = {
    'keyphrases_embedding': 0.1,
    'potential_questions_embedding': 0.15,
    'entities_embedding': 0.05,
    'chunk_embedding': 0.7,
}
embedding_cols = list(embedding_weighting)
combination_weights = list(embedding_weighting.values())

In [9]:
from tqdm import tqdm 
def combine_embeddings(objects, embedding_cols, combination_weights, primary_embedding='primary_embedding', drop_source_cols=True):
    """Blend several embeddings of each object into one weighted-average vector.

    Every object is modified in place: the weighted sum of the fields named in
    ``embedding_cols`` is stored under ``primary_embedding`` as a plain list.

    Args:
        objects: Iterable of dict-like records; each must contain every field
            listed in ``embedding_cols``, all with the same shape.
        embedding_cols: Names of the embedding fields to combine.
        combination_weights: One relative weight per entry of
            ``embedding_cols``. The weights are normalised to sum to 1.
        primary_embedding: Name of the field that receives the combined vector.
        drop_source_cols: When True (the default, matching the previous
            behaviour) the source embedding fields are removed from each
            object afterwards. Pass False to keep them, which also makes the
            call repeatable on the same objects.

    Returns:
        None. ``objects`` are updated in place.

    Raises:
        AssertionError: If the number of columns and weights differ.
        ValueError: If no weights are given or the weights sum to zero.
        KeyError: If an object lacks one of ``embedding_cols``.
    """
    # Ensure the number of weights matches the number of embedding columns
    assert len(embedding_cols) == len(combination_weights), "Number of embedding columns must match number of weights"

    # Normalize weights to sum to 1; refuse a zero total instead of silently
    # producing NaN / inf vectors.
    weights = np.asarray(combination_weights, dtype=float)
    total_weight = weights.sum()
    if weights.size == 0 or total_weight == 0:
        raise ValueError("combination_weights must be non-empty and must not sum to zero")
    weights = weights / total_weight

    for obj in tqdm(objects, desc="Combining embeddings"):
        # Force a float accumulator: inheriting an integer dtype from the first
        # embedding would make the in-place float addition below fail.
        combined = np.zeros_like(np.asarray(obj[embedding_cols[0]], dtype=float))

        # Compute the weighted sum
        for col, weight in zip(embedding_cols, weights):
            combined += weight * np.asarray(obj[col], dtype=float)

        # Add the new combined embedding to the object
        obj[primary_embedding] = combined.tolist()

        # Optionally remove the original embedding columns
        if drop_source_cols:
            for col in embedding_cols:
                obj.pop(col, None)

In [10]:
# Collapse the listed embedding fields of every chunk into a single
# 'primary_embedding' field.
# NOTE: as called here, combine_embeddings removes the source embedding fields
# from each chunk, so re-running this cell on the same in-memory data raises
# KeyError until the embeddings are regenerated or reloaded.
combine_embeddings(chunked_documents, embedding_cols, combination_weights)

Combining embeddings: 100%|██████████| 224/224 [00:00<00:00, 8449.03it/s]


In [22]:
# Inspect one fully processed chunk.
# NOTE(review): this echoes the whole record, including the long token-id
# list; consider displaying only selected keys to keep the output readable.
chunked_documents[3]

{'id_': '7fe71686-5cd0-4831-9e79-998c6dbeae0c',
 'chunk': [2312,
  14613,
  5371,
  2099,
  100,
  14613,
  5371,
  2099,
  100,
  2512,
  1011,
  14613,
  5371,
  2099,
  100,
  3760,
  7316,
  2194,
  100,
  8361,
  3930,
  2194,
  100,
  2065,
  2019,
  8361,
  3930,
  2194,
  1010,
  5769,
  2011,
  4638,
  2928,
  2065,
  1996,
  20588,
  6494,
  3372,
  2038,
  2700,
  2025,
  2000,
  2224,
  1996,
  3668,
  6653,
  2558,
  2005,
  14037,
  2075,
  2007,
  2151,
  2047,
  2030,
  8001,
  3361,
  9529,
  4781,
  3024,
  27081,
  2000,
  2930,
  2410,
  1006,
  1037,
  1007,
  1997,
  1996,
  3863,
  2552,
  1012,
  102,
  101,
  100,
  5769,
  2011,
  4638,
  2928,
  3251,
  1996,
  20588,
  6494,
  3372,
  2038,
  6406,
  1037,
  16360,
  2953,
  1056,
  2006,
  1998,
  2012,
  22199,
  3370,
  2000,
  2049,
  2968,
  1521,
  1055,
  7667,
  1997,
  1996,
  1041,
  16020,
  14931,
  3512,
  2791,
  1997,
  2049,
  4722,
  2491,
  2058,
  3361,
  7316,
  2104,
  2930,
  24837,
  1

# Step Five: Index to Elasticsearch

In [12]:

# Create the index from BASIC_CONFIG only when it does not exist yet.
index_exists = es_bulk_indexer.check_index_existence(index_name=index_name)
if not index_exists:
    logger.info(f"Creating new index: {index_name}")
    es_bulk_indexer.create_es_index(es_configuration=BASIC_CONFIG, index_name=index_name)

# Upload all chunks in batches of 32. id_col='id_' presumably makes each
# chunk's 'id_' value the Elasticsearch document id; confirm in
# elastic_helpers.
success_count = es_bulk_indexer.bulk_upload_documents(
    index_name=index_name, 
    documents=chunked_documents, 
    id_col='id_',
    batch_size=32
)

INFO:elastic_transport.transport:HEAD https://95f1d816b5a34c759c6101b2858ddfda.asia-southeast1.gcp.elastic-cloud.com:443/test_index_1 [status:404 duration:0.163s]
INFO:__main__:Creating new index: test_index_1
INFO:elastic_transport.transport:HEAD https://95f1d816b5a34c759c6101b2858ddfda.asia-southeast1.gcp.elastic-cloud.com:443/test_index_1 [status:404 duration:0.008s]
INFO:elastic_helpers:Index test_index_1 does not exist.
INFO:elastic_transport.transport:PUT https://95f1d816b5a34c759c6101b2858ddfda.asia-southeast1.gcp.elastic-cloud.com:443/test_index_1 [status:200 duration:0.118s]
INFO:elastic_helpers:New index test_index_1 created!
INFO:elastic_transport.transport:PUT https://95f1d816b5a34c759c6101b2858ddfda.asia-southeast1.gcp.elastic-cloud.com:443/_bulk [status:200 duration:0.160s]
INFO:elastic_helpers:Batch 1: Successfully indexed 32 documents to test_index_1
INFO:elastic_transport.transport:PUT https://95f1d816b5a34c759c6101b2858ddfda.asia-southeast1.gcp.elastic-cloud.com:443/_

# Step Six: Make a Query


In [3]:
def get_context(index_name, match_query, text_query, fields, num_candidates=100, num_results=20, text_fields=None, embedding_field="primary_embedding"):
    """Run a hybrid (text + vector) search and format the hits as context strings.

    ``text_query`` is embedded with the module-level ``embedder`` and sent,
    together with ``match_query``, to ``es_query_maker.hybrid_vector_search``.

    Args:
        index_name: Index to search.
        match_query: Query string for the text side of the search.
        text_query: Text that is embedded to obtain the query vector.
        fields: ``_source`` fields to include in each context string, in order.
        num_candidates: Forwarded to the search helper.
        num_results: Forwarded to the search helper.
        text_fields: Fields the text query is matched against. ``None`` (the
            default) means ``["original_text", "keyphrases",
            "potential_questions", "entities"]``.
        embedding_field: Name of the vector field to search.

    Returns:
        A tuple ``(context_docs, search_body)``. ``context_docs`` holds one
        string per hit, in the reverse of the order the hits were returned
        (assuming hits arrive best-first, the best match ends up last);
        ``search_body`` is the request body reported by the search helper.

    Raises:
        KeyError: If a hit's ``_source`` lacks one of ``fields``.
    """
    if text_fields is None:
        # Build a fresh list per call: a list literal in the signature would
        # be one shared object handed to the search helper on every call.
        text_fields = ["original_text", 'keyphrases', 'potential_questions', 'entities']

    embedding = embedder.get_embeddings_from_text(text_query)

    results, search_body = es_query_maker.hybrid_vector_search(
        index_name=index_name,
        query_text=match_query,
        # NOTE(review): assumes the embedder returns a nested structure whose
        # [0][0] element is the query vector; confirm against EmbeddingModel.
        query_vector=embedding[0][0],
        text_fields=text_fields,
        vector_field=embedding_field,
        num_candidates=num_candidates,
        num_results=num_results
    )

    # One block per hit: "<field>:\n\n<value>" sections joined by blank lines.
    # Formatting (rather than '+') also accepts non-string field values.
    context_docs = [
        '\n\n'.join(f"{field}:\n\n{hit['_source'][field]}" for field in fields)
        for hit in results['hits']['hits']
    ]
    context_docs.reverse()
    return context_docs, search_body


In [4]:
def retrieval_augmented_generation(query_text):
    """Answer ``query_text`` from retrieved context.

    The LLM first produces a match query and a HyDE document for the question.
    Both are handed to ``get_context``, which returns the context passages and
    the search body. The LLM then answers the original question from those
    passages.

    Returns:
        Tuple ``(answer, match_query, hyde_document, context, search_body)``.
    """
    # Two LLM-derived views of the question: one for the text match, one to embed.
    keyword_query = gpt4o.generate_query(query_text)
    hypothetical_doc = gpt4o.generate_HyDE(query_text)

    # Only the chunk text itself is packed into the context.
    retrieved = get_context(index_name, keyword_query, hypothetical_doc, ['original_text'])
    context_docs, es_request = retrieved

    response = gpt4o.basic_qa(query=query_text, context=context_docs)
    return response, keyword_query, hypothetical_doc, context_docs, es_request

# Test 1

In [15]:
# Run the full RAG pipeline for this question and print the generated answer.
# The other returned values are rebound on each test run and remain available
# for inspection in later cells.
query_text="who audits elastic"
answer, match_query, hyde_document, context, search_body = retrieval_augmented_generation(query_text)
print(answer)

INFO:llm:Executing task: Elastic Search Query Generation
INFO:llm:Processing request with model: gpt-4o
INFO:httpx:HTTP Request: POST https://ca-shared.openai.azure.com//openai/deployments/experiment_45kTPM_gpt-4o/chat/completions?api-version=2024-06-01 "HTTP/1.1 200 OK"
INFO:llm:Request processed successfully
INFO:llm:Elastic search query generation completed successfully
INFO:llm:Executing task: Elastic Search Query Generation
INFO:llm:Processing request with model: gpt-4o
INFO:httpx:HTTP Request: POST https://ca-shared.openai.azure.com//openai/deployments/experiment_45kTPM_gpt-4o/chat/completions?api-version=2024-06-01 "HTTP/1.1 200 OK"
INFO:llm:Request processed successfully
INFO:llm:Elastic search query generation completed successfully
INFO:elastic_transport.transport:POST https://training-9b4cc2.es.asia-southeast1.gcp.elastic-cloud.com:9243/test_index_1/_search [status:200 duration:0.041s]
INFO:elastic_helpers:Hybrid search executed on index: test_index_1 with text query: audits

According to the context, the independent registered public accounting firm that audits Elastic N.V. is PricewaterhouseCoopers LLP (PwC). Here is the relevant excerpt:

"[CLS] report of independent registered public accounting firm to the board of directors and shareholders of elastic n. v. [SEP] [CLS] opinions on the financial statements and internal control over financial reporting we have audited the accompanying consolidated balance sheets of elastic n. v. [SEP] [CLS] ... [SEP] [CLS] / s / pricewaterhouseco"

PwC audited the consolidated financial statements as well as the company's internal control over financial reporting.


In [8]:
# Inspect the search body returned by the most recent RAG call.
# NOTE(review): this echoes the full query vector; consider showing only the
# non-vector parts of the body.
search_body

{'knn': {'field': 'primary_embedding',
  'query_vector': [0.4265527129173279,
   -0.1712949573993683,
   -0.042020395398139954,
   -0.5471225380897522,
   0.4746799170970917,
   -0.33410170674324036,
   0.1717018485069275,
   -0.4014941453933716,
   -0.10174159705638885,
   -0.09129292517900467,
   0.06006495654582977,
   -0.3378024101257324,
   -0.11432260274887085,
   -0.5213066339492798,
   0.2760009467601776,
   -0.09254149347543716,
   -0.12062013149261475,
   -0.5503711104393005,
   0.43687838315963745,
   0.7064043283462524,
   0.45067155361175537,
   -0.2836616039276123,
   -0.04662211239337921,
   -0.20350763201713562,
   0.010872878134250641,
   0.5147824883460999,
   -0.06530994921922684,
   -0.3454621434211731,
   -0.6738244891166687,
   -1.3922719955444336,
   0.0437544621527195,
   -0.68052738904953,
   0.05167144536972046,
   0.03836928308010101,
   0.01118258386850357,
   -0.11759516596794128,
   -0.39992353320121765,
   -0.12960785627365112,
   -0.04864056035876274,
  

# Test 2

In [14]:
# Run the full RAG pipeline for a revenue lookup and print the generated answer.
query_text="total revenue 2023"
answer, match_query, hyde_document, context, search_body = retrieval_augmented_generation(query_text)
print(answer)

INFO:llm:Executing task: Elastic Search Query Generation
INFO:llm:Processing request with model: gpt-4o
INFO:httpx:HTTP Request: POST https://ca-shared.openai.azure.com//openai/deployments/experiment_45kTPM_gpt-4o/chat/completions?api-version=2024-06-01 "HTTP/1.1 200 OK"
INFO:llm:Request processed successfully
INFO:llm:Elastic search query generation completed successfully
INFO:llm:Executing task: Elastic Search Query Generation
INFO:llm:Processing request with model: gpt-4o
INFO:httpx:HTTP Request: POST https://ca-shared.openai.azure.com//openai/deployments/experiment_45kTPM_gpt-4o/chat/completions?api-version=2024-06-01 "HTTP/1.1 200 OK"
INFO:llm:Request processed successfully
INFO:llm:Elastic search query generation completed successfully
INFO:elastic_transport.transport:POST https://training-9b4cc2.es.asia-southeast1.gcp.elastic-cloud.com:9243/test_index_1/_search [status:200 duration:0.038s]
INFO:elastic_helpers:Hybrid search executed on index: test_index_1 with text query: total 

According to the provided context, the total revenue for the fiscal year ended April 30, 2023, was $1,068,989,000.

Quoting directly from the source:
"The following table presents revenue by category ( in thousands ) : year ended April 30, 2023 [...] total revenue $ 1, 068, 989 100 %"

Additionally, in the consolidated statements of operations:
"total revenue 1, 068, 989 862, 374 608, 489"

Thus, the total revenue for the fiscal year 2023 is $1,068,989,000.


# Test 3

In [13]:
# Run the full RAG pipeline for a two-part question and print the generated answer.
query_text="what product does growth primarily depend on? How much"
answer, match_query, hyde_document, context, search_body = retrieval_augmented_generation(query_text)
print(answer)

INFO:llm:Executing task: Elastic Search Query Generation
INFO:llm:Processing request with model: gpt-4o
INFO:httpx:HTTP Request: POST https://ca-shared.openai.azure.com//openai/deployments/experiment_45kTPM_gpt-4o/chat/completions?api-version=2024-06-01 "HTTP/1.1 200 OK"
INFO:llm:Request processed successfully
INFO:llm:Elastic search query generation completed successfully
INFO:llm:Executing task: Elastic Search Query Generation
INFO:llm:Processing request with model: gpt-4o
INFO:httpx:HTTP Request: POST https://ca-shared.openai.azure.com//openai/deployments/experiment_45kTPM_gpt-4o/chat/completions?api-version=2024-06-01 "HTTP/1.1 200 OK"
INFO:llm:Request processed successfully
INFO:llm:Elastic search query generation completed successfully
INFO:elastic_transport.transport:POST https://training-9b4cc2.es.asia-southeast1.gcp.elastic-cloud.com:9243/test_index_1/_search [status:200 duration:0.044s]
INFO:elastic_helpers:Hybrid search executed on index: test_index_1 with text query: produc

### Products Driving Growth
According to the provided information, the company's growth primarily depends on the "elastic stack." This is evident from multiple sections of the context. For instance:

1. **Revenue Source:**
   - "Our revenue is derived primarily from subscriptions of search, observability and security built into the elastic stack."
   
2. **Future Investments:**
   - "We believe that releasing additional features of the elastic stack, including our solutions, drives usage of our products and ultimately drives our growth."
   - "To that end, we plan to continue to invest in building new features and solutions that expand the capabilities of the elastic stack."

These points collectively indicate that the company's growth is heavily reliant on the functionality, enhancements, and adoption of the elastic stack.

### Extent of Dependence
The context provides quantitative data on revenue growth, but it does not specify how much growth is projected or quantified specifically 

# Test 4

In [11]:
# Run the full RAG pipeline for a descriptive request and print the generated answer.
query_text="Describe employee benefit plan"
answer, match_query, hyde_document, context, search_body = retrieval_augmented_generation(query_text)
print(answer)

INFO:llm:Executing task: Elastic Search Query Generation
INFO:llm:Processing request with model: gpt-4o
INFO:httpx:HTTP Request: POST https://ca-shared.openai.azure.com//openai/deployments/experiment_45kTPM_gpt-4o/chat/completions?api-version=2024-06-01 "HTTP/1.1 200 OK"
INFO:llm:Request processed successfully
INFO:llm:Elastic search query generation completed successfully
INFO:llm:Executing task: Elastic Search Query Generation
INFO:llm:Processing request with model: gpt-4o
INFO:httpx:HTTP Request: POST https://ca-shared.openai.azure.com//openai/deployments/experiment_45kTPM_gpt-4o/chat/completions?api-version=2024-06-01 "HTTP/1.1 200 OK"
INFO:llm:Request processed successfully
INFO:llm:Elastic search query generation completed successfully
INFO:elastic_transport.transport:POST https://training-9b4cc2.es.asia-southeast1.gcp.elastic-cloud.com:9243/test_index_1/_search [status:200 duration:0.191s]
INFO:elastic_helpers:Hybrid search executed on index: test_index_1 with text query: employ

### Employee Benefit Plan Overview

The provided context outlines various elements of the employee benefit plan at Elastic, often referred to as "Elasticians." Here are the key components described in the texts:

1. **401(k) Plan**:
   - **Coverage**: The 401(k) plan is available to substantially all U.S. employees who meet minimum age and service requirements.
   - **Contributions**: Elastic makes contributions to the 401(k) plan up to 6% of the participating employee’s W-2 earnings and wages.
   - **Expenses**: For the fiscal years ended April 30, Elastic recorded expenses of $17.9 million (2023), $15.2 million (2022), and $11.4 million (2021) related to the 401(k) plan.
   - **Defined-Contribution Plans in Other Countries**: Elastic has defined-contribution plans in various other countries and recorded respective expenses of $9.4 million (2023), $7.2 million (2022), and $5.1 million (2021).

2. **Stock-Based Compensation**:
   - **Types of Awards**: Stock options, restricted stock u

# Test 5

In [10]:
# Run the full RAG pipeline for a list-style question and print the generated answer.
query_text="Which companies did Elastic acquire?"
answer, match_query, hyde_document, context, search_body = retrieval_augmented_generation(query_text)
print(answer)

INFO:llm:Executing task: Elastic Search Query Generation
INFO:llm:Processing request with model: gpt-4o
INFO:httpx:HTTP Request: POST https://ca-shared.openai.azure.com//openai/deployments/experiment_45kTPM_gpt-4o/chat/completions?api-version=2024-06-01 "HTTP/1.1 200 OK"
INFO:llm:Request processed successfully
INFO:llm:Elastic search query generation completed successfully
INFO:llm:Executing task: Elastic Search Query Generation
INFO:llm:Processing request with model: gpt-4o
INFO:httpx:HTTP Request: POST https://ca-shared.openai.azure.com//openai/deployments/experiment_45kTPM_gpt-4o/chat/completions?api-version=2024-06-01 "HTTP/1.1 200 OK"
INFO:llm:Request processed successfully
INFO:llm:Elastic search query generation completed successfully
INFO:elastic_transport.transport:POST https://training-9b4cc2.es.asia-southeast1.gcp.elastic-cloud.com:9243/test_index_1/_search [status:200 duration:0.036s]
INFO:elastic_helpers:Hybrid search executed on index: test_index_1 with text query: Elasti

Elastic acquired several companies over the years to enhance its technology and market presence. According to the provided context, the specific acquisitions include:

1. **CmdWatch Security Inc.**: Acquired on September 17, 2021. The total purchase consideration was $77.8 million.
2. **Build Security Ltd.**: Acquired on September 2, 2021. The combined purchase price for Build Security Ltd. and another acquisition was $57.2 million.
3. **Optimyze Cloud Inc.**: Acquired on November 1, 2021. Part of the combined purchase price mentioned above with Build Security Ltd.
4. **Endgame, Inc.**: Although the context mentions an amended and restated stock incentive plan, it indicates that Endgame, Inc. was associated with Elastic, suggesting it could have been acquired at some point.

These acquisitions are part of Elastic's strategy to enhance the technology underlying its security and observability offerings.


In [6]:
# Show the OR-joined match query generated for the most recent question.
match_query

'companies Elastic OR Elastic acquired OR acquisitions OR acquisition OR purchase OR purchases OR bought OR buy OR buys'