In [6]:
# Silence every Python warning for the rest of the kernel session.
# NOTE(review): presumably added to hide the TLS warnings caused by
# verify_certs=False in the connection setup - confirm; this blanket filter
# also hides unrelated warnings such as deprecations.
import warnings
warnings.filterwarnings('ignore')

In [1]:
from opensearchpy import Float, OpenSearch, Field, Integer, Document, Keyword, Text, Boolean, DenseVector, Nested, Date, Object, connections, InnerDoc, helpers
import os

# local test
# docker pull opensearchproject/opensearch:latest
# docker run -it -p 9200:9200 -p 9600:9600 -e OPENSEARCH_INITIAL_ADMIN_PASSWORD=PassWord#1234! -e "discovery.type=single-node"  --name opensearch-node opensearchproject/opensearch:latest
# docker stop opensearch-node
# docker start opensearch-node
# curl -X GET "https://localhost:9200" -ku admin:PassWord#1234!
# When using the Chrome plugin, first follow the link in its notice: "When using https make sure that your browser trusts the cluster's SSL certificate. Help"
# Then set up access: open https://localhost:9200 in the browser and log in.
# Note: make sure the URL starts with "https://".



# Connection settings. Every value can be overridden through an environment
# variable so real credentials never have to be written into the notebook;
# the fallbacks match the local docker container described in the comments above.
OPENSEARCH_HOST = os.getenv('OPENSEARCH_HOST', "localhost")
port = int(os.getenv('OPENSEARCH_PORT', '9200'))
auth = (
    os.getenv('OPENSEARCH_USER', 'admin'),
    os.getenv('OPENSEARCH_PASSWORD', 'PassWord#1234!'),
)

# Register the client under the 'default' alias and keep a handle to it for
# the low-level calls made later in the notebook.
os_client = connections.create_connection(
    alias='default',
    hosts=[{'host': OPENSEARCH_HOST, 'port': port}],
    http_auth=auth,
    http_compress=True,  # enables gzip compression for request bodies
    use_ssl=True,
    # NOTE(review): certificate verification is off, presumably because the
    # local container presents a self-signed certificate - enable it for any
    # non-local cluster.
    verify_certs=False,
    # ssl_assert_hostname = False,
    # ssl_show_warn = False
)

# Test index configuration
TEST_INDEX_NAME = 'test'
TEST_DIMENSIONS = 4
TEST_INDEX_PARAMS = dict(
    number_of_shards=1,   # a single primary shard
    knn=True,             # index-level k-NN switch
)

# k-NN method definition for the vector field
knn_params = dict(
    name="hnsw",
    space_type="cosinesimil",   # cosine similarity
    engine="nmslib",
)
#-------------------------------------------------------------------------------------------------
class KNNVector(Field):
    """Custom field whose mapping type name is ``knn_vector``.

    Args:
        dimension: forwarded to ``Field`` as the ``dimension`` parameter.
        method: forwarded to ``Field`` as the ``method`` parameter.
        **kwargs: any further keyword arguments, forwarded to ``Field`` unchanged.
    """

    name = "knn_vector"

    def __init__(self, dimension, method, **kwargs):
        super().__init__(
            dimension=dimension,
            method=method,
            **kwargs,
        )
    
#-------------------------------------------------------------------------------------------------------------
class Test(Document):
    """Document of the test index: a text ``name`` plus a k-NN ``vector``."""

    name = Text()
    vector = KNNVector(TEST_DIMENSIONS, knn_params)

    class Index:
        name = TEST_INDEX_NAME
        # This check runs once, while the class body is being executed: the
        # index settings are attached only if the index does not exist at
        # that moment.
        if not os_client.indices.exists(index=TEST_INDEX_NAME):
            settings = {
                'index': TEST_INDEX_PARAMS
            }

    def save(self, **kwargs):
        """Save the document, storing a caller-supplied id as a string.

        A document created without an id (or with ``id=None``) is saved as is,
        instead of failing on the missing attribute or being stored under the
        literal id ``'None'``.

        Args:
            **kwargs: forwarded unchanged to ``Document.save``.

        Returns:
            Whatever ``Document.save`` returns.
        """
        doc_id = getattr(self.meta, 'id', None)
        if doc_id is not None:
            self.meta.id = str(doc_id)
        return super().save(**kwargs)
    



In [2]:
def init_opensearch():
    """Create the Test index unless it already exists, and print which case applied."""
    index_present = os_client.indices.exists(index=TEST_INDEX_NAME)
    if index_present:
        print("El índice Test ya existe. Saltando inicialización de base de datos.")
        return

    Test.init()
    print("Índice Test creado")

In [7]:
test_embeddings = [
    [1.5, 5.5, 4.5, 6.4],
    [2.5, 3.5, 5.6, 6.7],
    [4.5, 5.5, 6.7, 3.7],
    [1.5, 5.4, 4.4, 6.3],
]

# Load the test records into the test index
index_name = TEST_INDEX_NAME

# Create the index through Test.init() before the first save, so that a fresh
# cluster gets the settings and knn_vector mapping declared on the Test class
# rather than whatever an implicitly created index would receive.
init_opensearch()

for i, embedding in enumerate(test_embeddings):
    doc = Test(
        meta={'id': i + 1},
        name=str(i),
        vector=embedding,
    )
    doc.save()

# Refresh once after loading so the following cells see all documents
# immediately instead of depending on the periodic index refresh.
os_client.indices.refresh(index=TEST_INDEX_NAME)

In [8]:
# Count the documents in the test index (search is called on the class itself)
Test.search().count()

4

In [9]:
search_embedding = [2.5, 3.5, 5.1, 6.4]

# Vector similarity query: k-NN clause against the "vector" field
knn_clause = {
    "vector": {
        "vector": search_embedding,
        "k": 3,
    }
}
query = {
    "size": 2,  # up to two hits are returned; only the first one is shown below
    "query": {"knn": knn_clause},
}

# Run the query
result = os_client.search(index='test', body=query)

# Show the top-ranked hit, if there is any
total_hits = result['hits']['total']['value']
if total_hits <= 0:
    print("No se encontraron resultados para el embedding más cercano.")
else:
    closest_embedding = result['hits']['hits'][0]['_source']
    print("Embedding más cercano encontrado:")
    print(closest_embedding)

Embedding más cercano encontrado:
{'name': '1', 'vector': [2.5, 3.5, 5.6, 6.7]}


In [10]:
# Check how the index and its fields are currently mapped
mapping_result = os_client.indices.get_mapping(index='test')
print("Mapeo actual del índice 'test':", mapping_result, sep="\n")

Mapeo actual del índice 'test':
{'test': {'mappings': {'properties': {'name': {'type': 'text'}, 'vector': {'type': 'knn_vector', 'dimension': 4, 'method': {'engine': 'nmslib', 'space_type': 'cosinesimil', 'name': 'hnsw', 'parameters': {}}}}}}}
