In [1]:
import os
import warnings

import numpy as np
import pandas as pd
from opensearchpy import Field, Document, Keyword, Text, Date
from opensearchpy import OpenSearch

warnings.filterwarnings("ignore")

In [2]:
# --- Movies: metadata, learned embeddings and id -> matrix-row lookup ---
df_movies = pd.read_csv('../data/peliculas.csv')
movie_embeddings_matrix = np.load('../vectors/movie_embeddings_matrix.npy')
# NOTE: allow_pickle=True unpickles arbitrary Python objects; only load .npy
# files that were produced by a trusted source.
# .item() unwraps the single object stored in the array (used below as a mapping).
movie2Idx = np.load('../vectors/movie2Idx.npy', allow_pickle=True).item()

# --- Users: metadata, learned embeddings and id -> matrix-row lookup ---
df_users = pd.read_csv('../data/usuarios.csv')
user_embeddings_matrix = np.load('../vectors/user_embeddings_matrix.npy')
user2Idx = np.load('../vectors/user2Idx.npy', allow_pickle=True).item()

# Attach the lookup result to every row; an id missing from the mapping
# raises KeyError instead of silently producing NaN.
df_movies['movieIdx'] = df_movies['id'].apply(lambda movie_id: movie2Idx[movie_id])
df_users['userIdx'] = df_users['id'].apply(lambda user_id: user2Idx[user_id])

In [4]:
# Connection settings are read from the environment so that no credential is
# stored in the notebook, its saved outputs, or version control.
#   OPENSEARCH_HOST      optional, defaults to 'localhost'
#   OPENSEARCH_PORT      optional, defaults to 9200
#   OPENSEARCH_USER      optional, defaults to 'admin'
#   OPENSEARCH_PASSWORD  required
# NOTE(review): a password was previously hard-coded in this cell; treat it as
# compromised and rotate it.
host = os.environ.get('OPENSEARCH_HOST', 'localhost')
port = int(os.environ.get('OPENSEARCH_PORT', '9200'))

try:
    auth = (os.environ.get('OPENSEARCH_USER', 'admin'), os.environ['OPENSEARCH_PASSWORD'])
except KeyError:
    raise RuntimeError(
        "Set the OPENSEARCH_PASSWORD environment variable before running this cell."
    ) from None

client = OpenSearch(
    hosts=[{'host': host, 'port': port}],
    http_auth=auth,
    use_ssl=True,
    # NOTE(review): TLS certificate verification is disabled. Acceptable for a
    # local node with a self-signed certificate; do not use against a remote host.
    verify_certs=False,
)

In [5]:
class KNNVector(Field):
    """Custom mapping field that declares the OpenSearch ``knn_vector`` type.

    Args:
        dimension: Value forwarded as the field's ``dimension`` parameter.
        method: Value forwarded as the field's ``method`` parameter.
        **kwargs: Any further parameters, passed through to ``Field`` unchanged.
    """

    # Field type name used for this field.
    name = "knn_vector"

    def __init__(self, dimension, method, **kwargs):
        super().__init__(dimension=dimension, method=method, **kwargs)

# k-NN method definition handed to the KNNVector field:
# algorithm name, distance space and engine.
method = dict(
    name="hnsw",
    space_type="cosinesimil",
    engine="nmslib",
)

index_name = 'movie'


class Movie(Document):
    """Document stored in the ``index_name`` index: one movie plus its embedding."""

    movie_id = Keyword()
    name = Text()
    created_at = Date()
    # Vector dimension is the column count of the movie embedding matrix.
    vector = KNNVector(movie_embeddings_matrix.shape[1], method)

    class Index:
        name = index_name
        # Index-level setting that switches k-NN on for this index.
        settings = {'index': {'knn': True}}

    def save(self, **kwargs):
        """Persist the document, using ``movie_id`` as the document id.

        Args:
            **kwargs: Forwarded unchanged to ``Document.save``.

        Returns:
            Whatever ``Document.save`` returns.
        """
        self.meta.id = self.movie_id
        return super().save(**kwargs)

In [10]:
# Row of `movie_embeddings_matrix` whose embedding is used as the k-NN query vector.
# NOTE(review): this is a matrix row position, presumably the same index space as
# df_movies['movieIdx'] rather than df_movies['id'] -- confirm before using the
# disabled lookup below.
movie_idx_to_search = 1447

# Disabled metadata lookup for the selected movie:
#df_movies[df_movies['id'] == movie_idx_to_search]

# Last expression of the cell: displays the embedding row of the selected movie.
movie_embeddings_matrix[movie_idx_to_search]

array([ 0.01373185, -0.01663706,  0.04965537, -0.02427759, -0.0730793 ,
       -0.01518517,  0.03065911, -0.04689679], dtype=float32)

In [13]:
# Number of neighbours to retrieve; used both as the k-NN `k` and as the
# response `size` so the two cannot drift apart.
top_k = 5

query = {
    "size": top_k,
    "query": {
        "knn": {
            # Outer "vector" is the name of the knn_vector field on Movie;
            # inner "vector" is the query embedding itself.
            "vector": {
                # Convert to a plain list so the body is JSON-serializable
                # without depending on the client's numpy handling.
                "vector": movie_embeddings_matrix[movie_idx_to_search].tolist(),
                "k": top_k
            }
        }
    }
}

# Search the same index the Movie document is mapped to, not a repeated literal.
response = client.search(index=index_name, body=query)

In [14]:
# Print every raw hit of the search response, one per line.
for hit in response['hits']['hits']:
    print(hit)

{'_index': 'movie', '_id': '711', '_score': 'Infinity', '_source': {'movie_id': 711, 'name': 'Substance of Fire, The (1996)', 'vector': [1.6679603622595922e-34, 2.6088745297997226e-33, 4.647216998956094e-33, -2.69640183916195e-33, -5.72037575033167e-33, -1.4385661294454768e-33, -6.313771176098515e-34, 5.0784294691399876e-33], 'created_at': '2024-12-31T16:45:44.034337'}}
{'_index': 'movie', '_id': '1122', '_score': 'Infinity', '_source': {'movie_id': 1122, 'name': 'They Made Me a Criminal (1939)', 'vector': [2.3758254279095114e-33, -2.75788607118171e-33, 2.1536951991705008e-33, 2.4461765607543175e-33, 4.550375366231538e-33, 2.3621167760560234e-33, 1.0964034174791177e-33, -5.043016967137497e-33], 'created_at': '2024-12-31T16:46:04.933929'}}
{'_index': 'movie', '_id': '1310', '_score': 'Infinity', '_source': {'movie_id': 1310, 'name': 'Walk in the Sun, A (1945)', 'vector': [-5.457583702629778e-33, -5.729541667532207e-33, 3.822707628874079e-35, -4.6363778390155584e-33, -4.7513665331229177e