In [11]:
import numpy as np
import pandas as pd
from datetime import datetime

In [12]:
# Embedding matrices, loaded in the same order as before (movies, then users).
movie_embeddings_matrix, user_embeddings_matrix = (
    np.load(matrix_path)
    for matrix_path in (
        '../data/vector_db/movie_embeddings_matrix.npy',
        '../data/vector_db/user_embeddings_matrix.npy',
    )
)

# Lookup objects stored as pickled 0-d arrays; `.item()` unwraps the stored
# Python object (indexed by id in a later cell, so presumably id -> row dicts).
# NOTE(review): allow_pickle=True executes pickle on load -- only use these
# files when they come from a trusted source.
user2Idx, movie2Idx = (
    np.load(mapping_path, allow_pickle=True).item()
    for mapping_path in (
        '../data/vector_db/user2Idx.npy',
        '../data/vector_db/movie2Idx.npy',
    )
)

In [13]:
# Raw tables: users, movies and the ratings each user gave.
df_users = pd.read_csv('../data/usuarios.csv')
df_movies = pd.read_csv('../data/peliculas.csv')
ratings = pd.read_csv('../data/scores.csv')

# Movies without an IMDB link get an empty string instead of NaN.
df_movies['IMDB URL'] = df_movies['IMDB URL'].fillna('')

# Spot check: the first highest-rated entry of user 196.
ratings_user_196 = ratings[ratings['user_id'] == 196]
top_rating_label = ratings_user_196['rating'].idxmax()
user_movies = ratings_user_196.loc[top_rating_label]
user_movies

id                         1896
user_id                     196
movie_id                    655
rating                        5
Date        1997-12-04 16:09:53
Name: 1896, dtype: object

In [14]:
# Attach to every user / movie the index stored for its id in the lookup
# objects. An id missing from the lookup raises KeyError.
df_users['userIdx'] = [user2Idx[user_id] for user_id in df_users['id']]
df_movies['movieIdx'] = [movie2Idx[movie_id] for movie_id in df_movies['id']]

In [15]:
import sys
import os

# Make the project root (the parent of the notebook's directory) importable so
# that the `src` package resolves. The notebook file name is resolved against
# the current working directory, so this assumes the kernel runs from the
# notebook's own folder.
notebook_dir = os.path.dirname(os.path.abspath('recommendation_system.ipynb'))
project_root = os.path.abspath(os.path.join(notebook_dir, '..'))
# Guarded so that re-running this cell does not keep appending the same entry.
if project_root not in sys.path:
    sys.path.append(project_root)

from src.services.database.models import MovieV, UserV
from src.services.database.vector_db_service import VectorDbService

# The client is taken from VectorDbService (not imported from config.config).
vdbs = VectorDbService()
client = vdbs.client

# Last expression of the cell: shows the cluster health as the cell output.
client.cluster.health()

{'cluster_name': 'docker-cluster',
 'status': 'yellow',
 'timed_out': False,
 'number_of_nodes': 1,
 'number_of_data_nodes': 1,
 'discovered_master': True,
 'discovered_cluster_manager': True,
 'active_primary_shards': 14,
 'active_shards': 14,
 'relocating_shards': 0,
 'initializing_shards': 0,
 'unassigned_shards': 10,
 'delayed_unassigned_shards': 0,
 'number_of_pending_tasks': 0,
 'number_of_in_flight_fetch': 0,
 'task_max_waiting_in_queue_millis': 0,
 'active_shards_percent_as_number': 58.333333333333336}

In [18]:
# Initialise the MovieV index through the shared client and report the outcome.
# To rebuild from scratch, drop the index first: client.indices.delete('movie')
try:
    MovieV.init(using=client)
except Exception as e:
    print(f"Error al inicializar el índice: {e}")
else:
    print("Índice inicializado correctamente.")

Índice inicializado correctamente.


In [23]:
# Initialise the UserV index through the shared client and report the outcome.
# To rebuild from scratch, drop the index first: client.indices.delete('user')
try:
    UserV.init(using=client)
except Exception as e:
    print(f"Error al inicializar el índice: {e}")
else:
    print("Índice inicializado correctamente.")

Índice inicializado correctamente.


In [19]:
# Store one MovieV document per movie row, carrying the embedding found at the
# row's `movieIdx` position of the movie embedding matrix. A failure on one row
# is printed and the loop moves on to the next movie.
for _, row in df_movies.iterrows():
    try:
        movie_fields = dict(
            movie_id=row.id,
            url=row['IMDB URL'],
            name=row['Name'],
            vector=list(movie_embeddings_matrix[row.movieIdx]),
            created_at=datetime.now(),
        )
        MovieV(**movie_fields).save(using=client)
    except Exception as e:
        print(f"Error al guardar la película {row['Name']}: {e}")

In [24]:
# Store one UserV document per user row, carrying the embedding found at the
# row's `userIdx` position of the user embedding matrix. A failure on one row
# is printed and the loop moves on to the next user.
for _, row in df_users.iterrows():
    try:
        user_fields = dict(
            user_id=row.id,
            occupation=row['Occupation'],
            created_at=datetime.now(),
            active_since=row['Active Since'],
            vector=list(user_embeddings_matrix[row.userIdx]),
        )
        UserV(**user_fields).save(using=client)
    except Exception as e:
        print(f"Error al guardar el usuario {row.id}: {e}")

In [20]:
# Number of documents matched by an unfiltered MovieV search.
movie_search = MovieV.search(using=client)
movie_search.count()

1682

In [25]:
# Number of documents matched by an unfiltered UserV search.
user_search = UserV.search(using=client)
user_search.count()

943

In [21]:
# Row of the movie embedding matrix used as the query vector.
movie_idx_to_search = 5

# k-NN query on the `vector` field of the `movie` index: `k` is the number of
# neighbours requested from the k-NN search, `size` caps the hits returned.
query = {
    "size": 5,
    "query": {
        "knn": {
            "vector": {
                # Sent as a plain Python list so the request body does not
                # depend on the client being able to serialise numpy arrays.
                "vector": movie_embeddings_matrix[movie_idx_to_search].tolist(),
                "k": 20,
            }
        }
    }
}

response = client.search(index='movie', body=query)

# Print every returned hit (metadata, score and stored source document).
for h in response['hits']['hits']:
    print(h)

{'_index': 'movie', '_id': '346', '_score': 1.0, '_source': {'movie_id': 346, 'url': 'http://us.imdb.com/M/title-exact?imdb-title-119396', 'name': 'Jackie Brown (1997)', 'vector': [0.0675366222858429, 0.09568929672241211, 0.22328245639801025], 'created_at': '2024-10-06T11:10:30.838250'}}
{'_index': 'movie', '_id': '987', '_score': 0.99915195, '_source': {'movie_id': 987, 'url': 'http://us.imdb.com/M/title-exact?Underworld%20(1997)', 'name': 'Underworld (1997)', 'vector': [0.000920130405575037, 0.0010935215977951884, 0.0028165383264422417], 'created_at': '2024-10-06T11:10:34.038812'}}
{'_index': 'movie', '_id': '285', '_score': 0.9991291, '_source': {'movie_id': 285, 'url': 'http://us.imdb.com/M/title-exact?Secrets%20&%20Lies%20(1996)', 'name': 'Secrets & Lies (1996)', 'vector': [0.06075748801231384, 0.08659423142671585, 0.22390972077846527], 'created_at': '2024-10-06T11:10:30.514241'}}
{'_index': 'movie', '_id': '1411', '_score': 0.99898773, '_source': {'movie_id': 1411, 'url': 'http:/