# Ensemble CB recommender based on Sentence Transformers

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Root directory, expressed as a relative path (three levels up).
BASE_PATH = '../../..'

# Dataset directory; passed to AppContextFactory as its first argument.
DATASET_PATH = f'{BASE_PATH}/datasets'

# Collection names; passed to AppContextFactory as its second argument.
COLLECTION_NAMES = [
    'genres',
    'overview',
    'tags',
    'title',
]

In [3]:
import sys
sys.path.append('../../../lib')

import torch
import numpy as np
import pandas as pd

import pytorch_common.util as pu

import logging
import data as dt
import data.dataset as ds

import recommender as rc
from context import AppContextFactory

2023-05-02 20:21:22.016785: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


<Figure size 640x480 with 0 Axes>

## Setup

In [4]:
# Configure logging through pytorch_common's builder. The built logger is the
# cell's last expression, so its repr is displayed as the cell output.
(
    pu.LoggerBuilder()
    .on_console()
    .build()
)

<RootLogger root (INFO)>

In [5]:
# Ask pytorch_common to use the GPU device.
pu.set_device_name('gpu')

# Diagnostic tuple shown as the cell output: the device pytorch_common
# resolved, whether CUDA is available, the torch version, and the CUDA
# architectures this torch build was compiled for.
(
    pu.get_device(),
    torch.cuda.is_available(),
    torch.__version__,
    torch.cuda.get_arch_list(),
)

(device(type='cuda', index=0),
 True,
 '1.11.0',
 ['sm_37',
  'sm_50',
  'sm_60',
  'sm_61',
  'sm_70',
  'sm_75',
  'sm_80',
  'sm_86',
  'compute_37'])

In [6]:
# Application context used by every cell below: it exposes the repositories
# and the per-collection recommender factories.
ctx = AppContextFactory(
    DATASET_PATH,
    COLLECTION_NAMES,
)

2023-05-02 20:21:22,433 - INFO - Anonymized telemetry enabled. See https://docs.trychroma.com/telemetry for more information.
2023-05-02 20:21:22,435 - INFO - Running Chroma using direct local API.
2023-05-02 20:21:22,445 - INFO - Successfully imported ClickHouse Connect C data optimizations
2023-05-02 20:21:22,445 - INFO - Successfully import ClickHouse Connect C/Numpy optimizations
2023-05-02 20:21:22,447 - INFO - Using python library for writing JSON byte strings
2023-05-02 20:21:26,212 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
2023-05-02 20:21:26,382 - INFO - Use pytorch device: cuda
2023-05-02 20:21:59,044 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
2023-05-02 20:21:59,165 - INFO - Use pytorch device: cuda
2023-05-02 20:22:24,649 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
2023-05-02 20:22:24,772 - INFO - Use pytorch device: cuda
2023-05-02 20:22:56,723 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
2023-05

## Evaluation

In [7]:
# Repository over the 'title' collection.
title_repo = ctx.repositories.title

# Look up movie id 1 (Toy Story, per the output below) and keep its embedding.
result = title_repo.search_by_ids([1])
toy_story_emb = result['embeddings'][0]

# Query the same collection with that embedding, limited to 3 results.
# The first hit is the movie itself (distance 0.0).
title_repo.search_sims(
    embs=[toy_story_emb],
    limit=3,
)

{'ids': [['1', '3114', '78499']],
 'embeddings': None,
 'documents': None,
 'metadatas': [[{'title_tokens': 'toy story',
    'release_year': 1995,
    'title': 'Toy Story',
    'imdb_id': 114709},
   {'title_tokens': 'toy story',
    'release_year': 1999,
    'title': 'Toy Story 2',
    'imdb_id': 120363},
   {'title_tokens': 'toy story',
    'release_year': 2010,
    'title': 'Toy Story 3',
    'imdb_id': 435761}]],
 'distances': [[0.0, 0.12047415971755981, 0.1525389850139618]]}

In [8]:
# User ids returned by the dataset repository for movie id 1
# (presumably the users who interacted with that movie - confirm in the repository).
# Later cells pick a single user from this sequence by index.
user_ids = (
    ctx.repositories.dataset
    .users_id_from_movie_id(movie_id=1)
)

In [9]:
# Recommendations from the overview-based recommender for one sample user.
(
    ctx
    .overview_personalized_item_recommender(
        n_top_rated_user_items=20,
        n_sim_items=3,
    )
    .recommend(user_id=user_ids[130])
    .show()
)

Unnamed: 0,Similarity,Rating,.,Recommended Movies,..,Already seen movies,Rating.1
0,0.35,3.0,We Recommend ==>,,==> Because You Saw ==>,,3.5
1,0.26,4.1,We Recommend ==>,,==> Because You Saw ==>,,3.5
2,0.34,4.5,We Recommend ==>,,==> Because You Saw ==>,,3.0
3,0.29,3.0,We Recommend ==>,,==> Because You Saw ==>,,3.0
4,0.39,2.6,We Recommend ==>,,==> Because You Saw ==>,,1.0


In [10]:
# Recommendations from the title-based recommender (default settings)
# for the same sample user.
(
    ctx
    .title_personalized_item_recommender()
    .recommend(user_id=user_ids[130])
    .show()
)

Unnamed: 0,Similarity,Rating,.,Recommended Movies,..,Already seen movies,Rating.1
0,0.87,3.6,We Recommend ==>,,==> Because You Saw ==>,,3.5
1,0.56,4.1,We Recommend ==>,,==> Because You Saw ==>,,3.5
2,0.63,3.0,We Recommend ==>,,==> Because You Saw ==>,,3.0
3,0.48,3.3,We Recommend ==>,,==> Because You Saw ==>,,3.0


In [11]:
# Recommendations from the genre-based recommender (default settings)
# for the same sample user.
(
    ctx
    .genre_personalized_item_recommender()
    .recommend(user_id=user_ids[130])
    .show()
)

Unnamed: 0,Similarity,Rating,.,Recommended Movies,..,Already seen movies,Rating.1
0,0.92,2.0,We Recommend ==>,,==> Because You Saw ==>,,3.5
1,0.91,2.5,We Recommend ==>,,==> Because You Saw ==>,,3.5


In [12]:
# Recommendations from the tag-based recommender (default settings)
# for the same sample user.
(
    ctx
    .tag_personalized_item_recommender()
    .recommend(user_id=user_ids[130])
    .show()
)

Unnamed: 0,Similarity,Rating,.,Recommended Movies,..,Already seen movies,Rating.1
0,0.49,3.6,We Recommend ==>,,==> Because You Saw ==>,,3.5
1,0.46,3.2,We Recommend ==>,,==> Because You Saw ==>,,3.5
2,0.12,3.0,We Recommend ==>,,==> Because You Saw ==>,,3.0
3,0.09,3.0,We Recommend ==>,,==> Because You Saw ==>,,3.0


In [13]:
# Ensemble of the overview, title and genre recommenders, each built with the
# same settings. The factories are called in the listed order, so the weights
# line up as overview=0.6, title=0.1, genre=0.3
# (assuming weights pair positionally with recommenders - confirm in rc).
(
    rc.PersonalizedItemEmbDBEnsembleRecommender(
        recommenders=[
            build_recommender(n_top_rated_user_items=10, n_sim_items=3)
            for build_recommender in (
                ctx.overview_personalized_item_recommender,
                ctx.title_personalized_item_recommender,
                ctx.genre_personalized_item_recommender,
            )
        ],
        weights=[0.6, 0.1, 0.3],
        recommender_k=5,
    )
    .recommend(user_id=user_ids[130], k=10)
    .show()
)

Unnamed: 0,Rating,Recommended Movies,Similarity
6,4.5,,0.94
0,4.1,,0.76
1,3.6,,0.97
4,3.3,,0.58
5,3.0,,0.73
7,3.0,,0.89
8,3.0,,0.95
3,2.5,,1.21
2,2.0,,1.22
