# Ensemble CB recommender based on Sparse Transformers

In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules
# before each cell is executed, so edits to library code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Base directory, three levels above the current working directory.
BASE_PATH = '../../..'

# Locations derived from the base directory.
LIB_PATH = BASE_PATH + '/lib'
DATASET_PATH = BASE_PATH + '/datasets'

In [3]:
import sys
# Add LIB_PATH to the module search path. This must run before the imports
# below — presumably `data`, `util` and `recommender` live there; verify.
sys.path.append(LIB_PATH)

import torch
import pytorch_common.util as pu
from pytorch_common.util import set_device_name, \
                                get_device, \
                                LoggerBuilder

import data as dt
import data.dataset as ds

import util as ut

from sentence_transformers import SentenceTransformer

from recommender import ItemRecommenderBuilder, item_rec_sys_cfg, PersonalizedItemEmbDBEnsembleRecommender

2023-05-05 18:32:40.259112: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Using embedded DuckDB without persistence: data will be transient


<Figure size 640x480 with 0 Axes>

## Setup

In [4]:
# Build a logger that writes to the console; the built logger is the cell output.
LoggerBuilder().on_console().build()

<RootLogger root (INFO)>

In [5]:
# Ask for the GPU as the compute device.
set_device_name('gpu')

# Report the resolved device, CUDA availability, the torch version and the
# CUDA architectures this torch build supports.
(
    get_device(),
    torch.cuda.is_available(),
    torch.__version__,
    torch.cuda.get_arch_list(),
)

(device(type='cuda', index=0),
 True,
 '1.11.0',
 ['sm_37',
  'sm_50',
  'sm_60',
  'sm_61',
  'sm_70',
  'sm_75',
  'sm_80',
  'sm_86',
  'compute_37'])

In [16]:
# One recommender configuration per item field (title, tags, genres), all
# built with the same 'tf-idf-sparse-auto-encoder' name suffix.
TITLE_REC_SYS_CFG, TAGS_REC_SYS_CFG, GENRES_REC_SYS_CFG = [
    item_rec_sys_cfg(DATASET_PATH, field, 'tf-idf-sparse-auto-encoder')
    for field in ('title', 'tags', 'genres')
]

2023-05-05 18:35:02,922 - INFO - Cfg:

embedding_col: title_embedding
file_path: ../../../datasets/title-tf-idf-sparse-auto-encoder.json
metadata_cols:
- title
- release_year
- imdb_id
- title_tokens
name: title-tf-idf-sparse-auto-encoder

2023-05-05 18:35:02,923 - INFO - Cfg:

embedding_col: tags_embedding
file_path: ../../../datasets/tags-tf-idf-sparse-auto-encoder.json
metadata_cols:
- tags
- release_year
- imdb_id
- tags_tokens
- title
name: tags-tf-idf-sparse-auto-encoder

2023-05-05 18:35:02,924 - INFO - Cfg:

embedding_col: genres_embedding
file_path: ../../../datasets/genres-tf-idf-sparse-auto-encoder.json
metadata_cols:
- genres
- release_year
- imdb_id
- genres_tokens
- title
name: genres-tf-idf-sparse-auto-encoder



## Evaluation

In [8]:
# Configurations handed to the builder, in title / tags / genres order.
rec_sys_cfgs = [TITLE_REC_SYS_CFG, TAGS_REC_SYS_CFG, GENRES_REC_SYS_CFG]

# Single entry point used below to reach repositories and recommenders.
builder = ItemRecommenderBuilder(DATASET_PATH, rec_sys_cfgs)

2023-05-05 18:34:15,258 - INFO - collection with name title-tf-idf-sparse-auto-encoder already exists, returning existing collection
2023-05-05 18:34:15,258 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
2023-05-05 18:34:15,369 - INFO - Use pytorch device: cuda
2023-05-05 18:34:21,122 - INFO - collection with name tags-tf-idf-sparse-auto-encoder already exists, returning existing collection
2023-05-05 18:34:21,123 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
2023-05-05 18:34:21,239 - INFO - Use pytorch device: cuda
2023-05-05 18:34:26,228 - INFO - collection with name genres-tf-idf-sparse-auto-encoder already exists, returning existing collection
2023-05-05 18:34:26,229 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
2023-05-05 18:34:26,352 - INFO - Use pytorch device: cuda
2023-05-05 18:34:31,285 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
2023-05-05 18:34:31,402 - INFO - Use pytorch device: cuda


In [9]:
# Sanity check on the title repository: look up item id 1, take its stored
# embedding and ask for the 3 closest items to that embedding.
title_repo = builder.repositories[TITLE_REC_SYS_CFG.name]

result = title_repo.search_by_ids([1])
toy_story_emb = result['embeddings'][0]

title_repo.search_sims(
    embs=[toy_story_emb],
    limit=3,
)

{'ids': [['3114', '1', '78499']],
 'embeddings': None,
 'documents': None,
 'metadatas': [[{'title': 'Toy Story 2',
    'release_year': 1999,
    'imdb_id': 120363,
    'title_tokens': 'toy story'},
   {'title': 'Toy Story',
    'release_year': 1995,
    'imdb_id': 114709,
    'title_tokens': 'toy story'},
   {'title': 'Toy Story 3',
    'release_year': 2010,
    'imdb_id': 435761,
    'title_tokens': 'toy story'}]],
 'distances': [[0.0, 0.0, 0.0]]}

In [10]:
# User ids associated with movie id 1 — presumably the users who rated it;
# verify against the dataset helper.
user_ids = (
    builder
    .repositories
    .dataset
    .users_id_from_movie_id(movie_id=1)
)

In [24]:
# Title-based personalized recommendations for one sample user.
(
    builder
    .personalized_item_recommender(TITLE_REC_SYS_CFG.name)
    .recommend(user_id=user_ids[130])
    .show()
)

2023-05-05 18:37:57,532 - INFO - Found 4 items by ids: Int64Index([131258, 14972, 20240, 15438, 9685, 2012, 14137, 26780, 15275,
            128359],
           dtype='int64').
2023-05-05 18:37:57,537 - INFO - Found 3 similar to 2012 item.
2023-05-05 18:37:57,542 - INFO - Found 3 similar to 131258 item.
2023-05-05 18:37:57,547 - INFO - Found 3 similar to 2012 item.
2023-05-05 18:37:57,551 - INFO - Found 3 similar to 131258 item.



Item Recommender: title-tf-idf-sparse-auto-encoder



Unnamed: 0,Similarity,Rating,.,Recommended Movies,..,Already seen movies,Rating.1
0,-1.05,4.1,We Recommend ==>,,==> Because You Saw ==>,,3.5
3,-1.05,4.1,We Recommend ==>,,==> Because You Saw ==>,,3.5
1,1.0,3.3,We Recommend ==>,,==> Because You Saw ==>,,3.0
2,1.0,3.3,We Recommend ==>,,==> Because You Saw ==>,,3.0
4,1.0,3.3,We Recommend ==>,,==> Because You Saw ==>,,3.0


In [25]:
# Genres-based personalized recommendations for the same sample user.
(
    builder
    .personalized_item_recommender(GENRES_REC_SYS_CFG.name)
    .recommend(user_id=user_ids[130])
    .show()
)

2023-05-05 18:38:02,148 - INFO - Found 2 items by ids: Int64Index([131258, 14972, 20240, 15438, 9685, 2012, 14137, 26780, 15275,
            128359],
           dtype='int64').
2023-05-05 18:38:02,156 - INFO - Found 3 similar to 2012 item.
2023-05-05 18:38:02,162 - INFO - Found 3 similar to 131258 item.



Item Recommender: genres-tf-idf-sparse-auto-encoder



Unnamed: 0,Similarity,Rating,.,Recommended Movies,..,Already seen movies,Rating.1
0,0.54,3.3,We Recommend ==>,,==> Because You Saw ==>,,3.5
1,0.42,2.5,We Recommend ==>,,==> Because You Saw ==>,,3.5
2,0.86,4.2,We Recommend ==>,,==> Because You Saw ==>,,3.0
3,0.86,1.0,We Recommend ==>,,==> Because You Saw ==>,,3.0
4,0.86,3.0,We Recommend ==>,,==> Because You Saw ==>,,3.0


In [26]:
# Ensemble of the title and genres recommenders, both built with the same
# settings. Weights are listed in the same order as the recommenders —
# presumably 0.7 for title and 0.3 for genres; verify in the recommender.
(
    PersonalizedItemEmbDBEnsembleRecommender(
        recommenders=[
            builder.personalized_item_recommender(
                cfg.name,
                n_top_rated_user_items=10,
                n_sim_items=3,
            )
            for cfg in (TITLE_REC_SYS_CFG, GENRES_REC_SYS_CFG)
        ],
        weights=[0.7, 0.3],
        recommender_k=5,
    )
    .recommend(user_id=user_ids[130], k=10)
    .show()
)

2023-05-05 18:38:19,432 - INFO - Found 4 items by ids: Int64Index([131258, 14972, 20240, 15438, 9685, 2012, 14137, 26780, 15275,
            128359],
           dtype='int64').
2023-05-05 18:38:19,439 - INFO - Found 3 similar to 2012 item.
2023-05-05 18:38:19,444 - INFO - Found 3 similar to 131258 item.
2023-05-05 18:38:19,449 - INFO - Found 3 similar to 2012 item.
2023-05-05 18:38:19,453 - INFO - Found 3 similar to 131258 item.
2023-05-05 18:38:19,458 - INFO - Found 2 items by ids: Int64Index([131258, 14972, 20240, 15438, 9685, 2012, 14137, 26780, 15275,
            128359],
           dtype='int64').
2023-05-05 18:38:19,462 - INFO - Found 3 similar to 2012 item.
2023-05-05 18:38:19,466 - INFO - Found 3 similar to 131258 item.



Item Recommender: Ensemble of title-tf-idf-sparse-auto-encoder, genres-tf-idf-sparse-auto-encoder.



Unnamed: 0,Rating,Recommended Movies,Similarity
1,4.2,,1.16
3,3.3,,1.7
4,3.3,,0.84
5,3.0,,1.16
0,2.5,,0.72
2,1.0,,1.16
