# Ensemble CB recommender based on Sentence Transformers

In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell runs, so edits to imported code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Root that the other locations are derived from. It is a relative path, so it
# is resolved against the kernel's working directory.
BASE_PATH = '../../..'

# Directory appended to sys.path by the import cell so the project-local
# packages can be imported.
LIB_PATH = BASE_PATH + '/lib'

# Directory handed to the recommender config/builder helpers as the dataset location.
DATASET_PATH = BASE_PATH + '/datasets'

In [3]:
import sys
sys.path.append(LIB_PATH)

import torch
import pytorch_common.util as pu
from pytorch_common.util import set_device_name, \
                                get_device, \
                                LoggerBuilder

import data as dt
import data.dataset as ds

import util as ut

from sentence_transformers import SentenceTransformer

import recommender as rc

2023-10-08 12:57:27.813431: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-10-08 12:57:28.593847: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-10-08 12:57:28.605040: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your 

<Figure size 640x480 with 0 Axes>

## Setup

In [4]:
# Build a console logger for this session. The trailing semicolon stops
# IPython from echoing the returned logger object as the cell's output.
pu.LoggerBuilder().on_console().build();

<RootLogger root (INFO)>

In [5]:
# Select the GPU as the device name used by pytorch_common.
pu.set_device_name('gpu')

# Environment report displayed as the cell output: resolved device, CUDA
# availability, torch version, and the CUDA architectures listed by this
# torch build.
(
    pu.get_device(),
    torch.cuda.is_available(),
    torch.__version__,
    torch.cuda.get_arch_list(),
)

(device(type='cuda', index=0),
 True,
 '2.0.1+cu118',
 ['sm_37', 'sm_50', 'sm_60', 'sm_70', 'sm_75', 'sm_80', 'sm_86', 'sm_90'])

In [6]:
# One recommender config per item field, created in the order
# title -> tags -> genres -> overview. Each pair is (item field, model name);
# the model names are presumably sentence-transformer checkpoints -- confirm
# against rc.item_rec_sys_cfg.
(
    TITLE_REC_SYS_CFG,
    TAGS_REC_SYS_CFG,
    GENRES_REC_SYS_CFG,
    OVERVIEW_REC_SYS_CFG,
) = (
    rc.item_rec_sys_cfg(DATASET_PATH, field, model_name)
    for field, model_name in (
        ('title',    'all-roberta-large-v1'),
        ('tags',     'all-roberta-large-v1'),
        ('genres',   'all-roberta-large-v1'),
        ('overview', 'all-mpnet-base-v2'),
    )
)

2023-10-08 12:57:29,264 - INFO - Cfg:

embedding_col: title_embedding
file_path: ../../../datasets/title-all-roberta-large-v1.json
metadata_cols:
- title
- release_year
- imdb_id
- title_tokens
name: title-all-roberta-large-v1

2023-10-08 12:57:29,264 - INFO - Cfg:

embedding_col: tags_embedding
file_path: ../../../datasets/tags-all-roberta-large-v1.json
metadata_cols:
- tags
- release_year
- imdb_id
- tags_tokens
- title
name: tags-all-roberta-large-v1

2023-10-08 12:57:29,265 - INFO - Cfg:

embedding_col: genres_embedding
file_path: ../../../datasets/genres-all-roberta-large-v1.json
metadata_cols:
- genres
- release_year
- imdb_id
- genres_tokens
- title
name: genres-all-roberta-large-v1

2023-10-08 12:57:29,266 - INFO - Cfg:

embedding_col: overview_embedding
file_path: ../../../datasets/overview-all-mpnet-base-v2.json
metadata_cols:
- overview
- release_year
- imdb_id
- overview_tokens
- title
name: overview-all-mpnet-base-v2



## Evaluation

In [7]:
# Builder shared by every recommender below. It receives the dataset location
# and the four per-field configs; constructing it appears to load the
# embeddings into one repository per config (see the progress bars it prints).
builder = rc.SimilarItemRecommenderBuilder(
    DATASET_PATH,
    [TITLE_REC_SYS_CFG, TAGS_REC_SYS_CFG, GENRES_REC_SYS_CFG, OVERVIEW_REC_SYS_CFG],
)

Insert Embeddings:   0%|          | 0/18703 [00:00<?, ?it/s]

Insert Embeddings:   0%|          | 0/18703 [00:00<?, ?it/s]

Insert Embeddings:   0%|          | 0/18703 [00:00<?, ?it/s]

Insert Embeddings:   0%|          | 0/18703 [00:00<?, ?it/s]

In [8]:
# Sanity check of the title embedding repository: fetch the embedding stored
# for item id 1 and query its nearest neighbours.
title_repo = builder.repositories[TITLE_REC_SYS_CFG.name]

# Item id 1 is "Toy Story" according to the metadata returned below.
result = title_repo.search_by_ids([1])
toy_story_emb = result.embeddings[0]

# Three closest items to that embedding; the first hit is expected to be the
# query item itself (distance 0.0 in the captured output).
title_repo.search_sims(embs=[toy_story_emb], limit=3)

{
    "distances": [
        0.0,
        0.1203928217291832,
        0.15256161987781525
    ],
    "documents": [],
    "embeddings": [],
    "ids": [
        "1",
        "3114",
        "78499"
    ],
    "metadatas": [
        {
            "imdb_id": 114709,
            "release_year": 1995,
            "title": "Toy Story",
            "title_tokens": "toy story"
        },
        {
            "imdb_id": 120363,
            "release_year": 1999,
            "title": "Toy Story 2",
            "title_tokens": "toy story"
        },
        {
            "imdb_id": 435761,
            "release_year": 2010,
            "title": "Toy Story 3",
            "title_tokens": "toy story"
        }
    ]
}

In [9]:
# User ids linked to movie id 1 in the dataset (presumably users who rated it
# -- confirm against users_id_from_movie_id). Cells below index into this.
user_ids = builder.repositories.dataset.users_id_from_movie_id(movie_id=1) 

In [10]:
# Recommendations from the overview-embedding recommender for one sample user.
# The two keyword arguments presumably cap how many of the user's top-rated
# items are used as seeds (20) and how many similar items are fetched per
# seed (3) -- confirm against user_item_recommender.
(
    builder
    .user_item_recommender(
        OVERVIEW_REC_SYS_CFG.name,
        n_top_rated_user_items=20,
        n_sim_items=3,
    )
    .recommend(user_id=user_ids[130])
    .show()
)

2023-10-08 12:58:42,124 - INFO - Found 5 items by ids: Int64Index([ 27296,   5243,  29250,  78269,  71225,  17421,  60141,   5104,
              4593,  26910,   7714,  21709,  24015,  55610, 116237, 157415,
             67973,  61042,  61300, 141780],
           dtype='int64').
2023-10-08 12:58:42,125 - INFO - Found 3 similar to 7714 item.
2023-10-08 12:58:42,126 - INFO - Found 3 similar to 60141 item.
2023-10-08 12:58:42,127 - INFO - Found 3 similar to 5243 item.
2023-10-08 12:58:42,128 - INFO - Found 3 similar to 4593 item.
2023-10-08 12:58:42,129 - INFO - Found 3 similar to 5104 item.



Item Recommender: overview-all-mpnet-base-v2



Unnamed: 0,Similarity,Rating,.,Recommended Movies,..,Already seen movies,Rating.1
0,0.4,3.6,We Recommend ==>,,==> Because You Saw ==>,,4.0
1,0.12,2.0,We Recommend ==>,,==> Because You Saw ==>,,4.0
8,0.49,3.7,We Recommend ==>,,==> Because You Saw ==>,,4.0
9,0.44,3.0,We Recommend ==>,,==> Because You Saw ==>,,4.0
2,0.26,3.1,We Recommend ==>,,==> Because You Saw ==>,,3.7


In [11]:
# Same sample user, scored with the title-embedding recommender using the
# recommender's default settings.
(
    builder
    .user_item_recommender(TITLE_REC_SYS_CFG.name)
    .recommend(user_id=user_ids[130])
    .show()
)

2023-10-08 12:58:42,690 - INFO - Found 4 items by ids: Int64Index([27296, 5243, 29250, 78269, 71225, 17421, 60141, 5104, 4593, 26910], dtype='int64').
2023-10-08 12:58:42,691 - INFO - Found 3 similar to 60141 item.
2023-10-08 12:58:42,692 - INFO - Found 3 similar to 5243 item.
2023-10-08 12:58:42,693 - INFO - Found 3 similar to 4593 item.
2023-10-08 12:58:42,694 - INFO - Found 3 similar to 5104 item.



Item Recommender: title-all-roberta-large-v1



Unnamed: 0,Similarity,Rating,.,Recommended Movies,..,Already seen movies,Rating.1
6,0.38,3.0,We Recommend ==>,,==> Because You Saw ==>,,4.0
7,0.27,2.5,We Recommend ==>,,==> Because You Saw ==>,,4.0
0,0.37,4.0,We Recommend ==>,,==> Because You Saw ==>,,3.7
1,0.26,2.7,We Recommend ==>,,==> Because You Saw ==>,,3.7
4,0.49,3.2,We Recommend ==>,,==> Because You Saw ==>,,3.0


In [12]:
# Same sample user, scored with the genres-embedding recommender using the
# recommender's default settings.
(
    builder
    .user_item_recommender(GENRES_REC_SYS_CFG.name)
    .recommend(user_id=user_ids[130])
    .show()
)

2023-10-08 12:58:43,274 - INFO - Found 4 items by ids: Int64Index([27296, 5243, 29250, 78269, 71225, 17421, 60141, 5104, 4593, 26910], dtype='int64').
2023-10-08 12:58:43,275 - INFO - Found 3 similar to 60141 item.
2023-10-08 12:58:43,276 - INFO - Found 3 similar to 5243 item.
2023-10-08 12:58:43,276 - INFO - Found 3 similar to 4593 item.
2023-10-08 12:58:43,277 - INFO - Found 3 similar to 5104 item.



Item Recommender: genres-all-roberta-large-v1



Unnamed: 0,Similarity,Rating,.,Recommended Movies,..,Already seen movies,Rating.1
9,1.0,2.5,We Recommend ==>,,==> Because You Saw ==>,,4.0
10,1.0,3.7,We Recommend ==>,,==> Because You Saw ==>,,4.0
11,1.0,3.9,We Recommend ==>,,==> Because You Saw ==>,,4.0
0,0.16,3.4,We Recommend ==>,,==> Because You Saw ==>,,3.7
1,0.16,4.0,We Recommend ==>,,==> Because You Saw ==>,,3.7


In [13]:
# Same sample user, scored with the tags-embedding recommender using the
# recommender's default settings.
(
    builder
    .user_item_recommender(TAGS_REC_SYS_CFG.name)
    .recommend(user_id=user_ids[130])
    .show()
)

2023-10-08 12:58:43,790 - INFO - Found 4 items by ids: Int64Index([27296, 5243, 29250, 78269, 71225, 17421, 60141, 5104, 4593, 26910], dtype='int64').
2023-10-08 12:58:43,791 - INFO - Found 3 similar to 60141 item.
2023-10-08 12:58:43,792 - INFO - Found 3 similar to 5243 item.
2023-10-08 12:58:43,793 - INFO - Found 3 similar to 4593 item.
2023-10-08 12:58:43,794 - INFO - Found 3 similar to 5104 item.



Item Recommender: tags-all-roberta-large-v1



Unnamed: 0,Similarity,Rating,.,Recommended Movies,..,Already seen movies,Rating.1
6,-0.04,3.0,We Recommend ==>,,==> Because You Saw ==>,,4.0
7,-0.08,3.0,We Recommend ==>,,==> Because You Saw ==>,,4.0
0,0.41,1.5,We Recommend ==>,,==> Because You Saw ==>,,3.7
1,0.41,3.0,We Recommend ==>,,==> Because You Saw ==>,,3.7
4,0.01,2.7,We Recommend ==>,,==> Because You Saw ==>,,3.0


In [15]:
# Ensemble of the overview, title and genres recommenders (the tags
# recommender is not part of it). All members share the same settings.
# NOTE(review): weights are presumably matched to recommenders by position
# (overview=0.6, title=0.1, genres=0.3) -- confirm in
# UserSimilarItemEnsembleRecommender.
(
    rc.UserSimilarItemEnsembleRecommender(
        recommenders=[
            builder.user_item_recommender(
                cfg.name,
                n_top_rated_user_items=10,
                n_sim_items=3,
            )
            for cfg in (
                OVERVIEW_REC_SYS_CFG,
                TITLE_REC_SYS_CFG,
                GENRES_REC_SYS_CFG,
            )
        ],
        weights=[0.6, 0.1, 0.3],
        recommender_k=5,
    )
    .recommend(user_id=user_ids[130], k=10)
    .show()
)

2023-10-08 12:59:20,172 - INFO - Found 4 items by ids: Int64Index([27296, 5243, 29250, 78269, 71225, 17421, 60141, 5104, 4593, 26910], dtype='int64').
2023-10-08 12:59:20,173 - INFO - Found 3 similar to 60141 item.
2023-10-08 12:59:20,174 - INFO - Found 3 similar to 5243 item.
2023-10-08 12:59:20,175 - INFO - Found 3 similar to 4593 item.
2023-10-08 12:59:20,176 - INFO - Found 3 similar to 5104 item.
2023-10-08 12:59:20,180 - INFO - Found 4 items by ids: Int64Index([27296, 5243, 29250, 78269, 71225, 17421, 60141, 5104, 4593, 26910], dtype='int64').
2023-10-08 12:59:20,181 - INFO - Found 3 similar to 60141 item.
2023-10-08 12:59:20,182 - INFO - Found 3 similar to 5243 item.
2023-10-08 12:59:20,182 - INFO - Found 3 similar to 4593 item.
2023-10-08 12:59:20,183 - INFO - Found 3 similar to 5104 item.
2023-10-08 12:59:20,187 - INFO - Found 4 items by ids: Int64Index([27296, 5243, 29250, 78269, 71225, 17421, 60141, 5104, 4593, 26910], dtype='int64').
2023-10-08 12:59:20,188 - INFO - Found 3 


Recommender: Ensemble of overview-all-mpnet-base-v2, title-all-roberta-large-v1, genres-all-roberta-large-v1.



Unnamed: 0,Rating,Recommended Movies,Similarity
10,4.0,,0.47
14,4.0,,0.46
9,3.9,,1.3
7,3.8,,0.67
2,3.7,,1.09
8,3.7,,1.3
5,3.4,,0.46
1,3.2,,0.59
0,3.1,,0.86
11,3.0,,0.48
