# Ensemble CB recommender based on Sentence Transformers

In [1]:
# Enable IPython's autoreload extension so that edits to imported modules are
# picked up live, without having to restart the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Root of the project, expressed relative to the notebook's working directory.
BASE_PATH = '../../..'

# Locations derived from the project root: local library code and datasets.
LIB_PATH = BASE_PATH + '/lib'
DATASET_PATH = BASE_PATH + '/datasets'

In [15]:
import sys

# Put LIB_PATH on the module search path so that modules located under it can
# be imported. The membership check keeps this cell idempotent: re-running it
# (as happens while iterating on a notebook) no longer appends the same entry
# to sys.path over and over.
if LIB_PATH not in sys.path:
    sys.path.append(LIB_PATH)

import torch
import pytorch_common.util as pu
from pytorch_common.util import set_device_name, \
                                get_device, \
                                LoggerBuilder

import data as dt
import data.dataset as ds

import util as ut

from sentence_transformers import SentenceTransformer

from recommender import ItemRecommenderBuilder, item_rec_sys_cfg, PersonalizedItemEmbDBEnsembleRecommender

## Setup

In [4]:
# Build a logger that writes to the console. The call is kept as the last
# expression of the cell so the resulting logger is still displayed.
console_logger_builder = LoggerBuilder().on_console()
console_logger_builder.build()

<RootLogger root (INFO)>

In [5]:
# Select the device name used by pytorch_common for this session.
set_device_name('gpu')

# Report the resolved device together with the local CUDA / PyTorch setup.
(
    get_device(),
    torch.cuda.is_available(),
    torch.__version__,
    torch.cuda.get_arch_list(),
)

(device(type='cuda', index=0),
 True,
 '1.11.0',
 ['sm_37',
  'sm_50',
  'sm_60',
  'sm_61',
  'sm_70',
  'sm_75',
  'sm_80',
  'sm_86',
  'compute_37'])

In [6]:
# One recommender configuration per text field, each paired with the name of
# the sentence-transformer model used for that field. The configurations are
# created in the same order as listed: title, tags, genres, overview.
(
    TITLE_REC_SYS_CFG,
    TAGS_REC_SYS_CFG,
    GENRES_REC_SYS_CFG,
    OVERVIEW_REC_SYS_CFG,
) = [
    item_rec_sys_cfg(DATASET_PATH, field, model_name)
    for field, model_name in (
        ('title',    'all-roberta-large-v1'),
        ('tags',     'all-roberta-large-v1'),
        ('genres',   'all-roberta-large-v1'),
        ('overview', 'all-mpnet-base-v2'),
    )
]

2023-05-04 15:09:14,695 - INFO - Cfg:

embedding_col: title_embedding
file_path: ../../../datasets/title-all-roberta-large-v1.json
metadata_cols:
- title
- release_year
- imdb_id
- title_tokens
name: title-all-roberta-large-v1

2023-05-04 15:09:14,696 - INFO - Cfg:

embedding_col: tags_embedding
file_path: ../../../datasets/tags-all-roberta-large-v1.json
metadata_cols:
- tags
- release_year
- imdb_id
- tags_tokens
- title
name: tags-all-roberta-large-v1

2023-05-04 15:09:14,696 - INFO - Cfg:

embedding_col: genres_embedding
file_path: ../../../datasets/genres-all-roberta-large-v1.json
metadata_cols:
- genres
- release_year
- imdb_id
- genres_tokens
- title
name: genres-all-roberta-large-v1

2023-05-04 15:09:14,697 - INFO - Cfg:

embedding_col: overview_embedding
file_path: ../../../datasets/overview-all-mpnet-base-v2.json
metadata_cols:
- overview
- release_year
- imdb_id
- overview_tokens
- title
name: overview-all-mpnet-base-v2



## Evaluation

In [7]:
# Configurations handed to the builder: one per text field.
rec_sys_cfgs = [
    TITLE_REC_SYS_CFG,
    TAGS_REC_SYS_CFG,
    GENRES_REC_SYS_CFG,
    OVERVIEW_REC_SYS_CFG,
]

# NOTE(review): the log output of this cell reports
# "Load pretrained SentenceTransformer: all-MiniLM-L6-v2" once per config,
# while the configs name all-roberta-large-v1 / all-mpnet-base-v2 -- confirm
# the builder is using the intended models.
builder = ItemRecommenderBuilder(DATASET_PATH, rec_sys_cfgs)

2023-05-04 15:09:14,714 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
2023-05-04 15:09:14,887 - INFO - Use pytorch device: cuda
2023-05-04 15:09:48,135 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
2023-05-04 15:09:48,250 - INFO - Use pytorch device: cuda
2023-05-04 15:10:21,417 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
2023-05-04 15:10:21,536 - INFO - Use pytorch device: cuda
2023-05-04 15:10:54,846 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
2023-05-04 15:10:54,977 - INFO - Use pytorch device: cuda


In [8]:
# Repository registered under the title configuration's name.
title_cfg_name = TITLE_REC_SYS_CFG.name
title_repo = builder.repositories[title_cfg_name]

# Fetch the entry stored for movie id 1 (shown as 'Toy Story' in the cell
# output) and keep its embedding.
toy_story_id = 1
result = title_repo.search_by_ids([toy_story_id])
toy_story_emb = result['embeddings'][0]

# Sanity check: query the 3 closest entries to that embedding.
title_repo.search_sims(
    embs=[toy_story_emb],
    limit=3,
)

{'ids': [['1', '3114', '78499']],
 'embeddings': None,
 'documents': None,
 'metadatas': [[{'title': 'Toy Story',
    'release_year': 1995,
    'imdb_id': 114709,
    'title_tokens': 'toy story'},
   {'title': 'Toy Story 2',
    'release_year': 1999,
    'imdb_id': 120363,
    'title_tokens': 'toy story'},
   {'title': 'Toy Story 3',
    'release_year': 2010,
    'imdb_id': 435761,
    'title_tokens': 'toy story'}]],
 'distances': [[0.0, 0.12047415971755981, 0.1525389850139618]]}

In [9]:
# User ids the dataset returns for movie_id=1 (presumably the users who
# interacted with that movie -- confirm against the dataset implementation).
user_ids = (
    builder
    .repositories
    .dataset
    .users_id_from_movie_id(movie_id=1)
)

In [10]:
# Personalized recommender backed by the overview configuration, using
# explicit values for n_top_rated_user_items and n_sim_items.
overview_recommender = builder.personalized_item_recommender(
    OVERVIEW_REC_SYS_CFG.name,
    n_top_rated_user_items=20,
    n_sim_items=3,
)

# Recommend for one sample user and render the result.
overview_recommender.recommend(user_id=user_ids[130]).show()

2023-05-04 15:11:22,271 - INFO - Found 3 items by ids: Int64Index([131258,  14972,  20240,  15438,   9685,   2012,  14137,  26780,
             15275, 128359,  10666,   8761,  61638,  39278,  14815,  13146,
             48654,  35971,  17129,  47755],
           dtype='int64').
2023-05-04 15:11:22,275 - INFO - Found 3 similar to 2012 item.
2023-05-04 15:11:22,279 - INFO - Found 3 similar to 131258 item.
2023-05-04 15:11:22,282 - INFO - Found 3 similar to 61638 item.


Item Recommender: overview-all-mpnet-base-v2



Unnamed: 0,Similarity,Rating,.,Recommended Movies,..,Already seen movies,Rating.1
0,0.35,3.0,We Recommend ==>,,==> Because You Saw ==>,,3.5
1,0.26,4.1,We Recommend ==>,,==> Because You Saw ==>,,3.5
2,0.34,4.5,We Recommend ==>,,==> Because You Saw ==>,,3.0
3,0.29,3.0,We Recommend ==>,,==> Because You Saw ==>,,3.0
4,0.39,2.6,We Recommend ==>,,==> Because You Saw ==>,,1.0


In [11]:
# Personalized recommender backed by the title configuration, with the
# builder's default settings.
title_recommender = builder.personalized_item_recommender(TITLE_REC_SYS_CFG.name)

# Recommend for the same sample user and render the result.
title_recommender.recommend(user_id=user_ids[130]).show()

2023-05-04 15:11:23,504 - INFO - Found 2 items by ids: Int64Index([131258, 14972, 20240, 15438, 9685, 2012, 14137, 26780, 15275,
            128359],
           dtype='int64').
2023-05-04 15:11:23,509 - INFO - Found 3 similar to 2012 item.
2023-05-04 15:11:23,513 - INFO - Found 3 similar to 131258 item.


Item Recommender: title-all-roberta-large-v1



Unnamed: 0,Similarity,Rating,.,Recommended Movies,..,Already seen movies,Rating.1
0,0.87,3.6,We Recommend ==>,,==> Because You Saw ==>,,3.5
1,0.56,4.1,We Recommend ==>,,==> Because You Saw ==>,,3.5
2,0.63,3.0,We Recommend ==>,,==> Because You Saw ==>,,3.0
3,0.48,3.3,We Recommend ==>,,==> Because You Saw ==>,,3.0


In [12]:
# Personalized recommender backed by the genres configuration, with the
# builder's default settings.
genres_recommender = builder.personalized_item_recommender(GENRES_REC_SYS_CFG.name)

# Recommend for the same sample user and render the result.
genres_recommender.recommend(user_id=user_ids[130]).show()

2023-05-04 15:11:25,258 - INFO - Found 2 items by ids: Int64Index([131258, 14972, 20240, 15438, 9685, 2012, 14137, 26780, 15275,
            128359],
           dtype='int64').
2023-05-04 15:11:25,262 - INFO - Found 3 similar to 2012 item.
2023-05-04 15:11:25,265 - INFO - Found 3 similar to 131258 item.


Item Recommender: genres-all-roberta-large-v1



Unnamed: 0,Similarity,Rating,.,Recommended Movies,..,Already seen movies,Rating.1
0,0.92,2.0,We Recommend ==>,,==> Because You Saw ==>,,3.5
1,0.91,2.5,We Recommend ==>,,==> Because You Saw ==>,,3.5
2,1.0,3.0,We Recommend ==>,,==> Because You Saw ==>,,3.0
3,1.0,3.0,We Recommend ==>,,==> Because You Saw ==>,,3.0
4,1.0,3.0,We Recommend ==>,,==> Because You Saw ==>,,3.0


In [13]:
# Personalized recommender backed by the tags configuration, with the
# builder's default settings.
tags_recommender = builder.personalized_item_recommender(TAGS_REC_SYS_CFG.name)

# Recommend for the same sample user and render the result.
tags_recommender.recommend(user_id=user_ids[130]).show()

2023-05-04 15:11:30,770 - INFO - Found 2 items by ids: Int64Index([131258, 14972, 20240, 15438, 9685, 2012, 14137, 26780, 15275,
            128359],
           dtype='int64').
2023-05-04 15:11:30,774 - INFO - Found 3 similar to 2012 item.
2023-05-04 15:11:30,778 - INFO - Found 3 similar to 131258 item.


Item Recommender: tags-all-roberta-large-v1



Unnamed: 0,Similarity,Rating,.,Recommended Movies,..,Already seen movies,Rating.1
0,0.49,3.6,We Recommend ==>,,==> Because You Saw ==>,,3.5
1,0.46,3.2,We Recommend ==>,,==> Because You Saw ==>,,3.5
2,0.12,3.0,We Recommend ==>,,==> Because You Saw ==>,,3.0
3,0.09,3.0,We Recommend ==>,,==> Because You Saw ==>,,3.0


In [17]:
# Members of the ensemble: overview, title and genres recommenders, all built
# with the same settings. The tags recommender is not part of this ensemble.
ensemble_recommenders = [
    builder.personalized_item_recommender(
        cfg.name,
        n_top_rated_user_items=10,
        n_sim_items=3
    )
    for cfg in (OVERVIEW_REC_SYS_CFG, TITLE_REC_SYS_CFG, GENRES_REC_SYS_CFG)
]

# Presumably weights[i] applies to recommenders[i] (overview=0.6, title=0.1,
# genres=0.3) -- confirm against the ensemble implementation.
# NOTE(review): the displayed "Similarity" column reaches 1.3 for some rows;
# confirm whether combined scores are expected to exceed 1.
ensemble_recommender = PersonalizedItemEmbDBEnsembleRecommender(
    recommenders=ensemble_recommenders,
    weights=[0.6, 0.1, 0.3],
    recommender_k=5
)

# Top-10 ensemble recommendations for the same sample user.
ensemble_recommender.recommend(user_id=user_ids[130], k=10).show()

2023-05-04 15:14:39,862 - INFO - Found 2 items by ids: Int64Index([131258, 14972, 20240, 15438, 9685, 2012, 14137, 26780, 15275,
            128359],
           dtype='int64').
2023-05-04 15:14:39,866 - INFO - Found 3 similar to 2012 item.
2023-05-04 15:14:39,870 - INFO - Found 3 similar to 131258 item.
2023-05-04 15:14:39,887 - INFO - Found 2 items by ids: Int64Index([131258, 14972, 20240, 15438, 9685, 2012, 14137, 26780, 15275,
            128359],
           dtype='int64').
2023-05-04 15:14:39,890 - INFO - Found 3 similar to 2012 item.
2023-05-04 15:14:39,894 - INFO - Found 3 similar to 131258 item.
2023-05-04 15:14:39,911 - INFO - Found 2 items by ids: Int64Index([131258, 14972, 20240, 15438, 9685, 2012, 14137, 26780, 15275,
            128359],
           dtype='int64').
2023-05-04 15:14:39,915 - INFO - Found 3 similar to 2012 item.
2023-05-04 15:14:39,919 - INFO - Found 3 similar to 131258 item.



Item Recommender: Ensemble of overview-all-mpnet-base-v2, title-all-roberta-large-v1, genres-all-roberta-large-v1.



Unnamed: 0,Rating,Recommended Movies,Similarity
9,4.5,,0.94
0,4.1,,0.76
1,3.6,,0.97
6,3.3,,0.58
2,3.0,,1.3
5,3.0,,1.3
7,3.0,,0.73
8,3.0,,1.3
10,3.0,,0.89
11,3.0,,0.95
