# Ensemble CB recommender based on Sentence Transformers

In [1]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell executes so edits to library code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Base directory, three levels above the notebook's working directory.
# The library and dataset folders are both resolved relative to it.
BASE_PATH = '../../..'
LIB_PATH = f'{BASE_PATH}/lib'
DATASET_PATH = f'{BASE_PATH}/datasets'

In [4]:
import logging
import sys

# Make modules under LIB_PATH importable; this must run before the
# project imports below (presumably `data` and `recommender` live there).
sys.path.append(LIB_PATH)

import numpy as np
import pandas as pd
import torch

import pytorch_common.util as pu

import data as dt
import data.dataset as ds
import recommender as rc
from recommender import ItemRecommenderBuilder, item_rec_sys_cfg

## Setup

In [5]:
# Build a console logger; the built logger is the cell's displayed value.
(
    pu.LoggerBuilder()
    .on_console()
    .build()
)

<RootLogger root (INFO)>

In [6]:
# Request the GPU device, then display the resolved device together with
# the CUDA availability flag, the torch version and the CUDA arch list.
pu.set_device_name('gpu')

(
    pu.get_device(),
    torch.cuda.is_available(),
    torch.__version__,
    torch.cuda.get_arch_list(),
)

(device(type='cuda', index=0),
 True,
 '1.11.0',
 ['sm_37',
  'sm_50',
  'sm_60',
  'sm_61',
  'sm_70',
  'sm_75',
  'sm_80',
  'sm_86',
  'compute_37'])

In [7]:
# One recommender configuration per item text field.
# NOTE(review): the third argument is presumably the Sentence Transformers
# model name used to embed that field; confirm against item_rec_sys_cfg.
TITLE_REC_SYS_CFG = item_rec_sys_cfg(
    DATASET_PATH, 'title', 'all-roberta-large-v1'
)
TAGS_REC_SYS_CFG = item_rec_sys_cfg(
    DATASET_PATH, 'tags', 'all-roberta-large-v1'
)
GENRES_REC_SYS_CFG = item_rec_sys_cfg(
    DATASET_PATH, 'genres', 'all-roberta-large-v1'
)
OVERVIEW_REC_SYS_CFG = item_rec_sys_cfg(
    DATASET_PATH, 'overview', 'all-mpnet-base-v2'
)

## Evaluation

In [8]:
# Build the recommender factory from the dataset path and the four
# per-field configurations.
# NOTE(review): the recorded output of this cell logs
# "Load pretrained SentenceTransformer: all-MiniLM-L6-v2" four times, which
# does not match the model names in the configs. Confirm whether the output
# is stale or the configured model is being ignored.
builder = ItemRecommenderBuilder(
    DATASET_PATH,
    [
        TITLE_REC_SYS_CFG,
        TAGS_REC_SYS_CFG,
        GENRES_REC_SYS_CFG,
        OVERVIEW_REC_SYS_CFG,
    ],
)

2023-05-03 22:47:56,329 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
2023-05-03 22:47:56,496 - INFO - Use pytorch device: cuda
2023-05-03 22:48:28,985 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
2023-05-03 22:48:29,101 - INFO - Use pytorch device: cuda
2023-05-03 22:49:01,615 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
2023-05-03 22:49:01,733 - INFO - Use pytorch device: cuda
2023-05-03 22:49:35,020 - INFO - Load pretrained SentenceTransformer: all-MiniLM-L6-v2
2023-05-03 22:49:35,136 - INFO - Use pytorch device: cuda


In [9]:
# Sanity check on the title repository: fetch the stored embedding for
# item id 1 (shown as "Toy Story" in the recorded output) and query the
# three nearest items to that embedding.
title_repo = builder.repositories[TITLE_REC_SYS_CFG.name]

result = title_repo.search_by_ids([1])
toy_story_emb = result['embeddings'][0]

title_repo.search_sims(
    embs=[toy_story_emb],
    limit=3,
)

{'ids': [['1', '3114', '78499']],
 'embeddings': None,
 'documents': None,
 'metadatas': [[{'title': 'Toy Story',
    'release_year': 1995,
    'imdb_id': 114709,
    'title_tokens': 'toy story'},
   {'title': 'Toy Story 2',
    'release_year': 1999,
    'imdb_id': 120363,
    'title_tokens': 'toy story'},
   {'title': 'Toy Story 3',
    'release_year': 2010,
    'imdb_id': 435761,
    'title_tokens': 'toy story'}]],
 'distances': [[0.0, 0.12047415971755981, 0.1525389850139618]]}

In [10]:
# User ids associated with movie id 1 (presumably the users who rated it;
# verify against users_id_from_movie_id).
user_ids = (
    builder.repositories
    .dataset
    .users_id_from_movie_id(movie_id=1)
)

In [11]:
# Recommendations from the overview-based recommender for one sample user.
# NOTE(review): n_top_rated_user_items / n_sim_items presumably bound the
# number of seed items and of neighbours per seed; confirm in the library.
(
    builder
    .personalized_item_recommender(
        OVERVIEW_REC_SYS_CFG.name,
        n_top_rated_user_items=20,
        n_sim_items=3,
    )
    .recommend(user_id=user_ids[130])
    .show()
)

Unnamed: 0,Similarity,Rating,.,Recommended Movies,..,Already seen movies,Rating.1
0,0.35,3.0,We Recommend ==>,,==> Because You Saw ==>,,3.5
1,0.26,4.1,We Recommend ==>,,==> Because You Saw ==>,,3.5
2,0.34,4.5,We Recommend ==>,,==> Because You Saw ==>,,3.0
3,0.29,3.0,We Recommend ==>,,==> Because You Saw ==>,,3.0
4,0.39,2.6,We Recommend ==>,,==> Because You Saw ==>,,1.0


In [12]:
# Recommendations from the title-based recommender for the same sample
# user, using the recommender's default parameters.
(
    builder
    .personalized_item_recommender(TITLE_REC_SYS_CFG.name)
    .recommend(user_id=user_ids[130])
    .show()
)

Unnamed: 0,Similarity,Rating,.,Recommended Movies,..,Already seen movies,Rating.1
0,0.87,3.6,We Recommend ==>,,==> Because You Saw ==>,,3.5
1,0.56,4.1,We Recommend ==>,,==> Because You Saw ==>,,3.5
2,0.63,3.0,We Recommend ==>,,==> Because You Saw ==>,,3.0
3,0.48,3.3,We Recommend ==>,,==> Because You Saw ==>,,3.0


In [13]:
# Recommendations from the genres-based recommender for the same sample
# user, using the recommender's default parameters.
(
    builder
    .personalized_item_recommender(GENRES_REC_SYS_CFG.name)
    .recommend(user_id=user_ids[130])
    .show()
)

Unnamed: 0,Similarity,Rating,.,Recommended Movies,..,Already seen movies,Rating.1
0,0.92,2.0,We Recommend ==>,,==> Because You Saw ==>,,3.5
1,0.91,2.5,We Recommend ==>,,==> Because You Saw ==>,,3.5


In [14]:
# Recommendations from the tags-based recommender for the same sample
# user, using the recommender's default parameters.
(
    builder
    .personalized_item_recommender(TAGS_REC_SYS_CFG.name)
    .recommend(user_id=user_ids[130])
    .show()
)

Unnamed: 0,Similarity,Rating,.,Recommended Movies,..,Already seen movies,Rating.1
0,0.49,3.6,We Recommend ==>,,==> Because You Saw ==>,,3.5
1,0.46,3.2,We Recommend ==>,,==> Because You Saw ==>,,3.5
2,0.12,3.0,We Recommend ==>,,==> Because You Saw ==>,,3.0
3,0.09,3.0,We Recommend ==>,,==> Because You Saw ==>,,3.0


In [15]:
# Weighted ensemble of the overview, title and genres recommenders for the
# same sample user.
# `rc` is the `recommender` package alias (`import recommender as rc` in the
# imports cell); without that import this cell raises NameError on a fresh
# kernel.
# Each entry pairs a config with its ensemble weight so the recommender list
# and the weight list cannot drift out of order.
ensemble_members = [
    (OVERVIEW_REC_SYS_CFG, 0.6),
    (TITLE_REC_SYS_CFG,    0.1),
    (GENRES_REC_SYS_CFG,   0.3),
]

(
    rc.PersonalizedItemEmbDBEnsembleRecommender(
        recommenders=[
            builder.personalized_item_recommender(
                member_cfg.name,
                n_top_rated_user_items=10,
                n_sim_items=3
            )
            for member_cfg, _member_weight in ensemble_members
        ],
        weights=[member_weight for _member_cfg, member_weight in ensemble_members],
        recommender_k=5
    )
    .recommend(user_id=user_ids[130], k=10)
    .show()
)

Unnamed: 0,Rating,Recommended Movies,Similarity
6,4.5,,0.94
0,4.1,,0.76
1,3.6,,0.97
4,3.3,,0.58
5,3.0,,0.73
7,3.0,,0.89
8,3.0,,0.95
3,2.5,,1.21
2,2.0,,1.22
