# Ensemble CB recommender based on Sparse Transformers

In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules
# (except those excluded via %aimport) before each cell is executed.
%load_ext autoreload
%autoreload 2

In [2]:
# Base directory (relative to the notebook) that the other paths hang off.
BASE_PATH = '../../..'

# Directory appended to sys.path by the imports cell.
LIB_PATH = BASE_PATH + '/lib'

# Directory handed to the recommender config/builder helpers.
DATASET_PATH = BASE_PATH + '/datasets'

In [3]:
import sys
sys.path.append(LIB_PATH)

import torch
import pytorch_common.util as pu
from pytorch_common.util import set_device_name, \
                                get_device, \
                                LoggerBuilder

import data as dt
import data.dataset as ds

import util as ut

from sentence_transformers import SentenceTransformer

import recommender as rc

2023-10-08 13:04:08.677079: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-10-08 13:04:09.770266: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-10-08 13:04:09.781675: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your 

<Figure size 640x480 with 0 Axes>

## Setup

In [4]:
# Build a console logger; the INFO lines printed by later cells rely on it.
# The trailing ';' keeps the returned logger's repr out of the cell output.
pu.LoggerBuilder().on_console().build();

<RootLogger root (INFO)>

In [5]:
# Ask pytorch_common to use the GPU device.
pu.set_device_name('gpu')

# Environment report: resolved device, CUDA availability, torch version and
# the CUDA architectures this torch build supports.
(
    pu.get_device(),
    torch.cuda.is_available(),
    torch.__version__,
    torch.cuda.get_arch_list(),
)

(device(type='cuda', index=0),
 True,
 '2.0.1+cu118',
 ['sm_37', 'sm_50', 'sm_60', 'sm_70', 'sm_75', 'sm_80', 'sm_86', 'sm_90'])

In [6]:
# One recommender config per item text field (title, tags, genres), all using
# the same 'tf-idf-sparse-auto-encoder' model name. Configs are created in
# that order, so the logged Cfg dumps appear in the same sequence.
TITLE_REC_SYS_CFG, TAGS_REC_SYS_CFG, GENRES_REC_SYS_CFG = (
    rc.item_rec_sys_cfg(DATASET_PATH, field, 'tf-idf-sparse-auto-encoder')
    for field in ('title', 'tags', 'genres')
)

2023-10-08 13:04:10,593 - INFO - Cfg:

embedding_col: title_embedding
file_path: ../../../datasets/title-tf-idf-sparse-auto-encoder.json
metadata_cols:
- title
- release_year
- imdb_id
- title_tokens
name: title-tf-idf-sparse-auto-encoder

2023-10-08 13:04:10,593 - INFO - Cfg:

embedding_col: tags_embedding
file_path: ../../../datasets/tags-tf-idf-sparse-auto-encoder.json
metadata_cols:
- tags
- release_year
- imdb_id
- tags_tokens
- title
name: tags-tf-idf-sparse-auto-encoder

2023-10-08 13:04:10,594 - INFO - Cfg:

embedding_col: genres_embedding
file_path: ../../../datasets/genres-tf-idf-sparse-auto-encoder.json
metadata_cols:
- genres
- release_year
- imdb_id
- genres_tokens
- title
name: genres-tf-idf-sparse-auto-encoder



## Evaluation

In [7]:
# Builder shared by every recommender below; it is given the dataset path and
# the three field configs (title, tags, genres).
builder = rc.SimilarItemRecommenderBuilder(
    DATASET_PATH,
    [TITLE_REC_SYS_CFG, TAGS_REC_SYS_CFG, GENRES_REC_SYS_CFG],
)

Insert Embeddings:   0%|          | 0/17611 [00:00<?, ?it/s]

Insert Embeddings:   0%|          | 0/17611 [00:00<?, ?it/s]

Insert Embeddings:   0%|          | 0/17611 [00:00<?, ?it/s]

In [9]:
# Repository registered under the title config's name.
title_repo = builder.repositories[TITLE_REC_SYS_CFG.name]

# Look up movie id 1 and keep the first embedding of the result.
result = title_repo.search_by_ids([1])
toy_story_emb = result.embeddings[0]

# Sanity check: query the three entries most similar to that embedding.
title_repo.search_sims(
    embs=[toy_story_emb],
    limit=3,
)

{
    "distances": [
        0.0,
        0.0,
        0.0
    ],
    "documents": [],
    "embeddings": [],
    "ids": [
        "1",
        "3114",
        "78499"
    ],
    "metadatas": [
        {
            "imdb_id": 114709,
            "release_year": 1995,
            "title": "Toy Story",
            "title_tokens": "toy story"
        },
        {
            "imdb_id": 120363,
            "release_year": 1999,
            "title": "Toy Story 2",
            "title_tokens": "toy story"
        },
        {
            "imdb_id": 435761,
            "release_year": 2010,
            "title": "Toy Story 3",
            "title_tokens": "toy story"
        }
    ]
}

In [10]:
# User ids associated with movie id 1 (presumably users who rated it --
# TODO confirm against the dataset helper).
user_ids = (
    builder
    .repositories
    .dataset
    .users_id_from_movie_id(movie_id=1)
)

In [11]:
# Recommendations for one sample user from the title-based recommender.
(
    builder
    .user_item_recommender(TITLE_REC_SYS_CFG.name)
    .recommend(user_id=user_ids[130])
    .show()
)

2023-10-08 13:04:45,602 - INFO - Found 3 items by ids: Int64Index([27296, 5243, 29250, 78269, 71225, 17421, 60141, 5104, 4593, 26910], dtype='int64').
2023-10-08 13:04:45,603 - INFO - Found 3 similar to 60141 item.
2023-10-08 13:04:45,604 - INFO - Found 3 similar to 5243 item.
2023-10-08 13:04:45,605 - INFO - Found 3 similar to 4593 item.



Item Recommender: title-tf-idf-sparse-auto-encoder



Unnamed: 0,Similarity,Rating,.,Recommended Movies,..,Already seen movies,Rating.1
0,0.61,4.0,We Recommend ==>,,==> Because You Saw ==>,,3.7
1,0.37,3.0,We Recommend ==>,,==> Because You Saw ==>,,3.7
4,0.69,4.0,We Recommend ==>,,==> Because You Saw ==>,,3.0
5,0.69,1.0,We Recommend ==>,,==> Because You Saw ==>,,3.0
2,0.97,3.8,We Recommend ==>,,==> Because You Saw ==>,,2.8


In [12]:
# Recommendations for the same sample user from the genres-based recommender.
(
    builder
    .user_item_recommender(GENRES_REC_SYS_CFG.name)
    .recommend(user_id=user_ids[130])
    .show()
)

2023-10-08 13:04:48,164 - INFO - Found 3 items by ids: Int64Index([27296, 5243, 29250, 78269, 71225, 17421, 60141, 5104, 4593, 26910], dtype='int64').
2023-10-08 13:04:48,165 - INFO - Found 3 similar to 60141 item.
2023-10-08 13:04:48,166 - INFO - Found 3 similar to 5243 item.
2023-10-08 13:04:48,166 - INFO - Found 3 similar to 4593 item.



Item Recommender: genres-tf-idf-sparse-auto-encoder



Unnamed: 0,Similarity,Rating,.,Recommended Movies,..,Already seen movies,Rating.1
0,1.0,2.8,We Recommend ==>,,==> Because You Saw ==>,,3.7
1,1.0,4.0,We Recommend ==>,,==> Because You Saw ==>,,3.7
2,1.0,2.0,We Recommend ==>,,==> Because You Saw ==>,,3.7
6,1.0,2.0,We Recommend ==>,,==> Because You Saw ==>,,3.0
7,1.0,3.0,We Recommend ==>,,==> Because You Saw ==>,,3.0


In [13]:
# Ensemble of the title and genres recommenders for the same sample user.
# Both members share the same settings; weights are listed in member order
# (title first, genres second).
(
    rc.UserSimilarItemEnsembleRecommender(
        recommenders=[
            builder.user_item_recommender(
                cfg.name,
                n_top_rated_user_items=10,
                n_sim_items=3,
            )
            for cfg in (TITLE_REC_SYS_CFG, GENRES_REC_SYS_CFG)
        ],
        weights=[0.7, 0.3],
        recommender_k=5,
    )
    .recommend(user_id=user_ids[130], k=10)
    .show()
)

2023-10-08 13:04:50,334 - INFO - Found 3 items by ids: Int64Index([27296, 5243, 29250, 78269, 71225, 17421, 60141, 5104, 4593, 26910], dtype='int64').
2023-10-08 13:04:50,335 - INFO - Found 3 similar to 60141 item.
2023-10-08 13:04:50,336 - INFO - Found 3 similar to 5243 item.
2023-10-08 13:04:50,336 - INFO - Found 3 similar to 4593 item.
2023-10-08 13:04:50,340 - INFO - Found 3 items by ids: Int64Index([27296, 5243, 29250, 78269, 71225, 17421, 60141, 5104, 4593, 26910], dtype='int64').
2023-10-08 13:04:50,341 - INFO - Found 3 similar to 60141 item.
2023-10-08 13:04:50,341 - INFO - Found 3 similar to 5243 item.
2023-10-08 13:04:50,342 - INFO - Found 3 similar to 4593 item.



Recommender: Ensemble of title-tf-idf-sparse-auto-encoder, genres-tf-idf-sparse-auto-encoder.



Unnamed: 0,Rating,Recommended Movies,Similarity
6,4.0,,1.39
7,4.0,,1.31
9,4.0,,1.3
3,3.8,,1.67
2,3.0,,1.3
4,3.0,,1.07
5,2.8,,1.3
0,2.0,,1.3
1,2.0,,1.3
8,1.0,,1.39
