# Models: Movie Tags Sentence Transformer

In [3]:
# Reload imported modules automatically before each cell executes, so edits
# to library code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
# Notebook configuration: filesystem layout plus the model/field to embed.
# Repository root, relative to this notebook's working directory.
BASE_PATH = '../../..'

# Locations derived from the root.
LIB_PATH = BASE_PATH + '/lib'
DATASET_PATH = BASE_PATH + '/datasets'

# Sentence-transformer checkpoint name and the movie text field to embed.
MODEL = 'all-roberta-large-v1'
FIELD = 'tags'

In [5]:
import sys
# Extend the module search path with LIB_PATH before the imports below run.
# NOTE(review): presumably the project-local packages (data, util,
# recommender) are resolved from there; confirm.
sys.path.append(LIB_PATH)

import torch
import pytorch_common.util as pu
from pytorch_common.util import set_device_name, \
                                get_device, \
                                LoggerBuilder

import data as dt
import data.dataset as ds

import util as ut

from sentence_transformers import SentenceTransformer

import recommender as rc

2023-10-08 12:48:08.785477: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-10-08 12:48:09.886168: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-10-08 12:48:09.902853: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your 

<Figure size 640x480 with 0 Axes>

## Setup

In [6]:
# Build the logger with console output enabled. The trailing semicolon keeps
# the returned logger's repr (<RootLogger root (INFO)>) out of the cell output.
pu.LoggerBuilder().on_console().build();

<RootLogger root (INFO)>

In [7]:
# Request the GPU device, then display the resolved device together with the
# CUDA availability flag, the torch version and the compiled CUDA arch list.
pu.set_device_name('gpu')

(
    pu.get_device(),
    torch.cuda.is_available(),
    torch.__version__,
    torch.cuda.get_arch_list(),
)

(device(type='cuda', index=0),
 True,
 '2.0.1+cu118',
 ['sm_37', 'sm_50', 'sm_60', 'sm_70', 'sm_75', 'sm_80', 'sm_86', 'sm_90'])

In [8]:
# Fixed seed for the run.
# NOTE(review): which RNGs ut.set_seed covers is not visible here; confirm.
RANDOM_SEED = 42
ut.set_seed(RANDOM_SEED)

In [9]:
# Recommender configuration for the chosen field/model pair; it exposes the
# `file_path` and `name` attributes used by the cells below.
REC_SYS_CFG = rc.item_rec_sys_cfg(
    DATASET_PATH,
    FIELD,
    MODEL,
)

2023-10-08 12:48:10,883 - INFO - Cfg:

embedding_col: tags_embedding
file_path: ../../../datasets/tags-all-roberta-large-v1.json
metadata_cols:
- tags
- release_year
- imdb_id
- tags_tokens
- title
name: tags-all-roberta-large-v1



## Preprocesamiento

In [10]:
# Load the source frame from the dataset directory.
dataset = ds.MovieLensTMDBDataLoader.df_from_path(DATASET_PATH)

# Source columns fed into the pipeline; the last one depends on FIELD.
columns = [
    'movie_id',
    'movie_release_year',
    'movie_imdb_id',
    'movie_title',
    f'movie_{FIELD}',
]

# One pipeline: select -> distinct on movie_id -> rename to short names ->
# join_str_list / tokenize on FIELD -> reset_index.
# NOTE(review): the exact semantics of the dt.* helpers live in the project
# library and are not visible from this notebook.
movie_data = (
    dataset
    .pipe(dt.select, columns)
    .pipe(dt.distinct, ['movie_id'])
    .pipe(
        dt.rename,
        {
            'movie_id': 'id',
            'movie_title': 'title',
            'movie_imdb_id': 'imdb_id',
            'movie_release_year': 'release_year',
            f'movie_{FIELD}': FIELD,
        },
    )
    .pipe(dt.join_str_list, FIELD)
    .pipe(dt.tokenize, FIELD)
    .pipe(dt.reset_index)
)

## Generación de embeddings

In [11]:
# Load the pretrained sentence-transformer named by MODEL.
model = SentenceTransformer(MODEL)

# encode() takes a list of sentences, so pass the FIELD column as a plain
# list of strings rather than a reshaped 2-D `.values` array.
embeddings = model.encode(movie_data[FIELD].tolist())

# Attach the vectors to the frame.
# NOTE(review): presumably stored under the `tags_embedding` column named in
# the logged config; confirm in dt.append_emb_vectors.
movie_data = movie_data.pipe(dt.append_emb_vectors, embeddings, FIELD)

# Persist the frame at the path the recommender configuration points to.
movie_data.to_json(REC_SYS_CFG.file_path)

# `del` only drops the Python reference; PyTorch's caching allocator keeps the
# freed CUDA blocks reserved. Release them so the rest of the notebook (and
# other processes) can use that GPU memory. Safe no-op when CUDA is unused.
del model
torch.cuda.empty_cache()

2023-10-08 12:48:15,524 - INFO - Load pretrained SentenceTransformer: all-roberta-large-v1
2023-10-08 12:48:18,357 - INFO - Use pytorch device: cuda


Batches:   0%|          | 0/585 [00:00<?, ?it/s]

## Evaluación

In [12]:
# Build the similar-item recommender from the dataset directory and the
# single configuration defined above.
builder = rc.SimilarItemRecommenderBuilder(
    DATASET_PATH,
    [REC_SYS_CFG],
)

Insert Embeddings:   0%|          | 0/18703 [00:00<?, ?it/s]

In [13]:
# Show the recommendations for item 1 from the recommender registered under
# REC_SYS_CFG.name (n_sim_items=10, k=10).
(
    builder
    .item_recommender(REC_SYS_CFG.name, n_sim_items=10)
    .recommend(item_id=1, k=10)
    .show()
)

2023-10-08 12:49:34,658 - INFO - Found 1 items by ids: [1].
2023-10-08 12:49:34,659 - INFO - Found 10 similar to 1 item.



Item Recommender: tags-all-roberta-large-v1



Unnamed: 0,Similarity,Rating,.,Recommended Movies,..,Already seen movies,Rating.1
0,0.64,3.8,We Recommend ==>,,==> Because You Saw ==>,,3.8
1,0.59,4.1,We Recommend ==>,,==> Because You Saw ==>,,3.8
2,0.58,4.0,We Recommend ==>,,==> Because You Saw ==>,,3.8
3,0.55,3.9,We Recommend ==>,,==> Because You Saw ==>,,3.8
4,0.53,3.6,We Recommend ==>,,==> Because You Saw ==>,,3.8
5,0.52,4.0,We Recommend ==>,,==> Because You Saw ==>,,3.8
6,0.49,3.9,We Recommend ==>,,==> Because You Saw ==>,,3.8
7,0.49,4.0,We Recommend ==>,,==> Because You Saw ==>,,3.8
8,0.48,4.1,We Recommend ==>,,==> Because You Saw ==>,,3.8
