# Models: Genres content-based filtering

In [1]:
# Enable IPython's autoreload extension so that edits to the imported library
# modules are picked up live, without restarting the kernel.
# NOTE: keep comments on their own lines; text after a line magic is passed to it as arguments.
%load_ext autoreload
%autoreload 2

In [2]:
# Root of the project, relative to the directory this notebook runs from.
BASE_PATH = '../..'

# Directory appended to sys.path by the imports cell so the local packages resolve.
LIB_PATH = BASE_PATH + '/lib'

# Directory the dataset is read from and the embedding file is written to.
DATASET_PATH = BASE_PATH + '/datasets'

In [3]:
import sys
sys.path.append(LIB_PATH)

import numpy as np
import pandas as pd

import data.dataset as ds

import util as ut

import recommender as rc

from database.chromadb import RepositoryFactory

import pytorch_common.util as pu

2023-10-07 22:27:07.072931: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-10-07 22:27:08.154772: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-10-07 22:27:08.167226: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your 

<Figure size 640x480 with 0 Axes>

## Setup

In [4]:
# Configure logging to the console. The built logger is the cell's last
# expression, so its repr (the root logger at INFO level in the recorded
# output) is displayed below the cell.
console_logger_builder = pu.LoggerBuilder().on_console()
console_logger_builder.build()

<RootLogger root (INFO)>

## Load dataset

In [5]:
def rated_since_2004(df):
    """Keep only the rows whose `user_movie_rating_year` is 2004 or later."""
    return df.query('user_movie_rating_year >= 2004')


# Load the MovieLens + TMDB dataset from DATASET_PATH, passing the year filter
# above as `filter_fn` (presumably applied to the raw frame by the factory; confirm).
dataset = ds.MovieLensTMDBDatasetFactory.from_path(DATASET_PATH, filter_fn=rated_since_2004)

In [6]:
# Count the rows (non-null movie_id values) per user; after reset_index the
# `movie_id` column holds that count, not an actual movie id.
ratings_per_user = (
    dataset.data
    .groupby(['user_id'])['movie_id']
    .count()
    .reset_index()
)

# Show the three users with the most rows in the filtered dataset.
ratings_per_user.sort_values('movie_id', ascending=False).head(3)

Unnamed: 0,user_id,movie_id
5019,62199,3688
520,6550,1935
5653,70092,1745


In [7]:
# Inspect every row belonging to user 10955. In the recorded output this user
# has a single rating, on a movie whose only genre is '(no genres listed)'.
ratings = dataset.data
ratings[ratings['user_id'] == 10955]

Unnamed: 0,user_id,user_seq,user_movie_tags,user_movie_rating,user_movie_rating_timestamp,user_movie_rating_year,movie_id,movie_seq,movie_title,movie_genres,movie_for_adults,movie_original_language,movie_overview,movie_tags,movie_release_year,movie_imdb_id
194219,10955,12837,[salman khan],1,2016-05-06 21:24:11,2016,146400,18555,Har Dil Jo Pyar Karega...,[(no genres listed)],True,hi,"Raj is a struggling singer with big dreams, wh...",[salman khan],2000,250415


In [8]:
# Inspect every row belonging to user 24853. In the recorded output this user
# also has a single rating, on a movie whose only genre is '(no genres listed)'.
ratings = dataset.data
is_user_24853 = ratings['user_id'] == 24853
ratings.loc[is_user_24853]

Unnamed: 0,user_id,user_seq,user_movie_tags,user_movie_rating,user_movie_rating_timestamp,user_movie_rating_year,movie_id,movie_seq,movie_title,movie_genres,movie_for_adults,movie_original_language,movie_overview,movie_tags,movie_release_year,movie_imdb_id
194157,24853,12667,"[bizarre, drama, good photography, indie, young]",5,2016-04-03 19:09:38,2016,128727,18495,Bizarre,[(no genres listed)],True,en,"Maurice, a reticent young homeless man, someho...","[bizarre, drama, good photography, indie, young]",2015,3904272


In [9]:
# Placeholder genre value passed as `exclude` (presumably dropped before the
# embedding is built; confirm in ut.embedding_from_list_col).
genres_to_skip = ['(no genres listed)']

# Build the embedding frame from the list-valued `movie_genres` column, keyed
# by `user_id`. NOTE(review): the exact embedding scheme lives in
# ut.embedding_from_list_col; the output file name suggests genre frequencies.
df = ut.embedding_from_list_col(dataset.data, 'user_id', 'movie_genres', exclude=genres_to_skip)

In [10]:
# Persist the embedding frame as JSON under the datasets directory; the same
# path is given as `file_path` when the repository is created later on.
embedding_path = f'{DATASET_PATH}/user-genre-freq-embedding.json'
ut.save_df(df, embedding_path)

In [11]:
# Create the embedding repository from the JSON file written above.
# The recorded output shows an "Insert Embeddings" progress bar over 13000 items.
# NOTE(review): the factory comes from `database.chromadb`, so this presumably
# fills a ChromaDB collection; confirm in RepositoryFactory.create.
repository_factory = RepositoryFactory()
repository = repository_factory.create(
    name='user-genre-freq-embedding',
    file_path=f'{DATASET_PATH}/user-genre-freq-embedding.json',
    metadata_cols=['user_id'],
    embedding_col='movie_genres_embedding',
    id_col='user_id',
)


Insert Embeddings:   0%|          | 0/13000 [00:00<?, ?it/s]

In [12]:
# Content-based recommender backed by the user genre-embedding repository and
# the loaded dataset. The recorded outputs label it "genres-cb-recommender",
# so the first argument presumably feeds the recommender name (confirm).
recommender = rc.EmbCBFilteringRecommender(
    'genres',
    repository,
    dataset,
)

In [13]:
# Recommendations for user 6550, the second most active user in the per-user
# count table shown earlier in this notebook (1935 rows).
recommendations_6550 = recommender.recommend(6550)
recommendations_6550.show()


Recommender: genres-cb-recommender



Unnamed: 0,Rating,Movies,movie_genres,movie_release_year
0,5.0,,[Action],2010
16,5.0,,"[Drama, Romance, Sci-Fi]",2010
79,5.0,,[Drama],2010
101,5.0,,"[Action, Drama, Mystery, Sci-Fi, Thriller]",2011
274,5.0,,"[Drama, Thriller]",2011
369,5.0,,"[Action, Crime]",2011
414,5.0,,"[Action, Adventure, Thriller, IMAX]",2012
519,5.0,,"[Animation, Children, Comedy]",2012
541,5.0,,[Drama],2012
551,5.0,,[Comedy],2014


In [14]:
# Recommendations for user 62199, the most active user in the per-user count
# table shown earlier in this notebook (3688 rows).
recommendations_62199 = recommender.recommend(62199)
recommendations_62199.show()


Recommender: genres-cb-recommender



Unnamed: 0,Rating,Movies,movie_genres,movie_release_year
0,4.985932,,[Drama],2014
12,4.985932,,"[Comedy, Drama, War]",2015
17,4.983177,,"[Animation, Comedy, Drama, Fantasy, Sci-Fi]",2012
32,4.983177,,"[Adventure, Animation, Drama]",2014
51,4.980317,,[Comedy],2011
52,4.980317,,"[Drama, War]",2015
53,4.236666,,"[Drama, Sci-Fi, Thriller]",2015
357,4.0,,[Documentary],2013
365,4.0,,"[Comedy, Drama]",2014
393,4.0,,"[Documentary, Western]",2015


In [15]:
# Recommendations for user 70092, the third most active user in the per-user
# count table shown earlier in this notebook (1745 rows).
recommendations_70092 = recommender.recommend(70092)
recommendations_70092.show()


Recommender: genres-cb-recommender



Unnamed: 0,Rating,Movies,movie_genres,movie_release_year
0,4.982299,,"[Action, Adventure, Sci-Fi]",2014
72,4.982299,,"[Action, Animation, Comedy]",2014
190,4.97983,,[Drama],2014
202,4.97983,,"[Comedy, Drama, War]",2015
207,4.97976,,"[Animation, Comedy]",2012
289,4.97976,,[Drama],2013
364,4.979412,,"[Action, Adventure, Drama, Fantasy, Mystery, IMAX]",2011
435,4.977702,,"[Drama, Mystery, War]",2010
472,4.977702,,[Documentary],2011
504,4.977702,,"[Adventure, Comedy, Documentary, Mystery]",2014
