# Multi-feature user-profile content-based model


In [1]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# modules are picked up before each cell runs, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Root directory that every other path in this notebook is derived from
# (a relative path, so it is resolved against the kernel's working directory).
BASE_PATH = '../..'

# Directory appended to sys.path so the local packages can be imported.
LIB_PATH = f'{BASE_PATH}/lib'

# Directory handed to the dataset factory when loading the data.
DATASET_PATH = f'{BASE_PATH}/datasets'

In [3]:
import sys
sys.path.append(LIB_PATH)

import numpy as np
import pandas as pd

import data.dataset as ds

import util as ut

import recommender as rc

from database.chromadb import RepositoryFactory

import pytorch_common.util as pu

2023-10-23 22:29:00.933984: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-10-23 22:29:01.712516: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-10-23 22:29:01.722836: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your 

<Figure size 640x480 with 0 Axes>

## Setup

In [4]:
# Show up to 2000 characters per cell so long text columns are not truncated
# when DataFrames are displayed. The fully qualified option name is used:
# pandas resolves abbreviated keys by pattern matching, which can become
# ambiguous (and raise) if similarly named options are added in a later version.
pd.set_option('display.max_colwidth', 2000)

In [5]:
# Build a console logger through the pytorch_common builder. The built logger
# is the cell's last expression, so its repr is shown as the cell output.
(
    pu.LoggerBuilder()
      .on_console()
      .build()
)

<RootLogger root (INFO)>

## Load dataset

In [6]:
dataset = ds.MovieLensTMDBDatasetFactory.from_path(
    DATASET_PATH,
    filter_fn = lambda df: df.query('user_movie_rating_year >= 2004')
)

ds = dataset.data.rename(
    columns={
        'user_movie_rating'       : 'rating',
        'movie_genres'            : 'genres', 
        'movie_for_adults'        : 'adults',
        'movie_original_language' : 'language',
        'movie_release_year'      : 'year',
        'movie_tags'              : 'tags'
    }
)

In [7]:
model = rc.MultiFeatureUserProfileRecommender(
    user_id_col  = 'user_id',
    item_id_col  = 'movie_id',
    emb_cols     = [ 'genres', 'adults', 'language', 'year' ],
    col_bucket   = { 'year': 10 }
)

model.fit(ds)

<recommender.user.content_based.multi_feature_user_profile_recommender.MultiFeatureUserProfileRecommender at 0x7f72307a67a0>

In [9]:
# Inspect the fitted profile row of a single user (id 6550): one weight per
# feature column, as shown in the output below.
(
    model
    .user_profile
    .query('user_id == 6550')
)

Unnamed: 0,user_id,language_ku,language_ta,language_sq,language_fa,genres_film-noir,genres_mystery,language_lv,year_1990,language_uk,...,language_de,language_sr,genres_crime,language_id,language_pl,language_ab,year_2000,language_el,language_mr,genres_musical
520,6550,0.0,0.0,0.0,0.0,0.000642,0.013951,0.0,0.036622,0.0,...,0.000642,0.0,0.020652,9.2e-05,0.0,0.0,0.049656,0.0,0.0,0.003121


In [11]:
# Ask the model for recommendations for user 6550. The method takes a list of
# user ids; the displayed result has user_id, movie_id and score columns.
model.recommend_all(
    [6550]
)

2023-10-23 22:29:39,292 - INFO - 0:00:03.16


Unnamed: 0,user_id,movie_id,score
0,6550,81132,0.72749
1,6550,43932,0.688481
2,6550,6902,0.685544
3,6550,117646,0.675447
4,6550,72165,0.667279
5,6550,4719,0.663791
6,6550,164226,0.662322
7,6550,164185,0.65911
8,6550,2617,0.654245
9,6550,4956,0.653052
