# Recommender ChatBot: Collaborative Filtering recommender using user embeddings from chromadb

In [1]:
# Load IPython's autoreload extension. Mode 2 reloads all imported modules
# before each cell executes, so edits to local source files take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os

# Locations of the API and shared-library sources, expressed relative to this
# notebook's working directory.
BASE_PATH = '../..'
API_PATH = f'{BASE_PATH}/chat-bot-api'
LIB_PATH = f'{BASE_PATH}/lib'

# Export the runtime settings as environment variables in one step.
# Presumably the project modules imported later read these -- confirm.
# NOTE(review): 0.0.0.0 is normally a bind-all address; verify that it is the
# intended host for client connections to MongoDB and Chroma.
os.environ.update({
    'TMP_PATH': f'{BASE_PATH}/tmp',
    'DATASET_PATH': f'{BASE_PATH}/datasets',
    'WEIGHTS_PATH': f'{BASE_PATH}/weights',
    'METRICS_PATH': f'{BASE_PATH}/metrics',
    'MONGODB_URL': 'mongodb://0.0.0.0:27017',
    'MONGODB_DATABASE': 'chatbot',
    'CHROMA_HOST': '0.0.0.0',
    'CHROMA_PORT': '9090',
})

In [3]:
import sys

# Register the shared-library and API source directories on the module search
# path (same order as before: lib first, then API). This has to happen before
# the imports below, which presumably resolve from those directories.
sys.path.extend((LIB_PATH, API_PATH))

import pytorch_common.util as pu
from app_context import AppContext

2024-02-24 17:57:17.360600: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-02-24 17:57:18.087334: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-02-24 17:57:18.098481: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your 

<Figure size 640x480 with 0 Axes>

## Setup

In [4]:
# Build the logger through pytorch_common's LoggerBuilder with the console
# option enabled. The built logger is the cell's last expression, so its repr
# is echoed as the cell output (the root logger at INFO level in the recorded run).
pu.LoggerBuilder().on_console().build()

<RootLogger root (INFO)>

In [5]:
# Create the application context used by the cells below. In the recorded run
# this logs the loading of the `all-mpnet-base-v2` SentenceTransformer and the
# selected pytorch device.
# NOTE(review): presumably configured from the environment variables set
# earlier in the notebook -- confirm against AppContext.
ctx = AppContext()

2024-02-24 17:57:19,505 - INFO - Load pretrained SentenceTransformer: all-mpnet-base-v2
2024-02-24 17:57:20,066 - INFO - Use pytorch device: cuda
2024-02-24 17:57:20,069 - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
2024-02-24 17:57:20,091 - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


## Recommender

In [105]:
recommendations = await ctx.database_user_item_filtering_recommender.recommend(
    user_id            = "adrianmarino@gmail.com",
    text_query         = 'I want to see sci-fi movies',
    k_sim_users        = 10,
    max_items_by_user  = 10,
    min_rating_by_user = 4,
    not_seen           = True
)

2024-02-24 18:10:29,334 - INFO - Found 9 similar users
2024-02-24 18:10:29,339 - INFO - Found 425 similar users interactions
2024-02-24 18:10:29,339 - INFO - Select 90 similar users interactions
2024-02-24 18:10:29,589 - INFO - Found 2000 items by text query
2024-02-24 18:10:29,598 - INFO - Select 21 similar user items by text query
2024-02-24 18:10:29,601 - INFO - Select 21 similar user unseen items


In [106]:
# Display the recommended items flagged as already seen. The recorded run
# printed "Not Found items!", i.e. there were none to show.
recommendations.show_seen()

Not Found items!


In [107]:
# Show 10 recommendations ranked by the user-similarity-weighted *predicted*
# rating score (highest first in the recorded output).
recommendations.show(sort_by=['user_sim_weighted_pred_rating_score'], k=10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
Poster,,,,,,,,,,
Genres,ActionSci-fiThriller,ComedyDramaSci-fi,AdventureComedySci-fi,ComedyCrimeDramaThriller,AdventureAnimationChildrenDrama,ActionSci-fiThriller,Sci-fiThriller,ActionSci-fiImax,ActionHorrorSci-fiThriller,DramaSci-fiThriller
Mean Rating,4.272401,4.117647,4.101064,4.210526,3.995122,3.862069,3.185185,4.009524,4.045455,3.976636
Predicted Rating,4.794106,4.75918,4.655716,4.583458,4.469231,4.366083,4.32298,4.318829,4.299498,4.261951
User sim weighted rating score,0.989114,0.953286,0.949447,0.975057,0.925132,0.894363,0.737613,0.928253,0.936903,0.92064
User sim weighted predicted rating score,0.989114,0.981908,0.960561,0.945913,0.922297,0.901052,0.892157,0.891054,0.887379,0.87932
User Item Similarity,0.989114,0.989114,0.989114,0.989386,0.98934,0.989386,0.989386,0.989112,0.989462,0.989114


### Notes
* Movies seen by similar users weighted by user predicted rating.
* Ordered by **User sim weighted predicted rating score**.
* **User sim weighted predicted rating score** = similar_user_similarity (0..1) * user_predicted_rating (normalized to 0..1)

In [108]:
# Show 10 recommendations ranked by the user-similarity-weighted *mean*
# rating score (highest first in the recorded output).
recommendations.show(sort_by=['user_sim_weighted_rating_score'], k=10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
Poster,,,,,,,,,,
Genres,ActionSci-fiThriller,ComedyCrimeDramaThriller,ComedyDramaSci-fi,AdventureComedySci-fi,ActionHorrorSci-fiThriller,ActionSci-fiImax,AdventureAnimationChildrenDrama,DramaSci-fiThriller,AdventureDramaSci-fi,ActionSci-fiThriller
Mean Rating,4.272401,4.210526,4.117647,4.101064,4.045455,4.009524,3.995122,3.976636,3.912195,3.862069
Predicted Rating,4.794106,4.583458,4.75918,4.655716,4.299498,4.318829,4.469231,4.261951,4.242562,4.366083
User sim weighted rating score,0.989114,0.975057,0.953286,0.949447,0.936903,0.928253,0.925132,0.92064,0.905929,0.894363
User sim weighted predicted rating score,0.989114,0.945913,0.981908,0.960561,0.887379,0.891054,0.922297,0.87932,0.87552,0.901052
User Item Similarity,0.989114,0.989386,0.989114,0.989114,0.989462,0.989112,0.98934,0.989114,0.98934,0.989386


### Notes
* Movies seen by similar users weighted by movie mean rating.
* Ordered by **User sim weighted rating score**.
* **User sim weighted rating score** = similar_user_similarity (0..1) * mean_movie_rating (normalized to 0..1)

In [109]:
# Show 10 recommendations ranked by the user-item similarity column alone
# (highest first in the recorded output).
recommendations.show(sort_by=['user_item_sim'], k=10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
Poster,,,,,,,,,,
Genres,HorrorSci-fiThriller,HorrorMysteryThriller,HorrorMysteryThriller,ActionHorrorSci-fiThriller,ActionSci-fi,Sci-fiThriller,ActionSci-fiThriller,ActionAdventureSci-fiThriller,ComedyCrimeDramaThriller,ActionSci-fiImax
Mean Rating,3.169231,3.575221,2.875,4.045455,3.555556,3.185185,3.862069,3.601695,4.210526,3.429204
Predicted Rating,3.960943,4.062789,3.568155,4.299498,4.124599,4.32298,4.366083,3.960579,4.583458,3.892266
User sim weighted rating score,0.734794,0.828086,0.665902,0.936903,0.823381,0.737613,0.894363,0.834066,0.975057,0.794085
User sim weighted predicted rating score,0.818418,0.838612,0.736513,0.887379,0.851216,0.892157,0.901052,0.817366,0.945913,0.803231
User Item Similarity,0.990567,0.989565,0.989565,0.989462,0.989386,0.989386,0.989386,0.989386,0.989386,0.98934


### Notes
* Movies seen by similar users.
* Ordered by **similar user similarity by movie**.
* Take a sample of movies from each similar user, then assign a similarity to each movie and order by that similarity.