# Recommender ChatBot: Collaborative Filtering recommender using user embeddings from chromadb

In [1]:
# Enable IPython's autoreload extension in mode 2: imported modules are
# reloaded before each cell executes, so edits to project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Base directory (relative to the notebook's working directory) from which the
# library and API source paths are derived.
BASE_PATH = '../..'
LIB_PATH = '/'.join([BASE_PATH, 'lib'])
API_PATH = '/'.join([BASE_PATH, 'chat-bot-api'])

In [4]:
import sys
# Put the library and API directories on the module search path (in that
# order) so the project-local imports that follow can be resolved.
sys.path.extend([LIB_PATH, API_PATH])


import torch
import pytorch_common
import pytorch_common.util as pu
import util as ut
import os
import numpy as np
import logging
import pandas as pd
from recommender import DatabaseUserItemFilteringRecommender

2024-02-23 19:12:14.166498: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-02-23 19:12:14.947764: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-02-23 19:12:14.958435: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your 

<Figure size 640x480 with 0 Axes>

## Setup

In [5]:
# Configure console logging through pytorch_common's LoggerBuilder; the value
# returned by build() is displayed as the cell output.
pu.LoggerBuilder().on_console().build()

<RootLogger root (INFO)>

In [6]:
# pytorch_common is already imported in the notebook's import cell, so it is
# not re-imported here; this cell only reports the installed version.
pytorch_common.__version__

'0.3.8'

In [7]:
# Display the installed PyTorch version so the run environment is recorded.
torch.__version__

'2.1.2+cu118'

In [8]:
# Ask pytorch_common to use the GPU as the compute device.
pu.set_device_name('gpu')

# Sanity check: show the device pytorch_common resolves to, alongside whether
# CUDA is available to torch.
pu.get_device(), torch.cuda.is_available()

(device(type='cuda', index=0), True)

In [9]:
# Fix the random seed so repeated runs are comparable.
# NOTE(review): which RNGs util.set_seed covers is not visible here — confirm.
ut.set_seed(42)

In [10]:
# Runtime configuration exported as environment variables: working directories
# derived from BASE_PATH, followed by the MongoDB and Chroma connection settings.
os.environ.update({
    'TMP_PATH': f'{BASE_PATH}/tmp',
    'DATASET_PATH': f'{BASE_PATH}/datasets',
    'WEIGHTS_PATH': f'{BASE_PATH}/weights',
    'METRICS_PATH': f'{BASE_PATH}/metrics',
    'MONGODB_URL': 'mongodb://0.0.0.0:27017',
    'MONGODB_DATABASE': 'chatbot',
    'CHROMA_HOST': '0.0.0.0',
    'CHROMA_PORT': '9090',
})

# Imported only after the environment has been populated.
# NOTE(review): presumably app_context reads the variables above — confirm
# before moving this import to the top-level import cell.
from app_context import AppContext

## Recommender

In [11]:
# Build the application context; the repositories handed to the recommender
# later in the notebook are read from this object.
ctx = AppContext()

2024-02-23 19:12:19,003 - INFO - Load pretrained SentenceTransformer: all-mpnet-base-v2
2024-02-23 19:12:19,569 - INFO - Use pytorch device: cuda
2024-02-23 19:12:19,571 - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
2024-02-23 19:12:19,591 - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


In [12]:
recommender = DatabaseUserItemFilteringRecommender(
    user_emb_repository          = ctx.users_cf_emb_repository,
    items_repository             = ctx.items_repository,
    interactions_repository      = ctx.interactions_repository,
    pred_interactions_repository = ctx.pred_interactions_repository,

)

recommendations = await recommender.recommend(
    user_id           = "adrianmarino@gmail.com",
    k_sim_users       = 20,
    max_items_by_user = 5,
    not_seen          = True
)

In [13]:
# Display the items the target user has already seen.
# NOTE(review): inferred from the method name and the "Rating" row of the
# rendered output — confirm against the recommendations class.
recommendations.show_seen()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38
Poster,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Genres,AdventureAnimationChildrenComedyFantasy,ActionAdventureSci-fi,ActionSci-fiThrillerImax,AdventureAnimationChildrenComedyFantasy,ActionAdventureAnimationChildrenComedy,DramaMysterySci-fiThriller,AdventureAnimationChildrenComedyDramaFantasy,ActionAdventureSci-fi,AdventureAnimationChildrenComedyFantasy,ActionAdventure,ActionCrimeDramaImax,AdventureAnimationChildrenComedyFantasyImax,ActionCrimeDramaMysterySci-fiThrillerImax,Sci-fiImax,ActionAdventureFantasyImax,DramaMysteryThriller,AdventureAnimationChildrenComedy,ActionAdventureSci-fiThrillerWar,MysteryThriller,ActionAdventureAnimationChildrenComedy,AnimationChildrenDrama,ActionSci-fi,ActionAdventureSci-fiImax,ActionAdventureSci-fi,AdventureAnimationChildrenComedyFantasy,AdventureAnimationComedy,War,ActionSci-fiThriller,ActionAdventureSci-fi,ActionAdventureSci-fi,ActionAdventureAnimationChildrenComedyFantasy,AdventureAnimationChildrenComedyFantasy,ActionAdventureSci-fiImax,ActionSci-fiImax,Comedy,ActionAdventureSci-fiThrillerImax,Comedy,ActionAdventureSci-fi,ComedyDramaRomance
Rating,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,3.0,3.0,3.0,3.0,3.0,2.0


In [14]:
# Show 5 recommendations sorted by the user-similarity-weighted predicted rating score.
recommendations.show(sort_by=['user_sim_weighted_pred_rating_score'], k=5)

Unnamed: 0,0,1,2,3,4
Poster,,,,,
Genres,ActionCrimeDramaThriller,ActionSci-fiThriller,Comedy,AdventureComedySci-fi,Drama
Mean Rating,4.388889,4.272401,4.044776,4.101064,4.375
Predicted Rating,4.806043,4.794106,4.772671,4.655716,4.57359
User sim weighted rating score,0.868326,0.844945,0.801325,0.810956,0.866358
User sim weighted predicted rating score,0.989232,0.986385,0.983689,0.957789,0.942234
User Item Similarity,0.989232,0.988841,0.990567,0.988715,0.990123


### Notes
* Movies seen by similar users weighted by user predicted rating.
* Ordered by **User sim weighted predicted rating score**.
* **User sim weighted predicted rating score** = similar_user_similarity (0..1) * user_predicted_rating (normalized to 0..1)

In [15]:
# Show 5 recommendations sorted by the user-similarity-weighted mean rating score.
recommendations.show(sort_by=['user_sim_weighted_rating_score'], k=5)

Unnamed: 0,0,1,2,3,4
Poster,,,,,
Genres,ActionAdventureDramaFantasy,ActionCrimeDramaThriller,Drama,ActionSci-fiThriller,ComedyDramaWar
Mean Rating,5.0,4.388889,4.375,4.272401,4.25
Predicted Rating,4.19122,4.806043,4.57359,4.794106,4.484792
User sim weighted rating score,0.988815,0.868326,0.866358,0.844945,0.840493
User sim weighted predicted rating score,0.862319,0.989232,0.942234,0.986385,0.92272
User Item Similarity,0.988815,0.989232,0.990123,0.988841,0.988815


### Notes
* Movies seen by similar users weighted by movie mean rating.
* Ordered by **User sim weighted rating score**.
* **User sim weighted rating score** = similar_user_similarity (0..1) * mean_movie_rating (normalized to 0..1)

In [16]:
# Show 5 recommendations sorted by the user-item similarity.
recommendations.show(sort_by=['user_item_sim'], k=5)

Unnamed: 0,0,1,2,3,4
Poster,,,,,
Genres,Comedy,DramaRomanceSci-fi,Thriller,Drama,AnimationChildrenFantasyMusical
Mean Rating,4.044776,3.902439,3.857143,3.97619,3.571429
Predicted Rating,4.772671,4.271195,3.970502,4.510912,3.814456
User sim weighted rating score,0.801325,0.773126,0.764152,0.787737,0.707231
User sim weighted predicted rating score,0.983689,0.88033,0.818355,0.929738,0.78584
User Item Similarity,0.990567,0.990567,0.990567,0.990567,0.990123


### Notes
* Movies seen by similar users.
* Ordered by **similar user similarity by movie**.
* Take a sample of movies from each similar user, then assign each movie that user's similarity and order by similarity.