# Recommender ChatBot: LLM/Collaborative Filtering recommender ensemble

In [1]:
# Enable IPython's autoreload extension so that edits to imported modules are
# picked up without restarting the kernel. Mode 2 reloads all modules before
# each cell is executed.
%load_ext autoreload
%autoreload 2

In [2]:
import os

# Repository root, relative to the directory this notebook runs from.
BASE_PATH = '../..'
# Source trees that are appended to sys.path in the following cell.
API_PATH = BASE_PATH + '/chat-bot-api'
LIB_PATH = BASE_PATH + '/lib'

# Publish working directories (all under BASE_PATH) and the MongoDB / Chroma
# connection settings as environment variables.
# NOTE(review): presumably read by AppContext when it is constructed — confirm.
os.environ.update({
    'TMP_PATH':         BASE_PATH + '/tmp',
    'DATASET_PATH':     BASE_PATH + '/datasets',
    'WEIGHTS_PATH':     BASE_PATH + '/weights',
    'METRICS_PATH':     BASE_PATH + '/metrics',
    'MONGODB_URL':      'mongodb://0.0.0.0:27017',
    'MONGODB_DATABASE': 'chatbot',
    'CHROMA_HOST':      '0.0.0.0',
    'CHROMA_PORT':      '9090',
})

In [3]:
import sys

# Add the project's library and API source trees to the module search path
# (same order as before: LIB_PATH first, then API_PATH) ahead of the imports
# below.
# NOTE(review): presumably pytorch_common and app_context resolve from these
# directories — confirm.
sys.path.extend([LIB_PATH, API_PATH])

import pytorch_common.util as pu
from app_context import AppContext

2024-02-29 20:34:29.724435: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-02-29 20:34:30.412371: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-02-29 20:34:30.424138: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your 

<Figure size 640x480 with 0 Axes>

## Setup

In [4]:
# Build a logger that writes to the console; the value returned by build()
# is displayed as the cell result (the root logger at INFO level, per the
# recorded output).
(
    pu.LoggerBuilder()
    .on_console()
    .build()
)

<RootLogger root (INFO)>

In [5]:
# Create the application context used by the rest of the notebook.
# Per the log output of this cell, construction loads the
# `all-mpnet-base-v2` SentenceTransformer (on the cuda device in the recorded
# run) and emits Chroma telemetry notices.
# NOTE(review): presumably it also connects to the MongoDB / Chroma services
# configured through the environment variables above — confirm.
ctx = AppContext()

2024-02-29 20:34:31,800 - INFO - Load pretrained SentenceTransformer: all-mpnet-base-v2
2024-02-29 20:34:32,388 - INFO - Use pytorch device: cuda
2024-02-29 20:34:32,390 - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
2024-02-29 20:34:32,409 - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


## Recommender

In [6]:
recommendations = await ctx.database_user_item_filtering_recommender.recommend(
    user_id            = "adrianmarino@gmail.com",
    text_query         = "I want to see sci-fi movies from 90'",
    k_sim_users        = 10,
    max_items_by_user  = 50,
    text_query_limit   = 700,
    min_rating_by_user = 4,
    not_seen           = False
)

2024-02-29 20:34:33,963 - INFO - Found 10 similar users
2024-02-29 20:34:33,969 - INFO - Found 451 similar users interactions
2024-02-29 20:34:33,969 - INFO - Select 166 similar users interactions (max by user: 50, min rating: 4)
2024-02-29 20:34:34,307 - INFO - Found 700 items by text query
2024-02-29 20:34:34,314 - INFO - Select 11 similar user items by text query
2024-02-29 20:34:34,317 - INFO - Select 11 similar user unseen items
2024-02-29 20:34:34,325 - INFO - 39 movies seen by adrianmarino@gmail.com user


In [7]:
# Display the movies already seen by the target user together with the
# ratings (39 movies in the recorded run, per the log of the previous cell).
recommendations.show_seen()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38
Poster,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
Genres,AdventureAnimationChildrenComedyFantasy,ActionAdventureSci-fi,ActionSci-fiThrillerImax,AdventureAnimationChildrenComedyFantasy,ActionAdventureAnimationChildrenComedy,DramaMysterySci-fiThriller,AdventureAnimationChildrenComedyDramaFantasy,ActionAdventureSci-fi,AdventureAnimationChildrenComedyFantasy,ActionAdventure,ActionCrimeDramaImax,AdventureAnimationChildrenComedyFantasyImax,ActionCrimeDramaMysterySci-fiThrillerImax,Sci-fiImax,ActionAdventureFantasyImax,DramaMysteryThriller,AdventureAnimationChildrenComedy,ActionAdventureSci-fiThrillerWar,MysteryThriller,ActionAdventureAnimationChildrenComedy,AnimationChildrenDrama,ActionSci-fi,ActionAdventureSci-fiImax,ActionAdventureSci-fi,AdventureAnimationChildrenComedyFantasy,AdventureAnimationComedy,DramaWar,ActionSci-fiThriller,ActionAdventureSci-fi,ActionAdventureSci-fi,ActionAdventureAnimationChildrenComedyFantasy,AdventureAnimationChildrenComedyFantasy,ActionAdventureSci-fiImax,ActionSci-fiImax,Comedy,ActionAdventureSci-fiThrillerImax,Comedy,ActionAdventureSci-fi,ComedyDramaRomance
Rating,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,5.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,3.0,3.0,3.0,3.0,3.0,2.0


In [11]:
# Distinct ids present in the recommendation data (11 in the recorded run,
# matching the "Select 11 similar user items by text query" log line).
recommendations.data['id'].unique()

array(['1270', '164179', '173291', '1779', '19', '27660', '2916', '32',
       '36363', '442', '55768'], dtype=object)

In [8]:
# Show up to 10 recommendations ranked by the user-similarity-weighted
# predicted rating score.
recommendations.show(sort_by=['user_sim_weighted_pred_rating_score'], k=10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
Poster,,,,,,,,,,
Genres,ComedyDramaSci-fi,AdventureComedySci-fi,AdventureAnimationChildrenSci-fi,ActionComedySci-fi,ActionAdventureComedySci-fi,ActionSci-fiThriller,ActionAdventureHorrorSci-fi,MysterySci-fiThriller,ComedyDramaRomance,ActionAdventureSci-fi
Mean Rating,4.117647,4.101064,4.0,3.786517,3.908297,3.862069,3.987578,3.941748,3.9,3.552239
Predicted Rating,4.75918,4.655716,4.531448,4.494987,4.438426,4.366083,4.290807,4.181332,4.163946,4.100974
User sim weighted rating score,0.989114,0.985401,0.961118,0.909822,0.940205,0.927976,0.959277,0.947194,0.937047,0.85353
User sim weighted predicted rating score,0.989114,0.967877,0.942042,0.934463,0.923806,0.907665,0.893081,0.869324,0.865603,0.852551
User Item Similarity,0.989114,0.989386,0.989386,0.989386,0.990567,0.989386,0.990567,0.989462,0.98934,0.989386


### Notes
* Movies seen by similar users weighted by user predicted rating.
* Ordered by **User sim weighted predicted rating score**.
* **User sim weighted predicted rating score** = similar_user_similarity (0..1) * user_predicted_rating (normalized to 0..1)

In [9]:
# Show up to 10 recommendations ranked by the user-similarity-weighted
# mean rating score.
recommendations.show(sort_by=['user_sim_weighted_rating_score'], k=10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
Poster,,,,,,,,,,
Genres,ComedyDramaSci-fi,AdventureComedySci-fi,AdventureAnimationChildrenSci-fi,ActionAdventureHorrorSci-fi,MysterySci-fiThriller,ActionAdventureComedySci-fi,ComedyDramaRomance,ActionAnimationDramaSci-fi,ActionSci-fiThriller,ActionComedySci-fi
Mean Rating,4.117647,4.101064,4.0,3.987578,3.941748,3.908297,3.9,3.866667,3.862069,3.786517
Predicted Rating,4.75918,4.655716,4.531448,4.290807,4.181332,4.438426,4.163946,4.07275,4.366083,4.494987
User sim weighted rating score,0.989114,0.985401,0.961118,0.959277,0.947194,0.940205,0.937047,0.93019,0.927976,0.909822
User sim weighted predicted rating score,0.989114,0.967877,0.942042,0.893081,0.869324,0.923806,0.865603,0.847695,0.907665,0.934463
User Item Similarity,0.989114,0.989386,0.989386,0.990567,0.989462,0.990567,0.98934,0.990567,0.989386,0.989386


### Notes
* Movies seen by similar users weighted by movie mean rating.
* Ordered by **User sim weighted rating score**.
* **User sim weighted rating score** = similar_user_similarity (0..1) * mean_movie_rating (normalized to 0..1)

In [10]:
# Show up to 10 recommendations ranked by the user-item similarity column.
recommendations.show(sort_by=['user_item_sim'], k=10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
Poster,,,,,,,,,,
Genres,ActionAdventureHorrorSci-fi,ActionAdventureComedySci-fi,ActionAnimationDramaSci-fi,ActionDramaWar,ActionAdventureSci-fi,MysterySci-fiThriller,ActionSci-fiThriller,AdventureComedySci-fi,CrimeComedyAdventure,ActionComedySci-fi
Mean Rating,3.987578,3.908297,3.866667,3.446809,3.04918,3.941748,3.862069,4.101064,2.793651,3.786517
Predicted Rating,4.290807,4.438426,4.07275,3.852608,3.695041,4.181332,4.366083,4.655716,3.25725,4.494987
User sim weighted rating score,0.959277,0.940205,0.93019,0.829186,0.733201,0.947194,0.927976,0.985401,0.671257,0.909822
User sim weighted predicted rating score,0.893081,0.923806,0.847695,0.801875,0.768734,0.869324,0.907665,0.967877,0.67715,0.934463
User Item Similarity,0.990567,0.990567,0.990567,0.990567,0.990123,0.989462,0.989386,0.989386,0.989386,0.989386


### Notes
* Movies seen by similar users.
* Ordered by **similar user similarity by movie**.
* Take a sample of movies from each similar user, then assign each movie that user's similarity and order by similarity.