# Recommender ChatBot: LLM/Collaborative Filtering recommender ensemble

In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits made to the
# project modules imported below are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os

# Locations are expressed relative to this notebook's working directory.
BASE_PATH = '../..'
API_PATH = f'{BASE_PATH}/chat-bot-api'
LIB_PATH = f'{BASE_PATH}/lib'

# Publish the runtime settings as process environment variables in one step.
# Presumably the application context created later reads them; verify against
# the chat-bot-api settings code.
os.environ.update({
    # Working directories, all rooted at BASE_PATH.
    'TMP_PATH':         f'{BASE_PATH}/tmp',
    'DATASET_PATH':     f'{BASE_PATH}/datasets',
    'WEIGHTS_PATH':     f'{BASE_PATH}/weights',
    'METRICS_PATH':     f'{BASE_PATH}/metrics',
    # MongoDB connection.
    'MONGODB_URL':      'mongodb://0.0.0.0:27017',
    'MONGODB_DATABASE': 'chatbot',
    # Chroma connection (the port is kept as a string: environment values
    # must be strings).
    'CHROMA_HOST':      '0.0.0.0',
    'CHROMA_PORT':      '9090',
})

In [3]:
import sys
# Extend the module search path with the project's lib and API directories.
# This must happen before the imports below; presumably `pytorch_common` and
# `app_context` are resolved from these directories (confirm against the repo).
sys.path.append(LIB_PATH)
sys.path.append(API_PATH)

import pytorch_common.util as pu
from app_context import AppContext

[1m<[0m[1;95mFigure[0m[39m size 64[0m[1;36m0x480[0m[39m with [0m[1;36m0[0m[39m Axes[0m[1m>[0m

## Setup

In [4]:
# Configure logging with console output. The cell's displayed value is the
# root logger, shown at INFO level in the captured output.
pu.LoggerBuilder().on_console().build()

[1m<[0m[1;95mRootLogger[0m[39m root [0m[1;39m([0m[39mINFO[0m[1;39m)[0m[1m>[0m

In [5]:
# Build the application context that exposes the recommenders used below.
# Per the captured log, construction loads a SentenceTransformer model and
# selects the pytorch device, so this cell can take a few seconds.
ctx = AppContext()

2024-03-23 19:16:39,917 - INFO - Load pretrained SentenceTransformer: all-mpnet-base-v2
2024-03-23 19:16:42,660 - INFO - Use pytorch device_name: cuda
2024-03-23 19:16:42,730 - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
2024-03-23 19:16:42,749 - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


## Recommender

In [6]:
# Ask the user/item filtering recommender for items matching a free-text query.
# `recommend` is awaited directly at cell level, which relies on the notebook
# kernel's support for top-level `await`.
#
# Parameter notes, inferred from the log lines this call emits (confirm against
# the recommender implementation):
#   k_sim_users        - number of similar users looked up ("Found 10 similar users").
#   max_items_by_user  - cap on interactions kept per similar user ("max by user: 50").
#   min_rating_by_user - minimum rating for an interaction to be kept ("min rating: 4").
#   text_query_limit   - number of items fetched for the text query ("Found 700 items").
#   not_seen           - presumably toggles filtering of items the user already saw;
#                        TODO confirm exact semantics.
recommendations = await ctx.database_user_item_filtering_recommender.recommend(
    user_id            = "adrianmarino@gmail.com",
    text_query         = "I want to see sci-fi movies from 90'",
    k_sim_users        = 10,
    max_items_by_user  = 50,
    text_query_limit   = 700,
    min_rating_by_user = 4,
    not_seen           = False
)

2024-03-23 19:16:45,480 - INFO - Found 10 similar users
2024-03-23 19:16:45,484 - INFO - Found 451 similar users interactions
2024-03-23 19:16:45,484 - INFO - Select 172 similar users interactions (max by user: 50, min rating: 4)
2024-03-23 19:16:45,809 - INFO - Found 700 items by text query
2024-03-23 19:16:45,816 - INFO - Select 9 similar user items by text query
2024-03-23 19:16:45,817 - INFO - Select 9 similar user unseen items
2024-03-23 19:16:45,824 - INFO - 0 movies seen by adrianmarino@gmail.com user


In [9]:
# Display the items the target user has already seen (none were found for this
# user in the captured run).
recommendations.show_seen()

Not Found items!


In [10]:
# List the distinct item ids present in the recommendation result.
recommendations.data['id'].unique()


[1;35marray[0m[1m([0m[1m[[0m[32m'109487'[0m, [32m'1200'[0m, [32m'1270'[0m, [32m'1371'[0m, [32m'164179'[0m, [32m'173291'[0m, [32m'19'[0m, [32m'32'[0m,
       [32m'79091'[0m[1m][0m, [33mdtype[0m=[35mobject[0m[1m)[0m

In [11]:
# Render the recommendations ranked by the similarity-weighted *predicted*
# rating score; the captured table lists the highest scores first.
# `k` presumably caps the number of items displayed (TODO confirm).
recommendations.show(
    sort_by        = ['user_sim_weighted_pred_rating_score'],
    k              = 10
)

Unnamed: 0,0,1,2,3,4,5,6,7,8
Poster,,,,,,,,,
Genres,AdventureComedySci-fi,ActionAdventureHorrorSci-fi,MysterySci-fiThriller,Sci-fiImax,Documentary,AdventureSci-fi,ActionAdventureSci-fi,CrimeComedyAdventure,AnimationChildrenComedyCrime
Mean Rating,4.101064,3.987578,3.941748,4.062907,4.037037,3.08,3.04918,2.793651,3.60241
Predicted Rating,0.339101,0.273018,0.266492,0.23275,0.16196,0.0,0.0,0.0,0.0
User sim weighted rating score,0.989386,0.963156,0.950689,0.981351,0.974665,0.742849,0.736166,0.673971,0.869733
User sim weighted predicted rating score,0.989386,0.797531,0.777323,0.6799,0.4729,0.0,0.0,0.0,0.0
User Item Similarity,0.989386,0.990567,0.989114,0.990567,0.990123,0.989114,0.990123,0.989386,0.990123


### Notes
* Movies seen by similar users weighted by user predicted rating.
* Ordered by **User sim weighted predicted rating score**.
* **User sim weighted predicted rating score** = similar_user_similarity (0..1) * user_predicted_rating (normalized to 0..1)

In [9]:
# Render the recommendations ranked by the similarity-weighted *mean* rating
# score; the captured table lists the highest scores first.
# `k` presumably caps the number of items displayed (TODO confirm).
recommendations.show(
    sort_by        = ['user_sim_weighted_rating_score'],
    k              = 10
)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
Poster,,,,,,,,,,
Genres,ComedyDramaSci-fi,AdventureComedySci-fi,AdventureAnimationChildrenSci-fi,ActionAdventureHorrorSci-fi,MysterySci-fiThriller,ActionAdventureComedySci-fi,ComedyDramaRomance,ActionAnimationDramaSci-fi,ActionSci-fiThriller,ActionComedySci-fi
Mean Rating,4.117647,4.101064,4.0,3.987578,3.941748,3.908297,3.9,3.866667,3.862069,3.786517
Predicted Rating,4.75918,4.655716,4.531448,4.290807,4.181332,4.438426,4.163946,4.07275,4.366083,4.494987
User sim weighted rating score,0.989114,0.985401,0.961118,0.959277,0.947194,0.940205,0.937047,0.93019,0.927976,0.909822
User sim weighted predicted rating score,0.989114,0.967877,0.942042,0.893081,0.869324,0.923806,0.865603,0.847695,0.907665,0.934463
User Item Similarity,0.989114,0.989386,0.989386,0.990567,0.989462,0.990567,0.98934,0.990567,0.989386,0.989386


### Notes
* Movies seen by similar users weighted by movie mean rating.
* Ordered by **User sim weighted rating score**.
* **User sim weighted rating score** = similar_user_similarity (0..1) * mean_movie_rating (normalized to 0..1)

In [10]:
# Render the recommendations ranked by the user-item similarity column; the
# captured table lists the highest similarities first.
# `k` presumably caps the number of items displayed (TODO confirm).
recommendations.show(
    sort_by        = ['user_item_sim'],
    k              = 10
)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
Poster,,,,,,,,,,
Genres,ActionAdventureHorrorSci-fi,ActionAdventureComedySci-fi,ActionAnimationDramaSci-fi,ActionDramaWar,ActionAdventureSci-fi,MysterySci-fiThriller,ActionSci-fiThriller,AdventureComedySci-fi,CrimeComedyAdventure,ActionComedySci-fi
Mean Rating,3.987578,3.908297,3.866667,3.446809,3.04918,3.941748,3.862069,4.101064,2.793651,3.786517
Predicted Rating,4.290807,4.438426,4.07275,3.852608,3.695041,4.181332,4.366083,4.655716,3.25725,4.494987
User sim weighted rating score,0.959277,0.940205,0.93019,0.829186,0.733201,0.947194,0.927976,0.985401,0.671257,0.909822
User sim weighted predicted rating score,0.893081,0.923806,0.847695,0.801875,0.768734,0.869324,0.907665,0.967877,0.67715,0.934463
User Item Similarity,0.990567,0.990567,0.990567,0.990567,0.990123,0.989462,0.989386,0.989386,0.989386,0.989386


### Notes
* Movies seen by similar users.
* Ordered by **similar user similarity by movie**.
* Take a sample of movies from each similar user, then assign a similarity to each movie and order by that similarity.