# Recommender ChatBot: Load items and interactions to API

In [1]:
# Reload imported modules automatically before executing code, so edits to
# the project library are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Relative filesystem layout used by the rest of the notebook.
BASE_PATH    = '../..'
# Appended to sys.path below so the project modules can be imported.
LIB_PATH     = f'{BASE_PATH}/lib'
DATASET_PATH = f'{BASE_PATH}/datasets'
# Cached item catalogue; the build cells are skipped when this file exists.
ITEMS_PATH   = f'{DATASET_PATH}/chatbot-api-movies.json'

In [3]:
import sys
sys.path.append(LIB_PATH)

import pandas as pd

import util as ut

import torch
import data as dt
import data.dataset as ds

import os

from rest import RecChatBotV1ApiClient

2024-02-17 23:01:57.529008: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-02-17 23:01:58.317126: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-02-17 23:01:58.327752: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your 

<Figure size 640x480 with 0 Axes>

# Common Functions and Classes

# Load Dataset

In [4]:
def to_tensor(obs, device, columns):
    """Select `columns` from `obs` and return the values as a tensor on `device`.

    Args:
        obs: Object indexable by `columns` (e.g. a pandas DataFrame).
        device: Target device, passed to `Tensor.to`.
        columns: Column label(s) to select from `obs`.

    Returns:
        A `torch.Tensor` built from the selected values and moved to `device`.
    """
    data = obs[columns]
    # isinstance (instead of an exact type comparison) also unwraps
    # DataFrame subclasses into their underlying NumPy array.
    if isinstance(data, pd.DataFrame):
        data = data.values
    return torch.tensor(data).to(device)

def features_fn(obs, device):
    """Feature transform: the 'user_seq' and 'movie_seq' columns as a tensor."""
    return to_tensor(obs, device, ['user_seq', 'movie_seq'])


def target_fn(obs, device):
    """Target transform: the 'user_movie_rating' column as a tensor."""
    return to_tensor(obs, device, ['user_movie_rating'])

In [5]:
def _rated_from_2004(df):
    """Keep only the rows whose rating year is 2004 or later."""
    return df[df['user_movie_rating_year'] >= 2004]


# Build the dataset on CPU, wiring in the feature/target transforms defined above.
dataset = ds.MovieLensTMDBDatasetFactory.from_path(
    path=DATASET_PATH,
    transform=features_fn,
    target_transform=target_fn,
    device=torch.device('cpu'),
    filter_fn=_rated_from_2004,
)

In [6]:
# List the columns available in the loaded dataset frame.
dataset.data.columns

Index(['user_id', 'user_seq', 'user_movie_tags', 'user_movie_rating',
       'user_movie_rating_timestamp', 'user_movie_rating_year', 'movie_id',
       'movie_seq', 'movie_title', 'movie_genres', 'movie_for_adults',
       'movie_original_language', 'movie_overview', 'movie_tags',
       'movie_release_year', 'movie_imdb_id'],
      dtype='object')

In [7]:
# Build the item catalogue only when no cached file exists at ITEMS_PATH.
if not os.path.exists(ITEMS_PATH):
    item_columns = [
        'movie_id', 'movie_title', 'movie_genres', 'movie_overview',
        'movie_release_year', 'movie_imdb_id'
    ]
    rated_items = dataset.data[item_columns + ['user_movie_rating']]

    # Mean rating per movie over all interactions kept by the dataset filter.
    item_mean_rating = rated_items.groupby(['movie_id'])['user_movie_rating'].mean().reset_index()

    # One row per movie: drop the per-interaction rating, then attach the mean
    # rating under the same 'user_movie_rating' column name.
    items = rated_items.drop_duplicates(subset=['movie_id']).drop(columns=['user_movie_rating'])
    items = items.merge(item_mean_rating, on='movie_id')

## Populate item posters

In [8]:
# Look up a poster for every item; skipped when the cached catalogue exists.
if not os.path.exists(ITEMS_PATH):
    from rest import TMDBApiClient
    import logging

    client = TMDBApiClient()

    with dt.progress_bar(items.shape[0], title='Processing') as pb:
        def resolve_url(title):
            """Return a (title, poster) pair for one movie title."""
            return (title, client.find_poster_by(title))

        titles = items['movie_title'].tolist()
        params = [[title] for title in titles]

        results = ut.ParallelExecutor()(
            resolve_url,
            params = params,
            fallback_result = None
        )

        # NOTE: despite its name, this mapping is keyed by movie title.
        # Every title is pre-seeded with None so that a lookup which came back
        # as the None fallback (presumably a failed call -- confirm against
        # ParallelExecutor) leaves a missing poster instead of raising when
        # the pair is unpacked.
        poster_by_id = dict.fromkeys(titles)
        poster_by_id.update(result for result in results if result is not None)

In [9]:
# Attach the posters and cache the finished catalogue; skipped when it already exists.
if not os.path.exists(ITEMS_PATH):
    items['poster'] = items['movie_title'].apply(lambda title: poster_by_id[title])
    # Write to the exact path the guard checks, rather than rebuilding it by
    # hand, so the two cannot drift apart.
    ut.save_df(items, ITEMS_PATH)

## Load built items

In [10]:
# Always reload the catalogue from the cache file (the same ITEMS_PATH the
# build cells check and write), so later cells see the persisted version.
items = ut.load_df(ITEMS_PATH)

In [11]:
# Remove the column-width limit so long values such as poster URLs are shown
# in full. This changes the pandas display option for the rest of the session.
pd.set_option('display.max_colwidth', None)
items[['movie_title', 'poster']].head()

Unnamed: 0,movie_title,poster
0,Toy Story,http://image.tmdb.org/t/p/w500/uXDfjJbdP4ijW5hWSBrPrlKpxab.jpg
1,Clueless,http://image.tmdb.org/t/p/w500/8AwVTcgpTnmeOs4TdTWqcFDXEsA.jpg
2,While You Were Sleeping,http://image.tmdb.org/t/p/w500/qNGO3ETcNwlWqK2kNRpbJSJRlos.jpg
3,Forrest Gump,http://image.tmdb.org/t/p/w500/arw2vcBveWOVZr6pxd9XTd1TdQa.jpg
4,Pretty Woman,http://image.tmdb.org/t/p/w500/hVHUfT801LQATGd26VPzhorIYza.jpg


In [12]:
# Keep only the interaction columns and give them shorter, generic names.
source_columns = ['user_id', 'movie_id', 'user_movie_rating', 'user_movie_rating_timestamp']
renamed_columns = {
    'user_movie_rating': 'rating',
    'movie_id': 'item_id',
    'user_movie_rating_timestamp': 'timestamp',
}
interactions = dataset.data[source_columns].rename(columns=renamed_columns)

interactions.head()

Unnamed: 0,user_id,item_id,rating,timestamp
0,100538,1,4,2019-01-01 21:11:09
496,100538,39,3,2019-01-03 11:58:44
8785,100538,339,3,2019-01-04 08:18:24
9194,100538,356,4,2019-01-03 10:37:28
14189,100538,597,4,2019-01-03 11:55:57


In [137]:
# Items whose overview is either the placeholder text or an empty string.
overview_is_missing = items['movie_overview'].isin(['No overview found.', ''])
incomplete_items = items[overview_is_missing]
incomplete_items.shape

(14, 8)

In [138]:
from rest import TMDBApiClient

# One client is reused for every lookup (as in the poster cell above)
# instead of constructing a new one on each iteration.
client = TMDBApiClient()

# Results of find_movies_by for each incomplete item, keyed by movie_id.
data = {
    movie_id: client.find_movies_by(movie_title)
    for movie_id, movie_title in zip(incomplete_items['movie_id'], incomplete_items['movie_title'])
}

In [135]:
def resolve_overview(data, row):
    """Pick an overview for one item row from previously fetched candidates.

    Candidates are scanned in order. Once a candidate's 'title' or
    'original_title' equals the row's 'movie_title' (ignoring case and
    surrounding whitespace), the first non-empty overview found at or after
    that candidate is returned.

    NOTE(review): the match is sticky, so the returned overview may belong to a
    later candidate with a different title. This mirrors the original logic;
    confirm it is the intended fallback.

    Args:
        data: Mapping of movie_id to a sequence of candidates, each indexed by
            'title', 'original_title' and 'overview'.
        row: Item record indexed by 'movie_id', 'movie_title' and
            'movie_overview'.

    Returns:
        The selected overview, or row['movie_overview'] unchanged when the item
        has no candidates or none of them qualifies.
    """
    def normalize(title):
        # A missing (None) title is compared as an empty string instead of raising.
        return (title or '').strip().lower()

    if row['movie_id'] not in data:
        return row['movie_overview']

    movies = data[row['movie_id']]
    if movies is None or len(movies) == 0:
        return row['movie_overview']

    wanted_title = normalize(row['movie_title'])
    title_matched = False
    for movie in movies:
        if not title_matched:
            title_matched = (
                wanted_title == normalize(movie['title'])
                or wanted_title == normalize(movie['original_title'])
            )

        # Truthiness covers both '' and None, so a missing overview is skipped
        # rather than failing on len(None).
        if title_matched and movie['overview']:
            return movie['overview']

    return row['movie_overview']

# Overwrite each item's overview with the resolved one; resolve_overview
# returns the row's current overview when it finds nothing better.
# Note that this mutates `items` in place.
items['movie_overview'] = items.apply(lambda row: resolve_overview(data, row), axis=1) 

In [136]:
# Re-run the check: items still left with a placeholder or empty overview.
overview_is_missing = items['movie_overview'].isin(['No overview found.', ''])
incomplete_items = items[overview_is_missing]
incomplete_items

Unnamed: 0,movie_id,movie_title,movie_genres,movie_overview,movie_release_year,movie_imdb_id,user_movie_rating,poster
11005,101629,"King - Jari Litmanen, The",[Documentary],,2012,2316787,4.0,http://image.tmdb.org/t/p/w500None
12040,120484,Lille Fridolf and I,[Comedy],,1956,49444,3.0,
13033,87063,Bis zum Ellenbogen,[Comedy],,2007,818108,3.0,http://image.tmdb.org/t/p/w500/cGYAVxSL1cQcFgUWE0F80FBe7kM.jpg
13036,87769,Die Frau des Frisörs,[Drama],,2008,1105294,3.0,http://image.tmdb.org/t/p/w500None
13039,88053,Hangtime - Kein leichtes Spiel,[Drama],,2009,1288502,3.0,http://image.tmdb.org/t/p/w500/zUmIgRtXLusjodCzxUpYDArfiBX.jpg
13042,89926,At Any Second,[Drama],,2008,1315582,3.0,
13065,110671,Cutlet for Three,[Comedy],,2010,1327702,2.0,
13140,113256,"New Jean-Claude, The","[Comedy, Romance]",,2002,297314,4.0,
13148,117535,Dans la peau d'une grande,[Comedy],,2011,1967481,4.0,http://image.tmdb.org/t/p/w500/8Z76AGUhVieAMcPbGSoB5hES9ju.jpg
13155,130058,Des roses en hiver,[Drama],,2014,3432260,1.0,http://image.tmdb.org/t/p/w500/99Gj5eYOL8s5E52d2fJkkx9wPoL.jpg


## Upload items and interactions to RecChatBotAPI

In [141]:
# Client for the RecChatBot API. This rebinds `client`, which earlier cells
# used for the TMDB client.
client = RecChatBotV1ApiClient()

In [142]:
# Upload the item catalogue (including the resolved overviews and posters).
client.add_items(items)

[]

In [38]:
# Upload the user-item interactions (user_id, item_id, rating, timestamp).
client.add_interactions(interactions)

[]