# Chatbot: Load data

In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports all loaded modules
# before each cell executes, so edits to imported .py files take effect
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Base directory, given relative to the notebook's working directory; the two
# paths below are derived from it.
BASE_PATH             = '../../..'
# Directory appended to sys.path in the import cell so local modules can be imported.
LIB_PATH              = f'{BASE_PATH}/lib'
# Directory handed to the dataset factory as the location of the data.
DATASET_PATH          = f'{BASE_PATH}/datasets'

In [3]:
import sys
sys.path.append(LIB_PATH)

import numpy as np
import pandas as pd

import util as ut

import torch
import data as dt
import data.dataset as ds
import requests
import json

2024-01-05 18:33:08.078900: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-01-05 18:33:08.955691: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-01-05 18:33:08.966767: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your 

<Figure size 640x480 with 0 Axes>

In [4]:
def to_tensor(obs, device, columns):
    """Select `columns` from `obs` and return them as a tensor on `device`.

    Args:
        obs: Object that supports ``obs[columns]`` with a list of column
            names, e.g. a pandas DataFrame.
        device: Target device forwarded to ``Tensor.to``.
        columns: List of column names to select.

    Returns:
        A ``torch.Tensor`` built from the selected data and moved to `device`.
    """
    data = obs[columns]
    # isinstance instead of an exact type comparison, so DataFrame subclasses
    # are also unwrapped to a numpy array before reaching torch.tensor.
    if isinstance(data, pd.DataFrame):
        data = data.values
    return torch.tensor(data).to(device)


def features_fn(obs, device):
    """Return the 'user_seq' and 'movie_seq' columns of `obs` as a tensor on `device`."""
    return to_tensor(obs, device, ['user_seq', 'movie_seq'])


def target_fn(obs, device):
    """Return the 'user_movie_rating' column of `obs` as a tensor on `device`."""
    return to_tensor(obs, device, ['user_movie_rating'])

In [5]:
def _rated_since_2004(df):
    """Keep only the rows whose 'user_movie_rating_year' is 2004 or later."""
    recent = df['user_movie_rating_year'] >= 2004
    return df[recent]


# Build the dataset from DATASET_PATH on CPU, wiring in the feature/target
# transforms and the rating-year row filter.
dataset = ds.MovieLensTMDBDatasetFactory.from_path(
    path=DATASET_PATH,
    transform=features_fn,
    target_transform=target_fn,
    device=torch.device('cpu'),
    filter_fn=_rated_since_2004,
)

In [6]:
# Movie attributes plus the per-interaction rating; one row per rating here.
item_columns = [
    'movie_id',
    'movie_title',
    'movie_genres',
    'movie_overview',
    'movie_release_year',
    'movie_imdb_id',
    'user_movie_rating',
]
items = dataset.data[item_columns]

items

Unnamed: 0,movie_id,movie_title,movie_genres,movie_overview,movie_release_year,movie_imdb_id,user_movie_rating
0,1,Toy Story,"[Adventure, Animation, Children, Comedy, Fantasy]","Led by Woody, Andy's toys live happily in his ...",1995,114709,4
1,1,Toy Story,"[Adventure, Animation, Children, Comedy, Fantasy]","Led by Woody, Andy's toys live happily in his ...",1995,114709,3
2,1,Toy Story,"[Adventure, Animation, Children, Comedy, Fantasy]","Led by Woody, Andy's toys live happily in his ...",1995,114709,5
3,1,Toy Story,"[Adventure, Animation, Children, Comedy, Fantasy]","Led by Woody, Andy's toys live happily in his ...",1995,114709,4
4,1,Toy Story,"[Adventure, Animation, Children, Comedy, Fantasy]","Led by Woody, Andy's toys live happily in his ...",1995,114709,4
...,...,...,...,...,...,...,...
194268,173173,This Is Not What I Expected,"[Comedy, Romance]","Lu Jin is a handsome, wealthy hotel executive ...",2017,6772874,3
194269,174399,Daddy's Little Girl,"[Horror, Thriller]",After the police find Derek’s daughter brutall...,2012,2214941,3
194270,174443,American Wrestler: The Wizard,[Drama],"In 1980, a teenage boy escapes the unrest in I...",2016,4303202,4
194271,174505,Besetment,"[Horror, Thriller]","After struggling to find employment, Amanda ta...",2016,3750238,1


In [7]:
# Average rating per movie, with movie_id restored as a regular column.
ratings_by_movie = items.groupby('movie_id')['user_movie_rating']
item_mean_rating = ratings_by_movie.mean().reset_index()
item_mean_rating

Unnamed: 0,movie_id,user_movie_rating
0,1,3.806931
1,2,3.035714
2,3,2.909091
3,4,2.000000
4,5,3.000000
...,...,...
18603,176257,3.000000
18604,176259,3.000000
18605,176263,2.000000
18606,176273,1.000000


In [8]:
# Collapse to one row per movie (first occurrence wins) and discard the
# per-interaction rating column.
items = (
    items
    .drop(columns='user_movie_rating')
    .drop_duplicates(subset='movie_id')
)
items

Unnamed: 0,movie_id,movie_title,movie_genres,movie_overview,movie_release_year,movie_imdb_id
0,1,Toy Story,"[Adventure, Animation, Children, Comedy, Fantasy]","Led by Woody, Andy's toys live happily in his ...",1995,114709
202,39,Clueless,"[Comedy, Romance]","Shallow, rich and socially successful Cher is ...",1995,112697
279,339,While You Were Sleeping,"[Comedy, Romance]",A love story built on a misunderstanding. A tr...,1995,114924
322,356,Forrest Gump,"[Comedy, Drama, Romance, War]",A man with a low IQ has accomplished great thi...,1994,109830
960,597,Pretty Woman,"[Comedy, Romance]",When millionaire wheeler-dealer Edward Lewis e...,1990,100405
...,...,...,...,...,...,...
194268,173173,This Is Not What I Expected,"[Comedy, Romance]","Lu Jin is a handsome, wealthy hotel executive ...",2017,6772874
194269,174399,Daddy's Little Girl,"[Horror, Thriller]",After the police find Derek’s daughter brutall...,2012,2214941
194270,174443,American Wrestler: The Wizard,[Drama],"In 1980, a teenage boy escapes the unrest in I...",2016,4303202
194271,174505,Besetment,"[Horror, Thriller]","After struggling to find employment, Amanda ta...",2016,3750238


In [9]:
# Attach each movie's mean rating by joining on movie_id.
items = pd.merge(items, item_mean_rating, on='movie_id')
items

Unnamed: 0,movie_id,movie_title,movie_genres,movie_overview,movie_release_year,movie_imdb_id,user_movie_rating
0,1,Toy Story,"[Adventure, Animation, Children, Comedy, Fantasy]","Led by Woody, Andy's toys live happily in his ...",1995,114709,3.806931
1,39,Clueless,"[Comedy, Romance]","Shallow, rich and socially successful Cher is ...",1995,112697,3.506494
2,339,While You Were Sleeping,"[Comedy, Romance]",A love story built on a misunderstanding. A tr...,1995,114924,3.395349
3,356,Forrest Gump,"[Comedy, Drama, Romance, War]",A man with a low IQ has accomplished great thi...,1994,109830,4.004702
4,597,Pretty Woman,"[Comedy, Romance]",When millionaire wheeler-dealer Edward Lewis e...,1990,100405,3.281818
...,...,...,...,...,...,...,...
18603,173173,This Is Not What I Expected,"[Comedy, Romance]","Lu Jin is a handsome, wealthy hotel executive ...",2017,6772874,3.000000
18604,174399,Daddy's Little Girl,"[Horror, Thriller]",After the police find Derek’s daughter brutall...,2012,2214941,3.000000
18605,174443,American Wrestler: The Wizard,[Drama],"In 1980, a teenage boy escapes the unrest in I...",2016,4303202,4.000000
18606,174505,Besetment,"[Horror, Thriller]","After struggling to find employment, Amanda ta...",2016,3750238,1.000000


In [10]:
base_url  = 'http://nonosoft.ddns.net:8080/api/v1'
PAGE_SIZE = 500  # number of items sent per bulk request

# Build one JSON-serializable payload per row of `items`.
payloads = [
    {
        'id'          : str(row['movie_id']),
        'title'       : row['movie_title'].strip(),
        'description' : row['movie_overview'].strip(),
        'genres'      : [g.lower() for g in row['movie_genres']],
        'release'     : str(row['movie_release_year']),
        'rating'      : float(row['user_movie_rating']),
        'imdb_id'     : str(row['movie_imdb_id'])
    }
    for _, row in items.iterrows()
]

headers     = {"Content-Type": "application/json"}
error_items = []  # payloads belonging to pages whose upload failed

# Slice the payloads into consecutive pages so that every row is sent exactly
# once, including the final page, which may hold fewer than PAGE_SIZE items.
for start in range(0, len(payloads), PAGE_SIZE):
    page = payloads[start:start + PAGE_SIZE]
    try:
        response = requests.post(f'{base_url}/items/bulk', data=json.dumps(page), headers=headers)
        # Turn HTTP 4xx/5xx answers into exceptions so rejected pages are
        # recorded in error_items instead of being counted as uploaded.
        response.raise_for_status()
    except Exception as e:
        # Best effort: report the failure, remember the page, keep going.
        print(e)
        error_items.extend(page)

# Nothing is left pending once the loop finishes.
page         = []
page_counter = 0