# Modelos: KNN

In [1]:
# Reload imported modules automatically before each cell executes, so edits to the
# project's own modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Base directory that every relative project path below is derived from.
BASE_PATH = '../..'

# Directories directly under the base directory.
LIB_PATH = BASE_PATH + '/lib'
DATASET_PATH = BASE_PATH + '/datasets'
WEIGHTS_PATH = BASE_PATH + '/weights'
METRICS_PATH = BASE_PATH + '/metrics/knn'

# Scratch directory (absolute; not derived from BASE_PATH).
TMP_PATH = '/var/tmp'

# Directory holding the per-model stacking files, inside the datasets directory.
STACKING_PATH = DATASET_PATH + '/stacking'

# Stacking files for the train split: user-based KNN, item-based KNN and ensemble.
USER_STACKING_TRAIN_PATH = STACKING_PATH + '/knn_user_train.json'
ITEM_STACKING_TRAIN_PATH = STACKING_PATH + '/knn_item_train.json'
ENSEMBLE_STACKING_TRAIN_PATH = STACKING_PATH + '/knn_ensemble_train.json'

# Stacking files for the test split, same three variants.
USER_STACKING_TEST_PATH = STACKING_PATH + '/knn_user_test.json'
ITEM_STACKING_TEST_PATH = STACKING_PATH + '/knn_item_test.json'
ENSEMBLE_STACKING_TEST_PATH = STACKING_PATH + '/knn_ensemble_test.json'

In [7]:
import sys
sys.path.append(LIB_PATH)

import numpy as np
import pandas as pd

import torch
from torch.utils.data import DataLoader

import pytorch_common.util as pu

import model as ml
import data.dataset as ds

import util as ut

import random

import service as sv

## Setup

In [8]:
# Prepare the directory that holds the knn_*_{train,test}.json stacking files
# (presumably ut.mkdir creates it when missing — confirm in util).
ut.mkdir(STACKING_PATH)

In [9]:
# Record the installed pytorch_common version alongside the results.
import pytorch_common
pytorch_common.__version__

'0.1.2'

In [10]:
# Record the installed PyTorch version alongside the results.
torch.__version__

'2.0.1+cu117'

In [11]:
# Build a console logger; the recorded output shows the root logger at INFO level.
pu.LoggerBuilder().on_console().build()

<RootLogger root (INFO)>

In [12]:
# Select the 'gpu' device through pytorch_common, then display the resolved device
# together with CUDA availability as a sanity check.
pu.set_device_name('gpu')

pu.get_device(), torch.cuda.is_available()

(device(type='cuda', index=0), True)

In [13]:
# Seed with 42 for reproducibility (which RNGs ut.set_seed covers is not visible
# here — confirm in util).
ut.set_seed(42)

In [14]:
# Run switches for this notebook.
# NOTE(review): N_NEIGHTBORS is a misspelling of "neighbors"; renaming it requires
# updating every reference in the rest of the notebook, so the name is left as is.
TRAIN        = True
N_NEIGHTBORS = 1000

In [15]:
def build_stacking_df(
    predictor,
    ds,
    n_neighbors = 1000,
    batch_size  = 500,
    num_workers = 24,
    pin_memory  = True
):
    """Assemble a stacking data frame with the predictor's output for `ds`.

    Note: inside this function the `ds` parameter shadows the module-level
    `data.dataset` alias of the same name.

    Args:
        predictor: object exposing `predict_dl(data_loader, n_neighbors)` that
            returns a torch tensor (assumed to hold one prediction per row of
            `ds.data`, in loader order — TODO confirm against the predictor).
        ds: dataset accepted by `DataLoader` that also exposes a `data` table
            with 'user_id', 'movie_id' and 'user_movie_rating' columns.
        n_neighbors: forwarded unchanged to `predictor.predict_dl`.
        batch_size: forwarded to `DataLoader`.
        num_workers: forwarded to `DataLoader`.
        pin_memory: forwarded to `DataLoader`.

    Returns:
        pd.DataFrame with columns 'user_id', 'movie_id', 'rating' and
        'prediction'.
    """
    dl = DataLoader(
        ds,
        batch_size  = batch_size,
        num_workers = num_workers,
        pin_memory  = pin_memory
    )

    predictions = predictor.predict_dl(dl, n_neighbors)

    return pd.DataFrame({
        'user_id'    : ds.data['user_id'],
        'movie_id'   : ds.data['movie_id'],
        'rating'     : ds.data['user_movie_rating'],
        # Tensor.numpy() raises for CUDA tensors and for tensors that require grad;
        # detach() and cpu() are no-ops for a plain CPU tensor, so this is safe either way.
        'prediction' : predictions.detach().cpu().numpy()
    })

In [16]:
def to_tensor(obs, device, columns):
    """Select `columns` from `obs` and return them as a tensor placed on `device`.

    A DataFrame selection is converted through its underlying array; any other
    selection result is handed to `torch.tensor` as is.

    Args:
        obs: indexable object; `obs[columns]` is the data to convert.
        device: target passed to `Tensor.to`.
        columns: key (or list of keys) used to index `obs`.

    Returns:
        torch.Tensor built from the selected data, moved to `device`.
    """
    data = obs[columns]
    # isinstance (rather than an exact type comparison) so DataFrame subclasses
    # also take the array path.
    if isinstance(data, pd.DataFrame):
        data = data.values
    return torch.tensor(data).to(device)


def features_fn(obs, device):
    """Return the 'user_seq' and 'movie_seq' columns of `obs` as a tensor on `device`."""
    return to_tensor(obs, device, ['user_seq', 'movie_seq'])


def target_fn(obs, device):
    """Return the 'user_movie_rating' column of `obs` as a tensor on `device`."""
    return to_tensor(obs, device, ['user_movie_rating'])

## Carga de dataset

In [17]:
# Build the dataset from DATASET_PATH through the MovieLens/TMDB factory. The row
# filter keeps ratings whose 'user_movie_rating_year' is 2010 or later; features_fn
# and target_fn are registered as the item/target transforms, with a CPU device.
dataset = ds.MovieLensTMDBDatasetFactory.from_path(
    path=DATASET_PATH,
    transform=features_fn,
    target_transform=target_fn,
    device=torch.device('cpu'),
    filter_fn=lambda ratings: ratings[ratings['user_movie_rating_year'] >= 2010],
)

# Split by year (split_year=2018); the call returns four values: the train set,
# the test set, and two rating statistics.
(train_set, test_set, rating_mean_df, rating_std) = dataset.train_test_split(split_year=2018)

# Shown as the cell result.
(train_set.shape, test_set.shape)

2023-05-30 19:01:33,338 - INFO - Train: 79.01 % - Test: 9.78 %


((117188, 18), (14507, 18))

## Entrenamiento del modelo

In [22]:
# User-based KNN: build the prediction service, run fit_predict with the training
# and test interactions, then call delete() on the service.
# NOTE(review): the value returned by fit_predict is discarded here, and what
# delete() removes is not visible in this notebook — confirm in service.
user_predictor = sv.KNNPredictionService(
    weights_path=WEIGHTS_PATH,
    temp_path=TMP_PATH,
    predictor_name='knn_user_based',
    user_seq_col='user_seq',
    item_seq_col='movie_seq',
    rating_col='user_movie_rating',
    model_Type=ml.KNNType.USER_BASED,
)
user_predictor.fit_predict(train_set.data, test_set.data)
user_predictor.delete()

2023-05-30 19:03:27,324 - INFO - Waiting 179 minutes to change interactions.


KNNUserBasedPredictor prediction:   0%|          | 0/14507 [00:00<?, ?it/s]

In [23]:
# Item-based KNN: build the prediction service, run fit_predict with the training
# and test interactions, then call delete() on the service.
# NOTE(review): the value returned by fit_predict is discarded here, and what
# delete() removes is not visible in this notebook — confirm in service.
item_predictor = sv.KNNPredictionService(
    weights_path=WEIGHTS_PATH,
    temp_path=TMP_PATH,
    predictor_name='knn_item_based',
    user_seq_col='user_seq',
    item_seq_col='movie_seq',
    rating_col='user_movie_rating',
    model_Type=ml.KNNType.ITEM_BASED,
)
item_predictor.fit_predict(train_set.data, test_set.data)
item_predictor.delete()

2023-05-30 19:03:35,317 - INFO - Waiting 179 minutes to change interactions.


KNNItemBasedPredictor prediction:   0%|          | 0/14507 [00:00<?, ?it/s]