In [1]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path

protomf_path = Path("./ProtoMF/")
sys.path.append(protomf_path.__str__())

import argparse
import os

from confs.hyper_params import (anchor_hyper_params,
                                item_proto_chose_original_hyper_params,
                                mf_hyper_params,
                                proto_double_tie_chose_original_hyper_params,
                                user_proto_chose_original_hyper_params)
from experiment_helper import start_hyper, start_multiple_hyper

from utilities.consts import SINGLE_SEED

c:\Users\Alexey\venvs\recsys_project\Lib\site-packages\ray\thirdparty_files
c:\Users\Alexey\Documents\github\hse_courses\2nd_year\term1\recsys\project
C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.11_3.11.2032.0_x64__qbz5n2kfra8p0\python311.zip
C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.11_3.11.2032.0_x64__qbz5n2kfra8p0\DLLs
C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.11_3.11.2032.0_x64__qbz5n2kfra8p0\Lib
C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.11_3.11.2032.0_x64__qbz5n2kfra8p0
c:\Users\Alexey\venvs\recsys_project

c:\Users\Alexey\venvs\recsys_project\Lib\site-packages
c:\Users\Alexey\venvs\recsys_project\Lib\site-packages\win32
c:\Users\Alexey\venvs\recsys_project\Lib\site-packages\win32\lib
c:\Users\Alexey\venvs\recsys_project\Lib\site-packages\Pythonwin
ProtoMF


In [2]:
import argparse
import os
import platform
from functools import partial

import numpy as np
import pandas as pd
import torch
from ray import tune
from ray.air.integrations.wandb import WandbLoggerCallback
from ray.tune.schedulers import ASHAScheduler
from ray.tune.search.hyperopt import HyperOptSearch
from scipy import sparse as sp
from torch import nn
from torch.utils import data
from torch.utils.data.dataset import T_co

from feature_extraction.feature_extractor_factories import \
    FeatureExtractorFactory
from feature_extraction.feature_extractors import FeatureExtractor
from rec_sys_folder.protomf_dataset import get_protorecdataset_dataloader
from rec_sys_folder.rec_sys import RecSys
from rec_sys_folder.tester import Tester
from rec_sys_folder.trainer import Trainer
from utilities.consts import (CPU_PER_TRIAL, DATA_PATH, GPU_PER_TRIAL,
                              MAX_PATIENCE, NEG_VAL, NUM_SAMPLES, NUM_WORKERS,
                              OPTIMIZING_METRIC, PROJECT_NAME, SEED_LIST,
                              SINGLE_SEED, WANDB_API_KEY)
from utilities.eval import Evaluator
from utilities.utils import generate_id, reproducible

In [3]:
torch.cuda.is_available()

True

In [4]:
import wandb

In [5]:
def load_data(conf: argparse.Namespace, is_train: bool = True):
    if is_train:
        train_loader = get_protorecdataset_dataloader(
            data_path=conf.data_path,
            split_set="train",
            n_neg=conf.neg_train,
            neg_strategy=conf.train_neg_strategy,
            batch_size=conf.batch_size,
            shuffle=True,
            num_workers=NUM_WORKERS,
            prefetch_factor=5,
        )

        val_loader = get_protorecdataset_dataloader(
            data_path=conf.data_path,
            split_set="val",
            n_neg=NEG_VAL,
            neg_strategy=conf.eval_neg_strategy,
            batch_size=conf.val_batch_size,
            num_workers=NUM_WORKERS,
        )

        test_loader = get_protorecdataset_dataloader(
            data_path=conf.data_path,
            split_set="test",
            n_neg=NEG_VAL,
            neg_strategy=conf.eval_neg_strategy,
            batch_size=conf.val_batch_size,
            num_workers=NUM_WORKERS,
        )

        return {
            "train_loader": train_loader,
            "val_loader": val_loader,
            "test_loader": test_loader,
        }
    else:
        test_loader = get_protorecdataset_dataloader(
            data_path=conf.data_path,
            split_set="test",
            n_neg=NEG_VAL,
            neg_strategy=conf.eval_neg_strategy,
            batch_size=conf.val_batch_size,
            num_workers=NUM_WORKERS,
        )

        return {"test_loader": test_loader}


def start_training(config):
    config = argparse.Namespace(**config)
    print(config)

    data_loaders_dict = load_data(config)

    reproducible(config.seed)

    # trainer = Trainer(data_loaders_dict['train_loader'], data_loaders_dict['val_loader'], data_loaders_dict['test_loader'], config)
    trainer = Trainer(
        data_loaders_dict["train_loader"], data_loaders_dict["val_loader"], config
    )

    trainer.run()

    wandb.finish()


def start_testing(config, model_load_path: str):
    config = argparse.Namespace(**config)
    print(config)

    data_loaders_dict = load_data(config, is_train=False)

    reproducible(config.seed)

    tester = Tester(data_loaders_dict["test_loader"], config, model_load_path)

    metric_values = tester.test()
    return metric_values


def start_hyper(conf: dict, model: str, dataset: str, seed: int = SINGLE_SEED):
    print("Starting Hyperparameter Optimization")
    print(f"Seed is {seed}")

    # Search Algorithm
    search_alg = HyperOptSearch(random_state_seed=seed)

    if dataset == "lfm2b-1mon":
        scheduler = ASHAScheduler(grace_period=4)
    else:
        scheduler = None

    # Logger
    callback = WandbLoggerCallback(
        project=PROJECT_NAME,
        log_config=True,
        api_key=WANDB_API_KEY,
        reinit=True,
        force=True,
        job_type="train/val",
        tags=[model, str(seed), dataset],
    )

    # Hostname
    host_name = platform.uname()

    # Dataset
    data_path = DATA_PATH
    conf["data_path"] = os.path.join(data_path, dataset)

    # Seed
    conf["seed"] = seed

    group_name = f"{model}_{dataset}_{seed}"
    tune.register_trainable(group_name, start_training)
    analysis = tune.run(
        group_name,
        config=conf,
        name=generate_id(prefix=group_name),
        resources_per_trial={"gpu": GPU_PER_TRIAL, "cpu": CPU_PER_TRIAL},
        scheduler=scheduler,
        search_alg=search_alg,
        num_samples=NUM_SAMPLES,
        callbacks=[callback],
        metric="_metric/" + OPTIMIZING_METRIC,
        mode="max",
    )
    metric_name = "_metric/" + OPTIMIZING_METRIC
    best_trial = analysis.get_best_trial(metric_name, "max", scope="all")
    best_trial_config = best_trial.config
    best_trial_checkpoint = os.path.join(
        analysis.get_best_checkpoint(best_trial, metric_name, "max"), "best_model.pth"
    )

    wandb.login(key=WANDB_API_KEY)
    wandb.init(
        project=PROJECT_NAME,
        group="test_results",
        config=best_trial_config,
        name=group_name,
        force=True,
        job_type="test",
        tags=[model, str(seed), dataset],
    )
    metric_values = start_testing(best_trial_config, best_trial_checkpoint)
    wandb.finish()
    return metric_values


def start_multiple_hyper(
    conf: dict, model: str, dataset: str, seed_list: list = SEED_LIST
):
    print("Starting Multi-Hyperparameter Optimization")
    print("seed_list is ", seed_list)
    metric_values_list = []
    mean_values = dict()

    for seed in seed_list:
        metric_values_list.append(start_hyper(conf, model, dataset, seed))

    for key in metric_values_list[0].keys():
        _sum = 0
        for metric_values in metric_values_list:
            _sum += metric_values[key]
        _mean = _sum / len(metric_values_list)

        mean_values[key] = _mean

    group_name = f"{model}_{dataset}"

In [6]:
base_param = {
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "n_epochs": 10,
    "eval_neg_strategy": "uniform",
    "val_batch_size": 256,
    "train_batch_size": 256,
    "data_path": protomf_path / "data/ml",
    "NUM_WORKERS": 1,
    "rec_sys_param": {"use_bias": 0},
}

base_hyper_params = {
    **base_param,
    "neg_train": 99,
    "neg_val": 99,
    "train_neg_strategy": "uniform",  # tune.choice(['popular', 'uniform']),
    "loss_func_name": "sampled_softmax",  # tune.choice(['bce', 'bpr', 'sampled_softmax']),
    "batch_size": np.random.randint(64, 512),
    "optim_param": {
        "optim": "adagrad",
        "wd": np.random.uniform(low=1e-4, high=1e-2),
        "lr": np.random.uniform(low=1e-4, high=1e-1),
    },
}
user_proto_chose_original_hyper_params = {
    **base_hyper_params,
    "loss_func_aggr": "mean",
    "ft_ext_param": {
        "ft_type": "prototypes",
        "embedding_dim": np.random.randint(10, 100),  # tune.randint(10, 100),
        "user_ft_ext_param": {
            "ft_type": "prototypes",
            "sim_proto_weight": np.random.uniform(
                low=1e-3, high=10
            ),  # tune.loguniform(1e-3, 10),
            "sim_batch_weight": np.random.uniform(low=1e-3, high=10),
            "use_weight_matrix": False,
            "n_prototypes": np.random.randint(10, 100),  # tune.randint(10, 100),
            "cosine_type": "shifted",
            "reg_proto_type": "max",
            "reg_batch_type": "max",
        },
        "item_ft_ext_param": {
            "ft_type": "embedding",
        },
    },
}
user_proto_chose_original_hyper_params = argparse.Namespace(
    **user_proto_chose_original_hyper_params
)

proto_double_tie_chose_original_hyper_params = {
    "loss_func_aggr": "mean",
    "ft_ext_param": {
        "ft_type": "prototypes_double_tie",
        "embedding_dim": 100,  # tune.randint(10, 100),
        "item_ft_ext_param": {
            "ft_type": "prototypes_double_tie",
            "sim_proto_weight": 1e-3,  # tune.loguniform(1e-3, 10),
            "sim_batch_weight": 1e-3,  # tune.loguniform(1e-3, 10),
            "use_weight_matrix": False,
            "n_prototypes": 5,  # tune.randint(10, 100),
            "cosine_type": "shifted",
            "reg_proto_type": "max",
            "reg_batch_type": "max",
        },
        "user_ft_ext_param": {
            "ft_type": "prototypes_double_tie",
            "sim_proto_weight": 1e-3,  # tune.loguniform(1e-3, 10),
            "sim_batch_weight": 1e-3,  # tune.loguniform(1e-3, 10),
            "use_weight_matrix": False,
            "n_prototypes": 100,  # tune.randint(10, 100),
            "cosine_type": "shifted",
            "reg_proto_type": "max",
            "reg_batch_type": "max",
        },
    },
    "checkpoint_dir": "experiments",
    **base_hyper_params,
}

from omegaconf import OmegaConf

# proto_double_tie_chose_original_hyper_params = argparse.Namespace(**proto_double_tie_chose_original_hyper_params)
# proto_double_tie_chose_original_hyper_params = OmegaConf.create(proto_double_tie_chose_original_hyper_params)

In [12]:
# data_loaders_dict = load_data(proto_double_tie_chose_original_hyper_params)
# config = proto_double_tie_chose_original_hyper_params
# trainer = Trainer(data_loaders_dict['train_loader'], data_loaders_dict['val_loader'],  config)

In [7]:
import optuna

In [8]:
study = optuna.create_study(direction="maximize")

[I 2023-12-24 20:41:18,397] A new study created in memory with name: no-name-2c38d3af-9356-4cbf-9b00-ea67cc1a00d9


In [9]:
"""
'user_ft_ext_param'  'n_prototypes': 5,,
'item_ft_ext_param' 'n_prototypes': 5,
 'embedding_dim': 100,
"""


def objective(trial):
    prototypes1 = trial.suggest_int("prototypes1", 20, 100, 20)
    prototypes2 = trial.suggest_int("prototypes2", 20, 100, 20)

    embeddings_dim = trial.suggest_int("embedding_dim", 50, 400, 50)

    proto_double_tie_chose_original_hyper_params["ft_ext_param"]["user_ft_ext_param"][
        "n_prototypes"
    ] = prototypes1
    proto_double_tie_chose_original_hyper_params["ft_ext_param"]["item_ft_ext_param"][
        "n_prototypes"
    ] = prototypes2
    proto_double_tie_chose_original_hyper_params["embedding_dim"] = embeddings_dim

    config = argparse.Namespace(**proto_double_tie_chose_original_hyper_params)
    data_loaders_dict = load_data(config)

    trainer = Trainer(
        data_loaders_dict["train_loader"], data_loaders_dict["val_loader"], config
    )

    return trainer.run(trial)


study.optimize(objective, n_trials=100)

  prototypes1 = trial.suggest_int('prototypes1', 20, 100, 20)
  prototypes2 = trial.suggest_int('prototypes2', 20, 100, 20)
  embeddings_dim = trial.suggest_int("embedding_dim", 50, 400, 50)


Loading data
Built ProtoRecDataset module 
- data_path: ProtoMF\data\ml 
- n_users: 6028 
- n_items: 3123 
- n_interactions: 545656 
- split_set: train 
- n_neg: 99 
- neg_strategy: uniform 

Loading data
Built ProtoRecDataset module 
- data_path: ProtoMF\data\ml 
- n_users: 6028 
- n_items: 3123 
- n_interactions: 14196 
- split_set: val 
- n_neg: 99 
- neg_strategy: uniform 

Loading data
Built ProtoRecDataset module 
- data_path: ProtoMF\data\ml 
- n_users: 6028 
- n_items: 3123 
- n_interactions: 13952 
- split_set: test 
- n_neg: 99 
- neg_strategy: uniform 

--- Building FeatureExtractor model ---
Built Embedding model 
- n_objects: 6028 
- embedding_dim: 100 
- max_norm: None
- only_positive: False
Built PrototypeEmbedding model 
- n_prototypes: 100 
- use_weight_matrix: False 
- sim_proto_weight: 0.001 
- sim_batch_weight: 0.001 
- reg_proto_type: max 
- reg_batch_type: max 
- cosine_type: shifted 

--- Finished building FeatureExtractor model ---

--- Building FeatureExtractor

Output()



Init - Avg Val Value 0.103 



Epoch 0 - Epoch Avg Train Loss 3.772 

Validation started


Output()

Epoch 0 - Avg Val Value 1.213 

Epoch 0 - New best model found (val value 1.213) 



[W 2023-12-24 21:41:35,248] Trial 0 failed with parameters: {'prototypes1': 100, 'prototypes2': 100, 'embedding_dim': 300} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\Alexey\venvs\recsys_project\Lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\Alexey\AppData\Local\Temp\ipykernel_24884\3566805594.py", line 22, in objective
    return trainer.run(trial)
           ^^^^^^^^^^^^^^^^^^
  File "c:\Users\Alexey\Documents\github\hse_courses\2nd_year\term1\recsys\project\rec_sys_folder\trainer.py", line 113, in run
    for u_idxs, i_idxs, labels in track(self.train_loader, total=len(self.train_loader)):
  File "c:\Users\Alexey\venvs\recsys_project\Lib\site-packages\rich\progress.py", line 168, in track
    yield from progress.track(
  File "c:\Users\Alexey\venvs\recsys_project\Lib\site-packages\rich\progress.py", line 1209, in tra

In [7]:
start_hyper(proto_double_tie_chose_original_hyper_params, "ui model", "ml")

Starting Hyperparameter Optimization
Seed is 38210573


2023-12-24 03:16:32,251	INFO worker.py:1724 -- Started a local Ray instance.
2023-12-24 03:16:34,005	INFO tune.py:220 -- Initializing Ray automatically. For cluster usage or custom Ray initialization, call `ray.init(...)` before `tune.run(...)`.
2023-12-24 03:16:34,011	INFO tune.py:583 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2023-12-24 03:16:55
Running for:,00:00:20.37
Memory:,15.8/15.9 GiB

Trial name,status,loc,NUM_WORKERS,batch_size,data_path,device,eval_neg_strategy,ft_ext_param/embeddi ng_dim,ft_ext_param/ft_type,...t_param/item_ft_e xt_param/cosine_type,ft_ext_param/item_ft _ext_param/ft_type,..._param/item_ft_ex t_param/n_prototypes,...aram/item_ft_ext_ param/reg_batch_type,...aram/item_ft_ext_ param/reg_proto_type,...am/item_ft_ext_pa ram/sim_batch_weight,...am/item_ft_ext_pa ram/sim_proto_weight,...m/item_ft_ext_par am/use_weight_matrix,...t_param/user_ft_e xt_param/cosine_type,ft_ext_param/user_ft _ext_param/ft_type,..._param/user_ft_ex t_param/n_prototypes,...aram/user_ft_ext_ param/reg_batch_type,...aram/user_ft_ext_ param/reg_proto_type,...am/user_ft_ext_pa ram/sim_batch_weight,...am/user_ft_ext_pa ram/sim_proto_weight,...m/user_ft_ext_par am/use_weight_matrix,loss_func_aggr,loss_func_name,n_epochs,neg_train,neg_val,optim_param/lr,optim_param/optim,optim_param/wd,rec_sys_param/use_bi as,seed,train_batch_size,train_neg_strategy,val_batch_size
ui model_ml_38210573_d61f3356,RUNNING,127.0.0.1:3464,1,162,C:\Users\Alexey_c030,cuda,uniform,55,prototypes_doub_a790,shifted,prototypes_doub_a790,92,max,max,0.0512209,6.08195,False,shifted,prototypes_doub_a790,70,max,max,2.20496,0.0442315,False,mean,sampled_softmax,1,99,99,0.0661069,adagrad,0.00226364,0,38210573,256,uniform,256
ui model_ml_38210573_4e9b67f9,RUNNING,127.0.0.1:29232,1,162,C:\Users\Alexey_c030,cuda,uniform,45,prototypes_doub_a790,shifted,prototypes_doub_a790,29,max,max,1.47044,0.0934367,False,shifted,prototypes_doub_a790,74,max,max,0.0309485,0.881004,False,mean,sampled_softmax,1,99,99,0.0661069,adagrad,0.00226364,0,38210573,256,uniform,256
ui model_ml_38210573_99b52469,PENDING,,1,162,C:\Users\Alexey_c030,cuda,uniform,94,prototypes_doub_a790,shifted,prototypes_doub_a790,87,max,max,0.114407,0.0223808,False,shifted,prototypes_doub_a790,67,max,max,0.0305799,5.93527,False,mean,sampled_softmax,1,99,99,0.0661069,adagrad,0.00226364,0,38210573,256,uniform,256


[36m(pid=3464)[0m c:\Users\Alexey\Documents\github\hse_courses\2nd_year\term1\recsys\project
[36m(pid=3464)[0m c:\Users\Alexey\venvs\recsys_project\Lib\site-packages\ray\thirdparty_files
[36m(pid=3464)[0m c:\Users\Alexey\venvs\recsys_project\Lib\site-packages\ray\_private\workers
[36m(pid=3464)[0m C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.11_3.11.2032.0_x64__qbz5n2kfra8p0\python311.zip
[36m(pid=3464)[0m C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.11_3.11.2032.0_x64__qbz5n2kfra8p0\DLLs
[36m(pid=3464)[0m C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.11_3.11.2032.0_x64__qbz5n2kfra8p0\Lib
[36m(pid=3464)[0m C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.11_3.11.2032.0_x64__qbz5n2kfra8p0
[36m(pid=3464)[0m c:\Users\Alexey\venvs\recsys_project
[36m(pid=3464)[0m c:\Users\Alexey\venvs\recsys_project\Lib\site-packages
[36m(pid=3464)[0m c:\Users\Alexey\venvs\recsys_project\Lib\site-packages\win32
[36m(pid

[36m(_WandbLoggingActor pid=27252)[0m wandb: Currently logged in as: ras-unlucky (rho-corp). Use `wandb login --relogin` to force relogin


[36m(start_training pid=29232)[0m Namespace(loss_func_aggr='mean', ft_ext_param={'ft_type': 'prototypes_double_tie', 'embedding_dim': 45, 'item_ft_ext_param': {'ft_type': 'prototypes_double_tie', 'sim_proto_weight': 0.09343665856284789, 'sim_batch_weight': 1.4704425396918077, 'use_weight_matrix': False, 'n_prototypes': 29, 'cosine_type': 'shifted', 'reg_proto_type': 'max', 'reg_batch_type': 'max'}, 'user_ft_ext_param': {'ft_type': 'prototypes_double_tie', 'sim_proto_weight': 0.8810036892287099, 'sim_batch_weight': 0.030948521969642345, 'use_weight_matrix': False, 'n_prototypes': 74, 'cosine_type': 'shifted', 'reg_proto_type': 'max', 'reg_batch_type': 'max'}}, device='cuda', n_epochs=1, eval_neg_strategy='uniform', val_batch_size=256, train_batch_size=256, data_path='C:\\Users\\Alexey\\Documents\\github\\hse_courses\\2nd_year\\term1\\recsys\\project\\ProtoMF\\data\\ml', NUM_WORKERS=1, rec_sys_param={'use_bias': 0}, neg_train=99, neg_val=99, train_neg_strategy='uniform', loss_func_name

[36m(_WandbLoggingActor pid=27252)[0m wandb: Tracking run with wandb version 0.16.1
[36m(_WandbLoggingActor pid=27252)[0m wandb: Run data is saved locally in C:\Users\Alexey\ray_results\ui model_ml_38210573_2023-12-24_3-16-29.780734\ui model_ml_38210573_d61f3356_1_NUM_WORKERS=1,batch_size=162,data_path=C_Users_Alexey_Documents_github_hse_courses_2nd_year_term1__2023-12-24_03-16-34\wandb\run-20231224_031645-d61f3356
[36m(_WandbLoggingActor pid=27252)[0m wandb: Run `wandb offline` to turn off syncing.
[36m(_WandbLoggingActor pid=27252)[0m wandb: Syncing run ui model_ml_38210573_d61f3356
[36m(_WandbLoggingActor pid=27252)[0m wandb:  View project at https://wandb.ai/rho-corp/protomf
[36m(_WandbLoggingActor pid=27252)[0m wandb:  View run at https://wandb.ai/rho-corp/protomf/runs/d61f3356


[36m(start_training pid=29232)[0m Built ProtoRecDataset module 
[36m(start_training pid=29232)[0m - data_path: C:\Users\Alexey\Documents\github\hse_courses\2nd_year\term1\recsys\project\ProtoMF\data\ml 
[36m(start_training pid=29232)[0m - n_users: 6028 
[36m(start_training pid=29232)[0m - n_items: 3123 
[36m(start_training pid=29232)[0m - n_interactions: 13952 
[36m(start_training pid=29232)[0m - split_set: test 
[36m(start_training pid=29232)[0m - n_neg: 99 
[36m(start_training pid=29232)[0m - neg_strategy: uniform 
[36m(start_training pid=29232)[0m 
[36m(start_training pid=29232)[0m --- Building FeatureExtractor model ---
[36m(start_training pid=29232)[0m Built Embedding model 
[36m(start_training pid=29232)[0m - n_objects: 6028 
[36m(start_training pid=29232)[0m - embedding_dim: 45 
[36m(start_training pid=29232)[0m - max_norm: None
[36m(start_training pid=29232)[0m - only_positive: False
[36m(start_training pid=29232)[0m Built PrototypeEmbedding mode

[36m(_WandbLoggingActor pid=2716)[0m wandb: Currently logged in as: ras-unlucky (rho-corp). Use `wandb login --relogin` to force relogin
[36m(_WandbLoggingActor pid=2716)[0m wandb: - Waiting for wandb.init()...
[36m(_WandbLoggingActor pid=2716)[0m wandb: \ Waiting for wandb.init()...
[36m(_WandbLoggingActor pid=2716)[0m wandb: | Waiting for wandb.init()...
[36m(_WandbLoggingActor pid=2716)[0m wandb: Tracking run with wandb version 0.16.1
[36m(_WandbLoggingActor pid=2716)[0m wandb: Run data is saved locally in C:\Users\Alexey\ray_results\ui model_ml_38210573_2023-12-24_3-16-29.780734\ui model_ml_38210573_4e9b67f9_2_NUM_WORKERS=1,batch_size=162,data_path=C_Users_Alexey_Documents_github_hse_courses_2nd_year_term1__2023-12-24_03-16-39\wandb\run-20231224_031656-4e9b67f9
[36m(_WandbLoggingActor pid=2716)[0m wandb: Run `wandb offline` to turn off syncing.
[36m(_WandbLoggingActor pid=2716)[0m wandb: Syncing run ui model_ml_38210573_4e9b67f9
[36m(_WandbLoggingActor pid=2716)[0

In [None]:
class RecSys(nn.Module):
    def __init__(
        self,
        n_users: int,
        n_items: int,
        rec_sys_param,
        user_feature_extractor: FeatureExtractor,
        item_feature_extractor: FeatureExtractor,
        loss_func_name: str,
        loss_func_aggr: str = "mean",
    ):
        """
        General Recommender System
        It generates the user/item vectors (given the feature extractors) and computes the similarity by the dot product.
        :param n_users: number of users in the system
        :param n_items: number of items in the system
        :param rec_sys_param: parameters of the Recommender System module
        :param user_feature_extractor: feature_extractor.FeatureExtractor module that generates user embeddings.
        :param item_feature_extractor: feature_extractor.FeatureExtractor module that generates item embeddings.
        :param loss_func_name: name of the loss function to use for the network.
        :param loss_func_aggr: type of aggregation for the loss function, either 'mean' or 'sum'.
        """

        assert loss_func_aggr in [
            "mean",
            "sum",
        ], f"Loss function aggregators <{loss_func_aggr}> not implemented...yet"

        super().__init__()
        self.n_users = n_users
        self.n_items = n_items
        self.rec_sys_param = rec_sys_param
        self.user_feature_extractor = user_feature_extractor
        self.item_feature_extractor = item_feature_extractor
        self.loss_func_name = loss_func_name
        self.loss_func_aggr = loss_func_aggr

        self.use_bias = (
            self.rec_sys_param["use_bias"] > 0
            if "use_bias" in self.rec_sys_param
            else True
        )

        if self.use_bias:
            self.user_bias = nn.Embedding(self.n_users, 1)
            self.item_bias = nn.Embedding(self.n_items, 1)
            self.global_bias = nn.Parameter(torch.zeros(1), requires_grad=True)

        if self.loss_func_name == "bce":
            self.rec_loss = partial(bce_loss, aggregator=self.loss_func_aggr)
        elif self.loss_func_name == "bpr":
            self.rec_loss = partial(bpr_loss, aggregator=self.loss_func_aggr)
        elif self.loss_func_name == "sampled_softmax":
            self.rec_loss = partial(
                sampled_softmax_loss, aggregator=self.loss_func_aggr
            )
        else:
            raise ValueError(
                f"Recommender System Loss function <{self.rec_loss}> Not Implemented... Yet"
            )

        self.initialized = False

        print(
            f"Built RecSys module \n"
            f"- n_users: {self.n_users} \n"
            f"- n_items: {self.n_items} \n"
            f"- user_feature_extractor: {self.user_feature_extractor.name} \n"
            f"- item_feature_extractor: {self.item_feature_extractor.name} \n"
            f"- loss_func_name: {self.loss_func_name} \n"
            f"- use_bias: {self.use_bias} \n"
        )

    def init_parameters(self):
        """
        Method for initializing the Recommender System Processor
        """
        if self.use_bias:
            torch.nn.init.constant_(self.user_bias.weight, 0.0)
            torch.nn.init.constant_(self.item_bias.weight, 0.0)

        self.user_feature_extractor.init_parameters()
        self.item_feature_extractor.init_parameters()

        self.initialized = True

    def loss_func(self, logits, labels):
        """
        Loss function of the Recommender System module. It takes into account eventual feature_extractor loss terms.
        NB. Any feature_extractor loss is pre-weighted.
        :param logits: output of the system.
        :param labels: binary labels
        :return: aggregated loss
        """

        rec_loss = self.rec_loss(logits, labels)
        item_feat_ext_loss = self.item_feature_extractor.get_and_reset_loss()
        user_feat_ext_loss = self.user_feature_extractor.get_and_reset_loss()
        return rec_loss + item_feat_ext_loss + user_feat_ext_loss

    def forward(self, u_idxs, i_idxs):
        """
        Performs the forward pass considering user indexes and the item indexes. Negative Sampling is done automatically
        by the dataloader
        :param u_idxs: User indexes. Shape is (batch_size,)
        :param i_idxs: Item indexes. Shape is (batch_size, n_neg + 1)

        :return: A matrix of logits values. Shape is (batch_size, 1 + n_neg). First column is always associated
                to the positive track.
        """
        assert self.initialized, (
            "Model initialization has not been called! Please call .init_parameters() "
            "before using the model"
        )

        # --- User pass ---
        u_embed = self.user_feature_extractor(u_idxs)
        if self.use_bias:
            u_bias = self.user_bias(u_idxs)

        # --- Item pass ---
        if self.use_bias:
            i_bias = self.item_bias(i_idxs).squeeze()

        i_embed = self.item_feature_extractor(i_idxs)

        # --- Dot Product ---
        dots = torch.sum(
            u_embed.unsqueeze(1) * i_embed, dim=-1
        )  # [batch_size, n_neg_p_1]

        if self.use_bias:
            # Optional bias
            dots = dots + u_bias + i_bias + self.global_bias

        return dots


def bce_loss(logits, labels, aggregator="mean"):
    """
    It computes the binary cross entropy loss with negative sampling, expressed by the formula:
                                    -∑_j log(x_ui) + log(1 - x_uj)
    where x_ui and x_uj are the prediction for user u on item i and j, respectively. Item i positive instance while
    Item j is a negative instance. The Sum is carried out across the different negative instances. In other words
    the positive item is weighted as many as negative items are considered.

    :param logits: Logits values from the network. The first column always contain the values of positive instances.
            Shape is (batch_size, 1 + n_neg).
    :param labels: 1-0 Labels. The first column contains 1s while all the others 0s.
    :param aggregator: function to use to aggregate the loss terms. Default to mean

    :return: The binary cross entropy as computed above
    """
    weights = torch.ones_like(logits)
    weights[:, 0] = logits.shape[1] - 1

    loss = nn.BCEWithLogitsLoss(weights.flatten(), reduction=aggregator)(
        logits.flatten(), labels.flatten()
    )

    return loss


def bpr_loss(logits, labels, aggregator="mean"):
    """
    It computes the Bayesian Personalized Ranking loss (https://arxiv.org/pdf/1205.2618.pdf).

    :param logits: Logits values from the network. The first column always contain the values of positive instances.
            Shape is (batch_size, 1 + n_neg).
    :param labels: 1-0 Labels. The first column contains 1s while all the others 0s.
    :param aggregator: function to use to aggregate the loss terms. Default to mean

    :return: The bayesian personalized ranking loss
    """
    pos_logits = logits[:, 0].unsqueeze(1)  # [batch_size,1]
    neg_logits = logits[:, 1:]  # [batch_size,n_neg]

    labels = labels[:, 0]  # I guess this is just to avoid problems with the device
    labels = torch.repeat_interleave(labels, neg_logits.shape[1])

    diff_logits = pos_logits - neg_logits

    loss = nn.BCEWithLogitsLoss(reduction=aggregator)(
        diff_logits.flatten(), labels.flatten()
    )

    return loss


def sampled_softmax_loss(logits, labels, aggregator="sum"):
    """
    It computes the (Sampled) Softmax Loss (a.k.a. sampled cross entropy) expressed by the formula:
                        -x_ui +  log( ∑_j e^{x_uj})
    where x_ui and x_uj are the prediction for user u on item i and j, respectively. Item i positive instance while j
    goes over all the sampled items (negatives + the positive).
    :param logits: Logits values from the network. The first column always contain the values of positive instances.
            Shape is (batch_size, 1 + n_neg).
    :param labels: 1-0 Labels. The first column contains 1s while all the others 0s.
    :param aggregator: function to use to aggregate the loss terms. Default to sum
    :return:
    """

    pos_logits_sum = -logits[:, 0]
    log_sum_exp_sum = torch.logsumexp(logits, dim=-1)

    sampled_loss = pos_logits_sum + log_sum_exp_sum

    if aggregator == "sum":
        return sampled_loss.sum()
    elif aggregator == "mean":
        return sampled_loss.mean()
    else:
        raise ValueError("Loss aggregator not defined")

In [8]:
checkpoint_dir = Path("./ProtoMF/best_models")

In [9]:
class Trainer:
    def __init__(
        self, train_loader: data.DataLoader, val_loader: data.DataLoader, conf
    ):
        """
        Train and Evaluate the model.
        :param train_loader: Training DataLoader (check music4all_data.Music4AllDataset for more info)
        :param val_loader: Validation DataLoader (check music4all_data.Music4AllDataset for more info)
        :param conf: Experiment configuration parameters
        """

        self.train_loader = train_loader
        self.val_loader = val_loader

        self.rec_sys_param = conf.rec_sys_param
        self.ft_ext_param = conf.ft_ext_param
        self.optim_param = conf.optim_param

        self.n_epochs = conf.n_epochs
        self.loss_func_name = conf.loss_func_name
        self.loss_func_aggr = (
            conf.loss_func_aggr if "loss_func_aggr" in conf else "mean"
        )

        self.device = conf.device

        self.optimizing_metric = OPTIMIZING_METRIC
        self.max_patience = MAX_PATIENCE

        self.model = self._build_model()
        self.optimizer = self._build_optimizer()

        print(
            f"Built Trainer module \n"
            f"- n_epochs: {self.n_epochs} \n"
            f"- loss_func_name: {self.loss_func_name} \n"
            f"- loss_func_aggr: {self.loss_func_aggr} \n"
            f"- device: {self.device} \n"
            f"- optimizing_metric: {self.optimizing_metric} \n"
        )

    def _build_model(self):
        # Step 1 --- Building User and Item Feature Extractors
        n_users = self.train_loader.dataset.n_users
        n_items = self.train_loader.dataset.n_items
        (
            user_feature_extractor,
            item_feature_extractor,
        ) = FeatureExtractorFactory.create_models(self.ft_ext_param, n_users, n_items)
        # Step 2 --- Building RecSys Module
        rec_sys = RecSys(
            n_users,
            n_items,
            self.rec_sys_param,
            user_feature_extractor,
            item_feature_extractor,
            self.loss_func_name,
            self.loss_func_aggr,
        )

        rec_sys.init_parameters()
        rec_sys = nn.DataParallel(rec_sys)
        rec_sys = rec_sys.to(self.device)

        return rec_sys

    def _build_optimizer(self):
        self.lr = self.optim_param["lr"] if "lr" in self.optim_param else 1e-3
        self.wd = self.optim_param["wd"] if "wd" in self.optim_param else 1e-4

        optim_name = self.optim_param["optim"]
        if optim_name == "adam":
            optim = torch.optim.Adam(
                self.model.parameters(), lr=self.lr, weight_decay=self.wd
            )
        elif optim_name == "adagrad":
            optim = torch.optim.Adagrad(
                self.model.parameters(), lr=self.lr, weight_decay=self.wd
            )
        else:
            raise ValueError("Optimizer not yet included")

        print(
            f"Built Optimizer  \n"
            f"- name: {optim_name} \n"
            f"- lr: {self.lr} \n"
            f"- wd: {self.wd} \n"
        )

        return optim

    def run(self):
        """
        Runs the Training procedure
        """
        metrics_values = self.val()
        best_value = metrics_values[self.optimizing_metric]
        #         tune.report(metrics_values)
        print("Init - Avg Val Value {:.3f} \n".format(best_value))

        patience = 0
        for epoch in range(self.n_epochs):
            if patience == self.max_patience:
                print("Max Patience reached, stopping.")
                break

            self.model.train()

            epoch_train_loss = 0

            for u_idxs, i_idxs, labels in self.train_loader:
                u_idxs = u_idxs.to(self.device)
                i_idxs = i_idxs.to(self.device)
                labels = labels.to(self.device)

                out = self.model(u_idxs, i_idxs)

                loss = self.model.module.loss_func(out, labels)

                epoch_train_loss += loss.item()

                loss.backward()
                self.optimizer.step()
                self.optimizer.zero_grad()
                if int(u_idxs[0]) % 1000 == 0:
                    print(str(int(u_idxs[0])) + "_users_past")
            epoch_train_loss /= len(self.train_loader)
            print(
                "Epoch {} - Epoch Avg Train Loss {:.3f} \n".format(
                    epoch, epoch_train_loss
                )
            )

            metrics_values = self.val()
            curr_value = metrics_values[self.optimizing_metric]
            print("Epoch {} - Avg Val Value {:.3f} \n".format(epoch, curr_value))
            tune.report({**metrics_values, "epoch_train_loss": epoch_train_loss})

            if curr_value > best_value:
                best_value = curr_value
                print(
                    "Epoch {} - New best model found (val value {:.3f}) \n".format(
                        epoch, curr_value
                    )
                )
                torch.save(
                    self.model.module.state_dict(),
                    os.path.join(checkpoint_dir, "best_model.pth"),
                )
                patience = 0
            else:
                patience += 1

    @torch.no_grad()
    def val(self):
        """
        Runs the evaluation procedure.
        :return: A scalar float value, output of the validation (e.g. NDCG@10).
        """
        self.model.eval()
        print("Validation started")
        val_loss = 0
        eval = Evaluator(self.val_loader.dataset.n_users)

        for u_idxs, i_idxs, labels in self.val_loader:
            u_idxs = u_idxs.to(self.device)
            i_idxs = i_idxs.to(self.device)
            labels = labels.to(self.device)

            out = self.model(u_idxs, i_idxs)

            val_loss += self.model.module.loss_func(out, labels).item()

            out = nn.Sigmoid()(out)
            out = out.to("cpu")

            eval.eval_batch(out)
            if int(u_idxs[0]) % 1000 == 0:
                print(str(int(u_idxs[0])) + "_users_past")
        val_loss /= len(self.val_loader)
        metrics_values = {**eval.get_results(), "val_loss": val_loss}

        return metrics_values

In [10]:
class ProtoRecDataset(data.Dataset):
    """
    Dataset class to be used in ProtoRec. To use this class for any dataset, please refer to the splitter functions
    (e.g. movielens_splitter.py)

    This class implements some basic functionalities about negative sampling. The negative sampling for a specific user
    is influenced by the split_set:
        - split_set = train: The other training items are excluded from the sampling.
        - split_set = val: The other validation items and training items are excluded from the sampling.
        - split_set = test: The other test items and training items are excluded from the sampling.

    About the data management and access:
    To perform a fast iteration and sampling over the dataset, we use two sparse matrices (COO and CSR). The COO
    is used for iteration over the training data while the CSR for fast negative sampling. We always load the train
    CSR since it is used to exclude the training data from the negative sampling also for Validation and Testing.
    NB. Depending on the split_set, the matrices may have different data. Train COO and Train CSR have always the
    same data. However, Val CSR has Val + Train data (same applies for test). This is due to the negative sampling
    in the csr matrix, for which we also exclude items from training (see below).
    """

    def __init__(
        self, data_path: str, split_set: str, n_neg: int, neg_strategy: str = "uniform"
    ):
        """
        :param data_path: path to the directory with the listening_history_*, item_ids, and user_ids files.
        :param split_set: Value in [train, val, test].
        :param n_neg: Number of negative samples.
        :param neg_strategy: Strategy to select the negative samples.
        """
        assert split_set in [
            "train",
            "val",
            "test",
        ], f"<{split_set}> is not a valid value for split set!"

        self.data_path = data_path
        self.split_set = split_set
        self.n_neg = n_neg
        self.neg_strategy = neg_strategy

        self.n_users = None
        self.n_items = None

        self.item_ids = None

        self.coo_matrix = None
        self.csr_matrix = None

        self.pop_distribution = None

        self.load_data()

        print(
            f"Built ProtoRecDataset module \n"
            f"- data_path: {self.data_path} \n"
            f"- n_users: {self.n_users} \n"
            f"- n_items: {self.n_items} \n"
            f"- n_interactions: {self.coo_matrix.nnz} \n"
            f"- split_set: {self.split_set} \n"
            f"- n_neg: {self.n_neg} \n"
            f"- neg_strategy: {self.neg_strategy} \n"
        )

    def load_data(self):
        print("Loading data")

        user_ids = pd.read_csv(os.path.join(self.data_path, "user_ids.csv"))
        item_ids = pd.read_csv(os.path.join(self.data_path, "item_ids.csv"))

        self.n_users = len(user_ids)
        self.n_items = len(item_ids)

        train_lhs = pd.read_csv(
            os.path.join(self.data_path, "listening_history_train.csv")
        )

        train_csr = sp.csr_matrix(
            (
                np.ones(len(train_lhs), dtype=np.int16),
                (train_lhs.user_id, train_lhs.item_id),
            ),
            shape=(self.n_users, self.n_items),
        )

        # Computing the popularity distribution (see _neg_sample_popular)
        item_popularity = np.array(train_csr.sum(axis=0)).flatten()
        self.pop_distribution = item_popularity / item_popularity.sum()

        if self.split_set == "val":
            val_lhs = pd.read_csv(
                os.path.join(self.data_path, "listening_history_val.csv")
            )

            val_csr = sp.csr_matrix(
                (
                    np.ones(len(val_lhs), dtype=np.int16),
                    (val_lhs.user_id, val_lhs.item_id),
                ),
                shape=(self.n_users, self.n_items),
            )

            val_coo = sp.coo_matrix(val_csr)

            self.coo_matrix = val_coo
            self.csr_matrix = val_csr + train_csr

        elif self.split_set == "test":
            test_lhs = pd.read_csv(
                os.path.join(self.data_path, "listening_history_test.csv")
            )

            test_csr = sp.csr_matrix(
                (
                    np.ones(len(test_lhs), dtype=np.int16),
                    (test_lhs.user_id, test_lhs.item_id),
                ),
                shape=(self.n_users, self.n_items),
            )

            test_coo = sp.coo_matrix(test_csr)

            self.coo_matrix = test_coo
            self.csr_matrix = test_csr + train_csr

        elif self.split_set == "train":
            train_coo = sp.coo_matrix(train_csr)

            self.coo_matrix = train_coo
            self.csr_matrix = train_csr

    def _neg_sample_uniform(self, row_idx: int) -> np.array:
        """
        For a specific user, it samples n_neg items u.a.r.
        :param row_idx: user id (or row in the matrix)
        :return: npy array containing the negatively sampled items.
        """

        consumed_items = self.csr_matrix.indices[
            self.csr_matrix.indptr[row_idx] : self.csr_matrix.indptr[row_idx + 1]
        ]

        # Uniform distribution without items consumed by the user
        p = np.ones(self.n_items)
        p[consumed_items] = 0.0  # Excluding consumed items
        p = p / p.sum()

        sampled = np.random.choice(
            np.arange(self.n_items), self.n_neg, replace=False, p=p
        )

        return sampled

    def _neg_sample_popular(self, row_idx: int) -> np.array:
        """
        For a specific user, it samples n_neg items considering the frequency of appearance of items in the dataset, i.e.
        p(i being neg) ∝ (pop_i)^0.75.
        :param row_idx: user id (or row in the matrix)
        :return: npy array containing the negatively sampled items.
        """
        consumed_items = self.csr_matrix.indices[
            self.csr_matrix.indptr[row_idx] : self.csr_matrix.indptr[row_idx + 1]
        ]

        p = self.pop_distribution.copy()
        p[consumed_items] = 0.0  # Excluding consumed items
        p = np.power(p, 0.75)  # Squashing factor alpha = .75
        p = p / p.sum()

        sampled = np.random.choice(
            np.arange(self.n_items), self.n_neg, replace=False, p=p
        )
        return sampled

    def __len__(self) -> int:
        return self.coo_matrix.nnz

    def __getitem__(self, index) -> T_co:
        """
        Loads the (user,item) pair associated to the index and performs the negative sampling.
        :param index: (user,item) index pair (as defined by the COO.data vector)
        :return: (user_idx,item_idxs,labels) where
            user_idx: is the index of the user
            item_idxs: is a npy array containing the items indexes. The positive item is in the 1st position followed
                        by the negative items indexes. Shape is (1 + n_neg,)
            labels: npy array containing the labels. First position is 1, the others are 0. Shape is (1 + n_neg,).

        """

        user_idx = self.coo_matrix.row[index].astype("int64")
        item_idx_pos = self.coo_matrix.col[index]

        # Select the correct negative sampling strategy
        if self.neg_strategy == "uniform":
            neg_samples = self._neg_sample_uniform(user_idx)
        elif self.neg_strategy == "popular":
            neg_samples = self._neg_sample_popular(user_idx)
        else:
            raise ValueError(
                f"Negative Sampling Strategy <{self.neg_strategy}> not implemented ... Yet"
            )

        item_idxs = np.concatenate(([item_idx_pos], neg_samples)).astype("int64")

        labels = np.zeros(1 + self.n_neg, dtype="float32")
        labels[0] = 1.0

        return user_idx, item_idxs, labels

In [11]:
data_path = Path(r".\ProtoMF\data\ml")
dataset = "ml"
tst = ProtoRecDataset(data_path, "train", 10, "uniform")
tst.__getitem__(0)
tst2 = data.DataLoader(tst)
tst2

tst.__getitem__(3)

Loading data
Built ProtoRecDataset module 
- data_path: ProtoMF\data\ml 
- n_users: 6028 
- n_items: 3123 
- n_interactions: 545656 
- split_set: train 
- n_neg: 10 
- neg_strategy: uniform 



(0,
 array([ 222,  994,  840,  175, 1634, 1579, 1230, 2104, 2736, 1697, 2827],
       dtype=int64),
 array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32))

In [12]:
def get_protorecdataset_dataloader(
    data_path: str, split_set: str, n_neg: int, neg_strategy="uniform", **loader_params
) -> data.DataLoader:
    """
    Returns the dataloader for a ProtoRecDataset
    :param data_path, ... ,neg_strategy: check ProtoRecDataset class for info about these parameters
    :param loader_params: parameters for the Dataloader
    :return:
    """
    protorec_dataset = ProtoRecDataset(data_path, split_set, n_neg, neg_strategy)
    return data.DataLoader(protorec_dataset, **loader_params)

In [13]:
def load_data(conf, is_train: bool = True):
    if is_train:
        train_loader = get_protorecdataset_dataloader(
            data_path=conf.data_path,
            split_set="train",
            n_neg=conf.neg_train,
            neg_strategy=conf.train_neg_strategy,
            batch_size=conf.train_batch_size,
            shuffle=True,
            num_workers=conf.NUM_WORKERS,
            prefetch_factor=5,
        )

        val_loader = get_protorecdataset_dataloader(
            data_path=conf.data_path,
            split_set="val",
            n_neg=conf.neg_val,
            neg_strategy=conf.eval_neg_strategy,
            batch_size=conf.val_batch_size,
            num_workers=conf.NUM_WORKERS,
        )

        test_loader = get_protorecdataset_dataloader(
            data_path=conf.data_path,
            split_set="test",
            n_neg=conf.neg_val,
            neg_strategy=conf.eval_neg_strategy,
            batch_size=conf.val_batch_size,
            num_workers=conf.NUM_WORKERS,
        )

        return {
            "train_loader": train_loader,
            "val_loader": val_loader,
            "test_loader": test_loader,
        }
    else:
        test_loader = get_protorecdataset_dataloader(
            data_path=conf.data_path,
            split_set="test",
            n_neg=conf.neg_val,
            neg_strategy=conf.eval_neg_strategy,
            batch_size=conf.val_batch_size,
            num_workers=conf.NUM_WORKERS,
        )

        return {"test_loader": test_loader}

In [15]:
# data_loaders_dict = load_data(user_proto_chose_original_hyper_params)

In [15]:
train_loader = get_protorecdataset_dataloader(
    protomf_path / "data/ml", "train", 99, batch_size=256
)
val_loader = get_protorecdataset_dataloader(
    protomf_path / "data\ml", "val", 99, batch_size=256
)
test_loader = get_protorecdataset_dataloader(
    protomf_path / "data\ml", "test", 99, batch_size=256
)

Loading data
Built ProtoRecDataset module 
- data_path: ProtoMF\data\ml 
- n_users: 6028 
- n_items: 3123 
- n_interactions: 545656 
- split_set: train 
- n_neg: 99 
- neg_strategy: uniform 

Loading data
Built ProtoRecDataset module 
- data_path: ProtoMF\data\ml 
- n_users: 6028 
- n_items: 3123 
- n_interactions: 14196 
- split_set: val 
- n_neg: 99 
- neg_strategy: uniform 

Loading data
Built ProtoRecDataset module 
- data_path: ProtoMF\data\ml 
- n_users: 6028 
- n_items: 3123 
- n_interactions: 13952 
- split_set: test 
- n_neg: 99 
- neg_strategy: uniform 



In [20]:
# from omegaconf import OmegaConf

# OmegaConf.create(proto_double_tie_chose_original_hyper_params)

In [16]:
trainer = Trainer(
    train_loader, val_loader, proto_double_tie_chose_original_hyper_params
)

AttributeError: 'dict' object has no attribute 'rec_sys_param'

In [17]:
trainer._build_model()
trainer._build_optimizer()

--- Building FeatureExtractor model ---
Built Embedding model 
- n_objects: 6028 
- embedding_dim: 75 
- max_norm: None
- only_positive: False
Built PrototypeEmbedding model 
- n_prototypes: 71 
- use_weight_matrix: False 
- sim_proto_weight: 7.912332955359149 
- sim_batch_weight: 5.745415598212118 
- reg_proto_type: max 
- reg_batch_type: max 
- cosine_type: shifted 

--- Finished building FeatureExtractor model ---

--- Building FeatureExtractor model ---
Built Embedding model 
- n_objects: 3123 
- embedding_dim: 71 
- max_norm: None
- only_positive: False
--- Finished building FeatureExtractor model ---

Built RecSys module 
- n_users: 6028 
- n_items: 3123 
- user_feature_extractor: PrototypeEmbedding 
- item_feature_extractor: Embedding 
- loss_func_name: sampled_softmax 
- use_bias: False 

Built Optimizer  
- name: adagrad 
- lr: 0.011408907195512278 
- wd: 0.004751680623599311 



Adagrad (
Parameter Group 0
    differentiable: False
    eps: 1e-10
    foreach: None
    initial_accumulator_value: 0
    lr: 0.011408907195512278
    lr_decay: 0
    maximize: False
    weight_decay: 0.004751680623599311
)

In [30]:
trainer.run()

Validation started
Init - Avg Val Value 0.276 

0_users_past
2000_users_past
Epoch 0 - Epoch Avg Train Loss 3.783 

Validation started


Session not detected. You should not be calling `report` outside `tuner.fit()` or while using the class API. 
  File "C:\Users\aleke\anaconda3\envs\tf\lib\runpy.py", line 197, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "C:\Users\aleke\anaconda3\envs\tf\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "C:\Users\aleke\anaconda3\envs\tf\lib\site-packages\ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "C:\Users\aleke\anaconda3\envs\tf\lib\site-packages\traitlets\config\application.py", line 992, in launch_instance
    app.start()
  File "C:\Users\aleke\anaconda3\envs\tf\lib\site-packages\ipykernel\kernelapp.py", line 711, in start
    self.io_loop.start()
  File "C:\Users\aleke\anaconda3\envs\tf\lib\site-packages\tornado\platform\asyncio.py", line 215, in start
    self.asyncio_loop.run_forever()
  File "C:\Users\aleke\anaconda3\envs\tf\lib\asyncio\base_events.py", line 601, in run_forever
    self.

Epoch 0 - Avg Val Value 0.671 

Epoch 0 - New best model found (val value 0.671) 

0_users_past
2000_users_past
Epoch 1 - Epoch Avg Train Loss 0.819 

Validation started
Epoch 1 - Avg Val Value 0.833 

Epoch 1 - New best model found (val value 0.833) 

0_users_past
2000_users_past
Epoch 2 - Epoch Avg Train Loss 0.239 

Validation started
Epoch 2 - Avg Val Value 0.919 

Epoch 2 - New best model found (val value 0.919) 

0_users_past
2000_users_past
Epoch 3 - Epoch Avg Train Loss -0.025 

Validation started
Epoch 3 - Avg Val Value 0.971 

Epoch 3 - New best model found (val value 0.971) 

0_users_past
2000_users_past
Epoch 4 - Epoch Avg Train Loss -0.180 

Validation started
Epoch 4 - Avg Val Value 1.024 

Epoch 4 - New best model found (val value 1.024) 

0_users_past
2000_users_past
Epoch 5 - Epoch Avg Train Loss -0.285 

Validation started
Epoch 5 - Avg Val Value 1.046 

Epoch 5 - New best model found (val value 1.046) 

0_users_past
2000_users_past
Epoch 6 - Epoch Avg Train Loss -0.3

KeyboardInterrupt: 

In [33]:
model_up = torch.load(checkpoint_dir + "/best_model.pth")

In [42]:
items_feats = np.array(
    model_up["item_feature_extractor.embedding_layer.weight"].to("cpu")
)
user_protos = np.array(model_up["user_feature_extractor.prototypes"].to("cpu"))
user_embeds = np.array(
    model_up["user_feature_extractor.embedding_ext.embedding_layer.weight"].to("cpu")
)

In [44]:
normed_mat_users = np.array(
    ((user_embeds.T) * 1 / np.linalg.norm(user_embeds, 2, axis=1))
)
normed_mat_protos = np.array(
    ((user_protos.T) * (1 / np.linalg.norm(user_protos, 2, axis=1)))
)
user_to_protos = 1 + np.dot(normed_mat_users.T, normed_mat_protos)
scores = user_to_protos.dot(items_feats.T)
top_20 = scores.argsort()[:, ::-1][:, :20]

In [45]:
top_20

array([[ 526,  222,  926, ...,   98,   48,  927],
       [ 222,  926, 2257, ..., 2137, 2179,  930],
       [ 926,  222, 2257, ...,   98,  944, 1254],
       ...,
       [ 926, 2257,  928, ..., 2137,  940, 2375],
       [2257,  526,  222, ...,  511,    0,  944],
       [ 526,  928,  222, ..., 2137,  864,  944]], dtype=int64)

In [56]:
valid = pd.read_excel(
    r"C:\Users\aleke\Downloads\KION_DATASET\ProtoMF\data\ml\valid_ml_our_split.xlsx"
)
test = pd.read_excel(
    r"C:\Users\aleke\Downloads\KION_DATASET\ProtoMF\data\ml\test_ml_our_split.xlsx"
)
train = pd.read_excel(
    r"C:\Users\aleke\Downloads\KION_DATASET\ProtoMF\data\ml\train_ml_our_split.xlsx"
)

In [57]:
def hr_score(top_n_items, real_likes):
    mask = (top_n_items[..., None] == real_likes[:, None]).any(2)
    return mask.any(axis=1).mean()


def mrr_score(top_n_items, real_likes):
    idx = np.arange(1, top_n_items.shape[1] + 1)[None, :]
    mask = (top_n_items[..., None] == real_likes[:, None]).any(2)
    return (mask / idx).max(axis=1).mean()


def coverage_score(top_n_items, total_item_count):
    return len(np.unique(top_n_items)) * 1.0 / total_item_count

In [59]:
max_likes = test.groupby("userid")["itemid"].apply(len).max()

In [61]:
test_likes = test.groupby("userid")["itemid"].apply(
    lambda x: list(np.pad(x, (0, max_likes - len(x)), "constant", constant_values=-1))
)

test_users = test_likes.index
test_likes = np.asarray(list(test_likes))
hr_score(top_20[test_users], np.array(test_likes))

0.4154676258992806