In [1]:
import logging
import numpy as np
import sys
from tqdm.notebook import tqdm
from sklearn.metrics import average_precision_score

sys.path.append('../src')
from dataset_utils import get_datasets

# Module-level logger named after the current module (`__name__`).
logger = logging.getLogger(__name__)


In [2]:
# Baseline evaluation. For every dataset category, score two label-agnostic
# predictors on the test dataset with average precision (AP):
#   * random     - an independent 0/1 draw for every entry of the target matrix
#   * popularity - every sample gets the per-label positive rate as its score
categories = ["pinterest", 'movielens','Clothing_Shoes_and_Jewelry','Toys_and_Games']
cfgs = [{
    "train_df_path": f"../outputs/{category}/df_train.pkl",
    "test_df_path": f"../outputs/{category}/df_test.pkl",
    "cf_vector_df_path": f"../outputs/{category}/cf_df.pkl",
} for category in categories ]

# Fixed seed so the random baseline is identical on every Restart & Run All.
SEED = 0
rng = np.random.default_rng(SEED)

for category, cfg in zip(categories, cfgs):
    # Load data
    train_dataset, test_dataset, dataset_meta, pos_weight = get_datasets(
        cfg["train_df_path"],
        cfg["test_df_path"],
        cfg["cf_vector_df_path"],
        out_dir=".",
        labeled_ratio=1.0,
        is_use_bias=True,
        is_skip_img=True,
        is_plot_conf_hist=False,
    )

    # Stack the per-sample targets into one matrix; the CF vector is not needed.
    # Presumably each target is a tensor-like label vector exposing `.numpy()`
    # - TODO confirm against `get_datasets`.
    targets = np.vstack([target.numpy() for _, target in tqdm(test_dataset)])

    # Random baseline: one vectorised draw from {0, 1} with the same shape as
    # the stacked targets (high=2 is exclusive).
    random_based_preds = rng.integers(low=0, high=2, size=targets.shape)

    # Popularity baseline: per-label positive rate, repeated for every sample.
    # NOTE(review): the rates are measured on the test labels themselves. Every
    # row receives the same score within a label column, so with the default
    # macro averaging each label's AP should reduce to its positive rate in the
    # test data regardless of where the rates come from - confirm.
    pop = targets.sum(axis=0,keepdims=True)/targets.shape[0]
    pop_based_preds = np.repeat(pop,len(targets),axis=0)

    ap_random_based = average_precision_score(targets,random_based_preds)
    ap_pop_based_preds = average_precision_score(targets,pop_based_preds)
    # Prefix with the category so each output line can be attributed.
    print(f'{category}: {[ap_random_based, ap_pop_based_preds]=}')



  0%|          | 0/8119 [00:00<?, ?it/s]

0.03141306485606054 0.03125


  0%|          | 0/3183 [00:00<?, ?it/s]

0.10776259784816523 0.10678439737420838


  0%|          | 0/6766 [00:00<?, ?it/s]

0.016212146747320598 0.016176804804394426


  0%|          | 0/3572 [00:00<?, ?it/s]

0.02376077292056427 0.023708058562481857
