In [None]:
import os

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from tqdm.auto import tqdm

from models import VisRank
from utils.data import extract_embedding
from utils.metrics import (
    auc_exact,
    nDCG,
    precision,
    recall,
    reciprocal_rank,
)


# Dataset to evaluate on; only the names listed in the assertion are supported
DATASET = "UGallery"
assert DATASET in ("UGallery", "Wikimedia")

# Feature extractor whose name is part of the embedding file to load
FEATURE_EXTRACTOR = "resnet50"
assert FEATURE_EXTRACTOR in ("resnet50",)


In [None]:
# Profile mode; it only selects which evaluation CSV is read below
MODE_PROFILE = "user"

# Input files, both stored under data/<DATASET>/
EMBEDDING_PATH = os.path.join(
    "data",
    DATASET,
    f"embedding-{FEATURE_EXTRACTOR}.npy",
)
EVALUATION_PATH = os.path.join(
    "data",
    DATASET,
    f"naive-{MODE_PROFILE}-evaluation.csv",
)

# Image directory of the selected dataset (None when the dataset has no entry)
IMAGES_DIR = {
    "Wikimedia": os.path.join("/", "mnt", "data2", "wikimedia", "imagenes_tarea"),
    "UGallery": os.path.join("/", "mnt", "workspace", "Ugallery", "mini-images-224-224-v2"),
}.get(DATASET)


In [None]:
# Load embedding from file
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects, which can execute code; only load embedding files from a trusted source.
print(f"\nLoading embedding from file... ({EMBEDDING_PATH})")
embedding = np.load(EMBEDDING_PATH, allow_pickle=True)

# Extract features and "id2index" mapping
# The second value returned by extract_embedding is discarded; the third one
# (item_index2fn) is indexed by item index to obtain an image filename.
print("\nExtracting data into variables...")
features, _, item_index2fn = extract_embedding(embedding, verbose=True)
print(f">> Features shape: {features.shape}")
del embedding  # Release some memory

# Fallback for explicit_features
# NOTE(review): np.copy duplicates the whole feature array in memory, and no
# visible cell reads explicit_features — confirm it is still needed.
explicit_features = np.copy(features)


In [None]:
# Load evaluation dataframe
print("\nLoad evaluation dataframe")
evaluation_df = pd.read_csv(EVALUATION_PATH)


def string_to_list(s):
    """Parse a whitespace-separated string of integers into a list of ints."""
    return list(map(int, s.split()))


# Transform lists from str to int; cells that are not strings are left untouched
for column in ("profile", "predict"):
    evaluation_df[column] = evaluation_df[column].apply(
        lambda s: string_to_list(s) if isinstance(s, str) else s,
    )

# Group evaluations by profile and user
# Lists are unhashable, so the profile is temporarily turned into a tuple to be
# usable as a groupby key and converted back to a list afterwards.
evaluation_df["profile"] = evaluation_df["profile"].map(tuple)
evaluation_df = (
    evaluation_df.groupby(["profile", "user_id"])
    # The string "sum" (instead of the builtin `sum`) keeps the groupby sum that
    # concatenates the "predict" lists of each group; passing the builtin is
    # deprecated in pandas and would fail on lists when called directly.
    .agg({"predict": "sum"})
    .reset_index()
)
evaluation_df["profile"] = evaluation_df["profile"].map(list)
print(f">> Evaluation: {evaluation_df.shape}")


In [None]:
# Build the VisRank model from the item features, with cosine similarity
# as the similarity measure
print("\nModel initialization")
model = VisRank(features, similarity_method=cosine_similarity)


In [None]:
# Predict all
# If True, the full ranking returned by the model is evaluated as-is,
# including the items already in the profile (consumed).
# If False, consumed items are removed from the ranking unless they are also
# part of the ground truth, i.e. ranks ALL - PROFILE (consumed) + PREDICT (ground truth).
PREDICT_ALL = False


In [None]:
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import torch


In [None]:
# %%time
# Metrics: one slot per evaluation row; position i is filled from row i of evaluation_df
N_EVALS = len(evaluation_df.index)
# Area Under the Curve (AUC)
AUC = np.zeros(N_EVALS, dtype=float)
# Reciprocal Rank (RR)
RR = np.zeros(N_EVALS, dtype=float)
# Recall
R20 = np.zeros(N_EVALS, dtype=float)
R100 = np.zeros(N_EVALS, dtype=float)
R200 = np.zeros(N_EVALS, dtype=float)
# Precision
P20 = np.zeros(N_EVALS, dtype=float)
P100 = np.zeros(N_EVALS, dtype=float)
P200 = np.zeros(N_EVALS, dtype=float)
# Normalized discounted cumulative gain (nDCG)
N20 = np.zeros(N_EVALS, dtype=float)
N100 = np.zeros(N_EVALS, dtype=float)
N200 = np.zeros(N_EVALS, dtype=float)
PROFILE_SIZES = np.zeros(N_EVALS, dtype=int)
N_ITEMS = len(features)


# evaluation_df is iterated directly: it was already grouped by (profile, user_id)
# when it was loaded, so no further grouping (or in-place change of the "profile"
# column) is needed here.
for i, row in tqdm(enumerate(evaluation_df.itertuples()), total=N_EVALS):
    # Load data into tensors
    profile = np.array(row.profile)
    user_id = int(row.user_id)  # Not used by the metrics below
    predict = row.predict
    # Prediction: full ranking, consumed items included
    indexes, _ = model.most_similar_to_profile(profile, k=None, method="maximum", include_consumed=True)
    if not PREDICT_ALL:
        # Drop consumed items unless they also belong to the ground truth
        indexes = np.delete(
            indexes,
            np.where(np.isin(indexes, profile) & ~np.isin(indexes, predict)),
        )
    # Ranking: positions of the ground-truth items inside the ranking
    pos_of_evals = torch.Tensor(np.where(np.isin(indexes, predict))).flatten()
    # Store metrics
    AUC[i] = auc_exact(pos_of_evals, N_ITEMS)
    RR[i] = reciprocal_rank(pos_of_evals)
    R20[i] = recall(pos_of_evals, 20)
    P20[i] = precision(pos_of_evals, 20)
    N20[i] = nDCG(pos_of_evals, 20)
    R100[i] = recall(pos_of_evals, 100)
    P100[i] = precision(pos_of_evals, 100)
    N100[i] = nDCG(pos_of_evals, 100)
    R200[i] = recall(pos_of_evals, 200)
    P200[i] = precision(pos_of_evals, 200)
    N200[i] = nDCG(pos_of_evals, 200)
    PROFILE_SIZES[i] = len(row.profile)


In [None]:
# Display stats: mean of every metric over all evaluation rows
for metric_label, metric_values in [
    ("AUC", AUC),
    ("RR", RR),
    ("R20", R20),
    ("P20", P20),
    ("NDCG20", N20),
    ("R100", R100),
    ("P100", P100),
    ("NDCG100", N100),
    ("R200", R200),
    ("P200", P200),
    ("NDCG200", N200),
]:
    print(f"AVG {metric_label} = {metric_values.mean()}")


## Results inspection

In [None]:
# Position of the evaluation row to inspect
USER_ROW = 1
assert 0 <= USER_ROW < len(evaluation_df)

# Row in evaluation dataframe
row = evaluation_df.iloc[USER_ROW]

# Load data as arrays with at least one dimension
profile = np.array(row.profile, ndmin=1)
user_id = int(row.user_id)
predict = np.array(row.predict, ndmin=1)

# Prediction: full ranking, consumed items included
indexes, _ = model.most_similar_to_profile(
    profile,
    k=None,
    method="maximum",
    include_consumed=True,
)
if not PREDICT_ALL:
    # Drop consumed items unless they also belong to the ground truth
    indexes = np.delete(
        indexes,
        np.where(np.isin(indexes, profile) & ~np.isin(indexes, predict)),
    )

# Ranking: positions of the ground-truth items inside the ranking
pos_of_evals = torch.Tensor(np.where(np.isin(indexes, predict))).flatten()

# Display metrics as a text table, assembled line by line and printed at once
separator = f"| {'-' * 15} | {'-' * 7} |"
table_lines = [
    separator,
    f"| {'Metric':^15} | {'Score':^7} |",
    separator,
    f"| {'AUC':^15} | {auc_exact(pos_of_evals, N_ITEMS):.5f} |",
    f"| {'RR':^15} | {reciprocal_rank(pos_of_evals):.5f} |",
]
# NOTE(review): the aggregate metrics use cutoffs 20/100/200 while this table
# uses 20/100/500 — confirm the last cutoff is intended.
for k in [20, 100, 500]:
    table_lines.append(separator)
    table_lines.append(f"| {f'Recall@{k}':^15} | {recall(pos_of_evals, k):.5f} |")
    table_lines.append(f"| {f'Precision@{k}':^15} | {precision(pos_of_evals, k):.5f} |")
    table_lines.append(f"| {f'nDCG@{k}':^15} | {nDCG(pos_of_evals, k):.5f} |")
table_lines.append(separator)
print("\n".join(table_lines))

# Top-K recommendation list shown in the figures below
K = 20
if PREDICT_ALL:
    ranking = indexes
else:
    # Keep items that are not consumed, plus any ground-truth item.
    # When indexes was already filtered this way above, this mask keeps everything.
    keep_mask = ~np.isin(indexes, profile) | np.isin(indexes, predict)
    ranking = indexes[keep_mask]
ranking = ranking[:K]

print()
print(f"Size of profile: {profile.size}")
print(f"Position of actual items: {pos_of_evals.cpu().numpy()}")



# Number of images per row in every figure
COLUMNS = 10
# Groups of item indexes to draw, one figure per group
ELEMENTS = {
    "Consumed": profile,
    "Recommendation": ranking,
    "Ground truth": predict,
}
# If True, every image is titled with its filename
SHOW_FILENAME = False

for label, items in ELEMENTS.items():
    if len(items) == 0:
        # An empty group would produce a zero-row grid and a zero-height
        # figure, which matplotlib rejects; skip it instead of failing.
        print(f"{label.title()} (n=0): nothing to display")
        continue
    # Ceiling division: rows needed to fit all items with COLUMNS per row
    n_rows = (len(items) - 1) // COLUMNS + 1
    fig = plt.figure(figsize=(COLUMNS * 2, 4 * n_rows))
    plt.title(f"{label.title()} (n={len(items)})")
    plt.axis("off")
    for i, img_id in enumerate(items, start=1):
        img_fn = item_index2fn[img_id]
        image = mpimg.imread(os.path.join(IMAGES_DIR, img_fn))
        ax = fig.add_subplot(n_rows, COLUMNS, i)

        # Recommended items are highlighted when they hit the ground truth
        # (green) or when they were already consumed (red).
        highlight = None
        if label == "Recommendation":
            if img_id in predict:
                highlight = ("green", "Ground truth")
            elif img_id in profile:
                highlight = ("red", "Consumed")

        if highlight is not None:
            color, tag = highlight
            ax.patch.set_edgecolor(color)
            ax.patch.set_linewidth(5)
            ax.set_title(img_fn if SHOW_FILENAME else tag, color=color)
        elif SHOW_FILENAME:
            ax.set_title(img_fn)

        ax.set_xticks([])
        ax.set_yticks([])
        ax.imshow(image)
