In [None]:
import os

import numpy as np
import pandas as pd
import torch
from tqdm.auto import tqdm

from models import CuratorNet, VBPR
from utils.data import extract_embedding
from utils.metrics import auc_exact, nDCG, precision, recall


# Evaluation procedure

### Google Colaboratory setup

Clone the repository contents into the VM and install the dependencies using the setup script:

```python
# (1) Replace contents of VM
!rm -rf sample_data
# (Replace username and password/token with your own; never commit or share the notebook with real credentials in it)
!git clone --single-branch --branch master https://username:password@github.com/aaossa/CuratorNet-experiments.git
!cp -a CuratorNet-experiments/. .
!rm -r CuratorNet-experiments/
# Setup VM using script
!chmod +x ./scripts/colaboratory.sh
!./scripts/colaboratory.sh requirements/dev.txt
```

Mount Google Drive in case the data is available there:

```python
# (2) Mount Google Drive
from google.colab import drive
drive.mount("/content/drive")
```

Extract the data into the expected folder:

```python
# (3) Bring actual data to VM
# Extract data from mounted drive to data folder
!tar -xvzf "/content/drive/My Drive/dataset/dataset.tar.gz" -C data/dataset
```

**Important:** Restart the VM after completing these steps to make sure the installed versions of the declared requirements are the ones actually in use.

In [None]:
# Shell escape: run the NVIDIA `nvidia-smi` tool to check which GPU (if any) this runtime sees
!nvidia-smi

In [None]:
# Experiment selection: these two values drive the paths and the model
# construction used by the rest of the notebook.

# Dataset to evaluate on. Supported values: UGallery, Wikimedia, Pinterest
DATASET = "UGallery"
assert DATASET in ("UGallery", "Wikimedia", "Pinterest")

# Model to evaluate. Supported values: CuratorNet, VBPR
MODEL = "CuratorNet"
assert MODEL in ("CuratorNet", "VBPR")


In [None]:
# Evaluation mode, derived from the selected model:
# * "profile": the model is queried with the items of the user profile (CuratorNet)
# * "user": the model is queried with the user id (any other model, i.e. VBPR)
MODE_PROFILE = "profile" if MODEL in ["CuratorNet"] else "user"

# Checkpoint name, without the ".tar" extension (ex. 'CuratorNet_2020-08-07-23-59-50').
# When set, it has to belong to the selected model.
CHECKPOINT = "CuratorNet_2020-08-09-01-35-38"
assert CHECKPOINT is None or CHECKPOINT.startswith(MODEL)


In [None]:
# General constants
RNG_SEED = 0
USE_GPU = True

# Paths (general): the checkpoint lives under checkpoints/<model>/, while the
# embedding and the evaluation CSV live under data/<dataset>/
_dataset_dir = os.path.join("data", DATASET)
CHECKPOINT_PATH = os.path.join("checkpoints", MODEL, f"{CHECKPOINT}.tar")
EMBEDDING_PATH = os.path.join(_dataset_dir, f"{DATASET.lower()}_embedding.npy")
EVALUATION_PATH = os.path.join(_dataset_dir, f"{MODE_PROFILE}-evaluation.csv")


In [None]:
# Freezing RNG seeds if needed.
# PyTorch and NumPy keep independent global generators, so both are seeded;
# seeding only torch would leave any NumPy-based randomness unreproducible.
if RNG_SEED is not None:
    print(f"\nUsing random seed... ({RNG_SEED})")
    torch.manual_seed(RNG_SEED)
    # np.random.seed only accepts values in [0, 2**32 - 1], hence the modulo
    np.random.seed(RNG_SEED % 2**32)


In [None]:
# Load embedding from file
print(f"\nLoading embedding from file... ({EMBEDDING_PATH})")
# NOTE(review): allow_pickle=True lets np.load unpickle Python objects stored in the
# file, which can execute arbitrary code. Only load embedding files from a trusted source.
embedding = np.load(EMBEDDING_PATH, allow_pickle=True)

# Extract features and "id2index" mapping
# Only the features are kept; the second value returned by extract_embedding
# (presumably the id2index mapping -- confirm against utils.data) is discarded.
print("\nExtracting data into variables...")
features, _ = extract_embedding(embedding, verbose=True)
print(f">> Features shape: {features.shape}")
del embedding  # Release some memory: the raw loaded array is no longer needed


In [None]:
# Load evaluation dataframe
print("\nLoad evaluation dataframe")
evaluation_df = pd.read_csv(EVALUATION_PATH)


def string_to_list(s):
    """Parse a whitespace-separated string of integers into a list of ints."""
    return [int(token) for token in s.split()]


# The "profile" and "predict" columns are parsed from strings into lists of ints.
# Values that are not strings are left untouched.
for column in ("profile", "predict"):
    evaluation_df[column] = evaluation_df[column].apply(
        lambda value: string_to_list(value) if isinstance(value, str) else value,
    )
print(f">> Evaluation: {evaluation_df.shape}")


In [None]:
# Create device instance: the first CUDA device is used only when CUDA is
# available AND the GPU was requested through USE_GPU; otherwise the CPU is used
print("\nDevice initialization")
cuda_available = torch.cuda.is_available()
device = torch.device("cuda:0" if (cuda_available and USE_GPU) else "cpu")
if cuda_available != USE_GPU:
    # Either the GPU was requested but is missing, or it is present but was not requested
    print(
        f"\nNotice: Not using GPU - "
        f"Cuda available ({cuda_available}) "
        f"does not match USE_GPU ({USE_GPU})"
    )

# Loading checkpoint (always mapped to CPU first)
# NOTE(review): torch.load relies on pickle; only load checkpoints from a trusted source.
if CHECKPOINT is not None:
    print("\nLoading checkpoint")
    checkpoint = torch.load(CHECKPOINT_PATH, map_location=torch.device("cpu"))
    best_epoch, best_accuracy = checkpoint["epoch"], checkpoint["accuracy"]
    print(f">> Best epoch: {best_epoch} | Best accuracy: {best_accuracy}")

# Model initialization
print("\nModel initialization")
model = None
if MODEL == "CuratorNet":
    model = CuratorNet(
        torch.Tensor(features),  # Pretrained visual features
        input_size=features.shape[1],  # Network input size
    ).to(device)
elif MODEL == "VBPR":
    # The VBPR sizes are not configured in this notebook: they are read back from
    # the weights stored in the checkpoint, so a checkpoint is mandatory here.
    # Without this guard the lines below fail with a NameError on `checkpoint`.
    if CHECKPOINT is None:
        raise ValueError(
            "VBPR needs a checkpoint to infer its dimensions, but CHECKPOINT is None"
        )
    n_users = checkpoint["model"]["gamma_users.weight"].size(0)
    n_items = checkpoint["model"]["gamma_items.weight"].size(0)
    dim_gamma = checkpoint["model"]["gamma_users.weight"].size(1)
    dim_theta = checkpoint["model"]["theta_users.weight"].size(1)
    model = VBPR(
        n_users, n_items,  # Number of users and items
        torch.Tensor(features),  # Pretrained visual features
        dim_gamma, dim_theta,  # Size of internal spaces
    ).to(device)

# Fail early with a clear message instead of an AttributeError on None below
if model is None:
    raise ValueError(f"Unsupported MODEL: {MODEL!r}")

# Load state dict (trained weights) into the freshly created model
if CHECKPOINT is not None:
    model.load_state_dict(checkpoint["model"])

# Change model mode to eval
print("\nChanging model mode to eval")
model.eval()


In [None]:
# Metrics: every list receives one value per evaluated row
AUC = list()
R20, P20, N20 = list(), list(), list()
R100, P100, N100 = list(), list(), list()
PROFILE_SIZES = list()
INVENTORY_IDXS = list(range(len(features)))
# Array version of the inventory, built once: handing the Python list to NumPy
# inside the loop would convert it again for every evaluated row
inventory_idxs = np.asarray(INVENTORY_IDXS)

for row in tqdm(evaluation_df.itertuples(), total=len(evaluation_df.index)):
    # Prediction: score the whole inventory for this row.
    # This is inference only, so gradient tracking is switched off.
    with torch.no_grad():
        if MODE_PROFILE == "profile":
            profile = torch.tensor(row.profile, device=device).unsqueeze(0)
            scores = model.recommend(profile, None).cpu().numpy()
        elif MODE_PROFILE == "user":
            user_id = torch.tensor([int(row.user_id)], device=device)
            scores = model.recommend(user_id, None).squeeze().cpu().numpy()
    # Ranking
    # Inventory indexes of the items that should be predicted for this row
    # (np.isin replaces the deprecated np.in1d)
    idx_of_evals = np.nonzero(np.isin(inventory_idxs, row.predict))[0]
    # Items ordered from highest to lowest score
    # NOTE(review): assumes `scores` is 1-D at this point -- confirm for the profile mode
    ranking = np.argsort(scores)[::-1]
    # Positions of the expected items in the ranking; np.in1d flattened its first
    # argument, so the explicit ravel keeps that behavior with np.isin
    pos_of_evals = np.nonzero(np.isin(np.ravel(ranking), idx_of_evals))[0]
    # Store metrics
    AUC.append(auc_exact(pos_of_evals, len(INVENTORY_IDXS)))
    R20.append(recall(pos_of_evals, 20))
    P20.append(precision(pos_of_evals, 20))
    N20.append(nDCG(pos_of_evals, 20))
    R100.append(recall(pos_of_evals, 100))
    P100.append(precision(pos_of_evals, 100))
    N100.append(nDCG(pos_of_evals, 100))
    PROFILE_SIZES.append(len(row.profile))


In [None]:
# Display stats: plain average of each per-row metric, printed in a fixed order
for label, values in (
    ("AUC", AUC),
    ("R20", R20),
    ("P20", P20),
    ("NDCG20", N20),
    ("R100", R100),
    ("P100", P100),
    ("NDCG100", N100),
):
    print(f"AVG {label} = {sum(values) / len(values)}")
