In [None]:
# Report which NVIDIA GPU (if any) is attached to this runtime before running the evaluation
!nvidia-smi

### Google Colaboratory setup

Clone the repository contents into the VM and install the dependencies using the setup script:

```python
# (1) Replace contents of VM
!rm -rf sample_data
# (Replace username and password/token with your own; never commit or share this cell with real credentials filled in)
!git clone --single-branch --branch master https://username:password@github.com/aaossa/CuratorNet-experiments.git
!cp -a CuratorNet-experiments/. .
!rm -r CuratorNet-experiments/
# Setup VM using script
!chmod +x ./scripts/colaboratory.sh
!./scripts/colaboratory.sh requirements/dev.txt
```

Mount Google Drive in case the data is available there:

```python
# (2) Mount Google Drive
from google.colab import drive
drive.mount("/content/drive")
```

Extract data in the right folder:

```python
# (3) Bring actual data to VM
# Extract data from mounted drive to data folder
!tar -xvzf "/content/drive/My Drive/path_to_data/data.tar.gz" -C data/UGallery
```

**Important:** Restart the VM after completing the steps above to make sure you're using the right versions of the declared requirements.

In [None]:
import os

import numpy as np
import torch
from tqdm.auto import tqdm

from models import CuratorNet
from utils.metrics import auc_exact, nDCG, precision, recall
from utils.ugallery.data import get_evaluation_dataframe


# Parameters
# Folder that holds the UGallery data files read by this notebook
_UGALLERY_DIR = os.path.join("data", "UGallery")

# Trained CuratorNet checkpoint to evaluate
CHECKPOINT_PATH = os.path.join("checkpoints", "CuratorNet", "CuratorNet_2020-07-04-15-32-13.tar")
# Per-item embedding file and the chronological evaluation event log
EMBEDDING_PATH = os.path.join(_UGALLERY_DIR, "ugallery_embedding.npy")
EVALUATION_PATH = os.path.join(_UGALLERY_DIR, "evaluation.csv")
# True: rank against every embedded item; False: rank against the tracked inventory plus the targets
FULL_INVENTORY_MODE = False
# Use "cuda:0" when CUDA is available; otherwise the notebook falls back to the CPU
USE_GPU = True


In [None]:
# ---- Embedding ----
print(f"\nLoading embedding from file... ({EMBEDDING_PATH})")
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects, so only
# load embedding files that come from a trusted source.
embedding = np.load(EMBEDDING_PATH, allow_pickle=True)

# Column 1 of every row holds that item's feature vector; pack those vectors
# into one dense (n_items, n_features) matrix.
print("\nReshape embedding")
n_items = embedding.shape[0]
n_features = embedding[0, 1].shape[0]
embedding = np.concatenate(embedding[:, 1]).reshape(n_items, n_features)

# ---- Model ----
print("\nInitialize model")
cuda_available = torch.cuda.is_available()
if cuda_available and USE_GPU:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
# Warn whenever the requested and the available hardware disagree
if cuda_available != USE_GPU:
    print(
        "\nNotice: Not using GPU - "
        f"Cuda available ({cuda_available}) "
        f"does not match USE_GPU ({USE_GPU})"
    )
model = CuratorNet(torch.Tensor(embedding), input_size=embedding.shape[1])
model = model.to(device)

# ---- Checkpoint ----
print("\nLoading checkpoint")
# map_location lets a checkpoint saved on GPU be restored on a CPU-only machine
checkpoint = torch.load(CHECKPOINT_PATH, map_location=device)
model.load_state_dict(checkpoint["model"])
best_epoch, best_accuracy = checkpoint["epoch"], checkpoint["accuracy"]
print(f">> Best epoch: {best_epoch} | Best accuracy: {best_accuracy}")

# ---- Inference mode ----
print("\nChanging model mode to eval")
model.eval()

# ---- Evaluation events ----
print("\nLoad evaluation dataframe")
evaluation_df = get_evaluation_dataframe(EVALUATION_PATH)
print(f">> Evaluation: {evaluation_df.shape}")

# Announce the stage carried out by the next cell
print("\nEvaluation")


In [None]:
%%time
# Metrics
# Replays the evaluation log in row order: "inventory" rows put an item on
# sale, "purchase" rows take items off sale, and "evaluation" rows rank the
# current candidates for one user profile and record the ranking metrics.

# Ids of the items currently on sale. A set gives O(1) add/remove and makes a
# repeated inventory event harmless: with a list, a duplicate entry would
# survive the removal after the item is sold.
INVENTORY_IDXS = set()
AUC = list()
R20, P20, N20 = list(), list(), list()
R100, P100, N100 = list(), list(), list()
PROFILE_SIZES = list()

for row in tqdm(evaluation_df.itertuples(), total=len(evaluation_df.index)):
    if row.event == "inventory":
        # Add item to inventory if necessary (no-op when already present)
        INVENTORY_IDXS.add(int(row.artwork_id))
    elif row.event == "purchase":
        # Remove purchased items from inventory; ids not on sale are ignored
        INVENTORY_IDXS.difference_update(row.shopping_cart)
    elif row.event == "evaluation":
        # Calculate metrics for evaluation
        predict_idxs = row.predict
        if FULL_INVENTORY_MODE:
            # Every embedded item is a candidate.
            # NOTE(review): inventory=None presumably tells recommend() to
            # score the full embedding - confirm against CuratorNet.recommend.
            inventory_idxs = list(range(len(embedding)))
            inventory = None
        else:
            # Candidates = items on sale plus the targets themselves. Sorting
            # fixes the candidate order so reruns are reproducible (order can
            # only matter when two candidates get exactly the same score).
            inventory_idxs = sorted(INVENTORY_IDXS.union(predict_idxs))
            inventory = torch.tensor(inventory_idxs, device=device).unsqueeze(0)
        # Prediction
        profile_idxs = row.profile
        profile = torch.tensor(profile_idxs, device=device).unsqueeze(0)
        # Inference only: skip autograd bookkeeping
        with torch.no_grad():
            scores = model.recommend(profile, inventory).cpu().numpy()
        # Ranking
        # Positions of the target items inside the candidate list
        idx_of_evals = np.flatnonzero(np.isin(inventory_idxs, predict_idxs))
        # Candidate positions ordered by descending score
        # (assumes scores is 1-D, one score per candidate - TODO confirm)
        ranking = np.argsort(scores)[::-1]
        # 0-based ranks at which the target items appear
        pos_of_evals = np.flatnonzero(np.isin(ranking, idx_of_evals))
        # Addition to metrics
        AUC.append(auc_exact(pos_of_evals, len(inventory_idxs)))
        R20.append(recall(pos_of_evals, 20))
        P20.append(precision(pos_of_evals, 20))
        N20.append(nDCG(pos_of_evals, 20))
        R100.append(recall(pos_of_evals, 100))
        P100.append(precision(pos_of_evals, 100))
        N100.append(nDCG(pos_of_evals, 100))
        PROFILE_SIZES.append(len(profile_idxs))
        # After prediction: the evaluated items are no longer on sale
        INVENTORY_IDXS.difference_update(predict_idxs)


In [None]:
# Display stats
# Average every per-evaluation metric collected by the evaluation loop.
# Labels are the ones printed to the reader; values are the raw lists.
metrics_by_label = {
    "AUC": AUC,
    "R20": R20,
    "P20": P20,
    "NDCG20": N20,
    "R100": R100,
    "P100": P100,
    "NDCG100": N100,
}
for metric_label, metric_values in metrics_by_label.items():
    if metric_values:
        print(f"AVG {metric_label} = {sum(metric_values) / len(metric_values)}")
    else:
        # No "evaluation" rows were processed: report it instead of raising
        # ZeroDivisionError on the first metric and hiding the rest.
        print(f"AVG {metric_label} = n/a (no evaluation events)")
