In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install cornac==1.15.4 --quiet adjustText

# Text + Image

## 1. Import libraries

In [1]:
import os
import sys
from collections import defaultdict
import timeit
from tqdm.notebook import tqdm

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from adjustText import adjust_text

%matplotlib inline

import cv2

import cornac
from cornac.utils import cache
from cornac.data import ImageModality, TextModality
from cornac.data.text import BaseTokenizer
from cornac.eval_methods import RatioSplit, BaseMethod, CrossValidation, StratifiedSplit
from cornac.models import VMF, CausalRec, VBPR, ConvMF
from cornac.hyperopt import Discrete, Continuous
from cornac.hyperopt import GridSearch, RandomSearch

from recommenders.evaluation.python_evaluation import serendipity, distributional_coverage, catalog_coverage

import tensorflow as tf

from scipy.stats import hmean

print(f"System version: {sys.version}")
print(f"Cornac version: {cornac.__version__}")
print(f"Tensorflow version: {tf.__version__}")


import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, GlobalAveragePooling2D, BatchNormalization

from keras.utils import np_utils
from keras.models import Sequential,Model,load_model

from harmonic_mean import HarmonicMean
from serendipity_wrapper import Serendipity
from combined_eval_method import CombinedBaseMethod
from new_random_search import NewRandomSearch

SEED = 2023
VERBOSE = True

FM model is only supported on Linux.
Windows executable can be found at http://www.libfm.org.
System version: 3.9.16 (main, Mar  8 2023, 04:29:44) 
[Clang 14.0.6 ]
Cornac version: 1.15.4
Tensorflow version: 2.12.0


## 2. Import and read data

### 2.1 User and cat data

In [2]:
# Derive a weighted "rating" by summing explict and implict feedback

from sklearn.preprocessing import MinMaxScaler

def weighted_rating_add(dfs_list, like_weight, click_weight, dwell_weight):
    
    dfs = dfs_list
    L = like_weight
    C = click_weight
    D = dwell_weight
    
    for i in range(len(dfs)):
    
        # convert True/False to 1/0 for all dfs
        dfs[i]['like'] = dfs[i]['like'].apply(lambda x: 1 if x else 0)

 
        # for train set
        if i == 0:
            dfs[i]['click'] = dfs[i]['click'].apply(lambda x: 1 if x else 0)
            dfs[i].drop(['Unnamed: 0', 'created_at', 'updated_at'], axis=1, inplace=True)
            dfs[i] = dfs[i][['id', 'userID', 'catID', 'like', 'dwell_time_ms', 'click']]

 
        # for test and val sets
        elif i == 1 or i == 2:
            dfs[i].drop(['Unnamed: 0', 'dwell_time_ms', 'click', 'created_at', 'updated_at'], axis=1, inplace=True)
            dfs[i] = dfs[i][['id', 'userID', 'catID', 'like']]
            # add a rating_add col that shows the ground truth (1 if like or 0 otherwise)
            dfs[i]['rating_add'] = dfs[i]['like']
    
    # reassigning train, test, val
    train, test, val = dfs[0], dfs[1], dfs[2]
    
    # log transform dwell_time
    train['log_dwell_time'] = train['dwell_time_ms'].apply(lambda x: np.log(x))
    train.drop(['dwell_time_ms'], axis=1, inplace=True)
    
    # initialize MinMaxScaler
    scaler = MinMaxScaler()
    
    # normalizing log_dwell_time
    train['norm_log_dwell_time'] = scaler.fit_transform(train[['log_dwell_time']])
    train.drop(['log_dwell_time'], axis=1, inplace=True)
    
    # assigning weights to features
    train_weighted = train.copy()
    train_weighted['rating_add'] = L*train_weighted['like'] + C*train_weighted['click'] + D* train_weighted['norm_log_dwell_time']
    train_weighted.drop(['click','norm_log_dwell_time'], axis=1, inplace=True)
    
    return train_weighted, test, val


In [3]:
### Specify parameters


SPLIT_TYPE = 'strat'
# SPLIT_TYPE = 'lsuo'

TARGET = 'rating_add'
# TARGET = 'like'

In [7]:
# Interactions

df_interactions_train = pd.read_csv('model_data/train_val_test/'+SPLIT_TYPE+'_train.csv')
train_id = df_interactions_train['id'].values.tolist()

df_interactions_test = pd.read_csv('model_data/train_val_test/'+SPLIT_TYPE+'_test.csv')
test_id = df_interactions_test['id'].values.tolist()

df_interactions_val = pd.read_csv('model_data/train_val_test/'+SPLIT_TYPE+'_validation.csv')
val_id = df_interactions_val['id'].values.tolist()

# Make the weighted_rating_add score
dfs = [df_interactions_train, df_interactions_test, df_interactions_val]
# Weights for like, click, dwell_time: 0.5, 0.25, 0.25
df_interactions_train, df_interactions_test, df_interactions_val = weighted_rating_add(dfs, 0.5, 0.25, 0.25)


# merge and sort by catID (in case of indexing issues)
df_interactions_merged = pd.concat([df_interactions_train, df_interactions_test, df_interactions_val])
df_interactions_merged.sort_values(by=['catID'], inplace = True)


### Auxillary
df_users = pd.read_csv('model_data/auxiliary/users.csv')
df_cats = pd.read_csv('model_data/auxiliary/cats.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs[i]['rating_add'] = dfs[i]['like']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfs[i]['rating_add'] = dfs[i]['like']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train['log_dwell_time'] = train['dwell_time_ms'].apply(lambda x: np.log(x))


In [8]:
# Create feedback df consisting of userID, itemID, likes

df_feedback = df_interactions_merged[['id','userID','catID',TARGET]].copy()
df_feedback.set_index('id', inplace = True)
df_feedback_train = df_feedback.loc[train_id]
df_feedback_test = df_feedback.loc[test_id]
df_feedback_val = df_feedback.loc[val_id]

# Prepare data in this format: [(user i, item j, rating),...]
train = list(df_feedback_train.itertuples(index=False, name=None))
test = list(df_feedback_test.itertuples(index=False, name=None))
validation = list(df_feedback_val.itertuples(index=False, name=None))
merged = list(df_feedback.itertuples(index=False, name=None))


### 2.2 Image preprocessing

In [15]:
# Read image vectors from .pkl file
df_img = pd.read_pickle('model_data/auxiliary/cat_images.pkl')

df_img.rename(columns={'id':'catID', 'cws_id':'cwsID'}, inplace = True)
df_img.sort_values(by=['catID'], inplace = True)

# list of catIDs
catID = df_img['catID'].tolist()

In [16]:
# function to remap vectors from a list to array
def list_to_arr(str_list):
    # split the stringed list
    arr = [float(num.strip(',')) for num in str_list[1:-1].split()]
    arr = np.array(arr)
    return arr


In [17]:
### Feature vectors
# Arrayify
img_features=df_img['feature_vectors'].map(list_to_arr)

# Reshape
array_data = np.array([np.array(x) for x in img_features])
img_features = array_data.reshape((array_data.shape[0], -1))

img_features.shape

(404, 20480)

In [34]:
### Image vactors
# Arrayify
df_img['img_vector'] = df_img['img_vector'].map(list_to_arr)
df_img['img_vector'] = df_img['img_vector'].apply(lambda x: np.array(x).reshape(128, 128, 3))
df_img['img_vector'] = df_img['img_vector'].apply(lambda x: x.astype(np.float32))

### 2.3 Text Processing

In [35]:
# Create tuple for texts

def df_to_tuplelist(df):

    # transform into tuples
    tuple_list = list(df.itertuples(index=False, name=None))

    # rearrange
    for i in range(len(tuple_list)):
        tuple_list[i] = (str(tuple_list[i][1]), str(tuple_list[i][0]), float(tuple_list[i][2]))

    return tuple_list

def df_to_tuplelist_pair(df):
    # make into tuples
    cats_data = list(df.itertuples(index=False, name=None))

    # unzip the tuple into 2 lists
    cat_ids, texts = zip(*cats_data)

    cat_ids = (list(cat_ids))
    texts = list(texts)

    for i in range(len(texts)):
        texts[i] = texts[i].replace('\n', ' ')

    return cat_ids, texts


DESC_OR_DET = 'details'

catID, texts = df_to_tuplelist_pair(df_cats[['id',DESC_OR_DET]])

## 3. Building models

In [36]:
# Instantiate ImageModality and TextModality, it makes it convenient to work with visual auxiliary information
item_image_modality = ImageModality(features=img_features, ids=catID, normalized=True)

item_text_modality = TextModality(
    corpus=texts,
    ids=catID,
    tokenizer=BaseTokenizer(sep=" ", stop_words="english"),
    max_vocab=8000,
    max_doc_freq=0.5,
)


In [37]:
### Function to generate output
def make_recommendations(MODEL, TRAINING_SET):
    item_id2idx = MODEL.train_set.iid_map
    item_idx2id = list(MODEL.train_set.item_ids)
    user_idx2id = MODEL.train_set.uid_map
    user_idx2id = list(MODEL.train_set.user_ids)
   
    num_users = len(np.unique(user_idx2id))
    
    # For each user, get the list of items that they have rated in train and probe
    rated = TRAINING_SET.groupby('userID')['catID'].agg(lambda x: list(x))
    rated = rated.to_dict()

    rec_result = {}

    for UIDX in range(0, num_users):
        recommendations, scores = MODEL.rank(UIDX)
        rec_result[user_idx2id[UIDX]] = [item_idx2id[i] for i in recommendations]

    # sort results
    rec_result = {key:value for key, value in sorted(rec_result.items(), key=lambda item: item[0])}

    # remove the rated items from rec_results
    for user in rec_result:
        tmp = [x for x in rec_result[user] if x not in rated[user]]
        rec_result[user] = tmp

    return rated, rec_result


### Function to visualise rated and recommended items
def show_results(user, data):
  grid_l, grid_h = 4, 4
  samples = grid_l*grid_h

  # For a particular user, prepare the data as images and labels
  user_items = data[user]

  labels = user_items #catID
  images = []

  for label in labels:
     img = df_img.loc[df_img['catID']==label,'img_vector'].iloc[0]
     # normalize
     img = (img - np.min(img)) / (np.max(img) - np.min(img))
     images.append(img)

  # Plot the grid
  plt.figure(figsize=(grid_l*2, grid_h*2))
  for i in range(samples):
    plt.subplot(grid_h,grid_l,i+1)
    plt.imshow(images[i])
    plt.axis('off')
    plt.title(labels[i], fontsize = 6)
    plt.tight_layout()
  plt.show()

### 3.1 Evaluation metrics and methods

In [38]:
# # Evaluation metrics from cornac
# eval_metrics = [
#   cornac.metrics.FMeasure(k=10),
#   cornac.metrics.NCRR(),
#   cornac.metrics.NDCG(),
# ]

# Use these for evaluation
eval_metrics = [
    HarmonicMean(
        10,
        Serendipity(),
        cornac.metrics.FMeasure(k=10),
        cornac.metrics.NCRR(),
        cornac.metrics.NDCG()
    ),
    Serendipity(),
    cornac.metrics.FMeasure(k=10),
    cornac.metrics.NCRR(),
    cornac.metrics.NDCG()
]

#### **Distributional coverage**

Distributional coverage measures how equally different items are recommended to users:
$$
\textrm{DistributionalCoverage} = -\sum_{i \in N_t} p(i|R) \log_2 p(i)
$$
where $p(i|R)$ denotes the probability that item $i$ is observed among all recommendation lists. 

$$
p(i|R) = \frac{|M_r (i)|}{|\textrm{recommendations}|}
$$
and $M_r (i)$ denotes the users who are recommended item $i$. The more unexpected or low-probability items are recommended, the higher the distributional_coverage score will be.
<br>
<br>

#### **Serendipity**

Serendipity represents the “unusualness” or “surprise” of recommendations.
$$
\textrm{serendipity} = \frac{1}{|M|} \sum_{u \in M_r}
\frac{1}{|N_r (u)|} \sum_{i \in N_r (u)} \big(1 - \textrm{expectedness}(i|u) \big) \, \textrm{relevance}(i)
$$

where $M$ is the set of users and $N_r(u)$ the set of recommendations for user $u$.


The **expectedness** of an unseen item $i$ for user $u$ as the average similarity between every already seen item $j$ in the historical data and $i$ can be defined as
$$
\textrm{expectedness}(i|u) = \frac{1}{|N_t (u)|} \sum_{j \in N_t (u)} \textrm{Cosine Similarity}(i,j)
$$

and **relevance** indicates whether the item turns out to be liked by the user or not

$$
\textrm{relevance}(i)=
\begin{cases}
  1 & \text{if } i \text{ in recommendations set is liked by a user } u \text{ in test set}\\    
  0 & \text{otherwise}   
\end{cases}
$$

***

**References**

Microsoft Corporation. (2018, January). Apply Diversity Metrics. GitHub. https://github.com/microsoft/recommenders/blob/0d2385681b2320f98d5ff0e448f505146b69df99/examples/03_evaluate/als_movielens_diversity_metrics.ipynb

Zhang, Y. C., Séaghdha, D. Ó., Quercia, D., & Jambor, T. (2012, February). Auralist: introducing serendipity into music recommendation. In Proceedings of the fifth ACM international conference on Web search and data mining (pp. 13-22).

Yan, Ziyou. (2020, April). Serendipity: Accuracy’s Unpopular Best Friend in Recommenders. eugeneyan.com. https://eugeneyan.com/writing/serendipity-and-accuracy-in-recommender-systems/.
 
 
 
 
 

In [39]:
# Function for generating serendipity, distributional coverage and harmonic mean scores
def evaluate(EXPERIMENT, TRAINING_SET):
    
    MODEL = EXPERIMENT.models[0]
    
    # Get disctionary of rated and recommended items
    rated, recommendations = make_recommendations(MODEL, TRAINING_SET)


    ### PREPARING DATAFRAMES ###
    # Make recommended items into a dataframe for MS recommenders
    data = []
    
    # Iterate over the dictionary keys and values
    for key, values in recommendations.items():
        # Iterate over the values and append rows to the data list
        for value in values:
            data.append([key, value])
    
    # Create the DataFrame with the specified columns
    recommendations_df = pd.DataFrame(data, columns=['userID', 'itemID'])

    # Make the testing set into a dataframe for MS recommenders
    train_df = TRAINING_SET[['userID','catID']].reset_index()
    train_df = train_df[['userID','catID']]
    train_df = train_df.rename(columns={'userID':'userID', 'catID':'itemID'})


    ### METRICS ###
    # Calculate serendipity
    serendipity_score = serendipity(train_df, recommendations_df)

    # Calculate coverage
    dist_coverage_score = distributional_coverage(train_df, recommendations_df)

    # Extract 
    for result in EXPERIMENT.result:
        model_name = result.model_name
        fone10 = result.metric_avg_results['F1@10']
        ncrr = result.metric_avg_results['NCRR@-1']
        ndcg = result.metric_avg_results['NDCG@-1']

    # Calculate harmonic mean
    h_mean = hmean([fone10, ncrr, ndcg, dist_coverage_score, serendipity_score])

    print(model_name)
    print(f"F1@10: {fone10:.3f}")
    print(f"NCRR: {ncrr:.3f}")
    print(f"NDCG: {ndcg:.3f}")
    print(f"Distributional coverage: {dist_coverage_score:.3f}")
    print(f"Serendipity: {serendipity_score:.3f}")
    print(f"Harmonic mean: {h_mean:.3f}")
    print()


### 3.2 Interactions + images

#### 3.2.1 Visual Matrix Factorisation (VMF)

In [40]:
# Set the evaluation metric to optimise for hyperparameter tuning.
# We chose F1@10 because it seems to be the lowest.
fone10 = cornac.metrics.FMeasure(k=10)

# Define an evaluation method to split feedback into train and test sets
bm = CombinedBaseMethod.from_splits(
    train_data=train,
    test_data=test,
    val_data=validation,
    exclude_unknowns= SPLIT_TYPE == 'strat', # True when using stratified split, False when usinng lsuo
    verbose=True,
    item_image=item_image_modality,
)

# Instantiate VMF
vmf = cornac.models.VMF(
    k=10,
    d=10,
    n_epochs=20,
    batch_size=100,
    learning_rate=0.001,
    gamma=0.9,        # weight for previous/current gradient in RMSProp
    lambda_u=0.001,   # regularization parameter for user factors
    lambda_v=0.001,   # regularization parameter for item factors
    lambda_p=1.0,     # regularization parameter for user visual factors
    lambda_e=10.0,    # regularization parameter for the kernel embedding matrix
    use_gpu=True,
    verbose=False,
    #seed = SEED
)

# RandomSearch
rs_vmf = NewRandomSearch(
    model=vmf,
    space=[
        Discrete("k", [10, 20, 50, 100]),
        Discrete("d", [10, 20, 50, 100]),
        Continuous("learning_rate", low=0.001, high=1.0),
        Continuous("gamma", low=0.5, high=1.0),
        Continuous("lambda_u", low=0.001, high=10.0),
        Continuous("lambda_v", low=0.001, high=100.0),
        Continuous("lambda_p", low=0.001, high=100.0),
        Continuous("lambda_e", low=0.001, high=100.0),
    ],
    metric=HarmonicMean(
        10,
        Serendipity(),
        cornac.metrics.FMeasure(k=10),
        cornac.metrics.NCRR(),
        cornac.metrics.NDCG()
    ),
    eval_method=bm,
    n_trails=20,
)

# Put everything into an experiment and run it
experiment_vmf = cornac.Experiment(eval_method=bm, models=[rs_vmf], metrics=eval_metrics, user_based=False)
experiment_vmf.run()

# Print best params
print('Random search best params: ', rs_vmf.best_params)

creating from splits
initialising Combined Base
rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 104
Number of items = 400
Number of ratings = 3874
Max rating = 1.0
Min rating = 0.0
Global mean = 0.4
---
Test data:
Number of users = 100
Number of items = 195
Number of ratings = 487
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 96
Number of items = 184
Number of ratings = 476
---
Total users = 104
Total items = 400

[RandomSearch_VMF] Training started!

[RandomSearch_VMF] Evaluation started!


Ranking:   0%|          | 0/100 [00:00<?, ?it/s]

Ranking:   0%|          | 0/96 [00:00<?, ?it/s]


VALIDATION:
...
                 |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Time (s)
---------------- + ------ + ------------ + ------- + ------- + ----------- + --------
RandomSearch_VMF | 0.0217 |       0.0060 |  0.0314 |  0.2053 |      0.0091 |   5.9817

TEST:
...
                 |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Train (s) | Test (s)
---------------- + ------ + ------------ + ------- + ------- + ----------- + --------- + --------
RandomSearch_VMF | 0.0146 |       0.0048 |  0.0238 |  0.1891 |      0.0084 |  668.7049 |   9.0871

Random search best params:  {'d': 10, 'gamma': 0.9969308122374138, 'k': 10, 'lambda_e': 30.350529627393342, 'lambda_p': 13.904005170649988, 'lambda_u': 5.198992858085342, 'lambda_v': 96.99342413773483, 'learning_rate': 0.17524287094021548}


In [41]:
EXPERIMENT = experiment_vmf
TRAINING_SET = df_feedback_train

# Evaluate and calculate harmonic mean
evaluate(EXPERIMENT, TRAINING_SET)

RandomSearch_VMF
F1@10: 0.015
NCRR: 0.024
NDCG: 0.189
Distributional coverage: 8.633
Serendipity: 0.814
Harmonic mean: 0.043



#### 3.2.2 Visual BPR
We use the leave some users out split because it gives better performance

In [42]:
# # If there are 0s in the rating/like column,
# # Prepare the data in this format, keeping only rows where like == 1 (i.e. True)
# # This is because  CausalRec​ and VBPR utilizes the uij_iter​ function, which samples negative items j​ for each positive item i​.
# # However, the negative items should not appear in the feedback​ matrix

if TARGET == 'like':
    merged_new = [(t[0], t[1], 1) for t in merged if t[2]]
    train_new = [(t[0], t[1], 1) for t in train if t[2]]
    test_new = [(t[0], t[1], 1) for t in test if t[2]]
    validation_new = [(t[0], t[1], 1) for t in validation if t[2]]
elif TARGET == 'rating_add':
    merged_new = merged
    train_new = train
    test_new = test
    validation_new = validation


In [43]:
# Set the evaluation metric to optimise for hyperparameter tuning.
# We chose F1@10 because it seems to be the lowest.
fone10 = cornac.metrics.FMeasure(k=10)

# Define an evaluation method to split feedback into train and test sets
bm = CombinedBaseMethod.from_splits(
    train_data=train_new,
    test_data=test_new,
    val_data=validation_new,
    #rating_threshold=0.5,
    exclude_unknowns= SPLIT_TYPE == 'strat', # True when using stratified split, False when usinng lsuo
    verbose=True,
    item_image=item_image_modality,
)


# Instantiate VBPR
vbpr = cornac.models.VBPR(
    k=10,               # dimension of the gamma latent factors
    k2=10,              # dimension of the tetha latent factors
    n_epochs=20,
    batch_size=100,
    learning_rate=0.005,
    lambda_w=0.01,      # regularization hyperparameter for latent factor weights
    lambda_b=0.01,      # regularization hyperparameter for biases
    lambda_e=0.0,       # regularization hyperparameter for embedding matrix E and beta prime vector
    use_gpu=True,
    verbose=False,
    #seed = SEED
)

# RandomSearch
rs_vbpr = NewRandomSearch(
    model=vbpr,
    space=[
        Discrete("k", [10, 20, 50, 100]),
        Discrete("k2", [10, 20, 50, 100]),
        Continuous("learning_rate", low=0.001, high=1.0),
        Continuous("lambda_w", low=0.001, high=10.0),
        Continuous("lambda_b", low=0.001, high=100.0),
        Continuous("lambda_e", low=0.001, high=100.0),
    ],
    metric=HarmonicMean(
        10,
        Serendipity(),
        cornac.metrics.FMeasure(k=10),
        cornac.metrics.NCRR(),
        cornac.metrics.NDCG()
    ),
    eval_method=bm,
    n_trails=20,
)

# Put everything into an experiment and run it
experiment_vbpr = cornac.Experiment(eval_method=bm, models=[rs_vbpr], metrics=eval_metrics, user_based=False)
experiment_vbpr.run()

# Print best params
print('Random search best params: ', rs_vbpr.best_params)

creating from splits
initialising Combined Base
rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 104
Number of items = 400
Number of ratings = 3874
Max rating = 1.0
Min rating = 0.0
Global mean = 0.4
---
Test data:
Number of users = 100
Number of items = 195
Number of ratings = 487
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 96
Number of items = 184
Number of ratings = 476
---
Total users = 104
Total items = 400

[RandomSearch_VBPR] Training started!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!

Ranking:   0%|          | 0/100 [00:00<?, ?it/s]

Ranking:   0%|          | 0/96 [00:00<?, ?it/s]


VALIDATION:
...
                  |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Time (s)
----------------- + ------ + ------------ + ------- + ------- + ----------- + --------
RandomSearch_VBPR | 0.0455 |       0.0120 |  0.0627 |  0.2494 |      0.0091 |   8.1018

TEST:
...
                  |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Train (s) | Test (s)
----------------- + ------ + ------------ + ------- + ------- + ----------- + --------- + --------
RandomSearch_VBPR | 0.0237 |       0.0065 |  0.0592 |  0.2363 |      0.0084 |  625.7288 |   6.6129

Random search best params:  {'k': 10, 'k2': 10, 'lambda_b': 32.95514183129859, 'lambda_e': 62.148379703438266, 'lambda_w': 5.770908150432974, 'learning_rate': 0.8869137411654933}


In [44]:
# Evaluate and calculate harmonic mean
EXPERIMENT = experiment_vbpr
TRAINING_SET = df_feedback_train

evaluate(EXPERIMENT, TRAINING_SET)

RandomSearch_VBPR
F1@10: 0.024
NCRR: 0.059
NDCG: 0.236
Distributional coverage: 8.633
Serendipity: 0.814
Harmonic mean: 0.077



#### 3.2.3 CausalRec

In [45]:
# Set the evaluation metric to optimise for hyperparameter tuning.
# We chose F1@10 because it seems to be the lowest.
fone10 = cornac.metrics.FMeasure(k=10)

# Define an evaluation method to split feedback into train and test sets
bm = CombinedBaseMethod.from_splits(
    train_data=train_new,
    test_data=test_new,
    val_data=validation_new,
    exclude_unknowns= SPLIT_TYPE == 'strat', # True when using stratified split, False when using lsuo
    verbose=True,
    item_image=item_image_modality,
)


# Instantiate CausalRec
causalrec = cornac.models.CausalRec(
    k=32,                               # dimension of the gamma latent factors
    k2=32,                              # dimension of the theta latent factors
    n_epochs=20,    
    batch_size=100,                     # batch size for SGD
    learning_rate=0.001,                # learning rate for SGD
    lambda_w=1,                         # regularization hyperparameter for latent factor weights
    lambda_b=0.01,                      # regularization hyperparameter for biases
    lambda_e=0.0,                       # regularization hyperparameter for embedding matrix E and beta prime vector
    mean_feat=img_features.mean(axis=0),    # The mean feature of all item embeddings serving as the no-treatment during causal inference
    tanh=1,                             # The number of tanh layers on the visual feature transformation
    lambda_2=0.8,                       # coefficient controlling the elimination of the visual bias in Eq. (28)
    use_gpu=True,
    trainable = True,
    verbose = False,
    seed = SEED
)

# RandomSearch
rs_causalrec = NewRandomSearch(
    model=causalrec,
    space=[
        Discrete("k", [10, 20, 30, 50, 100]),
        Discrete("k2", [10, 20, 30, 50, 100]),
        Continuous("learning_rate", low=0.001, high=1.0),
        Continuous("lambda_w", low=1, high=100.0),
        Continuous("lambda_b", low=0.01, high=100.0),
        Continuous("lambda_e", low=0.00, high=100.0),
        Discrete("tanh", [0, 1]),
        Continuous("lambda_2", low=0.01, high=100.0)
    ],
    metric=HarmonicMean(
        10,
        Serendipity(),
        cornac.metrics.FMeasure(k=10),
        cornac.metrics.NCRR(),
        cornac.metrics.NDCG()
    ),
    eval_method=bm,
    n_trails=20,
)

# Put everything into an experiment and run it
experiment_causalrec = cornac.Experiment(eval_method=bm, models=[rs_causalrec], metrics=eval_metrics, user_based=False)
experiment_causalrec.run()

# Print best params
print('Random search best params: ', rs_causalrec.best_params)

creating from splits
initialising Combined Base
rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 104
Number of items = 400
Number of ratings = 3874
Max rating = 1.0
Min rating = 0.0
Global mean = 0.4
---
Test data:
Number of users = 100
Number of items = 195
Number of ratings = 487
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 96
Number of items = 184
Number of ratings = 476
---
Total users = 104
Total items = 400



[RandomSearch_CausalRec] Training started!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!

[RandomSearch_CausalRec] Evaluation started!


Ranking:   0%|          | 0/100 [00:00<?, ?it/s]

Ranking:   0%|          | 0/96 [00:00<?, ?it/s]


VALIDATION:
...
                       |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Time (s)
---------------------- + ------ + ------------ + ------- + ------- + ----------- + --------
RandomSearch_CausalRec | 0.0113 |       0.0040 |  0.0367 |  0.2257 |      0.0091 |   6.2968

TEST:
...
                       |  F1@10 | HarmonicMean | NCRR@-1 | NDCG@-1 | Serendipity | Train (s) | Test (s)
---------------------- + ------ + ------------ + ------- + ------- + ----------- + --------- + --------
RandomSearch_CausalRec | 0.0149 |       0.0048 |  0.0351 |  0.2190 |      0.0084 | 1195.2274 |   7.3389

Random search best params:  {'k': 20, 'k2': 20, 'lambda_2': 67.18710261552292, 'lambda_b': 4.391715490391161, 'lambda_e': 76.67389993957846, 'lambda_w': 34.29833888401744, 'learning_rate': 0.1954831275533693, 'tanh': 1}


In [46]:
# Evaluate and calculate harmonic mean
EXPERIMENT = experiment_causalrec
TRAINING_SET = df_feedback_train

evaluate(EXPERIMENT, TRAINING_SET)

RandomSearch_CausalRec
F1@10: 0.015
NCRR: 0.035
NDCG: 0.219
Distributional coverage: 8.633
Serendipity: 0.814
Harmonic mean: 0.049



### 3.4 Interactions + images + text

#### 3.4.1 VBPR

In [47]:
# Define an evaluation method to split feedback into train and test sets
bm = BaseMethod.from_splits(
    train_data=train_new,
    test_data=test_new,
    val_data=validation_new,
    verbose=True,
    exclude_unknowns= SPLIT_TYPE == 'strat', # True when using stratified split, False when using lsuo
    item_image=item_image_modality,
    item_text=item_text_modality
)

# Instantiate VBPR
vbpr_com = cornac.models.VBPR(
    k=10,               # dimension of the gamma latent factors
    k2=10,              # dimension of the tetha latent factors
    n_epochs=20,
    batch_size=100,
    learning_rate=0.005,
    lambda_w=0.01,      # regularization hyperparameter for latent factor weights
    lambda_b=0.01,      # regularization hyperparameter for biases
    lambda_e=0.0,       # regularization hyperparameter for embedding matrix E and beta prime vector
    use_gpu=True,
    verbose=False,
    seed = SEED
)

# RandomSearch
rs_vbpr_com = RandomSearch(
    model=vbpr_com,
    space=[
        Discrete("k", [10, 20, 50, 100]),
        Discrete("k2", [10, 20, 50, 100]),
        Continuous("learning_rate", low=0.001, high=1.0),
        Continuous("lambda_w", low=0.001, high=10.0),
        Continuous("lambda_b", low=0.001, high=100.0),
        Continuous("lambda_e", low=0.001, high=100.0),
    ],
    metric=fone10,
    eval_method=bm,
    n_trails=20,
)

# Put everything into an experiment and run it
experiment_vbpr_com = cornac.Experiment(eval_method=bm, models=[rs_vbpr_com], metrics=eval_metrics, user_based=False)
experiment_vbpr_com.run()

# Print best params
print('Random search best params: ', rs_vbpr_com.best_params)


rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 104
Number of items = 400
Number of ratings = 3874
Max rating = 1.0
Min rating = 0.0
Global mean = 0.4
---
Test data:
Number of users = 100
Number of items = 195
Number of ratings = 487
Number of unknown users = 0
Number of unknown items = 0
---
Validation data:
Number of users = 96
Number of items = 184
Number of ratings = 476
---
Total users = 104
Total items = 400

[RandomSearch_VBPR] Training started!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!
Optimization finished!

[RandomSearch_VBPR] Evaluation started!


Ranking:   0%|          | 0/100 [00:00<?, ?it/s]

TypeError: compute() missing 2 required positional arguments: 'seen_items' and 'reco_items'

In [None]:
# Evaluate and calculate harmonic mean
EXPERIMENT = experiment_vbpr_com
TRAINING_SET = df_feedback_train

evaluate(EXPERIMENT, TRAINING_SET)

#### 3.4.2 CTR

In [None]:
# Define an evaluation method to split feedback into train and test sets
bm = BaseMethod.from_splits(
    train_data=train_new,
    test_data=test_new,
    val_data=validation_new,
    verbose=True,
    item_image=item_image_modality,
    item_text=item_text_modality
)

# Instantiate CTR model
ctr_com = cornac.models.CTR(k=250, max_iter=200, lambda_v=1, verbose = False, seed = 2023)

# RandomSearch
rs_ctr_com = RandomSearch(
    model=ctr_com,
    space=[
        Discrete("k", [50, 75, 100, 150, 200]),
        Continuous("lambda_u", low=1e-4, high=1e1),
        Continuous("lambda_v", low=1e-4, high=1e1),
        Continuous("a", low=0.9, high=1),
        Continuous("b", low=0.0, high=0.1),
        Continuous("eta", low=0.001, high=0.1),
    ],
    metric=cornac.metrics.FMeasure(k=10),
    eval_method=bm,
    n_trails=30,
)

# Put everything into an experiment and run it
experiment_ctr_com = cornac.Experiment(eval_method=bm, models=[rs_ctr_com], metrics=eval_metrics, user_based=False)
experiment_ctr_com.run()

# Print best params
print('Random search best params: ', rs_ctr_com.best_params)

In [None]:
# Evaluate and calculate harmonic mean
EXPERIMENT = experiment_ctr_com
TRAINING_SET = df_feedback_train

evaluate(EXPERIMENT, TRAINING_SET)

## 4. Retraining model on all data and make recommendations

In [None]:
# Recap which is the best image model

print("### Ratings + Images ###")
experiments = [experiment_vmf, experiment_vbpr, experiment_causalrec]

for e in experiments:
    evaluate(e, df_feedback_train)

print("### Ratings + Images + Text ###")
experiments = [experiment_vbpr_com, experiment_ctr_com]

for e in experiments:
    evaluate(e, df_feedback_train)

### 4.1 Retraining best model based on best hyperparams
The best model was VBPR.

In [None]:
# Define an evaluation method to split feedback into train and test sets
bm = BaseMethod.from_splits(
    train_data=train_new, # # VBPR won't work if we do + test or + validation
    test_data=test_new,
    val_data=validation_new,
    verbose=True,
    exclude_unknowns= SPLIT_TYPE == 'strat', # True when using stratified split, False when using lsuo
    item_image=item_image_modality,
    item_text=item_text_modality
)

# Retrieve the best hyperparams
params = rs_vbpr_com.best_params

# Train the full model using the best hyperparams
vbpr_full = cornac.models.VBPR(
    **params,
    n_epochs=20,
    batch_size=100,
    use_gpu=True,
    verbose=False,
    seed = SEED
    )


# Put everything into an experiment and run it
experiment_bestmodel = cornac.Experiment(eval_method=bm, models=[vbpr_full], metrics=eval_metrics, user_based=False)
experiment_bestmodel.run()

# Make the recommendations
MODEL = vbpr_full
TRAINING_SET_DF = df_feedback
rated, recommendations = make_recommendations(MODEL, TRAINING_SET_DF)

In [None]:
EXPERIMENT = experiment_bestmodel
TRAINING_SET = df_feedback

# Evaluate and calculate harmonic mean
evaluate(EXPERIMENT, TRAINING_SET)

### 4.2 Visualise recommendations

In [None]:
user = '0a733639-405d-4c72-9c43-4f85897f406d'

num_rated = len(rated[user])
print(f"Rated: {num_rated} cats")
show_results(user, rated)

num_recommended = len(recommendations[user])
print(f"Recommended: {num_recommended} cats")
show_results(user, recommendations)

### 4.3 Save output

In [None]:
df_output = pd.DataFrame(recommendations.items(), columns=['userID','catID'])

df_output.head()

In [None]:
df_output.to_csv('../data/recommendations.csv', index=False)