# Evaluation - K-fold Cross Validation
***Metrics:***
- Precision (Truncated)
- Recall
- normalized Discounted Cumulative Gain (nDCG)
- Hit Rate

In [7]:
# python 3.8 required!
%pip install scikit-learn
%pip install cython
%pip install pandas
%pip install numpy
%pip install rankfm

import warnings
warnings.filterwarnings("ignore")
from rankfm.rankfm import RankFM
from rankfm.evaluation import (discounted_cumulative_gain, diversity, hit_rate,
                               precision, recall, reciprocal_rank)
from pandas import DataFrame
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
import pickle
import math

You should consider upgrading via the '/Users/izzy/.pyenv/versions/3.8.15/bin/python -m pip install --upgrade pip' command.[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.
You should consider upgrading via the '/Users/izzy/.pyenv/versions/3.8.15/bin/python -m pip install --upgrade pip' command.[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.
You should consider upgrading via the '/Users/izzy/.pyenv/versions/3.8.15/bin/python -m pip install --upgrade pip' command.[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.
You should consider upgrading via the '/Users/izzy/.pyenv/versions/3.8.15/bin/python -m pip install --upgrade pip' command.[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.
You should consider upgrading via the '/Users/izzy/.pyenv/versions/3.8.15/bin/python -m pip install --upgrade pip' command.[0m[33m
[0mNote: you may need to restart the kernel to use upd

In [10]:
"""
global vars
"""
rating_converter = {
    'user': int,
    'isbn': str,
    'rating': float
}

reader_config = {
    'encoding': 'utf-8',
    'separator': ';',
}

ratings = pd.read_csv('../data/BX-Book-Ratings-cleaned.csv', sep=reader_config['separator'], converters=rating_converter, encoding=reader_config['encoding'])
mean_rating = ratings.loc[ratings['rating'] > 0]['rating'].mean()
mean_rating_per_user = ratings.loc[ratings['rating'] > 0].select_dtypes(include=['int64', 'float64']).groupby('user').mean()
user_features = pickle.load(open("user_features", "rb"))
book_features = pickle.load(open("book_features", "rb"))

def good_ratings_and_implicit_filter(group):
    avg_rating = group['rating'].mean()
    
    return group.loc[(group['rating'] >= avg_rating) | (group['rating'].eq(0))]


In [11]:
def precision_at_k(model: RankFM, test_set: DataFrame, k=10):
    """
    Truncated precision
    If user in test set has less than k entries, only x (count of entries in test set for that user) recommendations are made, and TPs are divided by x instead of k
    Calculates mean precision at k recommendations.

    -- Important Definitions --
    True Positives: Recommended and interaction between user and book exists and rating is good or it is implicit feedback (rating = 0)
    False Positives: Recommended and interaction between user and book does not exist or if exists, rating is bad
    Good Rating: Above User Average Rating and NOT 0
    Implicit fb: Rating = 0
    Bad Rating: Below User Average Rating and NOT 0
    """
    test_set = test_set.groupby('user').apply(good_ratings_and_implicit_filter).reset_index(drop=True)
    interactions_per_user = test_set.groupby('user')['isbn'].apply(set)
    test_users = test_set['user'].unique()
    all_precisions = []

    for user in test_users:
        gt_books: set = interactions_per_user[user] # user item pairs to be predicted

        if len(gt_books) >= k:
            recommended_books = set(model.recommend([user], n_items=k, filter_previous=True).loc[user])
            all_precisions.append(len(recommended_books & gt_books) / k)
        else:
            recommended_books = set(model.recommend([user], n_items=len(gt_books)).loc[user])
            all_precisions.append(len(recommended_books & gt_books) / len(gt_books))

    return np.mean(all_precisions)

def recall_at_k(model: RankFM, test_set: DataFrame, k=10):
    """
    Calculates mean recall at k recommendations.

    -- Important Definitions --
    False Negatives: Not Recommended but should have, bc has good rating or implicit fb
    """
    test_set = test_set.groupby('user').apply(good_ratings_and_implicit_filter).reset_index(drop=True)
    interactions_per_user = test_set.groupby('user')['isbn'].apply(set)
    test_users = test_set['user'].unique()
    recs: DataFrame = model.recommend(users=test_users, n_items=k,filter_previous=True, cold_start='nan')
    user_in_recs = recs.index.values

    all_recalls = []

    for user in user_in_recs:
        recommended_books = set(recs.loc[user])
        gt_books = interactions_per_user[user]

        all_recalls.append(len(recommended_books & gt_books) / len(gt_books))
    
    return np.mean(all_recalls)

def rank_at_k(model: RankFM, test_set: DataFrame, k=10):
    """
    Calculates mean rank at k recommendations.
    """
    test_set = test_set.groupby('user').apply(good_ratings_and_implicit_filter).reset_index(drop=True)
    interactions_per_user = test_set.groupby('user')['isbn'].apply(set)
    test_users = test_set['user'].unique()
    recs: DataFrame = model.recommend(users=test_users, n_items=k,filter_previous=True, cold_start='nan')
    user_in_recs = recs.index.values

    totalndcg = []
    for user in user_in_recs:
        recommended_books = set(recs.loc[user])
        gt_books = interactions_per_user[user]

        dcg = 0
        idcg = 0
        tpcount = 0

        for ind, b in enumerate(recommended_books):
            for r in gt_books:
                if r == b:
                    tpcount += 1
                    if(ind >= 2):
                        dcg += 1/math.log2(ind)
        for ind in range(1, tpcount):
            idcg += 1/math.log2(ind+1)
        
        if(tpcount>0):
            idcg += 1
            totalndcg.append(dcg/idcg)
    
    return np.mean(totalndcg)

In [13]:
min_10_r_per_u = ratings.groupby('user').filter(lambda r: r['rating'].count() >= 10)[["user", "isbn", "rating"]]
min_10_r_per_u = min_10_r_per_u.reset_index()

model = RankFM(factors=50, loss='warp', max_samples=100, alpha=0.01, sigma=0.1, learning_rate=0.10, learning_schedule='invscaling')
stratified_kfold = StratifiedKFold(n_splits=4, shuffle=True)

all_precisions = []
all_recalls = []
all_ndcgs = []
all_hit_rates = []

for i, (train_index, test_index) in enumerate(stratified_kfold.split(X=min_10_r_per_u["user"], y=min_10_r_per_u["isbn"])):
    print(f"Fold {i}:")
    train_set = min_10_r_per_u.iloc[train_index]
    test_set = min_10_r_per_u.iloc[test_index]

    train_user_features = user_features[user_features["User-ID"].isin(train_set['user'])]
    train_user_features.loc[:,"Age"] /= train_user_features["Age"].max()
    train_book_features = book_features[book_features["ISBN"].isin(train_set['isbn'])]
    train_book_features.loc[:,"bf_1"] /= train_book_features['bf_1'].max()

    weights =[]

    for i, row in train_set.iterrows():
        if row.rating == 0:
            weights.append(1)
        elif row.rating >= mean_rating_per_user.loc[row.user].item():
            weights.append(2)
        else:
            weights.append(0)

    print("fitting...")
    model.fit(
        interactions=train_set[["user", "isbn"]],
        user_features=train_user_features,
        item_features=train_book_features,
        sample_weight=np.array(weights),
        epochs = 15
    )

    print("validating...")
    all_precisions.append(precision_at_k(model=model, test_set=test_set, k=10))
    all_recalls.append(recall_at_k(model=model, test_set=test_set, k=10))
    all_ndcgs.append(rank_at_k(model=model, test_set=test_set, k=10))
    all_hit_rates.append(hit_rate(model=model, test_interactions=test_set[["user", "isbn"]], k=10))

Fold 0:
fitting...
validating...
Fold 1:
fitting...
validating...
Fold 2:
fitting...
validating...
Fold 3:
fitting...
validating...


In [15]:
precision_mean = np.mean(all_precisions)
recall_mean = np.mean(all_recalls)
ndcg_mean = np.mean(all_ndcgs)
hit_rate_mean = np.mean(all_hit_rates)

print("precision: {}".format(precision_mean))
print("recall: {}".format(recall_mean))
print("nDCG: {}".format(ndcg_mean))
print("hit rate: {}".format(hit_rate_mean))

precision: 0.0262411855626625
recall: 0.019863213285777084
nDCG: 0.41146381789729336
hit rate: 0.16548891382672223


In [6]:
import pickle
from pandas import DataFrame

users: DataFrame = pickle.load(open("users", "rb"))
books: DataFrame= pickle.load(open("books", "rb"))

user_ids = users["User-ID"].unique()
book_ids = books["ISBN"].unique()

print("model recommends for {} users".format(len(user_ids)))
print("model considers {} books for recommendations".format(len(book_ids)))

model recommends for 47406 users
model considers 183084 books for recommendations
