AI PROJECT - NOVELTY

Study in the question: When referring to users with variant gaming history, will novelty-optimized recommender systems recommend less popular items than baseline algorithms while maintaining relevance in the recommendations?

Konstantina Ellina
20230419

-----

First import the important libraries and define useful functions for later

In [None]:
###   IMPORT LIBRARIES

import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from scipy.sparse import csr_matrix
import operator
from recpack.util import get_top_K_values
from recpack.util import get_top_K_ranks
from sklearn.neighbors import NearestNeighbors
from metrics.novelty import get_novelty
from metrics.coverage import get_coverage
from metrics.recall import get_calibrated_recall
from metrics.fairness import get_publisher_fairness
from metrics.gini_index import get_gini_index
from metrics.ils import get_intra_list_similarity
from metrics.ndcg import get_ndcg
from collections import Counter

In [2]:
###   USEFUL FUNCTIONS FOR RECOMMENDER ALGORITHMS BELOW

def get_precision_at_k_user(pred: list[int], true: list[int], k: int) -> float: # use the function given in lecture-3 assignment
    if k < 0:
        raise ValueError("k must be greater than 0")
    elif k == 0:
        return 1.0

    if len(pred) == 0 and len(true) != 0:
        return 0.0
    elif len(true) == 0:
        return 1.0

    # convert to set for faster lookup
    true_set = set(true)

    # get top k predicted items
    top_k = pred[:k]

    # check how many of these are relevant
    correct = 0
    for item in top_k:
        if item in true_set:
            correct += 1
            true_set.remove(item)

    # return precision at k
    return correct / len(top_k)


# calculate precision at k using lists
def get_precision_at_k(pred: dict[int, list[int]], true: dict[int, list[int]], k): # use the function given in lecture-3 assignment
    total_precision_at_k = 0
    for user in true.keys():
        total_precision_at_k += get_precision_at_k_user(pred[user], true[user], k)
    return total_precision_at_k / len(true.keys())


def matrix_to_list(mat: csr_matrix, is_sorted: bool = True) -> dict[int, list[int]]: # use the function given in lecture-3 assignment
    """
    A helper function designed to convert a user item matrix into 
    a dictionary of users and their items. It preserves the order 
    given by the values in the matrix (0 values are ignored).
    :mat: a matrix containing the user item interactions
    >>> matrix_to_list(csr_matrix([[1, 0, 1], [0, 1, 0]]))
    {0: [0, 2], 1: [1]}
    """
    data = {}
    for user in set(mat.nonzero()[0]):
        if is_sorted:
            item_rank = [(item, mat[user, item]) for item in mat[user].indices]
            sorted_item_rank = sorted(item_rank, key=operator.itemgetter(1))  # sorts the items ascending!
            data[user] = list(map(operator.itemgetter(0), sorted_item_rank))
        else:
            data[user] = [item for item in mat[user].indices]

    return data


# calculate item-item similarity
def sparse_cosine_similarity(mat: csr_matrix) -> csr_matrix: # use function already made in lecture-3 assignment following the orders there
    transp = mat.T
    dot_prod = transp.dot(mat) # item-item dot product

    l2_norm = np.sqrt(dot_prod.diagonal())
    matrix_normalized = np.outer(l2_norm, l2_norm)
    matrix_normalized[matrix_normalized == 0] = 1e-9

    similarity = dot_prod/matrix_normalized
    similarity = similarity.todense()

    np.fill_diagonal(similarity, 0) # self similarity will happen so make diagonal 0

    return csr_matrix(similarity)


# for popularity measure
def get_popular_items(interaction_matrix, threshold_percent=90):
    item_counts = Counter(interaction_matrix.indices)  # count interactions per item
    
    # theshold with items in the top 10%
    popularity_threshold = np.percentile(list(item_counts.values()), threshold_percent)
    
    # find the popular items, whose interactions are more than the threshold
    popular_items = {item for item, count in item_counts.items() if count >= popularity_threshold}
    return popular_items

def get_popularity(pred, popular_items, k):
    total_recommended = 0
    total_popular_recommended = 0

    for user, items in pred.items():
        top_k_items = items[:k] # choose the top k items in the predictions
        total_recommended += len(top_k_items) # count all recommendations
        total_popular_recommended += len([item for item in top_k_items if item in popular_items]) # count all popular recommendations
    
    if total_recommended == 0: # to avoid division with 0
        return 0.0
    return total_popular_recommended / total_recommended # return the ratio as popularity metric

LOAD THE DATASETS NEEDED FOR THE RESEARCH

In [3]:
###   LOAD DATASETS

games = pd.read_csv('cleaned_datasets_students/games.csv')
train_interactions = pd.read_csv('cleaned_datasets_students/train_interactions.csv')
test_interactions = pd.read_csv('cleaned_datasets_students/test_interactions_in.csv')

DO SOME CLEANING IN THE DATASETS IF NEEDED

In [4]:
###   check with asserts if further cleaning is needed for dataset(no need here for extra cleaning)

def cleaning_datasets(dataset):
    ###   CLEANING
    dataset = dataset.drop_duplicates() # drop duplicates as cleaning

    # check for missing values in all columns
    assert dataset.isnull().sum().sum() == 0, "Dataset has missing values."

    # identify missing values
    missing_values = dataset[dataset.isnull().any(axis=1)]
    assert missing_values.empty, f"These are the rows with the missing values:\n{missing_values}"

    # check for invalid IDs
    invalid_ids = dataset[(dataset['user_id'] < 0) | (dataset['item_id'] < 0)]
    assert invalid_ids.empty, f"These are the rows with the invalid IDs:\n{invalid_ids}"

    # check for missing or incorrect playtime
    missing_playtime = dataset[dataset['playtime'].isnull()]
    assert missing_playtime.empty, f"These are the rows with the missing playtime:\n{missing_playtime}"

    return dataset

train_interactions = cleaning_datasets(train_interactions)
test_interactions = cleaning_datasets(test_interactions)


In [5]:
###   CHECK IF ALL ITEMS IN TEST DATASET ARE ALSO IN TRAIN DATASET, AND IF NOT THEN EXCLUDE THEM. useful for evaluation later

def check_missing_items(train_interactions, test_interactions):
    train_items = set(train_interactions['item_id'].unique())
    test_items = set(test_interactions['item_id'].unique())
    missing_items = test_items - train_items # find the missing items

    if missing_items:
        print(f"Extra items in test set: {missing_items}")
    else:
        print("Train and test have the same items.")

    return missing_items

missing_items = check_missing_items(train_interactions, test_interactions)

if len(missing_items) != 0: # if there are missing items
    print('The extra items are:', len(missing_items))
    filtered_test_interactions = test_interactions[~test_interactions['item_id'].isin(missing_items)] # exclude them
    print("\nExtra items were excluded from original test dataset.")
    print(f"Original test interactions: {len(test_interactions)}")
    print(f"Filtered test interactions: {len(filtered_test_interactions)}")


Extra items in test set: {np.int64(7168), np.int64(4103), np.int64(4106), np.int64(8202), np.int64(6166), np.int64(2590), np.int64(6183), np.int64(7735), np.int64(6204), np.int64(6716), np.int64(5703), np.int64(7250), np.int64(3667), np.int64(6228), np.int64(599), np.int64(4695), np.int64(6745), np.int64(6747), np.int64(4197), np.int64(7269), np.int64(7274), np.int64(6251), np.int64(7285), np.int64(3705), np.int64(5756), np.int64(7292), np.int64(2689), np.int64(3206), np.int64(4743), np.int64(3730), np.int64(4242), np.int64(5782), np.int64(154), np.int64(7322), np.int64(2716), np.int64(4253), np.int64(7841), np.int64(4775), np.int64(6312), np.int64(4286), np.int64(5313), np.int64(5825), np.int64(5315), np.int64(4298), np.int64(719), np.int64(6868), np.int64(3286), np.int64(3287), np.int64(4822), np.int64(4825), np.int64(5850), np.int64(6876), np.int64(3805), np.int64(4833), np.int64(4323), np.int64(6373), np.int64(5869), np.int64(3822), np.int64(4846), np.int64(6385), np.int64(7921), n

SPARSE MATRIX - After cleaning, create sparse matrix to work with in the rest of the research (followed the method given in the assignment in the course)

In [6]:
### CREATE SPARSE MATRICES FOR TRAIN AND TEST INTERACTIONS

# FOR TRAIN INTERACTIONS
train_user_id_mapping = {user_id: idx for idx, user_id in enumerate(train_interactions['user_id'].unique())} # user id mapping
train_interactions['new_user_id'] = train_interactions['user_id'].map(train_user_id_mapping) # apply on train

item_id_mapping = {item_id: idx for idx, item_id in enumerate(train_interactions['item_id'].unique())} # item id mapping(this will be the same in test datasets bc of same items)
train_interactions['new_item_id'] = train_interactions['item_id'].map(item_id_mapping) # apply on train

# keep mappings as dictionaries for later
train_new_to_old_user_id_mapping = {v: k for k, v in train_user_id_mapping.items()}
new_to_old_item_id_mapping = {v: k for k, v in item_id_mapping.items()}

# attributes for train matrix and apply them to csr_matrix to create one
num_train_users = train_interactions['new_user_id'].nunique()
num_items = train_interactions['new_item_id'].nunique()
train_rows = train_interactions['new_user_id'].values
train_cols = train_interactions['new_item_id'].values
train_data = train_interactions['playtime'].values  # use playtime as data for sparse matrix

# create the matrix using the attributes above
train_interaction_matrix_csr = csr_matrix((train_data, (train_rows, train_cols)), shape=(num_train_users, num_items))


# FOR TEST INTERACTIONS (use filtered_test_interactions here)
filtered_test_interactions = filtered_test_interactions.copy() # create copy bc of warnings
test_user_id_mapping = {user_id: idx for idx, user_id in enumerate(filtered_test_interactions['user_id'].unique())} # different user id mapping
filtered_test_interactions['new_user_id'] = filtered_test_interactions['user_id'].map(test_user_id_mapping) # apply on test
filtered_test_interactions['new_item_id'] = filtered_test_interactions['item_id'].map(item_id_mapping) # apply same item id mapping to test

test_new_to_old_user_id_mapping = {v: k for k, v in test_user_id_mapping.items()} # keep the user id mapping

# attributes for test matrix and apply them to csr_matrix to create one
num_test_users = filtered_test_interactions['new_user_id'].nunique()
test_rows = filtered_test_interactions['new_user_id'].values
test_cols = filtered_test_interactions['new_item_id'].values
test_data = filtered_test_interactions['playtime'].values

# create the matrix using the attributes above
test_interaction_matrix_csr = csr_matrix((test_data, (test_rows, test_cols)), shape=(num_test_users, num_items))

FILTERING - In this research, we are interested in the users that have played more than 3 games and more than 200 minutes in general. So here I filter the users and change the id mapping as needed(followed the methods shown in assignments in the course)

In [7]:
###   FILTER OUT USERS WITH SMALL GAMING HISTORY FROM SPARSE MATRIX

def MinItemsPerUser(interaction_matrix_csr, user_mapping, min_items, min_time):
    user_interactions_mask = interaction_matrix_csr.getnnz(axis=1) >= min_items # filter out users with strictly less than 3 interactions
    user_time_mask = interaction_matrix_csr.sum(axis=1).A1 >= min_time # filter out users with strictly less than 200 mins of playtime
    user_total_mask = user_interactions_mask & user_time_mask # create the total masks with both filters
    
    filtered_interaction_matrix = interaction_matrix_csr[user_total_mask] # matrix with filters

    # update the ids again
    updated_user_mapping = {new_idx: user_mapping[old_idx] for new_idx, old_idx in enumerate(np.where(user_total_mask)[0])}
    
    return filtered_interaction_matrix, updated_user_mapping

# apply the filters and get the new filtered matrix and the new id mapping both for train and test but separately
train_interaction_matrix_csr, train_new_to_old_user_id_mapping = MinItemsPerUser(train_interaction_matrix_csr, train_new_to_old_user_id_mapping, min_items=3, min_time=200)
test_interaction_matrix_csr, test_new_to_old_user_id_mapping = MinItemsPerUser(test_interaction_matrix_csr, test_new_to_old_user_id_mapping, min_items=3, min_time=200)

# check if the filters are applied correctly and all users have more than 3 interactions and more than 600 mins playtime
assert train_interaction_matrix_csr.getnnz(axis=1).min() >= 3
assert train_interaction_matrix_csr.sum(axis=1).min() >= 200
assert test_interaction_matrix_csr.getnnz(axis=1).min() >= 3
assert test_interaction_matrix_csr.sum(axis=1).min() >= 200

TEST SPLIT - split the sparse matrix of test_interactions to use half of it for the predicted recommendations and half of it for the true recommendations used later

In [8]:
###   SPLIT TEST_INTERACTIONS_MATRIX_CSR

np.random.seed(42) # define for reproducability

# get the data needed from the dataset
nonzero_indices = test_interaction_matrix_csr.nonzero()
nonzero_data = test_interaction_matrix_csr.data
num_test_interactions = len(nonzero_data)

shuffled_indices = np.random.permutation(num_test_interactions) # suffle the indices for randomness
split_point = num_test_interactions // 2 # split in half

# define the indices depending on the split point
test_mat_indices = shuffled_indices[:split_point]
test_true_indices = shuffled_indices[split_point:]

# create test_mat as the matrices before (this is used for the predictions)
test_mat_rows = nonzero_indices[0][test_mat_indices]
test_mat_cols = nonzero_indices[1][test_mat_indices]
test_mat_data = nonzero_data[test_mat_indices]
test_mat = csr_matrix((test_mat_data, (test_mat_rows, test_mat_cols)), shape=test_interaction_matrix_csr.shape) # make a new sparse matrix for the predictions

# create test_true as the matrices before (this is used for the true recommendations)
test_true_rows = nonzero_indices[0][test_true_indices]
test_true_cols = nonzero_indices[1][test_true_indices]
test_true_data = nonzero_data[test_true_indices]
test_true = csr_matrix((test_true_data, (test_true_rows, test_true_cols)), shape=test_interaction_matrix_csr.shape) # make a new sparse matrix for evaluation

SIMILARITY AND TRUE RECOMMENDATIONS

In [9]:
###   FIND TOP-K MOST SIMILAR ITEMS AND CALCULATE TRUE RECOMMENDATIONS

# item-item similarity to see how similar one item is to another based on user interactions
item_item_similarity = sparse_cosine_similarity(train_interaction_matrix_csr) # use train dataset here

# get top-n most similar items for each item in the similarity matrix
top_n = 20
item_item_similarity = get_top_K_values(item_item_similarity, top_n)

# this is the same true recommendations for every baseline algorithm and it is going to be used for comparison reasons
true_recommendations = matrix_to_list(test_true) # use test_true dataset here so we can evaluate models on train and test datasets

BASELINE AND NOVELTY-OPTIMIZED ALGORITHMS - Define the baseline algorithms and the novelty-optimized algorithms and find recommendations

In [10]:
###   ITEM-KNN BASELINE ALGORITHM

def item_knn_recommend(sim: csr_matrix, test: csr_matrix, n: int) -> dict[int, list[int]]: # recommend function following again the orders in lecture-3 assignment
    scores_matrix = test.dot(sim.T)
    scores_matrix = np.array(scores_matrix.todense()) # scores indicating the relevance of an item to a user

    recommendations = {}
    users = test.shape[0]
    for user in range(users): # for each user generate their top recommendations
        scores = scores_matrix[user]

        scores_sparse = csr_matrix(scores)
        top_n_ranks = get_top_K_ranks(scores_sparse, n) # top n items based on the scores

        highest_items = top_n_ranks.indices[top_n_ranks.data.argsort()]

        recommendations[user] = highest_items.tolist() # save top n recommended items for the user
    return recommendations


# calculate predicted recommendations
n = 20 # number of recommendations per user
pred_item_knn_baseline = item_knn_recommend(item_item_similarity, test_mat, n) # use test_mat in all the algorithms to find the recommendations

In [11]:
###   USER-KNN BASELINE ALGORITHM

def user_knn_recommend(train: csr_matrix, test: csr_matrix, k:int, n:int) -> dict[int, list[int]]:
    nn = NearestNeighbors(n_neighbors=k+1, metric='cosine', algorithm='auto', n_jobs=-1) # initialize the algorithm
    nn.fit(train) # fit to train matrix to compute the similar users
    
    # for each user in the test set find top-k similar users in train matrix
    distances, indices = nn.kneighbors(test)

    recommendations = {}
    for user, test_user_similar_users in enumerate(indices):
        similar_users = test_user_similar_users[1:]  # exclude the user itself
        
        similar_users_interactions = train[similar_users].sum(axis=0).A1  # sum interactions across items from similar users in train
        
        # top n items
        recommended_items = np.argpartition(-similar_users_interactions, range(n))[:n]
        recommendations[user] = recommended_items.tolist() # and save them to the dictionary
        
    return recommendations


# calculate predicted recommendations
n = 20
k = 10 # number of similar users
pred_user_knn_baseline = user_knn_recommend(train_interaction_matrix_csr, test_mat, k, n)

In [12]:
###   NOVELTY-ONLY ITEM-KNN ALGORITHM

# this function recommends only games the users haven't played
def item_knn_only_novel_recommend(sim: csr_matrix, test: csr_matrix, n: int) -> dict[int, list[int]]:
    scores_matrix = test.dot(sim.T)
    scores_matrix = np.array(scores_matrix.todense()) # scores indicating the relevance of an item to a user

    recommendations = {}
    users = test.shape[0]
    for user in range(users): # for each user generate their top recommendations
        scores = scores_matrix[user]

        interacted_items = test[user].indices # keep games already played by the user
        scores[interacted_items] = -np.inf # exclude them from scoring with giving them -infinity score(lowest)

        scores_sparse = csr_matrix(scores)
        top_n_ranks = get_top_K_ranks(scores_sparse, n) # top n items based on the scores

        highest_items = top_n_ranks.indices[top_n_ranks.data.argsort()]

        recommendations[user] = highest_items.tolist() # save top n recommended items for the user
    return recommendations


# calculate predicted recommendations
n = 20
pred_item_knn_only_novel = item_knn_only_novel_recommend(item_item_similarity, test_mat, n)

In [13]:
###   NOVELTY-ONLY USER-KNN ALGORITHM

def user_knn_only_novel_recommend(train: csr_matrix, test: csr_matrix, k:int, n:int) -> dict[int, list[int]]: # same as user-knn but now recommend only novel items
    nn = NearestNeighbors(n_neighbors=k+1, metric='cosine', algorithm='auto', n_jobs=-1)
    nn.fit(train)
    
    distances, indices = nn.kneighbors(test)
    
    recommendations = {}
    for user, test_user_similar_users in enumerate(indices):
        similar_users = test_user_similar_users[1:]  # exclude the user
        
        similar_user_items = train[similar_users].sum(axis=0).A1  # sum interactions across items from similar users in train
        
        # exclude items the user has already interacted with in the test matrix
        user_interactions = test[user].indices
        similar_user_items[user_interactions] = -999999

        # top-n items for recommendation
        recommended_items = np.argpartition(-similar_user_items, range(n))[:n]
        recommendations[user] = recommended_items.tolist()
        
    return recommendations


# calculate predicted recommendations
n = 20
k = 10
pred_user_knn_only_novel = user_knn_only_novel_recommend(train_interaction_matrix_csr, test_mat, k, n)

In [14]:
###   HYBRID NOVELTY RECOMMENDER

# recommend mostly novel items but still allow highly relevant previously seen items if they have a big score
def hybrid_novelty_recommend(sim: csr_matrix, test: csr_matrix, n: int, novelty_weight) -> dict[int, list[int]]: # uses item-knn method
    scores_matrix = test.dot(sim.T).toarray()

    recommendations = {}
    users = test.shape[0]
    for user in range(users):
        scores = scores_matrix[user]
        
        interacted_items = test[user].indices
        scores[interacted_items] *= (1 - novelty_weight)  # low the score for already seen items(if still bigger than novel items then recommend it)

        scores_sparse = csr_matrix(scores)
        top_n_ranks = get_top_K_ranks(scores_sparse, n) # top n items based on the scores

        highest_items = top_n_ranks.indices[top_n_ranks.data.argsort()]

        recommendations[user] = highest_items.tolist() # save top n recommended items for the user

    return recommendations


# calculate predicted recommendations
n = 20
pred_hybrid_novelty_03 = hybrid_novelty_recommend(item_item_similarity, test_mat, n, novelty_weight=0.3) # more relevance
pred_hybrid_novelty_05 = hybrid_novelty_recommend(item_item_similarity, test_mat, n, novelty_weight=0.5) # balanced
pred_hybrid_novelty_08 = hybrid_novelty_recommend(item_item_similarity, test_mat, n, novelty_weight=0.8) # more novelty

SANITY CHECKS - do some checks in the predictions to see if everything flows correctly

- check the stability of the recommender algorithm to see if they give the same results every time
- check for duplicate predictions in the algorithms

In [15]:
# run each algorithm 3 times to see if the results stay the same each time

def check_stability(stability_results, name): # check for the stability itself here
    for user in stability_results[0].keys():
        all_predictions = [set(pred[user]) for pred in stability_results]
        if not all(pred == all_predictions[0] for pred in all_predictions):
            print(f"Warning: Predictions for user {user} are not stable across iterations.")
            return
    print(name, ": predictions are stable across iterations.")

# run for 3 times all the algorithms
def run_predictions(model, item_item_similarity, train_matrix, test_fold_in, name, iterations=3, n=20, k=10):
    stability_results = []
    for _ in range(iterations):
        if name == "Item-KNN baseline":
            predictions = model(item_item_similarity, test_fold_in, n)
        if name == "User-KNN baseline":
            predictions = model(train_matrix, test_fold_in, k, n)
        if name == "Novelty-only Item-KNN":
            predictions = model(item_item_similarity, test_fold_in, n)
        if name == "Novelty-only User-KNN":
            predictions = model(train_matrix, test_fold_in, k, n)
        if name == "Hybrid algorithm with more relevance":
            predictions = model(item_item_similarity, test_fold_in, n, novelty_weight=0.3)
        if name == "Hybrid algorithm with balanced relevance and novelty":
            predictions = model(item_item_similarity, test_fold_in, n, novelty_weight=0.5)
        if name == "Hybrid algorithm with more novelty":
            predictions = model(item_item_similarity, test_fold_in, n, novelty_weight=0.7)

        stability_results.append(predictions)
    check_stability(stability_results=stability_results, name=name)


run_predictions(item_knn_recommend, item_item_similarity, train_interaction_matrix_csr, test_mat, name="Item-KNN baseline")
run_predictions(user_knn_recommend, item_item_similarity, train_interaction_matrix_csr, test_mat, name="User-KNN baseline")
run_predictions(item_knn_only_novel_recommend, item_item_similarity, train_interaction_matrix_csr, test_mat, name="Novelty-only Item-KNN")
run_predictions(user_knn_only_novel_recommend, item_item_similarity, train_interaction_matrix_csr, test_mat, name="Novelty-only User-KNN")
run_predictions(hybrid_novelty_recommend, item_item_similarity, train_interaction_matrix_csr, test_mat, name="Hybrid algorithm with more relevance")
run_predictions(hybrid_novelty_recommend, item_item_similarity, train_interaction_matrix_csr, test_mat, name="Hybrid algorithm with balanced relevance and novelty")
run_predictions(hybrid_novelty_recommend, item_item_similarity, train_interaction_matrix_csr, test_mat, name="Hybrid algorithm with more novelty")


Item-KNN baseline : predictions are stable across iterations.
User-KNN baseline : predictions are stable across iterations.
Novelty-only Item-KNN : predictions are stable across iterations.
Novelty-only User-KNN : predictions are stable across iterations.
Hybrid algorithm with more relevance : predictions are stable across iterations.
Hybrid algorithm with balanced relevance and novelty : predictions are stable across iterations.
Hybrid algorithm with more novelty : predictions are stable across iterations.


In [16]:
# in the predicted items, check if there are duplicates
def check_duplicate_predictions(pred_content):
    for user, items in pred_content.items():
        if len(items) != len(set(items)):
            print(f"Warning: User {user} has duplicate recommendations: {items}")
            return
    print("No duplicate recommendations found.")

check_duplicate_predictions(pred_item_knn_baseline)
check_duplicate_predictions(pred_user_knn_baseline)
check_duplicate_predictions(pred_item_knn_only_novel)
check_duplicate_predictions(pred_user_knn_only_novel)
check_duplicate_predictions(pred_hybrid_novelty_03)
check_duplicate_predictions(pred_hybrid_novelty_05)
check_duplicate_predictions(pred_hybrid_novelty_08)

No duplicate recommendations found.
No duplicate recommendations found.
No duplicate recommendations found.
No duplicate recommendations found.
No duplicate recommendations found.
No duplicate recommendations found.
No duplicate recommendations found.


EVALUATION - for each algorithm, run the metrics given(plus the popularity measure) and get results for each of them

In [17]:
###   EVALUATION

popular_items = get_popular_items(train_interaction_matrix_csr, threshold_percent=90)
predictions = [pred_item_knn_baseline, pred_user_knn_baseline, pred_item_knn_only_novel, pred_user_knn_only_novel, pred_hybrid_novelty_03, pred_hybrid_novelty_05, pred_hybrid_novelty_08]
methods = ['Item-KNN baseline','User-KNN baseline','Novelty-only Item-KNN','Novelty-only User KNN','Hybrid algorithm with more relevance ','Hybrid algorithm with balanced relevance and novelty', 'Hybrid algorithm with more novelty']
n_items = train_interaction_matrix_csr.shape[1]  # total items in train dataset(for coverage)
item_publisher = games.set_index('item_id')['publisher'].to_dict()
i2p_dict = {i: item_publisher.get(i, "Unknown") for i in range(max(item_publisher.keys()) + 1)}
n=20

# function to get all the metrics results
def evaluation_metrics(predictions, method_names, train, test, n_items, true, i2p_dict, popular_items, n):
    for method_name, pred in zip(method_names, predictions):
        print(f'\n{method_name}')
        print(f'NDCG: {get_ndcg(pred, true, n):.4f}')
        print(f'Recall: {get_calibrated_recall(pred, true, n):.4f}')
        print(f'Coverage: {get_coverage(pred, n_items, n):.4f}')
        print(f'Intra list: {get_intra_list_similarity(pred, train, n):.4f}')
        print(f'Gini index: {get_gini_index(pred, n):.4f}')
        print(f'Publisher fairness: {get_publisher_fairness(pred, i2p_dict, n):.4f}')
        print(f'Novelty: {get_novelty(pred, train, test, n):.4f}')
        print(f'Popularity: {get_popularity(pred, popular_items, n):.4f}')


evaluation_metrics(predictions=predictions, method_names=methods, train=train_interaction_matrix_csr, test=test_mat, n_items=n_items, true=true_recommendations, i2p_dict=i2p_dict, popular_items=popular_items, n=n)


Item-KNN baseline
NDCG: 0.1537
Recall: 0.1681
Coverage: 0.5192
Intra list: 0.0851
Gini index: 0.8825
Publisher fairness: 0.9047
Novelty: 0.9352
Popularity: 0.7927

User-KNN baseline
NDCG: 0.1250
Recall: 0.1757
Coverage: 0.2408
Intra list: 0.0678
Gini index: 0.8762
Publisher fairness: 0.8931
Novelty: 0.9460
Popularity: 0.9497

Novelty-only Item-KNN
NDCG: 0.1680
Recall: 0.1797
Coverage: 0.5256
Intra list: 0.0788
Gini index: 0.8785
Publisher fairness: 0.9012
Novelty: 0.9370
Popularity: 0.7806

Novelty-only User KNN
NDCG: 0.1824
Recall: 0.1989
Coverage: 0.2695
Intra list: 0.0574
Gini index: 0.8655
Publisher fairness: 0.8869
Novelty: 0.9490
Popularity: 0.9395

Hybrid algorithm with more relevance 
NDCG: 0.1627
Recall: 0.1738
Coverage: 0.5210
Intra list: 0.0830
Gini index: 0.8811
Publisher fairness: 0.9033
Novelty: 0.9357
Popularity: 0.7882

Hybrid algorithm with balanced relevance and novelty
NDCG: 0.1663
Recall: 0.1776
Coverage: 0.5223
Intra list: 0.0804
Gini index: 0.8799
Publisher fairn

SAVE RESULTS - save all predictions on csv

In [18]:
###   SAVE RESULTS

predictions = [
    "pred_item_knn_baseline", "pred_user_knn_baseline", 
    "pred_item_knn_only_novel", "pred_user_knn_only_novel", 
    "pred_hybrid_novelty_03", "pred_hybrid_novelty_05", 
    "pred_hybrid_novelty_08"
]
predictions_dict = {
    "pred_item_knn_baseline": pred_item_knn_baseline,
    "pred_user_knn_baseline": pred_user_knn_baseline,
    "pred_item_knn_only_novel": pred_item_knn_only_novel,
    "pred_user_knn_only_novel": pred_user_knn_only_novel,
    "pred_hybrid_novelty_03": pred_hybrid_novelty_03,
    "pred_hybrid_novelty_05": pred_hybrid_novelty_05,
    "pred_hybrid_novelty_08": pred_hybrid_novelty_08
}

for pred in predictions:
    prediction_content = predictions_dict[pred]

    # map back to original ids
    data = []
    for user_id, item_ids in prediction_content.items():
        original_user_id = test_new_to_old_user_id_mapping[user_id]  # map to original user id
        original_item_ids = [new_to_old_item_id_mapping[item_id] for item_id in item_ids]  # map to original item ids
        data.extend([(original_user_id, original_item_id) for original_item_id in original_item_ids])

    # create new dataframe with correct ids
    df = pd.DataFrame(data, columns=["user_id", "item_id"])
    
    # save this new dataframe as a csv to use for codabench
    csv_path = f"{pred}.csv"
    df.to_csv(csv_path, index=False)
    print(f"Saved {csv_path}")


Saved pred_item_knn_baseline.csv
Saved pred_user_knn_baseline.csv
Saved pred_item_knn_only_novel.csv
Saved pred_user_knn_only_novel.csv
Saved pred_hybrid_novelty_03.csv
Saved pred_hybrid_novelty_05.csv
Saved pred_hybrid_novelty_08.csv
