# Evaluate embedding methods

## Paths

In [1]:
drive_path = "/content/drive/MyDrive/Colab Notebooks/Codesentics/notebooks/"
bx_preprocessed_dataset_path = drive_path + "bx_data/preprocessed_dataset/"

ratings_path = bx_preprocessed_dataset_path + "preprocessed_ratings_data.pkl"
book_metadata_path = bx_preprocessed_dataset_path + "preprocessed_book_metadata.pkl"

pca_embeddings_save_path = drive_path + "book_embeddings/pca_book_embeddings.pkl"
gd_embeddings_save_path = drive_path + "book_embeddings/gd_book_embeddings.pkl"
nn_embeddings_save_path = drive_path + "book_embeddings/softmax_book_embeddings.pkl"

## Imports

In [2]:
import pickle
import pandas as pd
import numpy as np

from sklearn.neighbors import NearestNeighbors

## Load data and book embeddings

In [3]:
ratings = pd.read_pickle(ratings_path)
books = pd.read_pickle(book_metadata_path)

In [4]:
with open(pca_embeddings_save_path, 'rb') as f:
     pca_embeddings = pickle.load(f)
with open(gd_embeddings_save_path, 'rb') as f:
    matrix_factor_gd_embeddings = pickle.load(f)
with open(nn_embeddings_save_path, 'rb') as f:
    matrix_factor_nn_embeddings = pickle.load(f)

## Create ground truth dataset

From item-user ratings matrix. Where each item (book) vector has length given by number of users will Nearest Neighbors method (by cosine similarity, euclid distance had worse performance) be
used to create more accurate although more computation heavy method to get approximate ground truth recommendations for most rated books.

In [5]:
size_of_eval_dataset = 50
number_of_recommendations = 5

top_books = ratings.groupby("Book-Embedding-ID").count()["Book-Rating"].\
    sort_values(ascending=False).index[:50]

print(f"Evaluation dataset size: {size_of_eval_dataset} books")
print(f"Recommendations per book: {number_of_recommendations}")

Evaluation dataset size: 50 books
Recommendations per book: 5


In [6]:
def convert_temp_ids_to_book_ids(ratings, temp_ids):
    recommended_book_ids = ratings[
        ratings["Book-Embedding-ID"].isin(temp_ids[0])]

    sorted_recommended_book_ids = recommended_book_ids.sort_values(
        by=["Book-Embedding-ID"],
        key=lambda x: x.map(
            {v: i for i, v in enumerate(temp_ids[0])}))

    sorted_recommended_book_ids = sorted_recommended_book_ids["Book-ID"].unique()
    return sorted_recommended_book_ids

def get_book_titles_from_book_ids(books_metadata, book_ids):

    recommended_books = books_metadata[
        books_metadata['Book-ID'].isin(book_ids)].sort_values(
            by=["Book-ID"],
            key=lambda x: x.map({v: i for i, v in enumerate(book_ids)}))

    return recommended_books['Book-Title'].unique()

def get_k_nearest_neighbours_model(embeddings, metric="cosine"):
    knn_model = NearestNeighbors(metric=metric, n_jobs=-1)
    knn_model.fit(embeddings)
    return knn_model

def get_k_neighbours_for_vector(vector, knn_model, k=5):
    _, cos_indices = knn_model.kneighbors(
        vector, n_neighbors=k)
    return cos_indices

def get_k_recommendations_for_set_of_ids(
    set_of_ids,
    embeddings,
    k):

    # Prepare knn model
    knn_model = get_k_nearest_neighbours_model(embeddings, metric="cosine")

    # For each book ID query find recommended books IDs
    recommendation_dict = {}
    for book_emb_id in set_of_ids:

        book_embedding = embeddings[book_emb_id].reshape(1,-1)

        recommended_book_emb_ids = get_k_neighbours_for_vector(
            book_embedding, knn_model,
            k=k + 1)

        # Leave out first recommended ID as that is ID of queried book
        recommendation_dict[book_emb_id] = recommended_book_emb_ids[0,1:]

    return recommendation_dict

### Compute ground truth recommendations.

In [7]:
item_to_user_ratings_matrix = np.array(ratings.pivot(
    index="Book-Embedding-ID",
    columns="User-Embedding-ID",
    values='Book-Rating').fillna(0).astype("int8"))

In [8]:
evaluation_recommend_dict = get_k_recommendations_for_set_of_ids(
    set_of_ids=top_books,
    embeddings=item_to_user_ratings_matrix,
    k=number_of_recommendations)

## Get recommendations from book embeddings

In [9]:
pca_recommend_dict = get_k_recommendations_for_set_of_ids(
    set_of_ids=top_books,
    embeddings=pca_embeddings,
    k=number_of_recommendations)

mat_fact_gd_recommend_dict = get_k_recommendations_for_set_of_ids(
    set_of_ids=top_books,
    embeddings=matrix_factor_gd_embeddings,
    k=number_of_recommendations)

mat_fact_nn_recommend_dict = get_k_recommendations_for_set_of_ids(
    set_of_ids=top_books,
    embeddings=matrix_factor_nn_embeddings,
    k=number_of_recommendations)

## Evaluation

### Define evaluation metrics

In [10]:
# Normalized Discounted Cumulative Gain
def normalized_dcg_for_query(gt_ids, pred_ids):
    relevance_for_gt_ids = np.linspace(1, 3, len(gt_ids))
    pred_ids_relevance = [relevance_for_gt_ids[np.where(gt_ids == id)][0]
                          if id in gt_ids else 0.0
                          for id in pred_ids]

    dcg = np.sum(pred_ids_relevance /
                 np.log2(np.arange(2, len(pred_ids_relevance) + 2)))
    ideal_dcg = np.sum(relevance_for_gt_ids /
                       np.log2(np.arange(2, len(pred_ids_relevance) + 2)))

    norm_dcg = dcg / ideal_dcg

    return norm_dcg

# Average Precision
def average_precision_for_query(gt_ids, pred_ids):

    correct = 0
    total_precision = 0

    for i, id in enumerate(pred_ids):
        if id in gt_ids:
            correct += 1
            precision = correct / (i + 1)
            total_precision += precision

    average_precision = total_precision / len(gt_ids)

    return average_precision

# Reciprocal Rank
def reciprocal_rank_for_query(gt_ids, pred_ids):

    rankings = [id in gt_ids for id in pred_ids]
    rr_value = 1 / (rankings.index(True) + 1) if sum(rankings) > 0 else 0

    return rr_value

### Define function for complex evaluation

In [11]:
def evaluate_recommendations(
    ground_truth_recommendations,
    predicted_recommendations):

    ap_list = []
    ndcg_list = []
    rr_list = []

    evaluation_data = []
    query_evaluation_titles = []

    for i, (gt_ids, pred_ids, query_id) in enumerate(zip(
        ground_truth_recommendations.values(),
        predicted_recommendations.values(),
        ground_truth_recommendations.keys())):

        query_title = books[
            books["Book-Embedding-ID"] == query_id]["Book-Title"].iloc[0]

        recommended_book_ids = convert_temp_ids_to_book_ids(
            ratings, [pred_ids])
        recommended_book_titles = get_book_titles_from_book_ids(
            books, recommended_book_ids)

        gt_book_ids = convert_temp_ids_to_book_ids(
            ratings, [gt_ids])
        gt_book_titles = get_book_titles_from_book_ids(
            books, gt_book_ids)

        ap_list.append(average_precision_for_query(gt_ids, pred_ids))
        ndcg_list.append(normalized_dcg_for_query(gt_ids, pred_ids))
        rr_list.append(reciprocal_rank_for_query(gt_ids, pred_ids))

        evaluation_data.append((
            query_title, query_id,
            ap_list[-1], ndcg_list[-1], rr_list[-1],
            gt_ids, pred_ids))
        query_evaluation_titles.append((
            query_title, query_id,
            gt_book_titles, recommended_book_titles
        ))

    mean_average_precision = np.mean(ap_list)
    mean_ndcg = np.mean(ndcg_list)
    mean_reciprocal_rank = np.mean(rr_list)

    evaluation_data.append((
            "MEAN", -1,
            mean_average_precision, mean_ndcg, mean_reciprocal_rank,
            [], []))

    eval_df = pd.DataFrame(
        evaluation_data,
        columns=["Query title", "Query book id",
                 "Average precision", "Norm DCG", "MR",
                 "GT ids", "Pred ids"])
    eval_title_df = pd.DataFrame(
        query_evaluation_titles,
        columns=["Query title", "Query book id",
                 "GT titles", "Pred titles"])

    return eval_df, eval_title_df

### Evaluate recommendations from PCA embeddings

In [12]:
results_pca_embs, result_titles_pca_embs = evaluate_recommendations(
    evaluation_recommend_dict,
    pca_recommend_dict)

results_pca_embs.sort_values("Average precision", ascending=False)

Unnamed: 0,Query title,Query book id,Average precision,Norm DCG,MR,GT ids,Pred ids
11,The Firm,381,1.0,1.016856,1.0,"[132, 222, 38, 221, 219]","[132, 38, 222, 219, 221]"
7,A Time to Kill,219,1.0,1.0,1.0,"[222, 221, 381, 38, 132]","[222, 221, 381, 38, 132]"
26,Harry Potter and the Chamber of Secrets,33,1.0,1.0,1.0,"[29, 353, 34, 9, 32]","[29, 353, 34, 9, 32]"
28,The Client,221,1.0,1.004227,1.0,"[222, 38, 219, 132, 381]","[222, 38, 219, 381, 132]"
24,Interview with the Vampire,314,1.0,1.038632,1.0,"[359, 363, 364, 190, 622]","[359, 190, 364, 363, 622]"
12,Harry Potter and the Sorcerer's Stone,34,0.8,0.776111,1.0,"[33, 29, 353, 9, 203]","[33, 29, 353, 9, 443]"
32,The Rainmaker,132,0.8,0.878511,1.0,"[381, 38, 150, 221, 222]","[38, 150, 381, 222, 213]"
5,The Pelican Brief,222,0.76,0.792291,1.0,"[221, 219, 381, 132, 150]","[221, 219, 381, 38, 150]"
30,House of Sand and Fog,311,0.76,0.80492,1.0,"[576, 445, 273, 474, 246]","[576, 273, 445, 448, 246]"
39,The Chamber,38,0.76,0.911148,1.0,"[132, 221, 63, 381, 150]","[132, 150, 381, 222, 221]"


In [13]:
result_titles_pca_embs

Unnamed: 0,Query title,Query book id,GT titles,Pred titles
0,Wild Animus,31,"[At Home in Mitford, The Andromeda Strain, The...","[Smilla's Sense of Snow, The Andromeda Strain,..."
1,The Lovely Bones: A Novel,24,"[Good in Bed, Drowning Ruth, The Book of Ruth,...","[Nickel and Dimed: On, The Book of Ruth, The C..."
2,Divine Secrets of the Ya-Ya Sisterhood: A Novel,256,"[Girl in Hyacinth Blue, The Red Tent, The Nann...","[Where the Red Fern Grows, A Painted House, Th..."
3,The Da Vinci Code,25,"[Angels &amp; Demons, Touching Evil, The Sweet...","[Angels &amp; Demons, Middlesex: A Novel, Time..."
4,The Nanny Diaries: A Novel,497,"[The Devil Wears Prada : A Novel, The Hours: A...","[The Devil Wears Prada : A Novel, Blue Diary, ..."
5,The Pelican Brief,222,"[The Client, A Time to Kill, The Firm, The Rai...","[The Client, A Time to Kill, The Firm, The Cha..."
6,She's Come Undone,49,"[Empire Falls, The Bonesetter's Daughter, How ...","[The Poisonwood Bible: A Novel, A Virtuous Wom..."
7,A Time to Kill,219,"[The Pelican Brief, The Client, The Firm, The ...","[The Pelican Brief, The Client, The Firm, The ..."
8,The Horse Whisperer,224,"[The Loop, The Alibi, Envy, The Poisonwood Bib...","[Voyager, Dragonfly in Amber, Outlander, Tara ..."
9,Snow Falling on Cedars,254,"[Cry Wolf, Songs in Ordinary Time, While I Was...","[Girl in Hyacinth Blue, Balzac and the Little ..."


### Evaluate recommendations from gradient descent embeddings


In [14]:
results_gd_embs, result_titles_gd_embs = evaluate_recommendations(
    evaluation_recommend_dict,
    mat_fact_gd_recommend_dict)

results_gd_embs.sort_values("Average precision", ascending=False)

Unnamed: 0,Query title,Query book id,Average precision,Norm DCG,MR,GT ids,Pred ids
12,Harry Potter and the Sorcerer's Stone,34,0.55,0.541655,1.0,"[33, 29, 353, 9, 203]","[33, 29, 27, 353, 56]"
34,The Street Lawyer,63,0.333333,0.578743,1.0,"[193, 38, 150, 414, 213]","[414, 93, 193, 339, 322]"
24,Interview with the Vampire,314,0.333333,0.3376,1.0,"[359, 363, 364, 190, 622]","[359, 88, 363, 464, 529]"
28,The Client,221,0.3,0.455539,1.0,"[222, 38, 219, 132, 381]","[38, 217, 492, 219, 465]"
46,The Bridges of Madison County,301,0.3,0.703369,1.0,"[651, 11, 517, 412, 638]","[638, 431, 418, 11, 128]"
26,Harry Potter and the Chamber of Secrets,33,0.3,0.468913,1.0,"[29, 353, 34, 9, 32]","[34, 641, 583, 29, 529]"
48,The Brethren,82,0.28,0.364001,1.0,"[150, 213, 1, 193, 153]","[213, 325, 84, 194, 150]"
42,A Prayer for Owen Meany,126,0.233333,0.558061,0.5,"[514, 76, 111, 424, 296]","[541, 296, 111, 454, 331]"
36,One for the Money,115,0.2,0.385829,1.0,"[170, 352, 440, 36, 442]","[440, 504, 448, 56, 515]"
39,The Chamber,38,0.2,0.289372,1.0,"[132, 221, 63, 381, 150]","[221, 199, 143, 144, 173]"


### Evaluate recommendations from softmax embeddings

In [15]:
results_nn_embs, result_titles_nn_embs = evaluate_recommendations(
    evaluation_recommend_dict,
    mat_fact_nn_recommend_dict)

results_nn_embs.sort_values("Average precision", ascending=False)

Unnamed: 0,Query title,Query book id,Average precision,Norm DCG,MR,GT ids,Pred ids
12,Harry Potter and the Sorcerer's Stone,34,0.483333,0.510455,1.0,"[33, 29, 353, 9, 203]","[33, 224, 353, 29, 459]"
26,Harry Potter and the Chamber of Secrets,33,0.4,0.436345,1.0,"[29, 353, 34, 9, 32]","[29, 34, 562, 194, 104]"
47,Red Dragon,298,0.3,0.606912,1.0,"[52, 609, 192, 529, 139]","[529, 499, 211, 609, 420]"
18,The Secret Life of Bees,118,0.1,0.365146,0.5,"[43, 254, 627, 200, 256]","[515, 256, 171, 594, 35]"
22,Good in Bed,207,0.1,0.304289,0.5,"[420, 379, 180, 24, 572]","[64, 24, 515, 222, 280]"
1,The Lovely Bones: A Novel,24,0.066667,0.096457,0.333333,"[207, 246, 369, 100, 257]","[132, 493, 207, 395, 497]"
48,The Brethren,82,0.066667,0.192914,0.333333,"[150, 213, 1, 193, 153]","[102, 259, 1, 526, 29]"
45,1st to Die: A Novel,67,0.05,0.124626,0.25,"[46, 17, 403, 105, 41]","[601, 101, 1, 17, 82]"
13,A Painted House,1,0.05,0.083084,0.25,"[82, 46, 448, 273, 115]","[259, 424, 256, 82, 85]"
2,Divine Secrets of the Ya-Ya Sisterhood: A Novel,256,0.05,0.207709,0.25,"[97, 107, 497, 118, 64]","[1, 224, 101, 118, 381]"


In [16]:
result_titles_nn_embs

Unnamed: 0,Query title,Query book id,GT titles,Pred titles
0,Wild Animus,31,"[At Home in Mitford, The Andromeda Strain, The...","[Before I Say Good-Bye, Waiting, Interview wit..."
1,The Lovely Bones: A Novel,24,"[Good in Bed, Drowning Ruth, The Book of Ruth,...","[The Rainmaker, The Perfect Storm : A True Sto..."
2,Divine Secrets of the Ya-Ya Sisterhood: A Novel,256,"[Girl in Hyacinth Blue, The Red Tent, The Nann...","[A Painted House, The Horse Whisperer, Girl wi..."
3,The Da Vinci Code,25,"[Angels &amp; Demons, Touching Evil, The Sweet...","[High Five, The Saving Graces: A Novel, Harry ..."
4,The Nanny Diaries: A Novel,497,"[The Devil Wears Prada : A Novel, The Hours: A...","[1984, Angels &amp; Demons, To Kill a Mockingb..."
5,The Pelican Brief,222,"[The Client, A Time to Kill, The Firm, The Rai...","[The Tale of the Body Thief, The Outsiders, Ic..."
6,She's Come Undone,49,"[Empire Falls, The Bonesetter's Daughter, How ...","[The Witching Hour, The Simple Truth, From the..."
7,A Time to Kill,219,"[The Pelican Brief, The Client, The Firm, The ...","[The General's Daughter, The Partner, Five Qua..."
8,The Horse Whisperer,224,"[The Loop, The Alibi, Envy, The Poisonwood Bib...","[Harry Potter and the Sorcerer's Stone, The Na..."
9,Snow Falling on Cedars,254,"[Cry Wolf, Songs in Ordinary Time, While I Was...",[Balzac and the Little Chinese Seamstress : A ...


## Result

In [17]:
method_names = ["PCA",
                "Matrix factiorization by gradient descent",
                "Matrix factiorization by softmax model"]
method_results_dfs = [results_pca_embs, results_gd_embs, results_nn_embs]

comparison_data = []
for method, res_df in zip(method_names, method_results_dfs):
    comparison_data.append([method] + list(
        res_df[["Average precision", "Norm DCG", "MR"]][
                res_df["Query title"] == "MEAN"].values[0]))
comparison_df = pd.DataFrame(
    comparison_data,
    columns=["Embedding method", "mAP", "mNDCG", "MMR"])
comparison_df

Unnamed: 0,Embedding method,mAP,mNDCG,MMR
0,PCA,0.421267,0.519883,0.795
1,Matrix factiorization by gradient descent,0.0894,0.151023,0.334
2,Matrix factiorization by softmax model,0.035733,0.067514,0.120333


In [18]:
print(pca_embeddings.shape[1] == matrix_factor_gd_embeddings.shape[1])
print(pca_embeddings.shape[1] == matrix_factor_nn_embeddings.shape[1])

True
True


In [19]:
best_embedding_method = comparison_df["Embedding method"][
    comparison_df["mAP"] == comparison_df["mAP"].max()].values[0]

print(f"All {len(method_names)} methods produced book embeddings with \
 dimensionality of {pca_embeddings.shape[1]}, but method {best_embedding_method}\
 achieved best embedding vectors for nearest neighbor retrieval\
 by cosine metric.")

All 3 methods produced book embeddings with  dimensionality of 35, but method PCA achieved best embedding vectors for nearest neighbor retrieval by cosine metric.
