## 1. Data Preprocessing 

### 1.1 Load Libraries

In [1]:
import pandas as pd
import numpy as np
import pprint
from transformers import DistilBertTokenizer, DistilBertModel

from typing import Dict, Text
import tensorflow as tf
import tensorflow_recommenders as tfrs
import hnswlib

# min rating to consider
min_rating = 7

# parameters
output_dimension = 64
batch_size = 256
learning_rate = 0.1


### 1.2 Load Dataset

In [2]:
# Datasets
books = pd.read_csv("dataset/Books.csv")

ratings = pd.read_csv("dataset/Ratings.csv")

users = pd.read_csv("dataset/Users.csv")

  books = pd.read_csv("dataset/Books.csv")


In [3]:
# Visualization and Type Standardization
users["User-ID"] = users["User-ID"].apply(lambda x: f"user_{x}")

# Filter out books with missing or corrupted information
books["ISBN"] = books["ISBN"].apply(lambda x: f"book_{x}")
books.drop(["Image-URL-S", "Image-URL-M", "Image-URL-L"], axis=1, inplace=True)
books.dropna(inplace=True)

def clean_year(year):
    try:
        return int(year)
    except ValueError:
        return -1

def surpress_year(year):
    if year > max_year_boundary:
        return max_year_boundary
    elif year < min_year_boundary:
        return min_year_boundary
    
    return year

books['Year-Of-Publication'] = books['Year-Of-Publication'].apply(clean_year)
books = books[books['Year-Of-Publication'] != -1].reset_index(drop=True)
min_year_boundary = books['Year-Of-Publication'].mean() - books['Year-Of-Publication'].std()
max_year_boundary = books['Year-Of-Publication'].mean() + books['Year-Of-Publication'].std()
books['Year-Of-Publication'] = books['Year-Of-Publication'].apply(surpress_year)
books = books.drop_duplicates(subset='Book-Title')

In [4]:
ratings["ISBN"] = ratings["ISBN"].apply(lambda x: f"book_{x}")
ratings["User-ID"] = ratings["User-ID"].apply(lambda x: f"user_{x}")
ratings["Book-Rating"] = ratings["Book-Rating"].apply(lambda x: float(x))
ratings = ratings[ratings.ISBN.isin(books['ISBN'].unique())]
# Filtering products for simplicity

# Only consider high ratings
ratings = ratings[ratings["Book-Rating"] >= min_rating]
# Remove outlier users
outlier_threshold = ratings['User-ID'].value_counts().quantile(0.9)
user_rating_count_dict = ratings['User-ID'].value_counts().to_dict()
ratings['rate_count'] = ratings['User-ID'].map(user_rating_count_dict)
ratings = ratings[ratings['rate_count'] <= outlier_threshold]

# Consider user & book that has rating in ratings dataset
books = books[books.ISBN.isin(ratings['ISBN'].unique())]
users = users[users['User-ID'].isin(ratings['User-ID'].unique())]


In [5]:
books.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher
1,book_0002005018,Clara Callan,Richard Bruce Wright,2001.0,HarperFlamingo Canada
2,book_0060973129,Decision in Normandy,Carlo D'Este,1991.0,HarperPerennial
3,book_0374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999.0,Farrar Straus Giroux
5,book_0399135782,The Kitchen God's Wife,Amy Tan,1991.0,Putnam Pub Group
14,book_1558746218,A Second Chicken Soup for the Woman's Soul (Ch...,Jack Canfield,1998.0,Health Communications


### 1.3 Book to Book Matches

In [6]:
# Group books which are read from same user
book_groups_raw = ratings.groupby('User-ID')
book_groups = pd.DataFrame(
    data={
        "User-ID": list(book_groups_raw.groups.keys()),
        "ISBN_list": list(book_groups_raw.ISBN.apply(list)),
    }
)
# Eliminate if user has read one book
book_groups = book_groups[book_groups['ISBN_list'].apply(len) > 1].reset_index(drop=True)
print(f"Number of Groups: {book_groups.shape[0]}")
book_groups.head()


Number of Groups: 16055


Unnamed: 0,User-ID,ISBN_list
0,user_100004,"[book_0345339703, book_0399146652, book_043906..."
1,user_10003,"[book_068483068X, book_0743446593]"
2,user_100035,"[book_0440211727, book_0671759310]"
3,user_100053,"[book_0312422156, book_0316769487, book_038549..."
4,user_100066,"[book_0060953713, book_0385722206, book_039309..."


In [7]:

book_matches = []
# for each book in our isbn_list we generate pairs
for isbn_list in book_groups['ISBN_list'].values:
    if len(isbn_list) <= 1:
        continue
    for i, main_isbn in enumerate(isbn_list[:-1]):
        for similar_isbn in isbn_list[i+1:]:
            book_matches.append([main_isbn, similar_isbn])

# Dataset generation and visualization
book_pairs_dataset = pd.DataFrame(book_matches, columns=["main_ISBN", "similar_ISBN"])
data_size = book_pairs_dataset.shape[0]
print(f"Number of Matches: {data_size}")
book_pairs_dataset.head()

Number of Matches: 90940


Unnamed: 0,main_ISBN,similar_ISBN
0,book_0345339703,book_0399146652
1,book_0345339703,book_0439064872
2,book_0345339703,book_059035342X
3,book_0399146652,book_0439064872
4,book_0399146652,book_059035342X


In [8]:
# Our final dataset to train our model 
# Main book features
main_books = books.rename(columns=lambda x: 'main_' + x if x != 'ISBN' else x).copy()
book_pairs = pd.merge(book_pairs_dataset, main_books,
                              left_on='main_ISBN',
                              right_on='ISBN')
book_pairs.drop("ISBN", axis=1, inplace=True)

# Similar book features
similar_books = books.rename(columns=lambda x: 'similar_' + x if x != 'ISBN' else x).copy()
book_pairs = pd.merge(book_pairs, similar_books,
                              left_on='similar_ISBN',
                              right_on='ISBN')

book_pairs.drop("ISBN", axis=1, inplace=True)
book_pairs.head()

Unnamed: 0,main_ISBN,similar_ISBN,main_Book-Title,main_Book-Author,main_Year-Of-Publication,main_Publisher,similar_Book-Title,similar_Book-Author,similar_Year-Of-Publication,similar_Publisher
0,book_0345339703,book_0399146652,The Fellowship of the Ring (The Lord of the Ri...,J.R.R. TOLKIEN,1986.0,Del Rey,The Cat Who Smelled a Rat,Lilian Jackson Braun,2001.0,Putnam Publishing Group
1,book_0316284955,book_0399146652,White Oleander : A Novel (Oprah's Book Club),Janet Fitch,2000.0,Back Bay Books,The Cat Who Smelled a Rat,Lilian Jackson Braun,2001.0,Putnam Publishing Group
2,book_0312278586,book_0399146652,The Nanny Diaries: A Novel,Emma McLaughlin,2002.0,St. Martin's Press,The Cat Who Smelled a Rat,Lilian Jackson Braun,2001.0,Putnam Publishing Group
3,book_0316666343,book_0399146652,The Lovely Bones: A Novel,Alice Sebold,2002.0,"Little, Brown",The Cat Who Smelled a Rat,Lilian Jackson Braun,2001.0,Putnam Publishing Group
4,book_0140293248,book_0399146652,The Girls' Guide to Hunting and Fishing,Melissa Bank,2000.0,Penguin Books,The Cat Who Smelled a Rat,Lilian Jackson Braun,2001.0,Putnam Publishing Group


### 1.4 Convert Dataset to TFDS

In [9]:
batch_size = 256
# Pairs dataset
book_pairs_final = tf.data.Dataset.from_tensor_slices({
    # main book features
    'main_ISBN': tf.cast(book_pairs['main_ISBN'], dtype=tf.string),
    'main_Book-Title': tf.cast(book_pairs['main_Book-Title'], dtype=tf.string),
    'main_Book-Author': tf.cast(book_pairs['main_Book-Author'], dtype=tf.string),
    'main_Year-Of-Publication': tf.cast(book_pairs['main_Year-Of-Publication'], dtype=tf.int32),
    'main_Publisher': tf.cast(book_pairs['main_Publisher'], dtype=tf.string),

    # similar book features
    'similar_ISBN': tf.cast(book_pairs['similar_ISBN'], dtype=tf.string),
    'similar_Book-Title': tf.cast(book_pairs['similar_Book-Title'], dtype=tf.string),
    'similar_Book-Author': tf.cast(book_pairs['similar_Book-Author'], dtype=tf.string),
    'similar_Year-Of-Publication': tf.cast(book_pairs['similar_Year-Of-Publication'], dtype=tf.int32),
    'similar_Publisher': tf.cast(book_pairs['similar_Publisher'], dtype=tf.string),
})

book_pairs_final = book_pairs_final.batch(batch_size)
# Book information dataset
book_infos = tf.data.Dataset.from_tensor_slices({
    'ISBN': tf.cast(books['ISBN'], dtype=tf.string),
    'Book-Title': tf.cast(books['Book-Title'], dtype=tf.string),
    'Book-Author': tf.cast(books['Book-Author'], dtype=tf.string),
    'Year-Of-Publication': tf.cast(books['Year-Of-Publication'], dtype=tf.int32),
    'Publisher': tf.cast(books['Publisher'], dtype=tf.string),
})
book_infos = book_infos.batch(batch_size)


In [10]:
tf.random.set_seed(1002)
train_percentage = 0.8
batch_count = (data_size + batch_size - 1) // batch_size
shuffled = book_pairs_final.shuffle(batch_count, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(int(batch_count * train_percentage))
test = shuffled.skip(int(batch_count * train_percentage)).take(int(batch_count * (1 - train_percentage)))

## 2. Book to Book Model

In [11]:
for col in books.columns:
    print(f"Number of unique {col} {books[col].nunique()}")


Number of unique ISBN 41743
Number of unique Book-Title 41743
Number of unique Book-Author 21935
Number of unique Year-Of-Publication 80
Number of unique Publisher 5234


### 2.1 Book Title Model

In [12]:
def batch_distilbert_embeddings(book_titles):
    # convert bytes to string
    book_titles = [title.decode('utf-8') for title in book_titles.numpy()]
    # set inputs
    input_ids = tokenizer(book_titles, padding=True, truncation=True, return_tensors='pt', max_length=20)
    # generate embeddings
    outputs = bert_model(input_ids['input_ids'])
    # get last layer
    last_hidden_state = outputs.last_hidden_state

    # mean vector is required and also padded values should be excluded
    input_mask = tf.cast(input_ids['attention_mask'], tf.float32)
    input_mask_expanded = tf.expand_dims(input_mask, -1)
    sum_embeddings = tf.reduce_sum(last_hidden_state.detach().numpy() * input_mask_expanded, axis=1)
    sum_mask = tf.reduce_sum(input_mask_expanded, axis=1)
    mean_embeddings = sum_embeddings / sum_mask
    return book_titles, mean_embeddings

bert_model = DistilBertModel.from_pretrained('distilbert-base-uncased')
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

title_list = []
title_embeddings = []

# Process batches of book titles to get embeddings
for batch in book_infos:
    book_titles, embeddings = batch_distilbert_embeddings(batch['Book-Title'])
    title_list.extend(book_titles)
    title_embeddings.extend(embeddings)

title_embeddings_df = pd.DataFrame({'Book-Title':title_list, 'embedding':title_embeddings})


We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


### 2.2 Book ISBN, Author, Publisher Models

In [14]:
# Book Title Model
title_output_dim = len(title_embeddings[0])
# An embedding if there are any unknown text input occurs
unknown_embedding = np.random.uniform(-1, 1, size=title_output_dim)
title_embeddings.insert(0, unknown_embedding)
book_title_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=title_list, mask_token=None),
  tf.keras.layers.Embedding(
                    input_dim=len(title_list) + 1,
                    output_dim=title_output_dim,
                    embeddings_initializer=tf.keras.initializers.Constant(np.vstack(title_embeddings)),
                    trainable=False)])

In [13]:
# Book ID Model
book_embedding_dimension = 64
unique_book_ids = books['ISBN'].unique()
book_id_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_book_ids, mask_token=None),
  tf.keras.layers.Embedding(len(unique_book_ids) + 1, book_embedding_dimension)
])


# Author Model
author_embedding_dimension = 32
unique_book_authors = books['Book-Author'].unique()
book_author_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_book_authors, mask_token=None),
  tf.keras.layers.Embedding(len(unique_book_authors) + 1, author_embedding_dimension)
])

# Publisher Model
publisher_embedding_dimension = 16
unique_book_publishers = books['Publisher'].unique()
book_publisher_model = tf.keras.Sequential([
  tf.keras.layers.StringLookup(
      vocabulary=unique_book_publishers, mask_token=None),
  tf.keras.layers.Embedding(len(unique_book_publishers) + 1, publisher_embedding_dimension)
])

# Publication Year Layer
book_year_layer = tf.keras.layers.Normalization(
    axis=None
)
book_year_layer.adapt(book_infos.map(lambda x: x["Year-Of-Publication"]))


In [15]:
# Author Embedding Visualization
book_author_model('Richard Bruce Wright')

<tf.Tensor: shape=(32,), dtype=float32, numpy=
array([ 0.0026858 , -0.03741129, -0.01967944, -0.00474253,  0.04196215,
        0.03489442, -0.02625586,  0.01576375, -0.01148453, -0.04737226,
        0.00830209,  0.01122935, -0.00154225,  0.04320015,  0.01335922,
        0.00305701,  0.04329654,  0.03451772,  0.04371735, -0.01457602,
       -0.04764173,  0.00885023,  0.00981665, -0.01966577, -0.04971072,
       -0.01737161,  0.01679437,  0.01647678, -0.02727261,  0.02590995,
       -0.01437413, -0.04865533], dtype=float32)>

### 2.3 Book Model

In [16]:
class BookModel(tfrs.Model):

  def __init__(self, book_id_model, book_title_model, book_author_model, book_publisher_model, book_year_layer):
    super().__init__()
    # assigning sub models to convert ids to embeddings
    self.book_id_model = book_id_model
    self.book_title_model = book_title_model
    self.book_author_model = book_author_model
    self.book_publisher_model = book_publisher_model
    self.book_year_layer = book_year_layer

  def call(self, features: Dict[Text, tf.Tensor]):
    
    # concatenation of embeddings
    return tf.concat([
        self.book_id_model(features["ISBN"]),
        self.book_title_model(features['Book-Title']),
        self.book_author_model(features["Book-Author"]),
        self.book_publisher_model(features["Publisher"]),
        tf.expand_dims(self.book_year_layer(features['Year-Of-Publication']), axis=1)
    ], axis=1)

In [17]:
# Initialization of our book model
book_model = BookModel(book_id_model,
                       book_title_model,
                       book_author_model,
                       book_publisher_model,
                       book_year_layer)

# Sample example
book_model({'Book-Author': ['Richard Bruce Wright'],
            'Book-Title': ['Clara Callan'],
            'ISBN': ['book_0002005018'],
            'Publisher': ['HarperFlamingo Canada'],
            'Year-Of-Publication': [2001]}).shape

TensorShape([1, 881])

In [18]:
# Metrics & Task
metrics = tfrs.metrics.FactorizedTopK(candidates=book_infos.map(lambda features: book_model(features)))
task = tfrs.tasks.Retrieval(metrics=metrics)

### 2.4 Book to Book Model

In [19]:
class Book2BookModel(tfrs.Model):
    def __init__(self, book_id_model, book_title_model, book_author_model, book_publisher_model, book_year_layer, output_dimension=64):
        super().__init__()
        self.book_id_model = book_id_model
        self.book_title_model = book_title_model
        self.book_author_model = book_author_model
        self.book_publisher_model = book_publisher_model
        self.book_year_layer = book_year_layer
        # combining book model with output dimension to fix output dimension
        self.book_model_raw = BookModel(self.book_id_model,
                                        self.book_title_model,
                                        self.book_author_model,
                                        self.book_publisher_model,
                                        self.book_year_layer)
        self.book_model = tf.keras.Sequential([self.book_model_raw,
                                               tf.keras.layers.Dense(output_dimension)])
        # Metrics & Task
        self.candidates = book_infos.map(lambda x: self.book_model(x))
        metrics = tfrs.metrics.FactorizedTopK(candidates=self.candidates)
        # negative sampling also applied
        self.task = tfrs.tasks.Retrieval(metrics=metrics,
                                        num_hard_negatives=5)
    
    def compute_loss(self, features: Dict[Text, tf.Tensor], training=True):
        # Generation of main book embedding from main item features
        main_book_embedding = self.book_model({'ISBN':features['main_ISBN'],
                                               'Book-Title':features['main_Book-Title'],
                                               'Book-Author': features['main_Book-Author'],
                                               'Publisher': features['main_Publisher'],
                                               'Year-Of-Publication':features['main_Year-Of-Publication']})

        # Generation of similar book embedding from similar item features
        similar_book_embedding = self.book_model({'ISBN':features['similar_ISBN'],
                                                  'Book-Title':features['similar_Book-Title'],
                                                  'Book-Author': features['similar_Book-Author'],
                                                  'Publisher': features['similar_Publisher'],
                                                  'Year-Of-Publication':features['similar_Year-Of-Publication']})

        # loss and the metric calculation
        # compute metrics set false to skyrock training speed
        return self.task(main_book_embedding,
                         similar_book_embedding,
                         compute_metrics=False)

### 2.5 Training the Model

In [20]:
# Book to Book Model initialization
book2book_model = Book2BookModel(book_id_model,
                                 book_title_model,
                                 book_author_model,
                                 book_publisher_model,
                                 book_year_layer)
book2book_model.compile(optimizer=tf.keras.optimizers.legacy.Adagrad(learning_rate=0.1))

In [21]:
book2book_model.fit(train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x2c2ba6d60>

### 2.6 Embedding Extraction

In [22]:
# Function to apply the model to each book feature set and return ISBN with embeddings
def extract_embeddings_with_isbn(features):
    embeddings = book2book_model.book_model(features)
    return features['ISBN'], embeddings

# Mapping the function over the dataset
book_embeddings = book_infos.map(extract_embeddings_with_isbn)
isbn_list = []
embeddings_list = []

# Example to inspect or use the embeddings with ISBNs
for isbn, embedding in book_embeddings:
    isbn_list.extend(list(isbn.numpy().astype(str)))  
    embeddings_list.extend(list(embedding.numpy()))

book_embedding_dataset = pd.DataFrame({'ISBN':isbn_list, 'embedding':embeddings_list})
book_embedding_dataset.head()

Unnamed: 0,ISBN,embedding
0,book_0002005018,"[0.118825436, 0.045262557, -0.09441517, 0.0824..."
1,book_0060973129,"[-0.16090837, -0.025830925, -0.012401611, 0.06..."
2,book_0374157065,"[-0.029113185, 0.29734114, -0.16034544, 0.0993..."
3,book_0399135782,"[0.122434825, 0.02598485, -0.001726985, -0.148..."
4,book_1558746218,"[-0.18346615, -0.04044274, 0.062318973, 0.0636..."


In [23]:
book_embedding_dict = dict(zip(book_embedding_dataset.ISBN, book_embedding_dataset.embedding))
book_title_dict = dict(zip(books['ISBN'], books['Book-Title']))

### 2.7 ANN

In [24]:
dim = output_dimension

num_elements = book_embedding_dataset.shape[0]
# hnswlib initialization with cosine similarity
p = hnswlib.Index(space='cosine', dim=dim)

p.init_index(max_elements=num_elements, ef_construction=100, M=16)

p.set_ef(10)

embeddings = np.vstack(book_embedding_dataset["embedding"].values)
p.add_items(embeddings)

### 2.8 Similar Book Search

In [25]:
def book_search(isbn, k=3):
    """Gets input embeddings and return top k similar items"""

    # Generate embedding for the user query
    query_embedding = book_embedding_dict[isbn]

    if query_embedding is None:
        return "Invalid query or embedding generation failed."

    labels, _ = p.knn_query(query_embedding, k=k+1)
    results = book_embedding_dataset.iloc[list(labels[0][1:])].to_dict('records')
    similar_isbns = [similar_isbn['ISBN'] for similar_isbn in results]
    return similar_isbns

In [26]:
# Visualization of Recommendation
# ISBN of "Harry Potter and the Prisoner of Azkaban (Book 3)"
main_book = 'book_0439136350'
print(f"Main Book:\n{book_title_dict[main_book]}\n\nSimilar Books:")

similar_books = book_search(main_book, k=10)
for i, similar_book in enumerate(similar_books):
    print(f"{i+1}. {book_title_dict[similar_book]}")

Main Book:
Harry Potter and the Prisoner of Azkaban (Book 3)

Similar Books:
1. Harry Potter and the Sorcerer's Stone (Book 1)
2. Harry Potter and the Order of the Phoenix (Book 5)
3. The Dream Directory: The Comprehensive Guide to Analysis and Interpretation, With Explanations for More Than 350 Symbols and Theories
4. Behind the Attic Wall (Avon Camelot Books (Paperback))
5. Harry Potter and the Sorcerer's Stone (Harry Potter (Paperback))
6. The Black Cauldron (Chronicles of Prydain (Paperback))
7. Awaken from Death
8. The Cooperman Variations (Benny Cooperman Mysteries (Paperback))
9. The Wild Child
10. BLAST FROM THE PAST : A NOVEL (Kinky Friedman Novels (Hardcover))


## 3 Recommendations & Evaluation

### 3.1 Highly Rated Popular Books

In [27]:
def get_popular_books(df_ratings, k=10):
  # Calculate the number of ratings for each movie
  rating_counts = df_ratings['ISBN'].value_counts().reset_index()
  rating_counts.columns = ['ISBN', 'rating_count']

  # Get the most frequently rated movies
  min_ratings_threshold = rating_counts['rating_count'].quantile(0.95)

  # Filter movies based on the minimum number of ratings
  popular_movies = ratings.merge(rating_counts, on='ISBN')
  popular_movies = popular_movies[popular_movies['rating_count'] >= min_ratings_threshold]

  # Calculate the average rating for each movie
  average_ratings = popular_movies.groupby('ISBN')['Book-Rating'].mean().reset_index()

  # Get the top k rated movies
  top_10_movies = list(average_ratings.sort_values('Book-Rating', ascending=False).head(k).ISBN.values)
  return top_10_movies

popular_books = get_popular_books(ratings)
print("Popular Books:")
for i, popular_book in enumerate(popular_books):
  print(f"{i+1}. {book_title_dict[popular_book]}")

Popular Books:
1. Anne of Avonlea (Anne of Green Gables Novels (Paperback))
2. El Codigo Da Vinci / The Da Vinci Code
3. Maus 1. Mein Vater kotzt Geschichte aus. Die Geschichte eines Ã?Â?berlebenden.
4. Complete Chronicles of Narnia
5. The Cat in the Hat
6. The Ultimate Hitchhiker's Guide
7. Dandelion Wine (Grand Master Editions)
8. The Grapes of Wrath
9. Der Alchimist.
10. The Little Prince (Wordsworth Collection)


### 3.2 Similar Books 

In [28]:
# Function to apply the model to each book feature set and return ISBN with embeddings
def extract_pairs(features):
    return features['main_ISBN'], features['similar_ISBN']

main_isbn_list = []
similar_isbn_list = []

book_pairs_test = test.map(extract_pairs)
for main_isbn, similar_isbn in book_pairs_test:
    main_isbn_list.extend(list(main_isbn.numpy().astype(str)))  
    similar_isbn_list.extend(list(similar_isbn.numpy().astype(str)))

In [29]:
popular_reco_results = []
two_tower_reco_results = []
k = 50
popular_books = get_popular_books(ratings, k=k)

for main_isbn, similar_isbn in zip(main_isbn_list, similar_isbn_list):
    popular_reco_check = np.isin(popular_books, similar_isbn).astype(int)
    popular_reco_results.append(popular_reco_check)
    # get embedding based recommendations
    similar_books = book_search(main_isbn, k=k)
    two_tower_check = np.isin(similar_books, similar_isbn).astype(int)
    two_tower_reco_results.append(two_tower_check)

### 3.3 NDCG Scores

In [30]:
from sklearn.metrics import ndcg_score

# Since we have already sorted our recommendations
# An array that represent our recommendation scores is used.
representative_array = [[i for i in range(k, 0, -1)]] * len(two_tower_reco_results)

for k in [3, 5, 10, 50]:
  two_tower_result = ndcg_score(two_tower_reco_results,
                                  representative_array, k=k)
  popular_result = ndcg_score(popular_reco_results,
                              representative_array, k=k)
  
  print(f"Two Tower NDCG result at top {k}: {round(two_tower_result, 4)}")
  print(f"Popular recommendation NDCG result at top {k}: {round(popular_result, 4)}\n\n")

Two Tower NDCG result at top 3: 0.0112
Popular recommendation NDCG result at top 3: 0.0


Two Tower NDCG result at top 5: 0.0124
Popular recommendation NDCG result at top 5: 0.0


Two Tower NDCG result at top 10: 0.0138
Popular recommendation NDCG result at top 10: 0.0


Two Tower NDCG result at top 50: 0.017
Popular recommendation NDCG result at top 50: 0.0014




## References
- https://www.tensorflow.org/recommenders/examples/basic_retrieval
- https://www.tensorflow.org/recommenders/examples/featurization