In [None]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Book metadata table. The context manager closes the file handle as soon as
# the object is loaded (the bare open() left it to the garbage collector).
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open('df_books.pickle', 'rb') as fh:
  df_books = pickle.load(fh)

In [None]:
# Author lookup table. The context manager closes the file handle as soon as
# the object is loaded (the bare open() left it to the garbage collector).
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open('df_authors.pickle', 'rb') as fh:
  df_authors = pickle.load(fh)

In [None]:
# Remove the columns that the recommender functions below never read.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: running it a second time no longer raises KeyError because
# the columns are already gone.
df_books = df_books.drop(
  columns=['language_code','book_id','publication_year','series','genres_new'],
  errors='ignore',
)

In [None]:
# Latent matrices used by the recommenders: `books_latent` is passed to the
# 'content' mode and `rating_latent` to the 'collaborative' mode below.
# Context managers close each file handle right after loading.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open('latent_matrix_books.pickle', 'rb') as fh:
  books_latent = pickle.load(fh)
with open('latent_matrix_rating.pickle', 'rb') as fh:
  rating_latent = pickle.load(fh)

## Book-to-book

In [None]:
def book_to_books(seedbookID, latentmatrix, rec_mode):
  """Rank all other books by cosine similarity to a seed book.

  Parameters
  ----------
  seedbookID
      Work id of the seed book.
  latentmatrix
      Latent representation with one row per book.
      In 'collaborative' mode it is accessed through ``.loc`` / ``.index``
      (i.e. a DataFrame indexed by work id).
      In 'content' mode it is indexed with the labels of ``df_books.index``
      for the seed row and labelled with ``df_books['work_id']``.
      NOTE(review): this assumes the rows of ``latentmatrix`` are aligned with
      the rows of ``df_books`` and that ``df_books`` has a positional
      (0..n-1) index -- confirm.
  rec_mode : {'collaborative', 'content'}
      Which lookup strategy to use.

  Returns
  -------
  pandas.DataFrame
      Single column ``similarity_score`` indexed by work id, sorted in
      descending order, without the seed book and without scores <= 0.

  Raises
  ------
  ValueError
      If ``rec_mode`` is not a supported mode, or (content mode) the seed
      book is not present in ``df_books``.
  KeyError
      In collaborative mode, if the seed book is not in ``latentmatrix``.
  """
  if rec_mode not in ('collaborative', 'content'):
    raise ValueError(
      "rec_mode must be 'collaborative' or 'content', got %r" % (rec_mode,)
    )

  if rec_mode == 'collaborative':
    seed_book = np.array(latentmatrix.loc[seedbookID]).reshape(1, -1)
    index = latentmatrix.index.tolist()
  else:
    seed_rows = df_books.index[df_books['work_id'] == seedbookID]
    if len(seed_rows) == 0:
      raise ValueError("work_id %r not found in df_books" % (seedbookID,))
    seed_book = latentmatrix[seed_rows]
    index = df_books['work_id'].tolist()

  scores = cosine_similarity(latentmatrix, seed_book, dense_output=True)

  similarities = pd.DataFrame(scores, index=index)
  similarities.columns = ['similarity_score']
  similarities = similarities.sort_values('similarity_score', ascending=False)
  # Remove the seed by label: slicing off the first row only works when the
  # seed happens to sort first, which ties (or an all-zero seed vector) break.
  similarities = similarities.drop(index=seedbookID, errors='ignore')
  similarities = similarities[similarities['similarity_score'] > 0]

  return similarities

In [None]:
def similarity_scores(collaborative_score, content_score, content_weight=0.5, top_n=20):
  """Blend collaborative and content similarity into one ranked score.

  Only ids present in BOTH inputs are kept (inner join on the index).
  The blended score is

      (collaborative + content_weight * content) / 2

  so with the default ``content_weight=0.5`` the content score counts half as
  much as the collaborative one; this is not a plain average.

  Parameters
  ----------
  collaborative_score : pandas.DataFrame
      Indexed by id, with a ``similarity_score`` column.
  content_score : pandas.DataFrame
      Indexed by id, with a ``similarity_score`` column (other columns are
      ignored).
  content_weight : float, default 0.5
      Multiplier applied to the content score before blending.
  top_n : int, default 20
      Maximum number of rows returned.

  Returns
  -------
  pandas.DataFrame
      At most ``top_n`` rows sorted by descending blended ``similarity_score``
      (rounded to 4 decimals after sorting).
  """
  df_sim = pd.merge(
    collaborative_score,
    content_score[['similarity_score']],
    left_index=True,
    right_index=True,
    suffixes=('_x', '_y'),
  )
  df_sim['similarity_score'] = (
    df_sim['similarity_score_x'] + df_sim['similarity_score_y'] * content_weight
  ) / 2
  df_sim = df_sim.drop(columns=['similarity_score_x', 'similarity_score_y'])

  # Sort on the full-precision score, then round for presentation.
  df_sim = df_sim.sort_values('similarity_score', ascending=False)
  df_sim['similarity_score'] = df_sim['similarity_score'].round(4)

  return df_sim.head(top_n)

In [None]:
def get_recommendation(seed_book):
  """Return book recommendations for a seed book as a display-ready table.

  Computes collaborative similarities (from ``rating_latent``) and content
  similarities (from ``books_latent``) with ``book_to_books``, blends them with
  ``similarity_scores``, and joins the ranked work ids back onto ``df_books``.

  Parameters
  ----------
  seed_book
      Work id of the seed book.

  Returns
  -------
  pandas.DataFrame
      Columns title, authors, work_id, isbn, description, average_rating and
      image_url, in ranking order with a fresh 0..n-1 index. ``authors`` holds
      a list of names looked up in ``df_authors``.
  """
  collaborative = book_to_books(seed_book, rating_latent, 'collaborative')
  content = book_to_books(seed_book, books_latent, 'content')
  ranked = similarity_scores(collaborative, content)

  columns = ['title','authors','work_id','isbn','description','average_rating','image_url']
  # how='right' keeps the key order of `ranked`, i.e. the ranking.
  rec = pd.merge(df_books, ranked, how='right', left_on='work_id', right_index=True)
  # Select first, then reset with drop=True: this yields a new frame (so the
  # assignment below is not a chained assignment on a slice) and does not
  # depend on the old index being unnamed.
  rec = rec[columns].reset_index(drop=True)
  # Each `authors` cell is iterated and every element is looked up as a row
  # label of df_authors.
  rec['authors'] = rec['authors'].apply(lambda a: [df_authors.loc[i, 'name'] for i in a])
  return rec

## User-to-book

In [None]:
def user_to_user(seeduserID, latent_rating_u2u=None, top_n=30):
  """Return the users most similar to a seed user (cosine similarity).

  Parameters
  ----------
  seeduserID
      Row label of the seed user in ``latent_rating_u2u``.
  latent_rating_u2u : pandas.DataFrame, optional
      Matrix whose rows are compared with each other. When omitted it is
      derived as ``rating_latent.transpose()``, the same matrix
      ``user_to_book`` builds. Previously this function relied on a global of
      that name which no cell defines, so it failed on a fresh kernel.
  top_n : int, default 30
      Maximum number of similar users returned.

  Returns
  -------
  pandas.DataFrame
      Single column ``similarity_score`` indexed by user label, sorted in
      descending order, without the seed user and without scores <= 0.

  Raises
  ------
  KeyError
      If ``seeduserID`` is not a row label of the matrix.
  """
  if latent_rating_u2u is None:
    latent_rating_u2u = rating_latent.transpose()

  seed_user = np.array(latent_rating_u2u.loc[seeduserID]).reshape(1, -1)
  scores = cosine_similarity(latent_rating_u2u, seed_user, dense_output=True)

  similarities = pd.DataFrame(scores, index=latent_rating_u2u.index.tolist())
  similarities.columns = ['similarity_score']
  similarities = similarities.sort_values('similarity_score', ascending=False)
  # Remove the seed by label: slicing off the first row only works when the
  # seed happens to sort first, which ties (or an all-zero seed row) break.
  similarities = similarities.drop(index=seeduserID, errors='ignore')
  similarities = similarities[similarities['similarity_score'] > 0]

  return similarities.head(top_n)


def user_to_book(userID, n_read=30, n_candidates=50, n_pairs=10):
  """Recommend books to a user from what similar users scored highly.

  Steps:
    1. Take the seed user's ``n_read`` highest positive entries ("read" books).
    2. Among the similar users (``user_to_user``), average the non-zero
       entries per book, skipping books in the read set, and keep the
       ``n_candidates`` best as candidates.
    3. Compute content similarity (``books_latent``) between every candidate
       and every read book, take the ``n_pairs`` most similar
       (candidate, read) pairs and return the candidates involved.

  Parameters
  ----------
  userID
      Row label of the user in ``rating_latent.transpose()``.
  n_read : int, default 30
      Number of the user's top books used as taste profile.
  n_candidates : int, default 50
      Number of candidate books taken from similar users.
  n_pairs : int, default 10
      Number of top (candidate, read) similarity pairs considered; several
      pairs can share a candidate, so fewer books may be returned.

  Returns
  -------
  pandas.DataFrame
      Rows of ``df_books`` (in ``df_books`` order) with columns title,
      authors, work_id, isbn, description and average_rating; ``authors`` is
      a list of names looked up in ``df_authors``. Empty when there are no
      read books or no candidates.
  """
  columns = ['title','authors','work_id','isbn','description','average_rating']
  empty = df_books.iloc[0:0][columns].copy()

  latent_rating_u2u = rating_latent.transpose()

  sim_user = user_to_user(userID)

  # Seed user's strongest positive entries.
  book_read = latent_rating_u2u.loc[userID]
  book_read = book_read[book_read > 0].sort_values(ascending=False).head(n_read)

  # Rows of the similar users, restricted to books that at least one of them
  # scored positively AND that are not in the seed user's read set. The
  # exclusion must act on the columns (books); masking rows would compare
  # user labels with book labels.
  # NOTE(review): only the top-`n_read` books are excluded, as in the original
  # expression -- confirm whether every positively scored book should be.
  sim = latent_rating_u2u[latent_rating_u2u.index.isin(sim_user.index)]
  scored_by_neighbour = (sim > 0).any(axis=0).to_numpy()
  already_read = sim.columns.isin(book_read.index)
  sim = sim.loc[:, scored_by_neighbour & ~already_read]
  if sim.empty or book_read.empty:
    return empty

  # Mean of the non-zero entries per book, best first.
  avgrating = sim.replace(0, np.nan).apply(np.nanmean).dropna()
  avgrating = avgrating.sort_values(ascending=False).index[:n_candidates]

  candidate_mask = df_books['work_id'].isin(list(avgrating))
  read_mask = df_books['work_id'].isin(list(book_read.index))
  if not candidate_mask.any() or not read_mask.any():
    return empty

  # NOTE(review): df_books index labels are used as row positions of
  # books_latent -- assumes both are aligned; confirm.
  similarities = cosine_similarity(
    books_latent[df_books.index[candidate_mask]],
    books_latent[df_books.index[read_mask]],
  )
  # Label the similarity rows with the work ids of the rows actually selected
  # (df_books order), so labels and matrix rows cannot drift apart.
  candidate_ids = df_books.loc[candidate_mask, 'work_id'].to_numpy()

  # Highest-scoring (candidate, read) pairs; unravel with the real matrix
  # shape instead of assuming a fixed number of columns.
  top_flat = np.argsort(similarities, axis=None)[::-1][:n_pairs]
  top_rows, _ = np.unravel_index(top_flat, similarities.shape)

  rec = df_books[df_books['work_id'].isin(candidate_ids[top_rows])][columns].copy()
  # Each `authors` cell is iterated and every element is looked up as a row
  # label of df_authors.
  rec['authors'] = rec['authors'].apply(lambda a: [df_authors.loc[i, 'name'] for i in a])

  return rec