In [None]:
import pandas as pd
import numpy as np
import csv
import pickle

In [None]:
def get_recommendations(books, bookid_to_title, title_to_bookid, title, similarities, weights, N):
    """Return the ids of the N books most similar to the book named `title`.

    The similarity of every candidate book is the weighted sum of its entries
    in the query book's row of each similarity matrix.

    Parameters
    ----------
    books, bookid_to_title :
        Not used by this function; kept so existing callers keep working.
    title_to_bookid : mapping
        Maps a title to its book id. The id must be convertible with `int()`
        and is treated as 1-based (row `id - 1` of each matrix is read).
    title : str
        Title of the query book; must be a key of `title_to_bookid`.
    similarities : sequence of 2-D indexables
        Pairwise similarity matrices, indexed as `matrix[row][col]`.
        Presumably square with one row/column per book -- the number of
        candidates is taken from `len(similarities[0])`.
    weights : sequence of numbers
        `weights[j]` multiplies the scores taken from `similarities[j]`.
        Only the first `len(weights)` matrices are read.
    N : int
        Maximum number of recommendations to return; values <= 0 yield `[]`.

    Returns
    -------
    list of int
        1-based book ids, most similar first. The query book itself is never
        included.

    Raises
    ------
    KeyError
        If `title` is not in `title_to_bookid`.
    ValueError
        If the stored book id cannot be converted to an int.
    IndexError
        If the book id falls outside the similarity matrix, or if there are
        more weights than similarity matrices.
    """
    # Book ids are 1-based, matrix rows are 0-based.
    idx = int(title_to_bookid[title]) - 1

    # Get the total number of books
    num_books = len(similarities[0])
    if not 0 <= idx < num_books:
        # Without this check an id of 0 would silently wrap around to the
        # last row (index -1) and return recommendations for the wrong book.
        raise IndexError(
            "book id %d is outside the similarity matrix (%d books)" % (idx + 1, num_books)
        )

    # Row of pairwise similarity scores for the query book, one per feature set.
    rows = [similarities[j][idx] for j in range(len(weights))]

    # Weighted sum over the feature sets gives the combined similarity.
    # The query book is skipped explicitly: relying on it sorting first breaks
    # when another book ties with it (e.g. a duplicate edition with score 1.0).
    sim_scores = [
        (i, sum(row[i] * weight for row, weight in zip(rows, weights)))
        for i in range(num_books)
        if i != idx
    ]

    # Highest similarity first; the sort is stable, so ties keep index order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    # Convert 0-based matrix indices back to actual (1-based) book ids.
    return [book_index + 1 for book_index, _ in sim_scores[:max(N, 0)]]
    

In [34]:
# Set this to where you save and load all data
# NOTE(review): none of the cells visible in this part of the notebook read
# `data_path`; the loaders below use hard-coded 'static/data/' and '../.tmp/'
# paths instead -- confirm whether this setting is still needed.
data_path = '../../goodbooks-10k/'

In [None]:
# Load the pickled books DataFrame and collect every title in sorted order.
books = pd.read_pickle('static/data/books_dataframe')
titles = sorted(books['title'])

# Build the id <-> title lookups from the raw CSV (column 0 = book id,
# column 10 = title). Ids are kept as the strings read from the file.
# NOTE(review): every row is loaded, so if the CSV starts with a header row
# that row ends up in both dicts as well -- confirm this is harmless.
bookid_to_title = {}
title_to_bookid = {}
filename = 'static/data/books.csv'
# newline='' is what the csv module requires so that quoted fields containing
# line breaks are parsed correctly.
with open(filename, "r", encoding='utf8', newline='') as f:
    reader = csv.reader(f, delimiter=",")
    for line in reader:
        bookid_to_title[line[0]] = line[10]
        # If two rows share a title, the later row's id wins.
        title_to_bookid[line[10]] = line[0]

# Precomputed pairwise cosine-similarity matrices, one per feature set.
cosine_sim_item_matrix = np.load('../.tmp/cosine_sim_item_matrix.npy')
cosine_sim_feature_matrix = np.load('../.tmp/cosine_sim_feature_matrix.npy')

In [None]:
# Precompute, for every title, the 99 most similar books using only the
# item-based cosine similarity matrix (a single feature set with weight 1).
# Results are keyed by integer book id.
top_recs_each_book_item_matrix = {}
titles_computed = 0
for title in titles:
    try:
        top_books = get_recommendations(books, bookid_to_title, title_to_bookid, title, [cosine_sim_item_matrix], [1], 99)
        top_recs_each_book_item_matrix[int(title_to_bookid[title])] = top_books
        titles_computed += 1
    except Exception as err:
        # Best effort: keep going past titles that cannot be resolved, but
        # catch only Exception (so Ctrl-C still interrupts the run) and
        # report the cause instead of hiding it.
        print(titles_computed)
        print("can't get recs for: %s" % title)
        print("  reason: %r" % (err,))

In [None]:
# Persist the item-matrix recommendations; the context manager closes the
# file even if pickling fails part-way through.
with open('static/data/top_recs_each_book_item_matrix.pkl', "wb") as f:
    pickle.dump(top_recs_each_book_item_matrix, f)

In [None]:
# Reload the item-matrix recommendations saved by the previous cell; the
# context manager closes the file even if unpickling fails.
# NOTE: pickle.load can execute arbitrary code -- only load files that this
# notebook (or another trusted source) produced.
with open('static/data/top_recs_each_book_item_matrix.pkl', "rb") as f:
    top_recs_each_book_item_matrix = pickle.load(f)

In [None]:
'''


'''

In [33]:
# Precompute, for every title, the 99 most similar books using only the
# feature-based cosine similarity matrix (a single feature set with weight 1).
# Results are keyed by integer book id.
top_recs_each_book_feature_matrix = {}
titles_computed = 0
for title in titles:
    try:
        top_books = get_recommendations(books, bookid_to_title, title_to_bookid, title, [cosine_sim_feature_matrix], [1], 99)
        top_recs_each_book_feature_matrix[int(title_to_bookid[title])] = top_books
        titles_computed += 1
    except Exception as err:
        # Best effort: keep going past titles that cannot be resolved, but
        # catch only Exception (so Ctrl-C still interrupts the run) and
        # report the cause instead of hiding it.
        print(titles_computed)
        print("can't get recs for: %s" % title)
        print("  reason: %r" % (err,))

669
can't get recs for: Around the World in Eighty Days (Extraordinary Voyages, #11)
903
can't get recs for: Before They Are Hanged (The First Law, #2)
1341
can't get recs for: Catch-22 (Catch-22, #1)
4153
can't get recs for: Me & Earl & the Dying Girl
4481
can't get recs for: Neuromancer (Sprawl, #1)
5618
can't get recs for: Shadow and Bone (Grisha, #1)
6019
can't get recs for: Streams in the Desert: 366 Daily Devotional Readings
7746
can't get recs for: The Lost Continent: Travels in Small-Town America
8747
can't get recs for: The Three Musketeers (The D'Artagnan Romances, #1)
9007
can't get recs for: The Wolf of Wall Street (The Wolf of Wall Street, #1)
9067
can't get recs for: There Was an Old Lady Who Swallowed a Fly (Classic Books) There Was an Old Lady Who Swallowed a Fly


In [35]:
# Persist the feature-matrix recommendations; the context manager closes the
# file even if pickling fails part-way through.
with open('static/data/top_recs_each_book_feature_matrix.pkl', "wb") as f:
    pickle.dump(top_recs_each_book_feature_matrix, f)