In [1]:
import os
import sys
import numpy as np
import pandas as pd
import scipy
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from sklearn.decomposition import TruncatedSVD

# Custom libraries
sys.path.append('../Util')
from loader import get_books, get_book_dataframe, get_book_features

In [2]:
# Directory where all data is saved to and loaded from.
# NOTE: this path is RELATIVE TO INSIDE THE UTIL FOLDER (the '../Util'
# directory the loader module is imported from), so adjust it from there.
data_path = '../../goodbooks-10k/'

In [3]:
# Title of the book to get recommendations for. It is used as a lookup key
# into the title -> index map, so it must match an entry of books['title'].
title = 'The Fellowship of the Ring (The Lord of the Rings, #1)'

In [4]:
# Rank books by a weighted combination of precomputed book-to-book similarity matrices
def get_recommendations(df, indices, title, similarities, weights, top_n=20):
    """Return the titles of the books most similar to ``title``.

    Parameters
    ----------
    df : pandas.DataFrame
        Books dataframe; must have a ``'title'`` column whose row order
        matches the row/column order of every similarity matrix.
    indices : pandas.Series
        Map from book title to book index label. The label is converted to a
        row position with ``label - 1`` (i.e. labels are assumed to be
        1-based, consecutive ids -- confirm against the loader).
    title : str
        Title of the query book; must be a key of ``indices``.
    similarities : sequence of 2-D array-likes
        One square similarity matrix per feature set.
    weights : sequence of numbers
        One weight per similarity matrix; the final score of a book is the
        weighted sum of its similarities to the query book.
    top_n : int, optional
        Number of books to return (default 20, the previously hard-coded
        value).

    Returns
    -------
    pandas.Series
        The ``'title'`` entries of the ``top_n`` highest-scoring books,
        best first. The query book itself is not excluded, so it normally
        appears at the top of the list.

    Raises
    ------
    KeyError
        If ``title`` is not present in ``indices``.
    ValueError
        If ``similarities`` is empty, if ``weights`` and ``similarities``
        differ in length, or if ``top_n`` is negative.
    """
    if len(similarities) == 0:
        raise ValueError('similarities must contain at least one matrix')
    if len(weights) != len(similarities):
        raise ValueError(
            'weights and similarities must have the same length '
            '(got %d and %d)' % (len(weights), len(similarities))
        )
    if top_n < 0:
        raise ValueError('top_n must be non-negative')

    # Get the index label of the book that matches the title. When several
    # books share the title the lookup returns multiple labels; use the first
    # one instead of failing later with an obscure indexing error.
    idx = indices[title]
    if np.ndim(idx) > 0:
        idx = np.asarray(idx).ravel()[0]

    # Labels are 1-based while matrix rows are 0-based
    row = int(idx) - 1

    # Get the total number of books
    num_books = len(similarities[0])

    # Weighted sum of the pairwise similarity scores of all books with that book
    combined = np.zeros(num_books, dtype=float)
    for weight, similarity in zip(weights, similarities):
        combined += weight * np.asarray(similarity[row], dtype=float)

    # Positions of the highest scores first; a stable sort keeps books with
    # equal scores in their original order
    ranked = np.argsort(-combined, kind='stable')[:top_n]

    # Return the titles of the top_n most similar books
    return df['title'].iloc[ranked]

In [21]:
# Get the books dataframe through the project loader, using data_path.
# NOTE(review): the recorded output ("found books_dataframe in file...")
# suggests the loader reuses a previously saved copy -- confirm in the loader.
books = get_book_dataframe(data_path)

found books_dataframe in file...


In [22]:
# Construct a reverse map from book title to the book's index label in `books`.
# Series.drop_duplicates() compares the Series *values* (the index labels),
# not the titles, so it never removed repeated titles and a lookup of such a
# title returned several labels. Deduplicate on the title index instead,
# keeping the first book for each title.
indices = pd.Series(books.index, index=books['title'])
indices = indices[~indices.index.duplicated(keep='first')]

In [23]:
# Produce the feature matrix for the books dataframe via the project loader
# and display its shape (10000 x 82203 in the recorded run).
feature_matrix = get_book_features(books)
feature_matrix.shape

feature_matrix exists in file...


(10000, 82203)

In [24]:
# Compute the pairwise cosine similarity between all rows of the feature matrix.
# NOTE: in the recorded run this cell was interrupted (KeyboardInterrupt), so
# cosine_sim_features only exists if the cell is re-run to completion.
cosine_sim_features = cosine_similarity(feature_matrix)

KeyboardInterrupt: 

In [None]:
# Get recs using only the similarity matrix of the combined feature matrix
# (a single matrix with weight 1). Requires cosine_sim_features, i.e. the
# similarity cell above must have finished running.
similarities_features = [cosine_sim_features]
weights_features = [1]
recs_features = get_recommendations(books, indices, title, similarities_features, weights_features)
recs_features

In [None]:
'''

Load in Item Matrix

'''

In [14]:
# Load the saved item_matrix from the .tmp folder and display its shape
# (10000 x 10 in the recorded run). Per the original notes it holds
# "concepts and features" joined from ratings and book features; it is used
# below to test the recommendations.
filename = '../.tmp/item_matrix.npy'
item_matrix = np.load(filename)
item_matrix.shape

(10000, 10)

In [15]:
# Compute the pairwise cosine similarity between all rows of the item matrix
cosine_sim_item_matrix = cosine_similarity(item_matrix)

In [16]:
# Save the similarity matrix computed from item_matrix (joined matrices from
# ratings and features) to the .tmp folder. Note that `filename` is reused
# here and now points at the similarity matrix file.
filename = '../.tmp/similarity_matrix.npy'
np.save(filename, cosine_sim_item_matrix)

In [17]:
# Get recs using only the item-matrix similarity (concepts and features),
# passed as a single matrix with weight 1
similarities_item_matrix = [cosine_sim_item_matrix]
weights_item_matrix = [1]
recs_item_matrix = get_recommendations(books, indices, title, similarities_item_matrix, weights_item_matrix)
recs_item_matrix

id
19     The Fellowship of the Ring (The Lord of the Ri...
7                                             The Hobbit
161    The Return of the King (The Lord of the Rings,...
155           The Two Towers (The Lord of the Rings, #2)
189    The Lord of the Rings (The Lord of the Rings, ...
766                Preludes & Nocturnes (The Sandman #1)
167                    American Gods (American Gods, #1)
110       A Clash of Kings  (A Song of Ice and Fire, #2)
135       A Storm of Swords (A Song of Ice and Fire, #3)
232                  The Gunslinger (The Dark Tower, #1)
165       A Feast for Crows (A Song of Ice and Fire, #4)
188    A Dance with Dragons (A Song of Ice and Fire, #5)
62           The Golden Compass (His Dark Materials, #1)
283    Good Omens: The Nice and Accurate Prophecies o...
205    Interview with the Vampire (The Vampire Chroni...
612        The Drawing of the Three (The Dark Tower, #2)
192    The Name of the Wind (The Kingkiller Chronicle...
322                         

In [18]:
# Test recommendations from only part of the item matrix: keep columns 5-9
# (the last 5 of its 10 columns) and display the resulting shape.
# NOTE(review): presumably these columns are the half derived from a single
# source matrix (the next cell calls it "collab filtering") -- confirm.
item_matrix_test = item_matrix[:,5:10]
item_matrix_test.shape

(10000, 5)

In [19]:
# Compute the pairwise cosine similarity between all rows of the sliced test
# matrix (described by the author as the collab filtering matrix)
cosine_sim_test = cosine_similarity(item_matrix_test)

In [20]:
# Get recs using only the similarity of the sliced test matrix
# (a single matrix with weight 1)
similarities_test = [cosine_sim_test]
weights_test = [1]
recs_test = get_recommendations(books, indices, title, similarities_test, weights_test)
recs_test

id
19      The Fellowship of the Ring (The Lord of the Ri...
161     The Return of the King (The Lord of the Rings,...
155            The Two Towers (The Lord of the Rings, #2)
189     The Lord of the Rings (The Lord of the Rings, ...
3521                             Neil Gaiman's Neverwhere
5344               A Game of Thrones: Comic Book, Issue 1
466                             The Hobbit: Graphic Novel
6738    The Hedge Knight (The Hedge Knight Graphic Nov...
4976         Unfinished Tales of Númenor and Middle-Earth
2309                                The Children of Húrin
7                                              The Hobbit
8376                 Pani Jeziora (Saga o Wiedźminie, #7)
7470    The Dresden Files:  Storm Front, Volume 1-  Th...
2149    A Song of Ice and Fire (A Song of Ice and Fire...
9578                        Berserk, Vol. 1 (Berserk, #1)
2239    Dragons of Spring Dawning (Dragonlance: Chroni...
2527    A Song of Ice and Fire (A Song of Ice and Fire...
3500    Str