In [28]:
import os
import sys
import numpy as np
import pandas as pd
import scipy
import math
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from sklearn.decomposition import TruncatedSVD
from surprise import Reader, Dataset, SVD, evaluate, dump, accuracy
from collections import defaultdict

# Custom libraries
sys.path.append('../Util')
from loader import get_books, get_book_dataframe, get_book_features
from joiner import get_ratings, get_joint, load_amazon, load_goodreads
from reduction import reduce_matrix, get_sparse

In [2]:
def get_top_n_recs(result, books, n, q):
    """Return the titles of the ``n`` highest-scoring books for one user.

    Parameters
    ----------
    result : array-like
        One predicted score per book. A flat vector and an (N, 1) column
        matrix (what ``map_user_sparse`` returns) are both accepted; the
        input is flattened to 1-D before ranking.
    books : pandas.DataFrame
        Needs a ``title`` column. Row position ``i`` is the book scored by
        entry ``i`` of ``result``.
    n : int
        Maximum number of titles to return. Values larger than the number
        of scores are clamped instead of raising ``IndexError``; values
        below 1 give an empty list.
    q : array-like
        The user's rating vector; a non-zero entry marks a book the user
        has already rated.

    Returns
    -------
    list
        Titles ordered from best to worst score. Already-rated books get a
        score of ``-inf`` so they sort last; they are still returned when
        ``n`` is larger than the number of unrated books.

    Raises
    ------
    IndexError
        If ``q`` has fewer entries than ``result``.
    """
    scores = np.asarray(result, dtype=float).ravel()
    ratings = np.asarray(q).ravel()
    if ratings.shape[0] < scores.shape[0]:
        raise IndexError(
            "q has {} entries but result has {} scores".format(
                ratings.shape[0], scores.shape[0]))

    # Books the user has already rated must never outrank unrated ones.
    masked = np.where(ratings[:scores.shape[0]] == 0, scores, -np.inf)

    # Stable sort on the negated scores: descending order, ties keep their
    # original book order (same tie-breaking as sorted(..., reverse=True)).
    order = np.argsort(-masked, kind='stable')

    count = max(0, min(n, order.shape[0]))
    titles = books['title']
    return [titles.iloc[book_id] for book_id in order[:count]]

In [3]:
def map_user(q, V):
    """Project a user vector through ``V`` and back again.

    Computes ``(q · V) · Vᵀ`` with ``np.matmul``: ``q`` is first mapped
    into the space spanned by the columns of ``V`` (the "concept" space)
    and then mapped back so the result has one entry per row of ``V``.
    """
    # user -> concept space
    concept_coords = np.matmul(q, V)
    # concept space -> item space
    item_basis = V.T
    return np.matmul(concept_coords, item_basis)

In [4]:
def map_user_sparse(q, V):
    """Sparse counterpart of ``map_user``: compute ``(q · V) · Vᵀ``.

    Parameters
    ----------
    q : array-like
        The user's rating vector; it is wrapped in a 1-row CSR matrix.
    V : scipy sparse matrix or numpy.ndarray
        Item matrix with one row per entry of ``q``. A dense ``V`` is
        accepted as well (previously it failed because the dense product
        has no ``.todense()``).

    Returns
    -------
    numpy.matrix
        Column matrix of shape (N, 1) with one score per row of ``V``.
    """
    # `import scipy` alone does not guarantee the `sparse` submodule is
    # loaded, so import it explicitly here.
    import scipy.sparse

    q_sparse = scipy.sparse.csr_matrix(q)
    # map new user to concept space by q*V
    user_to_concept = q_sparse.dot(V)
    # map user back to item space with user_to_concept * VT
    result = user_to_concept.dot(V.T)
    # The product is sparse only when V is sparse; densify in that case.
    if scipy.sparse.issparse(result):
        result = result.todense()
    return np.asmatrix(result).T

In [5]:
# Directory that holds the goodbooks-10k data; it is handed to
# get_book_dataframe below. The path is relative, so adjust it to wherever
# the data is saved on your machine.
data_path = '../../goodbooks-10k/'

In [6]:
# Load the book metadata as a DataFrame. The loader prints a message when it
# finds a previously saved copy on disk. Later cells look titles up by row
# position (books.iloc[i]['title']).
books = get_book_dataframe(data_path)

found books_dataframe in file...


In [7]:
# Load a precomputed matrix from a .npy file into `qi`.
# NOTE(review): presumably the item factors of a previously trained SVD
# model (one row per book) — confirm against the code that wrote this file.
filename = '../.tmp/svd_20_1000.npy'
qi = np.load(filename)

In [8]:
'''
User ratings need to be on a -2 to 3 scale so that bad ratings count 'against' recs.
'''

"\nUser ratings need to be on a -2 to 3 scale so that bad ratings count 'against' recs.\n"

In [19]:
# Rating vector of a cached Goodreads user, stored on disk as a sparse .npz
sparse_q = scipy.sparse.load_npz('../.tmp/cached_users/user_nickgreenquist.npz')
# Densify, keep the first row, and rebuild it as a flat 1-D array
q = np.array(sparse_q.toarray()[0].tolist())
q.shape

(10000,)

In [15]:
# Alternative test user: rebinds `q` to the cached "likes_fantasy" profile
sparse_q = scipy.sparse.load_npz('../.tmp/cached_users/user_likes_fantasy.npz')
# Densify, keep the first row, and rebuild it as a flat 1-D array
q = np.array(sparse_q.toarray()[0].tolist())
q.shape

(10000,)

In [89]:
# SVD rating prediction: r_hat(u, i) = mu + b_u + b_i + q_i^T * p_u
# NOTE(review): `qi` presumably stacks the item factors q_i, one row per
# book — confirm.
qi.shape

(10000, 1000)

In [16]:
# Score every book for the current user `q` by projecting through `qi`, then
# print the 25 best titles the user has not rated yet.
# NOTE(review): `q` is whichever user cell was executed last — confirm the
# intended user before reading the list below.
recs = get_top_n_recs(map_user(q, qi), books, 25, q)
for r in recs:
    print(r)

City of Glass (The Mortal Instruments, #3)
The Scarlet Letter
The Other Boleyn Girl (The Plantagenet and Tudor Novels, #9)
The Lucky One
The Shining (The Shining #1)
A Breath of Snow and Ashes (Outlander, #6)
Veronika Decides to Die
Last Chance Saloon
Treasure Island
The White Queen (The Plantagenet and Tudor Novels, #2)
Year of Yes: How to Dance It Out, Stand In the Sun and Be Your Own Person
One Flew Over the Cuckoo's Nest
Outlander (Outlander, #1)
Four to Score (Stephanie Plum, #4)
Today Will Be Different
The Return of Sherlock Holmes
Heaven is for Real: A Little Boy's Astounding Story of His Trip to Heaven and Back
The Boys in the Boat: Nine Americans and Their Epic Quest for Gold at the 1936 Berlin Olympics
Scrappy Little Nobody
The Light Between Oceans
The Son of Neptune (The Heroes of Olympus, #2)
Ultimate Spider-Man, Volume 1: Power and Responsibility
Eat, Pray, Love
The League of Extraordinary Gentlemen, Vol. 1
The Story of Edgar Sawtelle


In [99]:
'''

Use Item Matrix to get recs for new user

'''

'\n\nUse Item Matrix to get recs for new user\n\n'

In [20]:
# Load in item_matrix (concepts and features) and test recs.
# Note: this rebinds `filename`, which an earlier cell used for the SVD file.
filename = '../.tmp/item_matrix.npy'
item_matrix = np.load(filename)
item_matrix.shape

(10000, 2000)

In [21]:
# Same recommendation procedure as for `qi`, but projecting the user through
# the full item_matrix; prints the 25 best unrated titles.
recs = get_top_n_recs(map_user(q, item_matrix), books, 25, q)
for r in recs:
    print(r)

A Wrinkle in Time (A Wrinkle in Time Quintet, #1)
Grendel
A Wizard of Earthsea (Earthsea Cycle, #1)
Dandelion Wine (Green Town, #1)
A Wind in the Door (A Wrinkle in Time Quintet, #2)
Many Waters (A Wrinkle in Time Quintet, #4)
An Acceptable Time (A Wrinkle in Time Quintet, #5)
The Earthsea Trilogy
A Swiftly Tilting Planet (A Wrinkle in Time Quintet, #3)
Flowers for Algernon
Slaughterhouse-Five
Tuck Everlasting
The Bone Clocks
Alice in Wonderland
The Lost World (Professor Challenger, #1)
Alice's Adventures in Wonderland & Through the Looking-Glass
Howl's Moving Castle (Howl's Moving Castle, #1)
The Neverending Story
Alice's Adventures in Wonderland
Through the Looking-Glass, and What Alice Found There
Brave New World Revisited 
Brave New World / Brave New World Revisited
Journey to the Center of the Earth (Extraordinary Voyages, #3)
The Horse and His Boy (Chronicles of Narnia, #5)
The Silver Chair (Chronicles of Narnia, #4)


In [12]:
'''

Use Just part of Item Matrix for Recs

'''

'\n\nUse Just part of Item Matrix for Recs\n\n'

In [12]:
# Keep only columns 10..109 of item_matrix (100 columns) and show the value
# range of that slice.
# NOTE(review): the reason for choosing exactly this column range is not
# recorded here — confirm and consider naming the bounds.
part = item_matrix[:,10:110]
(np.amin(part), np.amax(part))

(-0.77808343373846678, 1.4040590953389824)

In [13]:
# Recommend from the 100-column slice only; prints the 10 best unrated titles.
recs = get_top_n_recs(map_user(q, part), books, 10, q)
for r in recs:
    print(r)

The Lord of the Rings (The Lord of the Rings, #1-3)
The Fellowship of the Ring (The Lord of the Rings, #1)
The Hobbit
The Return of the King (The Lord of the Rings, #3)
The Two Towers (The Lord of the Rings, #2)
The Belgariad, Vol. 1: Pawn of Prophecy / Queen of Sorcery / Magician's Gambit (The Belgariad, #1-3)
The Name of the Wind (The Kingkiller Chronicle, #1)
The Farthest Shore (Earthsea Cycle, #3)
The Tombs of Atuan (Earthsea Cycle, #2)
The Crown Conspiracy (The Riyria Revelations, #1)


In [14]:
'''

Use Feature Matrix for Recs

'''

'\n\nUse Feature Matrix for Recs\n\n'

In [24]:
# Produce the book feature matrix; the helper prints a message when it finds
# a previously saved copy.
# NOTE(review): presumably a scipy sparse matrix, since it is only ever
# passed to map_user_sparse (which calls .todense() on the product) — confirm.
feature_matrix = get_book_features(books)
feature_matrix.shape

feature_matrix exists in file...


(10000, 82203)

In [15]:
# Recommend using the feature matrix only, via the sparse projection
# helper; prints the 25 best unrated titles.
recs = get_top_n_recs(map_user_sparse(q, feature_matrix), books, 25, q)
for r in recs:
    print(r)

A Wrinkle in Time (A Wrinkle in Time Quintet, #1)
Grendel
A Wizard of Earthsea (Earthsea Cycle, #1)
Dandelion Wine (Green Town, #1)
A Wind in the Door (A Wrinkle in Time Quintet, #2)
Many Waters (A Wrinkle in Time Quintet, #4)
An Acceptable Time (A Wrinkle in Time Quintet, #5)
The Earthsea Trilogy
A Swiftly Tilting Planet (A Wrinkle in Time Quintet, #3)
Slaughterhouse-Five
Flowers for Algernon
Tuck Everlasting
The Bone Clocks
Alice in Wonderland
The Neverending Story
Alice's Adventures in Wonderland & Through the Looking-Glass
Howl's Moving Castle (Howl's Moving Castle, #1)
The Lost World (Professor Challenger, #1)
Brave New World Revisited 
Alice's Adventures in Wonderland
Through the Looking-Glass, and What Alice Found There
Brave New World / Brave New World Revisited
The Horse and His Boy (Chronicles of Narnia, #5)
The Buried Giant
Journey to the Center of the Earth (Extraordinary Voyages, #3)


In [62]:
# Get recommendations using a log ranking method of both item and feature matrices.
# Requesting n = len(books) returns the full ordering of the catalogue under
# each model, so every title has a position in both lists; the following
# cells turn those positions into log-ranks and blend them.
recs_features = get_top_n_recs(map_user_sparse(q, feature_matrix), books, len(books), q)
recs_svd = get_top_n_recs(map_user(q, qi), books, len(books), q)

In [75]:
# Map each title to log(position) in each ranked list, where position is
# 1-based (so the best title gets log(1) = 0).
# setdefault keeps the FIRST (best) position seen for a title: if the same
# title string occurs more than once in a list, a plain assignment would
# overwrite its best rank with a worse one.
# Each list is walked by its own length rather than by len(books).
title_to_rank_feature = {}
for position, title_feature in enumerate(recs_features, start=1):
    title_to_rank_feature.setdefault(title_feature, math.log(position))

title_to_rank_svd = {}
for position, title_svd in enumerate(recs_svd, start=1):
    title_to_rank_svd.setdefault(title_svd, math.log(position))


In [76]:
# Blend the two log-ranks into a single score per title (lower is better).
# weight_feature is the share given to the feature-based ranking; the
# remainder goes to the SVD ranking.
weight_feature = 0.5
final = []
for title in title_to_rank_feature:
    feature_part = weight_feature*title_to_rank_feature[title]
    svd_part = (1.0-weight_feature)*title_to_rank_svd[title]
    final.append((feature_part + svd_part, title))

In [None]:
# A lower blended log-rank means a stronger recommendation, so sort ascending.
final = sorted(final, key=lambda x: x[0])

# Show only the head of the list: printing one line per book in the
# catalogue floods the notebook output.
top_n_to_show = 25
for rank, title in final[:top_n_to_show]:
    print("rank: {}, title: {}".format(rank, title))

rank: 3.3825194883902707, title: Alice in Wonderland
rank: 3.708790201207272, title: The Lovely Bones
rank: 3.791878150353556, title: Eat, Pray, Love
rank: 3.979637980058198, title: Fahrenheit 451
rank: 4.100967175597111, title: Journey to the Center of the Earth (Extraordinary Voyages, #3)
rank: 4.213853012457351, title: Many Waters (A Wrinkle in Time Quintet, #4)
rank: 4.283562780082223, title: Treasure Island
rank: 4.342200555200073, title: The Secret Life of Bees
rank: 4.448499276371912, title: The Handmaid's Tale
rank: 4.528652867903561, title: A Wrinkle in Time (A Wrinkle in Time Quintet, #1)
rank: 4.541367123685518, title: A Wizard of Earthsea (Earthsea Cycle, #1)
rank: 4.69214683954981, title: The Ocean at the End of the Lane
rank: 4.728287627256895, title: Grendel
rank: 4.742392608822971, title: Daddy-Long-Legs (Daddy-Long-Legs, #1)
rank: 4.7734420173089624, title: The Girl on the Train
rank: 4.816626196844034, title: Peter Pan
rank: 4.8479856285538085, title: It
rank: 4.88096

In [None]:
'''
rank: 4.35709336812867, title: Alice in Wonderland
rank: 5.009634793762673, title: The Lovely Bones
rank: 5.17581069205524, title: Eat, Pray, Love
rank: 5.551330351464524, title: Fahrenheit 451
rank: 5.793988742542351, title: Journey to the Center of the Earth (Extraordinary Voyages, #3)
rank: 6.019760416262829, title: Many Waters (A Wrinkle in Time Quintet, #4)
rank: 6.1591799515125745, title: Treasure Island
rank: 6.276455501748272, title: The Secret Life of Bees
rank: 6.489052944091952, title: The Handmaid's Tale
rank: 6.64936012715525, title: A Wrinkle in Time (A Wrinkle in Time Quintet, #1)
rank: 6.674788638719165, title: A Wizard of Earthsea (Earthsea Cycle, #1)
rank: 6.976348070447749, title: The Ocean at the End of the Lane
rank: 7.048629645861918, title: Grendel
rank: 7.076839608994072, title: Daddy-Long-Legs (Daddy-Long-Legs, #1)
rank: 7.138938425966053, title: The Girl on the Train
rank: 7.225306785036196, title: Peter Pan
rank: 7.288025648455745, title: It
rank: 7.353978379562469, title: Shatter Me (Shatter Me, #1)
rank: 7.418282858337305, title: Clariel (Abhorsen, #4)
rank: 7.4671424693625585, title: The Godfather
rank: 7.512988857487977, title: Slaughterhouse-Five
rank: 7.561329684422259, title: Vanity Fair
rank: 7.576199846901711, title: Dandelion Wine (Green Town, #1)
rank: 7.585748211770405, title: Howl's Moving Castle (Howl's Moving Castle, #1)'''