In [7]:
# A module to handle recommendation systems based on sparse matrix decomposition.
# The basis is Singular Value Decomposition (SVD) and there are methods to 
# return similar items and find the N closest items for a user. 
#
# Written by Z. Miller 05/31/17. Open source if not used for profit.

import numpy as np
from scipy.sparse import coo_matrix,csr_matrix
from scipy.sparse.linalg import svds

class svdRec():
    """Recommender built on a truncated SVD of a sparse user x item matrix.

    Typical use: load a matrix with ``load_csv_sparse`` or ``load_data_numpy``,
    call ``SVD`` to factorise it, then query the similarity / recommendation
    methods.

    Indexing conventions (they differ between methods, so read carefully):
      * ``get_similar_items`` and ``recommends_for_user`` take and return
        1-based IDs (ID ``k`` lives in row/column ``k - 1`` of the matrix,
        matching how ``load_csv_sparse`` places entries).
      * ``item_similarity``, ``user_similarity``, ``user_item_similarity``,
        ``user_item_predict``, ``get_cell`` and ``recs_from_closest_user``
        work directly with 0-based matrix indices.
    """

    def __init__(self):
        # Factor matrices produced by SVD(); all None until SVD() has run.
        self.U, self.s, self.VT = (None, None, None)
        # Legacy attribute: nothing in this class ever populates it. It is
        # kept only so that code reading ``self.V`` keeps working.
        self.V = None
        self.user_encoder = None
        self.item_encoder = None
        self.mat = None
        self.decomp = False

    def _require_decomp(self):
        """Raise ValueError unless SVD() has already been run."""
        if not self.decomp:
            raise ValueError("Must run SVD() before making recommendations!")

    @staticmethod
    def _top_n(scored, num_recom, show_similarity):
        """Rank ``(id, score)`` pairs by descending score and keep the first ``num_recom``.

        The sort is stable, so pairs with equal scores keep their input order.
        Returns ``(id, score)`` tuples when ``show_similarity`` is true,
        otherwise just the ids.
        """
        ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)[:num_recom]
        if show_similarity:
            return [(ident, score) for ident, score in ranked]
        return [ident for ident, _ in ranked]

    def load_csv_sparse(self, filename, delimiter=',', skiprows=None):
        """Load ``rowID, colID, value`` triples from a delimited text file.

        Only the first three columns are read. IDs are treated as 1-based:
        an entry with rowID ``r`` and colID ``c`` is stored at ``[r-1, c-1]``
        and the matrix shape is ``(max rowID, max colID)``.

        Args:
            filename: path or file-like object accepted by ``np.loadtxt``.
            delimiter: column separator.
            skiprows: number of leading lines to skip; ``None`` means none.

        Raises:
            ValueError: if the file contains no data rows.
        """
        print("Note: load_csv_sparse expects a csv in the format of: rowID, colID, Value, ...")
        # np.loadtxt wants an integer here, so map the ``None`` default to 0.
        skip = 0 if skiprows is None else skiprows
        # ndmin=2 keeps a single-row file two-dimensional so the unpack works.
        data = np.loadtxt(filename, delimiter=delimiter, skiprows=skip,
                          usecols=(0, 1, 2), ndmin=2)
        if data.size == 0:
            raise ValueError("No data rows found in %s" % (filename,))
        u, m, r = data.T
        # loadtxt yields floats; sparse indices and shapes must be integers.
        rows = u.astype(int) - 1
        cols = m.astype(int) - 1
        shape = (int(u.max()), int(m.max()))
        self.mat = coo_matrix((r, (rows, cols)), shape=shape).tocsr()
        print("Created matrix of shape: ", self.mat.shape)

    def load_data_numpy(self, array, data_type=float):
        """Use ``array`` (anything ``csr_matrix`` accepts) as the data matrix."""
        self.mat = csr_matrix(array, dtype=data_type)
        print("Created matrix of shape: ", self.mat.shape)

    def load_item_encoder(self, d):
        """Register a dict mapping itemID -> title.

        ``get_item_name`` looks keys up as ``str(itemid)``.

        Raises:
            TypeError: if ``d`` is not a dict.
        """
        if not isinstance(d, dict):
            raise TypeError("Encoder must be dictionary with key = itemID and value = Title")
        self.item_encoder = d

    def load_user_encoder(self, d):
        """Register a dict mapping userID -> name.

        ``get_user_name`` looks keys up as ``str(userid)``.

        Raises:
            TypeError: if ``d`` is not a dict.
        """
        if not isinstance(d, dict):
            raise TypeError("Encoder must be dictionary with key = userID and value = Title")
        self.user_encoder = d

    def get_item_name(self, itemid):
        """Return the encoder entry for ``str(itemid)``, or a notice if no encoder is loaded."""
        if self.item_encoder:
            return self.item_encoder[str(itemid)]
        return "No ItemId -> Item-name Encoder Built!"

    def get_user_name(self, userid):
        """Return the encoder entry for ``str(userid)``, or a notice if no encoder is loaded."""
        # Gate on the *user* encoder (the item encoder was checked here by mistake).
        if self.user_encoder:
            return self.user_encoder[str(userid)]
        return "No UserID -> Username Encoder Built!"

    def SVD(self, num_dim=None):
        """Factorise the loaded matrix with ``scipy.sparse.linalg.svds``.

        Stores the factors in ``self.U``, ``self.s`` and ``self.VT`` and marks
        the instance as decomposed.

        Args:
            num_dim: number of singular values to compute; defaults to
                ``min(matrix shape) - 1``.

        Raises:
            ValueError: if no matrix has been loaded yet.
        """
        if self.mat is None:
            raise ValueError("Must load data before running SVD()!")
        if num_dim is None:
            print("Number of SVD dimensions not requested, using %s dimensions." % (min(self.mat.shape)-1), "To set, use num_dim.")
            num_dim = min(self.mat.shape)-1
        mat = self.mat
        # svds needs a floating-point matrix; convert integer data on the fly
        # without touching the stored matrix.
        if not np.issubdtype(mat.dtype, np.inexact):
            mat = mat.astype(float)
        self.U, self.s, self.VT = svds(mat, k=num_dim)
        self.decomp = True

    def get_cell(self, i, j):
        """Return the stored value at 0-based row ``i``, column ``j``."""
        return self.mat[i, j]

    def get_similar_items(self, itemID, num_recom=5, show_similarity=False):
        """Return the ``num_recom`` items scoring highest against ``itemID``.

        ``itemID`` and the returned IDs are 1-based. The query item itself is
        not excluded from the candidates. With ``show_similarity`` the result
        is a list of ``(itemID, similarity)`` tuples, otherwise a list of IDs.

        Raises:
            ValueError: if SVD() has not been run.
        """
        self._require_decomp()
        scored = [(item + 1, self.item_similarity(itemID - 1, item))
                  for item in range(self.VT.shape[1])]
        return self._top_n(scored, num_recom, show_similarity)

    def item_similarity(self, item1, item2):
        """Dot product of the latent vectors of two items (0-based indices)."""
        self._require_decomp()
        return np.dot(self.VT.T[item1], self.VT.T[item2])

    def user_similarity(self, user1, user2):
        """Dot product of the latent vectors of two users (0-based indices)."""
        self._require_decomp()
        return np.dot(self.U[user1], self.U[user2])

    def user_item_similarity(self, user, item):
        """Dot product of a user's and an item's latent vectors (0-based indices)."""
        self._require_decomp()
        return np.dot(self.U[user], self.VT.T[item])

    def user_item_predict(self, user, item):
        """Score ``item`` for ``user`` (0-based indices).

        Currently the same computation as ``user_item_similarity``.
        NOTE(review): the singular values in ``self.s`` are not applied, so
        this is not a reconstruction of the original matrix entry - confirm
        whether that is intended.
        """
        self._require_decomp()
        return np.dot(self.U[user], self.VT.T[item])

    def recommends_for_user(self, userID, num_recom=2, show_similarity=False):
        """Return the ``num_recom`` items with the highest score for ``userID``.

        ``userID`` and the returned item IDs are 1-based. Items the user
        already has a value for are not filtered out. With ``show_similarity``
        the result is a list of ``(itemID, score)`` tuples.

        Raises:
            ValueError: if SVD() has not been run.
        """
        self._require_decomp()
        scored = [(item + 1, self.user_item_predict(userID - 1, item))
                  for item in range(self.VT.shape[1])]
        return self._top_n(scored, num_recom, show_similarity)

    def recs_from_closest_user(self, userID, num_users=1):
        """Collect items that the most similar users have and ``userID`` differs on.

        NOTE: unlike ``recommends_for_user``, ``userID`` is used directly as a
        0-based row index, and the returned values are 0-based column indices.

        A column is recommended when a neighbour's value is non-zero and
        differs from this user's value in the same column.

        Args:
            userID: 0-based row index of the user.
            num_users: how many nearest users (by ``user_similarity``) to use.

        Returns:
            De-duplicated list of column indices, in no particular order.

        Raises:
            ValueError: if SVD() has not been run.
        """
        self._require_decomp()
        neighbours = [(user, self.user_similarity(userID, user))
                      for user in range(self.U.shape[0]) if user != userID]
        comp_user = self._top_n(neighbours, num_users, False)
        print(comp_user)
        print("User #%s's most similar user is User #%s "% (userID, comp_user))
        # Densify only the rows that are needed instead of the whole matrix.
        current = self.mat[userID].toarray()[0]
        recs = []
        for user in comp_user:
            rec_likes = self.mat[user].toarray()[0]
            wanted = np.nonzero((rec_likes != current) & (rec_likes != 0))[0]
            recs.extend(wanted.tolist())
        return list(set(recs))

In [8]:
# Build the recommender from the ratings CSV (first line of the file is
# skipped) and factorise it into 100 latent dimensions.
ratings_path = 'data/ml-20m/ratings_small.csv'
svd = svdRec()
svd.load_csv_sparse(ratings_path, delimiter=',', skiprows=1)
svd.SVD(num_dim=100)

Note: load_csv_sparse expects a csv in the format of: rowID, colID, Value, ...
Created matrix of shape:  (702, 128594)


In [11]:
# recs_from_closest_user uses this value directly as a 0-based row index
# into the ratings matrix, and returns 0-based column indices.
user_to_rec = 3
closest_user_recs = svd.recs_from_closest_user(user_to_rec, num_users=3)
print("Items for User %s to check out based on similar user:\n"% user_to_rec, closest_user_recs)

[282, 488, 238]
User #3's most similar user is User #[282, 488, 238] 
Items for User 3 to check out based on similar user:
 [0, 1, 5, 2053, 4104, 9, 2057, 6153, 6154, 12, 6156, 15, 16, 18, 20, 24, 28, 2077, 31, 2079, 33, 2081, 35, 2080, 2082, 38, 2084, 2086, 2088, 2089, 43, 2091, 46, 47, 2095, 49, 57, 2107, 61, 4160, 4161, 2114, 30792, 6217, 2122, 2123, 2124, 86, 2135, 2136, 2138, 2140, 2141, 94, 30815, 2151, 30824, 59500, 109, 110, 2158, 2160, 2161, 116, 6265, 49273, 2173, 4224, 4231, 6280, 2187, 140, 139, 4239, 149, 4245, 4247, 152, 6293, 6296, 156, 157, 160, 164, 8359, 8360, 171, 172, 4269, 8367, 8372, 2230, 184, 6332, 8382, 193, 197, 4298, 6349, 207, 4305, 4307, 4309, 2268, 6364, 6366, 2272, 4320, 6372, 230, 231, 4328, 4326, 234, 6376, 238, 240, 241, 2290, 2288, 4339, 2293, 246, 4342, 4343, 6385, 6391, 2299, 2301, 255, 2305, 259, 260, 261, 2312, 4360, 6409, 2317, 4366, 271, 2320, 4367, 4368, 2323, 276, 4369, 6415, 2328, 6428, 2334, 287, 2336, 289, 4385, 291, 294, 295, 299, 2352, 23