In [10]:
import pandas as pd
from pandas import DataFrame
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse 
class CF(object):
    """docstring for CF"""
    def __init__(self, Y_data, k, dist_func = cosine_similarity, uuCF = 1):
        self.uuCF = uuCF # user-user (1) or item-item (0) CF
        self.Y_data = Y_data if uuCF else Y_data[:, [1, 0, 2]]
        self.k = k
        self.dist_func = dist_func
        self.Ybar_data = None
        # number of users and items. Remember to add 1 since id starts from 0
        self.n_users = int(np.max(self.Y_data[:, 0])) + 1 
        self.n_items = int(np.max(self.Y_data[:, 1])) + 1
    
    def add(self, new_data):
        """
        Update Y_data matrix when new ratings come.
        For simplicity, suppose that there is no new user or item.
        """
        self.Y_data = np.concatenate((self.Y_data, new_data), axis = 0)
    
    def normalize_Y(self):
        users = self.Y_data[:, 0] # all users - first col of the Y_data
        self.Ybar_data = self.Y_data.copy()
        self.mu = np.zeros((self.n_users,))
        for n in range(self.n_users):
            # row indices of rating done by user n
            # since indices need to be integers, we need to convert
            ids = np.where(users == n)[0].astype(np.int32)
            # indices of all ratings associated with user n
            item_ids = self.Y_data[ids, 1] 
            # and the corresponding ratings 
            ratings = self.Y_data[ids, 2]
            # take mean
            m = np.mean(ratings) 
            if np.isnan(m):
                m = 0 # to avoid empty array and nan value
            self.mu[n] = m
            # normalize
            self.Ybar_data[ids, 2] = ratings - self.mu[n]

        ################################################
        # form the rating matrix as a sparse matrix. Sparsity is important 
        # for both memory and computing efficiency. For example, if #user = 1M, 
        # #item = 100k, then shape of the rating matrix would be (100k, 1M), 
        # you may not have enough memory to store this. Then, instead, we store 
        # nonzeros only, and, of course, their locations.
        self.Ybar = sparse.coo_matrix((self.Ybar_data[:, 2],
            (self.Ybar_data[:, 1], self.Ybar_data[:, 0])), (self.n_items, self.n_users))
        self.Ybar = self.Ybar.tocsr()

    def similarity(self):
        eps = 1e-6
        self.S = self.dist_func(self.Ybar.T, self.Ybar.T)
    
        
    def refresh(self):
        """
        Normalize data and calculate similarity matrix again (after
        some few ratings added)
        """
        self.normalize_Y()
        self.similarity() 
        
    def fit(self):
        self.refresh()
        
    
    def __pred(self, u, i, normalized = 1):
        """ 
        predict the rating of user u for item i (normalized)
        if you need the un
        """
        # Step 1: find all users who rated i
        ids = np.where(self.Y_data[:, 1] == i)[0].astype(np.int32)
        # Step 2: 
        users_rated_i = (self.Y_data[ids, 0]).astype(np.int32)
        # Step 3: find similarity btw the current user and others 
        # who already rated i
        sim = self.S[u, users_rated_i]
        # Step 4: find the k most similarity users
        a = np.argsort(sim)[-self.k:] 
        # and the corresponding similarity levels
        nearest_s = sim[a]
        # How did each of 'near' users rated item i
        r = self.Ybar[i, users_rated_i[a]]
        if normalized:
            # add a small number, for instance, 1e-8, to avoid dividing by 0
            return (r*nearest_s)[0]/(np.abs(nearest_s).sum() + 1e-8)

        return (r*nearest_s)[0]/(np.abs(nearest_s).sum() + 1e-8) + self.mu[u]
    
    def pred(self, u, i, normalized = 1):
        """ 
        predict the rating of user u for item i (normalized)
        if you need the un
        """
        if self.uuCF: return self.__pred(u, i, normalized)
        return self.__pred(i, u, normalized)
            
    
    def recommend(self, u):
        """
        Determine all items should be recommended for user u.
        The decision is made based on all i such that:
        self.pred(u, i) > 0. Suppose we are considering items which 
        have not been rated by u yet. 
        """
        ids = np.where(self.Y_data[:, 0] == u)[0]
        items_rated_by_u = self.Y_data[ids, 1].tolist()              
        recommended_items = []
        for i in range(self.n_items):
            if i not in items_rated_by_u:
                rating = self.__pred(u, i)
                if rating > 0: 
                    recommended_items.append(i)
        
        return recommended_items 
    
    def recommend2(self, u):
        """
        Determine all items should be recommended for user u.
        The decision is made based on all i such that:
        self.pred(u, i) > 0. Suppose we are considering items which 
        have not been rated by u yet. 
        """
        ids = np.where(self.Y_data[:, 0] == u)[0]
        items_rated_by_u = self.Y_data[ids, 1].tolist()              
        recommended_items = []
    
        for i in range(self.n_items):
            if i not in items_rated_by_u:
                rating = self.__pred(u, i)
                if rating > 0: 
                    recommended_items.append(i)
        
        return recommended_items 

    def print_recommendation(self):
        """
        print all items which should be recommended for each user 
        """
        print ('Recommendation: ')
        for u in range(self.n_users):
            recommended_items = self.recommend(u)
            if self.uuCF:
                print ('    Recommend item(s):', recommended_items, 'for user', u)
            else: 
                print ('    Recommend item', u, 'for user(s) : ', recommended_items)
    def file(self):
        """
        Determine all items should be recommended for user u.
        The decision is made based on all i such that:
        self.pred(u, i) > 0. Suppose we are considering items which 
        have not been rated by u yet. 
        """
                    
        recommended = []
        items = []
        user =[]
        for u in range(self.n_users):
            ids = np.where(self.Y_data[:, 0] == u)[0]
            items_rated_by_u = self.Y_data[ids, 1].tolist()  
            for i in range(self.n_items):
                if i not in items_rated_by_u:
                    rating = self.__pred(u, i)
                    items.append(i)
                    items.append(u)
                    recommended.append(rating)
        RS={
            'user':user,
            'item' :items,
            'recommended':recommended
        }
        df = DataFrame(RS, columns= ['user', 'item','recommended'])
        export_csv = df.to_csv ('export_dataframe.csv', index = None, header=True)
    

In [2]:
ratings = pd.read_csv('test_rate.csv', encoding='latin-1')
Y_data = ratings.to_numpy()
rs = CF(Y_data, k = 3, uuCF = 0)
rs.fit()

rs.print_recommendation()

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Recommendation: 
    Recommend item 0 for user(s) :  []
    Recommend item 1 for user(s) :  [1, 7, 11, 14, 15, 18, 20, 24, 25, 29, 31, 33, 34, 36, 40, 47, 54, 56, 57, 60, 67, 71, 72, 73, 76, 78, 80, 85, 86, 87, 89, 90, 92, 94, 95, 98, 100]
    Recommend item 2 for user(s) :  [1, 8, 10, 12, 15, 20, 25, 31, 33, 35, 36, 45, 47, 53, 54, 56, 57, 60, 64, 65, 67, 77, 78, 86, 87, 89, 91, 95]
    Recommend item 3 for user(s) :  [1, 7, 11, 14, 18, 25, 31, 33, 38, 42, 49, 51, 52, 53, 54, 55, 56, 63, 65, 78, 85, 86, 87, 89]
    Recommend item 4 for user(s) :  [2, 5, 7, 8, 9, 16, 25, 26, 28, 29, 32, 33, 44, 67, 70, 72, 76, 77, 80, 82, 85]
    Recommend item 5 for user(s) :  [2, 4, 5, 6, 7, 10, 13, 15, 21, 24, 25, 26, 30, 31, 32, 33, 37, 40, 49, 53, 55, 59, 61, 62, 64, 69, 76, 77, 78, 80, 83, 85, 90, 99, 100]
    Recommend item 6 for user(s) :  [2, 8, 9, 13, 14, 15, 16, 22, 24, 26, 29, 36, 44, 46, 50, 59, 61, 64, 65, 67, 69, 75, 76, 78, 81, 84, 86, 88, 91, 92, 98]
    Recommend item 7 for user(s) : 

    Recommend item 60 for user(s) :  [1, 4, 6, 9, 17, 20, 21, 39, 42, 47, 55, 57, 61, 65, 75, 78, 84, 85, 88, 89]
    Recommend item 61 for user(s) :  [11, 15, 35, 36, 42, 55, 62, 85]
    Recommend item 62 for user(s) :  [1, 5, 6, 14, 20, 29, 30, 35, 37, 38, 41, 44, 50, 52, 55, 60, 61, 64, 65, 67, 69, 70, 73, 78, 85, 89, 100]
    Recommend item 63 for user(s) :  [7, 10, 13, 14, 15, 28, 32, 35, 36, 37, 39, 43, 46, 51, 53, 54, 55, 65, 71, 80, 83]
    Recommend item 64 for user(s) :  [6, 11, 12, 14, 17, 20, 24, 25, 26, 29, 31, 33, 34, 38, 41, 42, 44, 56, 59, 61, 64, 65, 72, 84, 87, 88, 90, 91, 92, 93, 94, 98]
    Recommend item 65 for user(s) :  [1, 7, 13, 16, 20, 25, 28, 29, 33, 35, 37, 49, 51, 55, 56, 58, 76, 79, 93, 99]
    Recommend item 66 for user(s) :  [8, 10, 11, 12, 14, 20, 25, 41, 56, 58, 59, 67, 69, 78, 83, 86, 87, 93, 100]
    Recommend item 67 for user(s) :  [3, 4, 6, 7, 8, 15, 21, 23, 24, 35, 37, 38, 41, 42, 44, 54, 58, 61, 69, 79, 82, 83, 84, 86, 89, 96]
    Recommend item 

In [3]:
rs2 = CF(Y_data, k = 3, uuCF = 1)
rs2.fit()

rs2.print_recommendation()

Recommendation: 
    Recommend item(s): [] for user 0
    Recommend item(s): [5, 7, 13, 15, 19, 20, 25, 26, 30, 31, 54, 55, 56, 59, 60, 62, 65, 68, 72, 73, 75, 76, 84, 85, 89, 91, 92, 94, 98, 99, 100] for user 1
    Recommend item(s): [1, 7, 9, 29, 31, 36, 40, 42, 46, 68, 77, 81, 84, 85, 86, 91, 99] for user 2
    Recommend item(s): [2, 7, 12, 13, 15, 26, 30, 33, 37, 38, 48, 52, 54, 55, 56, 64, 68, 74, 79, 81, 83, 84, 85, 91, 97, 99] for user 3
    Recommend item(s): [6, 7, 9, 11, 12, 37, 39, 53, 58, 60, 66, 68, 75, 76, 85, 87, 91, 92, 94, 95] for user 4
    Recommend item(s): [7, 15, 20, 21, 22, 25, 26, 27, 30, 31, 33, 35, 38, 40, 52, 54, 59, 70, 75, 81, 89, 90, 91, 94, 96, 97] for user 5
    Recommend item(s): [8, 10, 11, 13, 14, 15, 16, 30, 33, 35, 37, 39, 45, 52, 54, 56, 59, 60, 64, 65, 66, 68, 71, 73, 74, 75, 78, 79, 81, 87, 89, 92, 93, 97, 98, 100] for user 6
    Recommend item(s): [12, 13, 14, 16, 24, 25, 27, 34, 37, 43, 49, 54, 56, 63, 67, 68, 71, 89, 94, 98] for user 7
    Rec

    Recommend item(s): [2, 3, 7, 10, 13, 18, 26, 31, 33, 35, 37, 39, 45, 48, 54, 56, 60, 64, 66, 68, 71, 72, 75, 76, 77, 85, 87, 91, 96] for user 63
    Recommend item(s): [7, 9, 12, 16, 27, 29, 34, 35, 36, 38, 41, 46, 49, 53, 56, 57, 66, 74, 75, 76, 86, 88, 97, 99] for user 64
    Recommend item(s): [7, 9, 11, 13, 17, 31, 38, 48, 54, 58, 61, 65, 66, 72, 73, 74, 75, 79, 84, 87] for user 65
    Recommend item(s): [5, 9, 11, 13, 16, 19, 20, 26, 30, 33, 37, 39, 43, 46, 48, 53, 54, 59, 60, 61, 64, 66, 69, 70, 73, 74, 75, 80, 81, 84, 87, 89, 92, 96, 100] for user 66
    Recommend item(s): [1, 6, 9, 11, 13, 16, 18, 21, 22, 26, 30, 33, 34, 37, 38, 39, 56, 57, 61, 62, 64, 66, 71, 72, 76, 77, 93, 96, 98, 99] for user 67
    Recommend item(s): [11, 17, 26, 27, 29, 52, 60, 61, 66, 73, 84, 89, 94] for user 68
    Recommend item(s): [7, 10, 11, 13, 14, 16, 17, 18, 20, 21, 27, 30, 31, 33, 35, 37, 38, 39, 45, 46, 48, 52, 59, 60, 66, 69, 72, 73, 74, 75, 76, 79, 81, 84, 90, 94, 97, 99, 100] for user 69

In [11]:
rs2 = CF(Y_data, k = 3, uuCF = 1)
rs2.fit()

rs2.file()

NameError: name 'DataFrame' is not defined