In [1]:
import numpy as np

class Group():
    def __init__(self, config, members, candidate_items, ratings):
        #member ids
        self.members = sorted(members)
        
        #list of items that can be recommended. These should not have been
        #watched by any member of group
        self.candidate_items = candidate_items
        self.actual_recos = []
        self.false_positive = []
        
        self.ratings_per_member = [np.size(ratings[member].nonzero()) for member in self.members]
        
        #AF
        self.grp_factors_af = []
        self.bias_af = 0
        #eval. metrics for AF method for this group
        self.precision_af = 0
        self.recall_af = 0
        #recommended items acc. to AF method, 
        #after calculatng ratings of candidate items and filtering them
        self.reco_list_af = [] 
        
        #BF
        self.grp_factors_bf = []
        self.bias_bf = 0
        self.precision_bf = 0
        self.recall_bf = 0
        self.reco_list_bf = []
        
        #WBF
        self.grp_factors_wbf = []
        self.bias_wbf = 0
        self.precision_wbf = 0
        self.recall_wbf = 0
        #W matrix from the paper
        self.weight_matrix_wbf = []
        self.reco_list_wbf = []
    
    #verifies that given list of members can form a group w.r.t given data
    #ensure that there is atleast 1 movie in training data that hasn't been 
    #watched by any member of the group. Only these can be recommended.
    #call this method before calling group constructor.
    # For eg.
    @staticmethod
    def find_candidate_items(ratings, members):
        if len(members) == 0: return []
        
        unwatched_items = np.argwhere(ratings[members[0]] == 0)
        for member in members:
            cur_unwatched = np.argwhere(ratings[member] == 0)
            unwatched_items = np.intersect1d(unwatched_items, cur_unwatched)
        
        return unwatched_items
    
    #programmatically generate groups of given size
    @staticmethod
    def generate_groups(cfg, ratings, test_ratings, num_users, count, size, disjoint = True):
        avbl_users = [i for i in range(num_users)]
        groups = []
        testable_threshold = 50
        
        iter_idx = 0
        while iter_idx in range(count):
            group_members = np.random.choice(avbl_users, size = size, replace = False)
            candidate_items = Group.find_candidate_items(ratings, group_members)
            non_eval_items = Group.non_testable_items(group_members, test_ratings)
            testable_items = np.setdiff1d(candidate_items, non_eval_items)
            
            if len(candidate_items) != 0 and len(testable_items) >= testable_threshold:
                groups += [Group(cfg, group_members, candidate_items, ratings)]
                avbl_users = np.setdiff1d(avbl_users, group_members)
                iter_idx += 1
                
        return groups
    
    @staticmethod
    def non_testable_items(members, ratings): 
        non_eval_items = np.argwhere(ratings[members[0]] == 0)
        for member in members:
            cur_non_eval_items = np.argwhere(ratings[member] == 0)
            non_eval_items = np.intersect1d(non_eval_items, cur_non_eval_items)
        return non_eval_items
    
    def generate_actual_recommendations(self, ratings, threshold):
        non_eval_items = Group.non_testable_items(self.members, ratings)
            
        items = np.argwhere(np.logical_or(ratings[self.members[0]] >= threshold, ratings[self.members[0]] == 0)).flatten()
        fp = np.argwhere(np.logical_and(ratings[self.members[0]] > 0, ratings[self.members[0]] < threshold)).flatten()
        for member in self.members:
            cur_items = np.argwhere(np.logical_or(ratings[member] >= threshold, ratings[member] == 0)).flatten()
            fp = np.union1d(fp, np.argwhere(np.logical_and(ratings[member] > 0, ratings[member] < threshold)).flatten())
            items = np.intersect1d(items, cur_items)
        
        items = np.setdiff1d(items, non_eval_items)

        self.actual_recos = items
#         print 'acutal reco list: ', self.actual_recos
        self.false_positive = fp

    def evaluate_af(self):
        tp = float(np.intersect1d(self.actual_recos, self.reco_list_af).size)
        print '\ntp: ', tp
        fp = float(np.intersect1d(self.false_positive, self.reco_list_af).size)
        print 'fp: ', fp
        
        try:
            self.precision_af = tp / (tp + fp)
            print 'precision_af: ', self.precision_af
        except ZeroDivisionError:
            self.precision_af = np.NaN
            print 'precision_af: ', self.precision_af
            
        try:
            self.recall_af = tp / self.actual_recos.size
        except ZeroDivisionError:
            self.recall_af = np.NaN
        print 'recall_af: ', self.recall_af
        
        return (self.precision_af, self.recall_af, tp, fp)

        print "\nPrecision: " + str(self.precision_af)
        print "Recall: " + str(self.recall_af)

    def evaluate_bf(self):
        tp = float(np.intersect1d(self.actual_recos, self.reco_list_bf).size)
        print '\ntp: ', tp
        fp = float(np.intersect1d(self.false_positive, self.reco_list_bf).size)
        print 'fp: ', fp

        try:
            self.precision_bf = tp / (tp + fp)
            print 'precision_bf: ', self.precision_bf
        except ZeroDivisionError:
            self.precision_bf = np.NaN
            print 'precision_bf: ', self.precision_bf

        try:
            self.recall_bf = tp / self.actual_recos.size
        except ZeroDivisionError:
            self.recall_bf = np.NaN
        print 'recall_bf: ', self.recall_bf

        return (self.precision_bf, self.recall_bf, tp, fp)

        print "\nPrecision: " + str(self.precision_bf)
        print "Recall: " + str(self.recall_bf)

    def evaluate_wbf(self):
        tp = float(np.intersect1d(self.actual_recos, self.reco_list_wbf).size)
        print '\ntp: ', tp
        fp = float(np.intersect1d(self.false_positive, self.reco_list_wbf).size)
        print 'fp: ', fp

        try:
            self.precision_wbf = tp / (tp + fp)
            print 'precision_wbf: ', self.precision_wbf
        except ZeroDivisionError:
            self.precision_wbf = np.NaN
            print 'precision_wbf: ', self.precision_wbf

        try:
            self.recall_wbf = tp / self.actual_recos.size
        except ZeroDivisionError:
            self.recall_wbf = np.NaN
        print 'recall_wbf: ', self.recall_wbf

        return (self.precision_wbf, self.recall_wbf, tp, fp)

        print "\nPrecision: " + str(self.precision_wbf)
        print "Recall: " + str(self.recall_wbf)


print 'Class Group code block completed!'

Class Group code block completed!


In [2]:
import math
import numpy as np

class Aggregators:
    def __init__(self):
        pass
    
    #pass ratings or factors as input
    @staticmethod
    def average(arr):
        return np.average(arr, axis = 0, weights = None)

    @staticmethod
    def average_bf(arr):
        arr[arr == 0] = np.nan
        return np.nanmean(arr, axis=0)
    
    @staticmethod
    def weighted_average(arr, weights):
        return np.average(arr, axis = 0, weights = weights)
    
    @staticmethod
    def mode(arr):
        pass
        
    @staticmethod
    def median(arr):
        pass
    
    @staticmethod
    def least_misery(self, arr):
        pass
    
    @staticmethod
    def most_pleasure(self, arr):
        pass
print 'Aggregators block completed!'

Aggregators block completed!


In [3]:
# import numpy as np

# class Group():
#     def __init__(self, config, members, candidate_items, ratings):
#         #member ids
#         self.members = sorted(members)
        
#         #list of items that can be recommended. These should not have been
#         #watched by any member of group
#         self.candidate_items = candidate_items
#         self.actual_recos = []
#         self.false_positive = []
        
#         self.ratings_per_member = [np.size(ratings[member].nonzero()) for member in self.members]
        
#         #AF
#         self.grp_factors_af = []
#         self.bias_af = 0
#         #eval. metrics for AF method for this group
#         self.precision_af = 0
#         self.recall_af = 0
#         #recommended items acc. to AF method, 
#         #after calculatng ratings of candidate items and filtering them
#         self.reco_list_af = [] 
        
#         #BF
#         self.grp_factors_bf = []
#         self.bias_bf = 0
#         self.precision_bf = 0
#         self.recall_bf = 0
#         self.reco_list_bf = []
        
#         #WBF
#         self.grp_factors_wbf = []
#         self.bias_wbf = 0
#         self.precision_wbf = 0
#         self.recall_wbf = 0
#         #W matrix from the paper
#         self.weight_matrix_wbf = []
#         self.reco_list_wbf = []
    
#     #verifies that given list of members can form a group w.r.t given data
#     #ensure that there is atleast 1 movie in training data that hasn't been 
#     #watched by any member of the group. Only these can be recommended.
#     #call this method before calling group constructor.
#     # For eg.
#     @staticmethod
#     def find_candidate_items(ratings, members):
#         if len(members) == 0: return []
        
#         unwatched_items = np.argwhere(ratings[members[0]] == 0)
#         for member in members:
#             cur_unwatched = np.argwhere(ratings[member] == 0)
#             unwatched_items = np.intersect1d(unwatched_items, cur_unwatched)
        
#         return unwatched_items
    
#     #programmatically generate groups of given size
#     @staticmethod
#     def generate_groups(cfg, ratings, test_ratings, num_users, count, size, disjoint = True):
#         avbl_users = [i for i in range(num_users)]
#         groups = []
#         testable_threshold = 50
        
#         iter_idx = 0
#         while iter_idx in range(count):
#             group_members = np.random.choice(avbl_users, size = size, replace = False)
#             candidate_items = Group.find_candidate_items(ratings, group_members)
#             non_eval_items = Group.non_testable_items(group_members, test_ratings)
#             testable_items = np.setdiff1d(candidate_items, non_eval_items)
            
#             if len(candidate_items) != 0 and len(testable_items) >= testable_threshold:
#                 groups += [Group(cfg, group_members, candidate_items, ratings)]
#                 avbl_users = np.setdiff1d(avbl_users, group_members)
#                 iter_idx += 1
                
#         return groups
    
#     @staticmethod
#     def non_testable_items(members, ratings): 
#         non_eval_items = np.argwhere(ratings[members[0]] == 0)
#         for member in members:
#             cur_non_eval_items = np.argwhere(ratings[member] == 0)
#             non_eval_items = np.intersect1d(non_eval_items, cur_non_eval_items)
#         return non_eval_items
    
#     def generate_actual_recommendations(self, ratings, threshold):
#         non_eval_items = Group.non_testable_items(self.members, ratings)
            
#         items = np.argwhere(np.logical_or(ratings[self.members[0]] >= threshold, ratings[self.members[0]] == 0)).flatten()
#         fp = np.argwhere(np.logical_and(ratings[self.members[0]] > 0, ratings[self.members[0]] < threshold)).flatten()
#         for member in self.members:
#             cur_items = np.argwhere(np.logical_or(ratings[member] >= threshold, ratings[member] == 0)).flatten()
#             fp = np.union1d(fp, np.argwhere(np.logical_and(ratings[member] > 0, ratings[member] < threshold)).flatten())
#             items = np.intersect1d(items, cur_items)
        
#         items = np.setdiff1d(items, non_eval_items)

#         self.actual_recos = items
# #         print 'acutal reco list: ', self.actual_recos
#         self.false_positive = fp

#     def evaluate_af(self):
#         tp = float(np.intersect1d(self.actual_recos, self.reco_list_af).size)
#         print '\ntp: ', tp
#         fp = float(np.intersect1d(self.false_positive, self.reco_list_af).size)
#         print 'fp: ', fp
        
#         try:
#             self.precision_af = tp / (tp + fp)
#             print 'precision_af: ', self.precision_af
#         except ZeroDivisionError:
#             self.precision_af = np.NaN
#             print 'precision_af: ', self.precision_af
            
#         try:
#             self.recall_af = tp / self.actual_recos.size
#         except ZeroDivisionError:
#             self.recall_af = np.NaN
#         print 'recall_af: ', self.recall_af
        
#         return (self.precision_af, self.recall_af, tp, fp)

#         print "\nPrecision: " + str(self.precision_af)
#         print "Recall: " + str(self.recall_af)

#     def evaluate_bf(self):
#         tp = float(np.intersect1d(self.actual_recos, self.reco_list_bf).size)
#         print '\ntp: ', tp
#         fp = float(np.intersect1d(self.false_positive, self.reco_list_bf).size)
#         print 'fp: ', fp

#         try:
#             self.precision_bf = tp / (tp + fp)
#             print 'precision_bf: ', self.precision_bf
#         except ZeroDivisionError:
#             self.precision_bf = np.NaN
#             print 'precision_bf: ', self.precision_bf

#         try:
#             self.recall_bf = tp / self.actual_recos.size
#         except ZeroDivisionError:
#             self.recall_bf = np.NaN
#         print 'recall_bf: ', self.recall_bf

#         return (self.precision_bf, self.recall_bf, tp, fp)

#         print "\nPrecision: " + str(self.precision_bf)
#         print "Recall: " + str(self.recall_bf)

#     def evaluate_wbf(self):
#         tp = float(np.intersect1d(self.actual_recos, self.reco_list_wbf).size)
#         print '\ntp: ', tp
#         fp = float(np.intersect1d(self.false_positive, self.reco_list_wbf).size)
#         print 'fp: ', fp

#         try:
#             self.precision_wbf = tp / (tp + fp)
#             print 'precision_wbf: ', self.precision_wbf
#         except ZeroDivisionError:
#             self.precision_wbf = np.NaN
#             print 'precision_wbf: ', self.precision_wbf

#         try:
#             self.recall_wbf = tp / self.actual_recos.size
#         except ZeroDivisionError:
#             self.recall_wbf = np.NaN
#         print 'recall_wbf: ', self.recall_wbf

#         return (self.precision_wbf, self.recall_wbf, tp, fp)

#         print "\nPrecision: " + str(self.precision_wbf)
#         print "Recall: " + str(self.recall_wbf)


# import math
# import numpy as np

# class Aggregators:
#     def __init__(self):
#         pass
    
#     #pass ratings or factors as input
#     @staticmethod
#     def average(arr):
#         return np.average(arr, axis = 0, weights = None)

#     @staticmethod
#     def average_bf(arr):
#         arr[arr == 0] = np.nan
#         return np.nanmean(arr, axis=0)
    
#     @staticmethod
#     def weighted_average(arr, weights):
#         return np.average(arr, axis = 0, weights = weights)
    
#     @staticmethod
#     def mode(arr):
#         pass
        
#     @staticmethod
#     def median(arr):
#         pass
    
#     @staticmethod
#     def least_misery(self, arr):
#         pass
    
#     @staticmethod
#     def most_pleasure(self, arr):
#         pass
    

from Aggregators import Aggregators
from Group import Group
from Config import Config
from collections import defaultdict
import numpy as np
import pandas as ps
import warnings
from sklearn.metrics import mean_squared_error

#overflow warnings should be raised as errors
np.seterr(over='raise')

#global class.
class GroupRec:
    def __init__(self):
        self.cfg = Config(r"./config.conf")
        
        #training and testing matrices, init. with random sizes
        self.ratings = np.ndarray((10,10))
        self.test_ratings = np.ndarray((10,10))
        
        #read data into above matrices
        self.read_data(self.cfg.training_file)
        
        self.num_users = self.ratings.shape[0]
        self.num_items = self.ratings.shape[1]
        
        #predicted ratings matrix based on factors. 
        self.predictions = np.zeros((self.num_users, self.num_items))
        
        #output after self.sgd_factorize()
        #initialize all unknowns with random values from -1 to 1
        self.user_factors = np.random.uniform(-1, 1, (self.ratings.shape[0], self.cfg.num_factors))
        self.item_factors = np.random.uniform(-1, 1, (self.ratings.shape[1], self.cfg.num_factors))
        
        #either above or initialize factors with normally distributed numbers
#         self.user_factors = np.random.normal(scale=1./self.cfg.num_factors, size = (self.num_users, self.cfg.num_factors))
#         self.item_factors = np.random.normal(scale=1./self.cfg.num_factors, size = (self.num_items, self.cfg.num_factors))
        
        self.user_biases = np.zeros(self.num_users)
        self.item_biases = np.zeros(self.num_items)
        
        #global mean of ratings a.k.a mu
        self.ratings_global_mean = 0
        pass

    #add list of groups to grouprec
    def add_groups(self, groups):
        self.groups = groups
        pass
    
    #remove groups
    def remove_groups(self, groups):
        self.groups = []
        pass
    
    #read training and testing data into matrices
    def read_data(self, file):
        column_headers = ['user_id', 'item_id', 'rating', 'timestamp']
        print 'Reading data from ', file, '...'
        data = ps.read_csv(file, sep = '\t', names = column_headers)
        print 'Reading testing data from ', self.cfg.testing_file, '...'
        testing_data = ps.read_csv(self.cfg.testing_file, sep = '\t', names = column_headers)
        
        num_users = max(data.user_id.unique())
        num_items = max(data.item_id.unique())
        
        self.ratings = np.zeros((num_users, num_items))
        self.test_ratings = np.zeros((num_users, num_items))
        
        for row in data.itertuples(index = False):
            self.ratings[row.user_id - 1, row.item_id - 1] = row.rating
        
        for row in testing_data.itertuples(index = False):
            self.test_ratings[row.user_id - 1, row.item_id - 1] = row.rating 
        
    #split data set file into training and test file by ratio 
    def split_data(self, data_file, training_ratio = 0.7):
        pass
    
    def predict_user_rating(self, user, item):
        prediction = self.ratings_global_mean + self.user_biases[user] + self.item_biases[item]
        prediction += self.user_factors[user, :].dot(self.item_factors[item, :].T)
        return prediction
    
    def predict_group_rating(self, group, item, method):
        #bias_grp and
        if (method == 'af'):
            factors = group.grp_factors_af; bias_group = group.bias_af
        elif (method == 'bf'):
            factors = group.grp_factors_bf; bias_group = group.bias_bf
        elif (method == 'wbf'):
            factors = group.grp_factors_wbf; bias_group = group.bias_wbf
        
        return self.ratings_global_mean + bias_group + self.item_biases[item] \
                                        + np.dot(factors.T, self.item_factors[item])
        
    #matrix factorization code, this should be run before af, bf or wbf
    #outputs from this are used in methods
    def sgd_factorize(self):
        #solve for these for matrix ratings        
        ratings_row, ratings_col = self.ratings.nonzero()
        num_ratings = len(ratings_row)
        learning_rate = self.cfg.learning_rate_mf
        regularization = self.cfg.lambda_mf
        
        self.ratings_global_mean = np.mean(self.ratings[np.where(self.ratings != 0)])
        
        print 'Doing matrix factorization...'
        try:
            for iter in range(self.cfg.max_iterations_mf):
                print 'Iteration: ', iter
                rating_indices = np.arange(num_ratings)
                np.random.shuffle(rating_indices)
                
                for idx in rating_indices:
                    user = ratings_row[idx]
                    item = ratings_col[idx]

                    pred = self.predict_user_rating(user, item)
                    error = self.ratings[user][item] - pred
                    
                    self.user_factors[user] += learning_rate \
                                                * ((error * self.item_factors[item]) - (regularization * self.user_factors[user]))
                    self.item_factors[item] += learning_rate \
                                                * ((error * self.user_factors[user]) - (regularization * self.item_factors[item]))
                    
                    self.user_biases[user] += learning_rate * (error - regularization * self.user_biases[user])
                    self.item_biases[item] += learning_rate * (error - regularization * self.item_biases[item])
            
                self.sgd_mse()
            
        except FloatingPointError:
            print 'Floating point Error: '
            
    def predict_all_ratings(self):
        for user in range(self.num_users):
            for item in range(self.num_items):
                self.predictions[user, item] = self.predict_user_rating(user, item)
        
    
    def sgd_mse(self):
        self.predict_all_ratings()
        predicted_training_ratings = self.predictions[self.ratings.nonzero()].flatten()
        actual_training_ratings = self.ratings[self.ratings.nonzero()].flatten()
        
        predicted_test_ratings = self.predictions[self.test_ratings.nonzero()].flatten()
        actual_test_ratings = self.test_ratings[self.test_ratings.nonzero()].flatten()
    
        training_mse = mean_squared_error(predicted_training_ratings, actual_training_ratings)
        print 'training mse: ', training_mse
        test_mse = mean_squared_error(predicted_test_ratings, actual_test_ratings)
        print 'test mse: ', test_mse
            
        
    #AF method
    # Check if we want this method to take multiple groups or single group
    # as input
    def af_runner(self, groups = None, aggregator = Aggregators.average):
        #if groups is not passed, use self.groups
        if (groups is None):
            groups = self.groups
        
        #calculate factors
        for group in groups:
            member_factors = self.user_factors[group.members, :]
            member_biases = self.user_biases[group.members]
        
            #aggregate the factors
            if (aggregator == Aggregators.average):
                group.grp_factors_af = aggregator(member_factors)
                group.bias_af = aggregator(member_biases)
            elif (aggregator == Aggregators.weighted_average):
                group.grp_factors_af = aggregator(member_factors, weights = group.ratings_per_member)
                group.bias_af = aggregator(member_biases, weights = group.ratings_per_member)
            
            #predict ratings for all candidate items
            group_candidate_ratings = {}
            for idx, item in enumerate(group.candidate_items):
                cur_rating = self.predict_group_rating(group, item, 'af')
                
                if (cur_rating > self.cfg.rating_threshold_af):
                    group_candidate_ratings[item] = cur_rating
            
            #sort and filter to keep top 'num_recos_af' recommendations
            group_candidate_ratings = sorted(group_candidate_ratings.items(), key=lambda x: x[1], reverse=True)[:self.cfg.num_recos_af]
            
            group.reco_list_af = np.array([rating_tuple[0] for rating_tuple in group_candidate_ratings])
#             print 'members: ', group.members
#             print 'recommended items: ', group.reco_list_af
#             print 'recommended item ratings: ', group_candidate_ratings 

    def bf_runner(self, groups=None, aggregator=Aggregators.average_bf):
        # aggregate user ratings into virtual group
        # calculate factors of group
        lamb = self.cfg.lambda_mf

        for group in groups:
            all_movies = np.arange(len(self.ratings.T))
            watched_items = sorted(list(set(all_movies) - set(group.candidate_items)))

            group_rating = self.ratings[group.members, :]
            agg_rating = aggregator(group_rating)
            s_g = []
            for j in watched_items:
                s_g.append(agg_rating[j] - self.ratings_global_mean - self.item_biases[j])

            # creating matrix A : contains rows of [item_factors of items in watched_list + '1' vector]
            A = np.zeros((0, self.cfg.num_factors))

            for item in watched_items:
                A = np.vstack([A, self.item_factors[item]])
            v = np.ones((len(watched_items), 1))
            A = np.c_[A, v]

            factor_n_bias = np.dot(np.linalg.inv(np.dot(A.T, A) + lamb * np.identity(self.cfg.num_factors + 1)), np.dot(A.T, s_g))
            group.grp_factors_bf = factor_n_bias[:-1]
            group.bias_bf = factor_n_bias[-1]

            # Making recommendations on candidate list :
            group_candidate_ratings = {}
            for idx, item in enumerate(group.candidate_items):
                cur_rating = self.predict_group_rating(group, item, 'bf')

                if (cur_rating > self.cfg.rating_threshold_bf):
                    group_candidate_ratings[item] = cur_rating

            # sort and filter to keep top 'num_recos_bf' recommendations
            group_candidate_ratings = sorted(group_candidate_ratings.items(), key=lambda x: x[1], reverse=True)[
                                      :self.cfg.num_recos_bf]

            group.reco_list_bf = np.array([rating_tuple[0] for rating_tuple in group_candidate_ratings])
            #             print 'members: ', group.members
            #             print 'recommended items: ', group.reco_list_bf
            #             print 'recommended item ratings: ', group_candidate_ratings


    def wbf_runner(self, groups=None, aggregator=Aggregators.average_bf):
        # aggregate user ratings into virtual group
        # calculate factors of group
        lamb = self.cfg.lambda_mf
        for group in groups:
            all_movies = np.arange(len(self.ratings.T))
            watched_items = sorted(list(set(all_movies) - set(group.candidate_items)))

            group_rating = self.ratings[group.members, :]
            agg_rating = aggregator(group_rating)
            s_g = []
            for j in watched_items:
                s_g.append(agg_rating[j] - self.ratings_global_mean - self.item_biases[j])

            # creating matrix A : contains rows of [item_factors of items in watched_list + '1' vector]
            A = np.zeros((0, self.cfg.num_factors))  # 3 is the number of features here = K

            for item in watched_items:
                A = np.vstack([A, self.item_factors[item]])
            v = np.ones((len(watched_items), 1))
            A = np.c_[A, v]

            wt = []
            for item in watched_items:
                rated = np.argwhere(self.ratings[:, item] != 0)  # list of users who have rated this movie
                watched = np.intersect1d(rated, group)  # list of group members who have watched this movie
                std_dev = np.std(filter(lambda a: a != 0, self.ratings[:, item]))  # std deviation for the rating of the item
                wt += [len(watched) / float(len(group.members)) * 1 / (1 + std_dev)]  # list containing diagonal elements
            W = np.diag(wt)  # diagonal weight matrix

            factor_n_bias = np.dot(np.linalg.inv(np.dot(np.dot(A.T, W),A) + lamb * np.identity(self.cfg.num_factors + 1)),
                                   np.dot(np.dot(A.T, W), s_g))
            group.grp_factors_wbf = factor_n_bias[:-1]
            group.bias_wbf = factor_n_bias[-1]

            # Making recommendations on candidate list :
            group_candidate_ratings = {}
            for idx, item in enumerate(group.candidate_items):
                cur_rating = self.predict_group_rating(group, item, 'wbf')

                if (cur_rating > self.cfg.rating_threshold_wbf):
                    group_candidate_ratings[item] = cur_rating

            # sort and filter to keep top 'num_recos_wbf' recommendations
            group_candidate_ratings = sorted(group_candidate_ratings.items(), key=lambda x: x[1], reverse=True)[
                                      :self.cfg.num_recos_wbf]

            group.reco_list_wbf = np.array([rating_tuple[0] for rating_tuple in group_candidate_ratings])
            #             print 'members: ', group.members
            #             print 'recommended items: ', group.reco_list_wbf
            #             print 'recommended item ratings: ', group_candidate_ratings

        pass

    def evaluation(self):
#         self.read_data(self.cfg.testing_file, False)

        # For AF
        af_precision_list = []
        af_recall_list = []
        af_mean_precision = 0
        print "#########-------For AF-------#########"
        for grp in self.groups:
            grp.generate_actual_recommendations(self.test_ratings, self.cfg.rating_threshold_af)
            (precision, recall, tp, fp) = grp.evaluate_af()
            af_precision_list.append(precision)
            af_recall_list.append(recall)
        
        af_mean_precision = np.nanmean(np.array(af_precision_list))
        af_mean_recall = np.nanmean(np.array(af_recall_list))
        print '\nAF method: mean precision: ', af_mean_precision
        print 'AF method: mean recall: ', af_mean_recall

        #For BF
        bf_precision_list = []
        bf_recall_list = []
        bf_mean_precision = 0
        print "#########-------For BF-------#########"
        for grp in self.groups:
            grp.generate_actual_recommendations(self.test_ratings, self.cfg.rating_threshold_bf)
            (precision, recall, tp, fp) = grp.evaluate_bf()
            bf_precision_list.append(precision)
            bf_recall_list.append(recall)

        bf_mean_precision = np.nanmean(np.array(bf_precision_list))
        bf_mean_recall = np.nanmean(np.array(bf_recall_list))
        print '\nBF method: mean precision: ', bf_mean_precision
        print 'BF method: mean recall: ', bf_mean_recall

        #For BF
        wbf_precision_list = []
        wbf_recall_list = []
        wbf_mean_precision = 0
        print "#########-------For WBF-------#########"
        for grp in self.groups:
            grp.generate_actual_recommendations(self.test_ratings, self.cfg.rating_threshold_wbf)
            (precision, recall, tp, fp) = grp.evaluate_wbf()
            wbf_precision_list.append(precision)
            wbf_recall_list.append(recall)

        wbf_mean_precision = np.nanmean(np.array(wbf_precision_list))
        wbf_mean_recall = np.nanmean(np.array(wbf_recall_list))
        print '\nWBF method: mean precision: ', wbf_mean_precision
        print 'WBF method: mean recall: ', wbf_mean_recall

        # # For BF
        # for grp in self.groups:
        #     grp.generate_actual_recommendations(self.ratings, self.cfg.rating_threshold_bf)
        #     grp.evaluate_bf()
        #
        # # For WBF
        # for grp in self.groups:
        #     grp.generate_actual_recommendations(self.ratings, self.cfg.rating_threshold_wbf)
        #     grp.evaluate_wbf()

#     def run_all_methods(self, groups):
#         if (groups is None):
#             groups = self.groups
#         #PS: could call them without passing groups as we have already added groups to grouprec object
#         self.af_runner(groups, Aggregators.weighted_average)
#         self.bf_runner(groups, Aggregators.average_bf)
#         self.wbf_runner(groups, Aggregators.average_bf)

#         #evaluation
#         self.evaluation()
    

# if __name__ == "__main__":
#     #Workflow
#     gr = GroupRec()
#     #can, move this function also to config __init__, will decide later
# #     gr.read_data()
#     #factorize matrix
#     gr.sgd_factorize()
    
#     #add groups or generate random groups of given size
#     groups = []
#     members = [475, 549, 775]
#     candidate_items = Group.find_candidate_items(gr.ratings, members)
#     if len(candidate_items) != 0:
#         pass
#         #groups = [Group(gr.cfg, members, candidate_items, gr.ratings)]
    
#     #OR generate groups programmatically
#     #disjoint means none of the groups shares any common members     
#     small_groups = Group.generate_groups(gr.cfg, gr.ratings, gr.test_ratings, gr.num_users, 3, gr.cfg.small_grp_size, disjoint=True)
#     medium_groups = Group.generate_groups(gr.cfg, gr.ratings, gr.test_ratings, gr.num_users, 3, gr.cfg.medium_grp_size, disjoint=True)
#     large_groups = Group.generate_groups(gr.cfg, gr.ratings, gr.test_ratings, gr.num_users, 3, gr.cfg.large_grp_size, disjoint=True)
    
#     group_set = [small_groups, medium_groups, large_groups]
#     group_type = ['small', 'medium', 'large']
    
#     for idx, groups in enumerate(group_set):
#         if groups is []: continue;
        
#         #generated groups
#         print '******* Running for ', group_type[idx], ' groups *************'
#         print 'generated groups: '
#         for group in groups:
#             print(group.members)
        
#         gr.add_groups(groups)
#         gr.run_all_methods(groups)
#         gr.remove_groups(groups)    
#         pass
print 'GroupRec block completed!'


GroupRec block completed!


In [22]:
# import _GroupRec
# class GroupRec :
def run_all_methods(self, groups):
    if (groups is None):
        groups = self.groups
    #PS: could call them without passing groups as we have already added groups to grouprec object
    self.af_runner(groups, Aggregators.weighted_average)
    self.bf_runner(groups, Aggregators.average_bf)
    self.wbf_runner(groups, Aggregators.average_bf)

    #evaluation
    self.evaluation()
GroupRec.run_all_methods = classmethod(run_all_methods)

In [23]:
if __name__ == "__main__":
    #Workflow
    gr = GroupRec()
    #can, move this function also to config __init__, will decide later
#     gr.read_data()
    #factorize matrix
    gr.sgd_factorize()
    
    #add groups or generate random groups of given size
    groups = []
    members = [475, 549, 775]
    candidate_items = Group.find_candidate_items(gr.ratings, members)
    if len(candidate_items) != 0:
        pass
        #groups = [Group(gr.cfg, members, candidate_items, gr.ratings)]
    
    #OR generate groups programmatically
    #disjoint means none of the groups shares any common members     
    small_groups = Group.generate_groups(gr.cfg, gr.ratings, gr.test_ratings, gr.num_users, 3, gr.cfg.small_grp_size, disjoint=True)
    medium_groups = Group.generate_groups(gr.cfg, gr.ratings, gr.test_ratings, gr.num_users, 3, gr.cfg.medium_grp_size, disjoint=True)
    large_groups = Group.generate_groups(gr.cfg, gr.ratings, gr.test_ratings, gr.num_users, 3, gr.cfg.large_grp_size, disjoint=True)
    
    group_set = [small_groups, medium_groups, large_groups]
    group_type = ['small', 'medium', 'large']
    
    for idx, groups in enumerate(group_set):
        if groups is []: continue;
        
        #generated groups
        print '******* Running for ', group_type[idx], ' groups *************'
        print 'generated groups: '
        for group in groups:
            print(group.members)
        
        gr.add_groups(groups)
        gr.run_all_methods(groups)
        gr.remove_groups(groups)    
        pass


Reading data from  ./data/u1.base ...
Reading testing data from  ./data/u1.test ...
Doing matrix factorization...
Iteration:  0
training mse:  0.80400900094
test mse:  1.06078665703
******* Running for  small  groups *************
generated groups: 
[15, 61, 831]
[200, 531, 645]
[73, 317, 386]


AttributeError: GroupRec instance has no attribute 'run_all_methods'