In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import pairwise_distances
from scipy.spatial import distance

import trecs
from trecs.models import ImplicitMF, ImplicitMFLFD
from trecs.random import Generator
from trecs.metrics import MSEMeasurement, AverageFeatureScoreRange

In [2]:
# Baseline recommender: implicit matrix factorization configured with
# 200 users, 50 items and 20 latent factors.
mf = ImplicitMF(
    num_users=200,
    num_items=50,
    num_latent_factors=20,
)
# Attach an MSE measurement, then call startup_and_train with 20
# (the progress bar in the cell output shows 20 iterations).
mf.add_metrics(MSEMeasurement())
mf.startup_and_train(20)

100%|██████████| 20/20 [00:00<00:00, 569.46it/s]


In [3]:
# Second model, built with the same sizes as `mf`: 200 users, 50 items and
# 20 latent factors. presumably "LFD" = latent factor diversification — confirm.
mflfd = ImplicitMFLFD(
    num_users=200,
    num_items=50,
    num_latent_factors=20,
)
# Attach an MSE measurement to this model as well.
mflfd.add_metrics(MSEMeasurement())

In [4]:
# Call startup_and_train with 20, then run with 10; the two progress bars in
# the cell output show 20 and 10 iterations respectively.
mflfd.startup_and_train(20)
mflfd.run(10)

100%|██████████| 20/20 [00:00<00:00, 666.00it/s]
100%|██████████| 10/10 [00:00<00:00, 359.57it/s]


In [10]:
# Display entry 199 of the model's `rec` attribute; the output is an array of 10 item indices.
# presumably these are the latest recommendations for the last of the 200 users — confirm.
mflfd.rec[199]

array([28, 47, 24, 26, 17, 29, 21,  8, 35, 22])

In [11]:
# Display entry 2 of `item_indices`; the output lists every index from 0 to 49.
# presumably the candidate items available to user 2 — confirm.
mflfd.item_indices[2]

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])

In [52]:
# Size of the second axis of `items_hat`; the output is 50, matching num_items=50.
mflfd.items_hat.shape[1]

50

In [13]:
# Notebook-level defaults: size of the candidate pool (top_n_limit) and the
# number of recommendations per user (k).
top_n_limit, k = 50, 10

def generate_recommendations(self, k=1, top_n_limit=None, item_indices=None):
    """Recommend ``k`` items per user, diversified in latent-feature space.

    For every user the ``top_n_limit`` best-scoring candidate items are
    selected (ties broken at random). The highest-scoring item is always the
    first recommendation; each further item is the not-yet-chosen candidate
    whose features have the largest cityblock (L1) distance from the centroid
    of the items chosen so far.

    Parameters
    ----------
    self : object
        Model providing ``num_users``, ``users.user_vector``,
        ``predicted_scores`` (users x items), ``items_hat``
        (attributes x items), ``item_indices`` and ``random_state``.
    k : int, default 1
        Number of items to recommend to each user.
    top_n_limit : int or None, default None
        Size of the per-user candidate pool. ``None`` or ``0`` means every
        candidate in ``item_indices``; larger values are clamped to the
        number of available candidates.
    item_indices : array of shape (num_users, n_candidates) or None
        Item indices each user may be recommended. Falls back to
        ``self.item_indices`` when ``None``.

    Returns
    -------
    numpy.ndarray of int, shape (num_users, k)
        Item indices per user, in the order they were selected. A row never
        contains the same candidate twice.

    Raises
    ------
    ValueError
        If ``k`` is negative, if ``item_indices`` has fewer entries than
        users, if fewer than ``k`` candidates are available, or if
        ``top_n_limit`` is smaller than ``k``.
    """
    if k < 0:
        raise ValueError("k must be a non-negative number of items.")

    if item_indices is None:
        # No explicit candidate matrix: use the one stored on the model.
        item_indices = self.item_indices
    elif item_indices.size < self.num_users:
        raise ValueError(
            "At least one user has interacted with all items! "
            "To avoid this problem, you may want to allow repeated items."
        )
    item_indices = np.asarray(item_indices)

    if k > item_indices.shape[1]:
        raise ValueError(
            f"There are not enough items left to recommend {k} items to each user."
        )
    # This shortcut must apply whether or not item_indices was passed in.
    if k == 0:
        return np.array([]).reshape((self.num_users, 0)).astype(int)

    n_candidates = item_indices.shape[1]
    if not top_n_limit:
        top_n_limit = n_candidates
    # argpartition needs kth < n_candidates, so never ask for more than exist.
    top_n_limit = min(int(top_n_limit), n_candidates)
    if top_n_limit < k:
        raise ValueError(
            f"top_n_limit ({top_n_limit}) is too small to recommend {k} distinct items."
        )

    # Column vector of user ids; broadcasts against (num_users, n) index arrays.
    user_rows = np.asarray(self.users.user_vector).reshape(-1, 1)
    s_filtered = self.predicted_scores[user_rows, item_indices]

    negated_scores = -1 * s_filtered  # negate scores so indices go from highest to lowest
    # break ties using a random score component
    scores_tiebreak = np.zeros(
        negated_scores.shape, dtype=[("score", "f8"), ("random", "f8")]
    )
    scores_tiebreak["score"] = negated_scores
    scores_tiebreak["random"] = self.random_state.random(negated_scores.shape)

    # Positions (within item_indices) of each user's top_n_limit candidates...
    top_pos = scores_tiebreak.argpartition(
        top_n_limit - 1, order=["score", "random"]
    )[:, :top_n_limit]
    # ...sorted from the highest to the lowest predicted score.
    sort_within = scores_tiebreak[user_rows, top_pos].argsort(order=["score", "random"])
    top_n_recs = item_indices[user_rows, top_pos[user_rows, sort_within]]

    recs = np.empty((self.num_users, k), dtype=int)
    for user_idx in range(self.num_users):
        # (top_n_limit, n_attributes) copy of this user's candidate features.
        candidate_feats = np.array(
            self.items_hat[:, top_n_recs[user_idx]], dtype=float
        ).T

        # Column 0 is the highest-scoring candidate and is always kept.
        chosen = [0]
        for _ in range(1, k):
            centroid = np.nanmean(candidate_feats[chosen], axis=0)
            # L1 distance of every candidate to the centroid; NaN terms are ignored.
            dist = np.nansum(np.abs(candidate_feats - centroid), axis=1)
            # Exclude already chosen candidates by position so that none can
            # be selected twice, even when distances are tied.
            dist[chosen] = -np.inf
            chosen.append(int(np.argmax(dist)))

        recs[user_idx] = top_n_recs[user_idx, chosen]

    return recs


In [49]:
# Show the collected recommendation indices as a 2-D array.
# NOTE(review): `all_recs_idxs` is not assigned in any cell visible here — presumably it came
# from a cell that was edited or removed; confirm this survives Restart & Run All.
np.array(all_recs_idxs)

array([[32, 35, 40, ...,  5, 16, 42],
       [ 6, 49, 41, ..., 21, 22, 11],
       [47, 34, 30, ..., 19,  8, 16],
       ...,
       [42, 16, 15, ..., 21, 43, 49],
       [12, 21,  8, ..., 43, 49, 13],
       [28, 40, 19, ..., 43, 42, 16]])

In [50]:
# Dump the raw list of per-user recommendation indices (10 per row in the output below).
# NOTE(review): `all_recs_idxs` is not assigned in any cell visible here — confirm where it is built.
all_recs_idxs

[[32, 35, 40, 22, 19, 10, 8, 5, 16, 42],
 [6, 49, 41, 21, 8, 16, 43, 21, 22, 11],
 [47, 34, 30, 39, 35, 22, 10, 19, 8, 16],
 [23, 7, 8, 47, 22, 13, 43, 21, 49, 16],
 [24, 41, 49, 13, 40, 21, 14, 13, 49, 21],
 [43, 35, 40, 21, 2, 8, 22, 13, 21, 19],
 [20, 35, 11, 43, 8, 22, 16, 7, 13, 43],
 [3, 49, 13, 40, 21, 22, 13, 43, 8, 21],
 [3, 49, 13, 40, 21, 22, 13, 43, 8, 14],
 [46, 42, 49, 16, 11, 5, 21, 8, 43, 21],
 [22, 48, 35, 4, 40, 5, 8, 13, 14, 21],
 [27, 40, 13, 19, 10, 28, 40, 35, 43, 40],
 [1, 43, 7, 13, 40, 42, 5, 8, 16, 19],
 [26, 10, 19, 16, 40, 13, 42, 5, 8, 43],
 [23, 7, 8, 47, 22, 13, 43, 21, 49, 16],
 [32, 35, 40, 22, 19, 10, 16, 19, 40, 21],
 [31, 33, 16, 42, 15, 35, 40, 19, 8, 22],
 [33, 35, 2, 10, 16, 19, 22, 35, 40, 21],
 [20, 35, 11, 43, 8, 22, 16, 7, 13, 43],
 [29, 16, 15, 8, 22, 13, 10, 19, 22, 48],
 [3, 49, 13, 40, 21, 19, 8, 43, 35, 40],
 [0, 39, 10, 8, 28, 22, 19, 10, 8, 5],
 [24, 41, 8, 21, 22, 10, 49, 13, 14, 49],
 [18, 13, 40, 14, 39, 35, 22, 43, 10, 8],
 [24, 41,

In [None]:
def latent_factors_diversification(user_features, item_features, n_recs=10, top_n_limit=None):
    """Greedily diversify each user's recommendations in latent-factor space.

    Predicted ratings are the dot product of user and item features. For
    every user the highest-rated item is recommended first; each further
    item is the not-yet-chosen candidate whose features have the largest
    cityblock (L1) distance from the centroid of the items chosen so far.

    Parameters
    ----------
    user_features : array-like, shape (n_users, n_factors)
    item_features : array-like, shape (n_items, n_factors)
    n_recs : int, default 10
        Number of items to recommend per user (at least 1).
    top_n_limit : int or None, default None
        If truthy, only each user's ``top_n_limit`` highest-rated items are
        candidates (clamped to ``n_items``); otherwise all items are.

    Returns
    -------
    all_recs : numpy.ndarray, shape (n_users, n_factors, n_recs)
        Features of the recommended items, in selection order.
    all_user_recs : dict
        Maps each user position to a dict with keys ``'user_feats'``,
        ``'original_recs_idx'`` (indices into ``item_features``),
        ``'recs_idx'`` (positions inside the user's candidate pool),
        ``'recs_features'`` (array of shape (n_recs, n_factors)) and
        ``'recs_preds'`` (predicted ratings of the recommended items).

    Raises
    ------
    ValueError
        If ``n_recs`` is smaller than 1 or larger than the candidate pool.
    """
    user_features = np.asarray(user_features, dtype=float)
    item_features = np.asarray(item_features, dtype=float)
    n_users = user_features.shape[0]
    n_items, n_factors = item_features.shape

    if n_recs < 1:
        raise ValueError("n_recs must be at least 1.")
    pool_size = min(int(top_n_limit), n_items) if top_n_limit else n_items
    if n_recs > pool_size:
        raise ValueError(
            f"Cannot recommend {n_recs} distinct items from a pool of {pool_size}."
        )

    hat_ratings = np.dot(user_features, item_features.T)

    if top_n_limit:
        # if constraining by top n, only retain the top n ratings within each user
        ind = np.argpartition(hat_ratings, -pool_size, axis=1)[:, -pool_size:]
        # Gather row-wise; a plain np.take would index the flattened matrix.
        n_ratings = np.take_along_axis(hat_ratings, ind, axis=1)
    else:
        # if not constraining by top n, retain all item indices for all users.
        # In that case recs_idx matches original_recs_idx in all_user_recs.
        ind = np.tile(np.arange(n_items), (n_users, 1))
        n_ratings = hat_ratings

    all_recs = np.empty([n_users, n_factors, n_recs])
    all_user_recs = dict()

    for idx, user in enumerate(user_features):
        # Fancy indexing copies, so item_features itself is never modified.
        user_item_feats = item_features[ind[idx]]

        # the top-rated candidate is always the first recommendation
        recs_idxs = [int(np.argmax(n_ratings[idx]))]

        for _ in range(1, n_recs):
            centroid = np.nanmean(user_item_feats[recs_idxs], axis=0)
            # L1 distance of every candidate to the centroid; NaN terms are ignored.
            d = np.nansum(np.abs(user_item_feats - centroid), axis=1)
            # Exclude already chosen candidates by position so that none can
            # be selected twice, even when distances are tied.
            d[recs_idxs] = -np.inf
            recs_idxs.append(int(np.argmax(d)))

        recs_features = user_item_feats[recs_idxs]
        # all_recs stores factors along axis 1 and recommendations along axis 2.
        all_recs[idx, :, :] = recs_features.T

        all_user_recs[idx] = {'user_feats': user,
                              # item index in item_features, not in the constrained pool
                              'original_recs_idx': ind[idx, recs_idxs].tolist(),
                              'recs_idx': recs_idxs,
                              'recs_features': recs_features,
                              'recs_preds': n_ratings[idx, recs_idxs].tolist()}

    return all_recs, all_user_recs