In [1]:
import pandas as pd
import numpy as np

import trecs
from trecs.models import ImplicitMF, ImplicitMFLFD
from trecs.random import Generator
from trecs.metrics import MSEMeasurement, AverageFeatureScoreRange

In [None]:
# Baseline `ImplicitMF` recommender: 200 users, 50 items, 20 latent factors.
# NOTE(review): `mf` is not referenced by any later cell visible in this
# section, and this cell carries no execution count — confirm it is still needed.
mf = ImplicitMF(num_users=200, num_items=50, num_latent_factors=20)
mf.add_metrics(MSEMeasurement())  # register an MSE measurement on the model
mf.startup_and_train(20)  # presumably 20 startup/training timesteps — confirm against the trecs docs

In [2]:
# `ImplicitMFLFD` recommender with the same dimensions as the baseline above
# (200 users, 50 items, 20 latent factors).
# NOTE(review): "LFD" presumably stands for latent-factor diversification — confirm.
mflfd = ImplicitMFLFD(num_users=200, num_items=50, num_latent_factors=20)
mflfd.add_metrics(MSEMeasurement())  # register an MSE measurement on the model

In [3]:
# Train, then simulate. The recorded progress bars below show 20 iterations
# for the first call and 10 for the second.
mflfd.startup_and_train(20)
mflfd.run(10)

100%|██████████| 20/20 [00:00<00:00, 575.24it/s]
100%|██████████| 10/10 [00:00<00:00, 363.36it/s]


In [None]:
# NOTE(review): this cell was never executed (no execution count) and looks
# like an unfinished attribute lookup; verify that `rec` exists on the model
# or delete the cell, otherwise Restart & Run All may stop here.
mflfd.rec

In [7]:
# Inspect row 2 of `item_indices`; the recorded output lists item ids 0..49.
# (Rows appear to correspond to users — confirm against the trecs docs.)
mflfd.item_indices[2]

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49])

In [8]:
# Display the model's predicted score matrix (a `PredictedScores` object in
# the recorded output).
mflfd.predicted_scores

PredictedScores([[ 1.36333226e-01, -1.66519458e-02,  6.49199936e-02, ...,
                  -7.32779306e-03,  1.20038806e-02,  5.92683249e-02],
                 [ 4.96087745e-02,  1.37658062e-01,  3.86123711e-02, ...,
                  -6.36656763e-02,  3.30936052e-02,  7.93674277e-05],
                 [ 2.43081972e-01,  7.76892541e-03,  8.26208732e-02, ...,
                   2.51812451e-01,  9.79027408e-02, -6.74336103e-02],
                 ...,
                 [ 3.92426919e-03,  2.06078771e-01,  1.71795467e-01, ...,
                   5.83166856e-03,  7.47863524e-02,  2.05539495e-01],
                 [ 1.40650402e-01,  3.74743174e-02,  3.50551931e-02, ...,
                   4.57870910e-03, -2.11677300e-03, -2.29767529e-02],
                 [ 8.97558951e-03, -9.52487544e-02, -2.03734547e-02, ...,
                   4.17621589e-02,  2.46576098e-01,  2.49137211e-01]])

In [34]:
# Manual top-k selection over the model's predicted scores.
# `top_n_limit` is only referenced by the disabled draft at the bottom of this
# cell; the live code below uses `k` alone.
top_n_limit=10
k=10  # number of highest-scoring items kept per user

# Build a row-index grid shaped like `item_indices` so that
# predicted_scores[row, item_indices] pairs each row with its listed item columns.
# NOTE(review): assumes `users.user_vector` holds one row index per user — confirm.
row = np.repeat(mflfd.users.user_vector, mflfd.item_indices.shape[1])
row = row.reshape((mflfd.num_users, -1))
s_filtered = mflfd.predicted_scores[row, mflfd.item_indices]

negated_scores = -1 * s_filtered  # negate scores so indices go from highest to lowest
# break ties using a random score component
# (structured dtype lets argpartition/argsort order by "score" first, then "random")
scores_tiebreak = np.zeros(
    negated_scores.shape, dtype=[("score", "f8"), ("random", "f8")]
)
scores_tiebreak["score"] = negated_scores
scores_tiebreak["random"] = mflfd.random_state.random(negated_scores.shape)
# partial sort: the first k columns hold the k smallest negated scores
# (i.e. the k highest scores), in no guaranteed order
top_k = scores_tiebreak.argpartition(k - 1, order=["score", "random"])[:, :k]
# now we sort within the top k
# (`row` is rebuilt with k columns so it lines up with `top_k`)
row = np.repeat(mflfd.users.user_vector, k).reshape((mflfd.num_users, -1))
# again, indices should go from highest to lowest
sort_top_k = scores_tiebreak[row, top_k].argsort(order=["score", "random"])
# extract items such that rows go from highest-scored to lowest-scored of the top k
top_k_recs = mflfd.item_indices[row, top_k[row, sort_top_k]]

# --- Disabled earlier draft, kept for reference only. ---
# NOTE(review): `n_ratings` and `ind`, which later cells read, are assigned
# only in this commented-out code, so those cells rely on stale kernel state.
# #hat_ratings = np.dot(user_features, item_features.T)
# if top_n_limit:
#     #if constraining by top n, only retain the top n ratings within each user
#     ind=np.argpartition(s_filtered,-top_n_limit)[:,-top_n_limit:]
#     n_ratings = np.take(s_filtered, ind)
# else:
#     #if not constraining by top n, retain all item indices for all users.
#     #If this is the case, in all_user_recs, recs_idxs should match original_recs_idxs
#     ind=np.tile(np.arange(0,len(mflfd.item_features)),(len(mflfd.user_features),1))
#     n_ratings = s_filtered




In [45]:
# Display the per-user top-k item ids computed in the top-k selection cell.
# (disabled experiment: look the chosen items' scores back up in s_filtered)
#test=s_filtered[row, top_k_recs]
top_k_recs

array([[40,  7, 11, ..., 35, 20, 26],
       [29, 27, 41, ...,  8, 43, 22],
       [33, 31, 47, ..., 44, 23, 10],
       ...,
       [24,  1, 49, ...,  8, 26, 20],
       [38,  7, 35, ..., 20, 46, 12],
       [44,  7, 21, ...,  5, 14,  8]])

In [29]:
# Scratch draft of the per-user setup step of `latent_factors_diversification`.
# NOTE(review): `n_ratings`, `max_idx`, `top_items` and `ind` are not assigned
# by any live code visible in this notebook section, so this cell only runs
# against leftover kernel state. Every value computed in the loop is discarded
# and `all_recs` is allocated but never written here.
all_recs = np.empty([mflfd.users_hat.shape[0],mflfd.items_hat.shape[1], k])

for idx, user in enumerate(mflfd.users_hat):

        # feature rows of this user's top-k candidate items
        user_item_feats = mflfd.items_hat[top_k[idx]]
        user_max_idx = np.argmax(n_ratings[idx])

        #get the top rec and add that as the first item for each user
        user_max = max_idx[idx]
        recs_features = top_items[idx]
        recs_idxs = [max_idx[idx]]
        recs_preds = [n_ratings[idx][user_max]]
        orig_recs_idxs = [ind[idx, user_max]]

In [26]:
def latent_factors_diversification(user_features, item_features, n_recs=10, top_n_limit=None):
    """Greedily pick a diverse list of items for every user in latent-factor space.

    For each user the highest-predicted item is recommended first. Every
    further slot is filled with the not-yet-chosen candidate whose feature
    vector has the largest Manhattan (cityblock) distance from the centroid
    of the items chosen so far.

    Parameters
    ----------
    user_features : array of shape (num_users, num_factors)
    item_features : array of shape (num_items, num_factors)
    n_recs : int, default 10
        Number of items to recommend per user.
    top_n_limit : int or None, default None
        If truthy, only each user's ``top_n_limit`` highest-predicted items
        are candidates; otherwise all items are candidates.

    Returns
    -------
    all_recs : numpy.ndarray of shape (num_users, n_recs, num_factors)
        Feature vectors of the recommended items, in recommendation order.
    all_user_recs : dict
        Maps the user's row index to a dict with keys ``'user_feats'``,
        ``'original_recs_idx'`` (row indices into ``item_features``),
        ``'recs_idx'`` (positions inside that user's candidate list),
        ``'recs_features'`` and ``'recs_preds'`` (predicted ratings).

    Raises
    ------
    ValueError
        If ``n_recs`` is smaller than 1 or larger than the number of
        candidate items, since the list could not be filled without repeats.

    Notes
    -----
    Distances are computed with NumPy only. NaN feature values are ignored
    when averaging the centroid (``np.nanmean``) but still propagate into the
    distances.
    """
    hat_ratings = np.dot(user_features, item_features.T)
    num_users, num_items = hat_ratings.shape

    if top_n_limit:
        # If constraining by top n, only retain the top n ratings within each user.
        ind = np.argpartition(hat_ratings, -top_n_limit)[:, -top_n_limit:]
        # Gather row-wise; a plain np.take would index the flattened matrix
        # and hand every user ratings taken from the first row.
        n_ratings = np.take_along_axis(hat_ratings, ind, axis=1)
    else:
        # Not constraining by top n: every item is a candidate for every user,
        # so 'recs_idx' and 'original_recs_idx' coincide in the result.
        ind = np.tile(np.arange(num_items), (num_users, 1))
        n_ratings = hat_ratings

    n_candidates = ind.shape[1]
    if not 1 <= n_recs <= n_candidates:
        raise ValueError(
            "n_recs must be between 1 and the number of candidate items "
            "({}), got {}".format(n_candidates, n_recs)
        )

    # One (n_recs, num_factors) block per user.
    all_recs = np.empty([num_users, n_recs, item_features.shape[1]])
    all_user_recs = dict()

    for idx, user in enumerate(user_features):
        # Fancy indexing copies, so the caller's item_features is never modified.
        candidate_feats = item_features[ind[idx]]
        candidate_preds = n_ratings[idx]
        available = np.ones(n_candidates, dtype=bool)

        # Get the top rec and add that as the first item for each user.
        first = int(np.argmax(candidate_preds))
        recs_idxs = [first]
        available[first] = False

        for _ in range(1, n_recs):
            # With a single chosen item the centroid is that item itself.
            centroid = np.nanmean(candidate_feats[recs_idxs], axis=0)
            distances = np.abs(candidate_feats - centroid).sum(axis=1)
            # Mask chosen items explicitly so ties can never re-select them.
            distances[~available] = -np.inf
            most_distant = int(np.argmax(distances))
            recs_idxs.append(most_distant)
            available[most_distant] = False

        recs_features = candidate_feats[recs_idxs]
        all_recs[idx, :, :] = recs_features

        all_user_recs[idx] = {
            'user_feats': user,
            # item index from the original array of indices, not the constrained one
            'original_recs_idx': [int(ind[idx, pos]) for pos in recs_idxs],
            'recs_idx': recs_idxs,
            'recs_features': recs_features,
            'recs_preds': [float(candidate_preds[pos]) for pos in recs_idxs],
        }

    return all_recs, all_user_recs

array([[40,  7, 11, ..., 35, 20, 26],
       [29, 27, 41, ...,  8, 43, 22],
       [33, 31, 47, ..., 44, 23, 10],
       ...,
       [24,  1, 49, ...,  8, 26, 20],
       [38,  7, 35, ..., 20, 46, 12],
       [44,  7, 21, ...,  5, 14,  8]])

In [19]:
# Check the shape of `n_ratings`; the recorded output is (200, 10).
# NOTE(review): no live code visible in this section assigns a global
# `n_ratings`, so this cell depends on leftover kernel state.
n_ratings.shape

(200, 10)

In [46]:
# NOTE(review): consider moving this import into the import cell at the top.
import datetime

# Print the current local date and time as an execution timestamp.
x = datetime.datetime.now()
print(x) 

2021-02-25 08:38:47.303962
