In [55]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import json
from scipy.spatial.distance import pdist, squareform, cdist
#import scipy

#from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics import pairwise_distances

#from lenskit.datasets import ML100K
from lenskit import batch, topn, util
from lenskit import crossfold as xf
from lenskit.algorithms import Recommender, als
from lenskit import topn
from lenskit.metrics.predict import rmse, mae


In [61]:
class NpEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, np.integer):
            return int(obj)
        elif isinstance(obj, np.floating):
            return float(obj)
        elif isinstance(obj, np.ndarray):
            return obj.tolist()
        else:
            return super(NpEncoder, self).default(obj)

In [2]:
#Load data from Bollen et al., 2010
choice = pd.read_csv('co1_wide.csv')  


movie_cols = ['movie'+str(num) for num in range(1,21)] +['choice']
score_cols = ['score'+str(num) for num in range(1,21)]

movies_wide = choice[movie_cols + ['userid', 'condition']].rename(columns=dict(zip(movie_cols,list(range(1,21)))))
scores_wide = choice[score_cols + ['userid']].rename(columns=dict(zip(score_cols,list(range(1,21)))))

movies_long = movies_wide.melt(id_vars=['userid', 'condition', 'choice'], var_name='array_num', value_name='movieid')
#movies_long['item']=movies_long['movieid'].str.split(pat="/", expand=True)[[3]].astype(int)

scores_long = scores_wide.melt(id_vars=['userid'], value_vars=list(range(1,21)), var_name='array_num', value_name='score')

#create the cartesian product of all unique movies and users for subsequent join
unique_movies = list(pd.unique(movies_long['movieid']))
unique_users = list(pd.unique(movies_long['userid']))
index = pd.MultiIndex.from_product([unique_users, unique_movies], names = ["userid","movieid"])
movies_users_index = pd.DataFrame(index = index).reset_index()



movies = pd.merge(movies_users_index, movies_long, how='left', on=["userid", "movieid"])
movies_scores = pd.merge(movies, scores_long, how='left', on=["userid", "array_num"])

movies_scores['chose_movie'] = np.where(movies_scores['choice']==movies_scores['movieid'], 1, 0)

#problems due to NaN
dups=movies_scores.duplicated(keep=False)

#convert long form dataframe into format similar to ratings matrix
R_df_long = movies_scores[['userid', 'movieid', 'score']].dropna()#form for surprise

R_df_long['rating']=R_df_long['score']/10
R_df_long['item']=R_df_long['movieid'].str.split(pat="/", expand=True)[[3]].astype(int)

ratings=R_df_long[['userid', 'item', 'rating']]
ratings.columns=['user', 'item', 'rating']

In [68]:
score_cols = [c for c in choice.columns if 'score' in c]
movie_cols = [c for c in choice.columns if 'movie' in c]
all_subj_dict = {}

for sub in range(len(choice)):   
    
    subj_dict={}
    subj_dict['userid']=choice.loc[sub]['userid']
       
    for movie_num, movie in enumerate(movie_cols):
        if choice.loc[sub]['choice'] == choice.loc[sub][movie]:
            #Add +1 to deal with 1-indexing in MATLAB
            subj_dict['Y'] = movie_num+1
            break
    
    subj_dict['X']=[s for s in list(choice.loc[sub][score_cols].values/10) if not np.isnan(s)]
    
    if choice.loc[sub]['condition'] in ['Top5', 'Top5_NR']:
        subj_dict['J']=5
    else:
        subj_dict['J']=20
    
    sub_json = json.dumps(subj_dict, cls=NpEncoder)

    with open('{}_data.txt'.format(str(sub)), 'w') as json_file:
        json.dump(sub_json, json_file)
    
    all_subj_dict[str(sub)]=subj_dict
    
        
 

In [59]:
all_subj_dict

{'0': {'userid': 182,
  'Y': 4,
  'X': [3.9,
   3.8,
   3.8,
   3.8,
   3.8,
   3.8,
   3.7,
   3.7,
   3.7,
   3.7,
   3.7,
   3.7,
   3.7,
   3.7,
   3.7,
   3.7,
   3.7,
   3.7,
   3.7,
   3.7],
  'J': 20},
 '1': {'userid': 174,
  'Y': 20,
  'X': [4.2,
   4.2,
   4.2,
   4.2,
   4.1,
   3.8,
   3.7,
   3.6,
   3.5,
   3.5,
   3.4,
   3.3,
   3.3,
   3.3,
   3.2,
   3.2,
   3.1,
   3.1,
   3.0,
   3.0],
  'J': 20},
 '2': {'userid': 224,
  'Y': 9,
  'X': [4.1,
   4.0,
   4.0,
   4.0,
   4.0,
   3.9,
   3.9,
   3.8,
   3.8,
   3.8,
   3.7,
   3.7,
   3.7,
   3.7,
   3.6,
   3.6,
   3.6,
   3.6,
   3.5,
   3.5],
  'J': 20},
 '3': {'userid': 180,
  'Y': 11,
  'X': [3.6,
   3.6,
   3.5,
   3.5,
   3.5,
   3.4,
   3.3,
   3.3,
   3.3,
   3.3,
   3.2,
   3.2,
   3.2,
   3.2,
   3.2,
   3.2,
   3.2,
   3.2,
   3.2,
   3.1],
  'J': 20},
 '4': {'userid': 98, 'Y': 2, 'X': [3.5, 3.5, 3.5, 3.5, 3.5], 'J': 5},
 '5': {'userid': 151,
  'Y': 1,
  'X': [4.1,
   4.0,
   4.0,
   4.0,
   3.9,
   3.9,
   

In [69]:
DN_json = json.dumps(all_subj_dict, cls=NpEncoder)

with open('BollenData.txt', 'w') as json_file:
    json.dump(DN_json, json_file)

In [67]:
DN_json

'{"0": {"userid": 182, "Y": 4, "X": [3.9, 3.8, 3.8, 3.8, 3.8, 3.8, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7, 3.7], "J": 20}, "1": {"userid": 174, "Y": 20, "X": [4.2, 4.2, 4.2, 4.2, 4.1, 3.8, 3.7, 3.6, 3.5, 3.5, 3.4, 3.3, 3.3, 3.3, 3.2, 3.2, 3.1, 3.1, 3.0, 3.0], "J": 20}, "2": {"userid": 224, "Y": 9, "X": [4.1, 4.0, 4.0, 4.0, 4.0, 3.9, 3.9, 3.8, 3.8, 3.8, 3.7, 3.7, 3.7, 3.7, 3.6, 3.6, 3.6, 3.6, 3.5, 3.5], "J": 20}, "3": {"userid": 180, "Y": 11, "X": [3.6, 3.6, 3.5, 3.5, 3.5, 3.4, 3.3, 3.3, 3.3, 3.3, 3.2, 3.2, 3.2, 3.2, 3.2, 3.2, 3.2, 3.2, 3.2, 3.1], "J": 20}, "4": {"userid": 98, "Y": 2, "X": [3.5, 3.5, 3.5, 3.5, 3.5], "J": 5}, "5": {"userid": 151, "Y": 1, "X": [4.1, 4.0, 4.0, 4.0, 3.9, 3.9, 3.9, 3.8, 3.8, 3.8, 3.8, 3.8, 3.8, 3.8, 3.8, 3.8, 3.8, 3.8, 3.8, 3.8], "J": 20}, "6": {"userid": 162, "Y": 7, "X": [3.9, 3.9, 3.9, 3.9, 3.8, 3.6, 3.5, 3.5, 3.4, 3.4, 3.4, 3.4, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3, 3.3], "J": 20}, "7": {"userid": 193, "Y": 1, "X": [3.6, 3.6, 3.5, 3

In [3]:
#Create the cartesian product of all possible users and movies
unique_movies = list(pd.unique(movies_long['movieid'].dropna()))
movie_nums = [int(m.split('/')[3]) for m in unique_movies]
all_index = pd.MultiIndex.from_product([unique_users, movie_nums], 
                                                    names = ["user","item"])
all_u_i = pd.DataFrame(index=all_index).reset_index()

In [4]:
all_u_i

Unnamed: 0,user,item
0,182,551
1,182,2019
2,182,3825
3,182,2764
4,182,1206
...,...,...
182315,163,805
182316,163,1810
182317,163,993
182318,163,726


In [5]:
#For our first model, we want the low rank representation to approximate our data as closely as possible (i.e., overfit the data). 
#This way, our "real" user factors come as close as possible to user factors that could reproduce the predictions
#from the MF algorithm in Bollen et al., 2010.
algo_als = als.BiasedMF(10, iterations=100,reg=0.0001, bias=False, rng_spec=1)

algo_als.fit(ratings)
preds_als = batch.predict(algo_als, ratings)


#print("RMSE for ALS: {}".format(rmse(preds_als['prediction'], preds_als['rating'])))

algo_als_hat = als.BiasedMF(10, iterations=3,reg=0.0001, bias=False, rng_spec=1)

algo_als_hat.fit(ratings)
preds_als_hat = batch.predict(algo_als_hat, ratings)


print("RMSE for ALS: {}".format(rmse(preds_als['prediction'], preds_als['rating'])))
print("RMSE for ALS hat: {}".format(rmse(preds_als_hat['prediction'], preds_als_hat['rating'])))

print("MAE for ALS: {}".format(mae(preds_als['prediction'], preds_als['rating'])))
print("MAE for ALS hat: {}".format(mae(preds_als_hat['prediction'], preds_als_hat['rating'])))


RMSE for ALS: 0.01660285426101378
RMSE for ALS hat: 0.2547659899443631
MAE for ALS: 0.005854656281398418
MAE for ALS hat: 0.12020732917684518


In [6]:
#users = ratings['user'].unique()
#print(users)
#recs = batch.recommend(algo_als, users)
all_preds = batch.predict(algo_als_hat, all_u_i)

In [7]:
all_preds.head()

Unnamed: 0,user,item,prediction
0,19,551,5.089715
1,19,2019,-2.911799
2,19,3825,-9.592937
3,19,2764,-0.234379
4,19,1206,-3.346746


In [8]:
print("Summary statistics for 'actual' data...")
print("min")
print(preds_als.min())
print("\n")
print("max")
print(preds_als.max())
print("\n")
print("mean")
print(preds_als.mean())
print("\n")

print("Summary statistics for 'hat' data...")
print("min")
print(preds_als_hat.min())
print("\n")
print("max")
print(preds_als_hat.max())
print("\n")
print("mean")
print(preds_als_hat.mean())

Summary statistics for 'actual' data...
min
user          19.000000
item           1.000000
rating         2.400000
prediction     2.399999
dtype: float64


max
user           251.000000
item          3949.000000
rating           5.000000
prediction       5.014661
dtype: float64


mean
user           143.542056
item          1695.534579
rating           3.685159
prediction       3.685144
dtype: float64


Summary statistics for 'hat' data...
min
user          19.000000
item           1.000000
rating         2.400000
prediction     1.254292
dtype: float64


max
user           251.000000
item          3949.000000
rating           5.000000
prediction       5.763938
dtype: float64


mean
user           143.542056
item          1695.534579
rating           3.685159
prediction       3.673479
dtype: float64


In [9]:
item_features=pd.DataFrame(algo_als.item_features_, index=movie_nums)

In [10]:
item_features=pd.DataFrame(algo_als.item_features_, index=movie_nums).reset_index()
item_features_hat=pd.DataFrame(algo_als_hat.item_features_, index=movie_nums)

user_features=pd.DataFrame(algo_als.user_features_, index=unique_users)
user_features_hat=pd.DataFrame(algo_als_hat.user_features_, index=unique_users)

actual_items = {i:v for i,v in zip(algo_als.item_index_, algo_als.item_features_)}
actual_users = {u:v for u,v in zip(algo_als.user_index_, algo_als.user_features_)}

hat_items = {i:v for i,v in zip(algo_als_hat.item_index_, algo_als_hat.item_features_)}
hat_users = {u:v for u,v in zip(algo_als_hat.user_index_, algo_als_hat.user_features_)}

In [11]:
item_features.columns

Index(['index', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='object')

In [12]:
item_features.columns

Index(['index', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='object')

In [13]:
all_preds_feats = pd.merge(all_preds, item_features, left_on='item', right_on='index').drop(columns='index')

In [14]:
all_preds_feats.sort_values(['user', 'item'])

Unnamed: 0,user,item,prediction,0,1,2,3,4,5,6,7,8,9
64156,19,1,16.948305,-1.548257,-0.045319,-0.751157,-0.052875,-0.269830,-0.144795,0.038999,0.271114,-0.051169,-0.001139
139836,19,2,6.722043,0.027513,0.485471,-0.346955,0.000182,-0.142327,0.497053,-0.287489,0.104881,0.317564,0.090711
74648,19,7,0.923936,0.742481,0.516058,-0.255549,-0.734636,0.162527,0.386711,0.736182,-0.175698,-0.183765,-0.417835
44892,19,10,3.437130,0.621873,0.226003,-0.007399,0.356428,-1.044707,-0.239429,0.115518,0.329110,0.168624,0.104404
109908,19,16,-3.609928,0.543101,0.711567,0.138870,0.039406,0.000091,0.020364,-0.194478,-0.451202,0.005932,-0.367209
...,...,...,...,...,...,...,...,...,...,...,...,...,...
165635,251,3923,1.637322,-0.409971,-0.538277,0.177227,0.375517,-0.175836,-0.111924,-0.454722,0.210713,-0.391436,0.102341
52459,251,3929,-3.658906,0.358040,-0.299347,-0.316721,0.049043,-0.535703,0.137746,0.253934,-0.106500,-0.551198,0.622412
80839,251,3936,-1.253687,-0.555280,-0.218053,-0.188493,0.693863,0.424740,-0.155350,0.708075,0.210488,-0.413434,0.060417
70003,251,3945,-0.364683,4.808856,0.364956,0.547155,-0.157341,-0.380701,-0.183401,0.116229,0.099123,-0.206885,0.163830


In [15]:
#t=all_preds_feats[['user', 'prediction']].groupby('user').min()
all_preds_feats['rank']=all_preds_feats[['prediction', 'user']].groupby('user').transform('rank', ascending=False)
#data.groupby('group').transform('rank', method='min')

In [16]:
all_preds_feats.sort_values(['user', 'item']).head()

Unnamed: 0,user,item,prediction,0,1,2,3,4,5,6,7,8,9,rank
64156,19,1,16.948305,-1.548257,-0.045319,-0.751157,-0.052875,-0.26983,-0.144795,0.038999,0.271114,-0.051169,-0.001139,34.0
139836,19,2,6.722043,0.027513,0.485471,-0.346955,0.000182,-0.142327,0.497053,-0.287489,0.104881,0.317564,0.090711,137.0
74648,19,7,0.923936,0.742481,0.516058,-0.255549,-0.734636,0.162527,0.386711,0.736182,-0.175698,-0.183765,-0.417835,402.0
44892,19,10,3.43713,0.621873,0.226003,-0.007399,0.356428,-1.044707,-0.239429,0.115518,0.32911,0.168624,0.104404,264.0
109908,19,16,-3.609928,0.543101,0.711567,0.13887,0.039406,9.1e-05,0.020364,-0.194478,-0.451202,0.005932,-0.367209,678.0


In [17]:
hat_ratings = np.dot(algo_als_hat.user_features_, algo_als_hat.item_features_.T) 

#hat_ratings_df = pd.DataFrame(hat_ratings)
max_idx = np.argmax(hat_ratings, axis=1)
top_items=algo_als.item_features_[max_idx]

In [18]:
max_idx

array([ 981,  726,  187,  777,  107,  726,  726,  726,  107,  777,  726,
        107,  777,  777,  777,  777,  777,  187,  777,  726,  777,  107,
        777,  777,  726,  726,  777,  107,  107,  726,  107,  107,  107,
        726,  726,  726,  107,  777,  187,  107,  726,  777,  107,  107,
        726,  777,  726,  726,  726,  777,  726,  777,  107,  726, 1036,
        777,  777,  726,  726,  777,  107,  107,  726,  726,  777,  726,
        726,  107,  107,  726,  777,  777,  107,  777,  777,  726,  726,
        107,  777, 1036,  726,  726,  726,  726,  777,  107,  726,  726,
        107,  777,  777,  777,  777,  777,  726,  981,  726,  777,  726,
        107,  107,  187,  107,  777,  777,  777,  777,  107,  777,  107,
        726,  777,  107,  777,  107,  726,  777,  107,  726,  777,  107,
        726,  107,  777,  187,  107,  107,  777,  107,  777,  777,  726,
        282,  107,  187,  726,  777,  726,  107,  726,  777,  107,  187,
        777,  726,  107,  777,  726,  981,  726,  1

In [20]:
#print(idx)
#print(ind[idx, most_distant])

NameError: name 'idx' is not defined

In [None]:
user_max

In [None]:
n_ratings[idx][user_max]

In [None]:
orig_idx = ind[idx, user_max]
print(orig_idx)

In [None]:
hat_ratings[idx, orig_idx]

In [None]:
print(np.argmax(hat_ratings, axis=1)[idx])
print(ind[idx, max_idx[idx]])

In [None]:
np.nanmean(recs_features, axis=0)

In [90]:
#top_n_limit = 100
#n_recs = 10
#user_features = algo_als_hat.user_features_
#item_features = algo_als_hat.item_features_

def latent_factors_diversification(user_features, item_features, n_recs=10, top_n_limit=None):



    hat_ratings = np.dot(user_features, item_features.T) 

    if top_n_limit:
        #if constraining by top n, only retain the top n ratings within each user
        ind=np.argpartition(hat_ratings,-top_n_limit)[:,-top_n_limit:]
        n_ratings = np.take(hat_ratings, ind)
    else:
        #ind=np.repeat(np.arange(0,len(item_features)), len(user_features)).reshape(len(user_features), len(item_features))
        ind=np.tile(np.arange(0,len(item_features)),(len(user_features),1))
        n_ratings = hat_ratings



    all_user_recs = dict()
    
    max_idx = np.argmax(n_ratings, axis=1)
    top_items=item_features[max_idx]
    
    

    for idx, user in enumerate(user_features):

        user_item_feats = item_features[ind[idx]]
        user_max_idx = np.argmax(n_ratings[idx])

        #get the top rec and add that as the first item for each user
        user_max = max_idx[idx]
        recs_features = top_items[idx]
        recs_idxs = [max_idx[idx]]
        recs_preds = [n_ratings[idx][user_max]]
        orig_recs_idxs = [ind[idx, user_max]]



        for rec in range(1,n_recs):
            if rec == 1:
                #for the second item, just use the first item values
                centroid = recs_features
            else:
                centroid = np.nanmean(recs_features, axis=0)

            centroid = centroid.reshape(1, -1)

            #set all the previously chosen item features to the centroid, so they will not be selected again
            #don't want to just remove rows because it will throw of the indexing
            user_item_feats[recs_idxs]=centroid

            d = pairwise_distances(X=centroid, Y=user_item_feats, metric='cityblock',force_all_finite='allow_nan' )
            most_distant = np.argmax(d)

            recs_idxs.append(most_distant)
            #get the item index from the original array of indices, not the constrained array
            orig_recs_idxs.append(ind[idx, most_distant])
            recs_preds.append(n_ratings[idx][most_distant])

            recs_features = np.vstack((recs_features, user_item_feats[most_distant]))


        all_user_recs[idx]={'user_feats': user,
                        'original_recs_idx':orig_recs_idxs,
                        'recs_idx':recs_idxs,
                        'recs_features':recs_features,
                        'recs_preds':recs_preds}

        
    return all_user_recs


    
    
    
    

In [78]:
user_features = algo_als.user_features_
item_features = algo_als.item_features_
hat_ratings = np.dot(user_features, item_features.T) 

ind=np.repeat(np.arange(0,len(item_features)), len(user_features)).reshape(len(user_features), len(item_features))
n_ratings = hat_ratings

In [80]:
ind.shape

(172, 1060)

In [81]:
n_ratings.shape

(172, 1060)

In [88]:
t=np.tile(np.arange(0,len(item_features)),(len(user_features),1))

In [89]:
t

array([[   0,    1,    2, ..., 1057, 1058, 1059],
       [   0,    1,    2, ..., 1057, 1058, 1059],
       [   0,    1,    2, ..., 1057, 1058, 1059],
       ...,
       [   0,    1,    2, ..., 1057, 1058, 1059],
       [   0,    1,    2, ..., 1057, 1058, 1059],
       [   0,    1,    2, ..., 1057, 1058, 1059]])

In [None]:
user_item_feats = item_features[ind[0]]
user_max_idx = np.argmax(n_ratings[idx])

In [91]:
user_recs_test = latent_factors_diversification(user_features=algo_als_hat.user_features_, item_features=algo_als_hat.item_features_, 
                                                top_n_limit=None)

In [92]:
user_recs_test

{0: {'user_feats': array([-1.42680763,  2.57099385,  6.52898395, -1.48245462, -5.97311882,
          6.82861023,  0.08924562, -3.07441259, -8.40593482, -3.20339233]),
  'original_recs_idx': [981, 726, 777, 107, 220, 324, 478, 760, 663, 1024],
  'recs_idx': [981, 726, 777, 107, 220, 324, 478, 760, 663, 1024],
  'recs_features': array([[-4.20592482e+01,  6.57560811e-02, -1.72563990e-01,
           1.30606039e-01, -2.97764133e-01, -3.75372626e-02,
          -4.78783464e-01, -2.02383185e-01, -1.35367220e-01,
          -6.18882681e-01],
         [ 1.04374874e+02,  1.70997857e-01,  1.68107362e-01,
          -3.98071281e-01,  4.84412761e-01,  8.34763484e-02,
          -4.08063377e-01, -1.66962321e-01, -3.26759587e-01,
           3.09983485e-01],
         [ 3.00691944e-01, -7.92469557e+01,  6.70257183e+00,
           4.58626575e-01, -6.54630492e-01, -1.79322360e-01,
          -3.45576590e-02,  1.83438772e-01,  6.12854518e-02,
          -4.58082062e-01],
         [-4.44457684e+01, -7.20681666e-

In [25]:
hat_ratings = np.dot(algo_als_hat.user_features_, algo_als_hat.item_features_.T) 

In [36]:
ind=np.argpartition(hat_ratings,-100)[:,-100:]
n_ratings = np.take(hat_ratings, ind)

In [39]:
ind[0]

array([ 411, 1000,   31,  940,   28,  518,  757,  781,  710,  969,  391,
        893,  238,  896,   92,  638,   58,  939,  348,   51,   56,  163,
        833,  472,  338,   67,  607,  602, 1036,  516,  990,  301,  435,
        631,  567,  933,  951,  124,  681,  795,  741,  528, 1006,  744,
        864,   95,  107, 1007,  112,  419,  865,  775,  413,  776,  858,
        131,  852,  433,  685,  881,  439,  373,   70,  577,   69,  359,
        576,  837,  663,  981,  759,  736,  474,  339,  488,   46,  970,
       1037,   44,   41,  489,  823,  491,  181,  187,  324,  508,  755,
        193,  197,  807,  748,  804,  282,  561,  786,  642,  281,  640,
          0])

In [56]:
user_item_feats = algo_als_hat.item_features_[ind[1]]
print(len(user_item_feats))

100


In [59]:
unique_users[1]

174

In [40]:
user_item_feats[69]

array([-4.20592482e+01,  6.57560811e-02, -1.72563990e-01,  1.30606039e-01,
       -2.97764133e-01, -3.75372626e-02, -4.78783464e-01, -2.02383185e-01,
       -1.35367220e-01, -6.18882681e-01])

In [28]:
unique_users[0]

182

In [32]:
user=hat_ratings[0]

In [62]:
user_recs_test

{182: {'original_idx': [981, 107, 187, 1036, 324, 640, 663, 748, 1037, 837],
  'recs_idx': [69, 46, 84, 28, 85, 98, 68, 91, 77, 67],
  'recs_features': array([[ 2.29327038e-01,  8.86308650e-01,  1.08971989e+00,
           2.76796267e-01, -8.37889914e-02,  1.95010116e-01,
          -2.46252875e-01, -4.40771104e-01, -5.50526409e-01,
          -1.82175597e-01],
         [-4.44457684e+01, -7.20681666e-02, -4.72060528e-02,
          -5.24504513e-01,  5.60571987e-02,  2.40181370e-01,
          -2.95286951e-01, -1.88524039e-01,  5.87579801e-01,
           6.61843119e-02],
         [ 6.39758106e-01,  1.83600790e+01, -8.65192682e-01,
           1.78059463e-01, -3.84027254e-01,  8.23833758e-03,
          -3.43721475e-01,  5.19900720e-01,  5.59437672e-01,
           2.50773299e-01],
         [ 1.24801441e-01, -1.82603510e+00,  1.30395630e+00,
          -7.62996549e+00, -5.72735523e-02, -3.23770309e-01,
          -1.12130438e-01,  3.97737068e-01,  1.20741186e-01,
           1.12091619e-01],
      

In [None]:
user_recs

In [None]:
# item_features=pd.DataFrame(algo_als.item_features_)
# item_features_hat=pd.DataFrame(algo_als_hat.item_features_)
# n_feature=0

# item_hat = item_features[n_feature].tolist()
# item_actual = item_features_hat[n_feature].tolist()

# plt.plot(item_hat, item_actual, 'o', color='black');
# plt.xlabel("predicted factors")
# plt.ylabel("actual factors")
# plt.title("association between actual factors and hat factors")

# plt.show()

In [None]:
font = {'family' : 'normal',
        'weight' : 'bold',
        'size'   : 12}

plt.rc('font', **font)

n_features = list(range(0,10))
fig, axs = plt.subplots(math.ceil(len(n_features)/3), 3, figsize=(20,20))
plt.subplots_adjust(top=0.92, bottom=0.08, left=0.10, right=0.95, hspace=0.25,
                    wspace=0.35)
fig.suptitle('Actual vs. Hat Representation for Item Factors', size=20)

for idx, n_feature in enumerate(n_features):
    r=idx //3
    c=idx % 3

    item_hat = item_features[n_feature].tolist()
    item_actual = item_features_hat[n_feature].tolist()
    axs[r, c].set_title('Factor {}'.format(n_feature))

    axs[r,c].plot(item_hat, item_actual, 'o', color='blue');

for ax in axs.flat:
    ax.set(xlabel='hat representation', ylabel='actual representation')

# Hide x labels and tick labels for top plots and y ticks for right plots.
#for ax in axs.flat:
#    ax.label_outer()

fig.delaxes(axs[3][1])
fig.delaxes(axs[3][2])


In [None]:
font = {'family' : 'normal',
        'weight' : 'bold',
        'size'   : 12}

plt.rc('font', **font)

n_features = list(range(0,10))
fig, axs = plt.subplots(math.ceil(len(n_features)/3), 3, figsize=(20,20))
plt.subplots_adjust(top=0.92, bottom=0.08, left=0.10, right=0.95, hspace=0.25,
                    wspace=0.35)
fig.suptitle('Actual vs. Hat Representation for User Factors', size=20)

for idx, n_feature in enumerate(n_features):
    r=idx //3
    c=idx % 3

    user_hat = user_features[n_feature].tolist()
    user_actual = user_features_hat[n_feature].tolist()
    axs[r, c].set_title('Factor {}'.format(n_feature))

    axs[r,c].plot(user_hat, user_actual, 'o', color='red');

for ax in axs.flat:
    ax.set(xlabel='hat representation', ylabel='actual representation')

# Hide x labels and tick labels for top plots and y ticks for right plots.
#for ax in axs.flat:
#    ax.label_outer()

fig.delaxes(axs[3][1])
fig.delaxes(axs[3][2])