In [2]:
from EMF import *

In [1]:
from preprocess import * 



In [3]:
from usertouser import *

In [4]:
def explainable_score(user2user, users, items, theta=0):
    
    def _progress(count):
        sys.stdout.write('\rCompute Explainable score. Progress status : %.1f%%'%(float(count/len(users))*100.0))
        sys.stdout.flush()
    # initialize explainable score to zeros
    W = np.zeros((len(users), len(items)))

    for count, u in enumerate(users):            
        candidate_items = user2user.find_user_candidate_items(u)        
        for i in candidate_items:                
            user_who_rated_i, similar_user_who_rated_i = \
                user2user.similar_users_who_rated_this_item(u, i)
            if user_who_rated_i.shape[0] == 0:
                w = 0.0
            else:
                w = similar_user_who_rated_i.shape[0] / user_who_rated_i.shape[0]
            W[u,i] =  w  if w > theta else 0.0
        _progress(count)
    return W


In [5]:
epochs = 10


In [6]:
# load data
ratings, movies = prep_movielens()

In [12]:
ratings, uencoder, iencoder = ids_encoder(ratings)

In [8]:
users = sorted(ratings.userid.unique())
items = sorted(ratings.itemid.unique())

m = len(users)
n = len(items)

# get examples as tuples of userids and itemids and labels from normalize ratings
raw_examples, raw_labels = get_examples(ratings)

# train test split
(x_train, x_test), (y_train, y_test) = train_test_split(examples=raw_examples, labels=raw_labels)


In [9]:
ratings.head()

Unnamed: 0,userid,itemid,rating
0,0,0,5
1,0,1,3
2,0,2,4
3,0,3,3
4,0,4,3


In [10]:
usertouser = UserToUser(ratings, movies)

# compute explainable score
W = explainable_score(usertouser, users, items)

Normalize users ratings ...
Initialize the similarity model ...
Compute nearest neighbors ...
User to user recommendation model created with success ...
Compute Explainable score. Progress status : 99.9%

In [11]:
EMF = ExplainableMatrixFactorization(m, n, W, alpha=0.01, beta=0.4, lamb=0.01, k=10)

history = EMF.fit(x_train, y_train, epochs=epochs, validation_data=(x_test, y_test))

Training EMF
k=10 	 alpha=0.01 	 beta=0.4 	 lambda=0.01
epoch 1/10 - loss : 0.922 - val_loss : 1.036
epoch 2/10 - loss : 0.79 - val_loss : 0.873
epoch 3/10 - loss : 0.766 - val_loss : 0.837
epoch 4/10 - loss : 0.757 - val_loss : 0.822
epoch 5/10 - loss : 0.753 - val_loss : 0.814
epoch 6/10 - loss : 0.751 - val_loss : 0.808
epoch 7/10 - loss : 0.749 - val_loss : 0.805
epoch 8/10 - loss : 0.748 - val_loss : 0.802
epoch 9/10 - loss : 0.746 - val_loss : 0.799
epoch 10/10 - loss : 0.745 - val_loss : 0.797


In [13]:
test_user_ids = np.array([a for a,b in x_test]).astype(int)
test_movie_ids = np.array([b for a,b in x_test]).astype(int)

In [14]:
EMF.predict(0,0, uencoder, iencoder)

3.8936231398936

In [15]:
predictions = []
for a,b in x_test:
    predictions.append(EMF.predict(a,b,uencoder,iencoder))

In [16]:
predictions[:5]

[2.3181010158444924,
 3.3214274337099017,
 2.726314631261953,
 4.206935956569679,
 2.6715781758512045]

In [16]:
k_list = [1,5,10,15,20]

In [19]:
ndcgs = []
recalls = []
mnaps = []

for k in k_list:
    ndcgs.append(EMF.calc_ndcg(np.array(predictions), k, test_user_ids, y_test, test_movie_ids))
    recalls.append(EMF.calc_recalls(k,ratings, test_user_ids, uencoder, iencoder))
    mnaps.append(EMF.calc_mnap(k,ratings, test_user_ids, uencoder, iencoder))

In [21]:
print('NDCG scores @k: {}'.format([el.round(5) for el in ndcgs]), 
      'Recall scores @k: {}'.format([el.round(5) for el in recalls]),
      'MNAP scores @k: {}'.format([el.round(5) for el in mnaps]), sep = '\n')

NDCG scores @k: [0.87526, 0.91824, 0.93915, 0.94904, 0.95434]
Recall scores @k: [0.03024, 0.03024, 0.03024, 0.03024, 0.03024]
MNAP scores @k: [0.01, 0.05623, 0.07867, 0.0893, 0.10131]


In [15]:
EMF.calc_ndcg(np.array(predictions), 5, test_user_ids, y_test, test_movie_ids)

0.9182427410063273

In [10]:
EMF.evaluate(x_test, y_test)

validation error : 0.797


In [10]:
def eval_emf(model,x_test,y_test,predictions):
    test_user_ids = np.array([a for a,b in x_test]).astype(int)
    test_movie_ids = np.array([b for a,b in x_test]).astype(int)
    test_ratings = y_test
    ndgcs = []
    
    for target_users in np.unique(test_user_ids):
        
        target_movie_id = test_movie_ids[target_users == test_user_ids]
        target_rating = test_ratings[target_users == test_user_ids]

        rel = target_rating[np.argsort(-predictions[target_users == test_user_ids])]
        ndgc1 = ndgc_at_k(rel, k=1)
        ndgc5 = ndgc_at_k(rel, k=5)
        ndgc10 = ndgc_at_k(rel, k=10)
        ndgc15 = ndgc_at_k(rel, k=15)
        ndgc20 = ndgc_at_k(rel, k=20)

        ndgcs1.append(ndgc1)
        ndgcs5.append(ndgc5)
        ndgcs10.append(ndgc10)
        ndgcs15.append(ndgc15)
        ndgcs20.append(ndgc20)

    ndcg_f1 = np.mean(ndgcs1)
    ndcg_f5 = np.mean(ndgcs5)
    ndcg_f10 = np.mean(ndgcs10)
    ndcg_f15 = np.mean(ndgcs15)
    ndcg_f20 = np.mean(ndgcs20)

    return print("-------- K = 1 --------",
                  "NDCG@k:\t%f" % ndcg_f1,
                  "Recall@k:\t%f" % 0,
                  "MNAP@K:\t%f" % 0, 
                  "-------- K = 5 --------",
                  "NDCG@k:\t%f" % ndcg_f5,
                  "Recall@k:\t%f" % 0,
                  "MNAP@K:\t%f" % 0, 
                  "-------- K = 10 --------",
                  "NDCG@k:\t%f" % ndcg_f10,
                  "Recall@k:\t%f" % 0,
                  "MNAP@K:\t%f" % 0, 
                  "-------- K = 15 --------",
                  "NDCG@k:\t%f" % ndcg_f15,
                  "Recall@k:\t%f" % 0,
                  "MNAP@K:\t%f" % 0, 
                  "-------- K = 20 --------",
                  "NDCG@k:\t%f" % ndcg_f20,
                  "Recall@k:\t%f" % 0,
                  "MNAP@K:\t%f" % 0, sep = '\n')

In [21]:
def recommend(userid, N):
    u = uencoder.transform([userid])[0]
    predictions = np.dot(P[u], Q.T)
    top_idx = np.flip(np.argsort(predictions))[:N]
    top_items = iencoder.inverse_transform(top_idx)
    preds = predictions[top_idx]
    return top_items, preds 

In [67]:
x_test

array([[692, 381],
       [746, 110],
       [200, 211],
       ...,
       [880, 321],
       [660, 706],
       [675, 299]])

In [68]:
eval_emf(EMF, x_test,y_test,np.array(predictions))

IndexError: boolean index did not match indexed array along dimension 0; dimension is 9933 but corresponding boolean dimension is 10000