In [1]:
import numpy as np
import ml_metrics
from neuralrecommender.glocalk import GlocalK


def load_data_100k(path="data/MovieLens_100K/", delimiter='\t'):
    """Code by https://github.com/usydnlp/Glocal_K authors"""
    train = np.loadtxt(path+'movielens_100k_u1.base', skiprows=0, delimiter=delimiter).astype('int32')
    test = np.loadtxt(path+'movielens_100k_u1.test', skiprows=0, delimiter=delimiter).astype('int32')
    total = np.concatenate((train, test), axis=0)

    n_u = np.unique(total[:,0]).size  # num of users
    n_m = np.unique(total[:,1]).size  # num of movies
    n_train = train.shape[0]  # num of training ratings
    n_test = test.shape[0]  # num of test ratings

    train_r = np.zeros((n_m, n_u), dtype='float32')
    test_r = np.zeros((n_m, n_u), dtype='float32')

    for i in range(n_train):
        train_r[train[i,1]-1, train[i,0]-1] = train[i,2]

    for i in range(n_test):
        test_r[test[i,1]-1, test[i,0]-1] = test[i,2]

    train_m = np.greater(train_r, 1e-12).astype('float32')  # masks indicating non-zero entries
    test_m = np.greater(test_r, 1e-12).astype('float32')

    print('data matrix loaded')
    print('num of users: {}'.format(n_u))
    print('num of movies: {}'.format(n_m))
    print('num of training ratings: {}'.format(n_train))
    print('num of test ratings: {}'.format(n_test))

    return n_m, n_u, train_r, train_m, test_r, test_m
     
n_m, n_u, train_r, train_m, test_r, test_m = load_data_100k()

data matrix loaded
num of users: 943
num of movies: 1682
num of training ratings: 80000
num of test ratings: 20000


In [2]:
# Instantiate and train the model
recommender = GlocalK()
metrics = recommender.fit(train_r)
metrics

PRE-TRAINING finished.
FINE-TUNING finished.


{'epochs_p': 88,
 'epochs_f': 163,
 'best_rmse_p': 0.9524429,
 'best_rmse_f': 0.9397368}

In [None]:
# Recommend for all users
res = recommender.predict(np.arange(n_u))

In [86]:
# Evaluate the recommendations
k=50
ground_truth = np.argsort(-test_r, axis=0)[:k,:].T.tolist()
recommended = np.argsort(-res, axis=0)[:k,:].T.tolist()
random = np.random.randint(0,n_m,(n_u, k)).T.tolist()

print("Baseline (random):\t", ml_metrics.mapk(ground_truth, random, k=k), "\nGlocalK:\t\t", ml_metrics.mapk(ground_truth, recommended, k=k))

Baseline (random):	 0.0015963245812385394 
GlocalK:		 0.011698773036428344
