code in part inspired by: https://github.com/EthanRosenthal/torchmf

In [1]:
import os
import mxnet as mx
from mxnet import gluon, nd, ndarray

import pandas as pd
import numpy as np

In [2]:
data_path = '/home/ubuntu/mxnet-the-straight-dope/incubator-mxnet/example/recommenders/ml-100k/'
num_emb = 64
opt = 'Adam'
lr = 0.01
mmntm = 0.
wd = 0.
batch_size = 64
ctx = mx.gpu()

In [3]:
def download_ml_data(prefix):
    if not os.path.exists("%s.zip" % prefix):
        print("Downloading MovieLens data: %s" % prefix)
        os.system("wget http://files.grouplens.org/datasets/movielens/%s.zip" % prefix)
        os.system("unzip %s.zip" % prefix)    

In [4]:
def max_id(fname):
    mu = 0
    mi = 0
    with open(fname) as f:
        for line in f:
            tks = line.strip().split('\t')
            if len(tks) != 4:
                continue
            mu = max(mu, int(tks[0]))
            mi = max(mi, int(tks[1]))
    return mu + 1, mi + 1
max_users, max_items = max_id(data_path + 'u.data')

In [5]:
train_df = pd.read_csv(data_path+'u1.base', header=None, sep='\t')
test_df = pd.read_csv(data_path+'u1.test', header=None, sep='\t')

train_data = nd.array(train_df[[0,1]].values, dtype=np.float32)
train_label = nd.array(train_df[2].values, dtype=np.float32)

test_data = nd.array(test_df[[0,1]].values, dtype=np.float32)
test_label = nd.array(test_df[2].values, dtype=np.float32)

In [6]:
class SparseMatrixDataset(gluon.data.Dataset):
    def __init__(self, data, label):
        assert data.shape[0] == len(label)
        self.data = data
        self.label = label
        if isinstance(label, ndarray.NDArray) and len(label.shape) == 1:
            self._label = label.asnumpy()
        else:
            self._label = label       
        
    def __getitem__(self, idx):
        return self.data[idx, 0], self.data[idx, 1], self.label[idx]
    
    def __len__(self):
        return self.data.shape[0]
        

In [7]:
class MFBlock(gluon.Block):
    def __init__(self, max_users, max_items, num_emb, dropout_p=0.5):
        super(MFBlock, self).__init__()
        
        self.max_users = max_users
        self.max_items = max_items
        self.dropout_p = dropout_p
        self.num_emb = num_emb
        
        with self.name_scope():
            self.user_biases = gluon.nn.Embedding(max_users, 1)
            self.item_biases = gluon.nn.Embedding(max_items, 1)
            self.user_embeddings = gluon.nn.Embedding(max_users, num_emb)
            self.item_embeddings = gluon.nn.Embedding(max_items, num_emb)
            self.dropout = gluon.nn.Dropout(dropout_p)
            
    def forward(self, users, items):
#        predictions = self.user_biases(users)
        
#        predictions += self.item_biases(items)
        
    
        a = self.user_embeddings(users)
        b = self.item_embeddings(items)
        predictions = a * b
        
        predictions = nd.sum(predictions, axis=1)
        return predictions

        

In [8]:
net = MFBlock(max_users=max_users, max_items=max_items, num_emb=num_emb, dropout_p=0.)
net.collect_params()

mfblock0_ (
  Parameter mfblock0_embedding0_weight (shape=(944, 1), dtype=<class 'numpy.float32'>)
  Parameter mfblock0_embedding1_weight (shape=(1683, 1), dtype=<class 'numpy.float32'>)
  Parameter mfblock0_embedding2_weight (shape=(944, 64), dtype=<class 'numpy.float32'>)
  Parameter mfblock0_embedding3_weight (shape=(1683, 64), dtype=<class 'numpy.float32'>)
)

In [9]:
loss_function = gluon.loss.L2Loss()

In [10]:
net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx, force_reinit=True)

In [11]:
trainer = gluon.Trainer(net.collect_params(), 'sgd',
                        {'learning_rate': lr, 'momentum': mmntm, 'wd': wd})

In [12]:
train_data_iter = gluon.data.DataLoader(SparseMatrixDataset(train_data, train_label), 
                                        shuffle=True, batch_size=batch_size)
test_data_iter = gluon.data.DataLoader(SparseMatrixDataset(test_data, test_label),
                                          shuffle=True, batch_size=batch_size)

def eval_error(dataset, net):
    acc = mx.metric.RMSE()
    for i, (user, item, label) in enumerate(train_data_iter):
        user = user.as_in_context(ctx).reshape((64,))
        item = item.as_in_context(ctx).reshape((64,))
        label = label.as_in_context(ctx).reshape((64,))

        output = net(user, item)
        loss = loss_function(output, label)
        preds = nd.argmin(loss, axis=1)
        acc.update(preds=preds, labels=label)

    return acc.get()[1]
        
        

In [16]:
epochs = 10
smoothing_constant = 0.01

def train(data_iter, net):
    for e in range(epochs):
        print("epoc: {}".format(e))
        for i, (user, item, label) in enumerate(train_data_iter):
            user = user.as_in_context(ctx).reshape((64,))
            item = item.as_in_context(ctx).reshape((64,))
            label = label.as_in_context(ctx).reshape((64,))
            with mx.autograd.record():
                output = net(user, item)               
                loss = loss_function(output, label)
                loss.backward()
    return output

In [18]:
train(train_data_iter, net)

epoc: 0
epoc: 1
epoc: 2
epoc: 3
epoc: 4
epoc: 5
epoc: 6
epoc: 7
epoc: 8
epoc: 9



[-0.01764225 -0.01973249 -0.00171107 -0.00667995 -0.0017695  -0.00944531
 -0.00338054 -0.01602585  0.00235402 -0.0067983   0.00552201 -0.00785946
 -0.00317494 -0.02271797  0.00205854 -0.01704894 -0.00930793 -0.00491966
 -0.0005414   0.00098694  0.00228647 -0.01193818  0.0012119  -0.00317118
  0.00303162  0.00057618 -0.00223367  0.00641506  0.01062838 -0.0100396
 -0.00897237 -0.0068769  -0.00147783  0.00885371 -0.00623539  0.00142139
 -0.01124812 -0.02249821  0.0168259  -0.01339902 -0.01084941  0.00923354
 -0.00562321  0.00635514  0.00011133 -0.00374325 -0.00685176  0.0075858
  0.01573353  0.0043061  -0.00816258  0.00269518  0.00899333 -0.00982486
 -0.00354866 -0.00452892 -0.00129756  0.01327208  0.00828554 -0.00563988
 -0.00954418  0.00839976 -0.01502164  0.01955279]
<NDArray 64 @gpu(0)>