In [17]:
import mxnet as mx
import numpy as np
import random
import os
import urllib
import zipfile
import sys

In [37]:
class Batch(object):
    def __init__(self, data_names, data, label_names, label):
        self.data = data
        self.label = label
        self.data_names = data_names
        self.label_names = label_names
        
    @property
    def provide_data(self):
        return [(n, x.shape) for n, x in zip(self.data_names, self.data)]
    
    @property
    def provide_label(self):
        return [(n, x.shape) for n, x in zip(self.label_names, self.label)]
    
class DataIter(mx.io.DataIter):
    def __init__(self, fname, batch_size):
        super(DataIter, self).__init__()
        self.batch_size = batch_size
        self.data = []
        openfile = open(fname)
        for line in openfile:
            tks = line.strip().split('\t')
            if len(tks) != 4:
                continue
            self.data.append((int(tks[0]), int(tks[1]), float(tks[2])))
        self.provide_data = [('user', (batch_size, )), ('item', (batch_size, ))]
        self.provide_label = [('score', (self.batch_size, ))]
        openfile.close()

    def __iter__(self):
        for k in range(len(self.data) // self.batch_size):
            users = []
            items = []
            scores = []
            for i in range(self.batch_size):
                j = k * self.batch_size + i
                user, item, score = self.data[j]
                users.append(user)
                items.append(item)
                scores.append(score)

            data_all = [mx.nd.array(users), mx.nd.array(items)]
            label_all = [mx.nd.array(scores)]
            data_names = ['user', 'item']
            label_names = ['score']

            data_batch = Batch(data_names, data_all, label_names, label_all)
            yield data_batch

    def reset(self):
        random.shuffle(self.data)
        


In [29]:
import urllib.request
if not os.path.exists('ml-100k.zip'):
    urllib.request.urlretrieve('http://files.grouplens.org/datasets/movielens/ml-100k.zip', 'ml-100k.zip')
with zipfile.ZipFile("ml-100k.zip","r") as f:
    f.extractall("./")
def get_data(batch_size):
    return (DataIter('./ml-100k/u1.base', batch_size), DataIter('./ml-100k/u1.test', batch_size))

In [30]:
def max_id(fname):
    mu = 0
    mi = 0
    openfile = open(fname)
    for line in openfile:
        tks = line.strip().split('\t')
        if len(tks) != 4:
            continue
        mu = max(mu, int(tks[0]))
        mi = max(mi, int(tks[1]))
    return mu + 1, mi + 1
    openfile.close()
max_user, max_item = max_id('./ml-100k/u.data')
(max_user, max_item)

(944, 1683)

In [31]:
import math
def RMSE(label, pred):
    ret = 0.0
    n = 0.0
    pred = pred.flatten()
    for i in range(len(label)):
        ret += (label[i] - pred[i]) * (label[i] - pred[i])
        n += 1.0
    return math.sqrt(ret / n)

In [35]:
def train(network, batch_size, num_epoch, learning_rate):
    model = mx.model.FeedForward(
        ctx = mx.cpu(0),  
        symbol = network,
        num_epoch = num_epoch,
        learning_rate = learning_rate,
        wd = 0.0001,
        momentum = 0.9)

    batch_size = 64
    train, test = get_data(batch_size)

    import logging
    head = '%(asctime)-15s %(message)s'
    logging.basicConfig(level=logging.DEBUG)

    model.fit(X = train, 
              eval_data = test,
              eval_metric = RMSE,
              batch_end_callback=mx.callback.Speedometer(batch_size, 20000/batch_size),)


In [38]:
# @@@ AUTOTEST_OUTPUT_IGNORED_CELL
def plain_net(k):
    # input
    user = mx.symbol.Variable('user')
    item = mx.symbol.Variable('item')
    score = mx.symbol.Variable('score')
    # user feature lookup
    user = mx.symbol.Embedding(data = user, input_dim = max_user, output_dim = k) 
    # item feature lookup
    item = mx.symbol.Embedding(data = item, input_dim = max_item, output_dim = k)
    # predict by the inner product, which is elementwise product and then sum
    pred = user * item
    pred = mx.symbol.sum_axis(data = pred, axis = 1)
    pred = mx.symbol.Flatten(data = pred)
    # loss layer
    pred = mx.symbol.LinearRegressionOutput(data = pred, label = score)
    return pred

train(plain_net(64), batch_size=64, num_epoch=10, learning_rate=.05)



INFO:root:Start training with [cpu(0)]
INFO:root:Epoch[0] Batch [625]	Speed: 48308.15 samples/sec	Train-RMSE=3.696177
INFO:root:Epoch[0] Batch [1250]	Speed: 37708.73 samples/sec	Train-RMSE=3.701101
INFO:root:Epoch[0] Resetting Data Iterator
INFO:root:Epoch[0] Time cost=1.043
INFO:root:Epoch[0] Validation-RMSE=3.713837
INFO:root:Epoch[1] Batch [625]	Speed: 47868.05 samples/sec	Train-RMSE=3.641698
INFO:root:Epoch[1] Batch [1250]	Speed: 37602.91 samples/sec	Train-RMSE=2.876571
INFO:root:Epoch[1] Resetting Data Iterator
INFO:root:Epoch[1] Time cost=1.043
INFO:root:Epoch[1] Validation-RMSE=2.393987
INFO:root:Epoch[2] Batch [625]	Speed: 48601.04 samples/sec	Train-RMSE=1.831960
INFO:root:Epoch[2] Batch [1250]	Speed: 40598.68 samples/sec	Train-RMSE=1.438286
INFO:root:Epoch[2] Resetting Data Iterator
INFO:root:Epoch[2] Time cost=1.000
INFO:root:Epoch[2] Validation-RMSE=1.424105
INFO:root:Epoch[3] Batch [625]	Speed: 46031.39 samples/sec	Train-RMSE=1.236698
INFO:root:Epoch[3] Batch [1250]	Speed: 

In [39]:
# @@@ AUTOTEST_OUTPUT_IGNORED_CELL
def get_one_layer_mlp(hidden, k):
    # input
    user = mx.symbol.Variable('user')
    item = mx.symbol.Variable('item')
    score = mx.symbol.Variable('score')
    # user latent features
    user = mx.symbol.Embedding(data = user, input_dim = max_user, output_dim = k)
    user = mx.symbol.Activation(data = user, act_type="relu")
    user = mx.symbol.FullyConnected(data = user, num_hidden = hidden)
    # item latent features
    item = mx.symbol.Embedding(data = item, input_dim = max_item, output_dim = k)
    item = mx.symbol.Activation(data = item, act_type="relu")
    item = mx.symbol.FullyConnected(data = item, num_hidden = hidden)
    # predict by the inner product
    pred = user * item
    pred = mx.symbol.sum_axis(data = pred, axis = 1)
    pred = mx.symbol.Flatten(data = pred)
    # loss layer
    pred = mx.symbol.LinearRegressionOutput(data = pred, label = score)
    return pred

train(get_one_layer_mlp(64, 64), batch_size=64, num_epoch=10, learning_rate=.05)



INFO:root:Start training with [cpu(0)]
INFO:root:Epoch[0] Batch [625]	Speed: 36553.25 samples/sec	Train-RMSE=1.196051
INFO:root:Epoch[0] Batch [1250]	Speed: 36555.00 samples/sec	Train-RMSE=0.982350
INFO:root:Epoch[0] Resetting Data Iterator
INFO:root:Epoch[0] Time cost=1.188
INFO:root:Epoch[0] Validation-RMSE=0.984141
INFO:root:Epoch[1] Batch [625]	Speed: 29850.34 samples/sec	Train-RMSE=0.957677
INFO:root:Epoch[1] Batch [1250]	Speed: 31321.70 samples/sec	Train-RMSE=0.967071
INFO:root:Epoch[1] Resetting Data Iterator
INFO:root:Epoch[1] Time cost=1.404
INFO:root:Epoch[1] Validation-RMSE=0.974737
INFO:root:Epoch[2] Batch [625]	Speed: 36233.60 samples/sec	Train-RMSE=0.946655
INFO:root:Epoch[2] Batch [1250]	Speed: 30117.94 samples/sec	Train-RMSE=0.948026
INFO:root:Epoch[2] Resetting Data Iterator
INFO:root:Epoch[2] Time cost=1.313
INFO:root:Epoch[2] Validation-RMSE=0.964631
INFO:root:Epoch[3] Batch [625]	Speed: 32270.70 samples/sec	Train-RMSE=0.946844
INFO:root:Epoch[3] Batch [1250]	Speed: 

In [40]:
# @@@ AUTOTEST_OUTPUT_IGNORED_CELL
def get_one_layer_dropout_mlp(hidden, k):
    # input
    user = mx.symbol.Variable('user')
    item = mx.symbol.Variable('item')
    score = mx.symbol.Variable('score')
    # user latent features
    user = mx.symbol.Embedding(data = user, input_dim = max_user, output_dim = k)
    user = mx.symbol.Activation(data = user, act_type="relu")
    user = mx.symbol.FullyConnected(data = user, num_hidden = hidden)
    user = mx.symbol.Dropout(data=user, p=0.5)
    # item latent features
    item = mx.symbol.Embedding(data = item, input_dim = max_item, output_dim = k)
    item = mx.symbol.Activation(data = item, act_type="relu")
    item = mx.symbol.FullyConnected(data = item, num_hidden = hidden)
    item = mx.symbol.Dropout(data=item, p=0.5)    
    # predict by the inner product
    pred = user * item
    pred = mx.symbol.sum_axis(data = pred, axis = 1)
    pred = mx.symbol.Flatten(data = pred)
    # loss layer
    pred = mx.symbol.LinearRegressionOutput(data = pred, label = score)
    return pred
train(get_one_layer_mlp(256, 512), batch_size=64, num_epoch=10, learning_rate=.05)

INFO:root:Start training with [cpu(0)]




INFO:root:Epoch[0] Batch [625]	Speed: 5518.09 samples/sec	Train-RMSE=1.145038
INFO:root:Epoch[0] Batch [1250]	Speed: 5441.72 samples/sec	Train-RMSE=0.983353
INFO:root:Epoch[0] Resetting Data Iterator
INFO:root:Epoch[0] Time cost=7.391
INFO:root:Epoch[0] Validation-RMSE=0.994780
INFO:root:Epoch[1] Batch [625]	Speed: 5410.85 samples/sec	Train-RMSE=0.954181
INFO:root:Epoch[1] Batch [1250]	Speed: 5341.76 samples/sec	Train-RMSE=0.955525
INFO:root:Epoch[1] Resetting Data Iterator
INFO:root:Epoch[1] Time cost=7.529
INFO:root:Epoch[1] Validation-RMSE=0.963774
INFO:root:Epoch[2] Batch [625]	Speed: 5020.56 samples/sec	Train-RMSE=0.940759
INFO:root:Epoch[2] Batch [1250]	Speed: 5705.73 samples/sec	Train-RMSE=0.948125
INFO:root:Epoch[2] Resetting Data Iterator
INFO:root:Epoch[2] Time cost=7.575
INFO:root:Epoch[2] Validation-RMSE=0.959221
INFO:root:Epoch[3] Batch [625]	Speed: 5144.07 samples/sec	Train-RMSE=0.939967
INFO:root:Epoch[3] Batch [1250]	Speed: 5263.33 samples/sec	Train-RMSE=0.947161
INFO:r