In [1]:
import os, sys

In [2]:
os.environ['DGL_HOME'] = os.getcwd()
os.environ['PYTHONPATH'] = '{}/python:{}'.format(os.environ['DGL_HOME'], os.environ['PYTHONPATH'])
os.environ['DGL_LIBRARY_PATH'] = '{}/build'.format(os.environ['DGL_HOME'])
os.environ['DGLBACKEND'] = 'mxnet'
#os.environ['MXNET_ENGINE_TYPE'] = 'NaiveEngine'
sys.path.append('{}/python'.format(os.environ['DGL_HOME']))

In [3]:
import argparse

In [4]:
parser = argparse.ArgumentParser(description='GCMC')
parser.add_argument("--dropout", type=float, default=0.2,
        help="dropout probability")
parser.add_argument("--gpu", type=int, default=-1,
        help="gpu")
parser.add_argument("--lr", type=float, default=0.01,
        help="learning rate")
parser.add_argument("--wd", type=float, default=1e-4,
        help="weight decay")
parser.add_argument("--n-epochs", type=int, default=200,
        help="number of training epochs")
parser.add_argument("--n-hidden", type=int, default=6,
        help="number of hidden gcn units")
parser.add_argument("--n-layers", type=int, default=1,
        help="number of hidden gcn layers")
parser.add_argument("--normalization",
        choices=['sym','left'], default=None,
        help="graph normalization types (default=None)")

_StoreAction(option_strings=['--normalization'], dest='normalization', nargs=None, const=None, default=None, type=None, choices=['sym', 'left'], help='graph normalization types (default=None)', metavar=None)

In [5]:
args = parser.parse_args(['--normalization','left',])
print(args)

Namespace(dropout=0.2, gpu=-1, lr=0.01, n_epochs=200, n_hidden=6, n_layers=1, normalization='left', wd=0.0001)


In [6]:
from dgl.data.utils import download, get_download_dir, extract_archive

In [7]:
from zipfile import ZipFile

In [8]:
_urls = {
    'ml-100k' : 'http://files.grouplens.org/datasets/movielens/ml-100k.zip',
    'ml-1m'   : 'http://files.grouplens.org/datasets/movielens/ml-1m.zip',
#     'ml-10m'  : 'http://files.grouplens.org/datasets/movielens/ml-10m.zip',
}

In [9]:
_path = {
    'ml-100k':'ml-100k/u.data',
    'ml-1m':'ml-1m/ratings.dat',
#     'ml-10m':'ml-10M100K/ratings.dat',
}

In [10]:
get_sep = lambda k:{
    'ml-1m'  : '::',
#     'ml-10m' : '::',
}.get(k, None)

In [11]:
dataset = 'ml-100k'

In [12]:
download(_urls[dataset],
         '{}/{}.zip'.format(get_download_dir(), dataset))

'/home/ec2-user/yifeim-work/dgl/python/dgl/data/../../../_download/ml-100k.zip'

In [13]:
zf = ZipFile('{}/{}.zip'.format(get_download_dir(), dataset))

In [14]:
zf.namelist()

['ml-100k/',
 'ml-100k/allbut.pl',
 'ml-100k/mku.sh',
 'ml-100k/README',
 'ml-100k/u.data',
 'ml-100k/u.genre',
 'ml-100k/u.info',
 'ml-100k/u.item',
 'ml-100k/u.occupation',
 'ml-100k/u.user',
 'ml-100k/u1.base',
 'ml-100k/u1.test',
 'ml-100k/u2.base',
 'ml-100k/u2.test',
 'ml-100k/u3.base',
 'ml-100k/u3.test',
 'ml-100k/u4.base',
 'ml-100k/u4.test',
 'ml-100k/u5.base',
 'ml-100k/u5.test',
 'ml-100k/ua.base',
 'ml-100k/ua.test',
 'ml-100k/ub.base',
 'ml-100k/ub.test']

In [15]:
users = {}
items = {}
ratings = {}
with zf.open(_path[dataset]) as f:
    for line in f:
        user, item, rating, _ = line.decode('latin1').strip('\n').split(get_sep(dataset))
        
        users.setdefault(user, 0)
        users[user] += 1
        
        items.setdefault(item, 0)
        items[item] += 1
        
        ratings.setdefault(rating, 0)
        ratings[rating] += 1

user_idx = {k:i
            for i,k in enumerate(sorted(users))}

item_idx = {k:i+len(users)
            for i,k in enumerate(sorted(items))}

rating_idx = {k:i+1 for i,k in
              enumerate(sorted(ratings.keys()))}

print(rating_idx)
n_classes = max(rating_idx.values()) + 1

{'1': 1, '2': 2, '3': 3, '4': 4, '5': 5}


In [16]:
n_classes

6

# Create graph

In [17]:
import mxnet as mx

In [18]:
from dgl import DGLGraph

In [19]:
ctx=mx.gpu()

In [20]:
G = DGLGraph()

In [21]:
G.add_nodes(len(user_idx) + len(item_idx))

In [22]:
G.ndata['is_user'] = mx.nd.concat(
    mx.nd.ones(len(user_idx), ctx=ctx),
    mx.nd.zeros(len(item_idx), ctx=ctx),
    dim=0)

In [23]:
u = []
v = []
r = []
with zf.open(_path[dataset]) as f:
    for line in f:
        user, item, rating, _ = line.decode('latin1').strip('\n').split(get_sep(dataset))
        u.append(user_idx[user])
        v.append(item_idx[item])
        r.append(rating_idx[rating])

G.add_edges(u, v, {
    'r': mx.nd.array(r, dtype='float32', ctx=ctx),
})

In [24]:
G.edata['_rand'] = mx.nd.random.uniform_like(G.edata['r'])

In [25]:
G.edata['is_train'] = G.edata['_rand'] <= 0.8

In [26]:
G.edata['is_test'] = G.edata['_rand'] > 0.8

In [27]:
G.add_edges(*reversed(G.all_edges()), data=G.edata)

In [28]:
G.edata

{'r': 
[ 3.  3.  1. ...,  1.  2.  3.]
<NDArray 200000 @gpu(0)>, '_rand': 
[ 0.66865093  0.17409194  0.38500249 ...,  0.09558475  0.52363914
  0.3631283 ]
<NDArray 200000 @gpu(0)>, 'is_train': 
[ 1.  1.  1. ...,  1.  1.  1.]
<NDArray 200000 @gpu(0)>, 'is_test': 
[ 0.  0.  0. ...,  0.  0.  0.]
<NDArray 200000 @gpu(0)>}

# degree and average ratings

In [29]:
from functools import partial
from mxnet import gluon

In [30]:
G.update_all(
    lambda edge : {
        # n_edges, n_classes
        'm': mx.nd.one_hot(edge.data['r'], n_classes) *
             edge.data['is_train'].expand_dims(1)
    },
    lambda node : {
        # n_nodes, n_edges, n_classes
        'degree' : mx.nd.sum(node.mailbox['m'], axis=1)
        # n_nodes, n_classes
    },
)



In [31]:
G.ndata

{'is_user': 
[ 1.  1.  1. ...,  0.  0.  0.]
<NDArray 2625 @gpu(0)>, 'degree': 
[[  0.  19.  22.  43.  67.  64.]
 [  0.   0.   0.  13.  93.  42.]
 [  0.   4.   7.  18.  15.   2.]
 ..., 
 [  0.   6.   2.   5.   2.   0.]
 [  0.   3.   3.   2.   1.   2.]
 [  0.   1.   3.   1.   3.   0.]]
<NDArray 2625x6 @gpu(0)>}

In [32]:
def quad_naive(edge):
    pred = (mx.nd.dot(edge.src['h'], mx.nd.arange(n_classes, ctx=ctx))+
            mx.nd.dot(edge.dst['h'], mx.nd.arange(n_classes, ctx=ctx)))/2.
    return {'pred' : pred}

In [33]:
G.ndata['h'] = G.ndata['degree'] / (
    G.ndata['degree'].sum(axis=1, keepdims=True) + 1e-10)
G.apply_edges(quad_naive)
del G.ndata['h']
avg_pred = G.edata.pop('pred')



In [34]:
l2_loss = gluon.loss.L2Loss()

In [35]:
print('training error', (2. * l2_loss(
    avg_pred,
    G.edata['r'],
    G.edata['is_train']
).sum() / (G.edata['is_train']).sum()).sqrt().asscalar())

training error 0.96376991272


In [36]:
print('testing error', (2. * l2_loss(
    avg_pred,
    G.edata['r'],
    G.edata['is_test']
).sum() / (G.edata['is_test']).sum()).sqrt().asscalar())

testing error 0.984938442707


# Learn quad_fcn

In [37]:
from mxnet import gluon
from functools import partial
import time
import numpy as np

In [38]:
class QuadDense(gluon.Block):
    def __init__(self):
        super(QuadDense, self).__init__()
        self.dense = gluon.nn.Dense(1, flatten=True)
        
    def forward(self, edge):
        feat = (edge.src['h'] + edge.dst['h']) / 2.
        pred = self.dense(feat)
        return {'pred':pred}

In [39]:
quad_dense = QuadDense()

In [40]:
quad_dense.collect_params().initialize(ctx=ctx)

In [41]:
# use optimizer
trainer = gluon.Trainer(
    quad_dense.collect_params(),
    'adam',
    {'learning_rate': 0.1, 'wd': 1e-4,})

# initialize graph
dur = []
for epoch in range(args.n_epochs):
    t0 = time.time()
    # forward
    G.ndata['h'] = G.ndata['degree'] / (
        G.ndata['degree'].sum(axis=1, keepdims=True) + 1e-10)
    with mx.autograd.record():
        G.apply_edges(quad_dense)
        pred = G.edata.pop('pred').reshape((-1,))
        loss = l2_loss(pred, G.edata['r'], G.edata['is_train'])

    avg_loss = (loss.sum() / G.edata['is_train'].sum()).asscalar()
    
    loss.backward()
    trainer.step(len(G))

    dur.append(time.time() - t0)
    print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}".format(
        epoch, avg_loss, np.mean(dur), len(G.edges) / np.mean(dur) / 1000))

Epoch 00000 | Loss 6.7491 | Time(s) 0.0608 | ETputs(KTEPS) 3291.66
Epoch 00001 | Loss 6.0696 | Time(s) 0.1050 | ETputs(KTEPS) 1905.32
Epoch 00002 | Loss 5.4313 | Time(s) 0.1193 | ETputs(KTEPS) 1676.26
Epoch 00003 | Loss 4.8348 | Time(s) 0.1263 | ETputs(KTEPS) 1583.23
Epoch 00004 | Loss 4.2806 | Time(s) 0.1306 | ETputs(KTEPS) 1531.84
Epoch 00005 | Loss 3.7691 | Time(s) 0.1334 | ETputs(KTEPS) 1498.83
Epoch 00006 | Loss 3.3002 | Time(s) 0.1354 | ETputs(KTEPS) 1477.05
Epoch 00007 | Loss 2.8739 | Time(s) 0.1369 | ETputs(KTEPS) 1460.71
Epoch 00008 | Loss 2.4898 | Time(s) 0.1382 | ETputs(KTEPS) 1447.68
Epoch 00009 | Loss 2.1471 | Time(s) 0.1391 | ETputs(KTEPS) 1437.89
Epoch 00010 | Loss 1.8450 | Time(s) 0.1399 | ETputs(KTEPS) 1429.99
Epoch 00011 | Loss 1.5821 | Time(s) 0.1406 | ETputs(KTEPS) 1422.75
Epoch 00012 | Loss 1.3570 | Time(s) 0.1411 | ETputs(KTEPS) 1417.57
Epoch 00013 | Loss 1.1676 | Time(s) 0.1416 | ETputs(KTEPS) 1412.32
Epoch 00014 | Loss 1.0118 | Time(s) 0.1420 | ETputs(KTEPS) 140

In [42]:
print('training error', np.sqrt(avg_loss*2))

training error 0.929786292205


In [43]:
print('testing error', (
    2. * l2_loss(pred, G.edata['r'], G.edata['is_test']).sum() /
    G.edata['is_test'].sum()
).sqrt().asscalar())

testing error 0.959718942642


# Model

In [44]:
def gcn_msg(edge, normalization=None):
    # n_edges, n_classes
    train_r = mx.nd.broadcast_mul(
        mx.nd.one_hot(edge.data['r'], n_classes),
        edge.data['is_train'].expand_dims(1)
    )
    # n_edges, n_classes, n_hidden
    msg = mx.nd.broadcast_mul(
        edge.src['h'],
        train_r.expand_dims(2)
    )
    if normalization == 'sym':
        msg = mx.nd.broadcast_div(
            msg,
            (edge.src['degree'] + 1e-10).sqrt().expand_dims(2)
        )
    return {'m': msg}

In [45]:
def gcn_reduce(node, normalization=None):
    # n_nodes, n_classes, n_hidden
    accum = mx.nd.sum(node.mailbox['m'], axis=1)
    if normalization == 'sym':
        accum = mx.nd.broadcast_div(
            accum,
            (node.data['degree'] + 1e-10).sqrt().expand_dims(2)
        )
    elif normalization == 'left':
        accum = mx.nd.broadcast_div(
            accum,
            (node.data['degree'] + 1e-10).expand_dims(2)
        )
    return {'accum' : accum}

In [46]:
class NodeUpdateModule(gluon.Block):
    def __init__(self, out_feats, activation=None, dropout=0):
        super(NodeUpdateModule, self).__init__()
        self.linear = gluon.nn.Dense(out_feats,
                                     activation=activation,
                                     flatten=False)
        self.dropout = dropout

    def forward(self, node):
        accum = self.linear(node.data['accum'])
        if self.dropout:
            accum = mx.nd.Dropout(accum, p=self.dropout)
        return {'h': node.data['h'] + accum}

In [47]:
class ShareWeights(gluon.HybridBlock):
    def __init__(self, n_classes):
        super(ShareWeights, self).__init__()
        with self.name_scope():
            self.triu = self.params.get_constant(
                'triu',
                np.triu(np.ones((n_classes, n_classes))))
            
    def hybrid_forward(self, F, batch_class_hidden, triu):
        return F.dot(
            batch_class_hidden.swapaxes(1,2),
            triu
        ).swapaxes(1,2)

In [48]:
class GCMC(gluon.Block):
    def __init__(self,
                 in_feats,
                 n_hidden,
                 n_classes,
                 n_layers,
                 activation,
                 dropout,
                 normalization,
                 share_weights=True,
                 ):
        super(GCMC, self).__init__()
        self.n_hidden   = n_hidden
        self.n_classes  = n_classes
        self.dropout    = dropout

        with self.name_scope():
            self.inp_layer  = gluon.nn.Embedding(
                in_feats, n_classes * n_hidden)
            if share_weights:
                self.share_weights = ShareWeights(n_classes)
            else:
                self.share_weights = gluon.contrib.nn.Identity()

            self.gcn_msg    = partial(gcn_msg, normalization=normalization)
            self.gcn_reduce = partial(gcn_reduce, normalization=normalization)
            
            self.conv_layers = gluon.nn.Sequential()
            for i in range(n_layers):
                self.conv_layers.add(
                    NodeUpdateModule(n_hidden, activation, dropout))

            self.quad_fcn   = QuadDense()


    def forward(self, g, features):
        emb_inp = self.inp_layer(features)
        emb_inp = self.share_weights(
            emb_inp
            .reshape((-1, self.n_classes, self.n_hidden))
        )
        if self.dropout:
            emb_inp = mx.nd.Dropout(emb_inp, p=self.dropout)
        g.ndata['h'] = emb_inp
        for layer in self.conv_layers:
            g.update_all(self.gcn_msg, self.gcn_reduce, layer)

        g.apply_edges(self.quad_fcn)

        return g.edata.pop('pred')

# train

In [49]:
import numpy as np

In [50]:
model = GCMC(len(G), args.n_hidden, n_classes, args.n_layers, 'relu', args.dropout, args.normalization)

In [51]:
model

GCMC(
  (inp_layer): Embedding(2625 -> 36, float32)
  (share_weights): ShareWeights(
  
  )
  (conv_layers): Sequential(
    (0): NodeUpdateModule(
      (linear): Dense(None -> 6, Activation(relu))
    )
  )
  (quad_fcn): QuadDense(
    (dense): Dense(None -> 1, linear)
  )
)

In [52]:
model.collect_params().initialize(ctx=ctx)

In [53]:
model.collect_params()

gcmc0_ (
  Parameter gcmc0_embedding0_weight (shape=(2625, 36), dtype=float32)
  Constant gcmc0_shareweights0_triu (shape=(6, 6), dtype=<class 'numpy.float32'>)
  Parameter gcmc0_dense0_weight (shape=(6, 0), dtype=float32)
  Parameter gcmc0_dense0_bias (shape=(6,), dtype=float32)
  Parameter gcmc0_dense1_weight (shape=(1, 0), dtype=float32)
  Parameter gcmc0_dense1_bias (shape=(1,), dtype=float32)
)

In [54]:
loss_fcn = gluon.loss.L2Loss()

In [55]:
features = mx.nd.arange(len(G), ctx=ctx)

In [56]:
n_edges = len(G.edges)

In [57]:
# use optimizer
trainer = gluon.Trainer(
    model.collect_params(),
    'adam',
    {'learning_rate': 0.01, 'wd': 1e-4,})

# initialize graph
dur = []
for epoch in range(args.n_epochs):
    t0 = time.time()
    # forward
    with mx.autograd.record():
        pred = model(G, features)
        loss = loss_fcn(pred.reshape((-1,)), G.edata['r'], G.edata['is_train'])
    avg_loss = (loss.sum() / G.edata['is_train'].sum()).asscalar()
    
    loss.backward()
    trainer.step(len(G))

    dur.append(time.time() - t0)
    print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}".format(
        epoch, avg_loss, np.mean(dur), n_edges / np.mean(dur) / 1000))

Epoch 00000 | Loss 6.8678 | Time(s) 5.2667 | ETputs(KTEPS) 37.97
Epoch 00001 | Loss 6.7679 | Time(s) 4.2807 | ETputs(KTEPS) 46.72
Epoch 00002 | Loss 6.6272 | Time(s) 3.6470 | ETputs(KTEPS) 54.84
Epoch 00003 | Loss 6.4376 | Time(s) 3.9382 | ETputs(KTEPS) 50.78
Epoch 00004 | Loss 6.1814 | Time(s) 3.8315 | ETputs(KTEPS) 52.20
Epoch 00005 | Loss 5.8688 | Time(s) 3.7398 | ETputs(KTEPS) 53.48
Epoch 00006 | Loss 5.4921 | Time(s) 3.3636 | ETputs(KTEPS) 59.46
Epoch 00007 | Loss 5.0301 | Time(s) 3.7345 | ETputs(KTEPS) 53.56
Epoch 00008 | Loss 4.5077 | Time(s) 3.6316 | ETputs(KTEPS) 55.07
Epoch 00009 | Loss 3.9203 | Time(s) 3.5177 | ETputs(KTEPS) 56.85
Epoch 00010 | Loss 3.2853 | Time(s) 3.6667 | ETputs(KTEPS) 54.54
Epoch 00011 | Loss 2.6410 | Time(s) 3.6249 | ETputs(KTEPS) 55.17
Epoch 00012 | Loss 1.9857 | Time(s) 3.5916 | ETputs(KTEPS) 55.69
Epoch 00013 | Loss 1.4421 | Time(s) 3.4130 | ETputs(KTEPS) 58.60
Epoch 00014 | Loss 0.9923 | Time(s) 3.6047 | ETputs(KTEPS) 55.48
Epoch 00015 | Loss 0.6931

In [58]:
test_pred = model(G, features)

In [59]:
print('training loss', (2. * l2_loss(
    test_pred.reshape((-1,)),
    G.edata['r'],
    G.edata['is_train'],
).sum() / G.edata['is_train'].sum()).sqrt().asscalar())

training loss 0.911303


In [60]:
print('testing loss', (2. * l2_loss(
    test_pred.reshape((-1,)),
    G.edata['r'],
    G.edata['is_test'],
).sum() / G.edata['is_test'].sum()).sqrt().asscalar())

testing loss 0.946713
