In [1]:
from sklearn import metrics
import mxnet as mx
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import data as gdata, loss as gloss, nn
import pickle
import random
import time
import sys
sys.path.append('/data/CaoZhong/utils/')
from my_utils import *
from tqdm import tqdm_notebook

## 加载数据

In [2]:
# Run configuration: compute device, batch sizes, model tag and dataset location.
ctx = mx.gpu(4)  # GPU device id 4; NOTE(review): assumes the host exposes at least five GPUs
train_batch_size = 32  # also read as a module-level name inside train()
test_batch_size = 512
model_name = 'attention_gluon'  # used at the end of the notebook to build the results file name
data_path = '../data/dataset_sub_gluon.pkl'
# DataIter is not defined in this notebook; presumably it is provided by
# `from my_utils import *` -- TODO confirm.
data_iter = DataIter(data_path, train_batch_size, test_batch_size)
user_count, item_count, cate_count = data_iter.get_count()
train_iter, test_iter = data_iter.get_data_iter()

In [3]:
# Print the shape of each named field in the first batch of both iterators
# (plus the device for the training batch), then stop iterating.
# zip() stops at the shorter sequence, so any batch element beyond the seven
# named fields is simply not printed.
field_names = ['uid','hist','hist_cate','pre','cate','label','sl']

for batch in train_iter:
    for name, data in zip(field_names, batch):
        print(name, 'shape: ', data.shape, data.context)
    break  # only the first training batch is inspected

for batch in test_iter:
    for name, data in zip(field_names, batch):
        print(name, 'shape: ', data.shape)
    break  # only the first test batch is inspected

uid shape:  (32,) cpu(0)
hist shape:  (32, 364) cpu(0)
hist_cate shape:  (32, 364) cpu(0)
pre shape:  (32,) cpu(0)
cate shape:  (32,) cpu(0)
label shape:  (32,) cpu(0)
sl shape:  (32,) cpu(0)
uid shape:  (512,)
hist shape:  (512, 430)
hist_cate shape:  (512, 430)
pre shape:  (512,)
cate shape:  (512,)
label shape:  (512,)
sl shape:  (512,)


## 注意力机制（1）

In [167]:
class Attention(nn.Block):
    """Attention pooling over a sequence of key vectors, scored against a query.

    The scorer is a two-layer, bias-free network applied independently at every
    time step (``flatten=False``): a tanh layer with ``attention_size`` units
    followed by a single linear unit.
    """

    def __init__(self, attention_size, **kwargs):
        super(Attention, self).__init__(**kwargs)
        self.model = nn.Sequential()
        self.model.add(nn.Dense(attention_size, activation='tanh', use_bias=False, flatten=False))
        self.model.add(nn.Dense(1, use_bias=False, flatten=False))

    def forward(self, keys, query, sl):
        """Return the attention-weighted sum of ``keys`` over the time axis.

        Parameters:
            keys:     [T, B, H]  sequence of key vectors, time-major
            query:    [B, H]     one query vector per batch element
            sl:       [B]        valid length of each sequence

        Returns:
            [B, H] weighted sum of the keys.
        """
        # Repeat the query along a new leading time axis so it can be paired
        # with every key: [B, H] -> [T, B, H].
        query = nd.broadcast_axis(query.expand_dims(0), axis=0, size=keys.shape[0])
        paired = nd.concat(keys, query, dim=2)   # [T, B, 2H]
        scores = self.model(paired)              # [T, B, 1]
        # Overwrite the scores of time steps at or beyond each sequence length
        # with a very large negative number so they get ~0 weight after softmax.
        scores = nd.SequenceMask(scores, sl.reshape((-1)), use_sequence_length=True, value=(-2 ** 32 + 1))
        weights = nd.softmax(scores, axis=0)     # [T, B, 1], normalised over time

        return (weights * keys).sum(axis=0)      # [T, B, 1] * [T, B, H] -> [B, H]


In [162]:
class HybridNet(nn.HybridBlock):
    """Scratch block for trying ``expand_dims`` / ``broadcast_axis`` inside ``hybrid_forward``."""

    def __init__(self, **kwargs):
        super(HybridNet, self).__init__(**kwargs)
        # `hidden` is registered as a child but is not called by hybrid_forward.
        self.hidden = nn.Dense(10)
        self.output = nn.Dense(2)

    def hybrid_forward(self, F, x, y):
        """Apply ``self.output`` to the element-wise product of the two inputs.

        Each input first gets a new leading axis and is then broadcast along
        that axis to ``len()`` of the expanded value.
        """
        lifted_x = x.expand_dims(0)
        lifted_y = y.expand_dims(0)
        # NOTE(review): for NDArray inputs len() of the expanded array is 1
        # (the axis just added), so these broadcasts leave the shape unchanged.
        tiled_x = F.broadcast_axis(lifted_x, axis=0, size=len(lifted_x))
        tiled_y = F.broadcast_axis(lifted_y, axis=0, size=len(lifted_y))
        return self.output(tiled_x * tiled_y)

In [165]:
# Smoke test for HybridNet: build it, initialise its parameters and run one
# forward pass on random input.
net = HybridNet()
net.initialize()
# net.hybridize()
x = nd.random.normal(shape=(1, 4))
c = nd.random.normal(shape=(1,2))  # drawn but not passed to the call below
net(x,x)  # the same array is used for both inputs


[[ 0.06148639  0.0229429 ]]
<NDArray 1x2 @cpu(0)>

In [170]:
# Sanity-check the attention block on a tiny random input.
T, B, H = 3, 1, 4
att_model = Attention(5)
att_model.initialize()
sl = nd.array([2])  # only the first 2 of the T=3 time steps are valid
history = nd.random_uniform(shape=(T, B, H))
item = nd.ones(shape=(B, H))
# Call the instance built above; the previous `model_t` is not defined anywhere
# in this notebook and only worked from leftover kernel state.
c = att_model(history, item, sl)
print(c)


[[ 0.79998255  0.53840852  0.4154864   0.31237507]]
<NDArray 1x4 @cpu(0)>


## 建立模型

### nd.array

In [7]:
# class Model(nn.Block):
#     def __init__(self, item_count, cate_count, embed_size, attention_size, num_hiddens,  ctx, **kwargs):
#         super(Model, self).__init__(**kwargs)
#         self.num_hiddens = num_hiddens
#         self.item_embedding = nn.Embedding(item_count, embed_size)
#         self.cate_embedding = nn.Embedding(cate_count, embed_size)
#         self.batch_normal_layer = nn.BatchNorm()
#         self.dense_layer = nn.Dense(num_hiddens)
        
#         self.attention = attention_model(attention_size)
        
#         self.mlp = nn.Sequential()
#         self.mlp.add(nn.BatchNorm())
#         self.mlp.add(nn.Dense(80, activation='sigmoid'))
#         self.mlp.add(nn.Dense(40, activation='sigmoid'))
#         self.mlp.add(nn.Dense(1, activation=None))
    
#     def forward(self, item, cate, hist, hist_cate, ls,):
        
#         item = item.reshape((-1))                         # [B]
#         item_emb_w = self.item_embedding(item)            # [B, E]
        
#         cate_emb_w = self.cate_embedding(cate)
#         i_emb = nd.concat(item_emb_w, cate_emb_w,dim=1)   # [B, 2E]
        
#         hi_emb = self.item_embedding(hist)                 # [B, T, E]
#         hc_emb = self.cate_embedding(hist_cate)                 # [B, T, E]
#         h_emb = nd.concat(hi_emb, hc_emb, dim=-1)          # [B, T, 2E]
          
#         user_emb = attention_forward(self.attention, h_emb.swapaxes(0,1), i_emb, ls)  # [B, 2E]
#         user_emb = self.batch_normal_layer(user_emb)
        
#         user_emb = self.dense_layer(user_emb)
#         din = nd.concat(user_emb, i_emb, dim=-1)
#         score = self.mlp(din)
#         return score

### 混合模型

In [13]:
class HybridModel(nn.HybridBlock):
    """Scoring model: attention-pooled behaviour history combined with the candidate item.

    Item ids and category ids are embedded separately and concatenated. The
    history embeddings are pooled by an attention module against the candidate
    embedding, projected by a dense layer, concatenated with the candidate
    embedding again and fed to a small MLP that outputs one raw score per row.

    Parameters:
        item_count:      size of the item-id vocabulary
        cate_count:      size of the category-id vocabulary
        embed_size:      width E of each embedding table
        attention_size:  passed to ``attention_model``
        num_hiddens:     units of the dense layer applied to the pooled history
        ctx:             accepted for call compatibility; not used by this class
    """

    def __init__(self, item_count, cate_count, embed_size, attention_size, num_hiddens,  ctx, **kwargs):
        super(HybridModel, self).__init__(**kwargs)
        self.num_hiddens = num_hiddens
        self.item_embedding = nn.Embedding(item_count, embed_size)
        self.cate_embedding = nn.Embedding(cate_count, embed_size)
        self.batch_normal_layer = nn.BatchNorm()
        self.dense_layer = nn.Dense(num_hiddens)

        # `attention_model` is not defined in this notebook; presumably it is
        # provided by `from my_utils import *` -- TODO confirm.
        self.attention = attention_model(attention_size)

        self.mlp = nn.HybridSequential()
        self.mlp.add(nn.BatchNorm())
        self.mlp.add(nn.Dense(80, activation='sigmoid'))
        self.mlp.add(nn.Dense(40, activation='sigmoid'))
        self.mlp.add(nn.Dense(1, activation=None))

    # A HybridBlock must implement `hybrid_forward`; the inherited `forward`
    # supplies `F` (mx.nd or mx.sym). Defining `forward(self, F, ...)` directly
    # made `net(item, cate, ...)` bind `item` to `F` and fail on a missing argument.
    def hybrid_forward(self, F, item, cate, hist, hist_cate, ls, time_length):
        """Compute one raw (pre-sigmoid) score per candidate item.

        Parameters:
            F:            operator namespace supplied by HybridBlock
            item:         candidate item ids, flattened to [B]
            cate:         candidate category ids
            hist:         history item ids, [B, T]
            hist_cate:    history category ids, [B, T]
            ls:           passed through to ``attention_forward``
                          (callers in this notebook pass the sequence lengths)
            time_length:  passed through to ``attention_forward`` unchanged

        Returns:
            Output of the final ``Dense(1)`` layer of ``self.mlp``.
        """
        item = item.reshape((-1))                         # [B]
        item_emb_w = self.item_embedding(item)            # [B, E]

        cate_emb_w = self.cate_embedding(cate)
        i_emb = F.concat(item_emb_w, cate_emb_w,dim=1)   # [B, 2E]

        hi_emb = self.item_embedding(hist)                 # [B, T, E]
        hc_emb = self.cate_embedding(hist_cate)            # [B, T, E]
        h_emb = F.concat(hi_emb, hc_emb, dim=-1)           # [B, T, 2E]

        # History is handed over time-major ([T, B, 2E]). `attention_forward`
        # is not defined in this notebook; presumably from my_utils -- TODO confirm.
        user_emb = attention_forward(F, self.attention, h_emb.swapaxes(0,1), i_emb, ls, time_length)  # [B, 2E]
        user_emb = self.batch_normal_layer(user_emb)

        user_emb = self.dense_layer(user_emb)
        din = F.concat(user_emb, i_emb, dim=-1)
        score = self.mlp(din)
        return score

In [14]:
#
# T, B= 5, 2
# item_count, cate_count, embed_size, attention_size,num_hiddens = 4, 3, 3, 3, 6
# cate_list = nd.array([0, 1, 3, 0], ctx=ctx)
# history = nd.array([[1, 1, 2,3,0],[2,2,2,0,0]], ctx=ctx)
# hist_cate = cate_list[history]
# item = nd.array([3,4], ctx=ctx)
# cate = cate_list[item]
# sl = nd.array([4,3], ctx=ctx)
# net = Model(item_count, cate_count, embed_size,attention_size, num_hiddens, ctx)
# net.initialize(init=init.Xavier(),force_reinit=True, ctx=ctx)
# net(item, cate, history, hist_cate, sl)

In [15]:
def train(net, train_iter, test_iter, lr, num_epochs, ctx):
    """Train ``net`` with SGD, recording mean loss and test AUC every 1000 steps.

    Relies on module-level names that are not parameters: ``loss`` (the loss
    function), ``train_batch_size`` (passed to ``trainer.step``), and the
    helpers ``eval_auc`` / ``tip_info``, which are not defined in this notebook
    (presumably from my_utils -- TODO confirm).

    Parameters:
        net:         model called as ``net(item, cate, hist, hist_cate, sl, time_length)``
        train_iter:  iterable of training batches; all but the last element of
                     each batch are moved to ``ctx``
        test_iter:   forwarded to ``eval_auc``
        lr:          SGD learning rate
        num_epochs:  number of passes over ``train_iter``
        ctx:         device the batch arrays are copied to

    Returns:
        ``(loss_list, auc_list, x_vals)``: one entry per 1000 global steps.
    """
    log_every = 1000  # steps between evaluations / recorded points
    auc_list, loss_list, x_vals = [], [], []
    trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': lr})

    global_step = 1
    run_started = time.time()      # never reset: epoch messages report time since training began
    window_started = time.time()   # reset after every logging point
    # AUC before any parameter update, as a baseline.
    print('auc: %.4f' % (eval_auc(net, test_iter, ctx)))

    epoch_bar = tqdm_notebook(range(1, num_epochs + 1))
    for epoch in epoch_bar:
        running_loss = 0.0
        batch_bar = tqdm_notebook(train_iter)
        for batch in batch_bar:
            # Everything except the trailing element is copied to the target
            # device; the trailing element is passed to the model as-is.
            on_device = [arr.as_in_context(ctx) for arr in batch[:-1]]
            uid, hist, hist_cate, item, cate, label, sl = on_device
            time_length = batch[-1]

            with autograd.record():
                pred = net(item, cate, hist, hist_cate, sl, time_length)
                batch_loss = loss(pred, label)
            batch_loss.backward()
            trainer.step(train_batch_size)

            running_loss += batch_loss.mean().asscalar()

            if global_step % log_every == 0:
                test_auc = eval_auc(net, test_iter, ctx)
                mean_loss = running_loss / log_every
                elapsed = time.time() - window_started
                tip = "epoch %d, global step:%d, loss %.4f, test auc:%.4f, time:%.2f" % (
                    epoch, global_step, mean_loss, test_auc, elapsed)
                tip_info(tip, out=False)
                batch_bar.set_description_str(tip)
                loss_list.append(mean_loss)
                auc_list.append(test_auc)
                x_vals.append(global_step // log_every)
                # Start a fresh accumulation window.
                running_loss = 0.0
                window_started = time.time()
            global_step += 1

        tip = 'epoch %d done, cost time:%.2f' % (epoch, time.time() - run_started)
        tip_info(tip, out=False)
        epoch_bar.set_description_str(tip)
    return loss_list, auc_list, x_vals

In [16]:
# Loss applied to the model's raw scores; train() reads this module-level name.
# With its default settings the Gluon loss applies the sigmoid itself.
loss = gloss.SigmoidBinaryCrossEntropyLoss()
# Positional arguments follow HybridModel.__init__:
# embed_size=64, attention_size=10, num_hiddens=128.
net = HybridModel(item_count, cate_count, 64, 10, 128, ctx)
# force_reinit=True lets this cell be re-run to reset the parameters.
net.initialize(init=init.Xavier(),force_reinit=True, ctx=ctx)

In [17]:
loss_list, auc_list, x_vals= train(net,train_iter, test_iter, 0.1, 30, ctx)

auc: 0.4981


HBox(children=(IntProgress(value=0, max=30), HTML(value='')))

HBox(children=(IntProgress(value=0, max=2151), HTML(value='')))

HBox(children=(IntProgress(value=0, max=2151), HTML(value='')))

KeyboardInterrupt: 

In [12]:
# Persist the training curves. The two lists are written as consecutive pickle
# records in one file, so a reader must call pickle.load() twice, in this order:
# loss_list first, then auc_list. x_vals is not saved.
file_name = 'train_result_'+model_name+'.pkl'
with open(file_name, 'wb') as f:
    for series in (loss_list, auc_list):
        pickle.dump(series, f, pickle.HIGHEST_PROTOCOL)