In [1]:
import warnings
warnings.filterwarnings('ignore')
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import time
import os
import numpy as np
import pandas as pd
from torch.utils.data import TensorDataset,DataLoader

In [2]:
data = pd.read_csv('./data/ml-1m/ratings.dat',sep='::', names=['user','item','rating','timestamp'])

In [3]:
data.head()

Unnamed: 0,user,item,rating,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [4]:
user2id = {}
item2id = {}
for idx, user in enumerate(data['user'].unique().tolist()):
    user2id[user] = idx
for idx, item in enumerate(data['item'].unique().tolist()):
    item2id[item] = idx
    
data['user'] = data['user'].map(user2id)
data['item'] = data['item'].map(item2id)
data = data.sort_values(by=['user', 'timestamp'])

In [5]:
# 生成训练测试数据，对于每个用户的数据最后一个item为测试，其他item为训练
def neg_sample_item(num_item, neg_num,item_list):
    neg_list = []
    while len(neg_list)<neg_num:
        neg_item = np.random.choice(num_item, 1)[0]
        while neg_item in item_list:
            neg_item = np.random.choice(num_item, 1)[0]
        neg_list.append(neg_item)
    return neg_list

def generate_train_test_data(data, neg_num):
    # user rating item
    num_item = len(data['item'].unique())
    
    train = []
    test = []
    # split data
    for uid in data['user'].unique():
        item_list = data[data['user']==uid]['item'].tolist()
        for i in range(len(item_list)-8):
            item_seq = item_list[i:i+8]
            if i == len(item_list)-9:
                neg_list = neg_sample_item(num_item, neg_num,item_list)
                result_slice = [uid] + item_seq + neg_list
                test.append(result_slice)
            else:
                neg_list = neg_sample_item(num_item, neg_num,item_list)
                result_slice = [uid] + item_seq + neg_list
                train.append(result_slice)
    return train, test

In [6]:
%%time
train_data, test_data = generate_train_test_data(data, 3)

train_data = torch.from_numpy(np.array(train_data))
test_data = torch.from_numpy(np.array(test_data))
train_x = train_data[:,:6]
train_y = train_data[:,6:]

# construct dataset for train test
train_dataset = TensorDataset(train_x, train_y)
dataloader = DataLoader(dataset=train_dataset, batch_size=512, shuffle=True)

CPU times: user 3min 50s, sys: 1.82 s, total: 3min 52s
Wall time: 3min 51s


In [71]:
class SelfAttenion(nn.Module):
    def __init__(self, embedding_dim):
        """
        embeding_dim: int, laten vector dim of item
        """
        super(SelfAttenion, self).__init__()
        self.linear1 = nn.Linear(embedding_dim, embedding_dim)
        self.linear1.weight.data.normal_(mean=0, std=np.sqrt(2.0 / embedding_dim))
        # self.linear2 = nn.Linear(embedding_dim, embedding_dim)
        # init weight
        # nn.init.normal_(self.linear1.weight, mean=0, std=np.sqrt(2.0 / embedding_dim)
    
    def forward(self, item_embedding):
        """
        item_embeding: L*d user history L squence interaction item
        """
        Q = F.relu(self.linear1(item_embedding))
        K = F.relu(self.linear1(item_embedding))
        d = torch.FloatTensor([100]).cuda()
        affinity = torch.matmul(Q, torch.transpose(K, 1, 2))/torch.sqrt(d)
        
        # mask the diagonal value
        mask = torch.eye(item_embedding.size(1), item_embedding.size(1)).byte().cuda()
        affinity = affinity.masked_fill(mask, 0)
        S = F.softmax(affinity)
        A = torch.mean(torch.matmul(S, item_embedding), dim=1)
        return A

In [112]:
class AttSeqModel(nn.Module):
    def __init__(self, num_user, num_item, L, w, embedding_dim):
        """
        num_user: int, user number in dataset
        num_item: int, item number in dataset
        L: int the number of history item will consider
        embeding_dim: int, laten vector dim of item
        """
        super(AttSeqModel, self).__init__()
        self.embedding_dim = embedding_dim
        self.num_item = num_item
        self.L = L
        self.user_embed = nn.Embedding(num_user, embedding_dim)
        self.item_embed_short = nn.Embedding(num_item, embedding_dim)
        self.item_embed_long = nn.Embedding(num_item, embedding_dim)
        self.item_position_embed = nn.Embedding.from_pretrained(self.position_embed(L),freeze=True)
        self.att = SelfAttenion(embedding_dim).cuda()
        self.w = w
        
        # embedding init
        self.user_embed.weight.data.normal_(0,1.0/self.user_embed.embedding_dim)
        self.item_embed_short.weight.data.normal_(0, 1.0/self.item_embed_short.embedding_dim)
        self.item_embed_long.weight.data.normal_(0, 1.0/self.item_embed_long.embedding_dim)
        
    def position_embed(self, L):
        position_embedding = np.array([[pos/np.power(1000, 2.*i)/ self.embedding_dim for i in range(self.embedding_dim)]
                                      for pos in range(L)])
        position_embedding[:,0::2] = np.sin(position_embedding[:,0::2])
        position_embedding[:,1::2] = np.cos(position_embedding[:,1::2])
        return torch.from_numpy(position_embedding).cuda()
    
    def forward(self, user, seq_item, target=None, for_pred=False):
        """
        user: uid of user
        seq_item: L item id user interacte before
        target: item
        """
        # sequential item embedding
        item_embedding = self.item_embed_short(seq_item)  # L*d
        # item position embedding
        position_idx = torch.range(0,self.L-1).unsqueeze(0).expand(seq_item.size(0),-1).long().cuda()
        position_embedding = self.item_position_embed(position_idx)
        # add position embedding
        item_embedding_cat = item_embedding.float() + position_embedding.float()
        
        # attention
        attention = self.att(item_embedding_cat)
        
        # user embedding
        user_embedding = self.user_embed(user).squeeze()
        # target embedding short and long note: those two embedding is different 
        if target is None:
            target = torch.range(0,self.num_item-1).long().unsqueeze(0).cuda()
            target_embedding_short = self.item_embed_short(target).squeeze()
            target_embedding_long = self.item_embed_long(target).squeeze()
        else:
            target_embedding_short = self.item_embed_short(target).squeeze()
            target_embedding_long = self.item_embed_long(target).squeeze()
        # pred
        if for_pred == False:
            user_embedding = user_embedding.unsqueeze(1).expand(-1,target.size(1),-1)
            attention = attention.unsqueeze(1).expand(-1,target.size(1),-1)
            y_pred = self.w* torch.sqrt(torch.sum((user_embedding - target_embedding_long)**2, dim=2)) + (1-self.w)*torch.sqrt(torch.sum((attention-target_embedding_short)**2, dim=2))
            return y_pred
        else:
            user_embedding = user_embedding.unsqueeze(0).expand(target.size(1),-1)
            attention = attention.expand(target.size(1),-1)
            y_pred = self.w* torch.sqrt(torch.sum((user_embedding - target_embedding_long)**2, dim=1)) + (1-self.w)*torch.sqrt(torch.sum((attention-target_embedding_short)**2, dim=1))
            return y_pred

In [117]:
# parameters
num_user = len(data['user'].unique())
num_item = len(data['item'].unique())
L = 5
embedding_dim = 100
w = 0.2

In [118]:
def train(model, dataloader, test_data, epochs):
    model.train()
    optimizer = optim.Adam(model.parameters(), lr=0.001,weight_decay=0.0001)
    for epoch in range(epochs):
        losses = []
        start = time.time()
        for train_x, train_y in dataloader:
            user = train_x[:,0].cuda()
            item_seq = train_x[:,1:].cuda()
            target_pos = train_y[:,:3].cuda()
            target_neg = train_y[:,3:].cuda()
            y_pred_pos = model(user, item_seq, target_pos,for_pred=False)
            y_pred_neg = model(user, item_seq, target_neg, for_pred=False)
            optimizer.zero_grad()
            loss = torch.zeros(y_pred_pos.size(0),1).cuda()
            for i in range(y_pred_pos.size(1)):
                l = y_pred_pos[:,i].view(-1,1)
                y_pos_slice = l.expand(-1,y_pred_pos.size(1))
                loss += torch.sum(y_pos_slice - y_pred_neg + 0.5,dim=1).unsqueeze(1)
            loss = torch.mean(loss)
            losses.append(loss.item())
            loss.backward()
            optimizer.step()
        print("Epoch %d loss is %.3f and consume time is %.2f" %(epoch+1, np.mean(losses), (time.time() - start)))
        hr, mrr = test(model, test_data, 50)
        print("hr is %.3f and mrr is %.3f" %(hr, mrr))

In [119]:
def hr(y_target, y_pred, topk):
    y_pred = y_pred[:topk].cpu().numpy()
    for item in y_pred:
        if item in y_target:
            return 1
    return 0

def mrr(y_target, y_pred, topk):
    y_pred = y_pred[:topk].cpu().numpy()
    for idx in range(len(y_pred)):
        if y_pred[idx] in y_target:
            return 1/(idx+1)
    return 0

def test(model, test_data, topk):
    model.eval()
    HR = []
    MRR = []
    for idx in range(test_data.size(0)):
        uid = test_data[idx,0].unsqueeze(0).cuda()
        item_seq = test_data[idx, 1:6].unsqueeze(0).cuda()
        y_target = test_data[idx,6:9].numpy()
        y_pred = model(uid, item_seq,for_pred=True)
        y_pred = torch.argsort(y_pred)
        hits = hr(y_target, y_pred, topk)
        mrrs = mrr(y_target, y_pred, topk)
        HR.append(hits)
        MRR.append(mrrs)
    return np.mean(HR), np.mean(MRR)

In [120]:
selfatt = AttSeqModel(num_user, num_item, L, w, embedding_dim).cuda()
train(selfatt,dataloader,test_data,20)

Epoch 1 loss is -12.691 and consume time is 20.85
hr is 0.036 and mrr is 0.002
Epoch 2 loss is -35.784 and consume time is 20.82
hr is 0.052 and mrr is 0.005
Epoch 3 loss is -49.472 and consume time is 20.62
hr is 0.053 and mrr is 0.004
Epoch 4 loss is -57.987 and consume time is 21.19
hr is 0.067 and mrr is 0.004
Epoch 5 loss is -63.308 and consume time is 21.00
hr is 0.069 and mrr is 0.005
Epoch 6 loss is -66.359 and consume time is 21.51
hr is 0.066 and mrr is 0.004
Epoch 7 loss is -68.033 and consume time is 21.37
hr is 0.070 and mrr is 0.004
Epoch 8 loss is -68.973 and consume time is 21.21
hr is 0.065 and mrr is 0.004
Epoch 9 loss is -69.495 and consume time is 20.56
hr is 0.064 and mrr is 0.005
Epoch 10 loss is -69.803 and consume time is 21.15
hr is 0.070 and mrr is 0.006
Epoch 11 loss is -70.007 and consume time is 21.57
hr is 0.061 and mrr is 0.005
Epoch 12 loss is -70.078 and consume time is 21.66
hr is 0.061 and mrr is 0.005
Epoch 13 loss is -70.162 and consume time is 21.3