In [None]:
import torch
from torch import nn
from torch.nn import init
import torch.utils.data as data_utils
from torch.autograd import Variable
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import heapq

In [None]:
# Read the rating file into an integer array, keeping file columns 0, 1 and 3.
# Presumably these are (user id, item id, timestamp) -- TODO confirm against the data file.
dataset = np.loadtxt(
    '../Yelp/yelp.rating',
    dtype=int,
    usecols=(0, 1, 3),
)

In [None]:
# Seed every random source used below (NumPy negative sampling, PyTorch weight
# initialisation and DataLoader shuffling) so a Restart & Run All is repeatable.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Feature-map side lengths; FM_SIZE[0] is the embedding dimension and
# len(FM_SIZE) - 1 is the number of convolution layers built by ConvNCF.
FM_SIZE = [64,32,16,8,4,2,1]
# Number of channels produced by every convolution layer.
N_FM = 32
# Largest user / item id found in the data (NOT a count): arrays indexed by id
# are therefore allocated with N_USERS+1 / N_ITEMS+1 entries.
N_USERS = np.max(dataset[:,0])
N_ITEMS = np.max(dataset[:,1])
LEARNING_RATE = 0.005
BATCH_SIZE = 256
EPOCH = 10

In [None]:
# Dense binary interaction matrix: users_items[u, i] == 1 when the pair (u, i)
# appears in `dataset`. Built with one fancy-index assignment instead of a row loop.
users_items = np.zeros((N_USERS + 1, N_ITEMS + 1), dtype=np.int8)
users_items[dataset[:, 0], dataset[:, 1]] = 1

In [None]:
# training data
user_input, item_input, labels = list(), list(), list()  # x1 x2 -> y
n_negatives = 4  # number of negatives sampled for every positive

# Leave-one-out split: for each user hold out the interaction with the largest
# value in the LAST column (presumably the timestamp -- TODO confirm). Only that
# column is compared; taking the max over the whole row block would silently
# depend on ids being smaller than every last-column value.
# One pass over the data; on ties the first row encountered wins.
latest_by_user = dict()
for user, item, stamp in dataset.tolist():
    held = latest_by_user.get(user)
    if held is None or stamp > held[1]:
        latest_by_user[user] = (item, stamp)

uipositives = list()  # held-out (user, item) positives, used as the test set
for user in sorted(latest_by_user):  # user ids without any row are skipped
    onepos = np.array([user, latest_by_user[user][0]])
    uipositives.append(onepos)
    # Hide the held-out positive from the training matrix.
    users_items[onepos[0], onepos[1]] = 0

for uno, uitems in enumerate(users_items):
    positives = np.flatnonzero(uitems)
    n_sample = len(positives) * n_negatives
    if n_sample == 0:
        continue  # nothing left to train on for this user
    negative_items = np.flatnonzero(uitems == 0)
    if len(negative_items) == 0:
        negatives = negative_items  # user interacted with every item
    else:
        # Negative sampling without replacement; fall back to sampling with
        # replacement only when the pool is smaller than the requested size.
        negatives = np.random.choice(
            negative_items, n_sample, replace=n_sample > len(negative_items)
        )
    # Positives first, then negatives, for each user.
    user_input.extend([uno] * (len(positives) + len(negatives)))
    item_input.extend(positives.tolist())
    item_input.extend(negatives.tolist())
    labels.extend([1] * len(positives))
    labels.extend([0] * len(negatives))

user_input = np.array(user_input, dtype=np.int64)
item_input = np.array(item_input, dtype=np.int64)
labels = np.array(labels, dtype=np.int64)

In [None]:
# test data
# For every held-out positive build a candidate list of 1000 items:
# the positive at index 0 followed by 999 sampled negatives.
n_test_negatives = 999
utest = list()
itest = list()
for ui in uipositives:
    u = ui[0]
    pos_item = ui[1]
    # Restore the held-out interaction so it cannot be drawn as a negative.
    users_items[u, pos_item] = 1
    negative_items = np.flatnonzero(users_items[u] == 0)
    # Sample without replacement so the candidate list has no duplicates;
    # fall back to sampling with replacement only when the pool is too small.
    negatives_sample = np.random.choice(
        negative_items,
        n_test_negatives,
        replace=len(negative_items) < n_test_negatives,
    )
    utest.append(np.full(n_test_negatives + 1, u))
    itest.append(np.concatenate(([pos_item], negatives_sample)))
utest = np.array(utest)
itest = np.array(itest)
# One label row per test user actually built; only column 0 (the positive) is 1.
ytest = np.zeros((len(utest), n_test_negatives + 1))
ytest[:, 0] = 1

In [None]:
# Wrap the training arrays as (N, 1) column tensors: integer ids for the two
# embedding lookups and float labels for the binary cross-entropy loss.
torch_x1 = torch.from_numpy(user_input).long().reshape(-1, 1)
torch_x2 = torch.from_numpy(item_input).long().reshape(-1, 1)
torch_y  = torch.from_numpy(labels).float().reshape(-1, 1)

# Shuffled mini-batches of (user, item, label) triples.
torch_dataset = data_utils.TensorDataset(torch_x1, torch_x2, torch_y)
loader = data_utils.DataLoader(
    torch_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)

In [None]:
class ConvNCF(nn.Module):
    '''Convolutional collaborative-filtering scorer.

    A user id and an item id are embedded, the outer product of the two
    embeddings forms a single-channel 2-D interaction map, a stack of strided
    convolutions reduces that map, and a linear layer followed by a sigmoid
    turns the result into a score in (0, 1).
    '''

    def __init__(self, fm_sizes, n_users, n_items, n_fm=N_FM, myStride=2, n_output=1):
        '''Build the embeddings, the convolution stack and the output layer.

        Args:
            fm_sizes: e.g. [64,32,16,8,4,2,1]. fm_sizes[0] is the embedding
                size and len(fm_sizes) - 1 convolution layers are created;
                the remaining values are not read individually.
            n_users: largest user id (the embedding holds n_users + 1 rows).
            n_items: largest item id (the embedding holds n_items + 1 rows).
            n_fm: number of output channels of every convolution.
            myStride: stride of every convolution.
            n_output: size of the prediction vector.
        '''
        super(ConvNCF, self).__init__()
        # Plain list used for ordered iteration in forward(); each layer is
        # also registered as attribute conv1, conv2, ... so that its
        # parameters are tracked by the module.
        self.convs = list()

        self.user_embedding_layer = nn.Embedding(n_users+1, fm_sizes[0])
        self._set_normalInit(self.user_embedding_layer, hasBias = False)
        self.item_embedding_layer = nn.Embedding(n_items+1, fm_sizes[0])
        self._set_normalInit(self.item_embedding_layer, hasBias = False)
        for i in range(1, len(fm_sizes)):
            # The first layer sees the single-channel interaction map.
            inChannel = 1 if i == 1 else n_fm
            # kernel 4 / padding 1 with the default stride 2 halves an even
            # spatial size, so the default fm_sizes shrinks 64x64 to 1x1.
            conv = nn.Conv2d(in_channels=inChannel, out_channels=n_fm, kernel_size=4, stride=myStride, padding=1)
            setattr(self, 'conv%i' % i, conv)
            self.convs.append(conv)

        # Output layer; expects the flattened conv output to have n_fm
        # features, i.e. a final 1x1 feature map.
        self.predict = nn.Linear(n_fm, n_output)

    def _set_normalInit(self, layer, parameter = (0.0, 0.01), hasBias = True):
        '''Initialise layer weights (and bias if hasBias) from N(parameter[0], parameter[1]).'''
        init.normal_(layer.weight, mean = parameter[0], std = parameter[1])
        if hasBias:
            init.normal_(layer.bias, mean = parameter[0], std = parameter[1])
        return

    def _set_uniformInit(self, layer, parameter = 1, hasBias = True):
        '''Initialise layer weights (and bias if hasBias) from U(-parameter, parameter).'''
        init.uniform_(layer.weight, a = - parameter, b = parameter)
        if hasBias:
            init.uniform_(layer.bias, a = - parameter, b = parameter)
        return

    def forward(self, user, item):
        '''Score (user, item) pairs.

        Args:
            user: integer id tensor of shape (batch, 1); shape (batch,) is
                also accepted.
            item: integer id tensor with the same layout as `user`.

        Returns:
            Tensor of shape (batch, n_output) with sigmoid scores.
        '''
        x1 = self.user_embedding_layer(user)
        x2 = self.item_embedding_layer(item)
        # 1-D id tensors give (batch, dim) embeddings; add the length-1 axis
        # that (batch, 1) id tensors already carry.
        if x1.dim() == 2:
            x1 = x1.unsqueeze(1)
        if x2.dim() == 2:
            x2 = x2.unsqueeze(1)
        # Batched outer product: (batch, dim, 1) @ (batch, 1, dim) -> (batch, dim, dim).
        x = torch.bmm(x1.transpose(1, 2), x2)
        # conv2d input layout: (batch_size, channel, height, width)
        x = x.unsqueeze(1)
        for conv in self.convs:
            x = torch.relu(conv(x))
        # conv2d output layout: (batch_size, out_channel, out_height, out_width)
        x = torch.flatten(x, start_dim = 1)
        out = torch.sigmoid(self.predict(x))
        return out

In [None]:
# Instantiate the network, an Adam optimizer over its parameters and a binary
# cross-entropy loss (the model already applies a sigmoid to its output).
# NOTE(review): the torch.optim documentation recommends moving a model to the
# GPU before constructing its optimizer; the original ordering is kept here.
model = ConvNCF(FM_SIZE, N_USERS, N_ITEMS, n_fm=N_FM)
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)
loss_func = nn.BCELoss()
if torch.cuda.is_available():
    model, loss_func = model.cuda(), loss_func.cuda()
print(model)

In [None]:
def getHitRatio(ranklist, gtItem):
    '''Hit ratio for one user: 1 if the ground-truth item appears in the top-K
    `ranklist`, otherwise 0.'''
    return 1 if gtItem in ranklist else 0

def getNDCG(ranklist, gtItem):
    '''Normalised discounted cumulative gain for a single ground-truth item.

    Returns log(2) / log(position + 2) for the first 0-based position at which
    `gtItem` occurs in `ranklist`, or 0 when it does not occur.
    '''
    for position, candidate in enumerate(ranklist):
        if candidate == gtItem:
            return np.log(2) / np.log(position + 2)
    return 0

def getH(ranklist1, ranklist2):
    '''Dissimilarity of two rank lists: 1 minus the number of distinct items
    they share divided by the length of `ranklist1`.'''
    shared = set(ranklist1) & set(ranklist2)
    return 1 - len(shared) / len(ranklist1)

In [None]:
def movieEval_1(model, loss_func, utest, itest, ytest, topK = 10):
    '''Leave-one-out evaluation: mean loss, HR@topK and NDCG@topK.

    Args:
        model: module called as model(user_ids, item_ids) with (n, 1) long
            tensors; its (n, n_output) output is scored by column 0.
        loss_func: callable(prediction, target) returning a scalar tensor.
        utest: per-user NumPy arrays of user ids (one row per test user).
        itest: per-user NumPy arrays of candidate item ids; index 0 of every
            row must be the held-out positive item.
        ytest: per-user NumPy arrays of labels matching `itest`.
        topK: length of the ranked list used for the metrics.

    Returns:
        (mean_test_loss, hr, ndcg, rank_all_users), where rank_all_users holds
        the top-K item list of every user. If the three test sets differ in
        length a message is printed and None is returned.
    '''
    if not (len(utest) == len(itest) == len(ytest)):
        print('the length of test sets are not equal.')
        return
    n_users = len(utest)

    # Run on whatever device the model lives on instead of assuming CUDA,
    # so evaluation also works on CPU-only machines.
    first_param = next(iter(model.parameters()), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    hit = 0
    undcg = 0
    rank_all_users = list()
    test_loss = list()

    # Evaluate in eval mode without autograd bookkeeping, then restore the
    # mode the caller had so a surrounding training loop is unaffected.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for i in range(n_users):
                x1test = torch.from_numpy(utest[i].reshape(-1, 1)).long().to(device)
                x2test = torch.from_numpy(itest[i].reshape(-1, 1)).long().to(device)
                y = torch.from_numpy(ytest[i].reshape(-1, 1)).float().to(device)
                prediction = model(x1test, x2test)
                loss = loss_func(prediction, y)
                test_loss.append(loss.item())
                pred_vector = prediction.cpu().numpy().T[0]
                positive_item = itest[i][0]  # the held-out positive
                # item id -> score; for a repeated id the last score wins.
                map_item_score = dict(zip(itest[i], pred_vector))
                ranklist = heapq.nlargest(topK, map_item_score, key=map_item_score.get)
                rank_all_users.append(ranklist)
                hit += getHitRatio(ranklist, positive_item)
                undcg += getNDCG(ranklist, positive_item)
    finally:
        model.train(was_training)

    mean_test_loss = np.mean(test_loss)
    hr = hit / n_users
    ndcg = undcg / n_users
    print('test_loss:', mean_test_loss)
    print('HR@', topK, ' = %.4f' % hr)
    print('NDCG@', topK, ' = %.4f' % ndcg)
    return mean_test_loss, hr, ndcg, rank_all_users

In [None]:
# Per-epoch history of the training loss and of the evaluation metrics.
train_loss_list, test_loss_list = list(), list()
hr_list, ndcg_list = list(), list()
for e in range(EPOCH):
    train_loss = list()
    for batch_x1, batch_x2, batch_y in loader:
        x1, x2, y = batch_x1, batch_x2, batch_y
        if torch.cuda.is_available():
            x1 = x1.cuda()
            x2 = x2.cuda()
            y = y.cuda()
        # One optimisation step on this mini-batch.
        optimizer.zero_grad()
        prediction = model(x1, x2)
        loss = loss_func(prediction, y)
        loss.backward()
        optimizer.step()
        train_loss.append(loss.item())
    print('------第'+str(e+1)+'个epoch------')
    mean_train_loss = np.mean(train_loss)
    print('train_loss:', mean_train_loss)
    train_loss_list.append(mean_train_loss)
    # Evaluate on the held-out positives after every epoch.
    test_loss, hr, ndcg, rank_all_users = movieEval_1(model, loss_func, utest, itest, ytest)
    test_loss_list.append(test_loss)
    hr_list.append(hr)
    ndcg_list.append(ndcg)