In [None]:
import heapq

import numpy as np
import torch
import torch.utils.data as data_utils
from torch import nn
from torch.autograd import Variable
from torch.nn import init

In [None]:
# Training data: ratings are turned into implicit feedback, one interaction per
# user is held out for testing, and negatives are sampled for every positive.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)  # make the negative sampling reproducible on "Run All"

# Recent NumPy releases only accept a single-character delimiter in loadtxt, so
# the "::" separator is rewritten to "," while the file is streamed.
# Columns 0, 1 and 3 are kept; below they are used as user id, item id and the
# value whose maximum selects the held-out item (presumably the timestamp in
# the MovieLens-1M layout -- confirm against the dataset README).
with open("./ml-1m/ratings.dat") as ratings_file:
    dataset = np.loadtxt((line.replace('::', ',') for line in ratings_file),
                         delimiter=',', dtype=int)[:, [0, 1, 3]]
N_USERS = int(np.max(dataset[:, 0]))
N_ITEMS = int(np.max(dataset[:, 1]))
n_negatives = 4  # number of sampled negatives per positive training instance

# Binary user x item interaction matrix. Ids are used directly as indices, so
# row 0 and column 0 are padding that is skipped everywhere below.
users_items = np.zeros((N_USERS+1, N_ITEMS+1), dtype = np.int8)
users_items[dataset[:, 0], dataset[:, 1]] = 1  # implicit feedback: any rating counts as 1
user_input, item_input, labels = [],[],[]  # x1, x2 -> y

# Leave-one-out split: per user, hold out the row with the largest last-column
# value (first such row on ties) and remove it from the training matrix.
uipositives = list()  # held-out (user, item) pairs used as test positives
for i in range(1, N_USERS+1):
    uitems = dataset[dataset[:,0]==i]
    if len(uitems) == 0:
        continue  # id gap: this user has no ratings, nothing to hold out
    # Compare on the last column only, not on the whole array.
    onepos = uitems[np.argmax(uitems[:, -1]), :2]
    uipositives.append(onepos)
    users_items[onepos[0], onepos[1]]=0

# Training instances: every remaining positive plus n_negatives sampled negatives.
for uno in range(1, N_USERS+1):
    positives = np.flatnonzero(users_items[uno])
    n_sample = len(positives) * n_negatives
    # Candidate negatives are real item ids (>= 1) without a training interaction.
    negative_items = np.flatnonzero(users_items[uno, 1:] == 0) + 1
    # Sampled WITH replacement: n_sample can exceed the number of candidates
    # for very active users, so sampling without replacement is not always possible.
    negatives = np.random.choice(negative_items, n_sample)
    user_input.extend([uno] * (len(positives) + n_sample))
    item_input.extend(positives.tolist())   # positive instances
    item_input.extend(negatives.tolist())   # negative instances
    labels.extend([1] * len(positives))
    labels.extend([0] * n_sample)

In [None]:
# Test data: each held-out positive is ranked against sampled negatives.
n_test_negatives = 99  # negatives per test user; the positive is always at index 0
utest = list()
itest = list()
for ui in uipositives:
    u = ui[0]
    i = ui[1]
    # Candidates are items the user has no training interaction with. The
    # held-out positive was zeroed in users_items, so it has to be excluded
    # explicitly, together with the padding id 0.
    candidate_mask = users_items[u] == 0
    candidate_mask[0] = False
    candidate_mask[i] = False
    negative_items = np.flatnonzero(candidate_mask)
    # Without replacement so the evaluated items are all distinct; raises
    # ValueError if fewer than n_test_negatives candidates exist.
    negatives_sample = np.random.choice(negative_items, n_test_negatives, replace=False)
    negatives = [int(i)]  # positive first
    negatives.extend(negatives_sample.tolist())  # then the negatives
    utest.append([u] * (n_test_negatives + 1))
    itest.append(negatives)
# One label row per test user; only column 0 (the positive) is 1.
ytest = np.zeros((len(uipositives), n_test_negatives + 1))
ytest[:, 0] = 1

In [None]:
# Hyper-parameters
# Mini-batch size handed to the DataLoader.
BATCH_SIZE = 256
# Step size for the Adam optimizer.
LEARNING_RATE = 1e-3
# Number of passes over the training set.
EPOCH = 12
# Length of the user / item input vectors (each input is a single id).
USER_VECTOR_SIZE = 1
ITEM_VECTOR_SIZE = 1
# Size of the latent (embedding) layer.
N_FACTORS = 8
# Activation function passed to the model constructor.
ACTIVATION = torch.relu

In [None]:
# Column tensors of shape (n_samples, 1): user ids, item ids and 0/1 labels.
torch_x1 = torch.from_numpy(np.asarray(user_input).reshape(-1, 1)).long()
torch_x2 = torch.from_numpy(np.asarray(item_input).reshape(-1, 1)).long()
torch_y = torch.from_numpy(np.asarray(labels).reshape(-1, 1)).float()

# Shuffled mini-batches over the (user, item, label) triples.
torch_dataset = data_utils.TensorDataset(torch_x1, torch_x2, torch_y)
loader = data_utils.DataLoader(
    dataset=torch_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
)

In [None]:
class GMF(nn.Module):
    """Matrix-factorization style scorer for (user, item) pairs.

    The user id and the item id are each looked up in an embedding table of
    width ``n_factors``; the two embeddings are multiplied element-wise and a
    linear layer followed by a sigmoid maps the product to ``n_output`` values
    in (0, 1).

    Args:
        user_vector_size: Accepted for interface compatibility; not used.
        item_vector_size: Accepted for interface compatibility; not used.
        n_factors: Embedding width, i.e. the input size of the output layer.
        n_users: Largest user id; the table has ``n_users + 1`` rows so ids
            can be used directly as indices.
        n_items: Largest item id; the table has ``n_items + 1`` rows.
        activation: Stored on ``self.activation``; not applied in ``forward``.
        batch_normalization: Stored on ``self.do_bn``; no batch-norm layer is
            built or applied.
        n_output: Number of output units of the prediction layer.
    """

    def __init__(self, user_vector_size, item_vector_size, n_factors,
                 n_users, n_items, activation = torch.relu, batch_normalization = False, n_output = 1):
        super().__init__()
        self.activation = activation
        self.do_bn = batch_normalization
        # LeCun-uniform bound sqrt(3 / fan_in). The previous sqrt(n_factors)
        # grew with the fan-in instead of shrinking, so the output layer
        # started with very large weights and bias.
        parameter_LeCun = float(np.sqrt(3.0 / n_factors))

        self.gmf_user_embedding_layer = nn.Embedding(n_users+1, n_factors)
        self._set_normalInit(self.gmf_user_embedding_layer, hasBias = False)
        self.gmf_item_embedding_layer = nn.Embedding(n_items+1, n_factors)
        self._set_normalInit(self.gmf_item_embedding_layer, hasBias = False)

        self.predict = nn.Linear(n_factors, n_output)         # output layer
        self._set_uniformInit(self.predict, parameter = parameter_LeCun)

    def _set_normalInit(self, layer, parameter = (0.0, 0.01), hasBias=True):
        """Fill weight (and bias if ``hasBias``) from N(parameter[0], parameter[1]**2)."""
        mean, std = parameter[0], parameter[1]
        init.normal_(layer.weight, mean = mean, std = std)
        if hasBias:
            init.normal_(layer.bias, mean = mean, std = std)

    def _set_uniformInit(self, layer, parameter = 5, hasBias = True):
        """Fill weight (and bias if ``hasBias``) from U(-parameter, parameter)."""
        init.uniform_(layer.weight, a = - parameter, b = parameter)
        if hasBias:
            init.uniform_(layer.bias, a = - parameter, b = parameter)

    def _set_heNormalInit(self, layer, hasBias=True):
        """Kaiming-normal init for the weight; the bias (if any) is zeroed.

        Kaiming init needs fan-in/fan-out and raises ``ValueError`` on a 1-D
        bias tensor, so the bias is set to a constant instead.
        """
        init.kaiming_normal_(layer.weight, nonlinearity='relu')
        if hasBias:
            init.constant_(layer.bias, 0.0)

    def _set_heUniformInit(self, layer, hasBias=True):
        """Kaiming-uniform init for the weight; the bias (if any) is zeroed.

        Kaiming init needs fan-in/fan-out and raises ``ValueError`` on a 1-D
        bias tensor, so the bias is set to a constant instead.
        """
        init.kaiming_uniform_(layer.weight, nonlinearity='relu')
        if hasBias:
            init.constant_(layer.bias, 0.0)

    def forward(self, x1, x2):
        """Score user/item id pairs.

        Args:
            x1: Integer tensor of user ids (the notebook feeds shape (batch, 1)).
            x2: Integer tensor of item ids, same shape as ``x1``.

        Returns:
            Tensor of sigmoid scores with one row per batch element.
        """
        user_vec = self.gmf_user_embedding_layer(x1)
        item_vec = self.gmf_item_embedding_layer(x2)
        interaction = torch.mul(user_vec, item_vec)
        # Collapse everything after the batch dimension before the linear layer.
        flat = torch.flatten(interaction, start_dim=1)
        return torch.sigmoid(self.predict(flat))

In [None]:
# Build the model from the hyper-parameters, then its optimizer and loss.
gmf = GMF(
    user_vector_size=USER_VECTOR_SIZE,
    item_vector_size=ITEM_VECTOR_SIZE,
    n_factors=N_FACTORS,
    n_users=N_USERS,
    n_items=N_ITEMS,
    activation=ACTIVATION,
    batch_normalization=False,
    n_output=1,
)
optimizer = torch.optim.Adam(gmf.parameters(), lr=LEARNING_RATE)
loss_func = nn.BCELoss()  # binary cross-entropy on the sigmoid outputs
print(gmf)

In [None]:
def movieEval(model, loss_func, utest, itest, ytest, topK = 10):
    """Evaluate ``model`` on per-user candidate lists and print/return the metrics.

    For every test user the model scores all candidate items, the ``topK``
    highest-scored items form the rank list, and the first candidate
    (``itest[k][0]``) is treated as the positive item.

    Args:
        model: ``nn.Module`` called as ``model(user_ids, item_ids)``.
        loss_func: Callable ``loss_func(prediction, target)`` returning a scalar tensor.
        utest: Per-user sequences of user ids (one id per candidate).
        itest: Per-user sequences of candidate item ids, positive first.
        ytest: Per-user sequences of target labels aligned with ``itest``.
        topK: Length of the rank list.

    Returns:
        ``(mean_test_loss, hr, ndcg)``, or ``None`` (after printing a message)
        when the three test sets differ in length.

    Raises:
        ValueError: If the test sets are empty.
    """
    if not (len(utest) == len(itest) == len(ytest)):
        print('the length of test sets are not equal.')
        return
    n_users = len(utest)
    if n_users == 0:
        raise ValueError('the test sets are empty.')

    hit = 0
    undcg = 0
    test_loss = list()
    # Evaluate in eval mode without autograd bookkeeping, then restore the
    # caller's train/eval mode even if scoring fails.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for users, items, targets in zip(utest, itest, ytest):
                # Column vectors of shape (n_candidates, 1).
                x1test = torch.as_tensor(np.array(users, ndmin=2).T, dtype=torch.long)
                x2test = torch.as_tensor(np.array(items, ndmin=2).T, dtype=torch.long)
                y = torch.as_tensor(np.array(targets, ndmin=2).T, dtype=torch.float)
                prediction = model(x1test, x2test)
                test_loss.append(loss_func(prediction, y).item())
                pred_vector = prediction.detach().cpu().numpy().T[0]
                positive_item = items[0]  # the positive is stored first
                # Duplicate item ids keep the score of their last occurrence.
                map_item_score = dict(zip(items, pred_vector))
                ranklist = heapq.nlargest(topK, map_item_score, key=map_item_score.get)
                # NOTE(review): getHitRatio / getNDCG are not defined in the
                # visible cells; they must exist in the notebook namespace.
                hit += getHitRatio(ranklist, positive_item)
                undcg += getNDCG(ranklist, positive_item)
    finally:
        model.train(was_training)

    mean_test_loss = np.mean(test_loss)
    hr = hit / n_users
    ndcg = undcg / n_users
    print('test_loss:', mean_test_loss)
    print('HR@', topK, ' = %.4f' % hr)
    print('NDCG@', topK, ' = %.4f' % ndcg)
    return mean_test_loss, hr, ndcg

In [None]:
# Per-epoch history of the training loss and the test metrics.
train_loss_list, test_loss_list = [], []
hr_list, ndcg_list = [], []

for e in range(EPOCH):
    # One optimisation pass over all mini-batches.
    train_loss = []
    for batch_x1, batch_x2, batch_y in loader:
        optimizer.zero_grad()
        prediction = gmf(batch_x1, batch_x2)
        loss = loss_func(prediction, batch_y)
        train_loss.append(loss.item())
        loss.backward()
        optimizer.step()

    # Report the epoch's mean training loss, then evaluate on the test lists.
    print('------第'+str(e+1)+'个epoch------')
    mean_train_loss = np.mean(train_loss)
    print('train_loss:', mean_train_loss)
    train_loss_list.append(mean_train_loss)

    test_loss, hr, ndcg = movieEval(gmf, loss_func, utest, itest, ytest)
    test_loss_list.append(test_loss)
    hr_list.append(hr)
    ndcg_list.append(ndcg)