In [None]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import torch.distributed as dist
import torch.utils.data as data_utils
import numpy as np

In [None]:
class Net(nn.Module):
    """MLP branch of a neural collaborative filtering model.

    The user and item inputs are each projected by a linear "embedding" layer,
    concatenated, passed through a tower of ReLU hidden layers whose widths
    halve from layer to layer, and finally mapped to a sigmoid output.
    """

    def __init__(self, user_vector_size, item_vector_size, mlp_embedding_size, mlp_n_layers, mlp_predict_size, n_output = 1):
        '''Build the network.

        Args:
            user_vector_size: length of the user input vector.
            item_vector_size: length of the item input vector.
            mlp_embedding_size: number of units in each embedding layer.
            mlp_n_layers: number of hidden layers in the MLP tower (>= 1).
            mlp_predict_size: number of units in the last hidden layer; every
                earlier hidden layer is twice as wide as the one after it.
            n_output: number of units in the output layer.

        Raises:
            ValueError: if ``mlp_n_layers`` is smaller than 1.
        '''
        super(Net, self).__init__()
        if mlp_n_layers < 1:
            raise ValueError('mlp_n_layers must be >= 1, got %r' % (mlp_n_layers,))
        self.mlp_n_layers = mlp_n_layers

        # Hidden widths in forward order, e.g. 3 layers with predict size 16
        # gives [64, 32, 16].
        hidden_widths = [mlp_predict_size * 2 ** k for k in range(mlp_n_layers)][::-1]

        # MLP embedding layers
        self.mlp_user_embedding_layer = nn.Linear(user_vector_size, mlp_embedding_size)
        self.mlp_item_embedding_layer = nn.Linear(item_vector_size, mlp_embedding_size)

        # The hidden layers must live in an nn.ModuleList: layers kept in a
        # plain Python list are not registered as sub-modules, so they would be
        # invisible to parameters(), apply(), state_dict() and the optimizer.
        layers = []
        in_features = 2 * mlp_embedding_size
        for out_features in hidden_widths:
            layers.append(nn.Linear(in_features, out_features))
            in_features = out_features
        self.hidden = nn.ModuleList(layers)

        # Prediction layer
        self.output_layer = nn.Linear(mlp_predict_size, n_output)

    def forward(self, user_input, item_input):
        """Score user/item pairs.

        Args:
            user_input: tensor of shape (batch, user_vector_size).
            item_input: tensor of shape (batch, item_vector_size).

        Returns:
            Tensor of shape (batch, n_output) with values in [0, 1].
        """
        user_embedding = torch.relu(self.mlp_user_embedding_layer(user_input))
        item_embedding = torch.relu(self.mlp_item_embedding_layer(item_input))
        # Concatenate the two embeddings along the feature dimension.
        hidden_state = torch.cat((user_embedding, item_embedding), dim=1)
        for layer in self.hidden:
            hidden_state = torch.relu(layer(hidden_state))
        return torch.sigmoid(self.output_layer(hidden_state))


In [None]:
# Load the ratings file and keep the columns (user id, item id, timestamp).
# The file separates fields with the two-character string "::". Current NumPy
# releases only accept single-character delimiters in np.loadtxt, so every
# line is rewritten to be comma-separated before parsing.
with open("./ml-1m/ratings.dat") as ratings_file:
    dataset = np.loadtxt((line.replace('::', ',') for line in ratings_file),
                         delimiter=',', dtype=int)[:,[0,1,3]]
n_users = np.max(dataset[:,0])
n_items = np.max(dataset[:,1])
n_negatives = 4  # negatives sampled for every positive training instance

# Binary user-item interaction matrix (implicit feedback). Row/column 0 stay
# empty: all loops below treat ids as starting at 1.
users_items = np.zeros((n_users+1, n_items+1), dtype = np.int8)
users_items[dataset[:,0], dataset[:,1]] = 1

user_input, item_input, labels = [],[],[]  # x1, x2 -> y

# Leave-one-out split: each user's most recent interaction is held out for
# testing and removed from the training matrix.
uipositives = list()
for i in range(1, n_users+1):
    uitems = dataset[dataset[:,0]==i]
    # argmax over the timestamp column only (first row wins on ties); taking
    # the max over the whole array would mix ids and timestamps.
    onepos = uitems[np.argmax(uitems[:,-1]), :2]
    uipositives.append(onepos)
    users_items[onepos[0], onepos[1]]=0

# Training instances: every remaining positive plus n_negatives sampled
# negatives per positive.
all_items = set(range(1, n_items+1))  # valid item ids only, 0 is not an item
for uno in range(1, n_users+1):
    positives = np.nonzero(users_items[uno])[0]
    n_sample = len(positives) * n_negatives
    negative_items = list(all_items - set(positives))
    # Sampled WITH replacement: a heavy user can need more negatives than
    # there are non-interacted items.
    negatives = np.random.choice(negative_items, n_sample)
    user_input.extend([uno] * (len(positives) + n_sample))
    item_input.extend(positives)
    item_input.extend(negatives)
    labels.extend([1] * len(positives))
    labels.extend([0] * n_sample)

In [None]:
# Evaluation candidates: for every user, n_test_negatives distinct items the
# user never interacted with, followed by the held-out positive item as the
# LAST entry of the list.
n_test_negatives = 100
utest = list()
itest = list()
candidate_items = set(range(1, n_items+1))
for ui in uipositives:
    u = ui[0]
    i = ui[1]
    positives = np.nonzero(users_items[u])[0]
    # The held-out item was cleared from users_items, so it has to be excluded
    # explicitly or it could be drawn as its own negative.
    negative_items = list(candidate_items - set(positives) - {i})
    # replace=False gives distinct negatives; np.random.choice raises
    # ValueError if fewer than n_test_negatives candidates exist.
    negatives = list(np.random.choice(negative_items, n_test_negatives, replace=False))
    negatives.append(i)
    utest.append([u] * (n_test_negatives + 1))
    itest.append(negatives)

In [None]:
# Training and model hyper-parameters.
BATCH_SIZE = 256  # mini-batch size passed to the DataLoader
LEARNING_RATE = 0.0005  # learning rate passed to the Adam optimizer
EPOCH = 20  # range of the outer (epoch) training loop
user_vector_size = 1    # length of the user input vector; 1 because the raw user id is fed as a single scalar
item_vector_size = 1    # length of the item input vector; 1 because the raw item id is fed as a single scalar
mlp_embedding_size = 8       # number of units in each MLP embedding layer
mlp_n_layers  = 3       # number of MLP hidden layers
mlp_predict_size = 16  # width of the LAST MLP hidden layer; each earlier hidden layer is twice as wide as the next

In [None]:
# Turn the three instance lists into float32 column tensors of shape (N, 1)
# and bundle them so that one dataset item is (user, item, label).
torch_x1 = torch.from_numpy(np.asarray(user_input, dtype=np.float32).reshape(-1, 1))
torch_x2 = torch.from_numpy(np.asarray(item_input, dtype=np.float32).reshape(-1, 1))
torch_y = torch.from_numpy(np.asarray(labels, dtype=np.float32).reshape(-1, 1))

torch_dataset = data_utils.TensorDataset(torch_x1, torch_x2, torch_y)

In [None]:
# Shuffled mini-batch iterator over the training instances (4 worker processes).
loader = data_utils.DataLoader(torch_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=4)

In [None]:
def weights_init(m):
    """Initialise an ``nn.Linear`` module from N(0, 0.01**2); ignore other modules.

    Intended to be passed to ``nn.Module.apply``. Both the weight and, when the
    layer has one, the bias are re-drawn in place.

    Args:
        m: any ``nn.Module``; only ``nn.Linear`` instances are modified.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.normal_(m.weight, mean=0, std=0.01)
    # Layers built with bias=False have m.bias set to None.
    if m.bias is not None:
        nn.init.normal_(m.bias, mean=0, std=0.01)

# Build the model from the hyper-parameters, re-initialise its linear layers,
# and create the loss / optimizer used by the training loop.
net = Net(
    user_vector_size=user_vector_size,
    item_vector_size=item_vector_size,
    mlp_embedding_size=mlp_embedding_size,
    mlp_n_layers=mlp_n_layers,
    mlp_predict_size=mlp_predict_size,
    n_output=1,
)
net.apply(weights_init)
print(net)

loss_func = torch.nn.BCELoss()  # binary cross-entropy on the sigmoid output
optimizer = torch.optim.Adam(net.parameters(), lr=LEARNING_RATE)

In [None]:
def evaluate_hit_ratio(model, test_users, test_items, top_k=10):
    """Return the hit ratio at ``top_k`` of ``model`` on the test candidates.

    Args:
        model: callable taking (user, item) float tensors of shape (N, 1) and
            returning scores of shape (N, n_output); the first output column
            is used as the score.
        test_users: one list per user holding the user id repeated once per
            candidate item.
        test_items: one list per user holding the candidate item ids; the
            held-out positive item must be the LAST entry of each list.
        top_k: size of the ranked list in which the positive has to appear.

    Returns:
        Fraction of users whose positive item is among the ``top_k`` highest
        scored candidates (0.0 when there are no test users).
    """
    if len(test_users) == 0:
        return 0.0
    users = np.asarray(test_users, dtype=np.float32)
    items = np.asarray(test_items, dtype=np.float32)
    n_test_users, n_candidates = items.shape
    # Score all users in one forward pass, without building an autograd graph.
    with torch.no_grad():
        scores = model(torch.from_numpy(users.reshape(-1, 1)),
                       torch.from_numpy(items.reshape(-1, 1)))
    scores = scores[:, 0].reshape(n_test_users, n_candidates)
    top_indices = scores.topk(min(top_k, n_candidates), dim=1).indices
    hits = (top_indices == n_candidates - 1).any(dim=1).sum().item()
    return hits / n_test_users


# Train for EPOCH full passes over the data and report HR@10 after each one.
# (The debugging `break`s that stopped after the first batch were removed, and
# tensors are passed directly instead of through the deprecated Variable wrapper.)
for e in range(EPOCH):
    running_loss = 0.0
    n_batches = 0
    for step, (batch_x1, batch_x2, batch_y) in enumerate(loader):
        prediction = net(batch_x1, batch_x2)
        loss = loss_func(prediction, batch_y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        n_batches += 1

    print('------第'+str(e)+'个epoch------')
    # Mean loss over the epoch's batches (guarded against an empty loader).
    print('epoch loss = %.4f' % (running_loss / max(n_batches, 1)))
    print('HR@10 = %.4f' % evaluate_hit_ratio(net, utest, itest, top_k=10))