## NeuMF

NeuMF model:

<img src="../data/img/NeuMF_model.png" height="600" width="600" style="float:left" >


评价指标：

* Hit Rate

$$Hit@l = \frac{1}{m}\sum_{u\in \mathrm{U}} 1(rank_{u,g_u} \le l)$$

* AUC

$$AUC = \frac{1}{m}\sum_{u\in \mathrm{U}} \frac{1}{\vert I \backslash S_u \vert} \sum _{j \in I \backslash S_u}1(rank_{u,g_u} < rank_{u,j})$$

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.utils.data as Data
from sklearn.model_selection import train_test_split

In [2]:
# Mini-batch size used when building the training DataLoader.
BATCH_SIZE = 512
# Tab-separated ratings file with no header row, so column names are given here.
path = '../data/ml-100k/u.data'
data = pd.read_csv(
    path,
    sep='\t',
    header=None,
    names=['user_id', 'item_id', 'rating', 'timestamp'],
)

In [3]:
# The raw ids are used directly as embedding indices, so every table needs
# (largest id + 1) rows. Counting distinct ids only matches that when the ids
# are contiguous; sizing from the maximum stays valid when ids have gaps.
num_users = int(data.user_id.max()) + 1
num_items = int(data.item_id.max()) + 1
# Dimension of every user/item embedding vector.
num_factors = 10
# [in_features, out_features] for each MLP layer. The first layer receives a
# user embedding concatenated with an item embedding, hence 2 * num_factors.
mlp_layer = [
    [num_factors * 2, num_factors],
    [num_factors, num_factors],
    [num_factors, num_factors],
]

In [13]:
class NeuMF(nn.Module):
    """Score (user, item) pairs by summing a biased MF branch and an MLP branch.

    Two independent sets of embeddings are kept. ``P``/``Q`` together with the
    scalar ``user_bias``/``item_bias`` feed a dot-product branch; ``U``/``V``
    are concatenated and passed through a stack of ``Linear`` + ``ReLU``
    layers. The prediction is the sum of both branch outputs.

    Args:
        num_factors: Dimension of every user/item embedding vector.
        num_users: Number of rows in each user embedding table; must be
            larger than the biggest user id passed to ``forward``.
        num_items: Number of rows in each item embedding table; must be
            larger than the biggest item id passed to ``forward``.
        mlp_layer: Sequence of ``[in_features, out_features]`` pairs, one per
            ``Linear`` layer. The first ``in_features`` has to equal
            ``2 * num_factors`` because the MLP input is a user embedding
            concatenated with an item embedding.
    """

    def __init__(self, num_factors, num_users, num_items, mlp_layer):
        super().__init__()
        # Matrix-factorisation branch: latent factors plus per-id scalar biases.
        self.P = nn.Embedding(num_embeddings=num_users, embedding_dim=num_factors)
        self.Q = nn.Embedding(num_embeddings=num_items, embedding_dim=num_factors)
        self.user_bias = nn.Embedding(num_users, 1)
        self.item_bias = nn.Embedding(num_items, 1)
        # MLP branch: a separate pair of embedding tables.
        self.U = nn.Embedding(num_embeddings=num_users, embedding_dim=num_factors)
        self.V = nn.Embedding(num_embeddings=num_items, embedding_dim=num_factors)
        self.mlp = nn.Sequential()
        for idx, layer in enumerate(mlp_layer, start=1):
            in_features, out_features = layer[0], layer[1]
            self.mlp.add_module(f'layer{idx}', nn.Linear(in_features, out_features))
            self.mlp.add_module(f'relu{idx}', nn.ReLU())

    def forward(self, data: torch.Tensor) -> torch.Tensor:
        """Return one predicted score per row of ``data``.

        Args:
            data: 2-D integer tensor; column 0 holds user ids and column 1
                holds item ids.

        Returns:
            1-D tensor with one score per input row.
        """
        user_id, item_id = data[:, 0], data[:, 1]

        # MF branch: dot product of the latent factors plus both biases.
        # squeeze(-1) drops only the trailing size-1 bias dimension, so the
        # batch dimension is preserved even for a batch of one.
        interaction = (self.P(user_id) * self.Q(item_id)).sum(dim=1)
        user_offset = self.user_bias(user_id).squeeze(-1)
        item_offset = self.item_bias(item_id).squeeze(-1)
        mf_output = interaction + user_offset + item_offset

        # MLP branch: concatenated embeddings through the layer stack, then
        # the last layer's activations are summed into a single score.
        mlp_input = torch.cat((self.U(user_id), self.V(item_id)), dim=1)
        mlp_output = self.mlp(mlp_input).sum(dim=1)

        return mf_output + mlp_output

In [22]:
# Build the model from the sizes and MLP layout configured earlier.
nmf = NeuMF(
    num_factors=num_factors,
    num_users=num_users,
    num_items=num_items,
    mlp_layer=mlp_layer,
)

In [23]:
# Print the module tree to check the embedding sizes and the MLP layout.
print(nmf)

NeuMF(
  (P): Embedding(944, 10)
  (Q): Embedding(1683, 10)
  (user_bias): Embedding(944, 1)
  (item_bias): Embedding(1683, 1)
  (U): Embedding(944, 10)
  (V): Embedding(1683, 10)
  (mlp): Sequential(
    (layer1): Linear(in_features=20, out_features=10, bias=True)
    (relu1): ReLU()
    (layer2): Linear(in_features=10, out_features=10, bias=True)
    (relu2): ReLU()
    (layer3): Linear(in_features=10, out_features=10, bias=True)
    (relu3): ReLU()
  )
)


In [24]:
# Hold out 10% of the (user_id, item_id) -> rating rows for evaluation;
# the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    data[['user_id', 'item_id']].values,
    data['rating'].values,
    test_size=0.1,
    random_state=2020,
)

In [25]:
# Id pairs become int64 tensors (they are used as embedding indices);
# ratings become float32 regression targets.
X_train, X_test = [torch.tensor(ids, dtype=torch.long) for ids in (X_train, X_test)]
y_train, y_test = [torch.tensor(ratings, dtype=torch.float32) for ratings in (y_train, y_test)]

In [26]:
# Shuffled mini-batches of (id pairs, rating) drawn from the training split.
train_loader = Data.DataLoader(
    Data.TensorDataset(X_train, y_train),
    batch_size=BATCH_SIZE,
    shuffle=True,
)

In [27]:
def RMSE(y, y_):
    """Return the root-mean-square error between ``y`` and ``y_``.

    The squared differences are summed over all elements and divided by
    ``len(y)`` (the size of the first dimension) before the square root
    is taken.
    """
    residual = y - y_
    mean_squared = (residual ** 2).sum() / len(y)
    return mean_squared ** 0.5

In [28]:
# Training objective: the RMSE function defined in this notebook.
loss_func = RMSE
# Adam over every model parameter, with a small weight-decay penalty.
optimizer = torch.optim.Adam(
    nmf.parameters(),
    lr=0.02,
    weight_decay=0.0001,
)

In [29]:
# Run 20 passes over the training loader, reporting the current mini-batch
# loss and the loss on the held-out split every 300th step.
for epoch in range(20):
    for step, (tx, ty) in enumerate(train_loader):
        # Clear stale gradients, then do one forward/backward/update pass.
        optimizer.zero_grad()
        output = nmf(tx)
        train_loss = loss_func(output, ty)
        train_loss.backward()
        optimizer.step()

        if step % 300 != 0:
            continue

        # Evaluation only: no autograd graph is needed for the test split.
        with torch.no_grad():
            y = nmf(X_test)
            test_loss = loss_func(y, y_test)
        print('Epoch : {}|train_loss : {:.4f}| test_loss :{:.4f}'.format(epoch, train_loss.item(), test_loss.item()))

Epoch : 0|train_loss : 4.967004776000977| test_loss :4.707469940185547
Epoch : 1|train_loss : 1.0342140197753906| test_loss :1.1050599813461304
Epoch : 2|train_loss : 0.9001923203468323| test_loss :0.9801263213157654
Epoch : 3|train_loss : 0.8529559373855591| test_loss :0.9584418535232544
Epoch : 4|train_loss : 0.8460323214530945| test_loss :0.9351161122322083
Epoch : 5|train_loss : 0.8072608113288879| test_loss :0.9267516732215881
Epoch : 6|train_loss : 0.8310391306877136| test_loss :0.9247966408729553
Epoch : 7|train_loss : 0.8483110666275024| test_loss :0.9202793836593628
Epoch : 8|train_loss : 0.7432692646980286| test_loss :0.9180523157119751
Epoch : 9|train_loss : 0.7926887273788452| test_loss :0.9198423027992249
Epoch : 10|train_loss : 0.8047158122062683| test_loss :0.9216089844703674
Epoch : 11|train_loss : 0.7863147258758545| test_loss :0.9174734950065613
Epoch : 12|train_loss : 0.779283344745636| test_loss :0.9187537431716919
Epoch : 13|train_loss : 0.8250163793563843| test_lo