## AutoRec

目标函数：


$$\arg\min_{W,V,\mu,b} \sum_{i=1}^{M} \Vert R_{*i}-h(R_{*i})\Vert _{O}^{2}+\lambda (\Vert W\Vert _{F}^{2}+\Vert V \Vert ^{2}_{F})$$

$$h(R_{*i}) = f(W\cdot g(VR_{*i}+\mu)+b)$$

评估指标（RMSE）：


$$RMSE = \sqrt{\frac{1}{N} \sum_{i=1}^{N} (y_i - \hat y_i)^{2}}$$

In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as Data
from sklearn.model_selection import train_test_split

In [2]:
# Path of the ratings file that is loaded into the `data` DataFrame.
path = '../data/ml-100k/u.data'
# Mini-batch size used by the training DataLoader.
BATCH_SIZE = 512

In [3]:
# Read the raw ratings: tab-separated, no header row, four named columns.
data = pd.read_csv(
    path,
    sep='\t',
    header=None,
    names=['user_id', 'item_id', 'rating', 'timestamp'],
)

In [4]:
# Show the first rows to sanity-check the parsed columns.
data.head()

Unnamed: 0,user_id,item_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [5]:
# Ratings are stored at train_data[item_id][user_id], i.e. the matrix is
# indexed by the raw ids, so each dimension must exceed the largest id.
# Counting distinct ids is only sufficient when the ids are exactly 1..N;
# using the maximum id also covers id ranges with gaps.
num_users = int(data.user_id.max()) + 1
num_items = int(data.item_id.max()) + 1
# Width of the autoencoder's hidden layer.
num_factors = 500

In [6]:
# Hold out 10% of the (user_id, item_id) -> rating samples for testing;
# the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    data[['user_id', 'item_id']].values,
    data['rating'].values,
    test_size=0.1,
    random_state=2020,
)

In [7]:
# Dense rating matrix with one row per item id and one column per user id;
# cells without a training rating stay 0.
train_data = np.zeros((num_items, num_users))

for (user_id, item_id), rating in zip(X_train, y_train):
    train_data[item_id][user_id] = rating

In [8]:
# For every held-out (user_id, item_id) pair, take the training-rating row of
# its item as the model input. A single NumPy row gather replaces the per-row
# .tolist() loop, which materialised len(X_test) * num_users Python floats
# before the tensor was built.
test_data = train_data[X_test[:, 1]]
test = torch.tensor(test_data, dtype=torch.float32)
# User id of each test pair; it selects which column of the corresponding
# predicted row is compared against the true rating.
test_user = X_test[:, 0]

In [9]:
# float32 tensor copy of the item-by-user training matrix.
train = torch.tensor(train_data,dtype=torch.float32)

In [10]:
# Shuffled mini-batches of item rows; each yielded batch wraps the rating
# tensor in a one-element sequence.
train_loader = Data.DataLoader(Data.TensorDataset(train), batch_size=BATCH_SIZE, shuffle=True)

In [11]:
class AutoRec(nn.Module):
    """Autoencoder with one sigmoid hidden layer over rating vectors.

    Each input row holds one rating per user (``num_users`` values); it is
    encoded into ``num_hidden`` sigmoid units and decoded back linearly to
    ``num_users`` values.
    """

    def __init__(self, num_hidden, num_users):
        """Create the encoder, dropout and decoder layers.

        Args:
            num_hidden: Number of units in the hidden layer.
            num_users: Length of every input and output rating vector.
        """
        super().__init__()
        self.encoder = nn.Linear(in_features=num_users, out_features=num_hidden, bias=True)
        self.dropout = nn.Dropout(p=0.05)
        self.decoder = nn.Linear(in_features=num_hidden, out_features=num_users, bias=True)

    def forward(self, data):
        """Return the reconstruction used for training.

        The decoder output is multiplied element-wise by ``data.sign()``, so
        every position where the input is 0 yields 0 and does not pull the
        reconstruction towards a rating that was never given.
        """
        hidden = torch.sigmoid(self.encoder(data))
        output = self.decoder(self.dropout(hidden))
        return output * data.sign()

    def predict(self, data):
        """Return the unmasked reconstruction for every position of ``data``.

        Dropout is a training-time regulariser only, so it is deliberately not
        applied here: the prediction is deterministic even when the module is
        still in training mode (e.g. when called from inside a training loop).
        """
        hidden = torch.sigmoid(self.encoder(data))
        return self.decoder(hidden)

In [12]:
def RMSE(y, y_):
    """Return the root-mean-square error between targets ``y`` and predictions ``y_``.

    The squared differences are summed over all elements and divided by
    ``len(y)`` before the square root is taken.
    """
    squared_error = (y - y_) ** 2
    mean_squared_error = squared_error.sum() / len(y)
    return mean_squared_error ** 0.5

def evaluator(user, predicts, y) :
    """Return the RMSE between ``y`` and the per-sample predictions.

    For sample ``i`` the predicted value is ``predicts[i][user[i]]``, i.e. the
    entry of the i-th predicted row at that sample's user index.

    Args:
        user: Sequence of column indices, one per sample.
        predicts: Indexable 2-D predictions, one row per sample.
        y: True values; its length sets the size of the prediction vector.
    """
    selected = np.zeros(len(y))
    for row, column in enumerate(user):
        selected[row] = predicts[row][column]
    return RMSE(y, selected)

In [13]:
# Build the autoencoder: num_factors hidden units, num_users inputs/outputs.
model = AutoRec(num_hidden=num_factors, num_users=num_users)

In [14]:
# Adam over all model parameters; weight_decay supplies the regularisation strength.
optimizer = torch.optim.Adam(model.parameters(), lr=0.002, weight_decay=1e-5)
# Mean squared error between the model output and the input ratings.
loss_func = nn.MSELoss()

In [15]:
# Train for 25 epochs and report the held-out RMSE every 300 steps.
for epoch in range(25):
    # The loop variable is named `batch` (not `data`) so the ratings
    # DataFrame `data` is not overwritten by the last mini-batch.
    for step, batch in enumerate(train_loader):
        ratings = batch[0]
        output = model(ratings)
        train_loss = loss_func(output, ratings)
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        if step % 300 == 0:
            # Evaluate with dropout disabled and without building an autograd
            # graph, then switch back to training mode for the next step.
            model.eval()
            with torch.no_grad():
                predictions = model.predict(test)
            model.train()
            print('Epoch : {}|train_loss : {:.4f}|test_loss : {:.4f}'.format(epoch, train_loss.item(), evaluator(test_user, predictions.numpy(), y_test)))

Epoch : 0|train_loss : 0.7790|test_loss : 3.1863
Epoch : 1|train_loss : 0.1041|test_loss : 1.1528
Epoch : 2|train_loss : 0.1567|test_loss : 1.6770
Epoch : 3|train_loss : 0.0945|test_loss : 1.1796
Epoch : 4|train_loss : 0.0684|test_loss : 1.1399
Epoch : 5|train_loss : 0.0678|test_loss : 1.1464
Epoch : 6|train_loss : 0.0615|test_loss : 1.0459
Epoch : 7|train_loss : 0.0617|test_loss : 1.0382
Epoch : 8|train_loss : 0.0598|test_loss : 1.0193
Epoch : 9|train_loss : 0.0515|test_loss : 0.9946
Epoch : 10|train_loss : 0.0542|test_loss : 0.9941
Epoch : 11|train_loss : 0.0541|test_loss : 0.9758
Epoch : 12|train_loss : 0.0483|test_loss : 0.9664
Epoch : 13|train_loss : 0.0477|test_loss : 0.9597
Epoch : 14|train_loss : 0.0524|test_loss : 0.9548
Epoch : 15|train_loss : 0.0484|test_loss : 0.9534
Epoch : 16|train_loss : 0.0437|test_loss : 0.9485
Epoch : 17|train_loss : 0.0476|test_loss : 0.9438
Epoch : 18|train_loss : 0.0441|test_loss : 0.9398
Epoch : 19|train_loss : 0.0409|test_loss : 0.9401
Epoch : 20