# AutoRec

从 AutoRec 开始，要进入使用深度学习技术进行 CTR 预估的部分了。第一个模型是 AutoRec，它利用协同过滤中用户和物品的共现矩阵，完成物品或者用户的自编码；再利用自编码得到用户对物品的预估评分，最后利用评分进行推荐和排序。简述其过程就是：把数据 $x$ 输入到模型之后，得到一个输出 $\hat{x} = h(x; \theta)$，模型训练的目标是使 $x$ 和 $\hat{x}$ 的值尽可能地接近。用公式表达就是：$\min_{\theta}\sum_{i=1}^n \|x^{(i)} - h(x^{(i)}; \theta)\|^2$

In [2]:
import os
import pandas as pd
import numpy as np
import torch

# Load the ratings file relative to the current working directory.
BASEDIR = os.getcwd()
dataframe = pd.read_csv(
    os.path.join(BASEDIR, 'assets', 'datasets', 'ml-latest-small', 'ratings.csv')
)

# Distinct raw ids, in order of first appearance in the ratings file.
userId_unique = dataframe.userId.unique()
movieId_unique = dataframe.movieId.unique()

# Map every raw id to a contiguous 0-based position so the ids can be used
# directly as row (user) and column (movie) indices of the rating matrix.
userId_dict = {raw_id: pos for pos, raw_id in enumerate(userId_unique)}
movieId_dict = {raw_id: pos for pos, raw_id in enumerate(movieId_unique)}

# One (user_index, movie_index) coordinate pair and one rating per CSV row.
# The lookups are vectorised with Series.map instead of a Python-level loop.
user_index = dataframe.userId.map(userId_dict).to_numpy(dtype=np.int64)
movie_index = dataframe.movieId.map(movieId_dict).to_numpy(dtype=np.int64)

i = torch.tensor(np.column_stack((user_index, movie_index)), dtype=torch.long)
v = torch.tensor(dataframe.rating.to_numpy(dtype=np.float64), dtype=torch.double)

# Dense (n_users, n_movies) matrix of ratings. Pairs without a rating are
# filled with 0 by to_dense(). torch.sparse_coo_tensor replaces the
# deprecated torch.sparse.DoubleTensor constructor; the size is passed
# explicitly rather than inferred from the largest index.
X_train = torch.sparse_coo_tensor(
    i.t(), v, size=(len(userId_dict), len(movieId_dict))
).to_dense()

In [32]:
# PyTorch Version

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# Target device for tensors; not referenced by the other cells visible here.
device = torch.device('cpu')
# Double precision, matching the DoubleTensor ratings matrix built above.
dtype = torch.double

# Width of the autoencoder's input and output layers: the number of rows of
# X_train, i.e. the number of distinct users (each item's rating vector has
# one entry per user).
INPUT_DIMENSION = X_train.shape[0]

class AutoRec(nn.Module):
    """Two-layer linear autoencoder over rating vectors.

    The input is projected to ``m`` hidden units by ``h1`` and projected back
    to the input width by ``h2``. No activation function is applied between
    or after the two layers. Both layers hold double-precision parameters,
    so inputs must be ``torch.double`` tensors.

    Args:
        m: Number of hidden units (size of the bottleneck).
        input_dim: Width of the input and output layers. When omitted, the
            module-level ``INPUT_DIMENSION`` is used, which keeps
            ``AutoRec(m)`` working exactly as before.
    """

    def __init__(self, m, input_dim=None):
        super().__init__()
        if input_dim is None:
            # Backward-compatible default: fall back to the notebook global.
            input_dim = INPUT_DIMENSION
        self.h1 = nn.Linear(input_dim, m).double()
        self.h2 = nn.Linear(m, input_dim).double()

    def forward(self, x):
        """Return the reconstruction of ``x``, shape ``(batch, input_dim)``."""
        # The layers are already double precision, so the output needs no
        # additional cast.
        return self.h2(self.h1(x))

LEARNING_RATE = 1e-5

BATCH_SIZE = 8
# NOTE: each "epoch" below is a single optimisation step on one random
# mini-batch, not a full pass over the data.
EPOCH = 100
# Integer print interval; max() keeps the modulo below valid when EPOCH < 10.
PRINT_STEP = max(1, EPOCH // 10)

# Number of hidden units in the autoencoder.
m = 10
autoRec = AutoRec(m)
loss_fn = nn.MSELoss()
optimizer = optim.Adam(autoRec.parameters(), lr=LEARNING_RATE)

for epoch in range(EPOCH):
    optimizer.zero_grad()

    # X_train is (users, movies). A training sample is one movie's rating
    # vector over all users, so the batch is drawn from the column axis and
    # the sampling range must be the number of columns (shape[1]). Drawing
    # from shape[0] would restrict sampling to the first n_users movies.
    index = np.random.randint(0, X_train.shape[1], size=BATCH_SIZE)
    X_batch = X_train[:, index].T  # (BATCH_SIZE, n_users)

    y_hat = autoRec(X_batch)
    # NOTE(review): the MSE is averaged over every entry of the batch,
    # including the zeros that stand for missing ratings. Confirm whether
    # the loss should be restricted to observed ratings.
    loss = loss_fn(y_hat, X_batch)
    loss.backward()
    optimizer.step()

    if epoch % PRINT_STEP == 0:
        print('EPOCH: %d, loss: %f' % (epoch, loss.item()))

EPOCH: 0, loss: 0.566581
EPOCH: 10, loss: 1.809191
EPOCH: 20, loss: 0.631760
EPOCH: 30, loss: 1.134131
EPOCH: 40, loss: 1.035285
EPOCH: 50, loss: 1.330597
EPOCH: 60, loss: 1.542136
EPOCH: 70, loss: 1.292075
EPOCH: 80, loss: 1.650838
EPOCH: 90, loss: 1.480408
