# LS-PLM(MLR)

MLR 的思想是，先考虑数据 $x$ 属于整体的哪一个部分，再看这个数据在这个部分上的二分类结果。MLR 的数学表达式可以很好地体现这个思想：$f(x) = \sum_{i=1}^m \frac{e^{u_i \cdot x}}{\sum_{j=1}^{m} e^{u_j \cdot x}} \cdot \frac{1}{1 + e^{-w_i \cdot x}}$。其中 $m$ 是分片的个数，$u_i$ 是决定 $x$ 属于第 $i$ 个分片的 softmax 参数，$w_i$ 是第 $i$ 个分片上逻辑回归的参数。利用 PyTorch，可以很容易实现这个端到端的模型。

In [31]:
# load data

import os

import numpy as np
import torch
from scipy.sparse import coo_matrix

# Parse the training file into COO triplets and build dense training tensors.
# Each non-empty line is expected to look like
#     <label> <field>:<feature>:<value> <field>:<feature>:<value> ...
# where <label> and <feature> are integers and <value> is a float.
BASEDIR = os.getcwd()
row = []   # sample index of every parsed entry
col = []   # feature index of every parsed entry
data = []  # value of every parsed entry
y = []     # one integer label per sample

col_cnt = -1  # largest feature index seen so far (-1 while nothing was parsed)

idx = 0  # index of the sample being parsed; equals the sample count afterwards
with open(BASEDIR + '/assets/datasets/criteo_ctr/small_train.txt') as f:
    for line in f:
        # split() without arguments drops the trailing newline and tolerates
        # repeated or trailing whitespace, so every line is parsed the same way.
        elems = line.split()
        if not elems:
            # Blank line: nothing to parse, and it must not consume a sample index.
            continue
        y.append(int(elems[0]))
        for elem in elems[1:]:
            field, feature, value = elem.split(':')
            feature = int(feature)
            col_cnt = max(col_cnt, feature)
            row.append(idx)
            col.append(feature)
            data.append(float(value))
        idx += 1

i = torch.tensor([row, col], dtype=torch.long)
v = torch.tensor(data, dtype=torch.double)
# Give the shape explicitly instead of letting it be inferred from the largest
# indices: otherwise trailing samples without any feature would be dropped and
# X_train would no longer line up with y_train.
# After the transpose X_train is laid out as (feature, sample).
X_train = torch.sparse_coo_tensor(i, v, size=(idx, col_cnt + 1)).to_dense().T
y_train = torch.tensor(y, dtype=torch.long)

In [57]:
# PyTorch Version

import torch

def sigmoid(x):
    """Return the element-wise logistic function ``1 / (1 + exp(-x))`` of tensor *x*.

    Delegates to ``torch.sigmoid`` instead of spelling the formula out: in the
    explicit form ``exp(-x)`` overflows to ``inf`` for large negative ``x``,
    and the backward pass then multiplies ``0`` by ``inf`` and yields NaN
    gradients.
    """
    return torch.sigmoid(x)

# Train the MLR model f(x) = sum_i softmax_i(u . x) * sigmoid(w_i . x) with
# plain mini-batch gradient descent on the log loss.
device = torch.device('cpu')
dtype = torch.double

# X_train is indexed as (feature, sample) below, so axis 0 is the input size.
INPUT_DIMENSION, OUTPUT_DIMENSION = X_train.shape[0], 1

m = 3  # number of pieces in the mixture
u = torch.rand(INPUT_DIMENSION, m, device=device, dtype=dtype, requires_grad=True)  # softmax (gating) weights
w = torch.rand(INPUT_DIMENSION, m, device=device, dtype=dtype, requires_grad=True)  # per-piece classifier weights

EPOCH = 10  # number of update steps; each step draws one random mini-batch

LEARNING_RATE = 1e-3
BATCH_SIZE = 8

# Integer step of at least 1, so the modulo below is well-defined for any EPOCH.
PRINT_STEP = max(1, EPOCH // 10)

# Keeps y_hat strictly inside (0, 1) so neither logarithm in the loss sees 0.
EPS = 1e-12

for epoch in range(EPOCH):
    # Sample BATCH_SIZE column indices (with replacement) for this step.
    index = np.random.randint(0, X_train.shape[1], size=BATCH_SIZE)
    X_batch = X_train[:, index]
    y_batch = y_train[index].to(dtype)

    # torch.softmax is the overflow-safe form of exp(.) / sum(exp(.)).
    y_softmax_part = torch.softmax(u.T.mm(X_batch), dim=0)
    y_linear_part = sigmoid(w.T.mm(X_batch))

    # Mix the per-piece predictions with their gate weights: one value per sample.
    y_hat = (y_softmax_part * y_linear_part).sum(dim=0).clamp(EPS, 1 - EPS)
    logloss = -1 * torch.sum(y_batch * torch.log(y_hat) + (1 - y_batch) * torch.log(1 - y_hat)) / BATCH_SIZE

    logloss.backward()

    with torch.no_grad():
        u -= LEARNING_RATE * u.grad
        w -= LEARNING_RATE * w.grad
        # backward() accumulates into .grad; reset it so the next step uses
        # only its own gradient instead of the sum of all previous ones.
        u.grad.zero_()
        w.grad.zero_()

    if epoch % PRINT_STEP == 0:
        print('EPOCH: %d, loss: %f' % (epoch, logloss.item()))

EPOCH: 0, loss: 1.989119
EPOCH: 1, loss: 1.625195
EPOCH: 2, loss: 2.060608
EPOCH: 3, loss: 2.718789
EPOCH: 4, loss: 2.496257
EPOCH: 5, loss: 1.947505
EPOCH: 6, loss: 1.977964
EPOCH: 7, loss: 2.236859
EPOCH: 8, loss: 2.670226
EPOCH: 9, loss: 2.287574
