# Product Neural Network

PNN 是在 DeepCrossing 模型上的修改，在 DeepCrossing 模型中，每一个 field 经过 Embedding 过后直接被拼接在了一起，然后送入全连接层进行后面的计算。在 PNN 模型中，在第一个全连接层之前增加了一个交叉乘积层，交叉的内容是 Embedding 之后的任意两个 field。PNN 的另一个修改是把 DeepCrossing 中的残差 Block 替换成了全连接层。在 PNN 中的交叉采用的是向量的外积，计算公式为 $z = |x||y| sin(x, y)$，外积有很好的特性，能衡量两个向量的接近程度。

In [1]:
# build train data

import os
import numpy as np

BASEDIR = os.getcwd()

fields_dict = {}
lines = None

with open(os.path.join(BASEDIR, 'assets/datasets/criteo_ctr/small_train.txt')) as f:
    lines = f.readlines()

fields_dict = {}
for line in lines:
    line = line.strip('\n')

    for elem in line.split(' ')[1:]:
        field, feature, _ = elem.split(':')

        if field not in fields_dict:
            fields_dict[field] = {'index': len(fields_dict), 'features': {}, 'last_idx': -1}

        if feature not in fields_dict[field]['features']:
            fields_dict[field]['features'][feature] = fields_dict[field]['last_idx'] + 1
            fields_dict[field]['last_idx'] = fields_dict[field]['last_idx'] + 1

for field in fields_dict.keys():
    if 'none' not in fields_dict[field]['features']:
        fields_dict[field]['features']['none'] = fields_dict[field]['last_idx'] + 1
        fields_dict[field]['last_idx'] = fields_dict[field]['last_idx'] + 1


def init_field_tensor(fields_dict):
    init_tensor = np.zeros((len(fields_dict), 1))
    for field in fields_dict.keys():
        init_tensor[fields_dict[field]['index']] = fields_dict[field]['last_idx']
    return init_tensor.astype(int)


X_train = []
y_train = []

for line in lines:
    line = line.strip('\n')
    elems = line.split(' ')
    y_train.append(float(elems[0]))

    init_tensors = init_field_tensor(fields_dict)
    for elem in elems[1:]:
        field, feature, _ = elem.split(':')
        field_idx = fields_dict[field]['index']
        feature_idx = fields_dict[field]['features'][feature]
        init_tensors[field_idx] = feature_idx
    X_train.append(init_tensors)

X_train = np.concatenate(X_train, 1)
y_train = np.array(y_train)

In [2]:
# build Product Neural Network

import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np


class FieldEmbeddingBlock(nn.Module):
    def __init__(self, fields_dict, embedding_size):
        super(FieldEmbeddingBlock, self).__init__()
        self.fields_dict = fields_dict
        self.fields_embedding = {}
        self.embedding_size = embedding_size

        for field in self.fields_dict.keys():
            field_idx = self.fields_dict[field]['index']
            self.fields_embedding[field_idx] = nn.Embedding(len(self.fields_dict[field]['features']),
                                                            self.embedding_size)

    def forward(self, input_field_tensor):
        out = torch.zeros(input_field_tensor.shape[0] * self.embedding_size, input_field_tensor.shape[1])
        idx = 0
        for field in self.fields_dict.keys():
            field_idx = self.fields_dict[field]['index']
            out[idx:idx + self.embedding_size, :] = self.fields_embedding[field_idx](
                input_field_tensor[field_idx, :])
            idx += self.embedding_size
        return out.double()


class OuterBlock(nn.Module):
    def __init__(self, fields_dict, embedding_size):
        super(OuterBlock, self).__init__()
        self.field_cnt = len(fields_dict)
        self.fields_dict = fields_dict
        self.embedding_size = embedding_size
        self.output_dimension = self.embedding_size * self.embedding_size * self.field_cnt

    def forward(self, input_field_tensor):
        out = torch.zeros(input_field_tensor.shape[1], self.output_dimension)
        p = torch.zeros(self.embedding_size, input_field_tensor.shape[1])
        i = 0
        for _ in range(self.field_cnt):
            p = p + input_field_tensor[i:i + self.embedding_size, :]
            i *= self.embedding_size

        p = p.T
        p = torch.unsqueeze(p, 1)
        q = torch.transpose(p, 1, 2)
        p = q.matmul(p)

        idx1 = 0
        idx2 = 0
        step2 = self.embedding_size * self.embedding_size
        for _ in self.fields_dict.keys():
            out[:, idx2:idx2 + step2] = (input_field_tensor[idx1:idx1 + self.embedding_size, :].T.matmul(p)).reshape(
                (input_field_tensor.shape[1], -1))
            idx2 += step2
            idx1 += self.embedding_size
        return out



class InnerBlock(nn.Module):
    def __init__(self, fields_dict, embedding_size):
        super(InnerBlock, self).__init__()
        self.field_cnt = len(fields_dict)
        self.fields_dict = fields_dict
        self.embedding_size = embedding_size
        self.output_dimension = self.field_cnt

        self.inner_layer = []
        for _ in range(self.field_cnt):
            self.inner_layer.append(nn.Linear(self.embedding_size, 1, bias=True).double())

    def forward(self, input_field_tensor):
        input_field_tensor = input_field_tensor.T
        out = torch.zeros(input_field_tensor.shape[0], self.output_dimension)
        idx = 0
        for field in self.fields_dict.keys():
            field_idx = self.fields_dict[field]['index'] * self.embedding_size
            out[:, idx:idx+1] = self.inner_layer[idx](
                input_field_tensor[:, field_idx:field_idx + self.embedding_size])
            idx += 1
        return out


class ProductNeuralNetwork(nn.Module):
    def __init__(self, fields_dict, embedding_size, layers):
        super(ProductNeuralNetwork, self).__init__()
        self.fields_dict = fields_dict
        self.embedding_size = embedding_size
        self.field_embedding = FieldEmbeddingBlock(fields_dict, embedding_size)
        self.inner_block = InnerBlock(fields_dict, embedding_size)
        self.outer_block = OuterBlock(fields_dict, embedding_size)

        fc_layers = []
        fc_layers.append(nn.Linear(self.inner_block.output_dimension + self.cross_block.output_dimension, layers[0],
                                   bias=True).double())
        for i in range(1, len(layers)):
            fc_layers.append(nn.Linear(layers[i - 1], layers[i], bias=True).double())
        self.fc_block = nn.Sequential(*fc_layers)

    def forward(self, x):
        out = self.field_embedding(x)
        out1 = self.inner_block(out)
        out2 = self.outer_block(out)
        out = torch.cat([out1, out2], 1)
        return F.sigmoid(self.fc_block(out.double()).double())

In [3]:
import torch.optim as optim

device = torch.device('cpu')
LEARNING_RATE = 1e-3

EPOCH = 100
PRINT_STEP = EPOCH / 10
N = len(y_train)

HIDDEN_DIMENSION = 8
EMBEDDING_SIZE = 8       
product_nerual_network = ProductNeuralNetwork(fields_dict, EMBEDDING_SIZE, [5, 5, 5, 1])

BATCH_SIZE = 8
loss_fn = nn.BCELoss()
optimizer = optim.Adam(product_nerual_network.parameters(), lr=LEARNING_RATE)

for epoch in range(EPOCH):

    index = np.random.randint(0, X_train.shape[0], size=BATCH_SIZE)
    X_batch = torch.from_numpy(X_train[:, index]).long()
    y_batch = torch.from_numpy(y_train[index]).reshape(-1, BATCH_SIZE)

    y_hat = product_nerual_network(X_batch).reshape(-1, BATCH_SIZE)
    loss = loss_fn(y_hat, y_batch)

    loss.backward()
    optimizer.step()

    if epoch % PRINT_STEP == 0:
        print('EPOCH: %d, loss: %f' % (epoch, loss))

EPOCH: 0, loss: 53.167697
EPOCH: 10, loss: 26.788322
EPOCH: 20, loss: 13.736141
EPOCH: 30, loss: 25.000000
EPOCH: 40, loss: 25.000000
EPOCH: 50, loss: 12.500000
EPOCH: 60, loss: 0.000000
EPOCH: 70, loss: 0.000000
EPOCH: 80, loss: 25.000000
EPOCH: 90, loss: 12.500000
